diff --git a/src/RIGID/fix_rigid_small.cpp b/src/RIGID/fix_rigid_small.cpp index fb185d7702..a48395be04 100644 --- a/src/RIGID/fix_rigid_small.cpp +++ b/src/RIGID/fix_rigid_small.cpp @@ -34,6 +34,7 @@ #include "variable.h" #include "random_mars.h" #include "math_const.h" +#include "hashlittle.h" #include "memory.h" #include "error.h" @@ -70,8 +71,7 @@ FixRigidSmall::FixRigidSmall(LAMMPS *lmp, int narg, char **arg) : xcmimage(NULL), displace(NULL), eflags(NULL), orient(NULL), dorient(NULL), avec_ellipsoid(NULL), avec_line(NULL), avec_tri(NULL), counts(NULL), itensor(NULL), mass_body(NULL), langextra(NULL), random(NULL), - id_dilate(NULL), onemols(NULL), hash(NULL), bbox(NULL), ctr(NULL), - idclose(NULL), rsqclose(NULL) + id_dilate(NULL), onemols(NULL) { int i; @@ -107,18 +107,18 @@ FixRigidSmall::FixRigidSmall(LAMMPS *lmp, int narg, char **arg) : // parse args for rigid body specification int *mask = atom->mask; - tagint *bodyid = NULL; + tagint *bodyID = NULL; int nlocal = atom->nlocal; if (narg < 4) error->all(FLERR,"Illegal fix rigid/small command"); if (strcmp(arg[3],"molecule") == 0) { if (atom->molecule_flag == 0) error->all(FLERR,"Fix rigid/small requires atom attribute molecule"); - bodyid = atom->molecule; + bodyID = atom->molecule; } else if (strcmp(arg[3],"custom") == 0) { if (narg < 5) error->all(FLERR,"Illegal fix rigid/small command"); - bodyid = new tagint[nlocal]; + bodyID = new tagint[nlocal]; customflag = 1; // determine whether atom-style variable or atom property is used. @@ -126,9 +126,11 @@ FixRigidSmall::FixRigidSmall(LAMMPS *lmp, int narg, char **arg) : int is_double=0; int custom_index = atom->find_custom(arg[4]+2,is_double); if (custom_index == -1) - error->all(FLERR,"Fix rigid/small custom requires previously defined property/atom"); + error->all(FLERR,"Fix rigid/small custom requires " + "previously defined property/atom"); else if (is_double) - error->all(FLERR,"Fix rigid/small custom requires integer-valued property/atom"); + error->all(FLERR,"Fix rigid/small custom requires " + "integer-valued property/atom"); int minval = INT_MAX; int *value = atom->ivector[custom_index]; @@ -139,15 +141,17 @@ FixRigidSmall::FixRigidSmall(LAMMPS *lmp, int narg, char **arg) : for (i = 0; i < nlocal; i++) if (mask[i] & groupbit) - bodyid[i] = (tagint)(value[i] - minval + 1); - else bodyid[i] = 0; + bodyID[i] = (tagint)(value[i] - minval + 1); + else bodyID[i] = 0; } else if (strstr(arg[4],"v_") == arg[4]) { int ivariable = input->variable->find(arg[4]+2); if (ivariable < 0) - error->all(FLERR,"Variable name for fix rigid/small custom does not exist"); + error->all(FLERR,"Variable name for fix rigid/small custom " + "does not exist"); if (input->variable->atomstyle(ivariable) == 0) - error->all(FLERR,"Fix rigid/small custom variable is no atom-style variable"); + error->all(FLERR,"Fix rigid/small custom variable is not " + "atom-style variable"); double *value = new double[nlocal]; input->variable->compute_atom(ivariable,0,value,1,0); int minval = INT_MAX; @@ -158,8 +162,8 @@ FixRigidSmall::FixRigidSmall(LAMMPS *lmp, int narg, char **arg) : for (i = 0; i < nlocal; i++) if (mask[i] & groupbit) - bodyid[i] = (tagint)((tagint)value[i] - minval + 1); - else bodyid[0] = 0; + bodyID[i] = (tagint)((tagint)value[i] - minval + 1); + else bodyID[0] = 0; delete[] value; } else error->all(FLERR,"Unsupported fix rigid custom property"); } else error->all(FLERR,"Illegal fix rigid/small command"); @@ -167,10 +171,11 @@ FixRigidSmall::FixRigidSmall(LAMMPS *lmp, int narg, char **arg) : if (atom->map_style == 0) error->all(FLERR,"Fix rigid/small requires an atom map, see atom_modify"); - // maxmol = largest bodyid # + // maxmol = largest bodyID # + maxmol = -1; for (i = 0; i < nlocal; i++) - if (mask[i] & groupbit) maxmol = MAX(maxmol,bodyid[i]); + if (mask[i] & groupbit) maxmol = MAX(maxmol,bodyID[i]); tagint itmp; MPI_Allreduce(&maxmol,&itmp,1,MPI_LMP_TAGINT,MPI_MAX,world); @@ -400,8 +405,19 @@ FixRigidSmall::FixRigidSmall(LAMMPS *lmp, int narg, char **arg) : // sets bodytag for owned atoms // body attributes are computed later by setup_bodies() - create_bodies(bodyid); - if (customflag) delete [] bodyid; + double time1 = MPI_Wtime(); + + create_bodies(bodyID); + if (customflag) delete [] bodyID; + + double time2 = MPI_Wtime(); + + if (comm->me == 0) { + if (screen) + fprintf(screen," create_bodies CPU = %g secs\n",time2-time1); + if (logfile) + fprintf(logfile," create_bodies CPU = %g secs\n",time2-time1); + } // set nlocal_body and allocate bodies I own @@ -1514,175 +1530,71 @@ void FixRigidSmall::set_v() set bodytag for all owned atoms ------------------------------------------------------------------------- */ -void FixRigidSmall::create_bodies(tagint *bodyid) +void FixRigidSmall::create_bodies(tagint *bodyID) { - int i,m,n; - double unwrap[3]; + int i,m; - // error check on image flags of atoms in rigid bodies - - imageint *image = atom->image; + // allocate buffer for input to rendezvous comm + // ncount = # of my atoms in bodies + int *mask = atom->mask; int nlocal = atom->nlocal; - int *periodicity = domain->periodicity; - int xbox,ybox,zbox; - - int flag = 0; - for (i = 0; i < nlocal; i++) { - if (!(mask[i] & groupbit)) continue; - xbox = (image[i] & IMGMASK) - IMGMAX; - ybox = (image[i] >> IMGBITS & IMGMASK) - IMGMAX; - zbox = (image[i] >> IMG2BITS) - IMGMAX; - if ((xbox && !periodicity[0]) || (ybox && !periodicity[1]) || - (zbox && !periodicity[2])) flag = 1; - } - - int flagall; - MPI_Allreduce(&flag,&flagall,1,MPI_INT,MPI_SUM,world); - if (flagall) error->all(FLERR,"Fix rigid/small atom has non-zero image flag " - "in a non-periodic dimension"); - - // allocate buffer for passing messages around ring of procs - // percount = max number of values to put in buffer for each of ncount - int ncount = 0; for (i = 0; i < nlocal; i++) if (mask[i] & groupbit) ncount++; - int percount = 5; - double *buf; - memory->create(buf,ncount*percount,"rigid/small:buf"); + int *proclist; + memory->create(proclist,ncount,"rigid/small:proclist"); + InRvous *inbuf = (InRvous *) + memory->smalloc(ncount*sizeof(InRvous),"rigid/small:inbuf"); - // create map hash for storing unique body IDs of my atoms - // key = body ID - // value = index into per-body data structure - // n = # of entries in hash - - hash = new std::map(); - hash->clear(); - - // setup hash - // key = body ID - // value = index into N-length data structure - // n = count of unique bodies my atoms are part of - - n = 0; - for (i = 0; i < nlocal; i++) { - if (!(mask[i] & groupbit)) continue; - if (hash->find(bodyid[i]) == hash->end()) (*hash)[bodyid[i]] = n++; - } - - // bbox = bounding box of each rigid body my atoms are part of - - memory->create(bbox,n,6,"rigid/small:bbox"); - - for (i = 0; i < n; i++) { - bbox[i][0] = bbox[i][2] = bbox[i][4] = BIG; - bbox[i][1] = bbox[i][3] = bbox[i][5] = -BIG; - } - - // pack my atoms into buffer as body ID, unwrapped coords + // setup buf to pass to rendezvous comm + // one BodyMsg datum for each constituent atom + // datum = me, local index of atom, atomID, bodyID, unwrapped coords + // owning proc for each datum = random hash of bodyID double **x = atom->x; - - m = 0; - for (i = 0; i < nlocal; i++) { - if (!(mask[i] & groupbit)) continue; - domain->unmap(x[i],image[i],unwrap); - buf[m++] = bodyid[i]; - buf[m++] = unwrap[0]; - buf[m++] = unwrap[1]; - buf[m++] = unwrap[2]; - } - - // pass buffer around ring of procs - // func = update bbox with atom coords from every proc - // when done, have full bbox for every rigid body my atoms are part of - - comm->ring(m,sizeof(double),buf,1,ring_bbox,NULL,(void *)this); - - // check if any bbox is size 0.0, meaning rigid body is a single particle - - flag = 0; - for (i = 0; i < n; i++) - if (bbox[i][0] == bbox[i][1] && bbox[i][2] == bbox[i][3] && - bbox[i][4] == bbox[i][5]) flag = 1; - MPI_Allreduce(&flag,&flagall,1,MPI_INT,MPI_SUM,world); - if (flagall) - error->all(FLERR,"One or more rigid bodies are a single particle"); - - // ctr = center pt of each rigid body my atoms are part of - - memory->create(ctr,n,6,"rigid/small:bbox"); - - for (i = 0; i < n; i++) { - ctr[i][0] = 0.5 * (bbox[i][0] + bbox[i][1]); - ctr[i][1] = 0.5 * (bbox[i][2] + bbox[i][3]); - ctr[i][2] = 0.5 * (bbox[i][4] + bbox[i][5]); - } - - // idclose = ID of atom in body closest to center pt (smaller ID if tied) - // rsqclose = distance squared from idclose to center pt - - memory->create(idclose,n,"rigid/small:idclose"); - memory->create(rsqclose,n,"rigid/small:rsqclose"); - - for (i = 0; i < n; i++) rsqclose[i] = BIG; - - // pack my atoms into buffer as body ID, atom ID, unwrapped coords - tagint *tag = atom->tag; + imageint *image = atom->image; m = 0; for (i = 0; i < nlocal; i++) { if (!(mask[i] & groupbit)) continue; - domain->unmap(x[i],image[i],unwrap); - buf[m++] = bodyid[i]; - buf[m++] = ubuf(tag[i]).d; - buf[m++] = unwrap[0]; - buf[m++] = unwrap[1]; - buf[m++] = unwrap[2]; + proclist[m] = hashlittle(&bodyID[i],sizeof(tagint),0) % nprocs; + inbuf[m].me = me; + inbuf[m].ilocal = i; + inbuf[m].atomID = tag[i]; + inbuf[m].bodyID = bodyID[i]; + domain->unmap(x[i],image[i],inbuf[m].x); + m++; } - // pass buffer around ring of procs - // func = update idclose,rsqclose with atom IDs from every proc - // when done, have idclose for every rigid body my atoms are part of + // perform rendezvous operation + // each proc owns random subset of bodies, receives all atoms in the bodies + // func = compute bbox of each body, flag atom closest to geometric center + // when done: each atom has atom ID of owning atom of its body - comm->ring(m,sizeof(double),buf,2,ring_nearest,NULL,(void *)this); + char *buf; + int nreturn = comm->rendezvous(ncount,proclist,(char *) inbuf,sizeof(InRvous), + rendezvous_body,buf,sizeof(OutRvous), + (void *) this); + OutRvous *outbuf = (OutRvous *) buf; + + memory->destroy(proclist); + memory->sfree(inbuf); - // set bodytag of all owned atoms, based on idclose - // find max value of rsqclose across all procs + // set bodytag of all owned atoms based on outbuf info for constituent atoms - double rsqmax = 0.0; - for (i = 0; i < nlocal; i++) { - bodytag[i] = 0; - if (!(mask[i] & groupbit)) continue; - m = hash->find(bodyid[i])->second; - bodytag[i] = idclose[m]; - rsqmax = MAX(rsqmax,rsqclose[m]); - } + for (i = 0; i < nlocal; i++) + if (!(mask[i] & groupbit)) bodytag[i] = 0; - // pack my atoms into buffer as bodytag of owning atom, unwrapped coords + for (m = 0; m < nreturn; m++) + bodytag[outbuf[m].ilocal] = outbuf[m].atomID; - m = 0; - for (i = 0; i < nlocal; i++) { - if (!(mask[i] & groupbit)) continue; - domain->unmap(x[i],image[i],unwrap); - buf[m++] = ubuf(bodytag[i]).d; - buf[m++] = unwrap[0]; - buf[m++] = unwrap[1]; - buf[m++] = unwrap[2]; - } + memory->sfree(outbuf); - // pass buffer around ring of procs - // func = update rsqfar for atoms belonging to bodies I own - // when done, have rsqfar for all atoms in bodies I own - - rsqfar = 0.0; - comm->ring(m,sizeof(double),buf,3,ring_farthest,NULL,(void *)this); - - // find maxextent of rsqfar across all procs + // maxextent = max of rsqfar across all procs // if defined, include molecule->maxextent MPI_Allreduce(&rsqfar,&maxextent,1,MPI_DOUBLE,MPI_MAX,world); @@ -1691,125 +1603,151 @@ void FixRigidSmall::create_bodies(tagint *bodyid) for (int i = 0; i < nmol; i++) maxextent = MAX(maxextent,onemols[i]->maxextent); } +} + +/* ---------------------------------------------------------------------- + process rigid bodies assigned to me + buf = list of N BodyMsg datums +------------------------------------------------------------------------- */ + +int FixRigidSmall::rendezvous_body(int n, char *inbuf, + int *&proclist, char *&outbuf, + void *ptr) +{ + int i,j,m; + double delx,dely,delz,rsq; + int *iclose; + tagint *idclose; + double *x,*xown,*rsqclose; + double **bbox,**ctr; + + FixRigidSmall *frsptr = (FixRigidSmall *) ptr; + Memory *memory = frsptr->memory; + Error *error = frsptr->error; + MPI_Comm world = frsptr->world; + + // setup hash + // ncount = number of bodies assigned to me + // key = body ID + // value = index into Ncount-length data structure + + InRvous *in = (InRvous *) inbuf; + std::map hash; + tagint id; + + int ncount = 0; + for (i = 0; i < n; i++) { + id = in[i].bodyID; + if (hash.find(id) == hash.end()) hash[id] = ncount++; + } + + // bbox = bounding box of each rigid body + + memory->create(bbox,ncount,6,"rigid/small:bbox"); + + for (m = 0; m < ncount; m++) { + bbox[m][0] = bbox[m][2] = bbox[m][4] = BIG; + bbox[m][1] = bbox[m][3] = bbox[m][5] = -BIG; + } + + for (i = 0; i < n; i++) { + m = hash.find(in[i].bodyID)->second; + x = in[i].x; + bbox[m][0] = MIN(bbox[m][0],x[0]); + bbox[m][1] = MAX(bbox[m][1],x[0]); + bbox[m][2] = MIN(bbox[m][2],x[1]); + bbox[m][3] = MAX(bbox[m][3],x[1]); + bbox[m][4] = MIN(bbox[m][4],x[2]); + bbox[m][5] = MAX(bbox[m][5],x[2]); + } + + // check if any bbox is size 0.0, meaning rigid body is a single particle + + int flag = 0; + for (m = 0; m < ncount; m++) + if (bbox[m][0] == bbox[m][1] && bbox[m][2] == bbox[m][3] && + bbox[m][4] == bbox[m][5]) flag = 1; + int flagall; + MPI_Allreduce(&flag,&flagall,1,MPI_INT,MPI_SUM,world); // sync here? + if (flagall) + error->all(FLERR,"One or more rigid bodies are a single particle"); + + // ctr = geometric center pt of each rigid body + + memory->create(ctr,ncount,3,"rigid/small:bbox"); + + for (m = 0; m < ncount; m++) { + ctr[m][0] = 0.5 * (bbox[m][0] + bbox[m][1]); + ctr[m][1] = 0.5 * (bbox[m][2] + bbox[m][3]); + ctr[m][2] = 0.5 * (bbox[m][4] + bbox[m][5]); + } + + // idclose = atomID closest to center point of each body + + memory->create(idclose,ncount,"rigid/small:idclose"); + memory->create(iclose,ncount,"rigid/small:iclose"); + memory->create(rsqclose,ncount,"rigid/small:rsqclose"); + for (m = 0; m < ncount; m++) rsqclose[m] = BIG; + + for (i = 0; i < n; i++) { + m = hash.find(in[i].bodyID)->second; + x = in[i].x; + delx = x[0] - ctr[m][0]; + dely = x[1] - ctr[m][1]; + delz = x[2] - ctr[m][2]; + rsq = delx*delx + dely*dely + delz*delz; + if (rsq <= rsqclose[m]) { + if (rsq == rsqclose[m] && in[i].atomID > idclose[m]) continue; + iclose[m] = i; + idclose[m] = in[i].atomID; + rsqclose[m] = rsq; + } + } + + // compute rsqfar for all bodies I own + // set rsqfar back in caller + + double rsqfar = 0.0; + + for (int i = 0; i < n; i++) { + m = hash.find(in[i].bodyID)->second; + xown = in[iclose[m]].x; + x = in[i].x; + delx = x[0] - xown[0]; + dely = x[1] - xown[1]; + delz = x[2] - xown[2]; + rsq = delx*delx + dely*dely + delz*delz; + rsqfar = MAX(rsqfar,rsq); + } + + frsptr->rsqfar = rsqfar; + + // pass list of OutRvous datums back to comm->rendezvous + + int nout = n; + memory->create(proclist,nout,"rigid/small:proclist"); + OutRvous *out = (OutRvous *) + memory->smalloc(nout*sizeof(OutRvous),"rigid/small:out"); + + for (int i = 0; i < nout; i++) { + proclist[i] = in[i].me; + out[i].ilocal = in[i].ilocal; + m = hash.find(in[i].bodyID)->second; + out[i].atomID = idclose[m]; + } + + outbuf = (char *) out; // clean up + // Comm::rendezvous will delete proclist and out (outbuf) - delete hash; - memory->destroy(buf); memory->destroy(bbox); memory->destroy(ctr); memory->destroy(idclose); + memory->destroy(iclose); memory->destroy(rsqclose); -} -/* ---------------------------------------------------------------------- - process rigid body atoms from another proc - update bounding box for rigid bodies my atoms are part of -------------------------------------------------------------------------- */ - -void FixRigidSmall::ring_bbox(int n, char *cbuf, void *ptr) -{ - FixRigidSmall *frsptr = (FixRigidSmall *) ptr; - std::map *hash = frsptr->hash; - double **bbox = frsptr->bbox; - - double *buf = (double *) cbuf; - int ndatums = n/4; - - int j,imol; - double *x; - - int m = 0; - for (int i = 0; i < ndatums; i++, m += 4) { - imol = static_cast (buf[m]); - if (hash->find(imol) != hash->end()) { - j = hash->find(imol)->second; - x = &buf[m+1]; - bbox[j][0] = MIN(bbox[j][0],x[0]); - bbox[j][1] = MAX(bbox[j][1],x[0]); - bbox[j][2] = MIN(bbox[j][2],x[1]); - bbox[j][3] = MAX(bbox[j][3],x[1]); - bbox[j][4] = MIN(bbox[j][4],x[2]); - bbox[j][5] = MAX(bbox[j][5],x[2]); - } - } -} - -/* ---------------------------------------------------------------------- - process rigid body atoms from another proc - update nearest atom to body center for rigid bodies my atoms are part of -------------------------------------------------------------------------- */ - -void FixRigidSmall::ring_nearest(int n, char *cbuf, void *ptr) -{ - FixRigidSmall *frsptr = (FixRigidSmall *) ptr; - std::map *hash = frsptr->hash; - double **ctr = frsptr->ctr; - tagint *idclose = frsptr->idclose; - double *rsqclose = frsptr->rsqclose; - - double *buf = (double *) cbuf; - int ndatums = n/5; - - int j,imol; - tagint tag; - double delx,dely,delz,rsq; - double *x; - - int m = 0; - for (int i = 0; i < ndatums; i++, m += 5) { - imol = static_cast (buf[m]); - if (hash->find(imol) != hash->end()) { - j = hash->find(imol)->second; - tag = (tagint) ubuf(buf[m+1]).i; - x = &buf[m+2]; - delx = x[0] - ctr[j][0]; - dely = x[1] - ctr[j][1]; - delz = x[2] - ctr[j][2]; - rsq = delx*delx + dely*dely + delz*delz; - if (rsq <= rsqclose[j]) { - if (rsq == rsqclose[j] && tag > idclose[j]) continue; - idclose[j] = tag; - rsqclose[j] = rsq; - } - } - } -} - -/* ---------------------------------------------------------------------- - process rigid body atoms from another proc - update rsqfar = distance from owning atom to other atom -------------------------------------------------------------------------- */ - -void FixRigidSmall::ring_farthest(int n, char *cbuf, void *ptr) -{ - FixRigidSmall *frsptr = (FixRigidSmall *) ptr; - double **x = frsptr->atom->x; - imageint *image = frsptr->atom->image; - int nlocal = frsptr->atom->nlocal; - - double *buf = (double *) cbuf; - int ndatums = n/4; - - int iowner; - tagint tag; - double delx,dely,delz,rsq; - double *xx; - double unwrap[3]; - - int m = 0; - for (int i = 0; i < ndatums; i++, m += 4) { - tag = (tagint) ubuf(buf[m]).i; - iowner = frsptr->atom->map(tag); - if (iowner < 0 || iowner >= nlocal) continue; - frsptr->domain->unmap(x[iowner],image[iowner],unwrap); - xx = &buf[m+1]; - delx = xx[0] - unwrap[0]; - dely = xx[1] - unwrap[1]; - delz = xx[2] - unwrap[2]; - rsq = delx*delx + dely*dely + delz*delz; - frsptr->rsqfar = MAX(frsptr->rsqfar,rsq); - } + return nout; } /* ---------------------------------------------------------------------- @@ -2472,9 +2410,9 @@ void FixRigidSmall::readfile(int which, double **array, int *inbody) int nlocal = atom->nlocal; - hash = new std::map(); + std::map hash; for (i = 0; i < nlocal; i++) - if (bodyown[i] >= 0) (*hash)[atom->molecule[i]] = bodyown[i]; + if (bodyown[i] >= 0) hash[atom->molecule[i]] = bodyown[i]; // open file and read header @@ -2533,11 +2471,11 @@ void FixRigidSmall::readfile(int which, double **array, int *inbody) id = ATOTAGINT(values[0]); if (id <= 0 || id > maxmol) error->all(FLERR,"Invalid rigid body ID in fix rigid/small file"); - if (hash->find(id) == hash->end()) { + if (hash.find(id) == hash.end()) { buf = next + 1; continue; } - m = (*hash)[id]; + m = hash[id]; inbody[m] = 1; if (which == 0) { @@ -2576,7 +2514,6 @@ void FixRigidSmall::readfile(int which, double **array, int *inbody) delete [] buffer; delete [] values; - delete hash; } /* ---------------------------------------------------------------------- diff --git a/src/RIGID/fix_rigid_small.h b/src/RIGID/fix_rigid_small.h index 3f6826f9bb..a820efcdea 100644 --- a/src/RIGID/fix_rigid_small.h +++ b/src/RIGID/fix_rigid_small.h @@ -23,7 +23,7 @@ FixStyle(rigid/small,FixRigidSmall) #include "fix.h" // replace this later -#include +//#include namespace LAMMPS_NS { @@ -180,13 +180,21 @@ class FixRigidSmall : public Fix { // class data used by ring communication callbacks - std::map *hash; - double **bbox; - double **ctr; - tagint *idclose; - double *rsqclose; double rsqfar; + struct InRvous { + int me,ilocal; + tagint atomID,bodyID; + double x[3]; + }; + + struct OutRvous { + int ilocal; + tagint atomID; + }; + + // local methods + void image_shift(); void set_xv(); void set_v(); @@ -199,11 +207,9 @@ class FixRigidSmall : public Fix { void grow_body(); void reset_atom2body(); - // callback functions for ring communication + // callback function for rendezvous communication - static void ring_bbox(int, char *, void *); - static void ring_nearest(int, char *, void *); - static void ring_farthest(int, char *, void *); + static int rendezvous_body(int, char *, int *&, char *&, void *); // debug diff --git a/src/comm.cpp b/src/comm.cpp index f355a562fc..5fcfa141d0 100644 --- a/src/comm.cpp +++ b/src/comm.cpp @@ -28,6 +28,7 @@ #include "dump.h" #include "group.h" #include "procmap.h" +#include "irregular.h" #include "accelerator_kokkos.h" #include "memory.h" #include "error.h" @@ -725,6 +726,56 @@ void Comm::ring(int n, int nper, void *inbuf, int messtag, memory->destroy(bufcopy); } +/* ---------------------------------------------------------------------- + rendezvous communication operation +------------------------------------------------------------------------- */ + +int Comm::rendezvous(int n, int *proclist, char *inbuf, int insize, + int (*callback)(int, char *, int *&, char *&, void *), + char *&outbuf, int outsize, void *ptr) +{ + // comm data from caller decomposition to rendezvous decomposition + + Irregular *irregular = new Irregular(lmp); + + int n_rvous = irregular->create_data(n,proclist); // add sort + char *inbuf_rvous = (char *) memory->smalloc((bigint) n_rvous*insize, + "rendezvous:inbuf_rvous"); + irregular->exchange_data(inbuf,insize,inbuf_rvous); + + irregular->destroy_data(); + delete irregular; + + // peform rendezvous computation via callback() + // callback() allocates proclist_rvous and outbuf_rvous + + int *proclist_rvous; + char *outbuf_rvous; + + int nout_rvous = + callback(n_rvous,inbuf_rvous,proclist_rvous,outbuf_rvous,ptr); + + memory->sfree(inbuf_rvous); + + // comm data from rendezvous decomposition back to caller + // caller will free outbuf + + irregular = new Irregular(lmp); + + int nout = irregular->create_data(nout_rvous,proclist_rvous); + outbuf = (char *) memory->smalloc((bigint) nout*outsize,"rendezvous:outbuf"); + irregular->exchange_data(outbuf_rvous,outsize,outbuf); + + irregular->destroy_data(); + delete irregular; + memory->destroy(proclist_rvous); + memory->sfree(outbuf_rvous); + + // return number of datums + + return nout; +} + /* ---------------------------------------------------------------------- proc 0 reads Nlines from file into buf and bcasts buf to all procs caller allocates buf to max size needed diff --git a/src/comm.h b/src/comm.h index 2579f9b283..8bb057a0c1 100644 --- a/src/comm.h +++ b/src/comm.h @@ -109,6 +109,10 @@ class Comm : protected Pointers { void ring(int, int, void *, int, void (*)(int, char *, void *), void *, void *, int self = 1); + int rendezvous(int, int *, char *, int, + int (*)(int, char *, int *&, char *&, void *), + char *&, int, void *); + int read_lines_from_file(FILE *, int, int, char *); int read_lines_from_file_universe(FILE *, int, int, char *); diff --git a/src/create_atoms.cpp b/src/create_atoms.cpp index cb0da42970..52e4256fca 100644 --- a/src/create_atoms.cpp +++ b/src/create_atoms.cpp @@ -514,9 +514,6 @@ void CreateAtoms::command(int narg, char **arg) if (domain->triclinic) domain->lamda2x(atom->nlocal); } - MPI_Barrier(world); - double time2 = MPI_Wtime(); - // clean up delete ranmol; @@ -526,21 +523,6 @@ void CreateAtoms::command(int narg, char **arg) delete [] ystr; delete [] zstr; - // print status - - if (comm->me == 0) { - if (screen) { - fprintf(screen,"Created " BIGINT_FORMAT " atoms\n", - atom->natoms-natoms_previous); - fprintf(screen," Time spent = %g secs\n",time2-time1); - } - if (logfile) { - fprintf(logfile,"Created " BIGINT_FORMAT " atoms\n", - atom->natoms-natoms_previous); - fprintf(logfile," Time spent = %g secs\n",time2-time1); - } - } - // for MOLECULE mode: // create special bond lists for molecular systems, // but not for atom style template @@ -550,6 +532,25 @@ void CreateAtoms::command(int narg, char **arg) if (atom->molecular == 1 && onemol->bondflag && !onemol->specialflag) { Special special(lmp); special.build(); + + } + } + + // print status + + MPI_Barrier(world); + double time2 = MPI_Wtime(); + + if (comm->me == 0) { + if (screen) { + fprintf(screen,"Created " BIGINT_FORMAT " atoms\n", + atom->natoms-natoms_previous); + fprintf(screen," create_atoms CPU = %g secs\n",time2-time1); + } + if (logfile) { + fprintf(logfile,"Created " BIGINT_FORMAT " atoms\n", + atom->natoms-natoms_previous); + fprintf(logfile," create_atoms CPU = %g secs\n",time2-time1); } } } diff --git a/src/hashlittle.cpp b/src/hashlittle.cpp new file mode 100644 index 0000000000..be930217a1 --- /dev/null +++ b/src/hashlittle.cpp @@ -0,0 +1,333 @@ +// Hash function hashlittle() +// from lookup3.c, by Bob Jenkins, May 2006, Public Domain +// bob_jenkins@burtleburtle.net + +#include "stddef.h" +#include "stdint.h" + +#define HASH_LITTLE_ENDIAN 1 // Intel and AMD are little endian + +#define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k)))) + +/* +------------------------------------------------------------------------------- +mix -- mix 3 32-bit values reversibly. + +This is reversible, so any information in (a,b,c) before mix() is +still in (a,b,c) after mix(). + +If four pairs of (a,b,c) inputs are run through mix(), or through +mix() in reverse, there are at least 32 bits of the output that +are sometimes the same for one pair and different for another pair. +This was tested for: +* pairs that differed by one bit, by two bits, in any combination + of top bits of (a,b,c), or in any combination of bottom bits of + (a,b,c). +* "differ" is defined as +, -, ^, or ~^. For + and -, I transformed + the output delta to a Gray code (a^(a>>1)) so a string of 1's (as + is commonly produced by subtraction) look like a single 1-bit + difference. +* the base values were pseudorandom, all zero but one bit set, or + all zero plus a counter that starts at zero. + +Some k values for my "a-=c; a^=rot(c,k); c+=b;" arrangement that +satisfy this are + 4 6 8 16 19 4 + 9 15 3 18 27 15 + 14 9 3 7 17 3 +Well, "9 15 3 18 27 15" didn't quite get 32 bits diffing +for "differ" defined as + with a one-bit base and a two-bit delta. I +used http://burtleburtle.net/bob/hash/avalanche.html to choose +the operations, constants, and arrangements of the variables. + +This does not achieve avalanche. There are input bits of (a,b,c) +that fail to affect some output bits of (a,b,c), especially of a. The +most thoroughly mixed value is c, but it doesn't really even achieve +avalanche in c. + +This allows some parallelism. Read-after-writes are good at doubling +the number of bits affected, so the goal of mixing pulls in the opposite +direction as the goal of parallelism. I did what I could. Rotates +seem to cost as much as shifts on every machine I could lay my hands +on, and rotates are much kinder to the top and bottom bits, so I used +rotates. +------------------------------------------------------------------------------- +*/ +#define mix(a,b,c) \ +{ \ + a -= c; a ^= rot(c, 4); c += b; \ + b -= a; b ^= rot(a, 6); a += c; \ + c -= b; c ^= rot(b, 8); b += a; \ + a -= c; a ^= rot(c,16); c += b; \ + b -= a; b ^= rot(a,19); a += c; \ + c -= b; c ^= rot(b, 4); b += a; \ +} + +/* +------------------------------------------------------------------------------- +final -- final mixing of 3 32-bit values (a,b,c) into c + +Pairs of (a,b,c) values differing in only a few bits will usually +produce values of c that look totally different. This was tested for +* pairs that differed by one bit, by two bits, in any combination + of top bits of (a,b,c), or in any combination of bottom bits of + (a,b,c). +* "differ" is defined as +, -, ^, or ~^. For + and -, I transformed + the output delta to a Gray code (a^(a>>1)) so a string of 1's (as + is commonly produced by subtraction) look like a single 1-bit + difference. +* the base values were pseudorandom, all zero but one bit set, or + all zero plus a counter that starts at zero. + +These constants passed: + 14 11 25 16 4 14 24 + 12 14 25 16 4 14 24 +and these came close: + 4 8 15 26 3 22 24 + 10 8 15 26 3 22 24 + 11 8 15 26 3 22 24 +------------------------------------------------------------------------------- +*/ +#define final(a,b,c) \ +{ \ + c ^= b; c -= rot(b,14); \ + a ^= c; a -= rot(c,11); \ + b ^= a; b -= rot(a,25); \ + c ^= b; c -= rot(b,16); \ + a ^= c; a -= rot(c,4); \ + b ^= a; b -= rot(a,14); \ + c ^= b; c -= rot(b,24); \ +} + +/* +------------------------------------------------------------------------------- +hashlittle() -- hash a variable-length key into a 32-bit value + k : the key (the unaligned variable-length array of bytes) + length : the length of the key, counting by bytes + initval : can be any 4-byte value +Returns a 32-bit value. Every bit of the key affects every bit of +the return value. Two keys differing by one or two bits will have +totally different hash values. + +The best hash table sizes are powers of 2. There is no need to do +mod a prime (mod is sooo slow!). If you need less than 32 bits, +use a bitmask. For example, if you need only 10 bits, do + h = (h & hashmask(10)); +In which case, the hash table should have hashsize(10) elements. + +If you are hashing n strings (uint8_t **)k, do it like this: + for (i=0, h=0; i 12) + { + a += k[0]; + b += k[1]; + c += k[2]; + mix(a,b,c); + length -= 12; + k += 3; + } + + /*----------------------------- handle the last (probably partial) block */ + /* + * "k[2]&0xffffff" actually reads beyond the end of the string, but + * then masks off the part it's not allowed to read. Because the + * string is aligned, the masked-off tail is in the same word as the + * rest of the string. Every machine with memory protection I've seen + * does it on word boundaries, so is OK with this. But VALGRIND will + * still catch it and complain. The masking trick does make the hash + * noticably faster for short strings (like English words). + */ +#ifndef VALGRIND + + switch(length) + { + case 12: c+=k[2]; b+=k[1]; a+=k[0]; break; + case 11: c+=k[2]&0xffffff; b+=k[1]; a+=k[0]; break; + case 10: c+=k[2]&0xffff; b+=k[1]; a+=k[0]; break; + case 9 : c+=k[2]&0xff; b+=k[1]; a+=k[0]; break; + case 8 : b+=k[1]; a+=k[0]; break; + case 7 : b+=k[1]&0xffffff; a+=k[0]; break; + case 6 : b+=k[1]&0xffff; a+=k[0]; break; + case 5 : b+=k[1]&0xff; a+=k[0]; break; + case 4 : a+=k[0]; break; + case 3 : a+=k[0]&0xffffff; break; + case 2 : a+=k[0]&0xffff; break; + case 1 : a+=k[0]&0xff; break; + case 0 : return c; /* zero length strings require no mixing */ + } + +#else /* make valgrind happy */ + + k8 = (const uint8_t *)k; + switch(length) + { + case 12: c+=k[2]; b+=k[1]; a+=k[0]; break; + case 11: c+=((uint32_t)k8[10])<<16; /* fall through */ + case 10: c+=((uint32_t)k8[9])<<8; /* fall through */ + case 9 : c+=k8[8]; /* fall through */ + case 8 : b+=k[1]; a+=k[0]; break; + case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */ + case 6 : b+=((uint32_t)k8[5])<<8; /* fall through */ + case 5 : b+=k8[4]; /* fall through */ + case 4 : a+=k[0]; break; + case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */ + case 2 : a+=((uint32_t)k8[1])<<8; /* fall through */ + case 1 : a+=k8[0]; break; + case 0 : return c; + } + +#endif /* !valgrind */ + + } else if (HASH_LITTLE_ENDIAN && ((u.i & 0x1) == 0)) { + const uint16_t *k = (const uint16_t *)key; /* read 16-bit chunks */ + const uint8_t *k8; + + /*--------------- all but last block: aligned reads and different mixing */ + while (length > 12) + { + a += k[0] + (((uint32_t)k[1])<<16); + b += k[2] + (((uint32_t)k[3])<<16); + c += k[4] + (((uint32_t)k[5])<<16); + mix(a,b,c); + length -= 12; + k += 6; + } + + /*----------------------------- handle the last (probably partial) block */ + k8 = (const uint8_t *)k; + switch(length) + { + case 12: c+=k[4]+(((uint32_t)k[5])<<16); + b+=k[2]+(((uint32_t)k[3])<<16); + a+=k[0]+(((uint32_t)k[1])<<16); + break; + case 11: c+=((uint32_t)k8[10])<<16; /* fall through */ + case 10: c+=k[4]; + b+=k[2]+(((uint32_t)k[3])<<16); + a+=k[0]+(((uint32_t)k[1])<<16); + break; + case 9 : c+=k8[8]; /* fall through */ + case 8 : b+=k[2]+(((uint32_t)k[3])<<16); + a+=k[0]+(((uint32_t)k[1])<<16); + break; + case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */ + case 6 : b+=k[2]; + a+=k[0]+(((uint32_t)k[1])<<16); + break; + case 5 : b+=k8[4]; /* fall through */ + case 4 : a+=k[0]+(((uint32_t)k[1])<<16); + break; + case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */ + case 2 : a+=k[0]; + break; + case 1 : a+=k8[0]; + break; + case 0 : return c; /* zero length requires no mixing */ + } + + } else { /* need to read the key one byte at a time */ + const uint8_t *k = (const uint8_t *)key; + + /*--------------- all but the last block: affect some 32 bits of (a,b,c) */ + while (length > 12) + { + a += k[0]; + a += ((uint32_t)k[1])<<8; + a += ((uint32_t)k[2])<<16; + a += ((uint32_t)k[3])<<24; + b += k[4]; + b += ((uint32_t)k[5])<<8; + b += ((uint32_t)k[6])<<16; + b += ((uint32_t)k[7])<<24; + c += k[8]; + c += ((uint32_t)k[9])<<8; + c += ((uint32_t)k[10])<<16; + c += ((uint32_t)k[11])<<24; + mix(a,b,c); + length -= 12; + k += 12; + } + + /*-------------------------------- last block: affect all 32 bits of (c) */ + switch(length) /* all the case statements fall through */ + { + case 12: c+=((uint32_t)k[11])<<24; + case 11: c+=((uint32_t)k[10])<<16; + case 10: c+=((uint32_t)k[9])<<8; + case 9 : c+=k[8]; + case 8 : b+=((uint32_t)k[7])<<24; + case 7 : b+=((uint32_t)k[6])<<16; + case 6 : b+=((uint32_t)k[5])<<8; + case 5 : b+=k[4]; + case 4 : a+=((uint32_t)k[3])<<24; + case 3 : a+=((uint32_t)k[2])<<16; + case 2 : a+=((uint32_t)k[1])<<8; + case 1 : a+=k[0]; + break; + case 0 : return c; + } + } + + final(a,b,c); + return c; + +#else /* PURIFY_HATES_HASHLITTLE */ +/* I don't know what it is about Jenkins' hashlittle function, but + * it drives purify insane, even with VALGRIND defined. It makes + * purify unusable!! The code execution doesn't even make sense. + * Below is a (probably) weaker hash function that at least allows + * testing with purify. + */ +#define MAXINT_DIV_PHI 11400714819323198485U + + uint32_t h, rest, *p, bytes, num_bytes; + char *byteptr; + + num_bytes = length; + + /* First hash the uint32_t-sized portions of the key */ + h = 0; + for (p = (uint32_t *)key, bytes=num_bytes; + bytes >= (uint32_t) sizeof(uint32_t); + bytes-=sizeof(uint32_t), p++){ + h = (h^(*p))*MAXINT_DIV_PHI; + } + + /* Then take care of the remaining bytes, if any */ + rest = 0; + for (byteptr = (char *)p; bytes > 0; bytes--, byteptr++){ + rest = (rest<<8) | (*byteptr); + } + + /* If extra bytes, merge the two parts */ + if (rest) + h = (h^rest)*MAXINT_DIV_PHI; + + return h; +#endif /* PURIFY_HATES_HASHLITTLE */ +} diff --git a/src/hashlittle.h b/src/hashlittle.h new file mode 100644 index 0000000000..7b57a35c80 --- /dev/null +++ b/src/hashlittle.h @@ -0,0 +1,5 @@ +// Hash function hashlittle() +// from lookup3.c, by Bob Jenkins, May 2006, Public Domain +// bob_jenkins@burtleburtle.net + +uint32_t hashlittle(const void *key, size_t length, uint32_t); diff --git a/src/irregular.cpp b/src/irregular.cpp index 9c15f135d0..60025249cf 100644 --- a/src/irregular.cpp +++ b/src/irregular.cpp @@ -501,7 +501,8 @@ int compare_standalone(const int i, const int j, void *ptr) void Irregular::exchange_atom(double *sendbuf, int *sizes, double *recvbuf) { - int i,m,n,offset,count; + int i,m,n,count; + bigint offset; // post all receives @@ -739,11 +740,13 @@ int Irregular::create_data(int n, int *proclist, int sortflag) void Irregular::exchange_data(char *sendbuf, int nbytes, char *recvbuf) { - int i,m,n,offset,count; + int i,n,count; + bigint m; // these 2 lines enable send/recv buf to be larger than 2 GB + char *dest; // post all receives, starting after self copies - offset = num_self*nbytes; + bigint offset = num_self*nbytes; for (int irecv = 0; irecv < nrecv_proc; irecv++) { MPI_Irecv(&recvbuf[offset],num_recv[irecv]*nbytes,MPI_CHAR, proc_recv[irecv],0,world,&request[irecv]); @@ -765,18 +768,22 @@ void Irregular::exchange_data(char *sendbuf, int nbytes, char *recvbuf) n = 0; for (int isend = 0; isend < nsend_proc; isend++) { count = num_send[isend]; + dest = buf; for (i = 0; i < count; i++) { m = index_send[n++]; - memcpy(&buf[i*nbytes],&sendbuf[m*nbytes],nbytes); + memcpy(dest,&sendbuf[m*nbytes],nbytes); + dest += nbytes; } MPI_Send(buf,count*nbytes,MPI_CHAR,proc_send[isend],0,world); } // copy datums to self, put at beginning of recvbuf + dest = recvbuf; for (i = 0; i < num_self; i++) { m = index_self[i]; - memcpy(&recvbuf[i*nbytes],&sendbuf[m*nbytes],nbytes); + memcpy(dest,&sendbuf[m*nbytes],nbytes); + dest += nbytes; } // wait on all incoming messages diff --git a/src/read_data.cpp b/src/read_data.cpp index 2f4484010b..08a57a3aac 100644 --- a/src/read_data.cpp +++ b/src/read_data.cpp @@ -120,6 +120,9 @@ void ReadData::command(int narg, char **arg) { if (narg < 1) error->all(FLERR,"Illegal read_data command"); + MPI_Barrier(world); + double time1 = MPI_Wtime; + // optional args addflag = NONE; @@ -906,6 +909,18 @@ void ReadData::command(int narg, char **arg) force->kspace = saved_kspace; } + + // total time + + MPI_Barrier(world); + double time2 = MPI_Wtime; + + if (comm->me == 0) { + if (screen) + fprintf(screen," read_atoms CPU = %g secs\n",time2-time1); + if (logfile) + fprintf(logfile," read_atoms CPU = %g secs\n",time2-time1); + } } /* ---------------------------------------------------------------------- diff --git a/src/read_restart.cpp b/src/read_restart.cpp index 73dc37f4cb..2eca9b58c4 100644 --- a/src/read_restart.cpp +++ b/src/read_restart.cpp @@ -81,6 +81,9 @@ void ReadRestart::command(int narg, char **arg) if (domain->box_exist) error->all(FLERR,"Cannot read_restart after simulation box is defined"); + MPI_Barrier(world); + double time1 = MPI_Wtime; + MPI_Comm_rank(world,&me); MPI_Comm_size(world,&nprocs); @@ -562,6 +565,18 @@ void ReadRestart::command(int narg, char **arg) Special special(lmp); special.build(); } + + // total time + + MPI_Barrier(world); + double time2 = MPI_Wtime; + + if (comm->me == 0) { + if (screen) + fprintf(screen," read_restart CPU = %g secs\n",time2-time1); + if (logfile) + fprintf(logfile," read_restart CPU = %g secs\n",time2-time1); + } } /* ---------------------------------------------------------------------- diff --git a/src/replicate.cpp b/src/replicate.cpp index cdadf1fd1f..3c8f4a8aee 100644 --- a/src/replicate.cpp +++ b/src/replicate.cpp @@ -76,7 +76,7 @@ void Replicate::command(int narg, char **arg) if (atom->nextra_grow || atom->nextra_restart || atom->nextra_store) error->all(FLERR,"Cannot replicate with fixes that store atom quantities"); - // Record wall time for atom replication + // record wall time for atom replication MPI_Barrier(world); double time1 = MPI_Wtime(); @@ -762,15 +762,15 @@ void Replicate::command(int narg, char **arg) special.build(); } - // Wall time + // total time MPI_Barrier(world); double time2 = MPI_Wtime(); if (me == 0) { if (screen) - fprintf(screen," Time spent = %g secs\n",time2-time1); + fprintf(screen," replicate CPU = %g secs\n",time2-time1); if (logfile) - fprintf(logfile," Time spent = %g secs\n",time2-time1); + fprintf(logfile," replicate CPU = %g secs\n",time2-time1); } } diff --git a/src/special.cpp b/src/special.cpp index fccc930353..a18c597765 100644 --- a/src/special.cpp +++ b/src/special.cpp @@ -21,9 +21,10 @@ #include "modify.h" #include "fix.h" #include "accelerator_kokkos.h" +#include "hashlittle.h" +#include "atom_masks.h" #include "memory.h" #include "error.h" -#include "atom_masks.h" using namespace LAMMPS_NS; @@ -54,11 +55,12 @@ Special::~Special() void Special::build() { - int i,j,k,size; - int max,maxall,nbuf; - tagint *buf; + int i,j,k,m,n,size,proc; + int max,maxall; + char *buf; MPI_Barrier(world); + double time1 = MPI_Wtime(); int nlocal = atom->nlocal; @@ -87,99 +89,88 @@ void Special::build() // ----------------------------------------------------- // compute nspecial[i][0] = # of 1-2 neighbors of atom i + // create onetwo[i] = list of 1-2 neighbors for atom i // ----------------------------------------------------- - // bond partners stored by atom itself + // ncount = # of my datums to send (newton or newton off) + // include nlocal datums with owner of each atom - for (i = 0; i < nlocal; i++) nspecial[i][0] = num_bond[i]; + int newton_bond = force->newton_bond; - // if newton_bond off, then done - // else only counted 1/2 of all bonds, so count other half + int ncount = 0; + for (i = 0; i < nlocal; i++) ncount += num_bond[i]; + if (newton_bond) ncount *= 2; + ncount += nlocal; - if (force->newton_bond) { + int *proclist; + memory->create(proclist,ncount,"special:proclist"); + InRvous *inbuf = (InRvous *) + memory->smalloc((bigint) ncount*sizeof(InRvous),"special:inbuf"); - // nbufmax = largest buffer needed to hold info from any proc - // info for each atom = global tag of 2nd atom in each bond + // setup input buf to rendezvous comm + // one datum for each owned atom: datum = proc, atomID + // one datum for each bond partner: datum = atomID, bond partner ID + // owning proc for each datum = random hash of atomID - nbuf = 0; - for (i = 0; i < nlocal; i++) nbuf += num_bond[i]; - memory->create(buf,nbuf,"special:buf"); + m = 0; + for (i = 0; i < nlocal; i++) { + proc = hashlittle(&tag[i],sizeof(tagint),0) % nprocs; + proclist[m] = proc; + inbuf[m].me = me; + inbuf[m].atomID = tag[i]; + inbuf[m].partnerID = 0; + m++; - // fill buffer with global tags of bond partners of my atoms - - size = 0; - for (i = 0; i < nlocal; i++) - for (j = 0; j < num_bond[i]; j++) - buf[size++] = bond_atom[i][j]; - - // cycle buffer around ring of procs back to self - // when receive buffer, scan tags for atoms I own - // when find one, increment nspecial count for that atom - - comm->ring(size,sizeof(tagint),buf,1,ring_one,NULL,(void *)this); - - memory->destroy(buf); + for (j = 0; j < num_bond[i]; j++) { + proclist[m] = proc; + inbuf[m].me = -1; + inbuf[m].atomID = tag[i]; + inbuf[m].partnerID = bond_atom[i][j]; + m++; + } + if (newton_bond) { + for (j = 0; j < num_bond[i]; j++) { + proclist[m] = hashlittle(&bond_atom[i][j],sizeof(tagint),0) % nprocs; + inbuf[m].me = -1; + inbuf[m].atomID = bond_atom[i][j]; + inbuf[m].partnerID = tag[i]; + m++; + } + } } - // ---------------------------------------------------- - // create onetwo[i] = list of 1-2 neighbors for atom i - // ---------------------------------------------------- + // perform rendezvous operation + // each proc owns random subset of atoms, receives all their bond partners - max = 0; - for (i = 0; i < nlocal; i++) max = MAX(max,nspecial[i][0]); + int nreturn = comm->rendezvous(ncount,proclist,(char *) inbuf,sizeof(InRvous), + rendezvous_1234, + buf,sizeof(OutRvous),(void *) this); + OutRvous *outbuf = (OutRvous *) buf; - MPI_Allreduce(&max,&maxall,1,MPI_INT,MPI_MAX,world); + memory->destroy(proclist); + memory->sfree(inbuf); + + // set nspecial[0] and onetwo for all owned atoms based on output info + + MPI_Allreduce(&max_rvous,&maxall,1,MPI_INT,MPI_MAX,world); + memory->create(onetwo,nlocal,maxall,"special:onetwo"); + + for (i = 0; i < nlocal; i++) nspecial[i][0] = 0; + + for (m = 0; m < nreturn; m++) { + i = atom->map(outbuf[m].atomID); + onetwo[i][nspecial[i][0]++] = outbuf[m].partnerID; + } + + memory->sfree(outbuf); + + // compute and print max # of 1-2 neighbors if (me == 0) { if (screen) fprintf(screen," %d = max # of 1-2 neighbors\n",maxall); if (logfile) fprintf(logfile," %d = max # of 1-2 neighbors\n",maxall); } - memory->create(onetwo,nlocal,maxall,"special:onetwo"); - - // count = accumulating counter - - memory->create(count,nlocal,"special:count"); - for (i = 0; i < nlocal; i++) count[i] = 0; - - // add bond partners stored by atom to onetwo list - - for (i = 0; i < nlocal; i++) - for (j = 0; j < num_bond[i]; j++) - onetwo[i][count[i]++] = bond_atom[i][j]; - - // if newton_bond off, then done - // else only stored 1/2 of all bonds, so store other half - - if (force->newton_bond) { - - // nbufmax = largest buffer needed to hold info from any proc - // info for each atom = 2 global tags in each bond - - nbuf = 0; - for (i = 0; i < nlocal; i++) nbuf += 2*num_bond[i]; - memory->create(buf,nbuf,"special:buf"); - - // fill buffer with global tags of both atoms in bond - - size = 0; - for (i = 0; i < nlocal; i++) - for (j = 0; j < num_bond[i]; j++) { - buf[size++] = tag[i]; - buf[size++] = bond_atom[i][j]; - } - - // cycle buffer around ring of procs back to self - // when receive buffer, scan 2nd-atom tags for atoms I own - // when find one, add 1st-atom tag to onetwo list for 2nd atom - - comm->ring(size,sizeof(tagint),buf,2,ring_two,NULL,(void *)this); - - memory->destroy(buf); - } - - memory->destroy(count); - // ----------------------------------------------------- // done if special_bond weights for 1-3, 1-4 are set to 1.0 // ----------------------------------------------------- @@ -189,114 +180,83 @@ void Special::build() dedup(); combine(); fix_alteration(); + timer_output(time1); return; } // ----------------------------------------------------- // compute nspecial[i][1] = # of 1-3 neighbors of atom i + // create onethree[i] = list of 1-3 neighbors for atom i // ----------------------------------------------------- - // nbufmax = largest buffer needed to hold info from any proc - // info for each atom = 2 scalars + list of 1-2 neighbors + // ncount = # of my datums to send + // include nlocal datums with owner of each atom - nbuf = 0; - for (i = 0; i < nlocal; i++) nbuf += 2 + nspecial[i][0]; - memory->create(buf,nbuf,"special:buf"); + ncount = nlocal; + for (i = 0; i < nlocal; i++) ncount += nspecial[i][0]*(nspecial[i][0]-1); - // fill buffer with: - // (1) = counter for 1-3 neighbors, initialized to 0 - // (2) = # of 1-2 neighbors - // (3:N) = list of 1-2 neighbors + memory->create(proclist,ncount,"special:proclist"); + inbuf = (InRvous *) + memory->smalloc((bigint) ncount*sizeof(InRvous),"special:inbuf"); - size = 0; + // setup input buf to rendezvous comm + // one datum for each owned atom: datum = proc, atomID + // one datum for each bond partner: datum = atomID, bond partner ID + // owning proc for each datum = random hash of atomID + + m = 0; for (i = 0; i < nlocal; i++) { - buf[size++] = 0; - buf[size++] = nspecial[i][0]; - for (j = 0; j < nspecial[i][0]; j++) buf[size++] = onetwo[i][j]; + proclist[m] = hashlittle(&tag[i],sizeof(tagint),0) % nprocs; + inbuf[m].me = me; + inbuf[m].atomID = tag[i]; + inbuf[m].partnerID = 0; + m++; + + for (j = 0; j < nspecial[i][0]; j++) { + proc = hashlittle(&onetwo[i][j],sizeof(tagint),0) % nprocs; + for (k = 0; k < nspecial[i][0]; k++) { + if (j == k) continue; + proclist[m] = proc; + inbuf[m].me = -1; + inbuf[m].atomID = onetwo[i][j]; + inbuf[m].partnerID = onetwo[i][k]; + m++; + } + } } - // cycle buffer around ring of procs back to self - // when receive buffer, scan list of 1-2 neighbors for atoms I own - // when find one, increment 1-3 count by # of 1-2 neighbors of my atom, - // subtracting one since my list will contain original atom + // perform rendezvous operation + // each proc owns random subset of atoms, receives all their bond partners - comm->ring(size,sizeof(tagint),buf,3,ring_three,buf,(void *)this); + nreturn = comm->rendezvous(ncount,proclist,(char *) inbuf,sizeof(InRvous), + rendezvous_1234, + buf,sizeof(OutRvous),(void *) this); + outbuf = (OutRvous *) buf; - // extract count from buffer that has cycled back to me - // nspecial[i][1] = # of 1-3 neighbors of atom i + memory->destroy(proclist); + memory->sfree(inbuf); - j = 0; - for (i = 0; i < nlocal; i++) { - nspecial[i][1] = buf[j]; - j += 2 + nspecial[i][0]; + // set nspecial[1] and onethree for all owned atoms based on output info + + MPI_Allreduce(&max_rvous,&maxall,1,MPI_INT,MPI_MAX,world); + memory->create(onethree,nlocal,maxall,"special:onethree"); + + for (i = 0; i < nlocal; i++) nspecial[i][1] = 0; + + for (m = 0; m < nreturn; m++) { + i = atom->map(outbuf[m].atomID); + onethree[i][nspecial[i][1]++] = outbuf[m].partnerID; } - memory->destroy(buf); + memory->destroy(outbuf); - // ---------------------------------------------------- - // create onethree[i] = list of 1-3 neighbors for atom i - // ---------------------------------------------------- - - max = 0; - for (i = 0; i < nlocal; i++) max = MAX(max,nspecial[i][1]); - MPI_Allreduce(&max,&maxall,1,MPI_INT,MPI_MAX,world); + // compute and print max # of 1-3 neighbors if (me == 0) { if (screen) fprintf(screen," %d = max # of 1-3 neighbors\n",maxall); if (logfile) fprintf(logfile," %d = max # of 1-3 neighbors\n",maxall); } - memory->create(onethree,nlocal,maxall,"special:onethree"); - - // nbufmax = largest buffer needed to hold info from any proc - // info for each atom = 4 scalars + list of 1-2 neighs + list of 1-3 neighs - - nbuf = 0; - for (i = 0; i < nlocal; i++) nbuf += 4 + nspecial[i][0] + nspecial[i][1]; - memory->create(buf,nbuf,"special:buf"); - - // fill buffer with: - // (1) = global tag of original atom - // (2) = # of 1-2 neighbors - // (3) = # of 1-3 neighbors - // (4) = counter for 1-3 neighbors, initialized to 0 - // (5:N) = list of 1-2 neighbors - // (N+1:2N) space for list of 1-3 neighbors - - size = 0; - for (i = 0; i < nlocal; i++) { - buf[size++] = tag[i]; - buf[size++] = nspecial[i][0]; - buf[size++] = nspecial[i][1]; - buf[size++] = 0; - for (j = 0; j < nspecial[i][0]; j++) buf[size++] = onetwo[i][j]; - size += nspecial[i][1]; - } - - // cycle buffer around ring of procs back to self - // when receive buffer, scan list of 1-2 neighbors for atoms I own - // when find one, add its neighbors to 1-3 list - // increment the count in buf(i+4) - // exclude the atom whose tag = original - // this process may include duplicates but they will be culled later - - comm->ring(size,sizeof(tagint),buf,4,ring_four,buf,(void *)this); - - // fill onethree with buffer values that have been returned to me - // sanity check: accumulated buf[i+3] count should equal - // nspecial[i][1] for each atom - - j = 0; - for (i = 0; i < nlocal; i++) { - if (buf[j+3] != nspecial[i][1]) - error->one(FLERR,"1-3 bond count is inconsistent"); - j += 4 + nspecial[i][0]; - for (k = 0; k < nspecial[i][1]; k++) - onethree[i][k] = buf[j++]; - } - - memory->destroy(buf); - // done if special_bond weights for 1-4 are set to 1.0 if (force->special_lj[3] == 1.0 && force->special_coul[3] == 1.0) { @@ -304,117 +264,92 @@ void Special::build() if (force->special_angle) angle_trim(); combine(); fix_alteration(); + timer_output(time1); return; } // ----------------------------------------------------- // compute nspecial[i][2] = # of 1-4 neighbors of atom i + // create onefour[i] = list of 1-4 neighbors for atom i // ----------------------------------------------------- - // nbufmax = largest buffer needed to hold info from any proc - // info for each atom = 2 scalars + list of 1-3 neighbors + // ncount = # of my datums to send + // include nlocal datums with owner of each atom - nbuf = 0; - for (i = 0; i < nlocal; i++) nbuf += 2 + nspecial[i][1]; - memory->create(buf,nbuf,"special:buf"); + ncount = nlocal; + for (i = 0; i < nlocal; i++) ncount += nspecial[i][1]*nspecial[i][0]; - // fill buffer with: - // (1) = counter for 1-4 neighbors, initialized to 0 - // (2) = # of 1-3 neighbors - // (3:N) = list of 1-3 neighbors + memory->create(proclist,ncount,"special:proclist"); + inbuf = (InRvous *) + memory->smalloc((bigint) ncount*sizeof(InRvous),"special:inbuf"); - size = 0; + // setup input buf to rendezvous comm + // one datum for each owned atom: datum = proc, atomID + // one datum for each partner: datum = atomID, bond partner ID + // owning proc for each datum = random hash of atomID + + m = 0; for (i = 0; i < nlocal; i++) { - buf[size++] = 0; - buf[size++] = nspecial[i][1]; - for (j = 0; j < nspecial[i][1]; j++) buf[size++] = onethree[i][j]; + proclist[m] = hashlittle(&tag[i],sizeof(tagint),0) % nprocs; + inbuf[m].me = me; + inbuf[m].atomID = tag[i]; + inbuf[m].partnerID = 0; + m++; + + for (j = 0; j < nspecial[i][1]; j++) { + proc = hashlittle(&onethree[i][j],sizeof(tagint),0) % nprocs; + for (k = 0; k < nspecial[i][0]; k++) { + proclist[m] = proc; + inbuf[m].me = -1; + inbuf[m].atomID = onethree[i][j]; + inbuf[m].partnerID = onetwo[i][k]; + m++; + } + } } - // cycle buffer around ring of procs back to self - // when receive buffer, scan list of 1-3 neighbors for atoms I own - // when find one, increment 1-4 count by # of 1-2 neighbors of my atom - // may include duplicates and original atom but they will be culled later + // perform rendezvous operation + // each proc owns random subset of bodies, receives all atoms in the bodies + // func = compute bbox of each body, flag atom closest to geometric center + // when done: each atom has atom ID of owning atom of its body - comm->ring(size,sizeof(tagint),buf,5,ring_five,buf,(void *)this); + nreturn = comm->rendezvous(ncount,proclist,(char *) inbuf,sizeof(InRvous), + rendezvous_1234, + buf,sizeof(OutRvous),(void *) this); + outbuf = (OutRvous *) buf; - // extract count from buffer that has cycled back to me - // nspecial[i][2] = # of 1-4 neighbors of atom i + memory->destroy(proclist); + memory->sfree(inbuf); - j = 0; - for (i = 0; i < nlocal; i++) { - nspecial[i][2] = buf[j]; - j += 2 + nspecial[i][1]; + // set nspecial[2] and onefour for all owned atoms based on output info + + MPI_Allreduce(&max_rvous,&maxall,1,MPI_INT,MPI_MAX,world); + memory->create(onefour,nlocal,maxall,"special:onefour"); + + for (i = 0; i < nlocal; i++) nspecial[i][2] = 0; + + for (m = 0; m < nreturn; m++) { + i = atom->map(outbuf[m].atomID); + onefour[i][nspecial[i][2]++] = outbuf[m].partnerID; } - memory->destroy(buf); + memory->destroy(outbuf); - // ---------------------------------------------------- - // create onefour[i] = list of 1-4 neighbors for atom i - // ---------------------------------------------------- - - max = 0; - for (i = 0; i < nlocal; i++) max = MAX(max,nspecial[i][2]); - MPI_Allreduce(&max,&maxall,1,MPI_INT,MPI_MAX,world); + // compute and print max # of 1-4 neighbors if (me == 0) { if (screen) fprintf(screen," %d = max # of 1-4 neighbors\n",maxall); if (logfile) fprintf(logfile," %d = max # of 1-4 neighbors\n",maxall); } - memory->create(onefour,nlocal,maxall,"special:onefour"); - - // nbufmax = largest buffer needed to hold info from any proc - // info for each atom = 3 scalars + list of 1-3 neighs + list of 1-4 neighs - - nbuf = 0; - for (i = 0; i < nlocal; i++) - nbuf += 3 + nspecial[i][1] + nspecial[i][2]; - memory->create(buf,nbuf,"special:buf"); - - // fill buffer with: - // (1) = # of 1-3 neighbors - // (2) = # of 1-4 neighbors - // (3) = counter for 1-4 neighbors, initialized to 0 - // (4:N) = list of 1-3 neighbors - // (N+1:2N) space for list of 1-4 neighbors - - size = 0; - for (i = 0; i < nlocal; i++) { - buf[size++] = nspecial[i][1]; - buf[size++] = nspecial[i][2]; - buf[size++] = 0; - for (j = 0; j < nspecial[i][1]; j++) buf[size++] = onethree[i][j]; - size += nspecial[i][2]; - } - - // cycle buffer around ring of procs back to self - // when receive buffer, scan list of 1-3 neighbors for atoms I own - // when find one, add its neighbors to 1-4 list - // incrementing the count in buf(i+4) - // this process may include duplicates but they will be culled later - - comm->ring(size,sizeof(tagint),buf,6,ring_six,buf,(void *)this); - - // fill onefour with buffer values that have been returned to me - // sanity check: accumulated buf[i+2] count should equal - // nspecial[i][2] for each atom - - j = 0; - for (i = 0; i < nlocal; i++) { - if (buf[j+2] != nspecial[i][2]) - error->one(FLERR,"1-4 bond count is inconsistent"); - j += 3 + nspecial[i][1]; - for (k = 0; k < nspecial[i][2]; k++) - onefour[i][k] = buf[j++]; - } - - memory->destroy(buf); + // finish processing the onetwo, onethree, onefour lists dedup(); if (force->special_angle) angle_trim(); if (force->special_dihedral) dihedral_trim(); combine(); fix_alteration(); + timer_output(time1); } /* ---------------------------------------------------------------------- @@ -663,17 +598,19 @@ void Special::combine() void Special::angle_trim() { - int i,j,m,n; + int i,j,m,n,proc,index; int *num_angle = atom->num_angle; int *num_dihedral = atom->num_dihedral; tagint **angle_atom1 = atom->angle_atom1; + tagint **angle_atom2 = atom->angle_atom2; tagint **angle_atom3 = atom->angle_atom3; tagint **dihedral_atom1 = atom->dihedral_atom1; tagint **dihedral_atom2 = atom->dihedral_atom2; tagint **dihedral_atom3 = atom->dihedral_atom3; tagint **dihedral_atom4 = atom->dihedral_atom4; int **nspecial = atom->nspecial; + tagint *tag = atom->tag; int nlocal = atom->nlocal; // stats on old 1-3 neighbor counts @@ -697,68 +634,125 @@ void Special::angle_trim() if ((num_angle && atom->nangles) || (num_dihedral && atom->ndihedrals)) { - // dflag = flag for 1-3 neighs of all owned atoms - - int maxcount = 0; - for (i = 0; i < nlocal; i++) maxcount = MAX(maxcount,nspecial[i][1]); - memory->create(dflag,nlocal,maxcount,"special::dflag"); + // ncount = # of my datums to send in 3 parts for each owned atom + // proc owner, onethree list, angle end points + // angle end points are from angle list and 1-3 and 2-4 pairs in dihedrals + // latter is only for angles or dihedrlas where I own atom2 + int ncount = nlocal; + for (i = 0; i < nlocal; i++) ncount += nspecial[i][1]; for (i = 0; i < nlocal; i++) { - n = nspecial[i][1]; - for (j = 0; j < n; j++) dflag[i][j] = 0; + for (j = 0; j < num_angle[i]; j++) { + index = atom->map(angle_atom2[i][j]); + if (index >= 0 && index < nlocal) ncount += 2; + } + for (j = 0; j < num_dihedral[i]; j++) { + index = atom->map(dihedral_atom2[i][j]); + if (index >= 0 && index < nlocal) ncount += 4; + } } - // nbufmax = largest buffer needed to hold info from any proc - // info for each atom = list of 1,3 atoms in each angle stored by atom - // and list of 1,3 and 2,4 atoms in each dihedral stored by atom + int *proclist; + memory->create(proclist,ncount,"special:proclist"); + InRvous *inbuf = (InRvous *) + memory->smalloc((bigint) ncount*sizeof(InRvous),"special:inbuf"); - int nbuf = 0; + // setup input buf to rendezvous comm + // one datum for each owned atom: datum = proc, atomID + // sent to owner of atomID + // one datum for each 1-4 partner: datum = atomID, ID + // sent to owner of atomID + // two datums for each dihedral 1-4 endatoms : datum = atomID, ID + // sent to owner of atomID + + m = 0; for (i = 0; i < nlocal; i++) { - if (num_angle && atom->nangles) nbuf += 2*num_angle[i]; - if (num_dihedral && atom->ndihedrals) nbuf += 2*2*num_dihedral[i]; - } - int *buf; - memory->create(buf,nbuf,"special:buf"); + proc = hashlittle(&tag[i],sizeof(tagint),0) % nprocs; + proclist[m] = proc; + inbuf[m].me = me; + inbuf[m].atomID = tag[i]; + inbuf[m].partnerID = 0; + m++; - // fill buffer with list of 1,3 atoms in each angle - // and with list of 1,3 and 2,4 atoms in each dihedral + for (j = 0; j < nspecial[i][1]; j++) { + proclist[m] = proc; + inbuf[m].me = -1; + inbuf[m].atomID = tag[i]; + inbuf[m].partnerID = onethree[i][j]; + m++; + } - int size = 0; - if (num_angle && atom->nangles) - for (i = 0; i < nlocal; i++) - for (j = 0; j < num_angle[i]; j++) { - buf[size++] = angle_atom1[i][j]; - buf[size++] = angle_atom3[i][j]; - } + for (j = 0; j < num_angle[i]; j++) { + index = atom->map(angle_atom2[i][j]); + if (index < 0 || index >= nlocal) continue; - if (num_dihedral && atom->ndihedrals) - for (i = 0; i < nlocal; i++) - for (j = 0; j < num_dihedral[i]; j++) { - buf[size++] = dihedral_atom1[i][j]; - buf[size++] = dihedral_atom3[i][j]; - buf[size++] = dihedral_atom2[i][j]; - buf[size++] = dihedral_atom4[i][j]; - } + proclist[m] = hashlittle(&angle_atom1[i][j],sizeof(tagint),0) % nprocs; + inbuf[m].me = -2; + inbuf[m].atomID = angle_atom1[i][j]; + inbuf[m].partnerID = angle_atom3[i][j]; + m++; - // cycle buffer around ring of procs back to self - // when receive buffer, scan list of 1,3 atoms looking for atoms I own - // when find one, scan its 1-3 neigh list and mark I,J as in an angle + proclist[m] = hashlittle(&angle_atom3[i][j],sizeof(tagint),0) % nprocs; + inbuf[m].me = -2; + inbuf[m].atomID = angle_atom3[i][j]; + inbuf[m].partnerID = angle_atom1[i][j]; + m++; + } - comm->ring(size,sizeof(tagint),buf,7,ring_seven,NULL,(void *)this); + for (j = 0; j < num_dihedral[i]; j++) { + index = atom->map(dihedral_atom2[i][j]); + if (index < 0 || index >= nlocal) continue; - // delete 1-3 neighbors if they are not flagged in dflag + proclist[m] = hashlittle(&dihedral_atom1[i][j],sizeof(tagint),0) % nprocs; + inbuf[m].me = -2; + inbuf[m].atomID = dihedral_atom1[i][j]; + inbuf[m].partnerID = dihedral_atom3[i][j]; + m++; - for (i = 0; i < nlocal; i++) { - m = 0; - for (j = 0; j < nspecial[i][1]; j++) - if (dflag[i][j]) onethree[i][m++] = onethree[i][j]; - nspecial[i][1] = m; + proclist[m] = hashlittle(&dihedral_atom2[i][j],sizeof(tagint),0) % nprocs; + inbuf[m].me = -2; + inbuf[m].atomID = dihedral_atom2[i][j]; + inbuf[m].partnerID = dihedral_atom4[i][j]; + m++; + + proclist[m] = hashlittle(&dihedral_atom3[i][j],sizeof(tagint),0) % nprocs; + inbuf[m].me = -2; + inbuf[m].atomID = dihedral_atom3[i][j]; + inbuf[m].partnerID = dihedral_atom1[i][j]; + m++; + + proclist[m] = hashlittle(&dihedral_atom4[i][j],sizeof(tagint),0) % nprocs; + inbuf[m].me = -2; + inbuf[m].atomID = dihedral_atom4[i][j]; + inbuf[m].partnerID = dihedral_atom2[i][j]; + m++; + } } - // clean up + // perform rendezvous operation + // each proc owns random subset of atoms + // func = compute bbox of each body, flag atom closest to geometric center + // when done: each atom has atom ID of owning atom of its body + + char *buf; + int nreturn = comm->rendezvous(ncount,proclist,(char *) inbuf,sizeof(InRvous), + rendezvous_trim, + buf,sizeof(OutRvous),(void *) this); + OutRvous *outbuf = (OutRvous *) buf; - memory->destroy(dflag); - memory->destroy(buf); + memory->destroy(proclist); + memory->sfree(inbuf); + + // reset nspecial[1] and onethree for all owned atoms based on output info + + for (i = 0; i < nlocal; i++) nspecial[i][1] = 0; + + for (m = 0; m < nreturn; m++) { + i = atom->map(outbuf[m].atomID); + onethree[i][nspecial[i][1]++] = outbuf[m].partnerID; + } + + memory->destroy(outbuf); // if no angles or dihedrals are defined, delete all 1-3 neighs @@ -789,12 +783,14 @@ void Special::angle_trim() void Special::dihedral_trim() { - int i,j,m,n; + int i,j,m,n,proc,index; int *num_dihedral = atom->num_dihedral; tagint **dihedral_atom1 = atom->dihedral_atom1; + tagint **dihedral_atom2 = atom->dihedral_atom2; tagint **dihedral_atom4 = atom->dihedral_atom4; int **nspecial = atom->nspecial; + tagint *tag = atom->tag; int nlocal = atom->nlocal; // stats on old 1-4 neighbor counts @@ -813,57 +809,95 @@ void Special::dihedral_trim() " %g = # of 1-4 neighbors before dihedral trim\n",allcount); } - // if dihedrals are defined, flag each 1-4 neigh if it appears in a dihedral + // if dihedrals are defined, rendezvous onefour list with dihedral 1-4 pairs if (num_dihedral && atom->ndihedrals) { - // dflag = flag for 1-4 neighs of all owned atoms - - int maxcount = 0; - for (i = 0; i < nlocal; i++) maxcount = MAX(maxcount,nspecial[i][2]); - memory->create(dflag,nlocal,maxcount,"special::dflag"); + // ncount = # of my datums to send in 3 parts for each owned atom + // onefour list, proc owner, dihedral end points + // latter is only for dihedrals where I own atom2 + int ncount = nlocal; + for (i = 0; i < nlocal; i++) ncount += nspecial[i][2]; for (i = 0; i < nlocal; i++) { - n = nspecial[i][2]; - for (j = 0; j < n; j++) dflag[i][j] = 0; + for (j = 0; j < num_dihedral[i]; j++) { + index = atom->map(dihedral_atom2[i][j]); + if (index >= 0 && index < nlocal) ncount += 2; + } } - // nbufmax = largest buffer needed to hold info from any proc - // info for each atom = list of 1,4 atoms in each dihedral stored by atom + int *proclist; + memory->create(proclist,ncount,"special:proclist"); + InRvous *inbuf = (InRvous *) + memory->smalloc((bigint) ncount*sizeof(InRvous),"special:inbuf"); - int nbuf = 0; - for (i = 0; i < nlocal; i++) nbuf += 2*num_dihedral[i]; - int *buf; - memory->create(buf,nbuf,"special:buf"); + // setup input buf to rendezvous comm + // one datum for each owned atom: datum = proc, atomID + // sent to owner of atomID + // one datum for each 1-4 partner: datum = atomID, ID + // sent to owner of atomID + // two datums for each dihedral 1-4 endatoms : datum = atomID, ID + // sent to owner of atomID - // fill buffer with list of 1,4 atoms in each dihedral + m = 0; + for (i = 0; i < nlocal; i++) { + proc = hashlittle(&tag[i],sizeof(tagint),0) % nprocs; + proclist[m] = proc; + inbuf[m].me = me; + inbuf[m].atomID = tag[i]; + inbuf[m].partnerID = 0; + m++; - int size = 0; - for (i = 0; i < nlocal; i++) - for (j = 0; j < num_dihedral[i]; j++) { - buf[size++] = dihedral_atom1[i][j]; - buf[size++] = dihedral_atom4[i][j]; + for (j = 0; j < nspecial[i][2]; j++) { + proclist[m] = proc; + inbuf[m].me = -1; + inbuf[m].atomID = tag[i]; + inbuf[m].partnerID = onefour[i][j]; + m++; } - // cycle buffer around ring of procs back to self - // when receive buffer, scan list of 1,4 atoms looking for atoms I own - // when find one, scan its 1-4 neigh list and mark I,J as in a dihedral + for (j = 0; j < num_dihedral[i]; j++) { + index = atom->map(dihedral_atom2[i][j]); + if (index < 0 || index >= nlocal) continue; - comm->ring(size,sizeof(tagint),buf,8,ring_eight,NULL,(void *)this); + proclist[m] = hashlittle(&dihedral_atom1[i][j],sizeof(tagint),0) % nprocs; + inbuf[m].me = -2; + inbuf[m].atomID = dihedral_atom1[i][j]; + inbuf[m].partnerID = dihedral_atom4[i][j]; + m++; - // delete 1-4 neighbors if they are not flagged in dflag - - for (i = 0; i < nlocal; i++) { - m = 0; - for (j = 0; j < nspecial[i][2]; j++) - if (dflag[i][j]) onefour[i][m++] = onefour[i][j]; - nspecial[i][2] = m; + proclist[m] = hashlittle(&dihedral_atom4[i][j],sizeof(tagint),0) % nprocs; + inbuf[m].me = -2; + inbuf[m].atomID = dihedral_atom4[i][j]; + inbuf[m].partnerID = dihedral_atom1[i][j]; + m++; + } } - // clean up + // perform rendezvous operation + // each proc owns random subset of atoms + // func = compute bbox of each body, flag atom closest to geometric center + // when done: each atom has atom ID of owning atom of its body + + char *buf; + int nreturn = comm->rendezvous(ncount,proclist,(char *) inbuf,sizeof(InRvous), + rendezvous_trim, + buf,sizeof(OutRvous),(void *) this); + OutRvous *outbuf = (OutRvous *) buf; - memory->destroy(dflag); - memory->destroy(buf); + memory->destroy(proclist); + memory->sfree(inbuf); + + // reset nspecial[2] and onefour for all owned atoms based on output info + + for (i = 0; i < nlocal; i++) nspecial[i][2] = 0; + + for (m = 0; m < nreturn; m++) { + i = atom->map(outbuf[m].atomID); + onefour[i][nspecial[i][2]++] = outbuf[m].partnerID; + } + + memory->destroy(outbuf); // if no dihedrals are defined, delete all 1-4 neighs @@ -888,262 +922,213 @@ void Special::dihedral_trim() } /* ---------------------------------------------------------------------- - when receive buffer, scan tags for atoms I own - when find one, increment nspecial count for that atom + process data for atoms assigned to me in rendezvous decomposition + inbuf = list of N InRvous datums + create outbuf = list of Nout OutRvous datums ------------------------------------------------------------------------- */ -void Special::ring_one(int ndatum, char *cbuf, void *ptr) +int Special::rendezvous_1234(int n, char *inbuf, + int *&proclist, char *&outbuf, + void *ptr) { + int i,j,m; + Special *sptr = (Special *) ptr; - Atom *atom = sptr->atom; - int **nspecial = atom->nspecial; - int nlocal = atom->nlocal; + Memory *memory = sptr->memory; - tagint *buf = (tagint *) cbuf; - int m; + // setup hash + // ncount = number of atoms assigned to me + // key = atom ID + // value = index into Ncount-length data structure - for (int i = 0; i < ndatum; i++) { - m = atom->map(buf[i]); - if (m >= 0 && m < nlocal) nspecial[m][0]++; + InRvous *in = (InRvous *) inbuf; + std::map hash; + tagint id; + + int ncount = 0; + for (i = 0; i < n; i++) + if (in[i].me >= 0) + hash[in[i].atomID] = ncount++; + + // procowner = caller proc that owns each atom + // atomID = ID of each rendezvous atom I own + + int *procowner,*npartner; + tagint *atomID; + memory->create(procowner,ncount,"special:procowner"); + memory->create(atomID,ncount,"special:atomID"); + memory->create(npartner,ncount,"special:npartner"); + for (m = 0; m < ncount; m++) npartner[m] = 0; + + for (i = 0; i < n; i++) { + m = hash.find(in[i].atomID)->second; + if (in[i].me >= 0) { + procowner[m] = in[i].me; + atomID[m] = in[i].atomID; + } else npartner[m]++; } -} -/* ---------------------------------------------------------------------- - when receive buffer, scan 2nd-atom tags for atoms I own - when find one, add 1st-atom tag to onetwo list for 2nd atom -------------------------------------------------------------------------- */ + int max = 0; + for (m = 0; m < ncount; m++) + max = MAX(max,npartner[m]); + sptr->max_rvous = max; -void Special::ring_two(int ndatum, char *cbuf, void *ptr) -{ - Special *sptr = (Special *) ptr; - Atom *atom = sptr->atom; - int nlocal = atom->nlocal; + int **partner; + memory->create(partner,ncount,max,"special:partner"); + for (m = 0; m < ncount; m++) npartner[m] = 0; - tagint **onetwo = sptr->onetwo; - int *count = sptr->count; - - tagint *buf = (tagint *) cbuf; - int m; - - for (int i = 1; i < ndatum; i += 2) { - m = atom->map(buf[i]); - if (m >= 0 && m < nlocal) onetwo[m][count[m]++] = buf[i-1]; + for (i = 0; i < n; i++) { + if (in[i].me >= 0) continue; + m = hash.find(in[i].atomID)->second; + partner[m][npartner[m]++] = in[i].partnerID; } -} -/* ---------------------------------------------------------------------- - when receive buffer, scan list of 1-2 neighbors for atoms I own - when find one, increment 1-3 count by # of 1-2 neighbors of my atom, - subtracting one since my list will contain original atom -------------------------------------------------------------------------- */ + // pass list of OutRvous datums back to comm->rendezvous -void Special::ring_three(int ndatum, char *cbuf, void *ptr) -{ - Special *sptr = (Special *) ptr; - Atom *atom = sptr->atom; - int **nspecial = atom->nspecial; - int nlocal = atom->nlocal; + int nout = 0; + for (m = 0; m < ncount; m++) nout += npartner[m]; - tagint *buf = (tagint *) cbuf; - int i,j,m,n,num12; + memory->create(proclist,nout,"special:proclist"); + OutRvous *out = (OutRvous *) + memory->smalloc((bigint) nout*sizeof(OutRvous),"special:out"); - i = 0; - while (i < ndatum) { - n = buf[i]; - num12 = buf[i+1]; - for (j = 0; j < num12; j++) { - m = atom->map(buf[i+2+j]); - if (m >= 0 && m < nlocal) - n += nspecial[m][0] - 1; + nout = 0; + for (m = 0; m < ncount; m++) + for (j = 0; j < npartner[m]; j++) { + proclist[nout] = procowner[m]; + out[nout].atomID = atomID[m]; + out[nout].partnerID = partner[m][j]; + nout++; } - buf[i] = n; - i += 2 + num12; - } + + outbuf = (char *) out; + + // clean up + // Comm::rendezvous will delete proclist and out (outbuf) + + memory->destroy(procowner); + memory->destroy(atomID); + memory->destroy(npartner); + memory->destroy(partner); + + return nout; } /* ---------------------------------------------------------------------- - when receive buffer, scan list of 1-2 neighbors for atoms I own - when find one, add its neighbors to 1-3 list - increment the count in buf(i+4) - exclude the atom whose tag = original - this process may include duplicates but they will be culled later + process data for atoms assigned to me in rendezvous decomposition + inbuf = list of N InRvous datums + create outbuf = list of Nout OutRvous datums ------------------------------------------------------------------------- */ -void Special::ring_four(int ndatum, char *cbuf, void *ptr) +int Special::rendezvous_trim(int n, char *inbuf, + int *&proclist, char *&outbuf, + void *ptr) { + int i,j,m; + Special *sptr = (Special *) ptr; - Atom *atom = sptr->atom; - int **nspecial = atom->nspecial; - int nlocal = atom->nlocal; + Memory *memory = sptr->memory; - tagint **onetwo = sptr->onetwo; + // setup hash + // ncount = number of atoms assigned to me + // key = atom ID + // value = index into Ncount-length data structure - tagint *buf = (tagint *) cbuf; - tagint original; - int i,j,k,m,n,num12,num13; + InRvous *in = (InRvous *) inbuf; + std::map hash; + tagint id; + + int ncount = 0; + for (i = 0; i < n; i++) + if (in[i].me >= 0) + hash[in[i].atomID] = ncount++; - i = 0; - while (i < ndatum) { - original = buf[i]; - num12 = buf[i+1]; - num13 = buf[i+2]; - n = buf[i+3]; - for (j = 0; j < num12; j++) { - m = atom->map(buf[i+4+j]); - if (m >= 0 && m < nlocal) - for (k = 0; k < nspecial[m][0]; k++) - if (onetwo[m][k] != original) - buf[i+4+num12+(n++)] = onetwo[m][k]; + // procowner = caller proc that owns each atom + // atomID = ID of each rendezvous atom I own + // npartner = # of 1-3 partners for each atom I own + + int *procowner,*npartner; + tagint *atomID; + memory->create(procowner,ncount,"special:procowner"); + memory->create(atomID,ncount,"special:atomID"); + memory->create(npartner,ncount,"special:npartner"); + for (m = 0; m < ncount; m++) npartner[m] = 0; + + for (i = 0; i < n; i++) { + m = hash.find(in[i].atomID)->second; + if (in[i].me >= 0) { + procowner[m] = in[i].me; + atomID[m] = in[i].atomID; + } else if (in[i].me == -1) npartner[m]++; + } + + int max = 0; + for (m = 0; m < ncount; m++) max = MAX(max,npartner[m]); + + // partner = list of 1-3 or 1-4 partners for each atom I own + + int **partner; + memory->create(partner,ncount,max,"special:partner"); + for (m = 0; m < ncount; m++) npartner[m] = 0; + + for (i = 0; i < n; i++) { + if (in[i].me >= 0 || in[i].me == -2) continue; + m = hash.find(in[i].atomID)->second; + partner[m][npartner[m]++] = in[i].partnerID; + } + + // flag = 1 if partner is in an actual angle or in a dihedral + + int **flag; + memory->create(flag,ncount,max,"special:flag"); + + for (i = 0; i < ncount; i++) + for (j = 0; j < npartner[i]; j++) + flag[i][j] = 0; + + tagint actual; + for (i = 0; i < n; i++) { + if (in[i].me != -2) continue; + actual = in[i].partnerID; + m = hash.find(in[i].atomID)->second; + for (j = 0; j < npartner[m]; j++) + if (partner[m][j] == actual) { + flag[m][j] = 1; + break; + } + } + + // pass list of OutRvous datums back to comm->rendezvous + + int nout = 0; + for (m = 0; m < ncount; m++) nout += npartner[m]; + + memory->create(proclist,nout,"special:proclist"); + OutRvous *out = (OutRvous *) + memory->smalloc((bigint) nout*sizeof(OutRvous),"special:out"); + + nout = 0; + for (m = 0; m < ncount; m++) + for (j = 0; j < npartner[m]; j++) { + if (flag[m][j] == 0) continue; + proclist[nout] = procowner[m]; + out[nout].atomID = atomID[m]; + out[nout].partnerID = partner[m][j]; + nout++; } - buf[i+3] = n; - i += 4 + num12 + num13; - } -} -/* ---------------------------------------------------------------------- - when receive buffer, scan list of 1-3 neighbors for atoms I own - when find one, increment 1-4 count by # of 1-2 neighbors of my atom - may include duplicates and original atom but they will be culled later -------------------------------------------------------------------------- */ + outbuf = (char *) out; -void Special::ring_five(int ndatum, char *cbuf, void *ptr) -{ - Special *sptr = (Special *) ptr; - Atom *atom = sptr->atom; - int **nspecial = atom->nspecial; - int nlocal = atom->nlocal; + // clean up + // Comm::rendezvous will delete proclist and out (outbuf) - tagint *buf = (tagint *) cbuf; - int i,j,m,n,num13; + memory->destroy(procowner); + memory->destroy(atomID); + memory->destroy(npartner); + memory->destroy(partner); + memory->destroy(flag); - i = 0; - while (i < ndatum) { - n = buf[i]; - num13 = buf[i+1]; - for (j = 0; j < num13; j++) { - m = atom->map(buf[i+2+j]); - if (m >= 0 && m < nlocal) n += nspecial[m][0]; - } - buf[i] = n; - i += 2 + num13; - } -} - -/* ---------------------------------------------------------------------- - when receive buffer, scan list of 1-3 neighbors for atoms I own - when find one, add its neighbors to 1-4 list - incrementing the count in buf(i+4) - this process may include duplicates but they will be culled later -------------------------------------------------------------------------- */ - -void Special::ring_six(int ndatum, char *cbuf, void *ptr) -{ - Special *sptr = (Special *) ptr; - Atom *atom = sptr->atom; - int **nspecial = atom->nspecial; - int nlocal = atom->nlocal; - - tagint **onetwo = sptr->onetwo; - - tagint *buf = (tagint *) cbuf; - int i,j,k,m,n,num13,num14; - - i = 0; - while (i < ndatum) { - num13 = buf[i]; - num14 = buf[i+1]; - n = buf[i+2]; - for (j = 0; j < num13; j++) { - m = atom->map(buf[i+3+j]); - if (m >= 0 && m < nlocal) - for (k = 0; k < nspecial[m][0]; k++) - buf[i+3+num13+(n++)] = onetwo[m][k]; - } - buf[i+2] = n; - i += 3 + num13 + num14; - } -} - -/* ---------------------------------------------------------------------- - when receive buffer, scan list of 1,3 atoms looking for atoms I own - when find one, scan its 1-3 neigh list and mark I,J as in an angle -------------------------------------------------------------------------- */ - -void Special::ring_seven(int ndatum, char *cbuf, void *ptr) -{ - Special *sptr = (Special *) ptr; - Atom *atom = sptr->atom; - int **nspecial = atom->nspecial; - int nlocal = atom->nlocal; - - tagint **onethree = sptr->onethree; - int **dflag = sptr->dflag; - - tagint *buf = (tagint *) cbuf; - tagint iglobal,jglobal; - int i,m,ilocal,jlocal; - - i = 0; - while (i < ndatum) { - iglobal = buf[i]; - jglobal = buf[i+1]; - ilocal = atom->map(iglobal); - jlocal = atom->map(jglobal); - if (ilocal >= 0 && ilocal < nlocal) - for (m = 0; m < nspecial[ilocal][1]; m++) - if (jglobal == onethree[ilocal][m]) { - dflag[ilocal][m] = 1; - break; - } - if (jlocal >= 0 && jlocal < nlocal) - for (m = 0; m < nspecial[jlocal][1]; m++) - if (iglobal == onethree[jlocal][m]) { - dflag[jlocal][m] = 1; - break; - } - i += 2; - } -} - -/* ---------------------------------------------------------------------- - when receive buffer, scan list of 1,4 atoms looking for atoms I own - when find one, scan its 1-4 neigh list and mark I,J as in a dihedral -------------------------------------------------------------------------- */ - -void Special::ring_eight(int ndatum, char *cbuf, void *ptr) -{ - Special *sptr = (Special *) ptr; - Atom *atom = sptr->atom; - int **nspecial = atom->nspecial; - int nlocal = atom->nlocal; - - tagint **onefour = sptr->onefour; - int **dflag = sptr->dflag; - - tagint *buf = (tagint *) cbuf; - tagint iglobal,jglobal; - int i,m,ilocal,jlocal; - - i = 0; - while (i < ndatum) { - iglobal = buf[i]; - jglobal = buf[i+1]; - ilocal = atom->map(iglobal); - jlocal = atom->map(jglobal); - if (ilocal >= 0 && ilocal < nlocal) - for (m = 0; m < nspecial[ilocal][2]; m++) - if (jglobal == onefour[ilocal][m]) { - dflag[ilocal][m] = 1; - break; - } - if (jlocal >= 0 && jlocal < nlocal) - for (m = 0; m < nspecial[jlocal][2]; m++) - if (iglobal == onefour[jlocal][m]) { - dflag[jlocal][m] = 1; - break; - } - i += 2; - } + return nout; } /* ---------------------------------------------------------------------- @@ -1159,3 +1144,15 @@ void Special::fix_alteration() modify->fix[ifix]->rebuild_special(); } +/* ---------------------------------------------------------------------- + print timing output +------------------------------------------------------------------------- */ + +void Special::timer_output(double time1) +{ + double t2 = MPI_Wtime(); + if (comm->me == 0) { + if (screen) fprintf(screen," special bonds CPU = %g secs\n",t2-t1); + if (logfile) fprintf(logfile," special bonds CPU = %g secs\n",t2-t1); + } +} diff --git a/src/special.h b/src/special.h index 9f25200336..f7892075ac 100644 --- a/src/special.h +++ b/src/special.h @@ -28,27 +28,30 @@ class Special : protected Pointers { int me,nprocs; tagint **onetwo,**onethree,**onefour; - // data used by ring callback methods + // data used by rendezvous callback methods - int *count; - int **dflag; + int max_rvous; + + struct InRvous { + int me; + tagint atomID,partnerID; + }; + + struct OutRvous { + tagint atomID,partnerID; + }; void dedup(); void angle_trim(); void dihedral_trim(); void combine(); void fix_alteration(); + void timer_output(double); - // callback functions for ring communication + // callback function for rendezvous communication - static void ring_one(int, char *, void *); - static void ring_two(int, char *, void *); - static void ring_three(int, char *, void *); - static void ring_four(int, char *, void *); - static void ring_five(int, char *, void *); - static void ring_six(int, char *, void *); - static void ring_seven(int, char *, void *); - static void ring_eight(int, char *, void *); + static int rendezvous_1234(int, char *, int *&, char *&, void *); + static int rendezvous_trim(int, char *, int *&, char *&, void *); }; }