mirror of https://github.com/lammps/lammps.git
Merge pull request #1384 from lammps/rendezvous2
Second attempt at adding a rendezvous protocol for some-to-some communication.
This commit is contained in:
commit
28a6dcd1c3
|
@ -28,12 +28,14 @@
|
|||
#include "modify.h"
|
||||
#include "group.h"
|
||||
#include "comm.h"
|
||||
#include "neighbor.h"
|
||||
#include "force.h"
|
||||
#include "input.h"
|
||||
#include "output.h"
|
||||
#include "variable.h"
|
||||
#include "random_mars.h"
|
||||
#include "math_const.h"
|
||||
#include "hashlittle.h"
|
||||
#include "memory.h"
|
||||
#include "error.h"
|
||||
|
||||
|
@ -43,6 +45,8 @@ using namespace LAMMPS_NS;
|
|||
using namespace FixConst;
|
||||
using namespace MathConst;
|
||||
|
||||
#define RVOUS 1 // 0 for irregular, 1 for all2all
|
||||
|
||||
#define MAXLINE 1024
|
||||
#define CHUNK 1024
|
||||
#define ATTRIBUTE_PERBODY 20
|
||||
|
@ -70,8 +74,7 @@ FixRigidSmall::FixRigidSmall(LAMMPS *lmp, int narg, char **arg) :
|
|||
xcmimage(NULL), displace(NULL), eflags(NULL), orient(NULL), dorient(NULL),
|
||||
avec_ellipsoid(NULL), avec_line(NULL), avec_tri(NULL), counts(NULL),
|
||||
itensor(NULL), mass_body(NULL), langextra(NULL), random(NULL),
|
||||
id_dilate(NULL), onemols(NULL), hash(NULL), bbox(NULL), ctr(NULL),
|
||||
idclose(NULL), rsqclose(NULL)
|
||||
id_dilate(NULL), onemols(NULL)
|
||||
{
|
||||
int i;
|
||||
|
||||
|
@ -107,18 +110,18 @@ FixRigidSmall::FixRigidSmall(LAMMPS *lmp, int narg, char **arg) :
|
|||
// parse args for rigid body specification
|
||||
|
||||
int *mask = atom->mask;
|
||||
tagint *bodyid = NULL;
|
||||
tagint *bodyID = NULL;
|
||||
int nlocal = atom->nlocal;
|
||||
|
||||
if (narg < 4) error->all(FLERR,"Illegal fix rigid/small command");
|
||||
if (strcmp(arg[3],"molecule") == 0) {
|
||||
if (atom->molecule_flag == 0)
|
||||
error->all(FLERR,"Fix rigid/small requires atom attribute molecule");
|
||||
bodyid = atom->molecule;
|
||||
bodyID = atom->molecule;
|
||||
|
||||
} else if (strcmp(arg[3],"custom") == 0) {
|
||||
if (narg < 5) error->all(FLERR,"Illegal fix rigid/small command");
|
||||
bodyid = new tagint[nlocal];
|
||||
bodyID = new tagint[nlocal];
|
||||
customflag = 1;
|
||||
|
||||
// determine whether atom-style variable or atom property is used.
|
||||
|
@ -126,9 +129,11 @@ FixRigidSmall::FixRigidSmall(LAMMPS *lmp, int narg, char **arg) :
|
|||
int is_double=0;
|
||||
int custom_index = atom->find_custom(arg[4]+2,is_double);
|
||||
if (custom_index == -1)
|
||||
error->all(FLERR,"Fix rigid/small custom requires previously defined property/atom");
|
||||
error->all(FLERR,"Fix rigid/small custom requires "
|
||||
"previously defined property/atom");
|
||||
else if (is_double)
|
||||
error->all(FLERR,"Fix rigid/small custom requires integer-valued property/atom");
|
||||
error->all(FLERR,"Fix rigid/small custom requires "
|
||||
"integer-valued property/atom");
|
||||
|
||||
int minval = INT_MAX;
|
||||
int *value = atom->ivector[custom_index];
|
||||
|
@ -139,15 +144,17 @@ FixRigidSmall::FixRigidSmall(LAMMPS *lmp, int narg, char **arg) :
|
|||
|
||||
for (i = 0; i < nlocal; i++)
|
||||
if (mask[i] & groupbit)
|
||||
bodyid[i] = (tagint)(value[i] - minval + 1);
|
||||
else bodyid[i] = 0;
|
||||
bodyID[i] = (tagint)(value[i] - minval + 1);
|
||||
else bodyID[i] = 0;
|
||||
|
||||
} else if (strstr(arg[4],"v_") == arg[4]) {
|
||||
int ivariable = input->variable->find(arg[4]+2);
|
||||
if (ivariable < 0)
|
||||
error->all(FLERR,"Variable name for fix rigid/small custom does not exist");
|
||||
error->all(FLERR,"Variable name for fix rigid/small custom "
|
||||
"does not exist");
|
||||
if (input->variable->atomstyle(ivariable) == 0)
|
||||
error->all(FLERR,"Fix rigid/small custom variable is no atom-style variable");
|
||||
error->all(FLERR,"Fix rigid/small custom variable is not "
|
||||
"atom-style variable");
|
||||
double *value = new double[nlocal];
|
||||
input->variable->compute_atom(ivariable,0,value,1,0);
|
||||
int minval = INT_MAX;
|
||||
|
@ -158,8 +165,8 @@ FixRigidSmall::FixRigidSmall(LAMMPS *lmp, int narg, char **arg) :
|
|||
|
||||
for (i = 0; i < nlocal; i++)
|
||||
if (mask[i] & groupbit)
|
||||
bodyid[i] = (tagint)((tagint)value[i] - minval + 1);
|
||||
else bodyid[0] = 0;
|
||||
bodyID[i] = (tagint)((tagint)value[i] - minval + 1);
|
||||
else bodyID[0] = 0;
|
||||
delete[] value;
|
||||
} else error->all(FLERR,"Unsupported fix rigid custom property");
|
||||
} else error->all(FLERR,"Illegal fix rigid/small command");
|
||||
|
@ -167,10 +174,11 @@ FixRigidSmall::FixRigidSmall(LAMMPS *lmp, int narg, char **arg) :
|
|||
if (atom->map_style == 0)
|
||||
error->all(FLERR,"Fix rigid/small requires an atom map, see atom_modify");
|
||||
|
||||
// maxmol = largest bodyid #
|
||||
// maxmol = largest bodyID #
|
||||
|
||||
maxmol = -1;
|
||||
for (i = 0; i < nlocal; i++)
|
||||
if (mask[i] & groupbit) maxmol = MAX(maxmol,bodyid[i]);
|
||||
if (mask[i] & groupbit) maxmol = MAX(maxmol,bodyID[i]);
|
||||
|
||||
tagint itmp;
|
||||
MPI_Allreduce(&maxmol,&itmp,1,MPI_LMP_TAGINT,MPI_MAX,world);
|
||||
|
@ -400,8 +408,19 @@ FixRigidSmall::FixRigidSmall(LAMMPS *lmp, int narg, char **arg) :
|
|||
// sets bodytag for owned atoms
|
||||
// body attributes are computed later by setup_bodies()
|
||||
|
||||
create_bodies(bodyid);
|
||||
if (customflag) delete [] bodyid;
|
||||
double time1 = MPI_Wtime();
|
||||
|
||||
create_bodies(bodyID);
|
||||
if (customflag) delete [] bodyID;
|
||||
|
||||
double time2 = MPI_Wtime();
|
||||
|
||||
if (comm->me == 0) {
|
||||
if (screen)
|
||||
fprintf(screen," create bodies CPU = %g secs\n",time2-time1);
|
||||
if (logfile)
|
||||
fprintf(logfile," create bodies CPU = %g secs\n",time2-time1);
|
||||
}
|
||||
|
||||
// set nlocal_body and allocate bodies I own
|
||||
|
||||
|
@ -569,7 +588,8 @@ void FixRigidSmall::init()
|
|||
if (rflag && (modify->fmask[i] & POST_FORCE) &&
|
||||
!modify->fix[i]->rigid_flag) {
|
||||
char str[128];
|
||||
snprintf(str,128,"Fix %s alters forces after fix rigid",modify->fix[i]->id);
|
||||
snprintf(str,128,"Fix %s alters forces after fix rigid",
|
||||
modify->fix[i]->id);
|
||||
error->warning(FLERR,str);
|
||||
}
|
||||
}
|
||||
|
@ -633,6 +653,16 @@ void FixRigidSmall::setup(int vflag)
|
|||
{
|
||||
int i,n,ibody;
|
||||
|
||||
// error if maxextent > comm->cutghost
|
||||
// NOTE: could just warn if an override flag set
|
||||
// NOTE: this could fail for comm multi mode if user sets a wrong cutoff
|
||||
// for atom types in rigid bodies - need a more careful test
|
||||
// must check here, not in init, b/c neigh/comm values set after fix init
|
||||
|
||||
double cutghost = MAX(neighbor->cutneighmax,comm->cutghostuser);
|
||||
if (maxextent > cutghost)
|
||||
error->all(FLERR,"Rigid body extent > ghost cutoff - use comm_modify cutoff");
|
||||
|
||||
//check(1);
|
||||
|
||||
// sum fcm, torque across all rigid bodies
|
||||
|
@ -1514,175 +1544,72 @@ void FixRigidSmall::set_v()
|
|||
set bodytag for all owned atoms
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void FixRigidSmall::create_bodies(tagint *bodyid)
|
||||
void FixRigidSmall::create_bodies(tagint *bodyID)
|
||||
{
|
||||
int i,m,n;
|
||||
double unwrap[3];
|
||||
int i,m;
|
||||
|
||||
// error check on image flags of atoms in rigid bodies
|
||||
// allocate buffer for input to rendezvous comm
|
||||
// ncount = # of my atoms in bodies
|
||||
|
||||
imageint *image = atom->image;
|
||||
int *mask = atom->mask;
|
||||
int nlocal = atom->nlocal;
|
||||
|
||||
int *periodicity = domain->periodicity;
|
||||
int xbox,ybox,zbox;
|
||||
|
||||
int flag = 0;
|
||||
for (i = 0; i < nlocal; i++) {
|
||||
if (!(mask[i] & groupbit)) continue;
|
||||
xbox = (image[i] & IMGMASK) - IMGMAX;
|
||||
ybox = (image[i] >> IMGBITS & IMGMASK) - IMGMAX;
|
||||
zbox = (image[i] >> IMG2BITS) - IMGMAX;
|
||||
if ((xbox && !periodicity[0]) || (ybox && !periodicity[1]) ||
|
||||
(zbox && !periodicity[2])) flag = 1;
|
||||
}
|
||||
|
||||
int flagall;
|
||||
MPI_Allreduce(&flag,&flagall,1,MPI_INT,MPI_SUM,world);
|
||||
if (flagall) error->all(FLERR,"Fix rigid/small atom has non-zero image flag "
|
||||
"in a non-periodic dimension");
|
||||
|
||||
// allocate buffer for passing messages around ring of procs
|
||||
// percount = max number of values to put in buffer for each of ncount
|
||||
|
||||
int ncount = 0;
|
||||
for (i = 0; i < nlocal; i++)
|
||||
if (mask[i] & groupbit) ncount++;
|
||||
|
||||
int percount = 5;
|
||||
double *buf;
|
||||
memory->create(buf,ncount*percount,"rigid/small:buf");
|
||||
int *proclist;
|
||||
memory->create(proclist,ncount,"rigid/small:proclist");
|
||||
InRvous *inbuf = (InRvous *)
|
||||
memory->smalloc(ncount*sizeof(InRvous),"rigid/small:inbuf");
|
||||
|
||||
// create map hash for storing unique body IDs of my atoms
|
||||
// key = body ID
|
||||
// value = index into per-body data structure
|
||||
// n = # of entries in hash
|
||||
|
||||
hash = new std::map<tagint,int>();
|
||||
hash->clear();
|
||||
|
||||
// setup hash
|
||||
// key = body ID
|
||||
// value = index into N-length data structure
|
||||
// n = count of unique bodies my atoms are part of
|
||||
|
||||
n = 0;
|
||||
for (i = 0; i < nlocal; i++) {
|
||||
if (!(mask[i] & groupbit)) continue;
|
||||
if (hash->find(bodyid[i]) == hash->end()) (*hash)[bodyid[i]] = n++;
|
||||
}
|
||||
|
||||
// bbox = bounding box of each rigid body my atoms are part of
|
||||
|
||||
memory->create(bbox,n,6,"rigid/small:bbox");
|
||||
|
||||
for (i = 0; i < n; i++) {
|
||||
bbox[i][0] = bbox[i][2] = bbox[i][4] = BIG;
|
||||
bbox[i][1] = bbox[i][3] = bbox[i][5] = -BIG;
|
||||
}
|
||||
|
||||
// pack my atoms into buffer as body ID, unwrapped coords
|
||||
// setup buf to pass to rendezvous comm
|
||||
// one BodyMsg datum for each constituent atom
|
||||
// datum = me, local index of atom, atomID, bodyID, unwrapped coords
|
||||
// owning proc for each datum = random hash of bodyID
|
||||
|
||||
double **x = atom->x;
|
||||
|
||||
m = 0;
|
||||
for (i = 0; i < nlocal; i++) {
|
||||
if (!(mask[i] & groupbit)) continue;
|
||||
domain->unmap(x[i],image[i],unwrap);
|
||||
buf[m++] = bodyid[i];
|
||||
buf[m++] = unwrap[0];
|
||||
buf[m++] = unwrap[1];
|
||||
buf[m++] = unwrap[2];
|
||||
}
|
||||
|
||||
// pass buffer around ring of procs
|
||||
// func = update bbox with atom coords from every proc
|
||||
// when done, have full bbox for every rigid body my atoms are part of
|
||||
|
||||
comm->ring(m,sizeof(double),buf,1,ring_bbox,NULL,(void *)this);
|
||||
|
||||
// check if any bbox is size 0.0, meaning rigid body is a single particle
|
||||
|
||||
flag = 0;
|
||||
for (i = 0; i < n; i++)
|
||||
if (bbox[i][0] == bbox[i][1] && bbox[i][2] == bbox[i][3] &&
|
||||
bbox[i][4] == bbox[i][5]) flag = 1;
|
||||
MPI_Allreduce(&flag,&flagall,1,MPI_INT,MPI_SUM,world);
|
||||
if (flagall)
|
||||
error->all(FLERR,"One or more rigid bodies are a single particle");
|
||||
|
||||
// ctr = center pt of each rigid body my atoms are part of
|
||||
|
||||
memory->create(ctr,n,6,"rigid/small:bbox");
|
||||
|
||||
for (i = 0; i < n; i++) {
|
||||
ctr[i][0] = 0.5 * (bbox[i][0] + bbox[i][1]);
|
||||
ctr[i][1] = 0.5 * (bbox[i][2] + bbox[i][3]);
|
||||
ctr[i][2] = 0.5 * (bbox[i][4] + bbox[i][5]);
|
||||
}
|
||||
|
||||
// idclose = ID of atom in body closest to center pt (smaller ID if tied)
|
||||
// rsqclose = distance squared from idclose to center pt
|
||||
|
||||
memory->create(idclose,n,"rigid/small:idclose");
|
||||
memory->create(rsqclose,n,"rigid/small:rsqclose");
|
||||
|
||||
for (i = 0; i < n; i++) rsqclose[i] = BIG;
|
||||
|
||||
// pack my atoms into buffer as body ID, atom ID, unwrapped coords
|
||||
|
||||
tagint *tag = atom->tag;
|
||||
imageint *image = atom->image;
|
||||
|
||||
m = 0;
|
||||
for (i = 0; i < nlocal; i++) {
|
||||
if (!(mask[i] & groupbit)) continue;
|
||||
domain->unmap(x[i],image[i],unwrap);
|
||||
buf[m++] = bodyid[i];
|
||||
buf[m++] = ubuf(tag[i]).d;
|
||||
buf[m++] = unwrap[0];
|
||||
buf[m++] = unwrap[1];
|
||||
buf[m++] = unwrap[2];
|
||||
proclist[m] = hashlittle(&bodyID[i],sizeof(tagint),0) % nprocs;
|
||||
inbuf[m].me = me;
|
||||
inbuf[m].ilocal = i;
|
||||
inbuf[m].atomID = tag[i];
|
||||
inbuf[m].bodyID = bodyID[i];
|
||||
domain->unmap(x[i],image[i],inbuf[m].x);
|
||||
m++;
|
||||
}
|
||||
|
||||
// pass buffer around ring of procs
|
||||
// func = update idclose,rsqclose with atom IDs from every proc
|
||||
// when done, have idclose for every rigid body my atoms are part of
|
||||
// perform rendezvous operation
|
||||
// each proc owns random subset of bodies
|
||||
// receives all atoms in those bodies
|
||||
// func = compute bbox of each body, find atom closest to geometric center
|
||||
|
||||
comm->ring(m,sizeof(double),buf,2,ring_nearest,NULL,(void *)this);
|
||||
char *buf;
|
||||
int nreturn = comm->rendezvous(RVOUS,ncount,(char *) inbuf,sizeof(InRvous),
|
||||
0,proclist,
|
||||
rendezvous_body,0,buf,sizeof(OutRvous),
|
||||
(void *) this);
|
||||
OutRvous *outbuf = (OutRvous *) buf;
|
||||
|
||||
// set bodytag of all owned atoms, based on idclose
|
||||
// find max value of rsqclose across all procs
|
||||
memory->destroy(proclist);
|
||||
memory->sfree(inbuf);
|
||||
|
||||
double rsqmax = 0.0;
|
||||
for (i = 0; i < nlocal; i++) {
|
||||
bodytag[i] = 0;
|
||||
if (!(mask[i] & groupbit)) continue;
|
||||
m = hash->find(bodyid[i])->second;
|
||||
bodytag[i] = idclose[m];
|
||||
rsqmax = MAX(rsqmax,rsqclose[m]);
|
||||
}
|
||||
// set bodytag of all owned atoms based on outbuf info for constituent atoms
|
||||
|
||||
// pack my atoms into buffer as bodytag of owning atom, unwrapped coords
|
||||
for (i = 0; i < nlocal; i++)
|
||||
if (!(mask[i] & groupbit)) bodytag[i] = 0;
|
||||
|
||||
m = 0;
|
||||
for (i = 0; i < nlocal; i++) {
|
||||
if (!(mask[i] & groupbit)) continue;
|
||||
domain->unmap(x[i],image[i],unwrap);
|
||||
buf[m++] = ubuf(bodytag[i]).d;
|
||||
buf[m++] = unwrap[0];
|
||||
buf[m++] = unwrap[1];
|
||||
buf[m++] = unwrap[2];
|
||||
}
|
||||
for (m = 0; m < nreturn; m++)
|
||||
bodytag[outbuf[m].ilocal] = outbuf[m].atomID;
|
||||
|
||||
// pass buffer around ring of procs
|
||||
// func = update rsqfar for atoms belonging to bodies I own
|
||||
// when done, have rsqfar for all atoms in bodies I own
|
||||
memory->sfree(outbuf);
|
||||
|
||||
rsqfar = 0.0;
|
||||
comm->ring(m,sizeof(double),buf,3,ring_farthest,NULL,(void *)this);
|
||||
|
||||
// find maxextent of rsqfar across all procs
|
||||
// maxextent = max of rsqfar across all procs
|
||||
// if defined, include molecule->maxextent
|
||||
|
||||
MPI_Allreduce(&rsqfar,&maxextent,1,MPI_DOUBLE,MPI_MAX,world);
|
||||
|
@ -1691,125 +1618,156 @@ void FixRigidSmall::create_bodies(tagint *bodyid)
|
|||
for (int i = 0; i < nmol; i++)
|
||||
maxextent = MAX(maxextent,onemols[i]->maxextent);
|
||||
}
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
process rigid bodies assigned to me
|
||||
buf = list of N BodyMsg datums
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
int FixRigidSmall::rendezvous_body(int n, char *inbuf,
|
||||
int &rflag, int *&proclist, char *&outbuf,
|
||||
void *ptr)
|
||||
{
|
||||
int i,m;
|
||||
double delx,dely,delz,rsq;
|
||||
int *iclose;
|
||||
tagint *idclose;
|
||||
double *x,*xown,*rsqclose;
|
||||
double **bbox,**ctr;
|
||||
|
||||
FixRigidSmall *frsptr = (FixRigidSmall *) ptr;
|
||||
Memory *memory = frsptr->memory;
|
||||
Error *error = frsptr->error;
|
||||
MPI_Comm world = frsptr->world;
|
||||
|
||||
// setup hash
|
||||
// use STL map instead of atom->map
|
||||
// b/c know nothing about body ID values specified by user
|
||||
// ncount = number of bodies assigned to me
|
||||
// key = body ID
|
||||
// value = index into Ncount-length data structure
|
||||
|
||||
InRvous *in = (InRvous *) inbuf;
|
||||
std::map<tagint,int> hash;
|
||||
tagint id;
|
||||
|
||||
int ncount = 0;
|
||||
for (i = 0; i < n; i++) {
|
||||
id = in[i].bodyID;
|
||||
if (hash.find(id) == hash.end()) hash[id] = ncount++;
|
||||
}
|
||||
|
||||
// bbox = bounding box of each rigid body
|
||||
|
||||
memory->create(bbox,ncount,6,"rigid/small:bbox");
|
||||
|
||||
for (m = 0; m < ncount; m++) {
|
||||
bbox[m][0] = bbox[m][2] = bbox[m][4] = BIG;
|
||||
bbox[m][1] = bbox[m][3] = bbox[m][5] = -BIG;
|
||||
}
|
||||
|
||||
for (i = 0; i < n; i++) {
|
||||
m = hash.find(in[i].bodyID)->second;
|
||||
x = in[i].x;
|
||||
bbox[m][0] = MIN(bbox[m][0],x[0]);
|
||||
bbox[m][1] = MAX(bbox[m][1],x[0]);
|
||||
bbox[m][2] = MIN(bbox[m][2],x[1]);
|
||||
bbox[m][3] = MAX(bbox[m][3],x[1]);
|
||||
bbox[m][4] = MIN(bbox[m][4],x[2]);
|
||||
bbox[m][5] = MAX(bbox[m][5],x[2]);
|
||||
}
|
||||
|
||||
// check if any bbox is size 0.0, meaning rigid body is a single particle
|
||||
|
||||
int flag = 0;
|
||||
for (m = 0; m < ncount; m++)
|
||||
if (bbox[m][0] == bbox[m][1] && bbox[m][2] == bbox[m][3] &&
|
||||
bbox[m][4] == bbox[m][5]) flag = 1;
|
||||
int flagall;
|
||||
MPI_Allreduce(&flag,&flagall,1,MPI_INT,MPI_SUM,world); // sync here?
|
||||
if (flagall)
|
||||
error->all(FLERR,"One or more rigid bodies are a single particle");
|
||||
|
||||
// ctr = geometric center pt of each rigid body
|
||||
|
||||
memory->create(ctr,ncount,3,"rigid/small:bbox");
|
||||
|
||||
for (m = 0; m < ncount; m++) {
|
||||
ctr[m][0] = 0.5 * (bbox[m][0] + bbox[m][1]);
|
||||
ctr[m][1] = 0.5 * (bbox[m][2] + bbox[m][3]);
|
||||
ctr[m][2] = 0.5 * (bbox[m][4] + bbox[m][5]);
|
||||
}
|
||||
|
||||
// idclose = atomID closest to center point of each body
|
||||
|
||||
memory->create(idclose,ncount,"rigid/small:idclose");
|
||||
memory->create(iclose,ncount,"rigid/small:iclose");
|
||||
memory->create(rsqclose,ncount,"rigid/small:rsqclose");
|
||||
for (m = 0; m < ncount; m++) rsqclose[m] = BIG;
|
||||
|
||||
for (i = 0; i < n; i++) {
|
||||
m = hash.find(in[i].bodyID)->second;
|
||||
x = in[i].x;
|
||||
delx = x[0] - ctr[m][0];
|
||||
dely = x[1] - ctr[m][1];
|
||||
delz = x[2] - ctr[m][2];
|
||||
rsq = delx*delx + dely*dely + delz*delz;
|
||||
if (rsq <= rsqclose[m]) {
|
||||
if (rsq == rsqclose[m] && in[i].atomID > idclose[m]) continue;
|
||||
iclose[m] = i;
|
||||
idclose[m] = in[i].atomID;
|
||||
rsqclose[m] = rsq;
|
||||
}
|
||||
}
|
||||
|
||||
// compute rsqfar for all bodies I own
|
||||
// set rsqfar back in caller
|
||||
|
||||
double rsqfar = 0.0;
|
||||
|
||||
for (int i = 0; i < n; i++) {
|
||||
m = hash.find(in[i].bodyID)->second;
|
||||
xown = in[iclose[m]].x;
|
||||
x = in[i].x;
|
||||
delx = x[0] - xown[0];
|
||||
dely = x[1] - xown[1];
|
||||
delz = x[2] - xown[2];
|
||||
rsq = delx*delx + dely*dely + delz*delz;
|
||||
rsqfar = MAX(rsqfar,rsq);
|
||||
}
|
||||
|
||||
frsptr->rsqfar = rsqfar;
|
||||
|
||||
// pass list of OutRvous datums back to comm->rendezvous
|
||||
|
||||
int nout = n;
|
||||
memory->create(proclist,nout,"rigid/small:proclist");
|
||||
OutRvous *out = (OutRvous *)
|
||||
memory->smalloc(nout*sizeof(OutRvous),"rigid/small:out");
|
||||
|
||||
for (int i = 0; i < nout; i++) {
|
||||
proclist[i] = in[i].me;
|
||||
out[i].ilocal = in[i].ilocal;
|
||||
m = hash.find(in[i].bodyID)->second;
|
||||
out[i].atomID = idclose[m];
|
||||
}
|
||||
|
||||
outbuf = (char *) out;
|
||||
|
||||
// clean up
|
||||
// Comm::rendezvous will delete proclist and out (outbuf)
|
||||
|
||||
delete hash;
|
||||
memory->destroy(buf);
|
||||
memory->destroy(bbox);
|
||||
memory->destroy(ctr);
|
||||
memory->destroy(idclose);
|
||||
memory->destroy(iclose);
|
||||
memory->destroy(rsqclose);
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
process rigid body atoms from another proc
|
||||
update bounding box for rigid bodies my atoms are part of
|
||||
------------------------------------------------------------------------- */
|
||||
// flag = 2: new outbuf
|
||||
|
||||
void FixRigidSmall::ring_bbox(int n, char *cbuf, void *ptr)
|
||||
{
|
||||
FixRigidSmall *frsptr = (FixRigidSmall *) ptr;
|
||||
std::map<tagint,int> *hash = frsptr->hash;
|
||||
double **bbox = frsptr->bbox;
|
||||
|
||||
double *buf = (double *) cbuf;
|
||||
int ndatums = n/4;
|
||||
|
||||
int j,imol;
|
||||
double *x;
|
||||
|
||||
int m = 0;
|
||||
for (int i = 0; i < ndatums; i++, m += 4) {
|
||||
imol = static_cast<int> (buf[m]);
|
||||
if (hash->find(imol) != hash->end()) {
|
||||
j = hash->find(imol)->second;
|
||||
x = &buf[m+1];
|
||||
bbox[j][0] = MIN(bbox[j][0],x[0]);
|
||||
bbox[j][1] = MAX(bbox[j][1],x[0]);
|
||||
bbox[j][2] = MIN(bbox[j][2],x[1]);
|
||||
bbox[j][3] = MAX(bbox[j][3],x[1]);
|
||||
bbox[j][4] = MIN(bbox[j][4],x[2]);
|
||||
bbox[j][5] = MAX(bbox[j][5],x[2]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
process rigid body atoms from another proc
|
||||
update nearest atom to body center for rigid bodies my atoms are part of
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void FixRigidSmall::ring_nearest(int n, char *cbuf, void *ptr)
|
||||
{
|
||||
FixRigidSmall *frsptr = (FixRigidSmall *) ptr;
|
||||
std::map<tagint,int> *hash = frsptr->hash;
|
||||
double **ctr = frsptr->ctr;
|
||||
tagint *idclose = frsptr->idclose;
|
||||
double *rsqclose = frsptr->rsqclose;
|
||||
|
||||
double *buf = (double *) cbuf;
|
||||
int ndatums = n/5;
|
||||
|
||||
int j,imol;
|
||||
tagint tag;
|
||||
double delx,dely,delz,rsq;
|
||||
double *x;
|
||||
|
||||
int m = 0;
|
||||
for (int i = 0; i < ndatums; i++, m += 5) {
|
||||
imol = static_cast<int> (buf[m]);
|
||||
if (hash->find(imol) != hash->end()) {
|
||||
j = hash->find(imol)->second;
|
||||
tag = (tagint) ubuf(buf[m+1]).i;
|
||||
x = &buf[m+2];
|
||||
delx = x[0] - ctr[j][0];
|
||||
dely = x[1] - ctr[j][1];
|
||||
delz = x[2] - ctr[j][2];
|
||||
rsq = delx*delx + dely*dely + delz*delz;
|
||||
if (rsq <= rsqclose[j]) {
|
||||
if (rsq == rsqclose[j] && tag > idclose[j]) continue;
|
||||
idclose[j] = tag;
|
||||
rsqclose[j] = rsq;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
process rigid body atoms from another proc
|
||||
update rsqfar = distance from owning atom to other atom
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void FixRigidSmall::ring_farthest(int n, char *cbuf, void *ptr)
|
||||
{
|
||||
FixRigidSmall *frsptr = (FixRigidSmall *) ptr;
|
||||
double **x = frsptr->atom->x;
|
||||
imageint *image = frsptr->atom->image;
|
||||
int nlocal = frsptr->atom->nlocal;
|
||||
|
||||
double *buf = (double *) cbuf;
|
||||
int ndatums = n/4;
|
||||
|
||||
int iowner;
|
||||
tagint tag;
|
||||
double delx,dely,delz,rsq;
|
||||
double *xx;
|
||||
double unwrap[3];
|
||||
|
||||
int m = 0;
|
||||
for (int i = 0; i < ndatums; i++, m += 4) {
|
||||
tag = (tagint) ubuf(buf[m]).i;
|
||||
iowner = frsptr->atom->map(tag);
|
||||
if (iowner < 0 || iowner >= nlocal) continue;
|
||||
frsptr->domain->unmap(x[iowner],image[iowner],unwrap);
|
||||
xx = &buf[m+1];
|
||||
delx = xx[0] - unwrap[0];
|
||||
dely = xx[1] - unwrap[1];
|
||||
delz = xx[2] - unwrap[2];
|
||||
rsq = delx*delx + dely*dely + delz*delz;
|
||||
frsptr->rsqfar = MAX(frsptr->rsqfar,rsq);
|
||||
}
|
||||
rflag = 2;
|
||||
return nout;
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
|
@ -2472,9 +2430,9 @@ void FixRigidSmall::readfile(int which, double **array, int *inbody)
|
|||
|
||||
int nlocal = atom->nlocal;
|
||||
|
||||
hash = new std::map<tagint,int>();
|
||||
std::map<tagint,int> hash;
|
||||
for (i = 0; i < nlocal; i++)
|
||||
if (bodyown[i] >= 0) (*hash)[atom->molecule[i]] = bodyown[i];
|
||||
if (bodyown[i] >= 0) hash[atom->molecule[i]] = bodyown[i];
|
||||
|
||||
// open file and read header
|
||||
|
||||
|
@ -2533,11 +2491,11 @@ void FixRigidSmall::readfile(int which, double **array, int *inbody)
|
|||
id = ATOTAGINT(values[0]);
|
||||
if (id <= 0 || id > maxmol)
|
||||
error->all(FLERR,"Invalid rigid body ID in fix rigid/small file");
|
||||
if (hash->find(id) == hash->end()) {
|
||||
if (hash.find(id) == hash.end()) {
|
||||
buf = next + 1;
|
||||
continue;
|
||||
}
|
||||
m = (*hash)[id];
|
||||
m = hash[id];
|
||||
inbody[m] = 1;
|
||||
|
||||
if (which == 0) {
|
||||
|
@ -2576,7 +2534,6 @@ void FixRigidSmall::readfile(int which, double **array, int *inbody)
|
|||
|
||||
delete [] buffer;
|
||||
delete [] values;
|
||||
delete hash;
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
|
|
|
@ -22,9 +22,6 @@ FixStyle(rigid/small,FixRigidSmall)
|
|||
|
||||
#include "fix.h"
|
||||
|
||||
// replace this later
|
||||
#include <map>
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
class FixRigidSmall : public Fix {
|
||||
|
@ -180,13 +177,21 @@ class FixRigidSmall : public Fix {
|
|||
|
||||
// class data used by ring communication callbacks
|
||||
|
||||
std::map<tagint,int> *hash;
|
||||
double **bbox;
|
||||
double **ctr;
|
||||
tagint *idclose;
|
||||
double *rsqclose;
|
||||
double rsqfar;
|
||||
|
||||
// one datum per rigid-body atom, sent as input to the rendezvous operation
struct InRvous {
|
||||
int me,ilocal;            // proc that packed the datum, local index of the atom on that proc
|
||||
tagint atomID,bodyID;     // ID of the atom, ID of the body it belongs to
|
||||
double x[3];              // coords of the atom after domain->unmap()
|
||||
};
|
||||
|
||||
// one datum per rigid-body atom, returned by the rendezvous operation
struct OutRvous {
|
||||
int ilocal;               // local index of the atom, copied from the input datum
|
||||
tagint atomID;            // ID of the atom closest to the body center, stored as bodytag of atom ilocal
|
||||
};
|
||||
|
||||
// local methods
|
||||
|
||||
void image_shift();
|
||||
void set_xv();
|
||||
void set_v();
|
||||
|
@ -199,11 +204,9 @@ class FixRigidSmall : public Fix {
|
|||
void grow_body();
|
||||
void reset_atom2body();
|
||||
|
||||
// callback functions for ring communication
|
||||
// callback function for rendezvous communication
|
||||
|
||||
static void ring_bbox(int, char *, void *);
|
||||
static void ring_nearest(int, char *, void *);
|
||||
static void ring_farthest(int, char *, void *);
|
||||
static int rendezvous_body(int, char *, int &, int *&, char *&, void *);
|
||||
|
||||
// debug
|
||||
|
||||
|
|
|
@ -39,6 +39,8 @@ using namespace LAMMPS_NS;
|
|||
using namespace FixConst;
|
||||
using namespace MathConst;
|
||||
|
||||
#define RVOUS 1 // 0 for irregular, 1 for all2all
|
||||
|
||||
#define BIG 1.0e20
|
||||
#define MASSDELTA 0.1
|
||||
|
||||
|
@ -219,8 +221,19 @@ FixShake::FixShake(LAMMPS *lmp, int narg, char **arg) :
|
|||
|
||||
// identify all SHAKE clusters
|
||||
|
||||
double time1 = MPI_Wtime();
|
||||
|
||||
find_clusters();
|
||||
|
||||
double time2 = MPI_Wtime();
|
||||
|
||||
if (comm->me == 0) {
|
||||
if (screen)
|
||||
fprintf(screen," find clusters CPU = %g secs\n",time2-time1);
|
||||
if (logfile)
|
||||
fprintf(logfile," find clusters CPU = %g secs\n",time2-time1);
|
||||
}
|
||||
|
||||
// initialize list of SHAKE clusters to constrain
|
||||
|
||||
maxlist = 0;
|
||||
|
@ -681,10 +694,9 @@ int FixShake::dof(int igroup)
|
|||
void FixShake::find_clusters()
|
||||
{
|
||||
int i,j,m,n,imol,iatom;
|
||||
int flag,flag_all,nbuf,size;
|
||||
int flag,flag_all;
|
||||
tagint tagprev;
|
||||
double massone;
|
||||
tagint *buf;
|
||||
|
||||
if (me == 0 && screen) {
|
||||
if (!rattle) fprintf(screen,"Finding SHAKE clusters ...\n");
|
||||
|
@ -707,13 +719,6 @@ void FixShake::find_clusters()
|
|||
int nlocal = atom->nlocal;
|
||||
int angles_allow = atom->avec->angles_allow;
|
||||
|
||||
// setup ring of procs
|
||||
|
||||
int next = me + 1;
|
||||
int prev = me -1;
|
||||
if (next == nprocs) next = 0;
|
||||
if (prev < 0) prev = nprocs - 1;
|
||||
|
||||
// -----------------------------------------------------
|
||||
// allocate arrays for self (1d) and bond partners (2d)
|
||||
// max = max # of bond partners for owned atoms = 2nd dim of partner arrays
|
||||
|
@ -755,6 +760,10 @@ void FixShake::find_clusters()
|
|||
memory->create(partner_shake,nlocal,max,"shake:partner_shake");
|
||||
memory->create(partner_nshake,nlocal,max,"shake:partner_nshake");
|
||||
|
||||
// setup atomIDs and procowner vectors in rendezvous decomposition
|
||||
|
||||
atom_owners();
|
||||
|
||||
// -----------------------------------------------------
|
||||
// set npartner and partner_tag from special arrays
|
||||
// -----------------------------------------------------
|
||||
|
@ -778,86 +787,13 @@ void FixShake::find_clusters()
|
|||
}
|
||||
|
||||
// -----------------------------------------------------
|
||||
// set partner_mask, partner_type, partner_massflag, partner_bondtype
|
||||
// for bonded partners
|
||||
// requires communication for off-proc partners
|
||||
// set partner_mask, partner_type, partner_massflag,
|
||||
// partner_bondtype for all my bonded partners
|
||||
// requires rendezvous communication for off-proc partners
|
||||
// -----------------------------------------------------
|
||||
|
||||
// fill in mask, type, massflag, bondtype if own bond partner
|
||||
// info to store in buf for each off-proc bond = nper = 6
|
||||
// 2 atoms IDs in bond, space for mask, type, massflag, bondtype
|
||||
// nbufmax = largest buffer needed to hold info from any proc
|
||||
|
||||
int nper = 6;
|
||||
|
||||
nbuf = 0;
|
||||
for (i = 0; i < nlocal; i++) {
|
||||
for (j = 0; j < npartner[i]; j++) {
|
||||
partner_mask[i][j] = 0;
|
||||
partner_type[i][j] = 0;
|
||||
partner_massflag[i][j] = 0;
|
||||
partner_bondtype[i][j] = 0;
|
||||
|
||||
m = atom->map(partner_tag[i][j]);
|
||||
if (m >= 0 && m < nlocal) {
|
||||
partner_mask[i][j] = mask[m];
|
||||
partner_type[i][j] = type[m];
|
||||
if (nmass) {
|
||||
if (rmass) massone = rmass[m];
|
||||
else massone = mass[type[m]];
|
||||
partner_massflag[i][j] = masscheck(massone);
|
||||
}
|
||||
n = bondtype_findset(i,tag[i],partner_tag[i][j],0);
|
||||
if (n) partner_bondtype[i][j] = n;
|
||||
else {
|
||||
n = bondtype_findset(m,tag[i],partner_tag[i][j],0);
|
||||
if (n) partner_bondtype[i][j] = n;
|
||||
}
|
||||
} else nbuf += nper;
|
||||
}
|
||||
}
|
||||
|
||||
memory->create(buf,nbuf,"shake:buf");
|
||||
|
||||
// fill buffer with info
|
||||
|
||||
size = 0;
|
||||
for (i = 0; i < nlocal; i++) {
|
||||
for (j = 0; j < npartner[i]; j++) {
|
||||
m = atom->map(partner_tag[i][j]);
|
||||
if (m < 0 || m >= nlocal) {
|
||||
buf[size] = tag[i];
|
||||
buf[size+1] = partner_tag[i][j];
|
||||
buf[size+2] = 0;
|
||||
buf[size+3] = 0;
|
||||
buf[size+4] = 0;
|
||||
n = bondtype_findset(i,tag[i],partner_tag[i][j],0);
|
||||
if (n) buf[size+5] = n;
|
||||
else buf[size+5] = 0;
|
||||
size += nper;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// cycle buffer around ring of procs back to self
|
||||
|
||||
comm->ring(size,sizeof(tagint),buf,1,ring_bonds,buf,(void *)this);
|
||||
|
||||
// store partner info returned to me
|
||||
|
||||
m = 0;
|
||||
while (m < size) {
|
||||
i = atom->map(buf[m]);
|
||||
for (j = 0; j < npartner[i]; j++)
|
||||
if (buf[m+1] == partner_tag[i][j]) break;
|
||||
partner_mask[i][j] = buf[m+2];
|
||||
partner_type[i][j] = buf[m+3];
|
||||
partner_massflag[i][j] = buf[m+4];
|
||||
partner_bondtype[i][j] = buf[m+5];
|
||||
m += nper;
|
||||
}
|
||||
|
||||
memory->destroy(buf);
|
||||
partner_info(npartner,partner_tag,partner_mask,partner_type,
|
||||
partner_massflag,partner_bondtype);
|
||||
|
||||
// error check for unfilled partner info
|
||||
// if partner_type not set, is an error
|
||||
|
@ -868,12 +804,13 @@ void FixShake::find_clusters()
|
|||
// else it's an error
|
||||
|
||||
flag = 0;
|
||||
int flag2 = 0;
|
||||
for (i = 0; i < nlocal; i++)
|
||||
for (j = 0; j < npartner[i]; j++) {
|
||||
if (partner_type[i][j] == 0) flag = 1;
|
||||
if (partner_type[i][j] == 0) flag++;
|
||||
if (!(mask[i] & groupbit)) continue;
|
||||
if (!(partner_mask[i][j] & groupbit)) continue;
|
||||
if (partner_bondtype[i][j] == 0) flag = 1;
|
||||
if (partner_bondtype[i][j] == 0) flag2++;
|
||||
}
|
||||
|
||||
MPI_Allreduce(&flag,&flag_all,1,MPI_INT,MPI_SUM,world);
|
||||
|
@ -931,55 +868,10 @@ void FixShake::find_clusters()
|
|||
|
||||
// -----------------------------------------------------
|
||||
// set partner_nshake for bonded partners
|
||||
// requires communication for off-proc partners
|
||||
// requires rendezvous communication for off-proc partners
|
||||
// -----------------------------------------------------
|
||||
|
||||
// fill in partner_nshake if own bond partner
|
||||
// info to store in buf for each off-proc bond =
|
||||
// 2 atoms IDs in bond, space for nshake value
|
||||
// nbufmax = largest buffer needed to hold info from any proc
|
||||
|
||||
nbuf = 0;
|
||||
for (i = 0; i < nlocal; i++) {
|
||||
for (j = 0; j < npartner[i]; j++) {
|
||||
m = atom->map(partner_tag[i][j]);
|
||||
if (m >= 0 && m < nlocal) partner_nshake[i][j] = nshake[m];
|
||||
else nbuf += 3;
|
||||
}
|
||||
}
|
||||
|
||||
memory->create(buf,nbuf,"shake:buf");
|
||||
|
||||
// fill buffer with info
|
||||
|
||||
size = 0;
|
||||
for (i = 0; i < nlocal; i++) {
|
||||
for (j = 0; j < npartner[i]; j++) {
|
||||
m = atom->map(partner_tag[i][j]);
|
||||
if (m < 0 || m >= nlocal) {
|
||||
buf[size] = tag[i];
|
||||
buf[size+1] = partner_tag[i][j];
|
||||
size += 3;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// cycle buffer around ring of procs back to self
|
||||
|
||||
comm->ring(size,sizeof(tagint),buf,2,ring_nshake,buf,(void *)this);
|
||||
|
||||
// store partner info returned to me
|
||||
|
||||
m = 0;
|
||||
while (m < size) {
|
||||
i = atom->map(buf[m]);
|
||||
for (j = 0; j < npartner[i]; j++)
|
||||
if (buf[m+1] == partner_tag[i][j]) break;
|
||||
partner_nshake[i][j] = buf[m+2];
|
||||
m += 3;
|
||||
}
|
||||
|
||||
memory->destroy(buf);
|
||||
nshake_info(npartner,partner_tag,partner_nshake);
|
||||
|
||||
// -----------------------------------------------------
|
||||
// error checks
|
||||
|
@ -988,7 +880,7 @@ void FixShake::find_clusters()
|
|||
// -----------------------------------------------------
|
||||
|
||||
flag = 0;
|
||||
for (i = 0; i < nlocal; i++) if (nshake[i] > 3) flag = 1;
|
||||
for (i = 0; i < nlocal; i++) if (nshake[i] > 3) flag++;
|
||||
MPI_Allreduce(&flag,&flag_all,1,MPI_INT,MPI_SUM,world);
|
||||
if (flag_all) error->all(FLERR,"Shake cluster of more than 4 atoms");
|
||||
|
||||
|
@ -996,7 +888,7 @@ void FixShake::find_clusters()
|
|||
for (i = 0; i < nlocal; i++) {
|
||||
if (nshake[i] <= 1) continue;
|
||||
for (j = 0; j < npartner[i]; j++)
|
||||
if (partner_shake[i][j] && partner_nshake[i][j] > 1) flag = 1;
|
||||
if (partner_shake[i][j] && partner_nshake[i][j] > 1) flag++;
|
||||
}
|
||||
MPI_Allreduce(&flag,&flag_all,1,MPI_INT,MPI_SUM,world);
|
||||
if (flag_all) error->all(FLERR,"Shake clusters are connected");
|
||||
|
@ -1064,68 +956,18 @@ void FixShake::find_clusters()
|
|||
|
||||
// -----------------------------------------------------
|
||||
// set shake_flag,shake_atom,shake_type for non-central atoms
|
||||
// requires communication for off-proc atoms
|
||||
// requires rendezvous communication for off-proc atoms
|
||||
// -----------------------------------------------------
|
||||
|
||||
// fill in shake arrays for each bond partner I own
|
||||
// info to store in buf for each off-proc bond =
|
||||
// all values from shake_flag, shake_atom, shake_type
|
||||
// nbufmax = largest buffer needed to hold info from any proc
|
||||
|
||||
nbuf = 0;
|
||||
for (i = 0; i < nlocal; i++) {
|
||||
if (shake_flag[i] == 0) continue;
|
||||
for (j = 0; j < npartner[i]; j++) {
|
||||
if (partner_shake[i][j] == 0) continue;
|
||||
m = atom->map(partner_tag[i][j]);
|
||||
if (m >= 0 && m < nlocal) {
|
||||
shake_flag[m] = shake_flag[i];
|
||||
shake_atom[m][0] = shake_atom[i][0];
|
||||
shake_atom[m][1] = shake_atom[i][1];
|
||||
shake_atom[m][2] = shake_atom[i][2];
|
||||
shake_atom[m][3] = shake_atom[i][3];
|
||||
shake_type[m][0] = shake_type[i][0];
|
||||
shake_type[m][1] = shake_type[i][1];
|
||||
shake_type[m][2] = shake_type[i][2];
|
||||
} else nbuf += 9;
|
||||
}
|
||||
}
|
||||
|
||||
memory->create(buf,nbuf,"shake:buf");
|
||||
|
||||
// fill buffer with info
|
||||
|
||||
size = 0;
|
||||
for (i = 0; i < nlocal; i++) {
|
||||
if (shake_flag[i] == 0) continue;
|
||||
for (j = 0; j < npartner[i]; j++) {
|
||||
if (partner_shake[i][j] == 0) continue;
|
||||
m = atom->map(partner_tag[i][j]);
|
||||
if (m < 0 || m >= nlocal) {
|
||||
buf[size] = partner_tag[i][j];
|
||||
buf[size+1] = shake_flag[i];
|
||||
buf[size+2] = shake_atom[i][0];
|
||||
buf[size+3] = shake_atom[i][1];
|
||||
buf[size+4] = shake_atom[i][2];
|
||||
buf[size+5] = shake_atom[i][3];
|
||||
buf[size+6] = shake_type[i][0];
|
||||
buf[size+7] = shake_type[i][1];
|
||||
buf[size+8] = shake_type[i][2];
|
||||
size += 9;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// cycle buffer around ring of procs back to self
|
||||
|
||||
comm->ring(size,sizeof(tagint),buf,3,ring_shake,NULL,(void *)this);
|
||||
|
||||
memory->destroy(buf);
|
||||
shake_info(npartner,partner_tag,partner_shake);
|
||||
|
||||
// -----------------------------------------------------
|
||||
// free local memory
|
||||
// -----------------------------------------------------
|
||||
|
||||
memory->destroy(atomIDs);
|
||||
memory->destroy(procowner);
|
||||
|
||||
memory->destroy(npartner);
|
||||
memory->destroy(nshake);
|
||||
memory->destroy(partner_tag);
|
||||
|
@ -1199,98 +1041,548 @@ void FixShake::find_clusters()
|
|||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
when receive buffer, scan bond partner IDs for atoms I own
|
||||
if I own partner:
|
||||
fill in mask and type and massflag
|
||||
search for bond with 1st atom and fill in bondtype
|
||||
setup atomIDs and procowner
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void FixShake::ring_bonds(int ndatum, char *cbuf, void *ptr)
|
||||
void FixShake::atom_owners()
|
||||
{
|
||||
FixShake *fsptr = (FixShake *)ptr;
|
||||
Atom *atom = fsptr->atom;
|
||||
tagint *tag = atom->tag;
|
||||
int nlocal = atom->nlocal;
|
||||
|
||||
int *proclist;
|
||||
memory->create(proclist,nlocal,"shake:proclist");
|
||||
IDRvous *idbuf = (IDRvous *)
|
||||
memory->smalloc((bigint) nlocal*sizeof(IDRvous),"shake:idbuf");
|
||||
|
||||
// setup input buf to rendezvous comm
|
||||
// input datums = pairs of bonded atoms
|
||||
// owning proc for each datum = random hash of atomID
|
||||
// one datum for each owned atom: datum = owning proc, atomID
|
||||
|
||||
for (int i = 0; i < nlocal; i++) {
|
||||
proclist[i] = tag[i] % nprocs;
|
||||
idbuf[i].me = me;
|
||||
idbuf[i].atomID = tag[i];
|
||||
}
|
||||
|
||||
// perform rendezvous operation
|
||||
// each proc assigned every 1/Pth atom
|
||||
|
||||
char *buf;
|
||||
comm->rendezvous(RVOUS,nlocal,(char *) idbuf,sizeof(IDRvous),
|
||||
0,proclist,
|
||||
rendezvous_ids,0,buf,0,(void *) this);
|
||||
|
||||
memory->destroy(proclist);
|
||||
memory->sfree(idbuf);
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
setup partner_mask, partner_type, partner_massflag, partner_bondtype
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void FixShake::partner_info(int *npartner, tagint **partner_tag,
|
||||
int **partner_mask, int **partner_type,
|
||||
int **partner_massflag, int **partner_bondtype)
|
||||
{
|
||||
int i,j,m,n;
|
||||
int nlocal = atom->nlocal;
|
||||
|
||||
// nsend = # of my datums to send
|
||||
// one datum for every off-processor partner
|
||||
|
||||
int nsend = 0;
|
||||
for (i = 0; i < nlocal; i++) {
|
||||
for (j = 0; j < npartner[i]; j++) {
|
||||
m = atom->map(partner_tag[i][j]);
|
||||
if (m < 0 || m >= nlocal) nsend++;
|
||||
}
|
||||
}
|
||||
|
||||
int *proclist;
|
||||
memory->create(proclist,nsend,"special:proclist");
|
||||
PartnerInfo *inbuf = (PartnerInfo *)
|
||||
memory->smalloc((bigint) nsend*sizeof(PartnerInfo),"special:inbuf");
|
||||
|
||||
// set values in 4 partner arrays for all partner atoms I own
|
||||
// also setup input buf to rendezvous comm
|
||||
// input datums = pair of bonded atoms where I do not own partner
|
||||
// owning proc for each datum = partner_tag % nprocs
|
||||
// datum: atomID = partner_tag (off-proc), partnerID = tag (on-proc)
|
||||
// 4 values for my owned atom
|
||||
|
||||
double *rmass = atom->rmass;
|
||||
double *mass = atom->mass;
|
||||
int *mask = atom->mask;
|
||||
int *type = atom->type;
|
||||
int nlocal = atom->nlocal;
|
||||
int nmass = fsptr->nmass;
|
||||
int *mask = atom->mask;
|
||||
tagint *tag = atom->tag;
|
||||
|
||||
tagint *buf = (tagint *) cbuf;
|
||||
int m,n;
|
||||
double massone;
|
||||
|
||||
for (int i = 0; i < ndatum; i += 6) {
|
||||
m = atom->map(buf[i+1]);
|
||||
if (m >= 0 && m < nlocal) {
|
||||
buf[i+2] = mask[m];
|
||||
buf[i+3] = type[m];
|
||||
if (nmass) {
|
||||
if (rmass) massone = rmass[m];
|
||||
else massone = mass[type[m]];
|
||||
buf[i+4] = fsptr->masscheck(massone);
|
||||
}
|
||||
if (buf[i+5] == 0) {
|
||||
n = fsptr->bondtype_findset(m,buf[i],buf[i+1],0);
|
||||
if (n) buf[i+5] = n;
|
||||
nsend = 0;
|
||||
for (i = 0; i < nlocal; i++) {
|
||||
for (j = 0; j < npartner[i]; j++) {
|
||||
partner_mask[i][j] = 0;
|
||||
partner_type[i][j] = 0;
|
||||
partner_massflag[i][j] = 0;
|
||||
partner_bondtype[i][j] = 0;
|
||||
|
||||
m = atom->map(partner_tag[i][j]);
|
||||
|
||||
if (m >= 0 && m < nlocal) {
|
||||
partner_mask[i][j] = mask[m];
|
||||
partner_type[i][j] = type[m];
|
||||
if (nmass) {
|
||||
if (rmass) massone = rmass[m];
|
||||
else massone = mass[type[m]];
|
||||
partner_massflag[i][j] = masscheck(massone);
|
||||
}
|
||||
n = bondtype_findset(i,tag[i],partner_tag[i][j],0);
|
||||
if (n) partner_bondtype[i][j] = n;
|
||||
else {
|
||||
n = bondtype_findset(m,tag[i],partner_tag[i][j],0);
|
||||
if (n) partner_bondtype[i][j] = n;
|
||||
}
|
||||
|
||||
} else {
|
||||
proclist[nsend] = partner_tag[i][j] % nprocs;
|
||||
inbuf[nsend].atomID = partner_tag[i][j];
|
||||
inbuf[nsend].partnerID = tag[i];
|
||||
inbuf[nsend].mask = mask[i];
|
||||
inbuf[nsend].type = type[i];
|
||||
if (nmass) {
|
||||
if (rmass) massone = rmass[i];
|
||||
else massone = mass[type[i]];
|
||||
inbuf[nsend].massflag = masscheck(massone);
|
||||
} else inbuf[nsend].massflag = 0;
|
||||
|
||||
// my atom may own bond, in which case set partner_bondtype
|
||||
// else receiver of this datum will own the bond and return the value
|
||||
|
||||
n = bondtype_findset(i,tag[i],partner_tag[i][j],0);
|
||||
if (n) {
|
||||
partner_bondtype[i][j] = n;
|
||||
inbuf[nsend].bondtype = n;
|
||||
} else inbuf[nsend].bondtype = 0;
|
||||
|
||||
nsend++;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
when receive buffer, scan bond partner IDs for atoms I own
|
||||
if I own partner, fill in nshake value
|
||||
------------------------------------------------------------------------- */
|
||||
// perform rendezvous operation
|
||||
// each proc owns random subset of atoms
|
||||
// receives all data needed to populate un-owned partner 4 values
|
||||
|
||||
void FixShake::ring_nshake(int ndatum, char *cbuf, void *ptr)
|
||||
{
|
||||
FixShake *fsptr = (FixShake *)ptr;
|
||||
Atom *atom = fsptr->atom;
|
||||
int nlocal = atom->nlocal;
|
||||
char *buf;
|
||||
int nreturn = comm->rendezvous(RVOUS,nsend,(char *) inbuf,sizeof(PartnerInfo),
|
||||
0,proclist,
|
||||
rendezvous_partners_info,
|
||||
0,buf,sizeof(PartnerInfo),
|
||||
(void *) this);
|
||||
PartnerInfo *outbuf = (PartnerInfo *) buf;
|
||||
|
||||
int *nshake = fsptr->nshake;
|
||||
memory->destroy(proclist);
|
||||
memory->sfree(inbuf);
|
||||
|
||||
tagint *buf = (tagint *) cbuf;
|
||||
int m;
|
||||
// set partner 4 values for un-owned partners based on output info
|
||||
// outbuf.atomID = my owned atom, outbuf.partnerID = partner the info is for
|
||||
|
||||
for (int i = 0; i < ndatum; i += 3) {
|
||||
m = atom->map(buf[i+1]);
|
||||
if (m >= 0 && m < nlocal) buf[i+2] = nshake[m];
|
||||
for (m = 0; m < nreturn; m++) {
|
||||
i = atom->map(outbuf[m].atomID);
|
||||
for (j = 0; j < npartner[i]; j++)
|
||||
if (partner_tag[i][j] == outbuf[m].partnerID) break;
|
||||
partner_mask[i][j] = outbuf[m].mask;
|
||||
partner_type[i][j] = outbuf[m].type;
|
||||
partner_massflag[i][j] = outbuf[m].massflag;
|
||||
|
||||
// only set partner_bondtype if my atom did not set it
|
||||
// when setting up rendezvous
|
||||
// if this proc set it, then sender of this datum set outbuf.bondtype = 0
|
||||
|
||||
if (partner_bondtype[i][j] == 0)
|
||||
partner_bondtype[i][j] = outbuf[m].bondtype;
|
||||
}
|
||||
|
||||
memory->sfree(outbuf);
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
when receive buffer, scan bond partner IDs for atoms I own
|
||||
if I own partner, fill in nshake value
|
||||
setup partner_nshake
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void FixShake::ring_shake(int ndatum, char *cbuf, void *ptr)
|
||||
void FixShake::nshake_info(int *npartner, tagint **partner_tag,
|
||||
int **partner_nshake)
|
||||
{
|
||||
FixShake *fsptr = (FixShake *)ptr;
|
||||
Atom *atom = fsptr->atom;
|
||||
int i,j,m;
|
||||
int nlocal = atom->nlocal;
|
||||
|
||||
int *shake_flag = fsptr->shake_flag;
|
||||
tagint **shake_atom = fsptr->shake_atom;
|
||||
int **shake_type = fsptr->shake_type;
|
||||
// nsend = # of my datums to send
|
||||
// one datum for every off-processor partner
|
||||
|
||||
tagint *buf = (tagint *) cbuf;
|
||||
int m;
|
||||
|
||||
for (int i = 0; i < ndatum; i += 9) {
|
||||
m = atom->map(buf[i]);
|
||||
if (m >= 0 && m < nlocal) {
|
||||
shake_flag[m] = buf[i+1];
|
||||
shake_atom[m][0] = buf[i+2];
|
||||
shake_atom[m][1] = buf[i+3];
|
||||
shake_atom[m][2] = buf[i+4];
|
||||
shake_atom[m][3] = buf[i+5];
|
||||
shake_type[m][0] = buf[i+6];
|
||||
shake_type[m][1] = buf[i+7];
|
||||
shake_type[m][2] = buf[i+8];
|
||||
int nsend = 0;
|
||||
for (i = 0; i < nlocal; i++) {
|
||||
for (j = 0; j < npartner[i]; j++) {
|
||||
m = atom->map(partner_tag[i][j]);
|
||||
if (m < 0 || m >= nlocal) nsend++;
|
||||
}
|
||||
}
|
||||
|
||||
int *proclist;
|
||||
memory->create(proclist,nsend,"special:proclist");
|
||||
NShakeInfo *inbuf = (NShakeInfo *)
|
||||
memory->smalloc((bigint) nsend*sizeof(NShakeInfo),"special:inbuf");
|
||||
|
||||
// set partner_nshake for all partner atoms I own
|
||||
// also setup input buf to rendezvous comm
|
||||
// input datums = pair of bonded atoms where I do not own partner
|
||||
// owning proc for each datum = partner_tag % nprocs
|
||||
// datum: atomID = partner_tag (off-proc), partnerID = tag (on-proc)
|
||||
// nshake value for my owned atom
|
||||
|
||||
tagint *tag = atom->tag;
|
||||
|
||||
nsend = 0;
|
||||
for (i = 0; i < nlocal; i++) {
|
||||
for (j = 0; j < npartner[i]; j++) {
|
||||
partner_nshake[i][j] = 0;
|
||||
m = atom->map(partner_tag[i][j]);
|
||||
if (m >= 0 && m < nlocal) {
|
||||
partner_nshake[i][j] = nshake[m];
|
||||
} else {
|
||||
proclist[nsend] = partner_tag[i][j] % nprocs;
|
||||
inbuf[nsend].atomID = partner_tag[i][j];
|
||||
inbuf[nsend].partnerID = tag[i];
|
||||
inbuf[nsend].nshake = nshake[i];
|
||||
nsend++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// perform rendezvous operation
|
||||
// each proc owns random subset of atoms
|
||||
// receives all data needed to populate un-owned partner nshake
|
||||
|
||||
char *buf;
|
||||
int nreturn = comm->rendezvous(RVOUS,nsend,(char *) inbuf,sizeof(NShakeInfo),
|
||||
0,proclist,
|
||||
rendezvous_nshake,0,buf,sizeof(NShakeInfo),
|
||||
(void *) this);
|
||||
NShakeInfo *outbuf = (NShakeInfo *) buf;
|
||||
|
||||
memory->destroy(proclist);
|
||||
memory->sfree(inbuf);
|
||||
|
||||
// set partner nshake for un-owned partners based on output info
|
||||
// outbuf.atomID = my owned atom, outbuf.partnerID = partner the info is for
|
||||
|
||||
for (m = 0; m < nreturn; m++) {
|
||||
i = atom->map(outbuf[m].atomID);
|
||||
for (j = 0; j < npartner[i]; j++)
|
||||
if (partner_tag[i][j] == outbuf[m].partnerID) break;
|
||||
partner_nshake[i][j] = outbuf[m].nshake;
|
||||
}
|
||||
|
||||
memory->sfree(outbuf);
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
setup shake_flag, shake_atom, shake_type
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void FixShake::shake_info(int *npartner, tagint **partner_tag,
|
||||
int **partner_shake)
|
||||
{
|
||||
int i,j,m;
|
||||
int nlocal = atom->nlocal;
|
||||
|
||||
// nsend = # of my datums to send
|
||||
// one datum for every off-processor partner
|
||||
|
||||
int nsend = 0;
|
||||
for (i = 0; i < nlocal; i++) {
|
||||
for (j = 0; j < npartner[i]; j++) {
|
||||
m = atom->map(partner_tag[i][j]);
|
||||
if (m < 0 || m >= nlocal) nsend++;
|
||||
}
|
||||
}
|
||||
|
||||
int *proclist;
|
||||
memory->create(proclist,nsend,"special:proclist");
|
||||
ShakeInfo *inbuf = (ShakeInfo *)
|
||||
memory->smalloc((bigint) nsend*sizeof(ShakeInfo),"special:inbuf");
|
||||
|
||||
// set 3 shake arrays for all partner atoms I own
|
||||
// also setup input buf to rendezvous comm
|
||||
// input datums = partner atom where I do not own partner
|
||||
// owning proc for each datum = partner_tag % nprocs
|
||||
// datum: atomID = partner_tag (off-proc)
|
||||
// values in 3 shake arrays
|
||||
|
||||
nsend = 0;
|
||||
for (i = 0; i < nlocal; i++) {
|
||||
if (shake_flag[i] == 0) continue;
|
||||
for (j = 0; j < npartner[i]; j++) {
|
||||
if (partner_shake[i][j] == 0) continue;
|
||||
m = atom->map(partner_tag[i][j]);
|
||||
|
||||
if (m >= 0 && m < nlocal) {
|
||||
shake_flag[m] = shake_flag[i];
|
||||
shake_atom[m][0] = shake_atom[i][0];
|
||||
shake_atom[m][1] = shake_atom[i][1];
|
||||
shake_atom[m][2] = shake_atom[i][2];
|
||||
shake_atom[m][3] = shake_atom[i][3];
|
||||
shake_type[m][0] = shake_type[i][0];
|
||||
shake_type[m][1] = shake_type[i][1];
|
||||
shake_type[m][2] = shake_type[i][2];
|
||||
|
||||
} else {
|
||||
proclist[nsend] = partner_tag[i][j] % nprocs;
|
||||
inbuf[nsend].atomID = partner_tag[i][j];
|
||||
inbuf[nsend].shake_flag = shake_flag[i];
|
||||
inbuf[nsend].shake_atom[0] = shake_atom[i][0];
|
||||
inbuf[nsend].shake_atom[1] = shake_atom[i][1];
|
||||
inbuf[nsend].shake_atom[2] = shake_atom[i][2];
|
||||
inbuf[nsend].shake_atom[3] = shake_atom[i][3];
|
||||
inbuf[nsend].shake_type[0] = shake_type[i][0];
|
||||
inbuf[nsend].shake_type[1] = shake_type[i][1];
|
||||
inbuf[nsend].shake_type[2] = shake_type[i][2];
|
||||
nsend++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// perform rendezvous operation
|
||||
// each proc owns random subset of atoms
|
||||
// receives all data needed to populate un-owned shake info
|
||||
|
||||
char *buf;
|
||||
int nreturn = comm->rendezvous(RVOUS,nsend,(char *) inbuf,sizeof(ShakeInfo),
|
||||
0,proclist,
|
||||
rendezvous_shake,0,buf,sizeof(ShakeInfo),
|
||||
(void *) this);
|
||||
ShakeInfo *outbuf = (ShakeInfo *) buf;
|
||||
|
||||
memory->destroy(proclist);
|
||||
memory->sfree(inbuf);
|
||||
|
||||
// set shake info for un-onwed partners based on output info
|
||||
|
||||
for (m = 0; m < nreturn; m++) {
|
||||
i = atom->map(outbuf[m].atomID);
|
||||
shake_flag[i] = outbuf[m].shake_flag;
|
||||
shake_atom[i][0] = outbuf[m].shake_atom[0];
|
||||
shake_atom[i][1] = outbuf[m].shake_atom[1];
|
||||
shake_atom[i][2] = outbuf[m].shake_atom[2];
|
||||
shake_atom[i][3] = outbuf[m].shake_atom[3];
|
||||
shake_type[i][0] = outbuf[m].shake_type[0];
|
||||
shake_type[i][1] = outbuf[m].shake_type[1];
|
||||
shake_type[i][2] = outbuf[m].shake_type[2];
|
||||
}
|
||||
|
||||
memory->sfree(outbuf);
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
process data for atoms assigned to me in rendezvous decomposition
|
||||
inbuf = list of N IDRvous datums
|
||||
no outbuf
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
int FixShake::rendezvous_ids(int n, char *inbuf,
|
||||
int &flag, int *&proclist, char *&outbuf,
|
||||
void *ptr)
|
||||
{
|
||||
FixShake *fsptr = (FixShake *) ptr;
|
||||
Memory *memory = fsptr->memory;
|
||||
|
||||
tagint *atomIDs;
|
||||
int *procowner;
|
||||
|
||||
memory->create(atomIDs,n,"special:atomIDs");
|
||||
memory->create(procowner,n,"special:procowner");
|
||||
|
||||
IDRvous *in = (IDRvous *) inbuf;
|
||||
|
||||
for (int i = 0; i < n; i++) {
|
||||
atomIDs[i] = in[i].atomID;
|
||||
procowner[i] = in[i].me;
|
||||
}
|
||||
|
||||
// store rendezvous data in FixShake class
|
||||
|
||||
fsptr->nrvous = n;
|
||||
fsptr->atomIDs = atomIDs;
|
||||
fsptr->procowner = procowner;
|
||||
|
||||
// flag = 0: no second comm needed in rendezvous
|
||||
|
||||
flag = 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
process data for atoms assigned to me in rendezvous decomposition
|
||||
inbuf = list of N PartnerInfo datums
|
||||
outbuf = same list of N PartnerInfo datums, routed to different procs
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
int FixShake::rendezvous_partners_info(int n, char *inbuf,
|
||||
int &flag, int *&proclist, char *&outbuf,
|
||||
void *ptr)
|
||||
{
|
||||
int i,m;
|
||||
|
||||
FixShake *fsptr = (FixShake *) ptr;
|
||||
Atom *atom = fsptr->atom;
|
||||
Memory *memory = fsptr->memory;
|
||||
|
||||
// clear atom map so it can be here as a hash table
|
||||
// faster than an STL map for large atom counts
|
||||
|
||||
atom->map_clear();
|
||||
|
||||
// hash atom IDs stored in rendezvous decomposition
|
||||
|
||||
int nrvous = fsptr->nrvous;
|
||||
tagint *atomIDs = fsptr->atomIDs;
|
||||
|
||||
for (i = 0; i < nrvous; i++)
|
||||
atom->map_one(atomIDs[i],i);
|
||||
|
||||
// proclist = owner of atomID in caller decomposition
|
||||
// outbuf = info about owned atomID = 4 values
|
||||
|
||||
PartnerInfo *in = (PartnerInfo *) inbuf;
|
||||
int *procowner = fsptr->procowner;
|
||||
memory->create(proclist,n,"shake:proclist");
|
||||
|
||||
for (i = 0; i < n; i++) {
|
||||
m = atom->map(in[i].atomID);
|
||||
proclist[i] = procowner[m];
|
||||
}
|
||||
|
||||
outbuf = inbuf;
|
||||
|
||||
// re-create atom map
|
||||
|
||||
atom->map_init(0);
|
||||
atom->nghost = 0;
|
||||
atom->map_set();
|
||||
|
||||
// flag = 1: outbuf = inbuf
|
||||
|
||||
flag = 1;
|
||||
return n;
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
process data for atoms assigned to me in rendezvous decomposition
|
||||
inbuf = list of N NShakeInfo datums
|
||||
outbuf = same list of N NShakeInfo datums, routed to different procs
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
int FixShake::rendezvous_nshake(int n, char *inbuf,
|
||||
int &flag, int *&proclist, char *&outbuf,
|
||||
void *ptr)
|
||||
{
|
||||
int i,m;
|
||||
|
||||
FixShake *fsptr = (FixShake *) ptr;
|
||||
Atom *atom = fsptr->atom;
|
||||
Memory *memory = fsptr->memory;
|
||||
|
||||
// clear atom map so it can be here as a hash table
|
||||
// faster than an STL map for large atom counts
|
||||
|
||||
atom->map_clear();
|
||||
|
||||
// hash atom IDs stored in rendezvous decomposition
|
||||
|
||||
int nrvous = fsptr->nrvous;
|
||||
tagint *atomIDs = fsptr->atomIDs;
|
||||
|
||||
for (i = 0; i < nrvous; i++)
|
||||
atom->map_one(atomIDs[i],i);
|
||||
|
||||
// proclist = owner of atomID in caller decomposition
|
||||
// outbuf = info about owned atomID
|
||||
|
||||
NShakeInfo *in = (NShakeInfo *) inbuf;
|
||||
int *procowner = fsptr->procowner;
|
||||
memory->create(proclist,n,"shake:proclist");
|
||||
|
||||
for (i = 0; i < n; i++) {
|
||||
m = atom->map(in[i].atomID);
|
||||
proclist[i] = procowner[m];
|
||||
}
|
||||
|
||||
outbuf = inbuf;
|
||||
|
||||
// re-create atom map
|
||||
|
||||
atom->map_init(0);
|
||||
atom->nghost = 0;
|
||||
atom->map_set();
|
||||
|
||||
// flag = 1: outbuf = inbuf
|
||||
|
||||
flag = 1;
|
||||
return n;
|
||||
}
|
||||
/* ----------------------------------------------------------------------
|
||||
process data for atoms assigned to me in rendezvous decomposition
|
||||
inbuf = list of N ShakeInfo datums
|
||||
outbuf = same list of N ShakeInfo datums, routed to different procs
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
int FixShake::rendezvous_shake(int n, char *inbuf,
|
||||
int &flag, int *&proclist, char *&outbuf,
|
||||
void *ptr)
|
||||
{
|
||||
int i,m;
|
||||
|
||||
FixShake *fsptr = (FixShake *) ptr;
|
||||
Atom *atom = fsptr->atom;
|
||||
Memory *memory = fsptr->memory;
|
||||
|
||||
// clear atom map so it can be here as a hash table
|
||||
// faster than an STL map for large atom counts
|
||||
|
||||
atom->map_clear();
|
||||
|
||||
// hash atom IDs stored in rendezvous decomposition
|
||||
|
||||
int nrvous = fsptr->nrvous;
|
||||
tagint *atomIDs = fsptr->atomIDs;
|
||||
|
||||
for (i = 0; i < nrvous; i++)
|
||||
atom->map_one(atomIDs[i],i);
|
||||
|
||||
// proclist = owner of atomID in caller decomposition
|
||||
// outbuf = info about owned atomID
|
||||
|
||||
ShakeInfo *in = (ShakeInfo *) inbuf;
|
||||
int *procowner = fsptr->procowner;
|
||||
memory->create(proclist,n,"shake:proclist");
|
||||
|
||||
for (i = 0; i < n; i++) {
|
||||
m = atom->map(in[i].atomID);
|
||||
proclist[i] = procowner[m];
|
||||
}
|
||||
|
||||
outbuf = inbuf;
|
||||
|
||||
// re-create atom map
|
||||
|
||||
atom->map_init(0);
|
||||
atom->nghost = 0;
|
||||
atom->map_set();
|
||||
|
||||
// flag = 1: outbuf = inbuf;
|
||||
|
||||
flag = 1;
|
||||
return n;
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
|
|
|
@ -120,6 +120,11 @@ class FixShake : public Fix {
|
|||
int nmol;
|
||||
|
||||
void find_clusters();
|
||||
void atom_owners();
|
||||
void partner_info(int *, tagint **, int **, int **, int **, int **);
|
||||
void nshake_info(int *, tagint **, int **);
|
||||
void shake_info(int *, tagint **, int **);
|
||||
|
||||
int masscheck(double);
|
||||
void unconstrained_update();
|
||||
void unconstrained_update_respa(int);
|
||||
|
@ -131,12 +136,40 @@ class FixShake : public Fix {
|
|||
int bondtype_findset(int, tagint, tagint, int);
|
||||
int angletype_findset(int, tagint, tagint, int);
|
||||
|
||||
// static variable for ring communication callback to access class data
|
||||
// callback functions for ring communication
|
||||
// data used by rendezvous callback methods
|
||||
|
||||
static void ring_bonds(int, char *, void *);
|
||||
static void ring_nshake(int, char *, void *);
|
||||
static void ring_shake(int, char *, void *);
|
||||
int nrvous;
|
||||
tagint *atomIDs;
|
||||
int *procowner;
|
||||
|
||||
// datum sent by atom_owners(), one per owned atom
// rendezvous_ids() unpacks these into the atomIDs and procowner arrays
struct IDRvous {
|
||||
int me;          // value of `me` on the sending proc, stored as procowner
|
||||
tagint atomID;   // ID (tag) of the owned atom
|
||||
};
|
||||
|
||||
// datum sent by partner_info() for each partner that is not an owned atom
// carries the sender's own atom properties to the proc recorded as owner of atomID
struct PartnerInfo {
|
||||
tagint atomID,partnerID;   // atomID = partner_tag (used for routing), partnerID = sender's tag
|
||||
int mask,type,massflag,bondtype;   // sender atom's mask, type, masscheck() result, bond type (0 if sender did not find the bond)
|
||||
};
|
||||
|
||||
// datum sent by nshake_info() for each partner that is not an owned atom
struct NShakeInfo {
|
||||
tagint atomID,partnerID;   // atomID = partner_tag (used for routing), partnerID = sender's tag
|
||||
int nshake;                // nshake value of the sender's atom
|
||||
};
|
||||
|
||||
// datum sent by shake_info() to a partner atom the sender does not own
// the receiver copies the 3 shake values onto the atom that atomID maps to
struct ShakeInfo {
|
||||
tagint atomID;          // partner_tag of the atom to update (used for routing)
|
||||
tagint shake_atom[4];   // copy of sender's shake_atom[i][0..3]
|
||||
int shake_flag;         // copy of sender's shake_flag[i]
|
||||
int shake_type[3];      // copy of sender's shake_type[i][0..2]
|
||||
};
|
||||
|
||||
// callback functions for rendezvous communication
|
||||
|
||||
static int rendezvous_ids(int, char *, int &, int *&, char *&, void *);
|
||||
static int rendezvous_partners_info(int, char *, int &, int *&, char *&, void *);
|
||||
static int rendezvous_nshake(int, char *, int &, int *&, char *&, void *);
|
||||
static int rendezvous_shake(int, char *, int &, int *&, char *&, void *);
|
||||
};
|
||||
|
||||
}
|
||||
|
|
424
src/comm.cpp
424
src/comm.cpp
|
@ -28,6 +28,7 @@
|
|||
#include "dump.h"
|
||||
#include "group.h"
|
||||
#include "procmap.h"
|
||||
#include "irregular.h"
|
||||
#include "accelerator_kokkos.h"
|
||||
#include "memory.h"
|
||||
#include "error.h"
|
||||
|
@ -725,6 +726,429 @@ void Comm::ring(int n, int nper, void *inbuf, int messtag,
|
|||
memory->destroy(bufcopy);
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
rendezvous communication operation
|
||||
three stages:
|
||||
first comm sends inbuf from caller decomp to rvous decomp
|
||||
callback operates on data in rendezvous decomp
|
||||
second comm sends outbuf from rvous decomp back to caller decomp
|
||||
inputs:
|
||||
which = perform (0) irregular or (1) MPI_All2allv communication
|
||||
n = # of datums in inbuf
|
||||
inbuf = vector of input datums
|
||||
insize = byte size of each input datum
|
||||
inorder = 0 for inbuf in random proc order, 1 for datums ordered by proc
|
||||
procs: inorder 0 = proc to send each datum to, 1 = # of datums/proc,
|
||||
callback = caller function to invoke in rendezvous decomposition
|
||||
takes input datums, returns output datums
|
||||
outorder = same as inorder, but for datums returned by callback()
|
||||
ptr = pointer to caller class, passed to callback()
|
||||
outputs:
|
||||
nout = # of output datums (function return)
|
||||
outbuf = vector of output datums
|
||||
outsize = byte size of each output datum
|
||||
callback inputs:
|
||||
nrvous = # of rvous decomp datums in inbuf_rvous
|
||||
inbuf_rvous = vector of rvous decomp input datums
|
||||
ptr = pointer to caller class
|
||||
callback outputs:
|
||||
nrvous_out = # of rvous decomp output datums (function return)
|
||||
flag = 0 for no second comm, 1 for outbuf_rvous = inbuf_rvous,
|
||||
2 for second comm with new outbuf_rvous
|
||||
procs_rvous = outorder 0 = proc to send each datum to, 1 = # of datums/proc
|
||||
allocated
|
||||
outbuf_rvous = vector of rvous decomp output datums
|
||||
NOTE: could use MPI_INT or MPI_DOUBLE instead of MPI_CHAR
|
||||
to avoid checked-for overflow in MPI_Alltoallv?
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
int Comm::
|
||||
rendezvous(int which, int n, char *inbuf, int insize,
|
||||
int inorder, int *procs,
|
||||
int (*callback)(int, char *, int &, int *&, char *&, void *),
|
||||
int outorder, char *&outbuf, int outsize, void *ptr, int statflag)
|
||||
{
|
||||
if (which == 0)
|
||||
return rendezvous_irregular(n,inbuf,insize,inorder,procs,callback,
|
||||
outorder,outbuf,outsize,ptr,statflag);
|
||||
else
|
||||
return rendezvous_all2all(n,inbuf,insize,inorder,procs,callback,
|
||||
outorder,outbuf,outsize,ptr,statflag);
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
int Comm::
|
||||
rendezvous_irregular(int n, char *inbuf, int insize, int inorder, int *procs,
|
||||
int (*callback)(int, char *, int &, int *&, char *&, void *),
|
||||
int outorder, char *&outbuf,
|
||||
int outsize, void *ptr, int statflag)
|
||||
{
|
||||
// irregular comm of inbuf from caller decomp to rendezvous decomp
|
||||
|
||||
Irregular *irregular = new Irregular(lmp);
|
||||
|
||||
int nrvous;
|
||||
if (inorder) nrvous = irregular->create_data_grouped(n,procs);
|
||||
else nrvous = irregular->create_data(n,procs);
|
||||
|
||||
char *inbuf_rvous = (char *) memory->smalloc((bigint) nrvous*insize,
|
||||
"rendezvous:inbuf");
|
||||
irregular->exchange_data(inbuf,insize,inbuf_rvous);
|
||||
|
||||
bigint irregular1_bytes = irregular->memory_usage();
|
||||
irregular->destroy_data();
|
||||
delete irregular;
|
||||
|
||||
// peform rendezvous computation via callback()
|
||||
// callback() allocates/populates proclist_rvous and outbuf_rvous
|
||||
|
||||
int flag;
|
||||
int *procs_rvous;
|
||||
char *outbuf_rvous;
|
||||
int nrvous_out = callback(nrvous,inbuf_rvous,flag,
|
||||
procs_rvous,outbuf_rvous,ptr);
|
||||
|
||||
if (flag != 1) memory->sfree(inbuf_rvous); // outbuf_rvous = inbuf_vous
|
||||
if (flag == 0) {
|
||||
if (statflag) rendezvous_stats(n,0,nrvous,nrvous_out,insize,outsize,
|
||||
(bigint) nrvous_out*sizeof(int) +
|
||||
irregular1_bytes);
|
||||
return 0; // all nout_rvous are 0, no 2nd comm stage
|
||||
}
|
||||
|
||||
// irregular comm of outbuf from rendezvous decomp back to caller decomp
|
||||
// caller will free outbuf
|
||||
|
||||
irregular = new Irregular(lmp);
|
||||
|
||||
int nout;
|
||||
if (outorder)
|
||||
nout = irregular->create_data_grouped(nrvous_out,procs_rvous);
|
||||
else nout = irregular->create_data(nrvous_out,procs_rvous);
|
||||
|
||||
outbuf = (char *) memory->smalloc((bigint) nout*outsize,
|
||||
"rendezvous:outbuf");
|
||||
irregular->exchange_data(outbuf_rvous,outsize,outbuf);
|
||||
|
||||
bigint irregular2_bytes = irregular->memory_usage();
|
||||
irregular->destroy_data();
|
||||
delete irregular;
|
||||
|
||||
memory->destroy(procs_rvous);
|
||||
memory->sfree(outbuf_rvous);
|
||||
|
||||
// return number of output datums
|
||||
// last arg to stats() = memory for procs_rvous + irregular comm
|
||||
|
||||
if (statflag) rendezvous_stats(n,nout,nrvous,nrvous_out,insize,outsize,
|
||||
(bigint) nrvous_out*sizeof(int) +
|
||||
MAX(irregular1_bytes,irregular2_bytes));
|
||||
return nout;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
int Comm::
|
||||
rendezvous_all2all(int n, char *inbuf, int insize, int inorder, int *procs,
|
||||
int (*callback)(int, char *, int &, int *&, char *&, void *),
|
||||
int outorder, char *&outbuf, int outsize, void *ptr,
|
||||
int statflag)
|
||||
{
|
||||
int iproc;
|
||||
bigint all2all1_bytes,all2all2_bytes;
|
||||
int *sendcount,*sdispls,*recvcount,*rdispls;
|
||||
int *procs_a2a;
|
||||
bigint *offsets;
|
||||
char *inbuf_a2a,*outbuf_a2a;
|
||||
|
||||
// create procs and inbuf for All2all if necesary
|
||||
|
||||
if (!inorder) {
|
||||
memory->create(procs_a2a,nprocs,"rendezvous:procs");
|
||||
inbuf_a2a = (char *) memory->smalloc((bigint) n*insize,
|
||||
"rendezvous:inbuf");
|
||||
memory->create(offsets,nprocs,"rendezvous:offsets");
|
||||
|
||||
for (int i = 0; i < nprocs; i++) procs_a2a[i] = 0;
|
||||
for (int i = 0; i < n; i++) procs_a2a[procs[i]]++;
|
||||
|
||||
offsets[0] = 0;
|
||||
for (int i = 1; i < nprocs; i++)
|
||||
offsets[i] = offsets[i-1] + insize*procs_a2a[i-1];
|
||||
|
||||
bigint offset = 0;
|
||||
for (int i = 0; i < n; i++) {
|
||||
iproc = procs[i];
|
||||
memcpy(&inbuf_a2a[offsets[iproc]],&inbuf[offset],insize);
|
||||
offsets[iproc] += insize;
|
||||
offset += insize;
|
||||
}
|
||||
|
||||
all2all1_bytes = nprocs*sizeof(int) + nprocs*sizeof(bigint) + n*insize;
|
||||
|
||||
} else {
|
||||
procs_a2a = procs;
|
||||
inbuf_a2a = inbuf;
|
||||
all2all1_bytes = 0;
|
||||
}
|
||||
|
||||
// create args for MPI_Alltoallv() on input data
|
||||
|
||||
memory->create(sendcount,nprocs,"rendezvous:sendcount");
|
||||
memcpy(sendcount,procs_a2a,nprocs*sizeof(int));
|
||||
|
||||
memory->create(recvcount,nprocs,"rendezvous:recvcount");
|
||||
MPI_Alltoall(sendcount,1,MPI_INT,recvcount,1,MPI_INT,world);
|
||||
|
||||
memory->create(sdispls,nprocs,"rendezvous:sdispls");
|
||||
memory->create(rdispls,nprocs,"rendezvous:rdispls");
|
||||
sdispls[0] = rdispls[0] = 0;
|
||||
for (int i = 1; i < nprocs; i++) {
|
||||
sdispls[i] = sdispls[i-1] + sendcount[i-1];
|
||||
rdispls[i] = rdispls[i-1] + recvcount[i-1];
|
||||
}
|
||||
int nrvous = rdispls[nprocs-1] + recvcount[nprocs-1];
|
||||
|
||||
// test for overflow of input data due to imbalance or insize
|
||||
// means that individual sdispls or rdispls values overflow
|
||||
|
||||
int overflow = 0;
|
||||
if ((bigint) n*insize > MAXSMALLINT) overflow = 1;
|
||||
if ((bigint) nrvous*insize > MAXSMALLINT) overflow = 1;
|
||||
int overflowall;
|
||||
MPI_Allreduce(&overflow,&overflowall,1,MPI_INT,MPI_MAX,world);
|
||||
if (overflowall) error->all(FLERR,"Overflow input size in rendezvous_a2a");
|
||||
|
||||
for (int i = 0; i < nprocs; i++) {
|
||||
sendcount[i] *= insize;
|
||||
sdispls[i] *= insize;
|
||||
recvcount[i] *= insize;
|
||||
rdispls[i] *= insize;
|
||||
}
|
||||
|
||||
// all2all comm of inbuf from caller decomp to rendezvous decomp
|
||||
|
||||
char *inbuf_rvous = (char *) memory->smalloc((bigint) nrvous*insize,
|
||||
"rendezvous:inbuf");
|
||||
|
||||
MPI_Alltoallv(inbuf_a2a,sendcount,sdispls,MPI_CHAR,
|
||||
inbuf_rvous,recvcount,rdispls,MPI_CHAR,world);
|
||||
|
||||
if (!inorder) {
|
||||
memory->destroy(procs_a2a);
|
||||
memory->sfree(inbuf_a2a);
|
||||
memory->destroy(offsets);
|
||||
}
|
||||
|
||||
// peform rendezvous computation via callback()
|
||||
// callback() allocates/populates proclist_rvous and outbuf_rvous
|
||||
|
||||
int flag;
|
||||
int *procs_rvous;
|
||||
char *outbuf_rvous;
|
||||
|
||||
int nrvous_out = callback(nrvous,inbuf_rvous,flag,
|
||||
procs_rvous,outbuf_rvous,ptr);
|
||||
|
||||
if (flag != 1) memory->sfree(inbuf_rvous); // outbuf_rvous = inbuf_vous
|
||||
if (flag == 0) {
|
||||
memory->destroy(sendcount);
|
||||
memory->destroy(recvcount);
|
||||
memory->destroy(sdispls);
|
||||
memory->destroy(rdispls);
|
||||
if (statflag) rendezvous_stats(n,0,nrvous,nrvous_out,insize,outsize,
|
||||
(bigint) nrvous_out*sizeof(int) +
|
||||
4*nprocs*sizeof(int) + all2all1_bytes);
|
||||
return 0; // all nout_rvous are 0, no 2nd irregular
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
// create procs and outbuf for All2all if necesary
|
||||
|
||||
if (!outorder) {
|
||||
memory->create(procs_a2a,nprocs,"rendezvous_a2a:procs");
|
||||
|
||||
outbuf_a2a = (char *) memory->smalloc((bigint) nrvous_out*outsize,
|
||||
"rendezvous:outbuf");
|
||||
memory->create(offsets,nprocs,"rendezvous:offsets");
|
||||
|
||||
for (int i = 0; i < nprocs; i++) procs_a2a[i] = 0;
|
||||
for (int i = 0; i < nrvous_out; i++) procs_a2a[procs_rvous[i]]++;
|
||||
|
||||
offsets[0] = 0;
|
||||
for (int i = 1; i < nprocs; i++)
|
||||
offsets[i] = offsets[i-1] + outsize*procs_a2a[i-1];
|
||||
|
||||
bigint offset = 0;
|
||||
for (int i = 0; i < nrvous_out; i++) {
|
||||
iproc = procs_rvous[i];
|
||||
memcpy(&outbuf_a2a[offsets[iproc]],&outbuf_rvous[offset],outsize);
|
||||
offsets[iproc] += outsize;
|
||||
offset += outsize;
|
||||
}
|
||||
|
||||
all2all2_bytes = nprocs*sizeof(int) + nprocs*sizeof(bigint) +
|
||||
nrvous_out*outsize;
|
||||
|
||||
} else {
|
||||
procs_a2a = procs_rvous;
|
||||
outbuf_a2a = outbuf_rvous;
|
||||
all2all2_bytes = 0;
|
||||
}
|
||||
|
||||
// comm outbuf from rendezvous decomposition back to caller
|
||||
|
||||
memcpy(sendcount,procs_a2a,nprocs*sizeof(int));
|
||||
|
||||
MPI_Alltoall(sendcount,1,MPI_INT,recvcount,1,MPI_INT,world);
|
||||
|
||||
sdispls[0] = rdispls[0] = 0;
|
||||
for (int i = 1; i < nprocs; i++) {
|
||||
sdispls[i] = sdispls[i-1] + sendcount[i-1];
|
||||
rdispls[i] = rdispls[i-1] + recvcount[i-1];
|
||||
}
|
||||
int nout = rdispls[nprocs-1] + recvcount[nprocs-1];
|
||||
|
||||
// test for overflow of outbuf due to imbalance or outsize
|
||||
// means that individual sdispls or rdispls values overflow
|
||||
|
||||
overflow = 0;
|
||||
if ((bigint) nrvous*outsize > MAXSMALLINT) overflow = 1;
|
||||
if ((bigint) nout*outsize > MAXSMALLINT) overflow = 1;
|
||||
MPI_Allreduce(&overflow,&overflowall,1,MPI_INT,MPI_MAX,world);
|
||||
if (overflowall) error->all(FLERR,"Overflow output in rendezvous_a2a");
|
||||
|
||||
for (int i = 0; i < nprocs; i++) {
|
||||
sendcount[i] *= outsize;
|
||||
sdispls[i] *= outsize;
|
||||
recvcount[i] *= outsize;
|
||||
rdispls[i] *= outsize;
|
||||
}
|
||||
|
||||
// all2all comm of outbuf from rendezvous decomp back to caller decomp
|
||||
// caller will free outbuf
|
||||
|
||||
outbuf = (char *) memory->smalloc((bigint) nout*outsize,"rendezvous:outbuf");
|
||||
|
||||
MPI_Alltoallv(outbuf_a2a,sendcount,sdispls,MPI_CHAR,
|
||||
outbuf,recvcount,rdispls,MPI_CHAR,world);
|
||||
|
||||
memory->destroy(procs_rvous);
|
||||
memory->sfree(outbuf_rvous);
|
||||
|
||||
if (!outorder) {
|
||||
memory->destroy(procs_a2a);
|
||||
memory->sfree(outbuf_a2a);
|
||||
memory->destroy(offsets);
|
||||
}
|
||||
|
||||
// clean up
|
||||
|
||||
memory->destroy(sendcount);
|
||||
memory->destroy(recvcount);
|
||||
memory->destroy(sdispls);
|
||||
memory->destroy(rdispls);
|
||||
|
||||
// return number of output datums
|
||||
// last arg to stats() = mem for procs_rvous + per-proc vecs + reordering ops
|
||||
|
||||
if (statflag) rendezvous_stats(n,nout,nrvous,nrvous_out,insize,outsize,
|
||||
(bigint) nrvous_out*sizeof(int) +
|
||||
4*nprocs*sizeof(int) +
|
||||
MAX(all2all1_bytes,all2all2_bytes));
|
||||
return nout;
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
print balance and memory info for rendezvous operation
|
||||
useful for debugging
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void Comm::rendezvous_stats(int n, int nout, int nrvous, int nrvous_out,
|
||||
int insize, int outsize, bigint commsize)
|
||||
{
|
||||
bigint size_in_all,size_in_max,size_in_min;
|
||||
bigint size_out_all,size_out_max,size_out_min;
|
||||
bigint size_inrvous_all,size_inrvous_max,size_inrvous_min;
|
||||
bigint size_outrvous_all,size_outrvous_max,size_outrvous_min;
|
||||
bigint size_comm_all,size_comm_max,size_comm_min;
|
||||
|
||||
bigint size = (bigint) n*insize;
|
||||
MPI_Allreduce(&size,&size_in_all,1,MPI_LMP_BIGINT,MPI_SUM,world);
|
||||
MPI_Allreduce(&size,&size_in_max,1,MPI_LMP_BIGINT,MPI_MAX,world);
|
||||
MPI_Allreduce(&size,&size_in_min,1,MPI_LMP_BIGINT,MPI_MIN,world);
|
||||
|
||||
size = (bigint) nout*outsize;
|
||||
MPI_Allreduce(&size,&size_out_all,1,MPI_LMP_BIGINT,MPI_SUM,world);
|
||||
MPI_Allreduce(&size,&size_out_max,1,MPI_LMP_BIGINT,MPI_MAX,world);
|
||||
MPI_Allreduce(&size,&size_out_min,1,MPI_LMP_BIGINT,MPI_MIN,world);
|
||||
|
||||
size = (bigint) nrvous*insize;
|
||||
MPI_Allreduce(&size,&size_inrvous_all,1,MPI_LMP_BIGINT,MPI_SUM,world);
|
||||
MPI_Allreduce(&size,&size_inrvous_max,1,MPI_LMP_BIGINT,MPI_MAX,world);
|
||||
MPI_Allreduce(&size,&size_inrvous_min,1,MPI_LMP_BIGINT,MPI_MIN,world);
|
||||
|
||||
size = (bigint) nrvous_out*insize;
|
||||
MPI_Allreduce(&size,&size_outrvous_all,1,MPI_LMP_BIGINT,MPI_SUM,world);
|
||||
MPI_Allreduce(&size,&size_outrvous_max,1,MPI_LMP_BIGINT,MPI_MAX,world);
|
||||
MPI_Allreduce(&size,&size_outrvous_min,1,MPI_LMP_BIGINT,MPI_MIN,world);
|
||||
|
||||
size = commsize;
|
||||
MPI_Allreduce(&size,&size_comm_all,1,MPI_LMP_BIGINT,MPI_SUM,world);
|
||||
MPI_Allreduce(&size,&size_comm_max,1,MPI_LMP_BIGINT,MPI_MAX,world);
|
||||
MPI_Allreduce(&size,&size_comm_min,1,MPI_LMP_BIGINT,MPI_MIN,world);
|
||||
|
||||
int mbytes = 1024*1024;
|
||||
|
||||
if (me == 0) {
|
||||
if (screen) {
|
||||
fprintf(screen,"Rendezvous balance and memory info: (tot,ave,max,min) \n");
|
||||
fprintf(screen," input datum count: "
|
||||
BIGINT_FORMAT " %g " BIGINT_FORMAT " " BIGINT_FORMAT "\n",
|
||||
size_in_all/insize,1.0*size_in_all/nprocs/insize,
|
||||
size_in_max/insize,size_in_min/insize);
|
||||
fprintf(screen," input data (MB): %g %g %g %g\n",
|
||||
1.0*size_in_all/mbytes,1.0*size_in_all/nprocs/mbytes,
|
||||
1.0*size_in_max/mbytes,1.0*size_in_min/mbytes);
|
||||
if (outsize)
|
||||
fprintf(screen," output datum count: "
|
||||
BIGINT_FORMAT " %g " BIGINT_FORMAT " " BIGINT_FORMAT "\n",
|
||||
size_out_all/outsize,1.0*size_out_all/nprocs/outsize,
|
||||
size_out_max/outsize,size_out_min/outsize);
|
||||
else
|
||||
fprintf(screen," output datum count: %d %g %d %d\n",0,0.0,0,0);
|
||||
fprintf(screen," output data (MB): %g %g %g %g\n",
|
||||
1.0*size_out_all/mbytes,1.0*size_out_all/nprocs/mbytes,
|
||||
1.0*size_out_max/mbytes,1.0*size_out_min/mbytes);
|
||||
fprintf(screen," input rvous datum count: "
|
||||
BIGINT_FORMAT " %g " BIGINT_FORMAT " " BIGINT_FORMAT "\n",
|
||||
size_inrvous_all/insize,1.0*size_inrvous_all/nprocs/insize,
|
||||
size_inrvous_max/insize,size_inrvous_min/insize);
|
||||
fprintf(screen," input rvous data (MB): %g %g %g %g\n",
|
||||
1.0*size_inrvous_all/mbytes,1.0*size_inrvous_all/nprocs/mbytes,
|
||||
1.0*size_inrvous_max/mbytes,1.0*size_inrvous_min/mbytes);
|
||||
if (outsize)
|
||||
fprintf(screen," output rvous datum count: "
|
||||
BIGINT_FORMAT " %g " BIGINT_FORMAT " " BIGINT_FORMAT "\n",
|
||||
size_outrvous_all/outsize,1.0*size_outrvous_all/nprocs/outsize,
|
||||
size_outrvous_max/outsize,size_outrvous_min/outsize);
|
||||
else
|
||||
fprintf(screen," output rvous datum count: %d %g %d %d\n",0,0.0,0,0);
|
||||
fprintf(screen," output rvous data (MB): %g %g %g %g\n",
|
||||
1.0*size_outrvous_all/mbytes,1.0*size_outrvous_all/nprocs/mbytes,
|
||||
1.0*size_outrvous_max/mbytes,1.0*size_outrvous_min/mbytes);
|
||||
fprintf(screen," rvous comm (MB): %g %g %g %g\n",
|
||||
1.0*size_comm_all/mbytes,1.0*size_comm_all/nprocs/mbytes,
|
||||
1.0*size_comm_max/mbytes,1.0*size_comm_min/mbytes);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
proc 0 reads Nlines from file into buf and bcasts buf to all procs
|
||||
caller allocates buf to max size needed
|
||||
|
|
13
src/comm.h
13
src/comm.h
|
@ -109,6 +109,10 @@ class Comm : protected Pointers {
|
|||
|
||||
void ring(int, int, void *, int, void (*)(int, char *, void *),
|
||||
void *, void *, int self = 1);
|
||||
int rendezvous(int, int, char *, int, int, int *,
|
||||
int (*)(int, char *, int &, int *&, char *&, void *),
|
||||
int, char *&, int, void *, int statflag=0);
|
||||
|
||||
int read_lines_from_file(FILE *, int, int, char *);
|
||||
int read_lines_from_file_universe(FILE *, int, int, char *);
|
||||
|
||||
|
@ -142,6 +146,15 @@ class Comm : protected Pointers {
|
|||
int ncores; // # of cores per node
|
||||
int coregrid[3]; // 3d grid of cores within a node
|
||||
int user_coregrid[3]; // user request for cores in each dim
|
||||
|
||||
int rendezvous_irregular(int, char *, int, int, int *,
|
||||
int (*)(int, char *, int &, int *&, char *&, void *),
|
||||
int, char *&, int, void *, int);
|
||||
int rendezvous_all2all(int, char *, int, int, int *,
|
||||
int (*)(int, char *, int &, int *&, char *&, void *),
|
||||
int, char *&, int, void *, int);
|
||||
void rendezvous_stats(int, int, int, int, int, int, bigint);
|
||||
|
||||
public:
|
||||
enum{MULTIPLE};
|
||||
};
|
||||
|
|
|
@ -514,9 +514,6 @@ void CreateAtoms::command(int narg, char **arg)
|
|||
if (domain->triclinic) domain->lamda2x(atom->nlocal);
|
||||
}
|
||||
|
||||
MPI_Barrier(world);
|
||||
double time2 = MPI_Wtime();
|
||||
|
||||
// clean up
|
||||
|
||||
delete ranmol;
|
||||
|
@ -526,21 +523,6 @@ void CreateAtoms::command(int narg, char **arg)
|
|||
delete [] ystr;
|
||||
delete [] zstr;
|
||||
|
||||
// print status
|
||||
|
||||
if (comm->me == 0) {
|
||||
if (screen) {
|
||||
fprintf(screen,"Created " BIGINT_FORMAT " atoms\n",
|
||||
atom->natoms-natoms_previous);
|
||||
fprintf(screen," Time spent = %g secs\n",time2-time1);
|
||||
}
|
||||
if (logfile) {
|
||||
fprintf(logfile,"Created " BIGINT_FORMAT " atoms\n",
|
||||
atom->natoms-natoms_previous);
|
||||
fprintf(logfile," Time spent = %g secs\n",time2-time1);
|
||||
}
|
||||
}
|
||||
|
||||
// for MOLECULE mode:
|
||||
// create special bond lists for molecular systems,
|
||||
// but not for atom style template
|
||||
|
@ -550,6 +532,25 @@ void CreateAtoms::command(int narg, char **arg)
|
|||
if (atom->molecular == 1 && onemol->bondflag && !onemol->specialflag) {
|
||||
Special special(lmp);
|
||||
special.build();
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
// print status
|
||||
|
||||
MPI_Barrier(world);
|
||||
double time2 = MPI_Wtime();
|
||||
|
||||
if (comm->me == 0) {
|
||||
if (screen) {
|
||||
fprintf(screen,"Created " BIGINT_FORMAT " atoms\n",
|
||||
atom->natoms-natoms_previous);
|
||||
fprintf(screen," create_atoms CPU = %g secs\n",time2-time1);
|
||||
}
|
||||
if (logfile) {
|
||||
fprintf(logfile,"Created " BIGINT_FORMAT " atoms\n",
|
||||
atom->natoms-natoms_previous);
|
||||
fprintf(logfile," create_atoms CPU = %g secs\n",time2-time1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,348 @@
|
|||
// Hash function hashlittle()
// from lookup3.c, by Bob Jenkins, May 2006, Public Domain
// bob_jenkins@burtleburtle.net

#include <cmath>
#include <stddef.h>
#include <stdint.h>

// if the system defines the __BYTE_ORDER__ define,
// we use it instead of guessing the platform

#if defined(__BYTE_ORDER__)
# if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
#  define HASH_LITTLE_ENDIAN 1
# else
#  define HASH_LITTLE_ENDIAN 0
# endif
#else // heuristic platform guess
# if defined(__bg__)
#  define HASH_LITTLE_ENDIAN 0       // IBM BlueGene is big endian
# else
#  define HASH_LITTLE_ENDIAN 1       // Intel and AMD x86 are little endian
# endif
#endif

#define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k))))

/*
-------------------------------------------------------------------------------
mix -- mix 3 32-bit values reversibly.

This is reversible, so any information in (a,b,c) before mix() is
still in (a,b,c) after mix().

If four pairs of (a,b,c) inputs are run through mix(), or through
mix() in reverse, there are at least 32 bits of the output that
are sometimes the same for one pair and different for another pair.
This was tested for:
* pairs that differed by one bit, by two bits, in any combination
  of top bits of (a,b,c), or in any combination of bottom bits of
  (a,b,c).
* "differ" is defined as +, -, ^, or ~^.  For + and -, I transformed
  the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
  is commonly produced by subtraction) look like a single 1-bit
  difference.
* the base values were pseudorandom, all zero but one bit set, or
  all zero plus a counter that starts at zero.

Some k values for my "a-=c; a^=rot(c,k); c+=b;" arrangement that
satisfy this are
    4  6  8 16 19  4
    9 15  3 18 27 15
   14  9  3  7 17  3
Well, "9 15 3 18 27 15" didn't quite get 32 bits diffing
for "differ" defined as + with a one-bit base and a two-bit delta.  I
used http://burtleburtle.net/bob/hash/avalanche.html to choose
the operations, constants, and arrangements of the variables.

This does not achieve avalanche.  There are input bits of (a,b,c)
that fail to affect some output bits of (a,b,c), especially of a.  The
most thoroughly mixed value is c, but it doesn't really even achieve
avalanche in c.

This allows some parallelism.  Read-after-writes are good at doubling
the number of bits affected, so the goal of mixing pulls in the opposite
direction as the goal of parallelism.  I did what I could.  Rotates
seem to cost as much as shifts on every machine I could lay my hands
on, and rotates are much kinder to the top and bottom bits, so I used
rotates.
-------------------------------------------------------------------------------
*/
#define mix(a,b,c) \
{ \
  a -= c;  a ^= rot(c, 4);  c += b; \
  b -= a;  b ^= rot(a, 6);  a += c; \
  c -= b;  c ^= rot(b, 8);  b += a; \
  a -= c;  a ^= rot(c,16);  c += b; \
  b -= a;  b ^= rot(a,19);  a += c; \
  c -= b;  c ^= rot(b, 4);  b += a; \
}

/*
-------------------------------------------------------------------------------
final -- final mixing of 3 32-bit values (a,b,c) into c

Pairs of (a,b,c) values differing in only a few bits will usually
produce values of c that look totally different.  This was tested for
* pairs that differed by one bit, by two bits, in any combination
  of top bits of (a,b,c), or in any combination of bottom bits of
  (a,b,c).
* "differ" is defined as +, -, ^, or ~^.  For + and -, I transformed
  the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
  is commonly produced by subtraction) look like a single 1-bit
  difference.
* the base values were pseudorandom, all zero but one bit set, or
  all zero plus a counter that starts at zero.

These constants passed:
 14 11 25 16 4 14 24
 12 14 25 16 4 14 24
and these came close:
  4  8 15 26 3 22 24
 10  8 15 26 3 22 24
 11  8 15 26 3 22 24
-------------------------------------------------------------------------------
*/
#define final(a,b,c) \
{ \
  c ^= b; c -= rot(b,14); \
  a ^= c; a -= rot(c,11); \
  b ^= a; b -= rot(a,25); \
  c ^= b; c -= rot(b,16); \
  a ^= c; a -= rot(c,4);  \
  b ^= a; b -= rot(a,14); \
  c ^= b; c -= rot(b,24); \
}

/*
-------------------------------------------------------------------------------
hashlittle() -- hash a variable-length key into a 32-bit value
  k       : the key (the unaligned variable-length array of bytes)
  length  : the length of the key, counting by bytes
  initval : can be any 4-byte value
Returns a 32-bit value.  Every bit of the key affects every bit of
the return value.  Two keys differing by one or two bits will have
totally different hash values.

The best hash table sizes are powers of 2.  There is no need to do
mod a prime (mod is sooo slow!).  If you need less than 32 bits,
use a bitmask.  For example, if you need only 10 bits, do
  h = (h & hashmask(10));
In which case, the hash table should have hashsize(10) elements.

If you are hashing n strings (uint8_t **)k, do it like this:
  for (i=0, h=0; i<n; ++i) h = hashlittle( k[i], len[i], h);

By Bob Jenkins, 2006.  bob_jenkins@burtleburtle.net.  You may use this
code any way you wish, private, educational, or commercial.  It's free.

Use for hash table lookup, or anything where one collision in 2^^32 is
acceptable.  Do NOT use for cryptographic purposes.
-------------------------------------------------------------------------------
*/

uint32_t hashlittle( const void *key, size_t length, uint32_t initval)
{
#ifndef PURIFY_HATES_HASHLITTLE

  uint32_t a,b,c;                                          /* internal state */
  union { const void *ptr; size_t i; } u;     /* needed for Mac Powerbook G4 */

  /* Set up the internal state */
  a = b = c = 0xdeadbeef + ((uint32_t)length) + initval;

  /* the address is only inspected to pick the widest aligned read size */
  u.ptr = key;
  if (HASH_LITTLE_ENDIAN && ((u.i & 0x3) == 0)) {
    const uint32_t *k = (const uint32_t *)key;         /* read 32-bit chunks */

    /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */
    while (length > 12)
    {
      a += k[0];
      b += k[1];
      c += k[2];
      mix(a,b,c);
      length -= 12;
      k += 3;
    }

    /*----------------------------- handle the last (probably partial) block */
    /*
     * Whole 32-bit words are read only where all 4 bytes belong to the key;
     * the remaining 1-3 tail bytes are read one byte at a time.  This yields
     * exactly the same value as reading a full word and masking off the
     * excess, but never touches memory beyond the end of the key, which
     * would be undefined behavior and is reported by memory checkers.
     */
    const uint8_t *k8 = (const uint8_t *)k;
    switch(length)
    {
    case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
    case 11: c+=((uint32_t)k8[10])<<16;  /* fall through */
    case 10: c+=((uint32_t)k8[9])<<8;    /* fall through */
    case 9 : c+=k8[8];                   /* fall through */
    case 8 : b+=k[1]; a+=k[0]; break;
    case 7 : b+=((uint32_t)k8[6])<<16;   /* fall through */
    case 6 : b+=((uint32_t)k8[5])<<8;    /* fall through */
    case 5 : b+=k8[4];                   /* fall through */
    case 4 : a+=k[0]; break;
    case 3 : a+=((uint32_t)k8[2])<<16;   /* fall through */
    case 2 : a+=((uint32_t)k8[1])<<8;    /* fall through */
    case 1 : a+=k8[0]; break;
    case 0 : return c;          /* zero length strings require no mixing */
    }

  } else if (HASH_LITTLE_ENDIAN && ((u.i & 0x1) == 0)) {
    const uint16_t *k = (const uint16_t *)key;         /* read 16-bit chunks */
    const uint8_t  *k8;

    /*--------------- all but last block: aligned reads and different mixing */
    while (length > 12)
    {
      a += k[0] + (((uint32_t)k[1])<<16);
      b += k[2] + (((uint32_t)k[3])<<16);
      c += k[4] + (((uint32_t)k[5])<<16);
      mix(a,b,c);
      length -= 12;
      k += 6;
    }

    /*----------------------------- handle the last (probably partial) block */
    k8 = (const uint8_t *)k;
    switch(length)
    {
    case 12: c+=k[4]+(((uint32_t)k[5])<<16);
             b+=k[2]+(((uint32_t)k[3])<<16);
             a+=k[0]+(((uint32_t)k[1])<<16);
             break;
    case 11: c+=((uint32_t)k8[10])<<16;     /* fall through */
    case 10: c+=k[4];
             b+=k[2]+(((uint32_t)k[3])<<16);
             a+=k[0]+(((uint32_t)k[1])<<16);
             break;
    case 9 : c+=k8[8];                      /* fall through */
    case 8 : b+=k[2]+(((uint32_t)k[3])<<16);
             a+=k[0]+(((uint32_t)k[1])<<16);
             break;
    case 7 : b+=((uint32_t)k8[6])<<16;      /* fall through */
    case 6 : b+=k[2];
             a+=k[0]+(((uint32_t)k[1])<<16);
             break;
    case 5 : b+=k8[4];                      /* fall through */
    case 4 : a+=k[0]+(((uint32_t)k[1])<<16);
             break;
    case 3 : a+=((uint32_t)k8[2])<<16;      /* fall through */
    case 2 : a+=k[0];
             break;
    case 1 : a+=k8[0];
             break;
    case 0 : return c;                     /* zero length requires no mixing */
    }

  } else {                        /* need to read the key one byte at a time */
    const uint8_t *k = (const uint8_t *)key;

    /*--------------- all but the last block: affect some 32 bits of (a,b,c) */
    while (length > 12)
    {
      a += k[0];
      a += ((uint32_t)k[1])<<8;
      a += ((uint32_t)k[2])<<16;
      a += ((uint32_t)k[3])<<24;
      b += k[4];
      b += ((uint32_t)k[5])<<8;
      b += ((uint32_t)k[6])<<16;
      b += ((uint32_t)k[7])<<24;
      c += k[8];
      c += ((uint32_t)k[9])<<8;
      c += ((uint32_t)k[10])<<16;
      c += ((uint32_t)k[11])<<24;
      mix(a,b,c);
      length -= 12;
      k += 12;
    }

    /*-------------------------------- last block: affect all 32 bits of (c) */
    switch(length)                   /* all the case statements fall through */
    {
    case 12: c+=((uint32_t)k[11])<<24;   /* fall through */
    case 11: c+=((uint32_t)k[10])<<16;   /* fall through */
    case 10: c+=((uint32_t)k[9])<<8;     /* fall through */
    case 9 : c+=k[8];                    /* fall through */
    case 8 : b+=((uint32_t)k[7])<<24;    /* fall through */
    case 7 : b+=((uint32_t)k[6])<<16;    /* fall through */
    case 6 : b+=((uint32_t)k[5])<<8;     /* fall through */
    case 5 : b+=k[4];                    /* fall through */
    case 4 : a+=((uint32_t)k[3])<<24;    /* fall through */
    case 3 : a+=((uint32_t)k[2])<<16;    /* fall through */
    case 2 : a+=((uint32_t)k[1])<<8;     /* fall through */
    case 1 : a+=k[0];
             break;
    case 0 : return c;
    }
  }

  final(a,b,c);
  return c;

#else /* PURIFY_HATES_HASHLITTLE */
  /* I don't know what it is about Jenkins' hashlittle function, but
   * it drives purify insane, even with VALGRIND defined.  It makes
   * purify unusable!!  The code execution doesn't even make sense.
   * Below is a (probably) weaker hash function that at least allows
   * testing with purify.
   */
#define MAXINT_DIV_PHI  11400714819323198485U

  uint32_t h, rest, bytes, num_bytes;
  const uint32_t *p;
  const unsigned char *byteptr;

  num_bytes = length;

  /* First hash the uint32_t-sized portions of the key */
  h = 0;
  for (p = (const uint32_t *)key, bytes=num_bytes;
       bytes >= (uint32_t) sizeof(uint32_t);
       bytes-=sizeof(uint32_t), p++){
    h = (h^(*p))*MAXINT_DIV_PHI;
  }

  /* Then take care of the remaining bytes, if any */
  /* bytes are read as unsigned so values >= 0x80 are not sign-extended */
  rest = 0;
  for (byteptr = (const unsigned char *)p; bytes > 0; bytes--, byteptr++){
    rest = (rest<<8) | (*byteptr);
  }

  /* If extra bytes, merge the two parts */
  if (rest)
    h = (h^rest)*MAXINT_DIV_PHI;

  return h;
#endif /* PURIFY_HATES_HASHLITTLE */
}
|
|
@ -0,0 +1,5 @@
|
|||
// Hash function hashlittle()
|
||||
// from lookup3.c, by Bob Jenkins, May 2006, Public Domain
|
||||
// bob_jenkins@burtleburtle.net
|
||||
|
||||
uint32_t hashlittle(const void *key, size_t length, uint32_t);
|
|
@ -501,7 +501,8 @@ int compare_standalone(const int i, const int j, void *ptr)
|
|||
|
||||
void Irregular::exchange_atom(double *sendbuf, int *sizes, double *recvbuf)
|
||||
{
|
||||
int i,m,n,offset,count;
|
||||
int i,m,n,count;
|
||||
bigint offset;
|
||||
|
||||
// post all receives
|
||||
|
||||
|
@ -621,6 +622,7 @@ int Irregular::create_data(int n, int *proclist, int sortflag)
|
|||
num_send = new int[nsend_proc];
|
||||
index_send = new int[n-work1[me]];
|
||||
index_self = new int[work1[me]];
|
||||
maxindex = n;
|
||||
|
||||
// proc_send = procs I send to
|
||||
// num_send = # of datums I send to each proc
|
||||
|
@ -678,8 +680,182 @@ int Irregular::create_data(int n, int *proclist, int sortflag)
|
|||
|
||||
// receive incoming messages
|
||||
// proc_recv = procs I recv from
|
||||
// num_recv = total size of message each proc sends me
|
||||
// nrecvdatum = total size of data I recv
|
||||
// num_recv = # of datums each proc sends me
|
||||
// nrecvdatum = total # of datums I recv
|
||||
|
||||
int nrecvdatum = 0;
|
||||
for (i = 0; i < nrecv_proc; i++) {
|
||||
MPI_Recv(&num_recv[i],1,MPI_INT,MPI_ANY_SOURCE,0,world,status);
|
||||
proc_recv[i] = status->MPI_SOURCE;
|
||||
nrecvdatum += num_recv[i];
|
||||
}
|
||||
nrecvdatum += num_self;
|
||||
|
||||
// sort proc_recv and num_recv by proc ID if requested
|
||||
// useful for debugging to insure reproducible ordering of received datums
|
||||
|
||||
if (sortflag) {
|
||||
int *order = new int[nrecv_proc];
|
||||
int *proc_recv_ordered = new int[nrecv_proc];
|
||||
int *num_recv_ordered = new int[nrecv_proc];
|
||||
|
||||
for (i = 0; i < nrecv_proc; i++) order[i] = i;
|
||||
|
||||
#if defined(LMP_QSORT)
|
||||
proc_recv_copy = proc_recv;
|
||||
qsort(order,nrecv_proc,sizeof(int),compare_standalone);
|
||||
#else
|
||||
merge_sort(order,nrecv_proc,(void *)proc_recv,compare_standalone);
|
||||
#endif
|
||||
|
||||
int j;
|
||||
for (i = 0; i < nrecv_proc; i++) {
|
||||
j = order[i];
|
||||
proc_recv_ordered[i] = proc_recv[j];
|
||||
num_recv_ordered[i] = num_recv[j];
|
||||
}
|
||||
|
||||
memcpy(proc_recv,proc_recv_ordered,nrecv_proc*sizeof(int));
|
||||
memcpy(num_recv,num_recv_ordered,nrecv_proc*sizeof(int));
|
||||
delete [] order;
|
||||
delete [] proc_recv_ordered;
|
||||
delete [] num_recv_ordered;
|
||||
}
|
||||
|
||||
// barrier to ensure all MPI_ANY_SOURCE messages are received
|
||||
// else another proc could proceed to exchange_data() and send to me
|
||||
|
||||
MPI_Barrier(world);
|
||||
|
||||
// return # of datums I will receive
|
||||
|
||||
return nrecvdatum;
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
create communication plan based on list of datums of uniform size
|
||||
n = # of datums to send
|
||||
procs = how many datums to send to each proc, must include self
|
||||
sortflag = flag for sorting order of received messages by proc ID
|
||||
return total # of datums I will recv, including any to self
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
int Irregular::create_data_grouped(int n, int *procs, int sortflag)
|
||||
{
|
||||
int i,j,k,m;
|
||||
|
||||
// setup for collective comm
|
||||
// work1 = # of datums I send to each proc, set self to 0
|
||||
// work2 = 1 for all procs, used for ReduceScatter
|
||||
|
||||
for (i = 0; i < nprocs; i++) {
|
||||
work1[i] = procs[i];
|
||||
work2[i] = 1;
|
||||
}
|
||||
work1[me] = 0;
|
||||
|
||||
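// nrecv_proc = # of procs I receive messages from, not including self
// NOTE(review): work1 was filled with datum counts (procs[i]), not 0/1 flags,
//   so the sum reduction below appears to yield a datum total rather than
//   a proc count - confirm against create_data()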
|
||||
// options for performing ReduceScatter operation
|
||||
// some are more efficient on some machines at big sizes
|
||||
|
||||
#ifdef LAMMPS_RS_ALLREDUCE_INPLACE
|
||||
MPI_Allreduce(MPI_IN_PLACE,work1,nprocs,MPI_INT,MPI_SUM,world);
|
||||
nrecv_proc = work1[me];
|
||||
#else
|
||||
#ifdef LAMMPS_RS_ALLREDUCE
|
||||
MPI_Allreduce(work1,work2,nprocs,MPI_INT,MPI_SUM,world);
|
||||
nrecv_proc = work2[me];
|
||||
#else
|
||||
MPI_Reduce_scatter(work1,&nrecv_proc,work2,MPI_INT,MPI_SUM,world);
|
||||
#endif
|
||||
#endif
|
||||
|
||||
// allocate receive arrays
|
||||
|
||||
proc_recv = new int[nrecv_proc];
|
||||
num_recv = new int[nrecv_proc];
|
||||
request = new MPI_Request[nrecv_proc];
|
||||
status = new MPI_Status[nrecv_proc];
|
||||
|
||||
// work1 = # of datums I send to each proc, including self
|
||||
// nsend_proc = # of procs I send messages to, not including self
|
||||
|
||||
for (i = 0; i < nprocs; i++) work1[i] = procs[i];
|
||||
|
||||
nsend_proc = 0;
|
||||
for (i = 0; i < nprocs; i++)
|
||||
if (work1[i]) nsend_proc++;
|
||||
if (work1[me]) nsend_proc--;
|
||||
|
||||
// allocate send and self arrays
|
||||
|
||||
proc_send = new int[nsend_proc];
|
||||
num_send = new int[nsend_proc];
|
||||
index_send = new int[n-work1[me]];
|
||||
index_self = new int[work1[me]];
|
||||
maxindex = n;
|
||||
|
||||
// proc_send = procs I send to
|
||||
// num_send = # of datums I send to each proc
|
||||
// num_self = # of datums I copy to self
|
||||
// to balance pattern of send messages:
|
||||
// each proc begins with iproc > me, continues until iproc = me
|
||||
// reset work1 to store which send message each proc corresponds to
|
||||
|
||||
int iproc = me;
|
||||
int isend = 0;
|
||||
for (i = 0; i < nprocs; i++) {
|
||||
iproc++;
|
||||
if (iproc == nprocs) iproc = 0;
|
||||
if (iproc == me) {
|
||||
num_self = work1[iproc];
|
||||
work1[iproc] = 0;
|
||||
} else if (work1[iproc] > 0) {
|
||||
proc_send[isend] = iproc;
|
||||
num_send[isend] = work1[iproc];
|
||||
work1[iproc] = isend;
|
||||
isend++;
|
||||
}
|
||||
}
|
||||
|
||||
// work2 = offsets into index_send for each proc I send to
|
||||
// m = ptr into index_self
|
||||
// index_send = list of which datums to send to each proc
|
||||
// 1st N1 values are datum indices for 1st proc,
|
||||
// next N2 values are datum indices for 2nd proc, etc
|
||||
// index_self = list of which datums to copy to self
|
||||
|
||||
work2[0] = 0;
|
||||
for (i = 1; i < nsend_proc; i++) work2[i] = work2[i-1] + num_send[i-1];
|
||||
|
||||
m = 0;
|
||||
i = 0;
|
||||
for (iproc = 0; iproc < nprocs; iproc++) {
|
||||
k = procs[iproc];
|
||||
for (j = 0; j < k; j++) {
|
||||
if (iproc == me) index_self[m++] = i++;
|
||||
else {
|
||||
isend = work1[iproc];
|
||||
index_send[work2[isend]++] = i++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// tell receivers how much data I send
|
||||
// sendmax_proc = largest # of datums I send in a single message
|
||||
|
||||
sendmax_proc = 0;
|
||||
for (i = 0; i < nsend_proc; i++) {
|
||||
MPI_Request tmpReq; // Use non-blocking send to avoid possible deadlock
|
||||
MPI_Isend(&num_send[i],1,MPI_INT,proc_send[i],0,world,&tmpReq);
|
||||
MPI_Request_free(&tmpReq); // the MPI_Barrier below marks completion
|
||||
sendmax_proc = MAX(sendmax_proc,num_send[i]);
|
||||
}
|
||||
|
||||
// receive incoming messages
|
||||
// proc_recv = procs I recv from
|
||||
// num_recv = # of datums each proc sends me
|
||||
// nrecvdatum = total # of datums I recv
|
||||
|
||||
int nrecvdatum = 0;
|
||||
for (i = 0; i < nrecv_proc; i++) {
|
||||
|
@ -739,11 +915,13 @@ int Irregular::create_data(int n, int *proclist, int sortflag)
|
|||
|
||||
void Irregular::exchange_data(char *sendbuf, int nbytes, char *recvbuf)
|
||||
{
|
||||
int i,m,n,offset,count;
|
||||
int i,n,count;
|
||||
bigint m; // these 2 lines enable send/recv buf to be larger than 2 GB
|
||||
char *dest;
|
||||
|
||||
// post all receives, starting after self copies
|
||||
|
||||
offset = num_self*nbytes;
|
||||
bigint offset = num_self*nbytes;
|
||||
for (int irecv = 0; irecv < nrecv_proc; irecv++) {
|
||||
MPI_Irecv(&recvbuf[offset],num_recv[irecv]*nbytes,MPI_CHAR,
|
||||
proc_recv[irecv],0,world,&request[irecv]);
|
||||
|
@ -765,23 +943,34 @@ void Irregular::exchange_data(char *sendbuf, int nbytes, char *recvbuf)
|
|||
n = 0;
|
||||
for (int isend = 0; isend < nsend_proc; isend++) {
|
||||
count = num_send[isend];
|
||||
dest = buf;
|
||||
for (i = 0; i < count; i++) {
|
||||
m = index_send[n++];
|
||||
memcpy(&buf[i*nbytes],&sendbuf[m*nbytes],nbytes);
|
||||
memcpy(dest,&sendbuf[m*nbytes],nbytes);
|
||||
dest += nbytes;
|
||||
}
|
||||
MPI_Send(buf,count*nbytes,MPI_CHAR,proc_send[isend],0,world);
|
||||
}
|
||||
|
||||
// copy datums to self, put at beginning of recvbuf
|
||||
|
||||
dest = recvbuf;
|
||||
for (i = 0; i < num_self; i++) {
|
||||
m = index_self[i];
|
||||
memcpy(&recvbuf[i*nbytes],&sendbuf[m*nbytes],nbytes);
|
||||
memcpy(dest,&sendbuf[m*nbytes],nbytes);
|
||||
dest += nbytes;
|
||||
}
|
||||
|
||||
// wait on all incoming messages
|
||||
|
||||
if (nrecv_proc) MPI_Waitall(nrecv_proc,request,status);
|
||||
|
||||
// approximate memory tally
|
||||
// DEBUG lines
|
||||
|
||||
//bigint irregular_bytes = 2*nprocs*sizeof(int);
|
||||
//irregular_bytes += maxindex*sizeof(int);
|
||||
//irregular_bytes += maxbuf;
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
|
|
|
@ -33,6 +33,7 @@ class Irregular : protected Pointers {
|
|||
int *procassign = NULL);
|
||||
int migrate_check();
|
||||
int create_data(int, int *, int sortflag = 0);
|
||||
int create_data_grouped(int, int *, int sortflag = 0);
|
||||
void exchange_data(char *, int, char *);
|
||||
void destroy_data();
|
||||
bigint memory_usage();
|
||||
|
@ -48,6 +49,7 @@ class Irregular : protected Pointers {
|
|||
double *dbuf; // double buf for largest single atom send
|
||||
int maxbuf; // size of char buf in bytes
|
||||
char *buf; // char buf for largest single data send
|
||||
int maxindex; // combined size of index_send + index_self
|
||||
|
||||
int *mproclist,*msizes; // persistent vectors in migrate_atoms
|
||||
int maxlocal; // allocated size of mproclist and msizes
|
||||
|
|
|
@ -378,7 +378,7 @@ void PairTable::read_table(Table *tb, char *file, char *keyword)
|
|||
utils::sfgets(FLERR,line,MAXLINE,fp,file,error); // no match, skip section
|
||||
param_extract(tb,line);
|
||||
utils::sfgets(FLERR,line,MAXLINE,fp,file,error);
|
||||
for (int i = 0; i < tb->ninput; i++)
|
||||
for (int i = 0; i < tb->ninput; i++)
|
||||
utils::sfgets(FLERR,line,MAXLINE,fp,file,error);
|
||||
}
|
||||
|
||||
|
|
|
@ -120,6 +120,9 @@ void ReadData::command(int narg, char **arg)
|
|||
{
|
||||
if (narg < 1) error->all(FLERR,"Illegal read_data command");
|
||||
|
||||
MPI_Barrier(world);
|
||||
double time1 = MPI_Wtime();
|
||||
|
||||
// optional args
|
||||
|
||||
addflag = NONE;
|
||||
|
@ -906,6 +909,18 @@ void ReadData::command(int narg, char **arg)
|
|||
|
||||
force->kspace = saved_kspace;
|
||||
}
|
||||
|
||||
// total time
|
||||
|
||||
MPI_Barrier(world);
|
||||
double time2 = MPI_Wtime();
|
||||
|
||||
if (comm->me == 0) {
|
||||
if (screen)
|
||||
fprintf(screen," read_data CPU = %g secs\n",time2-time1);
|
||||
if (logfile)
|
||||
fprintf(logfile," read_data CPU = %g secs\n",time2-time1);
|
||||
}
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
|
|
|
@ -94,7 +94,7 @@ ReadDump::~ReadDump()
|
|||
|
||||
memory->destroy(fields);
|
||||
memory->destroy(buf);
|
||||
|
||||
|
||||
for (int i = 0; i < nreader; i++) delete readers[i];
|
||||
delete [] readers;
|
||||
delete [] nsnapatoms;
|
||||
|
@ -359,7 +359,7 @@ bigint ReadDump::seek(bigint nrequest, int exact)
|
|||
readers[i]->skip();
|
||||
}
|
||||
|
||||
if (eofflag)
|
||||
if (eofflag)
|
||||
error->one(FLERR,"Read dump parallel files "
|
||||
"do not all have same timestep");
|
||||
}
|
||||
|
@ -466,7 +466,7 @@ bigint ReadDump::next(bigint ncurrent, bigint nlast, int nevery, int nskip)
|
|||
readers[i]->skip();
|
||||
}
|
||||
|
||||
if (eofflag)
|
||||
if (eofflag)
|
||||
error->one(FLERR,"Read dump parallel files "
|
||||
"do not all have same timestep");
|
||||
}
|
||||
|
@ -724,7 +724,7 @@ void ReadDump::read_atoms()
|
|||
otherproc = 0;
|
||||
ofirst = (bigint) otherproc * nsnap/nprocs_cluster;
|
||||
olast = (bigint) (otherproc+1) * nsnap/nprocs_cluster;
|
||||
if (olast-ofirst > MAXSMALLINT)
|
||||
if (olast-ofirst > MAXSMALLINT)
|
||||
error->one(FLERR,"Read dump snapshot is too large for a proc");
|
||||
nnew = static_cast<int> (olast - ofirst);
|
||||
|
||||
|
@ -765,7 +765,7 @@ void ReadDump::read_atoms()
|
|||
} else {
|
||||
ofirst = (bigint) me_cluster * nsnap/nprocs_cluster;
|
||||
olast = (bigint) (me_cluster+1) * nsnap/nprocs_cluster;
|
||||
if (olast-ofirst > MAXSMALLINT)
|
||||
if (olast-ofirst > MAXSMALLINT)
|
||||
error->one(FLERR,"Read dump snapshot is too large for a proc");
|
||||
nnew = static_cast<int> (olast - ofirst);
|
||||
if (nnew > maxnew || maxnew == 0) {
|
||||
|
@ -791,7 +791,7 @@ void ReadDump::read_atoms()
|
|||
bigint sum = 0;
|
||||
for (int i = 0; i < nreader; i++)
|
||||
sum += nsnapatoms[i];
|
||||
if (sum > MAXSMALLINT)
|
||||
if (sum > MAXSMALLINT)
|
||||
error->one(FLERR,"Read dump snapshot is too large for a proc");
|
||||
nnew = static_cast<int> (sum);
|
||||
if (nnew > maxnew || maxnew == 0) {
|
||||
|
@ -811,7 +811,7 @@ void ReadDump::read_atoms()
|
|||
}
|
||||
nnew += nsnap;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
|
@ -943,7 +943,7 @@ void ReadDump::process_atoms()
|
|||
ntrim++;
|
||||
} else i++;
|
||||
}
|
||||
|
||||
|
||||
atom->nlocal = nlocal;
|
||||
bigint nblocal = atom->nlocal;
|
||||
MPI_Allreduce(&nblocal,&atom->natoms,1,MPI_LMP_BIGINT,MPI_SUM,world);
|
||||
|
|
|
@ -86,7 +86,7 @@ private:
|
|||
double xlo,xhi,ylo,yhi,zlo,zhi,xy,xz,yz; // dump snapshot box params
|
||||
double xprd,yprd,zprd;
|
||||
|
||||
bigint *nsnapatoms; // # of atoms in one snapshot from
|
||||
bigint *nsnapatoms; // # of atoms in one snapshot from
|
||||
// one (parallel) dump file
|
||||
// nreader-length vector b/c a reader proc
|
||||
// may read from multiple parallel dump files
|
||||
|
|
|
@ -81,6 +81,9 @@ void ReadRestart::command(int narg, char **arg)
|
|||
if (domain->box_exist)
|
||||
error->all(FLERR,"Cannot read_restart after simulation box is defined");
|
||||
|
||||
MPI_Barrier(world);
|
||||
double time1 = MPI_Wtime();
|
||||
|
||||
MPI_Comm_rank(world,&me);
|
||||
MPI_Comm_size(world,&nprocs);
|
||||
|
||||
|
@ -562,6 +565,18 @@ void ReadRestart::command(int narg, char **arg)
|
|||
Special special(lmp);
|
||||
special.build();
|
||||
}
|
||||
|
||||
// total time
|
||||
|
||||
MPI_Barrier(world);
|
||||
double time2 = MPI_Wtime();
|
||||
|
||||
if (comm->me == 0) {
|
||||
if (screen)
|
||||
fprintf(screen," read_restart CPU = %g secs\n",time2-time1);
|
||||
if (logfile)
|
||||
fprintf(logfile," read_restart CPU = %g secs\n",time2-time1);
|
||||
}
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
|
|
|
@ -76,7 +76,7 @@ void Replicate::command(int narg, char **arg)
|
|||
if (atom->nextra_grow || atom->nextra_restart || atom->nextra_store)
|
||||
error->all(FLERR,"Cannot replicate with fixes that store atom quantities");
|
||||
|
||||
// Record wall time for atom replication
|
||||
// record wall time for atom replication
|
||||
|
||||
MPI_Barrier(world);
|
||||
double time1 = MPI_Wtime();
|
||||
|
@ -762,15 +762,15 @@ void Replicate::command(int narg, char **arg)
|
|||
special.build();
|
||||
}
|
||||
|
||||
// Wall time
|
||||
// total time
|
||||
|
||||
MPI_Barrier(world);
|
||||
double time2 = MPI_Wtime();
|
||||
|
||||
if (me == 0) {
|
||||
if (screen)
|
||||
fprintf(screen," Time spent = %g secs\n",time2-time1);
|
||||
fprintf(screen," replicate CPU = %g secs\n",time2-time1);
|
||||
if (logfile)
|
||||
fprintf(logfile," Time spent = %g secs\n",time2-time1);
|
||||
fprintf(logfile," replicate CPU = %g secs\n",time2-time1);
|
||||
}
|
||||
}
|
||||
|
|
1422
src/special.cpp
1422
src/special.cpp
File diff suppressed because it is too large
Load Diff
|
@ -26,29 +26,43 @@ class Special : protected Pointers {
|
|||
|
||||
private:
|
||||
int me,nprocs;
|
||||
int maxall;
|
||||
tagint **onetwo,**onethree,**onefour;
|
||||
|
||||
// data used by ring callback methods
|
||||
// data used by rendezvous callback methods
|
||||
|
||||
int *count;
|
||||
int **dflag;
|
||||
int nrvous;
|
||||
tagint *atomIDs;
|
||||
int *procowner;
|
||||
|
||||
// NOTE(review): presumably the datum exchanged for rendezvous_ids() - confirm in special.cpp
struct IDRvous {
|
||||
int me;          // presumably the rank of the proc that sent this datum - TODO confirm
|
||||
tagint atomID;   // atom ID carried with this datum
|
||||
};
|
||||
|
||||
// NOTE(review): presumably the datum exchanged for rendezvous_pairs() - confirm in special.cpp
struct PairRvous {
|
||||
tagint atomID,partnerID;   // a pair of atom IDs; the role of each side is not visible here
|
||||
};
|
||||
|
||||
// private methods
|
||||
|
||||
void atom_owners();
|
||||
void onetwo_build_newton();
|
||||
void onetwo_build_newton_off();
|
||||
void onethree_build();
|
||||
void onefour_build();
|
||||
|
||||
void dedup();
|
||||
void angle_trim();
|
||||
void dihedral_trim();
|
||||
void combine();
|
||||
void fix_alteration();
|
||||
void timer_output(double);
|
||||
|
||||
// callback functions for ring communication
|
||||
// callback functions for rendezvous communication
|
||||
|
||||
static void ring_one(int, char *, void *);
|
||||
static void ring_two(int, char *, void *);
|
||||
static void ring_three(int, char *, void *);
|
||||
static void ring_four(int, char *, void *);
|
||||
static void ring_five(int, char *, void *);
|
||||
static void ring_six(int, char *, void *);
|
||||
static void ring_seven(int, char *, void *);
|
||||
static void ring_eight(int, char *, void *);
|
||||
static int rendezvous_ids(int, char *, int &, int *&, char *&, void *);
|
||||
static int rendezvous_pairs(int, char *, int &, int *&, char *&, void *);
|
||||
};
|
||||
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue