forked from lijiext/lammps
rendevous comm option for special bonds and fix rigid/small
This commit is contained in:
parent
2f20581b29
commit
9912c44413
|
@ -34,6 +34,7 @@
|
|||
#include "variable.h"
|
||||
#include "random_mars.h"
|
||||
#include "math_const.h"
|
||||
#include "hashlittle.h"
|
||||
#include "memory.h"
|
||||
#include "error.h"
|
||||
|
||||
|
@ -70,8 +71,7 @@ FixRigidSmall::FixRigidSmall(LAMMPS *lmp, int narg, char **arg) :
|
|||
xcmimage(NULL), displace(NULL), eflags(NULL), orient(NULL), dorient(NULL),
|
||||
avec_ellipsoid(NULL), avec_line(NULL), avec_tri(NULL), counts(NULL),
|
||||
itensor(NULL), mass_body(NULL), langextra(NULL), random(NULL),
|
||||
id_dilate(NULL), onemols(NULL), hash(NULL), bbox(NULL), ctr(NULL),
|
||||
idclose(NULL), rsqclose(NULL)
|
||||
id_dilate(NULL), onemols(NULL)
|
||||
{
|
||||
int i;
|
||||
|
||||
|
@ -107,18 +107,18 @@ FixRigidSmall::FixRigidSmall(LAMMPS *lmp, int narg, char **arg) :
|
|||
// parse args for rigid body specification
|
||||
|
||||
int *mask = atom->mask;
|
||||
tagint *bodyid = NULL;
|
||||
tagint *bodyID = NULL;
|
||||
int nlocal = atom->nlocal;
|
||||
|
||||
if (narg < 4) error->all(FLERR,"Illegal fix rigid/small command");
|
||||
if (strcmp(arg[3],"molecule") == 0) {
|
||||
if (atom->molecule_flag == 0)
|
||||
error->all(FLERR,"Fix rigid/small requires atom attribute molecule");
|
||||
bodyid = atom->molecule;
|
||||
bodyID = atom->molecule;
|
||||
|
||||
} else if (strcmp(arg[3],"custom") == 0) {
|
||||
if (narg < 5) error->all(FLERR,"Illegal fix rigid/small command");
|
||||
bodyid = new tagint[nlocal];
|
||||
bodyID = new tagint[nlocal];
|
||||
customflag = 1;
|
||||
|
||||
// determine whether atom-style variable or atom property is used.
|
||||
|
@ -126,9 +126,11 @@ FixRigidSmall::FixRigidSmall(LAMMPS *lmp, int narg, char **arg) :
|
|||
int is_double=0;
|
||||
int custom_index = atom->find_custom(arg[4]+2,is_double);
|
||||
if (custom_index == -1)
|
||||
error->all(FLERR,"Fix rigid/small custom requires previously defined property/atom");
|
||||
error->all(FLERR,"Fix rigid/small custom requires "
|
||||
"previously defined property/atom");
|
||||
else if (is_double)
|
||||
error->all(FLERR,"Fix rigid/small custom requires integer-valued property/atom");
|
||||
error->all(FLERR,"Fix rigid/small custom requires "
|
||||
"integer-valued property/atom");
|
||||
|
||||
int minval = INT_MAX;
|
||||
int *value = atom->ivector[custom_index];
|
||||
|
@ -139,15 +141,17 @@ FixRigidSmall::FixRigidSmall(LAMMPS *lmp, int narg, char **arg) :
|
|||
|
||||
for (i = 0; i < nlocal; i++)
|
||||
if (mask[i] & groupbit)
|
||||
bodyid[i] = (tagint)(value[i] - minval + 1);
|
||||
else bodyid[i] = 0;
|
||||
bodyID[i] = (tagint)(value[i] - minval + 1);
|
||||
else bodyID[i] = 0;
|
||||
|
||||
} else if (strstr(arg[4],"v_") == arg[4]) {
|
||||
int ivariable = input->variable->find(arg[4]+2);
|
||||
if (ivariable < 0)
|
||||
error->all(FLERR,"Variable name for fix rigid/small custom does not exist");
|
||||
error->all(FLERR,"Variable name for fix rigid/small custom "
|
||||
"does not exist");
|
||||
if (input->variable->atomstyle(ivariable) == 0)
|
||||
error->all(FLERR,"Fix rigid/small custom variable is no atom-style variable");
|
||||
error->all(FLERR,"Fix rigid/small custom variable is not "
|
||||
"atom-style variable");
|
||||
double *value = new double[nlocal];
|
||||
input->variable->compute_atom(ivariable,0,value,1,0);
|
||||
int minval = INT_MAX;
|
||||
|
@ -158,8 +162,8 @@ FixRigidSmall::FixRigidSmall(LAMMPS *lmp, int narg, char **arg) :
|
|||
|
||||
for (i = 0; i < nlocal; i++)
|
||||
if (mask[i] & groupbit)
|
||||
bodyid[i] = (tagint)((tagint)value[i] - minval + 1);
|
||||
else bodyid[0] = 0;
|
||||
bodyID[i] = (tagint)((tagint)value[i] - minval + 1);
|
||||
else bodyID[0] = 0;
|
||||
delete[] value;
|
||||
} else error->all(FLERR,"Unsupported fix rigid custom property");
|
||||
} else error->all(FLERR,"Illegal fix rigid/small command");
|
||||
|
@ -167,10 +171,11 @@ FixRigidSmall::FixRigidSmall(LAMMPS *lmp, int narg, char **arg) :
|
|||
if (atom->map_style == 0)
|
||||
error->all(FLERR,"Fix rigid/small requires an atom map, see atom_modify");
|
||||
|
||||
// maxmol = largest bodyid #
|
||||
// maxmol = largest bodyID #
|
||||
|
||||
maxmol = -1;
|
||||
for (i = 0; i < nlocal; i++)
|
||||
if (mask[i] & groupbit) maxmol = MAX(maxmol,bodyid[i]);
|
||||
if (mask[i] & groupbit) maxmol = MAX(maxmol,bodyID[i]);
|
||||
|
||||
tagint itmp;
|
||||
MPI_Allreduce(&maxmol,&itmp,1,MPI_LMP_TAGINT,MPI_MAX,world);
|
||||
|
@ -400,8 +405,19 @@ FixRigidSmall::FixRigidSmall(LAMMPS *lmp, int narg, char **arg) :
|
|||
// sets bodytag for owned atoms
|
||||
// body attributes are computed later by setup_bodies()
|
||||
|
||||
create_bodies(bodyid);
|
||||
if (customflag) delete [] bodyid;
|
||||
double time1 = MPI_Wtime();
|
||||
|
||||
create_bodies(bodyID);
|
||||
if (customflag) delete [] bodyID;
|
||||
|
||||
double time2 = MPI_Wtime();
|
||||
|
||||
if (comm->me == 0) {
|
||||
if (screen)
|
||||
fprintf(screen," create_bodies CPU = %g secs\n",time2-time1);
|
||||
if (logfile)
|
||||
fprintf(logfile," create_bodies CPU = %g secs\n",time2-time1);
|
||||
}
|
||||
|
||||
// set nlocal_body and allocate bodies I own
|
||||
|
||||
|
@ -1514,175 +1530,71 @@ void FixRigidSmall::set_v()
|
|||
set bodytag for all owned atoms
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void FixRigidSmall::create_bodies(tagint *bodyid)
|
||||
void FixRigidSmall::create_bodies(tagint *bodyID)
|
||||
{
|
||||
int i,m,n;
|
||||
double unwrap[3];
|
||||
int i,m;
|
||||
|
||||
// error check on image flags of atoms in rigid bodies
|
||||
|
||||
imageint *image = atom->image;
|
||||
// allocate buffer for input to rendezvous comm
|
||||
// ncount = # of my atoms in bodies
|
||||
|
||||
int *mask = atom->mask;
|
||||
int nlocal = atom->nlocal;
|
||||
|
||||
int *periodicity = domain->periodicity;
|
||||
int xbox,ybox,zbox;
|
||||
|
||||
int flag = 0;
|
||||
for (i = 0; i < nlocal; i++) {
|
||||
if (!(mask[i] & groupbit)) continue;
|
||||
xbox = (image[i] & IMGMASK) - IMGMAX;
|
||||
ybox = (image[i] >> IMGBITS & IMGMASK) - IMGMAX;
|
||||
zbox = (image[i] >> IMG2BITS) - IMGMAX;
|
||||
if ((xbox && !periodicity[0]) || (ybox && !periodicity[1]) ||
|
||||
(zbox && !periodicity[2])) flag = 1;
|
||||
}
|
||||
|
||||
int flagall;
|
||||
MPI_Allreduce(&flag,&flagall,1,MPI_INT,MPI_SUM,world);
|
||||
if (flagall) error->all(FLERR,"Fix rigid/small atom has non-zero image flag "
|
||||
"in a non-periodic dimension");
|
||||
|
||||
// allocate buffer for passing messages around ring of procs
|
||||
// percount = max number of values to put in buffer for each of ncount
|
||||
|
||||
int ncount = 0;
|
||||
for (i = 0; i < nlocal; i++)
|
||||
if (mask[i] & groupbit) ncount++;
|
||||
|
||||
int percount = 5;
|
||||
double *buf;
|
||||
memory->create(buf,ncount*percount,"rigid/small:buf");
|
||||
int *proclist;
|
||||
memory->create(proclist,ncount,"rigid/small:proclist");
|
||||
InRvous *inbuf = (InRvous *)
|
||||
memory->smalloc(ncount*sizeof(InRvous),"rigid/small:inbuf");
|
||||
|
||||
// create map hash for storing unique body IDs of my atoms
|
||||
// key = body ID
|
||||
// value = index into per-body data structure
|
||||
// n = # of entries in hash
|
||||
|
||||
hash = new std::map<tagint,int>();
|
||||
hash->clear();
|
||||
|
||||
// setup hash
|
||||
// key = body ID
|
||||
// value = index into N-length data structure
|
||||
// n = count of unique bodies my atoms are part of
|
||||
|
||||
n = 0;
|
||||
for (i = 0; i < nlocal; i++) {
|
||||
if (!(mask[i] & groupbit)) continue;
|
||||
if (hash->find(bodyid[i]) == hash->end()) (*hash)[bodyid[i]] = n++;
|
||||
}
|
||||
|
||||
// bbox = bounding box of each rigid body my atoms are part of
|
||||
|
||||
memory->create(bbox,n,6,"rigid/small:bbox");
|
||||
|
||||
for (i = 0; i < n; i++) {
|
||||
bbox[i][0] = bbox[i][2] = bbox[i][4] = BIG;
|
||||
bbox[i][1] = bbox[i][3] = bbox[i][5] = -BIG;
|
||||
}
|
||||
|
||||
// pack my atoms into buffer as body ID, unwrapped coords
|
||||
// setup buf to pass to rendezvous comm
|
||||
// one BodyMsg datum for each constituent atom
|
||||
// datum = me, local index of atom, atomID, bodyID, unwrapped coords
|
||||
// owning proc for each datum = random hash of bodyID
|
||||
|
||||
double **x = atom->x;
|
||||
|
||||
m = 0;
|
||||
for (i = 0; i < nlocal; i++) {
|
||||
if (!(mask[i] & groupbit)) continue;
|
||||
domain->unmap(x[i],image[i],unwrap);
|
||||
buf[m++] = bodyid[i];
|
||||
buf[m++] = unwrap[0];
|
||||
buf[m++] = unwrap[1];
|
||||
buf[m++] = unwrap[2];
|
||||
}
|
||||
|
||||
// pass buffer around ring of procs
|
||||
// func = update bbox with atom coords from every proc
|
||||
// when done, have full bbox for every rigid body my atoms are part of
|
||||
|
||||
comm->ring(m,sizeof(double),buf,1,ring_bbox,NULL,(void *)this);
|
||||
|
||||
// check if any bbox is size 0.0, meaning rigid body is a single particle
|
||||
|
||||
flag = 0;
|
||||
for (i = 0; i < n; i++)
|
||||
if (bbox[i][0] == bbox[i][1] && bbox[i][2] == bbox[i][3] &&
|
||||
bbox[i][4] == bbox[i][5]) flag = 1;
|
||||
MPI_Allreduce(&flag,&flagall,1,MPI_INT,MPI_SUM,world);
|
||||
if (flagall)
|
||||
error->all(FLERR,"One or more rigid bodies are a single particle");
|
||||
|
||||
// ctr = center pt of each rigid body my atoms are part of
|
||||
|
||||
memory->create(ctr,n,6,"rigid/small:bbox");
|
||||
|
||||
for (i = 0; i < n; i++) {
|
||||
ctr[i][0] = 0.5 * (bbox[i][0] + bbox[i][1]);
|
||||
ctr[i][1] = 0.5 * (bbox[i][2] + bbox[i][3]);
|
||||
ctr[i][2] = 0.5 * (bbox[i][4] + bbox[i][5]);
|
||||
}
|
||||
|
||||
// idclose = ID of atom in body closest to center pt (smaller ID if tied)
|
||||
// rsqclose = distance squared from idclose to center pt
|
||||
|
||||
memory->create(idclose,n,"rigid/small:idclose");
|
||||
memory->create(rsqclose,n,"rigid/small:rsqclose");
|
||||
|
||||
for (i = 0; i < n; i++) rsqclose[i] = BIG;
|
||||
|
||||
// pack my atoms into buffer as body ID, atom ID, unwrapped coords
|
||||
|
||||
tagint *tag = atom->tag;
|
||||
imageint *image = atom->image;
|
||||
|
||||
m = 0;
|
||||
for (i = 0; i < nlocal; i++) {
|
||||
if (!(mask[i] & groupbit)) continue;
|
||||
domain->unmap(x[i],image[i],unwrap);
|
||||
buf[m++] = bodyid[i];
|
||||
buf[m++] = ubuf(tag[i]).d;
|
||||
buf[m++] = unwrap[0];
|
||||
buf[m++] = unwrap[1];
|
||||
buf[m++] = unwrap[2];
|
||||
proclist[m] = hashlittle(&bodyID[i],sizeof(tagint),0) % nprocs;
|
||||
inbuf[m].me = me;
|
||||
inbuf[m].ilocal = i;
|
||||
inbuf[m].atomID = tag[i];
|
||||
inbuf[m].bodyID = bodyID[i];
|
||||
domain->unmap(x[i],image[i],inbuf[m].x);
|
||||
m++;
|
||||
}
|
||||
|
||||
// pass buffer around ring of procs
|
||||
// func = update idclose,rsqclose with atom IDs from every proc
|
||||
// when done, have idclose for every rigid body my atoms are part of
|
||||
// perform rendezvous operation
|
||||
// each proc owns random subset of bodies, receives all atoms in the bodies
|
||||
// func = compute bbox of each body, flag atom closest to geometric center
|
||||
// when done: each atom has atom ID of owning atom of its body
|
||||
|
||||
comm->ring(m,sizeof(double),buf,2,ring_nearest,NULL,(void *)this);
|
||||
char *buf;
|
||||
int nreturn = comm->rendezvous(ncount,proclist,(char *) inbuf,sizeof(InRvous),
|
||||
rendezvous_body,buf,sizeof(OutRvous),
|
||||
(void *) this);
|
||||
OutRvous *outbuf = (OutRvous *) buf;
|
||||
|
||||
memory->destroy(proclist);
|
||||
memory->sfree(inbuf);
|
||||
|
||||
// set bodytag of all owned atoms, based on idclose
|
||||
// find max value of rsqclose across all procs
|
||||
// set bodytag of all owned atoms based on outbuf info for constituent atoms
|
||||
|
||||
double rsqmax = 0.0;
|
||||
for (i = 0; i < nlocal; i++) {
|
||||
bodytag[i] = 0;
|
||||
if (!(mask[i] & groupbit)) continue;
|
||||
m = hash->find(bodyid[i])->second;
|
||||
bodytag[i] = idclose[m];
|
||||
rsqmax = MAX(rsqmax,rsqclose[m]);
|
||||
}
|
||||
for (i = 0; i < nlocal; i++)
|
||||
if (!(mask[i] & groupbit)) bodytag[i] = 0;
|
||||
|
||||
// pack my atoms into buffer as bodytag of owning atom, unwrapped coords
|
||||
for (m = 0; m < nreturn; m++)
|
||||
bodytag[outbuf[m].ilocal] = outbuf[m].atomID;
|
||||
|
||||
m = 0;
|
||||
for (i = 0; i < nlocal; i++) {
|
||||
if (!(mask[i] & groupbit)) continue;
|
||||
domain->unmap(x[i],image[i],unwrap);
|
||||
buf[m++] = ubuf(bodytag[i]).d;
|
||||
buf[m++] = unwrap[0];
|
||||
buf[m++] = unwrap[1];
|
||||
buf[m++] = unwrap[2];
|
||||
}
|
||||
memory->sfree(outbuf);
|
||||
|
||||
// pass buffer around ring of procs
|
||||
// func = update rsqfar for atoms belonging to bodies I own
|
||||
// when done, have rsqfar for all atoms in bodies I own
|
||||
|
||||
rsqfar = 0.0;
|
||||
comm->ring(m,sizeof(double),buf,3,ring_farthest,NULL,(void *)this);
|
||||
|
||||
// find maxextent of rsqfar across all procs
|
||||
// maxextent = max of rsqfar across all procs
|
||||
// if defined, include molecule->maxextent
|
||||
|
||||
MPI_Allreduce(&rsqfar,&maxextent,1,MPI_DOUBLE,MPI_MAX,world);
|
||||
|
@ -1691,125 +1603,151 @@ void FixRigidSmall::create_bodies(tagint *bodyid)
|
|||
for (int i = 0; i < nmol; i++)
|
||||
maxextent = MAX(maxextent,onemols[i]->maxextent);
|
||||
}
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
process rigid bodies assigned to me
|
||||
buf = list of N BodyMsg datums
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
int FixRigidSmall::rendezvous_body(int n, char *inbuf,
|
||||
int *&proclist, char *&outbuf,
|
||||
void *ptr)
|
||||
{
|
||||
int i,j,m;
|
||||
double delx,dely,delz,rsq;
|
||||
int *iclose;
|
||||
tagint *idclose;
|
||||
double *x,*xown,*rsqclose;
|
||||
double **bbox,**ctr;
|
||||
|
||||
FixRigidSmall *frsptr = (FixRigidSmall *) ptr;
|
||||
Memory *memory = frsptr->memory;
|
||||
Error *error = frsptr->error;
|
||||
MPI_Comm world = frsptr->world;
|
||||
|
||||
// setup hash
|
||||
// ncount = number of bodies assigned to me
|
||||
// key = body ID
|
||||
// value = index into Ncount-length data structure
|
||||
|
||||
InRvous *in = (InRvous *) inbuf;
|
||||
std::map<tagint,int> hash;
|
||||
tagint id;
|
||||
|
||||
int ncount = 0;
|
||||
for (i = 0; i < n; i++) {
|
||||
id = in[i].bodyID;
|
||||
if (hash.find(id) == hash.end()) hash[id] = ncount++;
|
||||
}
|
||||
|
||||
// bbox = bounding box of each rigid body
|
||||
|
||||
memory->create(bbox,ncount,6,"rigid/small:bbox");
|
||||
|
||||
for (m = 0; m < ncount; m++) {
|
||||
bbox[m][0] = bbox[m][2] = bbox[m][4] = BIG;
|
||||
bbox[m][1] = bbox[m][3] = bbox[m][5] = -BIG;
|
||||
}
|
||||
|
||||
for (i = 0; i < n; i++) {
|
||||
m = hash.find(in[i].bodyID)->second;
|
||||
x = in[i].x;
|
||||
bbox[m][0] = MIN(bbox[m][0],x[0]);
|
||||
bbox[m][1] = MAX(bbox[m][1],x[0]);
|
||||
bbox[m][2] = MIN(bbox[m][2],x[1]);
|
||||
bbox[m][3] = MAX(bbox[m][3],x[1]);
|
||||
bbox[m][4] = MIN(bbox[m][4],x[2]);
|
||||
bbox[m][5] = MAX(bbox[m][5],x[2]);
|
||||
}
|
||||
|
||||
// check if any bbox is size 0.0, meaning rigid body is a single particle
|
||||
|
||||
int flag = 0;
|
||||
for (m = 0; m < ncount; m++)
|
||||
if (bbox[m][0] == bbox[m][1] && bbox[m][2] == bbox[m][3] &&
|
||||
bbox[m][4] == bbox[m][5]) flag = 1;
|
||||
int flagall;
|
||||
MPI_Allreduce(&flag,&flagall,1,MPI_INT,MPI_SUM,world); // sync here?
|
||||
if (flagall)
|
||||
error->all(FLERR,"One or more rigid bodies are a single particle");
|
||||
|
||||
// ctr = geometric center pt of each rigid body
|
||||
|
||||
memory->create(ctr,ncount,3,"rigid/small:bbox");
|
||||
|
||||
for (m = 0; m < ncount; m++) {
|
||||
ctr[m][0] = 0.5 * (bbox[m][0] + bbox[m][1]);
|
||||
ctr[m][1] = 0.5 * (bbox[m][2] + bbox[m][3]);
|
||||
ctr[m][2] = 0.5 * (bbox[m][4] + bbox[m][5]);
|
||||
}
|
||||
|
||||
// idclose = atomID closest to center point of each body
|
||||
|
||||
memory->create(idclose,ncount,"rigid/small:idclose");
|
||||
memory->create(iclose,ncount,"rigid/small:iclose");
|
||||
memory->create(rsqclose,ncount,"rigid/small:rsqclose");
|
||||
for (m = 0; m < ncount; m++) rsqclose[m] = BIG;
|
||||
|
||||
for (i = 0; i < n; i++) {
|
||||
m = hash.find(in[i].bodyID)->second;
|
||||
x = in[i].x;
|
||||
delx = x[0] - ctr[m][0];
|
||||
dely = x[1] - ctr[m][1];
|
||||
delz = x[2] - ctr[m][2];
|
||||
rsq = delx*delx + dely*dely + delz*delz;
|
||||
if (rsq <= rsqclose[m]) {
|
||||
if (rsq == rsqclose[m] && in[i].atomID > idclose[m]) continue;
|
||||
iclose[m] = i;
|
||||
idclose[m] = in[i].atomID;
|
||||
rsqclose[m] = rsq;
|
||||
}
|
||||
}
|
||||
|
||||
// compute rsqfar for all bodies I own
|
||||
// set rsqfar back in caller
|
||||
|
||||
double rsqfar = 0.0;
|
||||
|
||||
for (int i = 0; i < n; i++) {
|
||||
m = hash.find(in[i].bodyID)->second;
|
||||
xown = in[iclose[m]].x;
|
||||
x = in[i].x;
|
||||
delx = x[0] - xown[0];
|
||||
dely = x[1] - xown[1];
|
||||
delz = x[2] - xown[2];
|
||||
rsq = delx*delx + dely*dely + delz*delz;
|
||||
rsqfar = MAX(rsqfar,rsq);
|
||||
}
|
||||
|
||||
frsptr->rsqfar = rsqfar;
|
||||
|
||||
// pass list of OutRvous datums back to comm->rendezvous
|
||||
|
||||
int nout = n;
|
||||
memory->create(proclist,nout,"rigid/small:proclist");
|
||||
OutRvous *out = (OutRvous *)
|
||||
memory->smalloc(nout*sizeof(OutRvous),"rigid/small:out");
|
||||
|
||||
for (int i = 0; i < nout; i++) {
|
||||
proclist[i] = in[i].me;
|
||||
out[i].ilocal = in[i].ilocal;
|
||||
m = hash.find(in[i].bodyID)->second;
|
||||
out[i].atomID = idclose[m];
|
||||
}
|
||||
|
||||
outbuf = (char *) out;
|
||||
|
||||
// clean up
|
||||
// Comm::rendezvous will delete proclist and out (outbuf)
|
||||
|
||||
delete hash;
|
||||
memory->destroy(buf);
|
||||
memory->destroy(bbox);
|
||||
memory->destroy(ctr);
|
||||
memory->destroy(idclose);
|
||||
memory->destroy(iclose);
|
||||
memory->destroy(rsqclose);
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
process rigid body atoms from another proc
|
||||
update bounding box for rigid bodies my atoms are part of
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void FixRigidSmall::ring_bbox(int n, char *cbuf, void *ptr)
|
||||
{
|
||||
FixRigidSmall *frsptr = (FixRigidSmall *) ptr;
|
||||
std::map<tagint,int> *hash = frsptr->hash;
|
||||
double **bbox = frsptr->bbox;
|
||||
|
||||
double *buf = (double *) cbuf;
|
||||
int ndatums = n/4;
|
||||
|
||||
int j,imol;
|
||||
double *x;
|
||||
|
||||
int m = 0;
|
||||
for (int i = 0; i < ndatums; i++, m += 4) {
|
||||
imol = static_cast<int> (buf[m]);
|
||||
if (hash->find(imol) != hash->end()) {
|
||||
j = hash->find(imol)->second;
|
||||
x = &buf[m+1];
|
||||
bbox[j][0] = MIN(bbox[j][0],x[0]);
|
||||
bbox[j][1] = MAX(bbox[j][1],x[0]);
|
||||
bbox[j][2] = MIN(bbox[j][2],x[1]);
|
||||
bbox[j][3] = MAX(bbox[j][3],x[1]);
|
||||
bbox[j][4] = MIN(bbox[j][4],x[2]);
|
||||
bbox[j][5] = MAX(bbox[j][5],x[2]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
process rigid body atoms from another proc
|
||||
update nearest atom to body center for rigid bodies my atoms are part of
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void FixRigidSmall::ring_nearest(int n, char *cbuf, void *ptr)
|
||||
{
|
||||
FixRigidSmall *frsptr = (FixRigidSmall *) ptr;
|
||||
std::map<tagint,int> *hash = frsptr->hash;
|
||||
double **ctr = frsptr->ctr;
|
||||
tagint *idclose = frsptr->idclose;
|
||||
double *rsqclose = frsptr->rsqclose;
|
||||
|
||||
double *buf = (double *) cbuf;
|
||||
int ndatums = n/5;
|
||||
|
||||
int j,imol;
|
||||
tagint tag;
|
||||
double delx,dely,delz,rsq;
|
||||
double *x;
|
||||
|
||||
int m = 0;
|
||||
for (int i = 0; i < ndatums; i++, m += 5) {
|
||||
imol = static_cast<int> (buf[m]);
|
||||
if (hash->find(imol) != hash->end()) {
|
||||
j = hash->find(imol)->second;
|
||||
tag = (tagint) ubuf(buf[m+1]).i;
|
||||
x = &buf[m+2];
|
||||
delx = x[0] - ctr[j][0];
|
||||
dely = x[1] - ctr[j][1];
|
||||
delz = x[2] - ctr[j][2];
|
||||
rsq = delx*delx + dely*dely + delz*delz;
|
||||
if (rsq <= rsqclose[j]) {
|
||||
if (rsq == rsqclose[j] && tag > idclose[j]) continue;
|
||||
idclose[j] = tag;
|
||||
rsqclose[j] = rsq;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
process rigid body atoms from another proc
|
||||
update rsqfar = distance from owning atom to other atom
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void FixRigidSmall::ring_farthest(int n, char *cbuf, void *ptr)
|
||||
{
|
||||
FixRigidSmall *frsptr = (FixRigidSmall *) ptr;
|
||||
double **x = frsptr->atom->x;
|
||||
imageint *image = frsptr->atom->image;
|
||||
int nlocal = frsptr->atom->nlocal;
|
||||
|
||||
double *buf = (double *) cbuf;
|
||||
int ndatums = n/4;
|
||||
|
||||
int iowner;
|
||||
tagint tag;
|
||||
double delx,dely,delz,rsq;
|
||||
double *xx;
|
||||
double unwrap[3];
|
||||
|
||||
int m = 0;
|
||||
for (int i = 0; i < ndatums; i++, m += 4) {
|
||||
tag = (tagint) ubuf(buf[m]).i;
|
||||
iowner = frsptr->atom->map(tag);
|
||||
if (iowner < 0 || iowner >= nlocal) continue;
|
||||
frsptr->domain->unmap(x[iowner],image[iowner],unwrap);
|
||||
xx = &buf[m+1];
|
||||
delx = xx[0] - unwrap[0];
|
||||
dely = xx[1] - unwrap[1];
|
||||
delz = xx[2] - unwrap[2];
|
||||
rsq = delx*delx + dely*dely + delz*delz;
|
||||
frsptr->rsqfar = MAX(frsptr->rsqfar,rsq);
|
||||
}
|
||||
return nout;
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
|
@ -2472,9 +2410,9 @@ void FixRigidSmall::readfile(int which, double **array, int *inbody)
|
|||
|
||||
int nlocal = atom->nlocal;
|
||||
|
||||
hash = new std::map<tagint,int>();
|
||||
std::map<tagint,int> hash;
|
||||
for (i = 0; i < nlocal; i++)
|
||||
if (bodyown[i] >= 0) (*hash)[atom->molecule[i]] = bodyown[i];
|
||||
if (bodyown[i] >= 0) hash[atom->molecule[i]] = bodyown[i];
|
||||
|
||||
// open file and read header
|
||||
|
||||
|
@ -2533,11 +2471,11 @@ void FixRigidSmall::readfile(int which, double **array, int *inbody)
|
|||
id = ATOTAGINT(values[0]);
|
||||
if (id <= 0 || id > maxmol)
|
||||
error->all(FLERR,"Invalid rigid body ID in fix rigid/small file");
|
||||
if (hash->find(id) == hash->end()) {
|
||||
if (hash.find(id) == hash.end()) {
|
||||
buf = next + 1;
|
||||
continue;
|
||||
}
|
||||
m = (*hash)[id];
|
||||
m = hash[id];
|
||||
inbody[m] = 1;
|
||||
|
||||
if (which == 0) {
|
||||
|
@ -2576,7 +2514,6 @@ void FixRigidSmall::readfile(int which, double **array, int *inbody)
|
|||
|
||||
delete [] buffer;
|
||||
delete [] values;
|
||||
delete hash;
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
|
|
|
@ -23,7 +23,7 @@ FixStyle(rigid/small,FixRigidSmall)
|
|||
#include "fix.h"
|
||||
|
||||
// replace this later
|
||||
#include <map>
|
||||
//#include <map>
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
|
@ -180,13 +180,21 @@ class FixRigidSmall : public Fix {
|
|||
|
||||
// class data used by ring communication callbacks
|
||||
|
||||
std::map<tagint,int> *hash;
|
||||
double **bbox;
|
||||
double **ctr;
|
||||
tagint *idclose;
|
||||
double *rsqclose;
|
||||
double rsqfar;
|
||||
|
||||
struct InRvous {
|
||||
int me,ilocal;
|
||||
tagint atomID,bodyID;
|
||||
double x[3];
|
||||
};
|
||||
|
||||
struct OutRvous {
|
||||
int ilocal;
|
||||
tagint atomID;
|
||||
};
|
||||
|
||||
// local methods
|
||||
|
||||
void image_shift();
|
||||
void set_xv();
|
||||
void set_v();
|
||||
|
@ -199,11 +207,9 @@ class FixRigidSmall : public Fix {
|
|||
void grow_body();
|
||||
void reset_atom2body();
|
||||
|
||||
// callback functions for ring communication
|
||||
// callback function for rendezvous communication
|
||||
|
||||
static void ring_bbox(int, char *, void *);
|
||||
static void ring_nearest(int, char *, void *);
|
||||
static void ring_farthest(int, char *, void *);
|
||||
static int rendezvous_body(int, char *, int *&, char *&, void *);
|
||||
|
||||
// debug
|
||||
|
||||
|
|
51
src/comm.cpp
51
src/comm.cpp
|
@ -28,6 +28,7 @@
|
|||
#include "dump.h"
|
||||
#include "group.h"
|
||||
#include "procmap.h"
|
||||
#include "irregular.h"
|
||||
#include "accelerator_kokkos.h"
|
||||
#include "memory.h"
|
||||
#include "error.h"
|
||||
|
@ -725,6 +726,56 @@ void Comm::ring(int n, int nper, void *inbuf, int messtag,
|
|||
memory->destroy(bufcopy);
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
rendezvous communication operation
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
int Comm::rendezvous(int n, int *proclist, char *inbuf, int insize,
|
||||
int (*callback)(int, char *, int *&, char *&, void *),
|
||||
char *&outbuf, int outsize, void *ptr)
|
||||
{
|
||||
// comm data from caller decomposition to rendezvous decomposition
|
||||
|
||||
Irregular *irregular = new Irregular(lmp);
|
||||
|
||||
int n_rvous = irregular->create_data(n,proclist); // add sort
|
||||
char *inbuf_rvous = (char *) memory->smalloc((bigint) n_rvous*insize,
|
||||
"rendezvous:inbuf_rvous");
|
||||
irregular->exchange_data(inbuf,insize,inbuf_rvous);
|
||||
|
||||
irregular->destroy_data();
|
||||
delete irregular;
|
||||
|
||||
// peform rendezvous computation via callback()
|
||||
// callback() allocates proclist_rvous and outbuf_rvous
|
||||
|
||||
int *proclist_rvous;
|
||||
char *outbuf_rvous;
|
||||
|
||||
int nout_rvous =
|
||||
callback(n_rvous,inbuf_rvous,proclist_rvous,outbuf_rvous,ptr);
|
||||
|
||||
memory->sfree(inbuf_rvous);
|
||||
|
||||
// comm data from rendezvous decomposition back to caller
|
||||
// caller will free outbuf
|
||||
|
||||
irregular = new Irregular(lmp);
|
||||
|
||||
int nout = irregular->create_data(nout_rvous,proclist_rvous);
|
||||
outbuf = (char *) memory->smalloc((bigint) nout*outsize,"rendezvous:outbuf");
|
||||
irregular->exchange_data(outbuf_rvous,outsize,outbuf);
|
||||
|
||||
irregular->destroy_data();
|
||||
delete irregular;
|
||||
memory->destroy(proclist_rvous);
|
||||
memory->sfree(outbuf_rvous);
|
||||
|
||||
// return number of datums
|
||||
|
||||
return nout;
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
proc 0 reads Nlines from file into buf and bcasts buf to all procs
|
||||
caller allocates buf to max size needed
|
||||
|
|
|
@ -109,6 +109,10 @@ class Comm : protected Pointers {
|
|||
|
||||
void ring(int, int, void *, int, void (*)(int, char *, void *),
|
||||
void *, void *, int self = 1);
|
||||
int rendezvous(int, int *, char *, int,
|
||||
int (*)(int, char *, int *&, char *&, void *),
|
||||
char *&, int, void *);
|
||||
|
||||
int read_lines_from_file(FILE *, int, int, char *);
|
||||
int read_lines_from_file_universe(FILE *, int, int, char *);
|
||||
|
||||
|
|
|
@ -514,9 +514,6 @@ void CreateAtoms::command(int narg, char **arg)
|
|||
if (domain->triclinic) domain->lamda2x(atom->nlocal);
|
||||
}
|
||||
|
||||
MPI_Barrier(world);
|
||||
double time2 = MPI_Wtime();
|
||||
|
||||
// clean up
|
||||
|
||||
delete ranmol;
|
||||
|
@ -526,21 +523,6 @@ void CreateAtoms::command(int narg, char **arg)
|
|||
delete [] ystr;
|
||||
delete [] zstr;
|
||||
|
||||
// print status
|
||||
|
||||
if (comm->me == 0) {
|
||||
if (screen) {
|
||||
fprintf(screen,"Created " BIGINT_FORMAT " atoms\n",
|
||||
atom->natoms-natoms_previous);
|
||||
fprintf(screen," Time spent = %g secs\n",time2-time1);
|
||||
}
|
||||
if (logfile) {
|
||||
fprintf(logfile,"Created " BIGINT_FORMAT " atoms\n",
|
||||
atom->natoms-natoms_previous);
|
||||
fprintf(logfile," Time spent = %g secs\n",time2-time1);
|
||||
}
|
||||
}
|
||||
|
||||
// for MOLECULE mode:
|
||||
// create special bond lists for molecular systems,
|
||||
// but not for atom style template
|
||||
|
@ -550,6 +532,25 @@ void CreateAtoms::command(int narg, char **arg)
|
|||
if (atom->molecular == 1 && onemol->bondflag && !onemol->specialflag) {
|
||||
Special special(lmp);
|
||||
special.build();
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
// print status
|
||||
|
||||
MPI_Barrier(world);
|
||||
double time2 = MPI_Wtime();
|
||||
|
||||
if (comm->me == 0) {
|
||||
if (screen) {
|
||||
fprintf(screen,"Created " BIGINT_FORMAT " atoms\n",
|
||||
atom->natoms-natoms_previous);
|
||||
fprintf(screen," create_atoms CPU = %g secs\n",time2-time1);
|
||||
}
|
||||
if (logfile) {
|
||||
fprintf(logfile,"Created " BIGINT_FORMAT " atoms\n",
|
||||
atom->natoms-natoms_previous);
|
||||
fprintf(logfile," create_atoms CPU = %g secs\n",time2-time1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,333 @@
|
|||
// Hash function hashlittle()
|
||||
// from lookup3.c, by Bob Jenkins, May 2006, Public Domain
|
||||
// bob_jenkins@burtleburtle.net
|
||||
|
||||
#include "stddef.h"
|
||||
#include "stdint.h"
|
||||
|
||||
#define HASH_LITTLE_ENDIAN 1 // Intel and AMD are little endian
|
||||
|
||||
#define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k))))
|
||||
|
||||
/*
|
||||
-------------------------------------------------------------------------------
|
||||
mix -- mix 3 32-bit values reversibly.
|
||||
|
||||
This is reversible, so any information in (a,b,c) before mix() is
|
||||
still in (a,b,c) after mix().
|
||||
|
||||
If four pairs of (a,b,c) inputs are run through mix(), or through
|
||||
mix() in reverse, there are at least 32 bits of the output that
|
||||
are sometimes the same for one pair and different for another pair.
|
||||
This was tested for:
|
||||
* pairs that differed by one bit, by two bits, in any combination
|
||||
of top bits of (a,b,c), or in any combination of bottom bits of
|
||||
(a,b,c).
|
||||
* "differ" is defined as +, -, ^, or ~^. For + and -, I transformed
|
||||
the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
|
||||
is commonly produced by subtraction) look like a single 1-bit
|
||||
difference.
|
||||
* the base values were pseudorandom, all zero but one bit set, or
|
||||
all zero plus a counter that starts at zero.
|
||||
|
||||
Some k values for my "a-=c; a^=rot(c,k); c+=b;" arrangement that
|
||||
satisfy this are
|
||||
4 6 8 16 19 4
|
||||
9 15 3 18 27 15
|
||||
14 9 3 7 17 3
|
||||
Well, "9 15 3 18 27 15" didn't quite get 32 bits diffing
|
||||
for "differ" defined as + with a one-bit base and a two-bit delta. I
|
||||
used http://burtleburtle.net/bob/hash/avalanche.html to choose
|
||||
the operations, constants, and arrangements of the variables.
|
||||
|
||||
This does not achieve avalanche. There are input bits of (a,b,c)
|
||||
that fail to affect some output bits of (a,b,c), especially of a. The
|
||||
most thoroughly mixed value is c, but it doesn't really even achieve
|
||||
avalanche in c.
|
||||
|
||||
This allows some parallelism. Read-after-writes are good at doubling
|
||||
the number of bits affected, so the goal of mixing pulls in the opposite
|
||||
direction as the goal of parallelism. I did what I could. Rotates
|
||||
seem to cost as much as shifts on every machine I could lay my hands
|
||||
on, and rotates are much kinder to the top and bottom bits, so I used
|
||||
rotates.
|
||||
-------------------------------------------------------------------------------
|
||||
*/
|
||||
#define mix(a,b,c) \
|
||||
{ \
|
||||
a -= c; a ^= rot(c, 4); c += b; \
|
||||
b -= a; b ^= rot(a, 6); a += c; \
|
||||
c -= b; c ^= rot(b, 8); b += a; \
|
||||
a -= c; a ^= rot(c,16); c += b; \
|
||||
b -= a; b ^= rot(a,19); a += c; \
|
||||
c -= b; c ^= rot(b, 4); b += a; \
|
||||
}
|
||||
|
||||
/*
|
||||
-------------------------------------------------------------------------------
|
||||
final -- final mixing of 3 32-bit values (a,b,c) into c
|
||||
|
||||
Pairs of (a,b,c) values differing in only a few bits will usually
|
||||
produce values of c that look totally different. This was tested for
|
||||
* pairs that differed by one bit, by two bits, in any combination
|
||||
of top bits of (a,b,c), or in any combination of bottom bits of
|
||||
(a,b,c).
|
||||
* "differ" is defined as +, -, ^, or ~^. For + and -, I transformed
|
||||
the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
|
||||
is commonly produced by subtraction) look like a single 1-bit
|
||||
difference.
|
||||
* the base values were pseudorandom, all zero but one bit set, or
|
||||
all zero plus a counter that starts at zero.
|
||||
|
||||
These constants passed:
|
||||
14 11 25 16 4 14 24
|
||||
12 14 25 16 4 14 24
|
||||
and these came close:
|
||||
4 8 15 26 3 22 24
|
||||
10 8 15 26 3 22 24
|
||||
11 8 15 26 3 22 24
|
||||
-------------------------------------------------------------------------------
|
||||
*/
|
||||
#define final(a,b,c) \
|
||||
{ \
|
||||
c ^= b; c -= rot(b,14); \
|
||||
a ^= c; a -= rot(c,11); \
|
||||
b ^= a; b -= rot(a,25); \
|
||||
c ^= b; c -= rot(b,16); \
|
||||
a ^= c; a -= rot(c,4); \
|
||||
b ^= a; b -= rot(a,14); \
|
||||
c ^= b; c -= rot(b,24); \
|
||||
}
|
||||
|
||||
/*
|
||||
-------------------------------------------------------------------------------
|
||||
hashlittle() -- hash a variable-length key into a 32-bit value
|
||||
k : the key (the unaligned variable-length array of bytes)
|
||||
length : the length of the key, counting by bytes
|
||||
initval : can be any 4-byte value
|
||||
Returns a 32-bit value. Every bit of the key affects every bit of
|
||||
the return value. Two keys differing by one or two bits will have
|
||||
totally different hash values.
|
||||
|
||||
The best hash table sizes are powers of 2. There is no need to do
|
||||
mod a prime (mod is sooo slow!). If you need less than 32 bits,
|
||||
use a bitmask. For example, if you need only 10 bits, do
|
||||
h = (h & hashmask(10));
|
||||
In which case, the hash table should have hashsize(10) elements.
|
||||
|
||||
If you are hashing n strings (uint8_t **)k, do it like this:
|
||||
for (i=0, h=0; i<n; ++i) h = hashlittle( k[i], len[i], h);
|
||||
|
||||
By Bob Jenkins, 2006. bob_jenkins@burtleburtle.net. You may use this
|
||||
code any way you wish, private, educational, or commercial. It's free.
|
||||
|
||||
Use for hash table lookup, or anything where one collision in 2^^32 is
|
||||
acceptable. Do NOT use for cryptographic purposes.
|
||||
-------------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
uint32_t hashlittle( const void *key, size_t length, uint32_t initval)
|
||||
{
|
||||
#ifndef PURIFY_HATES_HASHLITTLE
|
||||
|
||||
uint32_t a,b,c; /* internal state */
|
||||
union { const void *ptr; size_t i; } u; /* needed for Mac Powerbook G4 */
|
||||
|
||||
/* Set up the internal state */
|
||||
a = b = c = 0xdeadbeef + ((uint32_t)length) + initval;
|
||||
|
||||
u.ptr = key;
|
||||
if (HASH_LITTLE_ENDIAN && ((u.i & 0x3) == 0)) {
|
||||
const uint32_t *k = (const uint32_t *)key; /* read 32-bit chunks */
|
||||
const uint8_t *k8;
|
||||
|
||||
/*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */
|
||||
while (length > 12)
|
||||
{
|
||||
a += k[0];
|
||||
b += k[1];
|
||||
c += k[2];
|
||||
mix(a,b,c);
|
||||
length -= 12;
|
||||
k += 3;
|
||||
}
|
||||
|
||||
/*----------------------------- handle the last (probably partial) block */
|
||||
/*
|
||||
* "k[2]&0xffffff" actually reads beyond the end of the string, but
|
||||
* then masks off the part it's not allowed to read. Because the
|
||||
* string is aligned, the masked-off tail is in the same word as the
|
||||
* rest of the string. Every machine with memory protection I've seen
|
||||
* does it on word boundaries, so is OK with this. But VALGRIND will
|
||||
* still catch it and complain. The masking trick does make the hash
|
||||
* noticably faster for short strings (like English words).
|
||||
*/
|
||||
#ifndef VALGRIND
|
||||
|
||||
switch(length)
|
||||
{
|
||||
case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
|
||||
case 11: c+=k[2]&0xffffff; b+=k[1]; a+=k[0]; break;
|
||||
case 10: c+=k[2]&0xffff; b+=k[1]; a+=k[0]; break;
|
||||
case 9 : c+=k[2]&0xff; b+=k[1]; a+=k[0]; break;
|
||||
case 8 : b+=k[1]; a+=k[0]; break;
|
||||
case 7 : b+=k[1]&0xffffff; a+=k[0]; break;
|
||||
case 6 : b+=k[1]&0xffff; a+=k[0]; break;
|
||||
case 5 : b+=k[1]&0xff; a+=k[0]; break;
|
||||
case 4 : a+=k[0]; break;
|
||||
case 3 : a+=k[0]&0xffffff; break;
|
||||
case 2 : a+=k[0]&0xffff; break;
|
||||
case 1 : a+=k[0]&0xff; break;
|
||||
case 0 : return c; /* zero length strings require no mixing */
|
||||
}
|
||||
|
||||
#else /* make valgrind happy */
|
||||
|
||||
k8 = (const uint8_t *)k;
|
||||
switch(length)
|
||||
{
|
||||
case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
|
||||
case 11: c+=((uint32_t)k8[10])<<16; /* fall through */
|
||||
case 10: c+=((uint32_t)k8[9])<<8; /* fall through */
|
||||
case 9 : c+=k8[8]; /* fall through */
|
||||
case 8 : b+=k[1]; a+=k[0]; break;
|
||||
case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */
|
||||
case 6 : b+=((uint32_t)k8[5])<<8; /* fall through */
|
||||
case 5 : b+=k8[4]; /* fall through */
|
||||
case 4 : a+=k[0]; break;
|
||||
case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */
|
||||
case 2 : a+=((uint32_t)k8[1])<<8; /* fall through */
|
||||
case 1 : a+=k8[0]; break;
|
||||
case 0 : return c;
|
||||
}
|
||||
|
||||
#endif /* !valgrind */
|
||||
|
||||
} else if (HASH_LITTLE_ENDIAN && ((u.i & 0x1) == 0)) {
|
||||
const uint16_t *k = (const uint16_t *)key; /* read 16-bit chunks */
|
||||
const uint8_t *k8;
|
||||
|
||||
/*--------------- all but last block: aligned reads and different mixing */
|
||||
while (length > 12)
|
||||
{
|
||||
a += k[0] + (((uint32_t)k[1])<<16);
|
||||
b += k[2] + (((uint32_t)k[3])<<16);
|
||||
c += k[4] + (((uint32_t)k[5])<<16);
|
||||
mix(a,b,c);
|
||||
length -= 12;
|
||||
k += 6;
|
||||
}
|
||||
|
||||
/*----------------------------- handle the last (probably partial) block */
|
||||
k8 = (const uint8_t *)k;
|
||||
switch(length)
|
||||
{
|
||||
case 12: c+=k[4]+(((uint32_t)k[5])<<16);
|
||||
b+=k[2]+(((uint32_t)k[3])<<16);
|
||||
a+=k[0]+(((uint32_t)k[1])<<16);
|
||||
break;
|
||||
case 11: c+=((uint32_t)k8[10])<<16; /* fall through */
|
||||
case 10: c+=k[4];
|
||||
b+=k[2]+(((uint32_t)k[3])<<16);
|
||||
a+=k[0]+(((uint32_t)k[1])<<16);
|
||||
break;
|
||||
case 9 : c+=k8[8]; /* fall through */
|
||||
case 8 : b+=k[2]+(((uint32_t)k[3])<<16);
|
||||
a+=k[0]+(((uint32_t)k[1])<<16);
|
||||
break;
|
||||
case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */
|
||||
case 6 : b+=k[2];
|
||||
a+=k[0]+(((uint32_t)k[1])<<16);
|
||||
break;
|
||||
case 5 : b+=k8[4]; /* fall through */
|
||||
case 4 : a+=k[0]+(((uint32_t)k[1])<<16);
|
||||
break;
|
||||
case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */
|
||||
case 2 : a+=k[0];
|
||||
break;
|
||||
case 1 : a+=k8[0];
|
||||
break;
|
||||
case 0 : return c; /* zero length requires no mixing */
|
||||
}
|
||||
|
||||
} else { /* need to read the key one byte at a time */
|
||||
const uint8_t *k = (const uint8_t *)key;
|
||||
|
||||
/*--------------- all but the last block: affect some 32 bits of (a,b,c) */
|
||||
while (length > 12)
|
||||
{
|
||||
a += k[0];
|
||||
a += ((uint32_t)k[1])<<8;
|
||||
a += ((uint32_t)k[2])<<16;
|
||||
a += ((uint32_t)k[3])<<24;
|
||||
b += k[4];
|
||||
b += ((uint32_t)k[5])<<8;
|
||||
b += ((uint32_t)k[6])<<16;
|
||||
b += ((uint32_t)k[7])<<24;
|
||||
c += k[8];
|
||||
c += ((uint32_t)k[9])<<8;
|
||||
c += ((uint32_t)k[10])<<16;
|
||||
c += ((uint32_t)k[11])<<24;
|
||||
mix(a,b,c);
|
||||
length -= 12;
|
||||
k += 12;
|
||||
}
|
||||
|
||||
/*-------------------------------- last block: affect all 32 bits of (c) */
|
||||
switch(length) /* all the case statements fall through */
|
||||
{
|
||||
case 12: c+=((uint32_t)k[11])<<24;
|
||||
case 11: c+=((uint32_t)k[10])<<16;
|
||||
case 10: c+=((uint32_t)k[9])<<8;
|
||||
case 9 : c+=k[8];
|
||||
case 8 : b+=((uint32_t)k[7])<<24;
|
||||
case 7 : b+=((uint32_t)k[6])<<16;
|
||||
case 6 : b+=((uint32_t)k[5])<<8;
|
||||
case 5 : b+=k[4];
|
||||
case 4 : a+=((uint32_t)k[3])<<24;
|
||||
case 3 : a+=((uint32_t)k[2])<<16;
|
||||
case 2 : a+=((uint32_t)k[1])<<8;
|
||||
case 1 : a+=k[0];
|
||||
break;
|
||||
case 0 : return c;
|
||||
}
|
||||
}
|
||||
|
||||
final(a,b,c);
|
||||
return c;
|
||||
|
||||
#else /* PURIFY_HATES_HASHLITTLE */
|
||||
/* I don't know what it is about Jenkins' hashlittle function, but
|
||||
* it drives purify insane, even with VALGRIND defined. It makes
|
||||
* purify unusable!! The code execution doesn't even make sense.
|
||||
* Below is a (probably) weaker hash function that at least allows
|
||||
* testing with purify.
|
||||
*/
|
||||
#define MAXINT_DIV_PHI 11400714819323198485U
|
||||
|
||||
uint32_t h, rest, *p, bytes, num_bytes;
|
||||
char *byteptr;
|
||||
|
||||
num_bytes = length;
|
||||
|
||||
/* First hash the uint32_t-sized portions of the key */
|
||||
h = 0;
|
||||
for (p = (uint32_t *)key, bytes=num_bytes;
|
||||
bytes >= (uint32_t) sizeof(uint32_t);
|
||||
bytes-=sizeof(uint32_t), p++){
|
||||
h = (h^(*p))*MAXINT_DIV_PHI;
|
||||
}
|
||||
|
||||
/* Then take care of the remaining bytes, if any */
|
||||
rest = 0;
|
||||
for (byteptr = (char *)p; bytes > 0; bytes--, byteptr++){
|
||||
rest = (rest<<8) | (*byteptr);
|
||||
}
|
||||
|
||||
/* If extra bytes, merge the two parts */
|
||||
if (rest)
|
||||
h = (h^rest)*MAXINT_DIV_PHI;
|
||||
|
||||
return h;
|
||||
#endif /* PURIFY_HATES_HASHLITTLE */
|
||||
}
|
|
@ -0,0 +1,5 @@
|
|||
// Hash function hashlittle()
|
||||
// from lookup3.c, by Bob Jenkins, May 2006, Public Domain
|
||||
// bob_jenkins@burtleburtle.net
|
||||
|
||||
uint32_t hashlittle(const void *key, size_t length, uint32_t);
|
|
@ -501,7 +501,8 @@ int compare_standalone(const int i, const int j, void *ptr)
|
|||
|
||||
void Irregular::exchange_atom(double *sendbuf, int *sizes, double *recvbuf)
|
||||
{
|
||||
int i,m,n,offset,count;
|
||||
int i,m,n,count;
|
||||
bigint offset;
|
||||
|
||||
// post all receives
|
||||
|
||||
|
@ -739,11 +740,13 @@ int Irregular::create_data(int n, int *proclist, int sortflag)
|
|||
|
||||
void Irregular::exchange_data(char *sendbuf, int nbytes, char *recvbuf)
|
||||
{
|
||||
int i,m,n,offset,count;
|
||||
int i,n,count;
|
||||
bigint m; // these 2 lines enable send/recv buf to be larger than 2 GB
|
||||
char *dest;
|
||||
|
||||
// post all receives, starting after self copies
|
||||
|
||||
offset = num_self*nbytes;
|
||||
bigint offset = num_self*nbytes;
|
||||
for (int irecv = 0; irecv < nrecv_proc; irecv++) {
|
||||
MPI_Irecv(&recvbuf[offset],num_recv[irecv]*nbytes,MPI_CHAR,
|
||||
proc_recv[irecv],0,world,&request[irecv]);
|
||||
|
@ -765,18 +768,22 @@ void Irregular::exchange_data(char *sendbuf, int nbytes, char *recvbuf)
|
|||
n = 0;
|
||||
for (int isend = 0; isend < nsend_proc; isend++) {
|
||||
count = num_send[isend];
|
||||
dest = buf;
|
||||
for (i = 0; i < count; i++) {
|
||||
m = index_send[n++];
|
||||
memcpy(&buf[i*nbytes],&sendbuf[m*nbytes],nbytes);
|
||||
memcpy(dest,&sendbuf[m*nbytes],nbytes);
|
||||
dest += nbytes;
|
||||
}
|
||||
MPI_Send(buf,count*nbytes,MPI_CHAR,proc_send[isend],0,world);
|
||||
}
|
||||
|
||||
// copy datums to self, put at beginning of recvbuf
|
||||
|
||||
dest = recvbuf;
|
||||
for (i = 0; i < num_self; i++) {
|
||||
m = index_self[i];
|
||||
memcpy(&recvbuf[i*nbytes],&sendbuf[m*nbytes],nbytes);
|
||||
memcpy(dest,&sendbuf[m*nbytes],nbytes);
|
||||
dest += nbytes;
|
||||
}
|
||||
|
||||
// wait on all incoming messages
|
||||
|
|
|
@ -120,6 +120,9 @@ void ReadData::command(int narg, char **arg)
|
|||
{
|
||||
if (narg < 1) error->all(FLERR,"Illegal read_data command");
|
||||
|
||||
MPI_Barrier(world);
|
||||
double time1 = MPI_Wtime;
|
||||
|
||||
// optional args
|
||||
|
||||
addflag = NONE;
|
||||
|
@ -906,6 +909,18 @@ void ReadData::command(int narg, char **arg)
|
|||
|
||||
force->kspace = saved_kspace;
|
||||
}
|
||||
|
||||
// total time
|
||||
|
||||
MPI_Barrier(world);
|
||||
double time2 = MPI_Wtime;
|
||||
|
||||
if (comm->me == 0) {
|
||||
if (screen)
|
||||
fprintf(screen," read_atoms CPU = %g secs\n",time2-time1);
|
||||
if (logfile)
|
||||
fprintf(logfile," read_atoms CPU = %g secs\n",time2-time1);
|
||||
}
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
|
|
|
@ -81,6 +81,9 @@ void ReadRestart::command(int narg, char **arg)
|
|||
if (domain->box_exist)
|
||||
error->all(FLERR,"Cannot read_restart after simulation box is defined");
|
||||
|
||||
MPI_Barrier(world);
|
||||
double time1 = MPI_Wtime;
|
||||
|
||||
MPI_Comm_rank(world,&me);
|
||||
MPI_Comm_size(world,&nprocs);
|
||||
|
||||
|
@ -562,6 +565,18 @@ void ReadRestart::command(int narg, char **arg)
|
|||
Special special(lmp);
|
||||
special.build();
|
||||
}
|
||||
|
||||
// total time
|
||||
|
||||
MPI_Barrier(world);
|
||||
double time2 = MPI_Wtime;
|
||||
|
||||
if (comm->me == 0) {
|
||||
if (screen)
|
||||
fprintf(screen," read_restart CPU = %g secs\n",time2-time1);
|
||||
if (logfile)
|
||||
fprintf(logfile," read_restart CPU = %g secs\n",time2-time1);
|
||||
}
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
|
|
|
@ -76,7 +76,7 @@ void Replicate::command(int narg, char **arg)
|
|||
if (atom->nextra_grow || atom->nextra_restart || atom->nextra_store)
|
||||
error->all(FLERR,"Cannot replicate with fixes that store atom quantities");
|
||||
|
||||
// Record wall time for atom replication
|
||||
// record wall time for atom replication
|
||||
|
||||
MPI_Barrier(world);
|
||||
double time1 = MPI_Wtime();
|
||||
|
@ -762,15 +762,15 @@ void Replicate::command(int narg, char **arg)
|
|||
special.build();
|
||||
}
|
||||
|
||||
// Wall time
|
||||
// total time
|
||||
|
||||
MPI_Barrier(world);
|
||||
double time2 = MPI_Wtime();
|
||||
|
||||
if (me == 0) {
|
||||
if (screen)
|
||||
fprintf(screen," Time spent = %g secs\n",time2-time1);
|
||||
fprintf(screen," replicate CPU = %g secs\n",time2-time1);
|
||||
if (logfile)
|
||||
fprintf(logfile," Time spent = %g secs\n",time2-time1);
|
||||
fprintf(logfile," replicate CPU = %g secs\n",time2-time1);
|
||||
}
|
||||
}
|
||||
|
|
1095
src/special.cpp
1095
src/special.cpp
File diff suppressed because it is too large
Load Diff
|
@ -28,27 +28,30 @@ class Special : protected Pointers {
|
|||
int me,nprocs;
|
||||
tagint **onetwo,**onethree,**onefour;
|
||||
|
||||
// data used by ring callback methods
|
||||
// data used by rendezvous callback methods
|
||||
|
||||
int *count;
|
||||
int **dflag;
|
||||
int max_rvous;
|
||||
|
||||
struct InRvous {
|
||||
int me;
|
||||
tagint atomID,partnerID;
|
||||
};
|
||||
|
||||
struct OutRvous {
|
||||
tagint atomID,partnerID;
|
||||
};
|
||||
|
||||
void dedup();
|
||||
void angle_trim();
|
||||
void dihedral_trim();
|
||||
void combine();
|
||||
void fix_alteration();
|
||||
void timer_output(double);
|
||||
|
||||
// callback functions for ring communication
|
||||
// callback function for rendezvous communication
|
||||
|
||||
static void ring_one(int, char *, void *);
|
||||
static void ring_two(int, char *, void *);
|
||||
static void ring_three(int, char *, void *);
|
||||
static void ring_four(int, char *, void *);
|
||||
static void ring_five(int, char *, void *);
|
||||
static void ring_six(int, char *, void *);
|
||||
static void ring_seven(int, char *, void *);
|
||||
static void ring_eight(int, char *, void *);
|
||||
static int rendezvous_1234(int, char *, int *&, char *&, void *);
|
||||
static int rendezvous_trim(int, char *, int *&, char *&, void *);
|
||||
};
|
||||
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue