added rendezvous via all2all

This commit is contained in:
Steve Plimpton 2019-01-23 14:49:52 -07:00 committed by Axel Kohlmeyer
parent 981f12ebeb
commit fc002e30d3
No known key found for this signature in database
GPG Key ID: D9B44E93BF0C375A
7 changed files with 566 additions and 105 deletions

View File

@ -1576,8 +1576,9 @@ void FixRigidSmall::create_bodies(tagint *bodyID)
// func = compute bbox of each body, find atom closest to geometric center
char *buf;
int nreturn = comm->rendezvous(ncount,proclist,(char *) inbuf,sizeof(InRvous),
rendezvous_body,buf,sizeof(OutRvous),
int nreturn = comm->rendezvous(1,ncount,(char *) inbuf,sizeof(InRvous),
0,proclist,
rendezvous_body,0,buf,sizeof(OutRvous),
(void *) this);
OutRvous *outbuf = (OutRvous *) buf;

View File

@ -1068,9 +1068,9 @@ void FixShake::atom_owners()
// each proc assigned every 1/Pth atom
char *buf;
comm->rendezvous(nlocal,proclist,
(char *) idbuf,sizeof(IDRvous),
rendezvous_ids,buf,0,(void *) this);
comm->rendezvous(1,nlocal,(char *) idbuf,sizeof(IDRvous),
0,proclist,
rendezvous_ids,0,buf,0,(void *) this);
memory->destroy(proclist);
memory->sfree(idbuf);
@ -1174,9 +1174,10 @@ void FixShake::partner_info(int *npartner, tagint **partner_tag,
// receives all data needed to populate un-owned partner 4 values
char *buf;
int nreturn = comm->rendezvous(nsend,proclist,
(char *) inbuf,sizeof(PartnerInfo),
rendezvous_partners_info,buf,sizeof(PartnerInfo),
int nreturn = comm->rendezvous(1,nsend,(char *) inbuf,sizeof(PartnerInfo),
0,proclist,
rendezvous_partners_info,
0,buf,sizeof(PartnerInfo),
(void *) this);
PartnerInfo *outbuf = (PartnerInfo *) buf;
@ -1194,7 +1195,8 @@ void FixShake::partner_info(int *npartner, tagint **partner_tag,
partner_type[i][j] = outbuf[m].type;
partner_massflag[i][j] = outbuf[m].massflag;
// only set partner_bondtype if my atom did not set it when setting up rendezvous
// only set partner_bondtype if my atom did not set it
// when setting up rendezvous
// if this proc set it, then sender of this datum set outbuf.bondtype = 0
if (partner_bondtype[i][j] == 0)
@ -1261,9 +1263,9 @@ void FixShake::nshake_info(int *npartner, tagint **partner_tag,
// receives all data needed to populate un-owned partner nshake
char *buf;
int nreturn = comm->rendezvous(nsend,proclist,
(char *) inbuf,sizeof(NShakeInfo),
rendezvous_nshake,buf,sizeof(NShakeInfo),
int nreturn = comm->rendezvous(1,nsend,(char *) inbuf,sizeof(NShakeInfo),
0,proclist,
rendezvous_nshake,0,buf,sizeof(NShakeInfo),
(void *) this);
NShakeInfo *outbuf = (NShakeInfo *) buf;
@ -1354,9 +1356,9 @@ void FixShake::shake_info(int *npartner, tagint **partner_tag,
// receives all data needed to populate un-owned shake info
char *buf;
int nreturn = comm->rendezvous(nsend,proclist,
(char *) inbuf,sizeof(ShakeInfo),
rendezvous_shake,buf,sizeof(ShakeInfo),
int nreturn = comm->rendezvous(1,nsend,(char *) inbuf,sizeof(ShakeInfo),
0,proclist,
rendezvous_shake,0,buf,sizeof(ShakeInfo),
(void *) this);
ShakeInfo *outbuf = (ShakeInfo *) buf;
@ -1412,7 +1414,7 @@ int FixShake::rendezvous_ids(int n, char *inbuf,
fsptr->atomIDs = atomIDs;
fsptr->procowner = procowner;
// flag = 0: no 2nd irregular comm needed in comm->rendezvous
// flag = 0: no second comm needed in rendezvous
flag = 0;
return 0;

View File

@ -729,34 +729,78 @@ void Comm::ring(int n, int nper, void *inbuf, int messtag,
/* ----------------------------------------------------------------------
rendezvous communication operation
three stages:
first Irregular converts inbuf from caller decomp to rvous decomp
first comm sends inbuf from caller decomp to rvous decomp
callback operates on data in rendevous decomp
last Irregular converts outbuf from rvous decomp back to caller decomp
second comm sends outbuf from rvous decomp back to caller decomp
inputs:
n = # of input datums
proclist = proc that owns each input datum in rendezvous decomposition
inbuf = list of input datums
insize = size in bytes of each input datum
which = perform (0) irregular or (1) MPI_All2allv communication
n = # of datums in inbuf
inbuf = vector of input datums
insize = byte size of each input datum
inorder = 0 for inbuf in random proc order, 1 for datums ordered by proc
procs: inorder 0 = proc to send each datum to, 1 = # of datums/proc,
callback = caller function to invoke in rendezvous decomposition
takes input datums, returns output datums
outorder = same as inorder, but for datums returned by callback()
ptr = pointer to caller class, passed to callback()
outputs:
nout = # of output datums (function return)
outbuf = list of output datums
outsize = size in bytes of each output datum
outbuf = vector of output datums
outsize = byte size of each output datum
callback inputs:
nrvous = # of rvous decomp datums in inbuf_rvous
inbuf_rvous = vector of rvous decomp input datums
ptr = pointer to caller class
callback outputs:
nrvous_out = # of rvous decomp output datums (function return)
flag = 0 for no second comm, 1 for outbuf_rvous = inbuf_rvous,
2 for second comm with new outbuf_rvous
procs_rvous = outorder 0 = proc to send each datum to, 1 = # of datums/proc
allocated
outbuf_rvous = vector of rvous decomp output datums
NOTE: could use MPI_INT or MPI_DOUBLE insead of MPI_CHAR
to avoid checked-for overflow in MPI_Alltoallv?
------------------------------------------------------------------------- */
int Comm::rendezvous(int n, int *proclist, char *inbuf, int insize,
int (*callback)(int, char *, int &, int *&, char *&, void *),
char *&outbuf, int outsize, void *ptr)
int Comm::
rendezvous(int which, int n, char *inbuf, int insize,
int inorder, int *procs,
int (*callback)(int, char *, int &, int *&, char *&, void *),
int outorder, char *&outbuf, int outsize, void *ptr)
{
// comm inbuf from caller decomposition to rendezvous decomposition
int nout;
if (which == 0)
nout = rendezvous_irregular(n,inbuf,insize,inorder,procs,callback,
outorder,outbuf,outsize,ptr);
else
nout = rendezvous_all2all(n,inbuf,insize,inorder,procs,callback,
outorder,outbuf,outsize,ptr);
return nout;
}
/* ---------------------------------------------------------------------- */
int Comm::
rendezvous_irregular(int n, char *inbuf, int insize, int inorder, int *procs,
int (*callback)(int, char *, int &, int *&, char *&, void *),
int outorder, char *&outbuf,
int outsize, void *ptr)
{
// irregular comm of inbuf from caller decomp to rendezvous decomp
Irregular *irregular = new Irregular(lmp);
int n_rvous = irregular->create_data(n,proclist); // add sort
char *inbuf_rvous = (char *) memory->smalloc((bigint) n_rvous*insize,
"rendezvous:inbuf_rvous");
int nrvous;
if (inorder) nrvous = irregular->create_data_grouped(n,procs);
else nrvous = irregular->create_data(n,procs);
char *inbuf_rvous = (char *) memory->smalloc((bigint) nrvous*insize,
"rendezvous:inbuf");
irregular->exchange_data(inbuf,insize,inbuf_rvous);
bigint irregular1_bytes = 0; //irregular->irregular_bytes;
irregular->destroy_data();
delete irregular;
@ -764,29 +808,253 @@ int Comm::rendezvous(int n, int *proclist, char *inbuf, int insize,
// callback() allocates/populates proclist_rvous and outbuf_rvous
int flag;
int *proclist_rvous;
int *procs_rvous;
char *outbuf_rvous;
int nout_rvous =
callback(n_rvous,inbuf_rvous,flag,proclist_rvous,outbuf_rvous,ptr);
int nrvous_out = callback(nrvous,inbuf_rvous,flag,
procs_rvous,outbuf_rvous,ptr);
if (flag != 1) memory->sfree(inbuf_rvous); // outbuf_rvous = inbuf_vous
if (flag == 0) return 0; // all nout_rvous are 0, no 2nd irregular
if (flag == 0) return 0; // all nout_rvous are 0, no 2nd comm stage
// comm outbuf from rendezvous decomposition back to caller
// irregular comm of outbuf from rendezvous decomp back to caller decomp
// caller will free outbuf
irregular = new Irregular(lmp);
int nout = irregular->create_data(nout_rvous,proclist_rvous);
outbuf = (char *) memory->smalloc((bigint) nout*outsize,"rendezvous:outbuf");
int nout;
if (outorder)
nout = irregular->create_data_grouped(nrvous_out,procs_rvous);
else nout = irregular->create_data(nrvous_out,procs_rvous);
outbuf = (char *) memory->smalloc((bigint) nout*outsize,
"rendezvous:outbuf");
irregular->exchange_data(outbuf_rvous,outsize,outbuf);
bigint irregular2_bytes = 0; //irregular->irregular_bytes;
irregular->destroy_data();
delete irregular;
memory->destroy(proclist_rvous);
memory->destroy(procs_rvous);
memory->sfree(outbuf_rvous);
// approximate memory tally
bigint rvous_bytes = 0;
rvous_bytes += n*insize; // inbuf
rvous_bytes += nout*outsize; // outbuf
rvous_bytes += nrvous*insize; // inbuf_rvous
rvous_bytes += nrvous_out*outsize; // outbuf_rvous
rvous_bytes += nrvous_out*sizeof(int); // procs_rvous
rvous_bytes += MAX(irregular1_bytes,irregular2_bytes); // max of 2 comms
// return number of output datums
return nout;
}
/* ---------------------------------------------------------------------- */
int Comm::
rendezvous_all2all(int n, char *inbuf, int insize, int inorder, int *procs,
int (*callback)(int, char *, int &, int *&, char *&, void *),
int outorder, char *&outbuf, int outsize, void *ptr)
{
int iproc;
bigint all2all1_bytes,all2all2_bytes;
int *sendcount,*sdispls,*recvcount,*rdispls;
int *procs_a2a;
bigint *offsets;
char *inbuf_a2a,*outbuf_a2a;
// create procs and inbuf for All2all if necesary
if (!inorder) {
memory->create(procs_a2a,nprocs,"rendezvous:procs");
inbuf_a2a = (char *) memory->smalloc((bigint) n*insize,
"rendezvous:inbuf");
memory->create(offsets,nprocs,"rendezvous:offsets");
for (int i = 0; i < nprocs; i++) procs_a2a[i] = 0;
for (int i = 0; i < n; i++) procs_a2a[procs[i]]++;
offsets[0] = 0;
for (int i = 1; i < nprocs; i++)
offsets[i] = offsets[i-1] + insize*procs_a2a[i-1];
bigint offset = 0;
for (int i = 0; i < n; i++) {
iproc = procs[i];
memcpy(&inbuf_a2a[offsets[iproc]],&inbuf[offset],insize);
offsets[iproc] += insize;
offset += insize;
}
all2all1_bytes = nprocs*sizeof(int) + nprocs*sizeof(bigint) + n*insize;
} else {
procs_a2a = procs;
inbuf_a2a = inbuf;
all2all1_bytes = 0;
}
// create args for MPI_Alltoallv() on input data
memory->create(sendcount,nprocs,"rendezvous:sendcount");
memcpy(sendcount,procs_a2a,nprocs*sizeof(int));
memory->create(recvcount,nprocs,"rendezvous:recvcount");
MPI_Alltoall(sendcount,1,MPI_INT,recvcount,1,MPI_INT,world);
memory->create(sdispls,nprocs,"rendezvous:sdispls");
memory->create(rdispls,nprocs,"rendezvous:rdispls");
sdispls[0] = rdispls[0] = 0;
for (int i = 1; i < nprocs; i++) {
sdispls[i] = sdispls[i-1] + sendcount[i-1];
rdispls[i] = rdispls[i-1] + recvcount[i-1];
}
int nrvous = rdispls[nprocs-1] + recvcount[nprocs-1];
// test for overflow of input data due to imbalance or insize
// means that individual sdispls or rdispls values overflow
int overflow = 0;
if ((bigint) n*insize > MAXSMALLINT) overflow = 1;
if ((bigint) nrvous*insize > MAXSMALLINT) overflow = 1;
int overflowall;
MPI_Allreduce(&overflow,&overflowall,1,MPI_INT,MPI_MAX,world);
if (overflowall) error->all(FLERR,"Overflow input size in rendezvous_a2a");
for (int i = 0; i < nprocs; i++) {
sendcount[i] *= insize;
sdispls[i] *= insize;
recvcount[i] *= insize;
rdispls[i] *= insize;
}
// all2all comm of inbuf from caller decomp to rendezvous decomp
char *inbuf_rvous = (char *) memory->smalloc((bigint) nrvous*insize,
"rendezvous:inbuf");
MPI_Alltoallv(inbuf_a2a,sendcount,sdispls,MPI_CHAR,
inbuf_rvous,recvcount,rdispls,MPI_CHAR,world);
if (!inorder) {
memory->destroy(procs_a2a);
memory->sfree(inbuf_a2a);
memory->destroy(offsets);
}
// peform rendezvous computation via callback()
// callback() allocates/populates proclist_rvous and outbuf_rvous
int flag;
int *procs_rvous;
char *outbuf_rvous;
int nrvous_out = callback(nrvous,inbuf_rvous,flag,
procs_rvous,outbuf_rvous,ptr);
if (flag != 1) memory->sfree(inbuf_rvous); // outbuf_rvous = inbuf_vous
if (flag == 0) return 0; // all nout_rvous are 0, no 2nd irregular
// create procs and outbuf for All2all if necesary
if (!outorder) {
memory->create(procs_a2a,nprocs,"rendezvous_a2a:procs");
outbuf_a2a = (char *) memory->smalloc((bigint) nrvous_out*outsize,
"rendezvous:outbuf");
memory->create(offsets,nprocs,"rendezvous:offsets");
for (int i = 0; i < nprocs; i++) procs_a2a[i] = 0;
for (int i = 0; i < nrvous_out; i++) procs_a2a[procs_rvous[i]]++;
offsets[0] = 0;
for (int i = 1; i < nprocs; i++)
offsets[i] = offsets[i-1] + outsize*procs_a2a[i-1];
bigint offset = 0;
for (int i = 0; i < nrvous_out; i++) {
iproc = procs_rvous[i];
memcpy(&outbuf_a2a[offsets[iproc]],&outbuf_rvous[offset],outsize);
offsets[iproc] += outsize;
offset += outsize;
}
all2all2_bytes = nprocs*sizeof(int) + nprocs*sizeof(bigint) +
nrvous_out*outsize;
} else {
procs_a2a = procs_rvous;
outbuf_a2a = outbuf_rvous;
all2all2_bytes = 0;
}
// comm outbuf from rendezvous decomposition back to caller
memcpy(sendcount,procs_a2a,nprocs*sizeof(int));
MPI_Alltoall(sendcount,1,MPI_INT,recvcount,1,MPI_INT,world);
sdispls[0] = rdispls[0] = 0;
for (int i = 1; i < nprocs; i++) {
sdispls[i] = sdispls[i-1] + sendcount[i-1];
rdispls[i] = rdispls[i-1] + recvcount[i-1];
}
int nout = rdispls[nprocs-1] + recvcount[nprocs-1];
// test for overflow of outbuf due to imbalance or outsize
// means that individual sdispls or rdispls values overflow
overflow = 0;
if ((bigint) nrvous*outsize > MAXSMALLINT) overflow = 1;
if ((bigint) nout*outsize > MAXSMALLINT) overflow = 1;
MPI_Allreduce(&overflow,&overflowall,1,MPI_INT,MPI_MAX,world);
if (overflowall) error->all(FLERR,"Overflow output in rendezvous_a2a");
for (int i = 0; i < nprocs; i++) {
sendcount[i] *= outsize;
sdispls[i] *= outsize;
recvcount[i] *= outsize;
rdispls[i] *= outsize;
}
// all2all comm of outbuf from rendezvous decomp back to caller decomp
// caller will free outbuf
outbuf = (char *) memory->smalloc((bigint) nout*outsize,"rendezvous:outbuf");
MPI_Alltoallv(outbuf_a2a,sendcount,sdispls,MPI_CHAR,
outbuf,recvcount,rdispls,MPI_CHAR,world);
memory->destroy(procs_rvous);
memory->sfree(outbuf_rvous);
if (!outorder) {
memory->destroy(procs_a2a);
memory->sfree(outbuf_a2a);
memory->destroy(offsets);
}
// clean up
memory->destroy(sendcount);
memory->destroy(recvcount);
memory->destroy(sdispls);
memory->destroy(rdispls);
// approximate memory tally
bigint rvous_bytes = 0;
rvous_bytes += n*insize; // inbuf
rvous_bytes += nout*outsize; // outbuf
rvous_bytes += nrvous*insize; // inbuf_rvous
rvous_bytes += nrvous_out*outsize; // outbuf_rvous
rvous_bytes += nrvous_out*sizeof(int); // procs_rvous
rvous_bytes += 4*nprocs*sizeof(int); // all2all vectors
rvous_bytes += MAX(all2all1_bytes,all2all2_bytes); // reorder ops
// return number of datums
return nout;

View File

@ -109,9 +109,9 @@ class Comm : protected Pointers {
void ring(int, int, void *, int, void (*)(int, char *, void *),
void *, void *, int self = 1);
int rendezvous(int, int *, char *, int,
int rendezvous(int, int, char *, int, int, int *,
int (*)(int, char *, int &, int *&, char *&, void *),
char *&, int, void *);
int, char *&, int, void *);
int read_lines_from_file(FILE *, int, int, char *);
int read_lines_from_file_universe(FILE *, int, int, char *);
@ -146,6 +146,14 @@ class Comm : protected Pointers {
int ncores; // # of cores per node
int coregrid[3]; // 3d grid of cores within a node
int user_coregrid[3]; // user request for cores in each dim
int rendezvous_irregular(int, char *, int, int, int *,
int (*)(int, char *, int &, int *&, char *&, void *),
int, char *&, int, void *);
int rendezvous_all2all(int, char *, int, int, int *,
int (*)(int, char *, int &, int *&, char *&, void *),
int, char *&, int, void *);
public:
enum{MULTIPLE};
};

View File

@ -622,6 +622,7 @@ int Irregular::create_data(int n, int *proclist, int sortflag)
num_send = new int[nsend_proc];
index_send = new int[n-work1[me]];
index_self = new int[work1[me]];
maxindex = n;
// proc_send = procs I send to
// num_send = # of datums I send to each proc
@ -679,8 +680,182 @@ int Irregular::create_data(int n, int *proclist, int sortflag)
// receive incoming messages
// proc_recv = procs I recv from
// num_recv = total size of message each proc sends me
// nrecvdatum = total size of data I recv
// num_recv = # of datums each proc sends me
// nrecvdatum = total # of datums I recv
int nrecvdatum = 0;
for (i = 0; i < nrecv_proc; i++) {
MPI_Recv(&num_recv[i],1,MPI_INT,MPI_ANY_SOURCE,0,world,status);
proc_recv[i] = status->MPI_SOURCE;
nrecvdatum += num_recv[i];
}
nrecvdatum += num_self;
// sort proc_recv and num_recv by proc ID if requested
// useful for debugging to insure reproducible ordering of received datums
if (sortflag) {
int *order = new int[nrecv_proc];
int *proc_recv_ordered = new int[nrecv_proc];
int *num_recv_ordered = new int[nrecv_proc];
for (i = 0; i < nrecv_proc; i++) order[i] = i;
#if defined(LMP_QSORT)
proc_recv_copy = proc_recv;
qsort(order,nrecv_proc,sizeof(int),compare_standalone);
#else
merge_sort(order,nrecv_proc,(void *)proc_recv,compare_standalone);
#endif
int j;
for (i = 0; i < nrecv_proc; i++) {
j = order[i];
proc_recv_ordered[i] = proc_recv[j];
num_recv_ordered[i] = num_recv[j];
}
memcpy(proc_recv,proc_recv_ordered,nrecv_proc*sizeof(int));
memcpy(num_recv,num_recv_ordered,nrecv_proc*sizeof(int));
delete [] order;
delete [] proc_recv_ordered;
delete [] num_recv_ordered;
}
// barrier to insure all MPI_ANY_SOURCE messages are received
// else another proc could proceed to exchange_data() and send to me
MPI_Barrier(world);
// return # of datums I will receive
return nrecvdatum;
}
/* ----------------------------------------------------------------------
create communication plan based on list of datums of uniform size
n = # of datums to send
procs = how many datums to send to each proc, must include self
sort = flag for sorting order of received messages by proc ID
return total # of datums I will recv, including any to self
------------------------------------------------------------------------- */
int Irregular::create_data_grouped(int n, int *procs, int sortflag)
{
int i,j,k,m;
// setup for collective comm
// work1 = # of datums I send to each proc, set self to 0
// work2 = 1 for all procs, used for ReduceScatter
for (i = 0; i < nprocs; i++) {
work1[i] = procs[i];
work2[i] = 1;
}
work1[me] = 0;
// nrecv_proc = # of procs I receive messages from, not including self
// options for performing ReduceScatter operation
// some are more efficient on some machines at big sizes
#ifdef LAMMPS_RS_ALLREDUCE_INPLACE
MPI_Allreduce(MPI_IN_PLACE,work1,nprocs,MPI_INT,MPI_SUM,world);
nrecv_proc = work1[me];
#else
#ifdef LAMMPS_RS_ALLREDUCE
MPI_Allreduce(work1,work2,nprocs,MPI_INT,MPI_SUM,world);
nrecv_proc = work2[me];
#else
MPI_Reduce_scatter(work1,&nrecv_proc,work2,MPI_INT,MPI_SUM,world);
#endif
#endif
// allocate receive arrays
proc_recv = new int[nrecv_proc];
num_recv = new int[nrecv_proc];
request = new MPI_Request[nrecv_proc];
status = new MPI_Status[nrecv_proc];
// work1 = # of datums I send to each proc, including self
// nsend_proc = # of procs I send messages to, not including self
for (i = 0; i < nprocs; i++) work1[i] = procs[i];
nsend_proc = 0;
for (i = 0; i < nprocs; i++)
if (work1[i]) nsend_proc++;
if (work1[me]) nsend_proc--;
// allocate send and self arrays
proc_send = new int[nsend_proc];
num_send = new int[nsend_proc];
index_send = new int[n-work1[me]];
index_self = new int[work1[me]];
maxindex = n;
// proc_send = procs I send to
// num_send = # of datums I send to each proc
// num_self = # of datums I copy to self
// to balance pattern of send messages:
// each proc begins with iproc > me, continues until iproc = me
// reset work1 to store which send message each proc corresponds to
int iproc = me;
int isend = 0;
for (i = 0; i < nprocs; i++) {
iproc++;
if (iproc == nprocs) iproc = 0;
if (iproc == me) {
num_self = work1[iproc];
work1[iproc] = 0;
} else if (work1[iproc] > 0) {
proc_send[isend] = iproc;
num_send[isend] = work1[iproc];
work1[iproc] = isend;
isend++;
}
}
// work2 = offsets into index_send for each proc I send to
// m = ptr into index_self
// index_send = list of which datums to send to each proc
// 1st N1 values are datum indices for 1st proc,
// next N2 values are datum indices for 2nd proc, etc
// index_self = list of which datums to copy to self
work2[0] = 0;
for (i = 1; i < nsend_proc; i++) work2[i] = work2[i-1] + num_send[i-1];
m = 0;
i = 0;
for (iproc = 0; iproc < nprocs; iproc++) {
k = procs[iproc];
for (j = 0; j < k; j++) {
if (iproc == me) index_self[m++] = i++;
else {
isend = work1[iproc];
index_send[work2[isend]++] = i++;
}
}
}
// tell receivers how much data I send
// sendmax_proc = largest # of datums I send in a single message
sendmax_proc = 0;
for (i = 0; i < nsend_proc; i++) {
MPI_Request tmpReq; // Use non-blocking send to avoid possible deadlock
MPI_Isend(&num_send[i],1,MPI_INT,proc_send[i],0,world,&tmpReq);
MPI_Request_free(&tmpReq); // the MPI_Barrier below marks completion
sendmax_proc = MAX(sendmax_proc,num_send[i]);
}
// receive incoming messages
// proc_recv = procs I recv from
// num_recv = # of datums each proc sends me
// nrecvdatum = total # of datums I recv
int nrecvdatum = 0;
for (i = 0; i < nrecv_proc; i++) {
@ -789,6 +964,12 @@ void Irregular::exchange_data(char *sendbuf, int nbytes, char *recvbuf)
// wait on all incoming messages
if (nrecv_proc) MPI_Waitall(nrecv_proc,request,status);
// approximate memory tally
bigint irregular_bytes = 2*nprocs*sizeof(int);
irregular_bytes += maxindex*sizeof(int);
irregular_bytes += maxbuf;
}
/* ----------------------------------------------------------------------

View File

@ -33,6 +33,7 @@ class Irregular : protected Pointers {
int *procassign = NULL);
int migrate_check();
int create_data(int, int *, int sortflag = 0);
int create_data_grouped(int, int *, int sortflag = 0);
void exchange_data(char *, int, char *);
void destroy_data();
bigint memory_usage();
@ -48,6 +49,7 @@ class Irregular : protected Pointers {
double *dbuf; // double buf for largest single atom send
int maxbuf; // size of char buf in bytes
char *buf; // char buf for largest single data send
int maxindex; // combined size of index_send + index_self
int *mproclist,*msizes; // persistent vectors in migrate_atoms
int maxlocal; // allocated size of mproclist and msizes

View File

@ -172,10 +172,9 @@ void Special::atom_owners()
IDRvous *idbuf = (IDRvous *)
memory->smalloc((bigint) nlocal*sizeof(IDRvous),"special:idbuf");
// setup input buf to rendezvous comm
// input datums = pairs of bonded atoms
// owning proc for each datum = random hash of atomID
// setup input buf for rendezvous comm
// one datum for each owned atom: datum = owning proc, atomID
// each proc assigned every 1/Pth atom
for (int i = 0; i < nlocal; i++) {
proclist[i] = tag[i] % nprocs;
@ -184,19 +183,18 @@ void Special::atom_owners()
}
// perform rendezvous operation
// each proc assigned every 1/Pth atom
char *buf;
comm->rendezvous(nlocal,proclist,
(char *) idbuf,sizeof(IDRvous),
rendezvous_ids,buf,0,(void *) this);
comm->rendezvous(1,nlocal,(char *) idbuf,sizeof(IDRvous),0,proclist,
rendezvous_ids,0,buf,0,(void *) this);
memory->destroy(proclist);
memory->sfree(idbuf);
}
/* ----------------------------------------------------------------------
onetwo build
onetwo build when newton_bond flag on
uses rendezvous comm
------------------------------------------------------------------------- */
void Special::onetwo_build_newton()
@ -225,9 +223,8 @@ void Special::onetwo_build_newton()
memory->smalloc((bigint) nsend*sizeof(PairRvous),"special:inbuf");
// setup input buf to rendezvous comm
// input datums = pairs of bonded atoms
// owning proc for each datum = atomID % nprocs
// one datum for each bond partner: bond partner ID, atomID
// one datum for each unowned bond partner: bond partner ID, atomID
// owning proc for each datum = bond partner ID % nprocs
nsend = 0;
for (i = 0; i < nlocal; i++) {
@ -242,19 +239,19 @@ void Special::onetwo_build_newton()
}
// perform rendezvous operation
// each proc owns random subset of atoms
char *buf;
int nreturn = comm->rendezvous(nsend,proclist,
(char *) inbuf,sizeof(PairRvous),
rendezvous_pairs,buf,sizeof(PairRvous),
int nreturn = comm->rendezvous(1,nsend,(char *) inbuf,sizeof(PairRvous),
0,proclist,
rendezvous_pairs,0,buf,sizeof(PairRvous),
(void *) this);
PairRvous *outbuf = (PairRvous *) buf;
memory->destroy(proclist);
memory->sfree(inbuf);
// set nspecial[0] and onetwo for all owned atoms based on output info
// set nspecial[0] and onetwo for all owned atoms
// based on owned info plus rendezvous output info
// output datums = pairs of atoms that are 1-2 neighbors
for (i = 0; i < nlocal; i++) {
@ -327,8 +324,8 @@ void Special::onetwo_build_newton_off()
}
/* ----------------------------------------------------------------------
onetwo build with newton_bond flag off
no need for rendezvous comm
onethree build
uses rendezvous comm
------------------------------------------------------------------------- */
void Special::onethree_build()
@ -355,10 +352,10 @@ void Special::onethree_build()
memory->smalloc((bigint) nsend*sizeof(PairRvous),"special:inbuf");
// setup input buf to rendezvous comm
// input datums = all pairs of onetwo atoms (they are 1-3 neighbors)
// owning proc for each datum = random hash of atomID
// one datum for each owned atom: datum = owning proc, atomID
// one datum for each onetwo pair: datum = atomID1, atomID2
// datums = pairs of onetwo partners where either is unknown
// these pairs are onethree neighbors
// datum = onetwo ID, onetwo ID
// owning proc for each datum = onetwo ID % nprocs
nsend = 0;
for (i = 0; i < nlocal; i++) {
@ -377,20 +374,19 @@ void Special::onethree_build()
}
// perform rendezvous operation
// each proc owns random subset of atoms
// receives all info to form and return their onethree lists
char *buf;
int nreturn = comm->rendezvous(nsend,proclist,
(char *) inbuf,sizeof(PairRvous),
rendezvous_pairs,buf,sizeof(PairRvous),
int nreturn = comm->rendezvous(1,nsend,(char *) inbuf,sizeof(PairRvous),
0,proclist,
rendezvous_pairs,0,buf,sizeof(PairRvous),
(void *) this);
PairRvous *outbuf = (PairRvous *) buf;
memory->destroy(proclist);
memory->sfree(inbuf);
// set nspecial[1] and onethree for all owned atoms based on output info
// set nspecial[1] and onethree for all owned atoms
// based on owned info plus rendezvous output info
// output datums = pairs of atoms that are 1-3 neighbors
for (i = 0; i < nlocal; i++) {
@ -434,7 +430,8 @@ void Special::onethree_build()
}
/* ----------------------------------------------------------------------
remove duplicates within each of onetwo, onethree, onefour individually
onefour build
uses rendezvous comm
------------------------------------------------------------------------- */
void Special::onefour_build()
@ -446,7 +443,6 @@ void Special::onefour_build()
int nlocal = atom->nlocal;
// nsend = # of my datums to send
// include nlocal datums with owner of each atom
int nsend = 0;
for (i = 0; i < nlocal; i++) {
@ -462,10 +458,10 @@ void Special::onefour_build()
memory->smalloc((bigint) nsend*sizeof(PairRvous),"special:inbuf");
// setup input buf to rendezvous comm
// input datums = all pairs of onethree and onetwo atoms (they're 1-4 neighbors)
// owning proc for each datum = random hash of atomID
// one datum for each owned atom: datum = owning proc, atomID
// one datum for each onethree/onetwo pair: datum = atomID1, atomID2
// datums = pairs of onethree and onetwo partners where onethree is unknown
// these pairs are onefour neighbors
// datum = onetwo ID, onetwo ID
// owning proc for each datum = onethree ID % nprocs
nsend = 0;
for (i = 0; i < nlocal; i++) {
@ -483,20 +479,19 @@ void Special::onefour_build()
}
// perform rendezvous operation
// each proc owns random subset of atoms
// receives all info to form and return their onefour lists
char *buf;
int nreturn = comm->rendezvous(nsend,proclist,
(char *) inbuf,sizeof(PairRvous),
rendezvous_pairs,buf,sizeof(PairRvous),
int nreturn = comm->rendezvous(1,nsend,(char *) inbuf,sizeof(PairRvous),
0,proclist,
rendezvous_pairs,0,buf,sizeof(PairRvous),
(void *) this);
PairRvous *outbuf = (PairRvous *) buf;
memory->destroy(proclist);
memory->sfree(inbuf);
// set nspecial[2] and onefour for all owned atoms based on output info
// set nspecial[2] and onefour for all owned atoms
// based on owned info plus rendezvous output info
// output datums = pairs of atoms that are 1-4 neighbors
for (i = 0; i < nlocal; i++) {
@ -780,6 +775,7 @@ void Special::combine()
trim list of 1-3 neighbors by checking defined angles
delete a 1-3 neigh if they are not end atoms of a defined angle
and if they are not 1,3 or 2,4 atoms of a defined dihedral
uses rendezvous comm
------------------------------------------------------------------------- */
void Special::angle_trim()
@ -849,6 +845,10 @@ void Special::angle_trim()
memory->smalloc((bigint) nsend*sizeof(PairRvous),"special:inbuf");
// setup input buf to rendezvous comm
// datums = pairs of onetwo partners where either is unknown
// these pairs are onethree neighbors
// datum = onetwo ID, onetwo ID
// owning proc for each datum = onetwo ID % nprocs
nsend = 0;
for (i = 0; i < nlocal; i++) {
@ -902,14 +902,11 @@ void Special::angle_trim()
}
// perform rendezvous operation
// each proc owns random subset of atoms
// func = compute bbox of each body, flag atom closest to geometric center
// when done: each atom has atom ID of owning atom of its body
char *buf;
int nreturn = comm->rendezvous(nsend,proclist,
(char *) inbuf,sizeof(PairRvous),
rendezvous_pairs,buf,sizeof(PairRvous),
int nreturn = comm->rendezvous(1,nsend,(char *) inbuf,sizeof(PairRvous),
0,proclist,
rendezvous_pairs,0,buf,sizeof(PairRvous),
(void *) this);
PairRvous *outbuf = (PairRvous *) buf;
@ -931,6 +928,8 @@ void Special::angle_trim()
flag[i][j] = 0;
// reset nspecial[1] and onethree for all owned atoms based on output info
// based on owned info plus rendezvous output info
// output datums = pairs of atoms that are 1-3 neighbors
for (i = 0; i < nlocal; i++) {
for (j = 0; j < num_angle[i]; j++) {
@ -1036,8 +1035,9 @@ void Special::angle_trim()
}
/* ----------------------------------------------------------------------
trim list of 1-4 neighbors by checking defined dihedrals
trim list of 1-4 neighbors by checking all defined dihedrals
delete a 1-4 neigh if they are not end atoms of a defined dihedral
uses rendezvous comm
------------------------------------------------------------------------- */
void Special::dihedral_trim()
@ -1068,12 +1068,11 @@ void Special::dihedral_trim()
" %g = # of 1-4 neighbors before dihedral trim\n",allcount);
}
// if dihedrals are defined, rendezvous dihedral 1-4 pairs
// if dihedrals are defined, rendezvous the dihedral 1-4 pairs
if (num_dihedral && atom->ndihedrals) {
// nsend = # of my datums to send
// latter is only for dihedrals where I own atom2 (newton bond off)
int nsend = 0;
for (i = 0; i < nlocal; i++) {
@ -1092,6 +1091,11 @@ void Special::dihedral_trim()
memory->smalloc((bigint) nsend*sizeof(PairRvous),"special:inbuf");
// setup input buf to rendezvous comm
// datums = pairs of onefour atom IDs in a dihedral defined for my atoms
// only dihedrals where I own atom2 (in case newton_bond off)
// datum = atom1 ID and atom4 ID
// send the datum twice, to owner of atom1 ID and atom4 ID
// owning procs for each datum = atom1 or atom4 ID % nprocs
nsend = 0;
for (i = 0; i < nlocal; i++) {
@ -1117,21 +1121,18 @@ void Special::dihedral_trim()
}
// perform rendezvous operation
// each proc owns random subset of atoms
// func = compute bbox of each body, flag atom closest to geometric center
// when done: each atom has atom ID of owning atom of its body
char *buf;
int nreturn = comm->rendezvous(nsend,proclist,
(char *) inbuf,sizeof(PairRvous),
rendezvous_pairs,buf,sizeof(PairRvous),
int nreturn = comm->rendezvous(1,nsend,(char *) inbuf,sizeof(PairRvous),
0,proclist,
rendezvous_pairs,0,buf,sizeof(PairRvous),
(void *) this);
PairRvous *outbuf = (PairRvous *) buf;
memory->destroy(proclist);
memory->sfree(inbuf);
// flag all onefour atoms to keep
// flag all of my onefour IDs to keep
int max = 0;
for (i = 0; i < nlocal; i++)
@ -1145,8 +1146,6 @@ void Special::dihedral_trim()
for (j = 0; j < nspecial[i][2]; j++)
flag[i][j] = 0;
// reset nspecial[2] and onefour for all owned atoms based on output info
for (i = 0; i < nlocal; i++) {
for (j = 0; j < num_dihedral[i]; j++) {
if (tag[i] != dihedral_atom2[i][j]) continue;
@ -1251,7 +1250,7 @@ int Special::rendezvous_ids(int n, char *inbuf,
sptr->procowner = procowner;
sptr->atomIDs = atomIDs;
// flag = 0: no 2nd irregular comm needed in comm->rendezvous
// flag = 0: no second comm needed in rendezvous
flag = 0;
return 0;
@ -1272,7 +1271,7 @@ int Special::rendezvous_pairs(int n, char *inbuf,
Atom *atom = sptr->atom;
Memory *memory = sptr->memory;
// clear atom map so it can be here as a hash table
// clear atom map so it can be used here as a hash table
// faster than an STL map for large atom counts
atom->map_clear();