git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@12223 f3b2605a-c512-4ea7-a41b-209d697bcdaa

This commit is contained in:
sjplimp 2014-07-29 21:46:50 +00:00
parent e4abe8ee03
commit 8047d6058e
6 changed files with 473 additions and 192 deletions

View File

@ -279,14 +279,14 @@ void Balance::command(int narg, char **arg)
if (style == XYZ) {
if (comm->layout == LAYOUT_UNIFORM) {
if (xflag == USER || yflag == USER || zflag == USER)
comm->layout == LAYOUT_NONUNIFORM;
comm->layout = LAYOUT_NONUNIFORM;
} else if (comm->style == LAYOUT_NONUNIFORM) {
if (xflag == UNIFORM && yflag == UNIFORM && zflag == UNIFORM)
comm->layout == LAYOUT_UNIFORM;
comm->layout = LAYOUT_UNIFORM;
} else if (comm->style == LAYOUT_TILED) {
if (xflag == UNIFORM && yflag == UNIFORM && zflag == UNIFORM)
comm->layout == LAYOUT_UNIFORM;
else comm->layout == LAYOUT_NONUNIFORM;
comm->layout = LAYOUT_UNIFORM;
else comm->layout = LAYOUT_NONUNIFORM;
}
if (xflag == UNIFORM) {
@ -476,6 +476,7 @@ int *Balance::bisection(int sortflag)
if (!rcb) rcb = new RCB(lmp);
// NOTE: lo/hi args could be simulation box or particle bounding box
// if particle bbox, then mysplit needs to be reset to sim box
// NOTE: triclinic needs to be in lamda coords
int dim = domain->dimension;
@ -488,6 +489,10 @@ int *Balance::bisection(int sortflag)
// NOTE: this logic is specific to orthogonal boxes, not triclinic
comm->rcbnew = 1;
comm->rcbcut = rcb->cut;
comm->rcbcutdim = rcb->cutdim;
double (*mysplit)[2] = comm->mysplit;
mysplit[0][0] = (rcb->lo[0] - boxlo[0]) / prd[0];
@ -502,6 +507,8 @@ int *Balance::bisection(int sortflag)
if (rcb->hi[2] == boxhi[2]) mysplit[2][1] = 1.0;
else mysplit[2][1] = (rcb->hi[2] - boxlo[2]) / prd[2];
// return list of procs to send my atoms to
return rcb->sendproc;
}

View File

@ -60,6 +60,7 @@ Comm::Comm(LAMMPS *lmp) : Pointers(lmp)
grid2proc = NULL;
xsplit = ysplit = zsplit = NULL;
rcbnew = 0;
// use of OpenMP threads
// query OpenMP for number of threads/process set by user at run-time

View File

@ -50,7 +50,10 @@ class Comm : protected Pointers {
// public settings specific to layout = TILED
int rcbnew; // 1 if just reset by rebalance, else 0
double mysplit[3][2]; // fractional (0-1) bounds of my sub-domain
double rcbcut; // RCB cut by this proc
int rcbcutdim; // dimension of RCB cut
// methods

View File

@ -36,6 +36,10 @@ using namespace LAMMPS_NS;
#define BUFMIN 1000
#define BUFEXTRA 1000
// NOTE: change this to 16 after debugged
#define DELTA_PROCS 1
enum{SINGLE,MULTI}; // same as in Comm
enum{LAYOUT_UNIFORM,LAYOUT_NONUNIFORM,LAYOUT_TILED}; // several files
@ -66,14 +70,10 @@ CommTiled::CommTiled(LAMMPS *lmp, Comm *oldcomm) : Comm(*oldcomm)
CommTiled::~CommTiled()
{
free_swap();
if (sendlist) for (int i = 0; i < nswap; i++) memory->destroy(sendlist[i]);
memory->sfree(sendlist);
memory->destroy(maxsendlist);
memory->destroy(overlap);
memory->destroy(buf_send);
memory->destroy(buf_recv);
deallocate_swap(nswap);
}
/* ----------------------------------------------------------------------
@ -83,6 +83,8 @@ CommTiled::~CommTiled()
void CommTiled::init_buffers()
{
memory->create(overlap,nprocs,"comm:overlap");
maxexchange = maxexchange_atom + maxexchange_fix;
bufextra = maxexchange + BUFEXTRA;
@ -93,13 +95,6 @@ void CommTiled::init_buffers()
nswap = 2 * domain->dimension;
allocate_swap(nswap);
//sendlist = (int **) memory->smalloc(nswap*sizeof(int *),"comm:sendlist");
//memory->create(maxsendlist,nswap,"comm:maxsendlist");
//for (int i = 0; i < nswap; i++) {
// maxsendlist[i] = BUFMIN;
// memory->create(sendlist[i],BUFMIN,"comm:sendlist[i]");
//}
}
/* ----------------------------------------------------------------------
@ -115,9 +110,6 @@ void CommTiled::init()
if (triclinic)
error->all(FLERR,"Cannot yet use comm_style tiled with triclinic box");
if (domain->xperiodic || domain->yperiodic ||
(domain->dimension == 2 && domain->zperiodic))
error->all(FLERR,"Cannot yet use comm_style tiled with periodic box");
if (mode == MULTI)
error->all(FLERR,"Cannot yet use comm_style tiled with multi-mode comm");
@ -176,27 +168,44 @@ void CommTiled::init()
/* ----------------------------------------------------------------------
setup spatial-decomposition communication patterns
function of neighbor cutoff(s) & cutghostuser & current box size and tiling
sets nsendproc, nrecvproc, sendproc, recvproc
sets sendother, sendself, pbc_flag, pbc, sendbox
------------------------------------------------------------------------- */
void CommTiled::setup()
{
int i;
int dimension;
int *periodicity;
double *prd,*sublo,*subhi,*boxlo,*boxhi;
int dimension = domain->dimension;
int *periodicity = domain->periodicity;
double *prd = domain->prd;
double *boxlo = domain->boxlo;
double *boxhi = domain->boxhi;
double *sublo = domain->sublo;
double *subhi = domain->subhi;
// set function pointers
if (layout != LAYOUT_TILED) {
box_drop = &CommTiled::box_drop_brick;
box_other = &CommTiled::box_other_brick;
} else {
box_drop = &CommTiled::box_drop_tiled;
box_other = &CommTiled::box_other_tiled;
}
// if RCB decomp has just changed, gather needed global RCB info
if (rcbnew) {
rcbnew = 0;
memcpy(&rcbinfo[me].mysplit[0][0],&mysplit[0][0],6*sizeof(double));
rcbinfo[me].cut = rcbcut;
rcbinfo[me].dim = rcbcutdim;
MPI_Allgather(&rcbinfo[me],sizeof(RCBinfo),MPI_CHAR,
rcbinfo,sizeof(RCBinfo),MPI_CHAR,world);
}
// check that cutoff < any periodic box length
double cut = MAX(neighbor->cutneighmax,cutghostuser);
dimension = domain->dimension;
periodicity = domain->periodicity;
prd = domain->prd;
sublo = domain->sublo;
subhi = domain->subhi;
boxlo = domain->boxlo;
boxhi = domain->boxhi;
cutghost[0] = cutghost[1] = cutghost[2] = cut;
if ((periodicity[0] && cut > prd[0]) ||
@ -205,13 +214,12 @@ void CommTiled::setup()
error->all(FLERR,"Communication cutoff for comm_style tiled "
"cannot exceed periodic box length");
// NOTE: allocate overlap (to Nprocs?)
// NOTE: allocate 2nd dim of sendproc, recvproc, sendbox
// NOTE: func pointers for box_drop and box_other
// NOTE: write box_drop and box_other methods
// NOTE: for tiled, must do one-time gather of RCB cuts and proc boxes
// loop over 6 swap directions
// determine which procs I will send to and receive from in each swap
// done by intersecting ghost box with all proc sub-boxes it overlaps
// sets nsendproc, nrecvproc, sendproc, recvproc
// sets sendother, sendself, pbc_flag, pbc, sendbox
int *overlap;
int noverlap,noverlap1,indexme;
double lo1[3],hi1[3],lo2[3],hi2[3];
int one,two;
@ -220,7 +228,9 @@ void CommTiled::setup()
for (int idim = 0; idim < dimension; idim++) {
for (int iswap = 0; iswap < 2; iswap++) {
// ghost box in lower direction
// one = first ghost box in same periodic image
// two = second ghost box wrapped across periodic boundary
// either may not exist
one = 1;
lo1[0] = sublo[0]; lo1[1] = sublo[1]; lo1[2] = sublo[2];
@ -257,35 +267,35 @@ void CommTiled::setup()
}
}
// noverlap = # of overlaps of box1/2 with procs via box_drop()
// overlap = list of overlapping procs
// if overlap with self, indexme = index of me in list
indexme = -1;
noverlap = 0;
if (one) {
if (layout == LAYOUT_UNIFORM)
box_drop_uniform(idim,lo1,hi1,noverlap,overlap,indexme);
else if (layout == LAYOUT_NONUNIFORM)
box_drop_nonuniform(idim,lo1,hi1,noverlap,overlap,indexme);
else
box_drop_tiled(lo1,hi1,0,nprocs-1,noverlap,overlap,indexme);
}
if (one) (this->*box_drop)(idim,lo1,hi1,noverlap,indexme);
noverlap1 = noverlap;
if (two) {
if (layout == LAYOUT_UNIFORM)
box_drop_uniform(idim,lo2,hi2,noverlap,overlap,indexme);
else if (layout == LAYOUT_NONUNIFORM)
box_drop_nonuniform(idim,lo2,hi2,noverlap,overlap,indexme);
else
box_drop_tiled(lo2,hi2,0,nprocs-1,noverlap,overlap,indexme);
}
if (two) (this->*box_drop)(idim,lo2,hi2,noverlap,indexme);
// if self is in overlap list, move it to end of list
// if this (self) proc is in overlap list, move it to end of list
if (indexme >= 0) {
int tmp = overlap[noverlap-1];
overlap[noverlap-1] = overlap[indexme];
overlap[indexme] = tmp;
}
// reallocate 2nd dimensions of all send/recv arrays, based on noverlap
// # of sends of this swap = # of recvs of nswap +/- 1
if (noverlap > nprocmax[iswap]) {
int oldmax = nprocmax[iswap];
while (nprocmax[iswap] < noverlap) nprocmax[iswap] += DELTA_PROCS;
grow_swap_send(iswap,nprocmax[iswap],oldmax);
if (iswap == 0) grow_swap_recv(iswap+1,nprocmax[iswap]);
else grow_swap_recv(iswap-1,nprocmax[iswap]);
}
// overlap how has list of noverlap procs
// includes PBC effects
@ -320,12 +330,7 @@ void CommTiled::setup()
pbc[nswap][i][0] = pbc[nswap][i][1] = pbc[nswap][i][2] =
pbc[nswap][i][3] = pbc[nswap][i][4] = pbc[nswap][i][5] = 0;
if (layout == LAYOUT_UNIFORM)
box_other_uniform(overlap[i],oboxlo,oboxhi);
else if (layout == LAYOUT_NONUNIFORM)
box_other_nonuniform(overlap[i],oboxlo,oboxhi);
else
box_other_tiled(overlap[i],oboxlo,oboxhi);
(this->*box_other)(idim,iswap,overlap[i],oboxlo,oboxhi);
if (i < noverlap1) {
sbox[0] = MAX(oboxlo[0],lo1[0]);
@ -368,10 +373,6 @@ void CommTiled::setup()
nswap++;
}
}
// reallocate requests and statuses to max of any swap
}
/* ----------------------------------------------------------------------
@ -497,6 +498,9 @@ void CommTiled::reverse_comm()
// if comm_f_only set, exchange or copy directly from f, don't pack
for (int iswap = nswap-1; iswap >= 0; iswap--) {
nsend = nsendproc[iswap] - sendself[iswap];
nrecv = nrecvproc[iswap] - sendself[iswap];
if (comm_f_only) {
if (sendother[iswap]) {
for (i = 0; i < nsend; i++)
@ -587,7 +591,7 @@ void CommTiled::exchange()
void CommTiled::borders()
{
int i,n,irecv,ngroup,nlast,nsend,nrecv,ncount,rmaxswap;
int i,m,n,irecv,nlocal,nlast,nsend,nrecv,ncount,rmaxswap;
double xlo,xhi,ylo,yhi,zlo,zhi;
double *bbox;
double **x;
@ -609,35 +613,35 @@ void CommTiled::borders()
// for yz-dim swaps, check owned and ghost atoms
// store sent atom indices in list for use in future timesteps
// NOTE: assume SINGLE mode, add back in logic for MULTI mode later
// and for ngroup when bordergroup is set
x = atom->x;
for (i = 0; i < nsendproc[iswap]; i++) {
bbox = sendbox[iswap][i];
xlo = bbox[0]; xhi = bbox[1];
ylo = bbox[2]; yhi = bbox[3];
zlo = bbox[4]; zhi = bbox[5];
for (m = 0; m < nsendproc[iswap]; m++) {
bbox = sendbox[iswap][m];
xlo = bbox[0]; ylo = bbox[1]; zlo = bbox[2];
xhi = bbox[3]; yhi = bbox[4]; zhi = bbox[5];
ngroup = atom->nfirst;
nlocal = atom->nlocal;
if (iswap < 2) nlast = atom->nlocal;
else nlast = atom->nlocal + atom->nghost;
ncount = 0;
for (i = 0; i < ngroup; i++)
for (i = 0; i < nlocal; i++)
if (x[i][0] >= xlo && x[i][0] <= xhi &&
x[i][1] >= ylo && x[i][1] <= yhi &&
x[i][2] >= zlo && x[i][2] <= zhi) {
if (ncount == maxsendlist[iswap][i]) grow_list(iswap,i,ncount);
sendlist[iswap][i][ncount++] = i;
if (ncount == maxsendlist[iswap][m]) grow_list(iswap,i,ncount);
sendlist[iswap][m][ncount++] = i;
}
for (i = atom->nlocal; i < nlast; i++)
if (x[i][0] >= xlo && x[i][0] <= xhi &&
x[i][1] >= ylo && x[i][1] <= yhi &&
x[i][2] >= zlo && x[i][2] <= zhi) {
if (ncount == maxsendlist[iswap][i]) grow_list(iswap,i,ncount);
sendlist[iswap][i][ncount++] = i;
if (ncount == maxsendlist[iswap][m]) grow_list(iswap,i,ncount);
sendlist[iswap][m][ncount++] = i;
}
sendnum[iswap][i] = ncount;
sendnum[iswap][m] = ncount;
smax = MAX(smax,ncount);
}
@ -648,11 +652,11 @@ void CommTiled::borders()
nrecv = nrecvproc[iswap] - sendself[iswap];
if (sendother[iswap]) {
for (i = 0; i < nrecv; i++)
MPI_Irecv(&recvnum[iswap][i],1,MPI_INT,
recvproc[iswap][i],0,world,&requests[i]);
for (i = 0; i < nsend; i++)
MPI_Send(&sendnum[iswap][i],1,MPI_INT,sendproc[iswap][i],0,world);
for (m = 0; m < nrecv; m++)
MPI_Irecv(&recvnum[iswap][m],1,MPI_INT,
recvproc[iswap][m],0,world,&requests[m]);
for (m = 0; m < nsend; m++)
MPI_Send(&sendnum[iswap][m],1,MPI_INT,sendproc[iswap][m],0,world);
}
if (sendself[iswap]) recvnum[iswap][nrecv] = sendnum[iswap][nsend];
if (sendother[iswap]) MPI_Waitall(nrecv,requests,statuses);
@ -660,18 +664,18 @@ void CommTiled::borders()
// setup other per swap/proc values from sendnum and recvnum
rmaxswap = 0;
for (i = 0; i < nrecvproc[iswap]; i++) {
rmaxswap += recvnum[iswap][i];
size_forward_recv[iswap][i] = recvnum[iswap][i]*size_forward;
size_reverse_send[iswap][i] = recvnum[iswap][i]*size_reverse;
size_reverse_recv[iswap][i] = sendnum[iswap][i]*size_reverse;
if (i == 0) {
for (m = 0; m < nrecvproc[iswap]; m++) {
rmaxswap += recvnum[iswap][m];
size_forward_recv[iswap][m] = recvnum[iswap][m]*size_forward;
size_reverse_send[iswap][m] = recvnum[iswap][m]*size_reverse;
size_reverse_recv[iswap][m] = sendnum[iswap][m]*size_reverse;
if (m == 0) {
firstrecv[iswap][0] = atom->nlocal + atom->nghost;
forward_recv_offset[iswap][0] = 0;
} else {
firstrecv[iswap][i] = firstrecv[iswap][i-1] + recvnum[iswap][i-1];
forward_recv_offset[iswap][i] =
forward_recv_offset[iswap][i-1] + recvnum[iswap][i-1];
firstrecv[iswap][m] = firstrecv[iswap][m-1] + recvnum[iswap][m-1];
forward_recv_offset[iswap][m] =
forward_recv_offset[iswap][m-1] + recvnum[iswap][m-1];
}
}
rmax = MAX(rmax,rmaxswap);
@ -685,15 +689,15 @@ void CommTiled::borders()
if (ghost_velocity) {
if (sendother[iswap]) {
for (i = 0; i < nrecv; i++)
MPI_Irecv(&buf_recv[forward_recv_offset[iswap][i]],
recvnum[iswap][i]*size_border,
MPI_DOUBLE,recvproc[iswap][i],0,world,&requests[i]);
for (i = 0; i < nsend; i++) {
n = avec->pack_border_vel(sendnum[iswap][i],sendlist[iswap][i],
buf_send,pbc_flag[iswap][i],pbc[iswap][i]);
for (m = 0; m < nrecv; m++)
MPI_Irecv(&buf_recv[forward_recv_offset[iswap][m]],
recvnum[iswap][m]*size_border,
MPI_DOUBLE,recvproc[iswap][m],0,world,&requests[m]);
for (m = 0; m < nsend; m++) {
n = avec->pack_border_vel(sendnum[iswap][m],sendlist[iswap][m],
buf_send,pbc_flag[iswap][m],pbc[iswap][m]);
MPI_Send(buf_send,n*size_border,MPI_DOUBLE,
sendproc[iswap][i],0,world);
sendproc[iswap][m],0,world);
}
}
@ -706,7 +710,7 @@ void CommTiled::borders()
}
if (sendother[iswap]) {
for (i = 0; i < nrecv; i++) {
for (m = 0; m < nrecv; m++) {
MPI_Waitany(nrecv,requests,&irecv,&status);
avec->unpack_border(recvnum[iswap][irecv],firstrecv[iswap][irecv],
&buf_recv[forward_recv_offset[iswap][irecv]]);
@ -715,15 +719,15 @@ void CommTiled::borders()
} else {
if (sendother[iswap]) {
for (i = 0; i < nrecv; i++)
MPI_Irecv(&buf_recv[forward_recv_offset[iswap][i]],
recvnum[iswap][i]*size_border,
MPI_DOUBLE,recvproc[iswap][i],0,world,&requests[i]);
for (i = 0; i < nsend; i++) {
n = avec->pack_border(sendnum[iswap][i],sendlist[iswap][i],
buf_send,pbc_flag[iswap][i],pbc[iswap][i]);
for (m = 0; m < nrecv; m++)
MPI_Irecv(&buf_recv[forward_recv_offset[iswap][m]],
recvnum[iswap][m]*size_border,
MPI_DOUBLE,recvproc[iswap][m],0,world,&requests[m]);
for (m = 0; m < nsend; m++) {
n = avec->pack_border(sendnum[iswap][m],sendlist[iswap][m],
buf_send,pbc_flag[iswap][m],pbc[iswap][m]);
MPI_Send(buf_send,n*size_border,MPI_DOUBLE,
sendproc[iswap][i],0,world);
sendproc[iswap][m],0,world);
}
}
@ -736,7 +740,7 @@ void CommTiled::borders()
}
if (sendother[iswap]) {
for (i = 0; i < nrecv; i++) {
for (m = 0; m < nrecv; m++) {
MPI_Waitany(nrecv,requests,&irecv,&status);
avec->unpack_border(recvnum[iswap][irecv],firstrecv[iswap][irecv],
&buf_recv[forward_recv_offset[iswap][irecv]]);
@ -762,6 +766,8 @@ void CommTiled::borders()
if (map_style) atom->map_set();
}
// NOTE: remaining forward/reverse methods still need to be updated
/* ----------------------------------------------------------------------
forward communication invoked by a Pair
n = constant number of datums per atom
@ -1095,20 +1101,63 @@ int CommTiled::exchange_variable(int n, double *inbuf, double *&outbuf)
box is owned by me and extends in dim
------------------------------------------------------------------------- */
void CommTiled::box_drop_uniform(int dim, double *lo, double *hi,
int &noverlap, int *overlap, int &indexme)
void CommTiled::box_drop_brick(int idim, double *lo, double *hi,
int &noverlap, int &indexme)
{
// NOTE: this is not triclinic compatible
}
double *prd = domain->prd;
double *boxlo = domain->boxlo;
double *boxhi = domain->boxhi;
double *sublo = domain->sublo;
double *subhi = domain->subhi;
/* ----------------------------------------------------------------------
determine overlap list of Noverlap procs the lo/hi box overlaps
overlap = non-zero area in common between box and proc sub-domain
------------------------------------------------------------------------- */
int index,dir;
if (hi[idim] == sublo[idim]) {
index = myloc[idim] - 1;
dir = -1;
} else if (lo[idim] == subhi[idim]) {
index = myloc[idim] + 1;
dir = 1;
} else if (hi[idim] == boxhi[idim]) {
index = procgrid[idim] - 1;
dir = -1;
} else if (lo[idim] == boxlo[idim]) {
index = 0;
dir = 1;
}
void CommTiled::box_drop_nonuniform(int dim, double *lo, double *hi,
int &noverlap, int *overlap, int &indexme)
{
int other1,other2,proc;
double lower,upper;
double *split;
if (idim == 0) {
other1 = myloc[1]; other2 = myloc[2];
split = xsplit;
} else if (idim == 1) {
other1 = myloc[0]; other2 = myloc[2];
split = ysplit;
} else {
other1 = myloc[0]; other2 = myloc[1];
split = zsplit;
}
while (1) {
lower = boxlo[idim] + prd[idim]*split[index];
if (index < procgrid[idim]-1)
upper = boxlo[idim] + prd[idim]*split[index+1];
else upper = boxhi[idim];
if (lower >= hi[idim] || upper <= lo[idim]) break;
if (idim == 0) proc = grid2proc[index][other1][other2];
else if (idim == 1) proc = grid2proc[other1][index][other2];
else proc = grid2proc[other1][other2][idim];
if (proc == me) indexme = noverlap;
overlap[noverlap++] = proc;
index += dir;
if (index < 0 || index >= procgrid[idim]) break;
}
}
/* ----------------------------------------------------------------------
@ -1118,9 +1167,15 @@ void CommTiled::box_drop_nonuniform(int dim, double *lo, double *hi,
no need to split lo/hi box as recurse b/c OK if box extends outside RCB box
------------------------------------------------------------------------- */
void CommTiled::box_drop_tiled(double *lo, double *hi,
int proclower, int procupper,
int &noverlap, int *overlap, int &indexme)
void CommTiled::box_drop_tiled(int idim, double *lo, double *hi,
int &noverlap, int &indexme)
{
box_drop_tiled_recurse(lo,hi,0,nprocs-1,noverlap,indexme);
}
void CommTiled::box_drop_tiled_recurse(double *lo, double *hi,
int proclower, int procupper,
int &noverlap, int &indexme)
{
// end recursion when partition is a single proc
// add proc to overlap list
@ -1139,13 +1194,93 @@ void CommTiled::box_drop_tiled(double *lo, double *hi,
// cut = position of cut
int procmid = proclower + (procupper - proclower) / 2 + 1;
double cut = tree[procmid].cut;
int dim = tree[procmid].dim;
double cut = rcbinfo[procmid].cut;
int idim = rcbinfo[procmid].dim;
if (lo[dim] < cut)
box_drop_tiled(lo,hi,proclower,procmid-1,noverlap,overlap,indexme);
if (hi[dim] > cut)
box_drop_tiled(lo,hi,procmid,procupper,noverlap,overlap,indexme);
if (lo[idim] < cut)
box_drop_tiled_recurse(lo,hi,proclower,procmid-1,noverlap,indexme);
if (hi[idim] > cut)
box_drop_tiled_recurse(lo,hi,procmid,procupper,noverlap,indexme);
}
/* ----------------------------------------------------------------------
return other box owned by proc as lo/hi corner pts
------------------------------------------------------------------------- */
void CommTiled::box_other_brick(int idim, int iswap,
int proc, double *lo, double *hi)
{
double *prd = domain->prd;
double *boxlo = domain->boxlo;
double *boxhi = domain->boxhi;
double *sublo = domain->sublo;
double *subhi = domain->subhi;
lo[0] = sublo[0]; lo[1] = sublo[1]; lo[2] = sublo[2];
hi[0] = subhi[0]; hi[1] = subhi[1]; hi[2] = subhi[2];
int other1,other2,oproc;
double *split;
if (idim == 0) {
other1 = myloc[1]; other2 = myloc[2];
split = xsplit;
} else if (idim == 1) {
other1 = myloc[0]; other2 = myloc[2];
split = ysplit;
} else {
other1 = myloc[0]; other2 = myloc[1];
split = zsplit;
}
int dir = -1;
if (iswap) dir = 1;
int index = myloc[idim];
int n = procgrid[idim];
for (int i = 0; i < n; i++) {
index += dir;
if (index < 0) index = n-1;
else if (index >= n) index = 0;
if (idim == 0) oproc = grid2proc[index][other1][other2];
else if (idim == 1) oproc = grid2proc[other1][index][other2];
else oproc = grid2proc[other1][other2][idim];
if (proc == oproc) {
lo[idim] = boxlo[idim] + prd[idim]*split[index];
if (split[index+1] < 1.0)
hi[idim] = boxlo[idim] + prd[idim]*split[index+1];
else hi[idim] = boxhi[idim];
return;
}
}
}
/* ----------------------------------------------------------------------
return other box owned by proc as lo/hi corner pts
------------------------------------------------------------------------- */
void CommTiled::box_other_tiled(int idim, int iswap,
int proc, double *lo, double *hi)
{
double *prd = domain->prd;
double *boxlo = domain->boxlo;
double *boxhi = domain->boxhi;
double (*split)[2] = rcbinfo[proc].mysplit;
lo[0] = boxlo[0] + prd[0]*split[0][0];
if (split[0][1] < 1.0) hi[0] = boxlo[0] + prd[0]*split[0][1];
else hi[0] = boxhi[0];
lo[1] = boxlo[1] + prd[1]*split[1][0];
if (split[1][1] < 1.0) hi[1] = boxlo[1] + prd[1]*split[1][1];
else hi[1] = boxhi[1];
lo[2] = boxlo[2] + prd[2]*split[2][0];
if (split[2][1] < 1.0) hi[2] = boxlo[2] + prd[2]*split[2][1];
else hi[2] = boxhi[2];
}
/* ----------------------------------------------------------------------
@ -1184,7 +1319,7 @@ void CommTiled::grow_list(int iswap, int iwhich, int n)
{
maxsendlist[iswap][iwhich] = static_cast<int> (BUFFACTOR * n);
memory->grow(sendlist[iswap][iwhich],maxsendlist[iswap][iwhich],
"comm:sendlist[iswap]");
"comm:sendlist[i][j]");
}
/* ----------------------------------------------------------------------
@ -1193,34 +1328,169 @@ void CommTiled::grow_list(int iswap, int iwhich, int n)
void CommTiled::allocate_swap(int n)
{
memory->create(sendnum,n,"comm:sendnum");
memory->create(recvnum,n,"comm:recvnum");
memory->create(sendproc,n,"comm:sendproc");
memory->create(recvproc,n,"comm:recvproc");
memory->create(size_forward_recv,n,"comm:size");
memory->create(size_reverse_send,n,"comm:size");
memory->create(size_reverse_recv,n,"comm:size");
memory->create(firstrecv,n,"comm:firstrecv");
memory->create(pbc_flag,n,"comm:pbc_flag");
memory->create(pbc,n,6,"comm:pbc");
nsendproc = new int[n];
nrecvproc = new int[n];
sendother = new int[n];
sendself = new int[n];
nprocmax = new int[n];
sendproc = new int*[n];
recvproc = new int*[n];
sendnum = new int*[n];
recvnum = new int*[n];
size_forward_recv = new int*[n];
firstrecv = new int*[n];
size_reverse_send = new int*[n];
size_reverse_recv = new int*[n];
forward_recv_offset = new int*[n];
reverse_recv_offset = new int*[n];
pbc_flag = new int*[n];
pbc = new int**[n];
sendbox = new double**[n];
maxsendlist = new int*[n];
sendlist = new int**[n];
for (int i = 0; i < n; i++) {
sendproc[i] = recvproc[i] = NULL;
sendnum[i] = recvnum[i] = NULL;
size_forward_recv[i] = firstrecv[i] = NULL;
size_reverse_send[i] = size_reverse_recv[i] = NULL;
forward_recv_offset[i] = reverse_recv_offset[i] = NULL;
pbc_flag[i] = NULL;
pbc[i] = NULL;
sendbox[i] = NULL;
maxsendlist[i] = NULL;
sendlist[i] = NULL;
}
maxreqstat = 0;
requests = NULL;
statuses = NULL;
for (int i = 0; i < n; i++) {
nprocmax[i] = DELTA_PROCS;
grow_swap_send(i,DELTA_PROCS,0);
grow_swap_recv(i,DELTA_PROCS);
}
}
/* ----------------------------------------------------------------------
free memory for swaps
grow info for swap I, to allow for N procs to communicate with
ditto for complementary recv for swap I+1 or I-1, as invoked by caller
------------------------------------------------------------------------- */
void CommTiled::free_swap()
void CommTiled::grow_swap_send(int i, int n, int nold)
{
memory->destroy(sendnum);
memory->destroy(recvnum);
memory->destroy(sendproc);
memory->destroy(recvproc);
memory->destroy(size_forward_recv);
memory->destroy(size_reverse_send);
memory->destroy(size_reverse_recv);
memory->destroy(firstrecv);
memory->destroy(pbc_flag);
memory->destroy(pbc);
delete [] sendproc[i];
sendproc[i] = new int[n];
delete [] sendnum[i];
sendnum[i] = new int[n];
delete [] size_reverse_recv[i];
size_reverse_recv[i] = new int[n];
delete [] reverse_recv_offset[i];
reverse_recv_offset[i] = new int[n];
delete [] pbc_flag[i];
pbc_flag[i] = new int[n];
memory->destroy(pbc[i]);
memory->create(pbc[i],n,6,"comm:pbc_flag");
memory->destroy(sendbox[i]);
memory->create(sendbox[i],n,6,"comm:sendbox");
delete [] maxsendlist[i];
maxsendlist[i] = new int[n];
for (int j = 0; j < nold; j++) memory->destroy(sendlist[i][j]);
delete [] sendlist[i];
sendlist[i] = new int*[n];
for (int j = 0; j < n; j++) {
maxsendlist[i][j] = BUFMIN;
memory->create(sendlist[i][j],BUFMIN,"comm:sendlist[i][j]");
}
}
void CommTiled::grow_swap_recv(int i, int n)
{
delete [] recvproc[i];
recvproc[i] = new int[n];
delete [] recvnum[i];
recvnum[i] = new int[n];
delete [] size_forward_recv[i];
size_forward_recv[i] = new int[n];
delete [] firstrecv[i];
firstrecv[i] = new int[n];
delete [] forward_recv_offset[i];
forward_recv_offset[i] = new int[n];
delete [] size_reverse_send[i];
size_reverse_send[i] = new int[n];
if (n > maxreqstat) {
maxreqstat = n;
delete [] requests;
delete [] statuses;
requests = new MPI_Request[n];
statuses = new MPI_Status[n];
}
}
/* ----------------------------------------------------------------------
deallocate swap info
------------------------------------------------------------------------- */
void CommTiled::deallocate_swap(int n)
{
delete [] nsendproc;
delete [] nrecvproc;
delete [] sendother;
delete [] sendself;
for (int i = 0; i < n; i++) {
delete [] sendproc[i];
delete [] recvproc[i];
delete [] sendnum[i];
delete [] recvnum[i];
delete [] size_forward_recv[i];
delete [] firstrecv[i];
delete [] size_reverse_send[i];
delete [] size_reverse_recv[i];
delete [] forward_recv_offset[i];
delete [] reverse_recv_offset[i];
delete [] pbc_flag[i];
memory->destroy(pbc[i]);
memory->destroy(sendbox[i]);
delete [] maxsendlist[i];
for (int j = 0; j < nprocmax[i]; j++) memory->destroy(sendlist[i][j]);
delete [] sendlist[i];
}
delete [] sendproc;
delete [] recvproc;
delete [] sendnum;
delete [] recvnum;
delete [] size_forward_recv;
delete [] firstrecv;
delete [] size_reverse_send;
delete [] size_reverse_recv;
delete [] forward_recv_offset;
delete [] reverse_recv_offset;
delete [] pbc_flag;
delete [] pbc;
delete [] sendbox;
delete [] maxsendlist;
delete [] sendlist;
delete [] requests;
delete [] statuses;
delete [] nprocmax;
}
/* ----------------------------------------------------------------------

View File

@ -57,13 +57,13 @@ class CommTiled : public Comm {
int *nsendproc,*nrecvproc; // # of procs to send/recv to/from in each swap
int *sendother; // 1 if send to any other proc in each swap
int *sendself; // 1 if send to self in each swap
int **sendnum,**recvnum; // # of atoms to send/recv per swap/proc
int *nprocmax; // current max # of send procs for each swap
int **sendproc,**recvproc; // proc to send/recv to/from per swap/proc
int **sendnum,**recvnum; // # of atoms to send/recv per swap/proc
int **size_forward_recv; // # of values to recv in each forward swap/proc
int **firstrecv; // where to put 1st recv atom per swap/proc
int **size_reverse_send; // # to send in each reverse comm per swap/proc
int **size_reverse_recv; // # to recv in each reverse comm per swap/proc
int **forward_recv_offset; // forward comm offsets in buf_recv per swap/proc
int **reverse_recv_offset; // reverse comm offsets in buf_recv per swap/proc
@ -82,40 +82,43 @@ class CommTiled : public Comm {
int maxexchange; // max # of datums/atom in exchange comm
int bufextra; // extra space beyond maxsend in send buffer
int maxreqstat; // max size of Request and Status vectors
MPI_Request *requests;
MPI_Status *statuses;
int comm_x_only,comm_f_only; // 1 if only exchange x,f in for/rev comm
struct Tree {
double cut;
int dim;
struct RCBinfo {
double mysplit[3][2]; // fractional RCB bounding box for one proc
double cut; // position of cut this proc owns
int dim; // dimension = 0/1/2 of cut
};
Tree *tree;
// info from RCB decomp
double rcbcut;
int rcbcutdim;
double rcblo[3];
double rcbhi[3];
int *overlap;
RCBinfo *rcbinfo; // list of RCB info for all procs
void init_buffers();
void box_drop_uniform(int, double *, double *, int &, int *, int &);
void box_drop_nonuniform(int, double *, double *, int &, int *, int &);
void box_drop_tiled(double *, double *, int, int, int &, int *, int &);
// box drop and other functions
void box_other_uniform(int, double *, double *) {}
void box_other_nonuniform(int, double *, double *) {}
void box_other_tiled(int, double *, double *) {}
typedef void (CommTiled::*BoxDropPtr)(int, double *, double *, int &, int &);
BoxDropPtr box_drop;
void box_drop_brick(int, double *, double *, int &, int &);
void box_drop_tiled(int, double *, double *, int &, int &);
void box_drop_tiled_recurse(double *, double *, int, int, int &, int &);
void grow_send(int, int); // reallocate send buffer
void grow_recv(int); // free/allocate recv buffer
void grow_list(int, int, int); // reallocate sendlist for one swap/proc
void allocate_swap(int); // allocate swap arrays
void free_swap(); // free swap arrays
typedef void (CommTiled::*BoxOtherPtr)(int, int, int, double *, double *);
BoxOtherPtr box_other;
void box_other_brick(int, int, int, double *, double *);
void box_other_tiled(int, int, int, double *, double *);
void grow_send(int, int); // reallocate send buffer
void grow_recv(int); // free/allocate recv buffer
void grow_list(int, int, int); // reallocate sendlist for one swap/proc
void allocate_swap(int); // allocate swap arrays
void grow_swap_send(int, int, int); // grow swap arrays for send and recv
void grow_swap_recv(int, int);
void deallocate_swap(int); // deallocate swap arrays
};
}

View File

@ -301,34 +301,31 @@ void Domain::set_local_box()
double *zsplit = comm->zsplit;
sublo[0] = boxlo[0] + xprd*xsplit[myloc[0]];
if (myloc[0] < procgrid[0]-1)
subhi[0] = boxlo[0] + xprd*xsplit[myloc[0]+1];
if (myloc[0] < procgrid[0]-1) subhi[0] = boxlo[0] + xprd*xsplit[myloc[0]+1];
else subhi[0] = boxhi[0];
sublo[1] = boxlo[1] + yprd*ysplit[myloc[1]];
if (myloc[1] < procgrid[1]-1)
subhi[1] = boxlo[1] + yprd*ysplit[myloc[1]+1];
if (myloc[1] < procgrid[1]-1) subhi[1] = boxlo[1] + yprd*ysplit[myloc[1]+1];
else subhi[1] = boxhi[1];
sublo[2] = boxlo[2] + zprd*zsplit[myloc[2]];
if (myloc[2] < procgrid[2]-1)
subhi[2] = boxlo[2] + zprd*zsplit[myloc[2]+1];
if (myloc[2] < procgrid[2]-1) subhi[2] = boxlo[2] + zprd*zsplit[myloc[2]+1];
else subhi[2] = boxhi[2];
} else {
double (*mysplit)[2] = comm->mysplit;
sublo[0] = boxlo[0] + xprd*mysplit[0][0];
subhi[0] = boxlo[0] + xprd*mysplit[0][1];
if (mysplit[0][1] == 1.0) subhi[0] = boxhi[0];
if (mysplit[0][1] < 1.0) subhi[0] = boxlo[0] + xprd*mysplit[0][1];
else subhi[0] = boxhi[0];
sublo[1] = boxlo[1] + yprd*mysplit[1][0];
subhi[1] = boxlo[1] + yprd*mysplit[1][1];
if (mysplit[1][1] == 1.0) subhi[1] = boxhi[1];
if (mysplit[1][1] < 1.0) subhi[1] = boxlo[1] + yprd*mysplit[1][1];
else subhi[1] = boxhi[1];
sublo[2] = boxlo[2] + zprd*mysplit[2][0];
subhi[2] = boxlo[2] + zprd*mysplit[2][1];
if (mysplit[2][1] == 1.0) subhi[2] = boxhi[2];
if (mysplit[2][1] < 1.0) subhi[2] = boxlo[2] + zprd*mysplit[2][1];
else subhi[2] = boxhi[2];
}
}