diff --git a/src/MANYBODY/pair_tersoff_zbl.cpp b/src/MANYBODY/pair_tersoff_zbl.cpp index a4f6ee3edd..d8e044e078 100644 --- a/src/MANYBODY/pair_tersoff_zbl.cpp +++ b/src/MANYBODY/pair_tersoff_zbl.cpp @@ -66,7 +66,7 @@ void PairTersoffZBL::read_file(char *file) int params_per_line = 21; char **words = new char*[params_per_line+1]; - delete [] params; + memory->sfree(params); params = NULL; nparams = 0; diff --git a/src/MOLECULE/pair_lj_charmm_coul_charmm.cpp b/src/MOLECULE/pair_lj_charmm_coul_charmm.cpp index 875f4c95a2..1a4aeed1d3 100644 --- a/src/MOLECULE/pair_lj_charmm_coul_charmm.cpp +++ b/src/MOLECULE/pair_lj_charmm_coul_charmm.cpp @@ -282,7 +282,8 @@ void PairLJCharmmCoulCharmm::coeff(int narg, char **arg) void PairLJCharmmCoulCharmm::init_style() { if (!atom->q_flag) - error->all(FLERR,"Pair style lj/charmm/coul/charmm requires atom attribute q"); + error->all(FLERR, + "Pair style lj/charmm/coul/charmm requires atom attribute q"); neighbor->request(this); diff --git a/src/REPLICA/verlet_split.cpp b/src/REPLICA/verlet_split.cpp index e84cbc81ec..ee8d6d7ac5 100644 --- a/src/REPLICA/verlet_split.cpp +++ b/src/REPLICA/verlet_split.cpp @@ -132,33 +132,39 @@ VerletSplit::VerletSplit(LAMMPS *lmp, int narg, char **arg) : if (universe->me == 0) { if (universe->uscreen) { - fprintf(universe->uscreen,"Rspace/Kspace procs in each block:\n"); + fprintf(universe->uscreen, + "Per-block Rspace/Kspace proc IDs (original proc IDs):\n"); int m = 0; for (int i = 0; i < universe->nprocs/(ratio+1); i++) { fprintf(universe->uscreen," block %d:",i); int kspace_proc = bmapall[m]; for (int j = 1; j <= ratio; j++) fprintf(universe->uscreen," %d",bmapall[m+j]); - fprintf(universe->uscreen," %d\n",kspace_proc); - /* + fprintf(universe->uscreen," %d (",kspace_proc); kspace_proc = bmapall[m]; for (int j = 1; j <= ratio; j++) fprintf(universe->uscreen," %d", - universe->proc2original[bmapall[m+j]]); - fprintf(universe->uscreen," %d\n",universe->proc2original[kspace_proc]); - */ + 
universe->uni2orig[bmapall[m+j]]); + fprintf(universe->uscreen," %d)\n",universe->uni2orig[kspace_proc]); m += ratio + 1; } } if (universe->ulogfile) { - fprintf(universe->ulogfile,"Rspace/Kspace procs in each block:\n"); + fprintf(universe->ulogfile, + "Per-block Rspace/Kspace proc IDs (original proc IDs):\n"); int m = 0; for (int i = 0; i < universe->nprocs/(ratio+1); i++) { fprintf(universe->ulogfile," block %d:",i); - int kspace_proc = bmapall[m++]; + int kspace_proc = bmapall[m]; for (int j = 1; j <= ratio; j++) - fprintf(universe->ulogfile," %d",bmapall[m++]); - fprintf(universe->ulogfile," %d\n",kspace_proc); + fprintf(universe->ulogfile," %d",bmapall[m+j]); + fprintf(universe->ulogfile," %d (",kspace_proc); + kspace_proc = bmapall[m]; + for (int j = 1; j <= ratio; j++) + fprintf(universe->ulogfile," %d", + universe->uni2orig[bmapall[m+j]]); + fprintf(universe->ulogfile," %d)\n",universe->uni2orig[kspace_proc]); + m += ratio + 1; } } } diff --git a/src/comm.cpp b/src/comm.cpp index 4ce4d81e43..1ee622cf67 100644 --- a/src/comm.cpp +++ b/src/comm.cpp @@ -164,24 +164,18 @@ void Comm::set_proc_grid() ProcMap *pmap = new ProcMap(lmp); // create 3d grid of processors, produces procgrid - // can fail (on one partition) if constrained by other partition - // if numa_grid() fails, try onelevel_grid() - int flag; if (gridflag == ONELEVEL) { - flag = pmap->onelevel_grid(nprocs,user_procgrid,procgrid, - otherflag,other_style,other_procgrid); - if (!flag) error->all(FLERR,"Could not create grid of processors"); + pmap->onelevel_grid(nprocs,user_procgrid,procgrid, + otherflag,other_style,other_procgrid); } else if (gridflag == TWOLEVEL) { - flag = pmap->twolevel_grid(nprocs,user_procgrid,procgrid, - ncores,user_coregrid,coregrid, - otherflag,other_style,other_procgrid); - if (!flag) error->all(FLERR,"Could not create grid of processors"); + pmap->twolevel_grid(nprocs,user_procgrid,procgrid, + ncores,user_coregrid,coregrid, + otherflag,other_style,other_procgrid); } else if 
(gridflag == NUMA) { - flag = pmap->numa_grid(nprocs,user_procgrid,procgrid,coregrid); - if (!flag) error->all(FLERR,"Could not create grid of processors"); + pmap->numa_grid(nprocs,user_procgrid,procgrid,coregrid); } else if (gridflag == CUSTOM) { pmap->custom_grid(customfile,nprocs,user_procgrid,procgrid); diff --git a/src/procmap.cpp b/src/procmap.cpp index 846f4c71b1..1893fb9757 100644 --- a/src/procmap.cpp +++ b/src/procmap.cpp @@ -36,50 +36,116 @@ enum{MULTIPLE}; // same as in Comm ProcMap::ProcMap(LAMMPS *lmp) : Pointers(lmp) {} /* ---------------------------------------------------------------------- - create a one-level 3d grid of procs via procs2box() + create a one-level 3d grid of procs ------------------------------------------------------------------------- */ -int ProcMap::onelevel_grid(int nprocs, int *user_procgrid, int *procgrid, - int otherflag, int other_style_caller, - int *other_procgrid_caller) +void ProcMap::onelevel_grid(int nprocs, int *user_procgrid, int *procgrid, + int otherflag, int other_style, + int *other_procgrid) { - other_style = other_style_caller; - other_procgrid[0] = other_procgrid_caller[0]; - other_procgrid[1] = other_procgrid_caller[1]; - other_procgrid[2] = other_procgrid_caller[2]; + int **factors; - int flag = procs2box(nprocs,user_procgrid,procgrid,1,1,1,otherflag); - return flag; + // factors = list of all possible 3 factors of processor count + + int npossible = factor(nprocs,NULL); + memory->create(factors,npossible,3,"procmap:factors"); + npossible = factor(nprocs,factors); + + // constrain by 2d, user request, other partition + + if (domain->dimension == 2) npossible = cull_2d(npossible,factors,3); + npossible = cull_user(npossible,factors,3,user_procgrid); + if (otherflag) npossible = cull_other(npossible,factors,3, + other_style,other_procgrid); + + // user/other constraints make failure possible + + if (npossible == 0) + error->all(FLERR,"Could not create 3d grid of processors"); + + // select best set of 3 
factors based on surface area of proc sub-domains + + best_factors(npossible,factors,procgrid,1,1,1); + + // clean-up + + memory->destroy(factors); } /* ---------------------------------------------------------------------- - create a two-level 3d grid of procs and cores via procs2box() + create a two-level 3d grid of procs ------------------------------------------------------------------------- */ -int ProcMap::twolevel_grid(int nprocs, int *user_procgrid, int *procgrid, - int ncores, int *user_coregrid, int *coregrid, - int otherflag, int other_style_caller, - int *other_procgrid_caller) +void ProcMap::twolevel_grid(int nprocs, int *user_procgrid, int *procgrid, + int ncores, int *user_coregrid, int *coregrid, + int otherflag, int other_style, + int *other_procgrid) { + int **nfactors,**cfactors,**factors; + if (nprocs % ncores) - error->all(FLERR,"Processors twogrid requres proc count " + error->all(FLERR,"Processors twogrid requires proc count " "be a multiple of core count"); + // nfactors = list of all possible 3 factors of node count + // constrain by 2d + int nnpossible = factor(nprocs/ncores,NULL); + memory->create(nfactors,nnpossible,3,"procmap:nfactors"); + nnpossible = factor(nprocs/ncores,nfactors); - error->all(FLERR, - "The twolevel option is not yet supported, but will be soon"); - return 1; + if (domain->dimension == 2) nnpossible = cull_2d(nnpossible,nfactors,3); + + // cfactors = list of all possible 3 factors of core count + // constrain by 2d + + int ncpossible = factor(ncores,NULL); + memory->create(cfactors,ncpossible,3,"procmap:cfactors"); + ncpossible = factor(ncores,cfactors); + + if (domain->dimension == 2) ncpossible = cull_2d(ncpossible,cfactors,3); + ncpossible = cull_user(ncpossible,cfactors,3,user_coregrid); + + // factors = all combinations of nfactors and cfactors + // factors stores additional index pointing to corresponding cfactors + // constrain by user request, other partition + + int npossible = nnpossible * ncpossible; + 
memory->create(factors,npossible,4,"procmap:factors"); + npossible = combine_factors(nnpossible,nfactors,ncpossible,cfactors,factors); + + npossible = cull_user(npossible,factors,4,user_procgrid); + if (otherflag) npossible = cull_other(npossible,factors,4, + other_style,other_procgrid); + + // user/other constraints make failure possible + + if (npossible == 0) + error->all(FLERR,"Could not create twolevel 3d grid of processors"); + + // select best set of 3 factors based on surface area of proc sub-domains + // index points to corresponding core factorization + + int index = best_factors(npossible,factors,procgrid,1,1,1); + + coregrid[0] = cfactors[factors[index][3]][0]; + coregrid[1] = cfactors[factors[index][3]][1]; + coregrid[2] = cfactors[factors[index][3]][2]; + + // clean-up + + memory->destroy(nfactors); + memory->destroy(cfactors); + memory->destroy(factors); } /* ---------------------------------------------------------------------- create a 3d grid of procs that does a 2-level hierarchy within a node auto-detects NUMA sockets within a multi-core node - return 1 if successful, 0 if not ------------------------------------------------------------------------- */ -int ProcMap::numa_grid(int nprocs, int *user_procgrid, int *procgrid, - int *numagrid) +void ProcMap::numa_grid(int nprocs, int *user_procgrid, int *procgrid, + int *numagrid) { // hardwire this for now @@ -117,16 +183,16 @@ int ProcMap::numa_grid(int nprocs, int *user_procgrid, int *procgrid, user_procgrid[0] > 1 || // user specified grid > 1 in any dim user_procgrid[1] > 1 || user_procgrid[2] > 1) - return 0; + error->all(FLERR,"Could not create numa 3d grid of processors"); + // user settings for the factorization per numa node // currently not user settable + // if user specifies 1 for a proc grid dimension, + // also use 1 for the numa grid dimension int user_numagrid[3]; user_numagrid[0] = user_numagrid[1] = user_numagrid[2] = 0; - - // if user specifies 1 for a proc grid dimension, - // 
also use 1 for the numa grid dimension if (user_procgrid[0] == 1) user_numagrid[0] = 1; if (user_procgrid[1] == 1) user_numagrid[1] = 1; @@ -134,23 +200,52 @@ int ProcMap::numa_grid(int nprocs, int *user_procgrid, int *procgrid, // initial factorization within NUMA node - procs2box(procs_per_numa,user_numagrid,numagrid,1,1,1,0); - if (numagrid[0]*numagrid[1]*numagrid[2] != procs_per_numa) - error->all(FLERR,"Bad grid of processors"); - + int **numafactors; + int numapossible = factor(procs_per_numa,NULL); + memory->create(numafactors,numapossible,3,"procmap:numafactors"); + numapossible = factor(procs_per_numa,numafactors); + + if (domain->dimension == 2) + numapossible = cull_2d(numapossible,numafactors,3); + numapossible = cull_user(numapossible,numafactors,3,user_numagrid); + + if (numapossible == 0) + error->all(FLERR,"Could not create numa grid of processors"); + + best_factors(numapossible,numafactors,numagrid,1,1,1); + + // user_nodegrid = implied user constraints on nodes + + int user_nodegrid[3]; + user_nodegrid[0] = user_procgrid[0] / numagrid[0]; + user_nodegrid[1] = user_procgrid[1] / numagrid[1]; + user_nodegrid[2] = user_procgrid[2] / numagrid[2]; + // factorization for the grid of NUMA nodes int node_count = nprocs / procs_per_numa; - procs2box(node_count,user_procgrid,nodegrid, - numagrid[0],numagrid[1],numagrid[2],0); - if (procgrid[0]*procgrid[1]*procgrid[2] != node_count) - error->all(FLERR,"Bad grid of processors"); + + int **nodefactors; + int nodepossible = factor(node_count,NULL); + memory->create(nodefactors,nodepossible,3,"procmap:nodefactors"); + nodepossible = factor(node_count,nodefactors); + + if (domain->dimension == 2) + nodepossible = cull_2d(nodepossible,nodefactors,3); + nodepossible = cull_user(nodepossible,nodefactors,3,user_nodegrid); + + if (nodepossible == 0) + error->all(FLERR,"Could not create numa grid of processors"); + + best_factors(nodepossible,nodefactors,nodegrid, + numagrid[0],numagrid[1],numagrid[2]); // repeat NUMA 
node factorization using subdomain sizes // refines the factorization if the user specified the node layout + // NOTE: this will not re-enforce user-procgrid constraint will it? - procs2box(procs_per_numa,user_numagrid,numagrid, - procgrid[0],procgrid[1],procgrid[2],0); + best_factors(numapossible,numafactors,numagrid, + nodegrid[0],nodegrid[1],nodegrid[2]); // assign a unique id to each node @@ -166,8 +261,6 @@ int ProcMap::numa_grid(int nprocs, int *user_procgrid, int *procgrid, procgrid[0] = nodegrid[0] * numagrid[0]; procgrid[1] = nodegrid[1] * numagrid[1]; procgrid[2] = nodegrid[2] * numagrid[2]; - - return 1; } /* ---------------------------------------------------------------------- @@ -245,136 +338,6 @@ void ProcMap::custom_grid(char *cfile, int nprocs, if (flag) error->all(FLERR,"Processors custom grid file is invalid"); } -/* ---------------------------------------------------------------------- - assign nprocs to 3d box so as to minimize surface area - area = surface area of each of 3 faces of simulation box divided by sx,sy,sz - for triclinic, area = cross product of 2 edge vectors stored in h matrix - valid assignment will be factorization of nprocs = Px by Py by Pz - user_factors = if non-zero, factors are specified by user - sx,sy,sz = scale box xyz dimension by dividing by sx,sy,sz - other = 1 to enforce compatability with other partition's layout - return factors = # of procs assigned to each dimension - return 1 if factor successfully, 0 if not -------------------------------------------------------------------------- */ - -int ProcMap::procs2box(int nprocs, int *user_factors, int *factors, - const int sx, const int sy, const int sz, int other) -{ - factors[0] = user_factors[0]; - factors[1] = user_factors[1]; - factors[2] = user_factors[2]; - - // all 3 proc counts are specified - - if (factors[0] && factors[1] && factors[2]) return 1; - - // 2 out of 3 proc counts are specified - - if (factors[0] > 0 && factors[1] > 0) { - factors[2] = 
nprocs/(factors[0]*factors[1]); - return 1; - } else if (factors[0] > 0 && factors[2] > 0) { - factors[1] = nprocs/(factors[0]*factors[2]); - return 1; - } else if (factors[1] > 0 && factors[2] > 0) { - factors[0] = nprocs/(factors[1]*factors[2]); - return 1; - } - - // determine cross-sectional areas for orthogonal and triclinic boxes - // area[0] = xy, area[1] = xz, area[2] = yz - - double area[3]; - if (domain->triclinic == 0) { - area[0] = domain->xprd * domain->yprd / (sx * sy); - area[1] = domain->xprd * domain->zprd / (sx * sz); - area[2] = domain->yprd * domain->zprd / (sy * sz); - } else { - double *h = domain->h; - double a[3],b[3],c[3]; - a[0] = h[0]; a[1] = 0.0; a[2] = 0.0; - b[0] = h[5]; b[1] = h[1]; b[2] = 0.0; - MathExtra::cross3(a,b,c); - area[0] = sqrt(c[0]*c[0] + c[1]*c[1] + c[2]*c[2]) / (sx * sy); - a[0] = h[0]; a[1] = 0.0; a[2] = 0.0; - b[0] = h[4]; b[1] = h[3]; b[2] = h[2]; - MathExtra::cross3(a,b,c); - area[1] = sqrt(c[0]*c[0] + c[1]*c[1] + c[2]*c[2]) / (sx * sz); - a[0] = h[5]; a[1] = h[1]; a[2] = 0.0; - b[0] = h[4]; b[1] = h[3]; b[2] = h[2]; - MathExtra::cross3(a,b,c); - area[2] = sqrt(c[0]*c[0] + c[1]*c[1] + c[2]*c[2]) / (sy * sz); - } - - double bestsurf = 2.0 * (area[0]+area[1]+area[2]); - - // loop thru all possible factorizations of nprocs - // only consider valid cases that match procgrid settings - // surf = surface area of a proc sub-domain - // only consider cases that match user_factors & other_procgrid settings - // success = 1 if valid factoriztion is found - // may not be if other constraint is enforced - - int ipx,ipy,ipz,valid; - double surf; - - int success = 0; - ipx = 1; - while (ipx <= nprocs) { - valid = 1; - if (user_factors[0] && ipx != user_factors[0]) valid = 0; - if (other) { - if (other_style == MULTIPLE && other_procgrid[0] % ipx) valid = 0; - } - if (nprocs % ipx) valid = 0; - - if (!valid) { - ipx++; - continue; - } - - ipy = 1; - while (ipy <= nprocs/ipx) { - valid = 1; - if (user_factors[1] && ipy != 
user_factors[1]) valid = 0; - if (other) { - if (other_style == MULTIPLE && other_procgrid[1] % ipy) valid = 0; - } - if ((nprocs/ipx) % ipy) valid = 0; - if (!valid) { - ipy++; - continue; - } - - ipz = nprocs/ipx/ipy; - valid = 1; - if (user_factors[2] && ipz != user_factors[2]) valid = 0; - if (other) { - if (other_style == MULTIPLE && other_procgrid[2] % ipz) valid = 0; - } - if (domain->dimension == 2 && ipz != 1) valid = 0; - if (!valid) { - ipy++; - continue; - } - - surf = area[0]/ipx/ipy + area[1]/ipx/ipz + area[2]/ipy/ipz; - if (surf < bestsurf) { - success = 1; - bestsurf = surf; - factors[0] = ipx; - factors[1] = ipy; - factors[2] = ipz; - } - ipy++; - } - - ipx++; - } - - return success; -} - /* ---------------------------------------------------------------------- map processors to 3d grid via MPI_Cart routines MPI may do layout in machine-optimized fashion @@ -471,6 +434,44 @@ void ProcMap::xyz_map(char *xyz, int *procgrid, void ProcMap::xyz_map(char *xyz, int *procgrid, int *coregrid, int *myloc, int procneigh[3][2], int ***grid2proc) { + int me; + MPI_Comm_rank(world,&me); + + int i,j,k; + for (i = 0; i < procgrid[0]; i++) + for (j = 0; j < procgrid[1]; j++) + for (k = 0; k < procgrid[2]; k++) { + grid2proc[i][j][k] = k*procgrid[1]*procgrid[0] + j*procgrid[0] + i; + if (xyz[0] == 'x' && xyz[1] == 'y' && xyz[2] == 'z') + grid2proc[i][j][k] = k*procgrid[1]*procgrid[0] + j*procgrid[0] + i; + else if (xyz[0] == 'x' && xyz[1] == 'z' && xyz[2] == 'y') + grid2proc[i][j][k] = j*procgrid[2]*procgrid[0] + k*procgrid[0] + i; + else if (xyz[0] == 'y' && xyz[1] == 'x' && xyz[2] == 'z') + grid2proc[i][j][k] = k*procgrid[0]*procgrid[1] + i*procgrid[1] + j; + else if (xyz[0] == 'y' && xyz[1] == 'z' && xyz[2] == 'x') + grid2proc[i][j][k] = i*procgrid[2]*procgrid[1] + k*procgrid[1] + j; + else if (xyz[0] == 'z' && xyz[1] == 'x' && xyz[2] == 'y') + grid2proc[i][j][k] = j*procgrid[0]*procgrid[2] + i*procgrid[2] + k; + else if (xyz[0] == 'z' && xyz[1] == 'y' && xyz[2] 
== 'x') + grid2proc[i][j][k] = i*procgrid[1]*procgrid[2] + j*procgrid[2] + k; + + if (grid2proc[i][j][k] == me) { + myloc[0] = i; myloc[1] = j; myloc[2] = k; + } + } + + int minus,plus; + grid_shift(myloc[0],procgrid[0],minus,plus); + procneigh[0][0] = grid2proc[minus][myloc[1]][myloc[2]]; + procneigh[0][1] = grid2proc[plus][myloc[1]][myloc[2]]; + + grid_shift(myloc[1],procgrid[1],minus,plus); + procneigh[1][0] = grid2proc[myloc[0]][minus][myloc[2]]; + procneigh[1][1] = grid2proc[myloc[0]][plus][myloc[2]]; + + grid_shift(myloc[2],procgrid[2],minus,plus); + procneigh[2][0] = grid2proc[myloc[0]][myloc[1]][minus]; + procneigh[2][1] = grid2proc[myloc[0]][myloc[1]][plus]; } /* ---------------------------------------------------------------------- @@ -596,18 +597,6 @@ void ProcMap::custom_map(int *procgrid, memory->destroy(cmap); } -/* ---------------------------------------------------------------------- - minus,plus = indices of neighboring processors in a dimension -------------------------------------------------------------------------- */ - -void ProcMap::grid_shift(int myloc, int nprocs, int &minus, int &plus) -{ - minus = myloc - 1; - if (minus < 0) minus = nprocs - 1; - plus = myloc + 1; - if (plus == nprocs) plus = 0; -} - /* ---------------------------------------------------------------------- output mapping of processors to 3d grid to file ------------------------------------------------------------------------- */ @@ -679,3 +668,136 @@ void ProcMap::output(char *file, int *procgrid, int ***grid2proc) if (me == 0) fclose(fp); } + +/* ---------------------------------------------------------------------- + generate all possible 3-integer factorizations of N + store them in factors if non-NULL + return # of factorizations +------------------------------------------------------------------------- */ + +int ProcMap::factor(int n, int **factors) +{ + int i,j,nyz; + + int m = 0; + for (i = 1; i <= n; i++) { + if (n % i) continue; + nyz = n/i; + for (j = 1; j <= 
nyz; j++) { + if (nyz % j) continue; + if (factors) { + factors[m][0] = i; + factors[m][1] = j; + factors[m][2] = nyz/j; + } + m++; + } + } + + return m; +} + +/* ---------------------------------------------------------------------- +------------------------------------------------------------------------- */ + +int ProcMap::combine_factors(int n1, int **factors1, int n2, int **factors2, + int **factors) +{ + int m = 0; + + return n1*n2; +} + +/* ---------------------------------------------------------------------- +------------------------------------------------------------------------- */ + +int ProcMap::cull_2d(int n, int **factors, int m) +{ + return 0; +} + +/* ---------------------------------------------------------------------- +------------------------------------------------------------------------- */ + +int ProcMap::cull_user(int n, int **factors, int m, int *user_factors) +{ + return 0; +} + +/* ---------------------------------------------------------------------- +------------------------------------------------------------------------- */ + +int ProcMap::cull_other(int n, int **factors, int m, + int other_style, int *other_grid) +{ + return 0; +} + +/* ---------------------------------------------------------------------- + choose best factors from list of Npossible factors + best = minimal surface area of sub-domain + return best = 3 factors + return index of best factors in factors +------------------------------------------------------------------------- */ + +int ProcMap::best_factors(int npossible, int **factors, int *best, + const int sx, const int sy, const int sz) +{ + // determine cross-sectional areas for orthogonal and triclinic boxes + // for triclinic, area = cross product of 2 edge vectors stored in h matrix + // area[3] = surface area 3 box faces divided by sx,sy,sz + // area[0] = xy, area[1] = xz, area[2] = yz + + double area[3]; + if (domain->triclinic == 0) { + area[0] = domain->xprd * domain->yprd / (sx*sy); + area[1] = 
domain->xprd * domain->zprd / (sx*sz); + area[2] = domain->yprd * domain->zprd / (sy*sz); + } else { + double *h = domain->h; + double a[3],b[3],c[3]; + a[0] = h[0]; a[1] = 0.0; a[2] = 0.0; + b[0] = h[5]; b[1] = h[1]; b[2] = 0.0; + MathExtra::cross3(a,b,c); + area[0] = sqrt(c[0]*c[0] + c[1]*c[1] + c[2]*c[2]) / (sx*sy); + a[0] = h[0]; a[1] = 0.0; a[2] = 0.0; + b[0] = h[4]; b[1] = h[3]; b[2] = h[2]; + MathExtra::cross3(a,b,c); + area[1] = sqrt(c[0]*c[0] + c[1]*c[1] + c[2]*c[2]) / (sx*sz); + a[0] = h[5]; a[1] = h[1]; a[2] = 0.0; + b[0] = h[4]; b[1] = h[3]; b[2] = h[2]; + MathExtra::cross3(a,b,c); + area[2] = sqrt(c[0]*c[0] + c[1]*c[1] + c[2]*c[2]) / (sy*sz); + } + + int index = 0; + double surf; + double bestsurf = 2.0 * (area[0]+area[1]+area[2]); + + for (int m = 0; m < npossible; m++) { + surf = area[0]/factors[m][0]/factors[m][1] + + area[1]/factors[m][0]/factors[m][2] + + area[2]/factors[m][1]/factors[m][2]; + if (surf < bestsurf) { bestsurf = surf; + best[0] = factors[m][0]; + best[1] = factors[m][1]; + best[2] = factors[m][2]; + index = m; + } + } + + return index; +} + +/* ---------------------------------------------------------------------- + minus,plus = indices of neighboring processors in a dimension +------------------------------------------------------------------------- */ + +void ProcMap::grid_shift(int myloc, int nprocs, int &minus, int &plus) +{ + minus = myloc - 1; + if (minus < 0) minus = nprocs - 1; + plus = myloc + 1; + if (plus == nprocs) plus = 0; +} + diff --git a/src/procmap.h b/src/procmap.h index 80d9b5a0e9..2704a4e339 100644 --- a/src/procmap.h +++ b/src/procmap.h @@ -22,9 +22,9 @@ class ProcMap : protected Pointers { public: ProcMap(class LAMMPS *); ~ProcMap() {} - int onelevel_grid(int, int *, int *, int, int, int *); - int twolevel_grid(int, int *, int *, int, int *, int *, int, int, int *); - int numa_grid(int, int *, int *, int *); + void onelevel_grid(int, int *, int *, int, int, int *); + void twolevel_grid(int, int *, int *, int, int *, int *, int, 
int, int *); + void numa_grid(int, int *, int *, int *); void custom_grid(char *, int, int *, int *); void cart_map(int, int *, int *, int [3][2], int ***); void cart_map(int, int *, int *, int *, int [3][2], int ***); @@ -35,15 +35,19 @@ class ProcMap : protected Pointers { void output(char *, int *, int ***); private: - int other_style; - int other_procgrid[3]; - int nodegrid[3]; + int nodegrid[3]; // NUMA params int node_id; int procs_per_node; int procs_per_numa; - int **cmap; - int procs2box(int, int *, int *, const int, const int, const int, int); + int **cmap; // info in custom grid file + + int factor(int, int **); + int combine_factors(int, int **, int, int **, int **); + int cull_2d(int, int **, int); + int cull_user(int, int **, int, int *); + int cull_other(int, int **, int, int, int *); + int best_factors(int, int **, int *, int, int, int); void grid_shift(int, int, int &, int &); };