Optimizations to kspace_style MSM, including improving the single-core performance and increasing the parallel scalability. A bug in MSM for mixed periodic and non-periodic boundary conditions was also fixed.

git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@9597 f3b2605a-c512-4ea7-a41b-209d697bcdaa
This commit is contained in:
pscrozi 2013-03-06 21:12:28 +00:00
parent a3d29c1665
commit ed72c6c5ae
7 changed files with 1211 additions and 267 deletions

View File

@ -30,8 +30,8 @@ CommGrid::CommGrid(LAMMPS *lmp, MPI_Comm gcomm, int forward, int reverse,
int pxlo, int pxhi, int pylo, int pyhi, int pzlo, int pzhi)
: Pointers(lmp)
{
me = comm->me;
gridcomm = gcomm;
MPI_Comm_rank(gridcomm,&me);
nforward = forward;
nreverse = reverse;
@ -50,6 +50,61 @@ CommGrid::CommGrid(LAMMPS *lmp, MPI_Comm gcomm, int forward, int reverse,
outzlo = ozlo;
outzhi = ozhi;
outxlo_max = oxlo;
outxhi_max = oxhi;
outylo_max = oylo;
outyhi_max = oyhi;
outzlo_max = ozlo;
outzhi_max = ozhi;
procxlo = pxlo;
procxhi = pxhi;
procylo = pylo;
procyhi = pyhi;
proczlo = pzlo;
proczhi = pzhi;
nswap = 0;
swap = NULL;
buf1 = buf2 = NULL;
}
/* ---------------------------------------------------------------------- */
CommGrid::CommGrid(LAMMPS *lmp, MPI_Comm gcomm, int forward, int reverse,
int ixlo, int ixhi, int iylo, int iyhi, int izlo, int izhi,
int oxlo, int oxhi, int oylo, int oyhi, int ozlo, int ozhi,
int oxlo_max, int oxhi_max, int oylo_max, int oyhi_max, int ozlo_max, int ozhi_max,
int pxlo, int pxhi, int pylo, int pyhi, int pzlo, int pzhi)
: Pointers(lmp)
{
gridcomm = gcomm;
MPI_Comm_rank(gridcomm,&me);
nforward = forward;
nreverse = reverse;
inxlo = ixlo;
inxhi = ixhi;
inylo = iylo;
inyhi = iyhi;
inzlo = izlo;
inzhi = izhi;
outxlo = oxlo;
outxhi = oxhi;
outylo = oylo;
outyhi = oyhi;
outzlo = ozlo;
outzhi = ozhi;
outxlo_max = oxlo_max;
outxhi_max = oxhi_max;
outylo_max = oylo_max;
outyhi_max = oyhi_max;
outzlo_max = ozlo_max;
outzhi_max = ozhi_max;
procxlo = pxlo;
procxhi = pxhi;
procylo = pylo;
@ -482,7 +537,8 @@ void CommGrid::reverse_comm(KSpace *kspace, int which)
/* ----------------------------------------------------------------------
create 1d list of offsets into 3d array section (xlo:xhi,ylo:yhi,zlo:zhi)
assume 3d array is allocated as (outxlo:outxhi,outylo:outyhi,outzlo:outzhi)
assume 3d array is allocated as (outxlo_max:outxhi_max,outylo_max:outyhi_max,
outzlo_max:outzhi_max)
------------------------------------------------------------------------- */
int CommGrid::indices(int *&list,
@ -491,15 +547,15 @@ int CommGrid::indices(int *&list,
int nmax = (xhi-xlo+1) * (yhi-ylo+1) * (zhi-zlo+1);
memory->create(list,nmax,"Commgrid:list");
int nx = (outxhi-outxlo+1);
int ny = (outyhi-outylo+1);
int nx = (outxhi_max-outxlo_max+1);
int ny = (outyhi_max-outylo_max+1);
int n = 0;
int ix,iy,iz;
for (iz = zlo; iz <= zhi; iz++)
for (iy = ylo; iy <= yhi; iy++)
for (ix = xlo; ix <= xhi; ix++)
list[n++] = (iz-outzlo)*ny*nx + (iy-outylo)*nx + (ix-outxlo);
list[n++] = (iz-outzlo_max)*ny*nx + (iy-outylo_max)*nx + (ix-outxlo_max);
return nmax;
}

View File

@ -32,6 +32,11 @@ class CommGrid : protected Pointers {
int, int, int, int, int, int,
int, int, int, int, int, int,
int, int, int, int, int, int);
CommGrid(class LAMMPS *, MPI_Comm, int, int,
int, int, int, int, int, int,
int, int, int, int, int, int,
int, int, int, int, int, int,
int, int, int, int, int, int);
~CommGrid();
void ghost_notify();
int ghost_overlap();
@ -55,6 +60,7 @@ class CommGrid : protected Pointers {
int inxlo,inxhi,inylo,inyhi,inzlo,inzhi;
int outxlo,outxhi,outylo,outyhi,outzlo,outzhi;
int outxlo_max,outxhi_max,outylo_max,outyhi_max,outzlo_max,outzhi_max;
int procxlo,procxhi,procylo,procyhi,proczlo,proczhi;
int ghostxlo,ghostxhi,ghostylo,ghostyhi,ghostzlo,ghostzhi;

File diff suppressed because it is too large Load Diff

View File

@ -51,8 +51,10 @@ class MSM : public KSpace {
int *nxhi_in,*nyhi_in,*nzhi_in;
int *nxlo_out,*nylo_out,*nzlo_out;
int *nxhi_out,*nyhi_out,*nzhi_out;
int *ngrid;
int *ngrid,*active_flag;
int *alpha,*betax,*betay,*betaz;
int nxlo_out_all,nylo_out_all,nzlo_out_all;
int nxhi_out_all,nyhi_out_all,nzhi_out_all;
int nxlo_direct,nxhi_direct,nylo_direct;
int nyhi_direct,nzlo_direct,nzhi_direct;
int nmax_direct;
@ -108,14 +110,19 @@ class MSM : public KSpace {
void particle_map();
void make_rho();
virtual void direct(int);
void direct_peratom(int);
void direct_top(int);
void direct_peratom_top(int);
void restriction(int);
void prolongation(int);
void grid_swap_forward(int,double*** &);
void grid_swap_reverse(int,double*** &);
void fieldforce();
void fieldforce_peratom();
void compute_phis(const double &, const double &, const double &);
void compute_phis_and_dphis(const double &, const double &, const double &);
double compute_phi(const double &);
double compute_dphi(const double &);
inline double compute_phi(const double &);
inline double compute_dphi(const double &);
void get_g_direct();
void get_virial_direct();
void get_g_direct_top(int);
@ -153,9 +160,9 @@ E: Kspace style requires atom attribute q
The atom style defined does not have these attributes.
E: Cannot use slab correction with MSM
E: Slab correction not needed for MSM
Slab correction can only be used with Ewald and PPPM, not MSM.
Slab correction can only be used with Ewald and PPPM and is not needed by MSM.
E: MSM order must be 4, 6, 8, or 10

View File

@ -78,15 +78,19 @@ void MSMCG::compute(int eflag, int vflag)
int i,j,n;
// set energy/virial flags
// invoke allocate_peratom() if needed for first time
if (eflag || vflag) ev_setup(eflag,vflag);
else evflag = evflag_atom = eflag_global = vflag_global =
eflag_atom = vflag_atom = eflag_either = vflag_either = 0;
// invoke allocate_peratom() if needed for first time
if (vflag_atom && !peratom_allocate_flag) {
allocate_peratom();
for (n=0; n<levels; n++) {
cg_peratom_all->ghost_notify();
cg_peratom_all->setup();
for (int n=0; n<levels; n++) {
if (!active_flag[n]) continue;
cg_peratom[n]->ghost_notify();
cg_peratom[n]->setup();
}
@ -161,28 +165,52 @@ void MSMCG::compute(int eflag, int vflag)
particle_map();
make_rho();
// all procs reverse communicate charge density values from their ghost grid points
// to fully sum contribution in their 3d grid
current_level = 0;
cg[0]->reverse_comm(this,REVERSE_RHO);
cg_all->reverse_comm(this,REVERSE_RHO);
// all procs communicate density values from their ghost cells
// to fully sum contribution in their 3d bricks
// forward communicate charge density values to fill ghost grid points
// compute direct sum interaction and then restrict to coarser grid
for (n=0; n<=levels-2; n++) {
for (int n=0; n<=levels-2; n++) {
if (!active_flag[n]) continue;
current_level = n;
cg[n]->forward_comm(this,FORWARD_RHO);
direct(n);
restriction(n);
}
// top grid level
current_level = levels-1;
cg[levels-1]->forward_comm(this,FORWARD_RHO);
direct_top(levels-1);
for (n=levels-2; n>=0; n--) {
// compute direct interation for top grid level for nonperiodic
// and for second from top grid level for periodic
if (active_flag[levels-1]) {
if (domain->nonperiodic) {
current_level = levels-1;
cg[levels-1]->forward_comm(this,FORWARD_RHO);
direct_top(levels-1);
cg[levels-1]->reverse_comm(this,REVERSE_AD);
if (vflag_atom)
cg_peratom[levels-1]->reverse_comm(this,REVERSE_AD_PERATOM);
} else {
// Here using MPI_Allreduce is cheaper than using commgrid
grid_swap_forward(levels-1,qgrid[levels-1]);
direct(levels-1);
grid_swap_reverse(levels-1,egrid[levels-1]);
current_level = levels-1;
if (vflag_atom)
cg_peratom[levels-1]->reverse_comm(this,REVERSE_AD_PERATOM);
}
}
// prolongate energy/virial from coarser grid to finer grid
// reverse communicate from ghost grid points to get full sum
for (int n=levels-2; n>=0; n--) {
if (!active_flag[n]) continue;
prolongation(n);
current_level = n;
@ -198,26 +226,25 @@ void MSMCG::compute(int eflag, int vflag)
// to fill ghost cells surrounding their 3d bricks
current_level = 0;
cg[0]->forward_comm(this,FORWARD_AD);
cg_all->forward_comm(this,FORWARD_AD);
// extra per-atom energy/virial communication
if (vflag_atom)
cg_peratom[0]->forward_comm(this,FORWARD_AD_PERATOM);
cg_peratom_all->forward_comm(this,FORWARD_AD_PERATOM);
// calculate the force on my particles (interpolation)
fieldforce();
// calculate the per-atom energy for my particles
// calculate the per-atom energy/virial for my particles
if (evflag_atom) fieldforce_peratom();
// total long-range energy
const double qscale = force->qqrd2e * scale;
// Total long-range energy
if (eflag_global) {
double energy_all;
MPI_Allreduce(&energy,&energy_all,1,MPI_DOUBLE,MPI_SUM,world);
@ -228,7 +255,7 @@ void MSMCG::compute(int eflag, int vflag)
energy *= 0.5*qscale;
}
// Total long-range virial
// total long-range virial
if (vflag_global) {
double virial_all[6];
@ -338,7 +365,7 @@ void MSMCG::make_rho()
dy = ny - (x[i][1]-boxlo[1])*delyinv[0];
dz = nz - (x[i][2]-boxlo[2])*delzinv[0];
compute_phis_and_dphis(dx,dy,dz);
compute_phis(dx,dy,dz);
z0 = q[i];
for (n = nlower; n <= nupper; n++) {

View File

@ -43,6 +43,9 @@ extern "C" {
#define MPI_MINLOC 5
#define MPI_LOR 6
#define MPI_UNDEFINED -1
#define MPI_COMM_NULL -1
#define MPI_ANY_SOURCE -1
#define MPI_Comm int

View File

@ -79,15 +79,19 @@ void MSMCGOMP::compute(int eflag, int vflag)
int i,j,n;
// set energy/virial flags
// invoke allocate_peratom() if needed for first time
if (eflag || vflag) ev_setup(eflag,vflag);
else evflag = evflag_atom = eflag_global = vflag_global =
eflag_atom = vflag_atom = eflag_either = vflag_either = 0;
// invoke allocate_peratom() if needed for first time
if (vflag_atom && !peratom_allocate_flag) {
allocate_peratom();
for (n=0; n<levels; n++) {
cg_peratom_all->ghost_notify();
cg_peratom_all->setup();
for (int n=0; n<levels; n++) {
if (!active_flag[n]) continue;
cg_peratom[n]->ghost_notify();
cg_peratom[n]->setup();
}
@ -162,13 +166,17 @@ void MSMCGOMP::compute(int eflag, int vflag)
particle_map();
make_rho();
// all procs reverse communicate charge density values from their ghost grid points
// to fully sum contribution in their 3d grid
current_level = 0;
cg[0]->reverse_comm(this,REVERSE_RHO);
cg_all->reverse_comm(this,REVERSE_RHO);
// all procs communicate density values from their ghost cells
// to fully sum contribution in their 3d bricks
// forward communicate charge density values to fill ghost grid points
// compute direct sum interaction and then restrict to coarser grid
for (n=0; n<=levels-2; n++) {
for (int n=0; n<=levels-2; n++) {
if (!active_flag[n]) continue;
current_level = n;
cg[n]->forward_comm(this,FORWARD_RHO);
@ -176,14 +184,34 @@ void MSMCGOMP::compute(int eflag, int vflag)
restriction(n);
}
// top grid level
current_level = levels-1;
cg[levels-1]->forward_comm(this,FORWARD_RHO);
direct_top(levels-1);
// compute direct interation for top grid level for nonperiodic
// and for second from top grid level for periodic
for (n=levels-2; n>=0; n--) {
if (active_flag[levels-1]) {
if (domain->nonperiodic) {
current_level = levels-1;
cg[levels-1]->forward_comm(this,FORWARD_RHO);
direct_top(levels-1);
cg[levels-1]->reverse_comm(this,REVERSE_AD);
if (vflag_atom)
cg_peratom[levels-1]->reverse_comm(this,REVERSE_AD_PERATOM);
} else {
// Here using MPI_Allreduce is cheaper than using commgrid
grid_swap_forward(levels-1,qgrid[levels-1]);
direct(levels-1);
grid_swap_reverse(levels-1,egrid[levels-1]);
current_level = levels-1;
if (vflag_atom)
cg_peratom[levels-1]->reverse_comm(this,REVERSE_AD_PERATOM);
}
}
// prolongate energy/virial from coarser grid to finer grid
// reverse communicate from ghost grid points to get full sum
for (int n=levels-2; n>=0; n--) {
if (!active_flag[n]) continue;
prolongation(n);
current_level = n;
@ -199,25 +227,24 @@ void MSMCGOMP::compute(int eflag, int vflag)
// to fill ghost cells surrounding their 3d bricks
current_level = 0;
cg[0]->forward_comm(this,FORWARD_AD);
cg_all->forward_comm(this,FORWARD_AD);
// extra per-atom energy/virial communication
if (vflag_atom)
cg_peratom[0]->forward_comm(this,FORWARD_AD_PERATOM);
cg_peratom_all->forward_comm(this,FORWARD_AD_PERATOM);
// calculate the force on my particles (interpolation)
fieldforce();
// calculate the per-atom energy for my particles
// calculate the per-atom energy/virial for my particles
if (evflag_atom) fieldforce_peratom();
const double qscale = force->qqrd2e * scale;
// total long-range energy
// Total long-range energy
const double qscale = force->qqrd2e * scale;
if (eflag_global) {
double energy_all;
@ -229,7 +256,7 @@ void MSMCGOMP::compute(int eflag, int vflag)
energy *= 0.5*qscale;
}
// Total long-range virial
// total long-range virial
if (vflag_global) {
double virial_all[6];
@ -352,7 +379,7 @@ void MSMCGOMP::make_rho()
dy = ny - (x[i][1]-boxlo[1])*delyinv[0];
dz = nz - (x[i][2]-boxlo[2])*delzinv[0];
compute_phis_and_dphis(dx,dy,dz);
compute_phis(dx,dy,dz);
z0 = q[i];
for (n = nlower; n <= nupper; n++) {