Optimizations to kspace_style MSM, including improving the single-core performance and increasing the parallel scalability. A bug in MSM for mixed periodic and non-periodic boundary conditions was also fixed.
git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@9597 f3b2605a-c512-4ea7-a41b-209d697bcdaa
parent a3d29c1665
commit ed72c6c5ae
src/KSPACE/commgrid.cpp

@@ -30,8 +30,8 @@ CommGrid::CommGrid(LAMMPS *lmp, MPI_Comm gcomm, int forward, int reverse,
                    int pxlo, int pxhi, int pylo, int pyhi, int pzlo, int pzhi)
   : Pointers(lmp)
 {
-  me = comm->me;
   gridcomm = gcomm;
+  MPI_Comm_rank(gridcomm,&me);
 
   nforward = forward;
   nreverse = reverse;
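Note on the change above: the constructor previously copied the world rank (comm->me) but now asks the grid communicator itself, since a rank's position in gcomm need not match its position in MPI_COMM_WORLD. A minimal standalone sketch (not part of the patch) showing the two ranks diverging:

#include <mpi.h>
#include <cstdio>

int main(int argc, char **argv)
{
  MPI_Init(&argc,&argv);
  int world_rank;
  MPI_Comm_rank(MPI_COMM_WORLD,&world_rank);

  // split even and odd world ranks into two sub-communicators
  MPI_Comm gcomm;
  MPI_Comm_split(MPI_COMM_WORLD,world_rank % 2,0,&gcomm);

  int grid_rank;
  MPI_Comm_rank(gcomm,&grid_rank);   // may differ from world_rank

  printf("world rank %d -> grid rank %d\n",world_rank,grid_rank);
  MPI_Comm_free(&gcomm);
  MPI_Finalize();
  return 0;
}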
@ -50,6 +50,61 @@ CommGrid::CommGrid(LAMMPS *lmp, MPI_Comm gcomm, int forward, int reverse,
|
|||
outzlo = ozlo;
|
||||
outzhi = ozhi;
|
||||
|
||||
outxlo_max = oxlo;
|
||||
outxhi_max = oxhi;
|
||||
outylo_max = oylo;
|
||||
outyhi_max = oyhi;
|
||||
outzlo_max = ozlo;
|
||||
outzhi_max = ozhi;
|
||||
|
||||
procxlo = pxlo;
|
||||
procxhi = pxhi;
|
||||
procylo = pylo;
|
||||
procyhi = pyhi;
|
||||
proczlo = pzlo;
|
||||
proczhi = pzhi;
|
||||
|
||||
nswap = 0;
|
||||
swap = NULL;
|
||||
buf1 = buf2 = NULL;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
CommGrid::CommGrid(LAMMPS *lmp, MPI_Comm gcomm, int forward, int reverse,
|
||||
int ixlo, int ixhi, int iylo, int iyhi, int izlo, int izhi,
|
||||
int oxlo, int oxhi, int oylo, int oyhi, int ozlo, int ozhi,
|
||||
int oxlo_max, int oxhi_max, int oylo_max, int oyhi_max, int ozlo_max, int ozhi_max,
|
||||
int pxlo, int pxhi, int pylo, int pyhi, int pzlo, int pzhi)
|
||||
: Pointers(lmp)
|
||||
{
|
||||
gridcomm = gcomm;
|
||||
MPI_Comm_rank(gridcomm,&me);
|
||||
|
||||
nforward = forward;
|
||||
nreverse = reverse;
|
||||
|
||||
inxlo = ixlo;
|
||||
inxhi = ixhi;
|
||||
inylo = iylo;
|
||||
inyhi = iyhi;
|
||||
inzlo = izlo;
|
||||
inzhi = izhi;
|
||||
|
||||
outxlo = oxlo;
|
||||
outxhi = oxhi;
|
||||
outylo = oylo;
|
||||
outyhi = oyhi;
|
||||
outzlo = ozlo;
|
||||
outzhi = ozhi;
|
||||
|
||||
outxlo_max = oxlo_max;
|
||||
outxhi_max = oxhi_max;
|
||||
outylo_max = oylo_max;
|
||||
outyhi_max = oyhi_max;
|
||||
outzlo_max = ozlo_max;
|
||||
outzhi_max = ozhi_max;
|
||||
|
||||
procxlo = pxlo;
|
||||
procxhi = pxhi;
|
||||
procylo = pylo;
|
||||
|
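The second constructor added above accepts the communicated extent (o*) and the allocated extent (o*_max) as separate arguments; the first constructor keeps them equal, as its new out*_max assignments show. A rough standalone analogy (hypothetical Brick type, not from LAMMPS): tracking a used range inside a larger allocation, much like size versus capacity.

#include <cstdio>

struct Brick {
  int lo, hi;           // currently used extent
  int lo_max, hi_max;   // allocated extent

  // used extent equals allocated extent (like the original constructor)
  Brick(int l, int h) : lo(l), hi(h), lo_max(l), hi_max(h) {}

  // used extent sits inside a larger allocation (like the new overload)
  Brick(int l, int h, int lm, int hm) : lo(l), hi(h), lo_max(lm), hi_max(hm) {}

  int alloc_size() const { return hi_max - lo_max + 1; }
};

int main()
{
  Brick a(-2,9);       // allocation matches use
  Brick b(0,7,-2,9);   // smaller used window, same allocation
  printf("%d %d\n",a.alloc_size(),b.alloc_size());
  return 0;
}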
@@ -482,7 +537,8 @@ void CommGrid::reverse_comm(KSpace *kspace, int which)
 
 /* ----------------------------------------------------------------------
    create 1d list of offsets into 3d array section (xlo:xhi,ylo:yhi,zlo:zhi)
-   assume 3d array is allocated as (outxlo:outxhi,outylo:outyhi,outzlo:outzhi)
+   assume 3d array is allocated as (outxlo_max:outxhi_max,outylo_max:outyhi_max,
+   outzlo_max:outzhi_max)
 ------------------------------------------------------------------------- */
 
 int CommGrid::indices(int *&list,
@@ -491,15 +547,15 @@ int CommGrid::indices(int *&list,
   int nmax = (xhi-xlo+1) * (yhi-ylo+1) * (zhi-zlo+1);
   memory->create(list,nmax,"Commgrid:list");
 
-  int nx = (outxhi-outxlo+1);
-  int ny = (outyhi-outylo+1);
+  int nx = (outxhi_max-outxlo_max+1);
+  int ny = (outyhi_max-outylo_max+1);
 
   int n = 0;
   int ix,iy,iz;
   for (iz = zlo; iz <= zhi; iz++)
     for (iy = ylo; iy <= yhi; iy++)
       for (ix = xlo; ix <= xhi; ix++)
-        list[n++] = (iz-outzlo)*ny*nx + (iy-outylo)*nx + (ix-outxlo);
+        list[n++] = (iz-outzlo_max)*ny*nx + (iy-outylo_max)*nx + (ix-outxlo_max);
 
   return nmax;
 }
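indices() now computes flat offsets against the allocated bounds (out*_max), matching the array's true dimensions even when the communicated window is smaller. A self-contained sketch of the same row-major offset formula, with bounds values assumed purely for illustration:

#include <cstdio>

int main()
{
  // assumed allocated bounds of the 3d brick (illustrative values only)
  const int outxlo_max = -2, outxhi_max = 9;
  const int outylo_max = -2, outyhi_max = 9;
  const int outzlo_max = -2;

  // x varies fastest, so strides come from the allocated x/y dimensions
  const int nx = outxhi_max - outxlo_max + 1;
  const int ny = outyhi_max - outylo_max + 1;

  int ix = 3, iy = 4, iz = 5;   // a grid point inside the section
  int offset = (iz-outzlo_max)*ny*nx + (iy-outylo_max)*nx + (ix-outxlo_max);
  printf("flat offset = %d\n",offset);
  return 0;
}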
src/KSPACE/commgrid.h

@@ -32,6 +32,11 @@ class CommGrid : protected Pointers {
            int, int, int, int, int, int,
            int, int, int, int, int, int,
            int, int, int, int, int, int);
+  CommGrid(class LAMMPS *, MPI_Comm, int, int,
+           int, int, int, int, int, int,
+           int, int, int, int, int, int,
+           int, int, int, int, int, int,
+           int, int, int, int, int, int);
   ~CommGrid();
   void ghost_notify();
   int ghost_overlap();
@@ -55,6 +60,7 @@ class CommGrid : protected Pointers {
 
   int inxlo,inxhi,inylo,inyhi,inzlo,inzhi;
   int outxlo,outxhi,outylo,outyhi,outzlo,outzhi;
+  int outxlo_max,outxhi_max,outylo_max,outyhi_max,outzlo_max,outzhi_max;
   int procxlo,procxhi,procylo,procyhi,proczlo,proczhi;
   int ghostxlo,ghostxhi,ghostylo,ghostyhi,ghostzlo,ghostzhi;
src/KSPACE/msm.cpp

(1254 changed lines; diff suppressed because it is too large)
src/KSPACE/msm.h

@@ -51,8 +51,10 @@ class MSM : public KSpace {
   int *nxhi_in,*nyhi_in,*nzhi_in;
   int *nxlo_out,*nylo_out,*nzlo_out;
   int *nxhi_out,*nyhi_out,*nzhi_out;
-  int *ngrid;
+  int *ngrid,*active_flag;
   int *alpha,*betax,*betay,*betaz;
+  int nxlo_out_all,nylo_out_all,nzlo_out_all;
+  int nxhi_out_all,nyhi_out_all,nzhi_out_all;
   int nxlo_direct,nxhi_direct,nylo_direct;
   int nyhi_direct,nzlo_direct,nzhi_direct;
   int nmax_direct;
@@ -108,14 +110,19 @@ class MSM : public KSpace {
   void particle_map();
   void make_rho();
   virtual void direct(int);
+  void direct_peratom(int);
   void direct_top(int);
+  void direct_peratom_top(int);
   void restriction(int);
   void prolongation(int);
+  void grid_swap_forward(int,double*** &);
+  void grid_swap_reverse(int,double*** &);
   void fieldforce();
   void fieldforce_peratom();
+  void compute_phis(const double &, const double &, const double &);
   void compute_phis_and_dphis(const double &, const double &, const double &);
-  double compute_phi(const double &);
-  double compute_dphi(const double &);
+  inline double compute_phi(const double &);
+  inline double compute_dphi(const double &);
   void get_g_direct();
   void get_virial_direct();
   void get_g_direct_top(int);
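compute_phi() and compute_dphi() are now declared inline: these tiny weight evaluations run once per grid point per atom, so letting the compiler expand them in place removes call overhead in the hot interpolation loops. A generic sketch of the mechanism (hypothetical class, not msm.h):

#include <cstdio>

class Weights {
 public:
  // defined in the class body, hence implicitly inline:
  // calls can be expanded with no function-call overhead
  double compute_phi(double x) const { return 1.0 - x*x; }
};

int main()
{
  Weights w;
  printf("%g\n",w.compute_phi(0.5));
  return 0;
}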
@@ -153,9 +160,9 @@ E: Kspace style requires atom attribute q
 
 The atom style defined does not have these attributes.
 
-E: Cannot use slab correction with MSM
+E: Slab correction not needed for MSM
 
-Slab correction can only be used with Ewald and PPPM, not MSM.
+Slab correction can only be used with Ewald and PPPM and is not needed by MSM.
 
 E: MSM order must be 4, 6, 8, or 10
src/KSPACE/msm_cg.cpp

@@ -78,15 +78,19 @@ void MSMCG::compute(int eflag, int vflag)
   int i,j,n;
 
   // set energy/virial flags
-  // invoke allocate_peratom() if needed for first time
 
   if (eflag || vflag) ev_setup(eflag,vflag);
   else evflag = evflag_atom = eflag_global = vflag_global =
     eflag_atom = vflag_atom = eflag_either = vflag_either = 0;
 
+  // invoke allocate_peratom() if needed for first time
+
   if (vflag_atom && !peratom_allocate_flag) {
     allocate_peratom();
-    for (n=0; n<levels; n++) {
+    cg_peratom_all->ghost_notify();
+    cg_peratom_all->setup();
+    for (int n=0; n<levels; n++) {
+      if (!active_flag[n]) continue;
       cg_peratom[n]->ghost_notify();
       cg_peratom[n]->setup();
     }
@@ -161,28 +165,52 @@ void MSMCG::compute(int eflag, int vflag)
   particle_map();
   make_rho();
 
+  // all procs reverse communicate charge density values from their ghost grid points
+  // to fully sum contribution in their 3d grid
+
   current_level = 0;
-  cg[0]->reverse_comm(this,REVERSE_RHO);
+  cg_all->reverse_comm(this,REVERSE_RHO);
 
-  // all procs communicate density values from their ghost cells
-  // to fully sum contribution in their 3d bricks
+  // forward communicate charge density values to fill ghost grid points
+  // compute direct sum interaction and then restrict to coarser grid
 
-  for (n=0; n<=levels-2; n++) {
+  for (int n=0; n<=levels-2; n++) {
+    if (!active_flag[n]) continue;
     current_level = n;
     cg[n]->forward_comm(this,FORWARD_RHO);
 
     direct(n);
     restriction(n);
   }
 
-  // top grid level
-
-  current_level = levels-1;
-  cg[levels-1]->forward_comm(this,FORWARD_RHO);
-  direct_top(levels-1);
+  // compute direct interation for top grid level for nonperiodic
+  // and for second from top grid level for periodic
 
-  for (n=levels-2; n>=0; n--) {
+  if (active_flag[levels-1]) {
+    if (domain->nonperiodic) {
+      current_level = levels-1;
+      cg[levels-1]->forward_comm(this,FORWARD_RHO);
+      direct_top(levels-1);
+      cg[levels-1]->reverse_comm(this,REVERSE_AD);
+      if (vflag_atom)
+        cg_peratom[levels-1]->reverse_comm(this,REVERSE_AD_PERATOM);
+    } else {
+      // Here using MPI_Allreduce is cheaper than using commgrid
+      grid_swap_forward(levels-1,qgrid[levels-1]);
+      direct(levels-1);
+      grid_swap_reverse(levels-1,egrid[levels-1]);
+      current_level = levels-1;
+      if (vflag_atom)
+        cg_peratom[levels-1]->reverse_comm(this,REVERSE_AD_PERATOM);
+    }
+  }
+
+  // prolongate energy/virial from coarser grid to finer grid
+  // reverse communicate from ghost grid points to get full sum
+
+  for (int n=levels-2; n>=0; n--) {
+    if (!active_flag[n]) continue;
     prolongation(n);
 
     current_level = n;
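Two patterns drive the scalability gain visible above: inactive coarse levels are skipped outright via active_flag[], and for the periodic top level the tiny coarse grid is summed with one MPI_Allreduce instead of the point-to-point commgrid exchange, as the in-code comment notes. A minimal sketch of that collective pattern (flattened grid, assumed size; not the actual grid_swap_forward implementation):

#include <mpi.h>
#include <cstdio>

int main(int argc, char **argv)
{
  MPI_Init(&argc,&argv);
  int rank;
  MPI_Comm_rank(MPI_COMM_WORLD,&rank);

  const int ngrid = 4*4*4;        // assumed coarse-grid size
  double qgrid[ngrid], qsum[ngrid];
  for (int i = 0; i < ngrid; i++) qgrid[i] = rank;   // partial densities

  // one collective replaces many small ghost-cell messages:
  // cheap precisely because the coarse grid is tiny
  MPI_Allreduce(qgrid,qsum,ngrid,MPI_DOUBLE,MPI_SUM,MPI_COMM_WORLD);

  if (rank == 0) printf("qsum[0] = %g\n",qsum[0]);
  MPI_Finalize();
  return 0;
}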
@@ -198,26 +226,25 @@ void MSMCG::compute(int eflag, int vflag)
   // to fill ghost cells surrounding their 3d bricks
 
   current_level = 0;
-
-  cg[0]->forward_comm(this,FORWARD_AD);
+  cg_all->forward_comm(this,FORWARD_AD);
 
   // extra per-atom energy/virial communication
 
   if (vflag_atom)
-    cg_peratom[0]->forward_comm(this,FORWARD_AD_PERATOM);
+    cg_peratom_all->forward_comm(this,FORWARD_AD_PERATOM);
 
   // calculate the force on my particles (interpolation)
 
   fieldforce();
 
-  // calculate the per-atom energy for my particles
+  // calculate the per-atom energy/virial for my particles
 
   if (evflag_atom) fieldforce_peratom();
 
+  // total long-range energy
+
   const double qscale = force->qqrd2e * scale;
 
-  // Total long-range energy
-
   if (eflag_global) {
     double energy_all;
     MPI_Allreduce(&energy,&energy_all,1,MPI_DOUBLE,MPI_SUM,world);
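On the factor applied after the reduction above: each rank holds only a partial long-range energy, MPI_Allreduce sums the partials, and the grid sum counts every charge-charge interaction twice, so the total is roughly (a sketch of the usual bookkeeping, not a formula quoted from msm_cg.cpp)

  energy = 0.5 * qscale * sum_i q_i * phi(r_i)

where phi(r_i) is the interpolated grid potential at atom i and qscale = force->qqrd2e * scale converts to the code's energy units.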
@@ -228,7 +255,7 @@ void MSMCG::compute(int eflag, int vflag)
     energy *= 0.5*qscale;
   }
 
-  // Total long-range virial
+  // total long-range virial
 
   if (vflag_global) {
     double virial_all[6];
@@ -338,7 +365,7 @@ void MSMCG::make_rho()
     dy = ny - (x[i][1]-boxlo[1])*delyinv[0];
     dz = nz - (x[i][2]-boxlo[2])*delzinv[0];
 
-    compute_phis_and_dphis(dx,dy,dz);
+    compute_phis(dx,dy,dz);
 
     z0 = q[i];
     for (n = nlower; n <= nupper; n++) {
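make_rho() only spreads charge onto the grid, so it needs the interpolation weights but not their derivatives; switching from compute_phis_and_dphis() to the new compute_phis() skips that derivative work, part of the single-core speedup. A generic standalone sketch of the split (hypothetical hat-function weights, not MSM's actual basis):

#include <cmath>
#include <cstdio>

// stand-in 1d weight and its derivative
static double phi(double x)  { double a = 1.0 - std::fabs(x); return a > 0.0 ? a : 0.0; }
static double dphi(double x) { return std::fabs(x) < 1.0 ? (x < 0.0 ? 1.0 : -1.0) : 0.0; }

// weights only: all that charge assignment needs
static void compute_phis(double dx, double *p, int n)
{
  for (int i = 0; i < n; i++) p[i] = phi(dx - i);
}

// weights plus derivatives: only needed when interpolating forces
static void compute_phis_and_dphis(double dx, double *p, double *dp, int n)
{
  for (int i = 0; i < n; i++) {
    p[i] = phi(dx - i);
    dp[i] = dphi(dx - i);
  }
}

int main()
{
  double p[3], dp[3];
  compute_phis(0.4,p,3);
  compute_phis_and_dphis(0.4,p,dp,3);
  printf("p[0]=%g dp[0]=%g\n",p[0],dp[0]);
  return 0;
}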
src/STUBS/mpi.h

@@ -43,6 +43,9 @@ extern "C" {
 #define MPI_MINLOC 5
 #define MPI_LOR 6
 
+#define MPI_UNDEFINED -1
+#define MPI_COMM_NULL -1
+
 #define MPI_ANY_SOURCE -1
 
 #define MPI_Comm int
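The two new defines fill out the serial STUBS mpi.h so that code which splits a communicator with a color of MPI_UNDEFINED, then compares the result to MPI_COMM_NULL, still compiles in a serial build. A plausible usage pattern (hypothetical helper, not from the patch):

#include <mpi.h>
#include <cstdio>

// returns 1 if this rank ends up in the new communicator, 0 otherwise
static int build_subcomm(MPI_Comm world, int participates, MPI_Comm *gcomm)
{
  int color = participates ? 0 : MPI_UNDEFINED;   // excluded ranks opt out
  MPI_Comm_split(world,color,0,gcomm);
  return *gcomm != MPI_COMM_NULL;   // excluded ranks receive MPI_COMM_NULL
}

int main(int argc, char **argv)
{
  MPI_Init(&argc,&argv);
  int rank;
  MPI_Comm_rank(MPI_COMM_WORLD,&rank);

  MPI_Comm gcomm;
  int active = build_subcomm(MPI_COMM_WORLD,rank % 2 == 0,&gcomm);
  printf("rank %d active in subcomm: %d\n",rank,active);

  if (active) MPI_Comm_free(&gcomm);
  MPI_Finalize();
  return 0;
}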
src/USER-OMP/msm_cg_omp.cpp

@@ -79,15 +79,19 @@ void MSMCGOMP::compute(int eflag, int vflag)
   int i,j,n;
 
   // set energy/virial flags
-  // invoke allocate_peratom() if needed for first time
 
   if (eflag || vflag) ev_setup(eflag,vflag);
   else evflag = evflag_atom = eflag_global = vflag_global =
     eflag_atom = vflag_atom = eflag_either = vflag_either = 0;
 
+  // invoke allocate_peratom() if needed for first time
+
   if (vflag_atom && !peratom_allocate_flag) {
     allocate_peratom();
-    for (n=0; n<levels; n++) {
+    cg_peratom_all->ghost_notify();
+    cg_peratom_all->setup();
+    for (int n=0; n<levels; n++) {
+      if (!active_flag[n]) continue;
       cg_peratom[n]->ghost_notify();
       cg_peratom[n]->setup();
     }
@@ -162,13 +166,17 @@ void MSMCGOMP::compute(int eflag, int vflag)
   particle_map();
   make_rho();
 
+  // all procs reverse communicate charge density values from their ghost grid points
+  // to fully sum contribution in their 3d grid
+
   current_level = 0;
-  cg[0]->reverse_comm(this,REVERSE_RHO);
+  cg_all->reverse_comm(this,REVERSE_RHO);
 
-  // all procs communicate density values from their ghost cells
-  // to fully sum contribution in their 3d bricks
+  // forward communicate charge density values to fill ghost grid points
+  // compute direct sum interaction and then restrict to coarser grid
 
-  for (n=0; n<=levels-2; n++) {
+  for (int n=0; n<=levels-2; n++) {
+    if (!active_flag[n]) continue;
     current_level = n;
     cg[n]->forward_comm(this,FORWARD_RHO);
 
@@ -176,14 +184,34 @@ void MSMCGOMP::compute(int eflag, int vflag)
     restriction(n);
   }
 
-  // top grid level
-
-  current_level = levels-1;
-  cg[levels-1]->forward_comm(this,FORWARD_RHO);
-  direct_top(levels-1);
+  // compute direct interation for top grid level for nonperiodic
+  // and for second from top grid level for periodic
 
-  for (n=levels-2; n>=0; n--) {
+  if (active_flag[levels-1]) {
+    if (domain->nonperiodic) {
+      current_level = levels-1;
+      cg[levels-1]->forward_comm(this,FORWARD_RHO);
+      direct_top(levels-1);
+      cg[levels-1]->reverse_comm(this,REVERSE_AD);
+      if (vflag_atom)
+        cg_peratom[levels-1]->reverse_comm(this,REVERSE_AD_PERATOM);
+    } else {
+      // Here using MPI_Allreduce is cheaper than using commgrid
+      grid_swap_forward(levels-1,qgrid[levels-1]);
+      direct(levels-1);
+      grid_swap_reverse(levels-1,egrid[levels-1]);
+      current_level = levels-1;
+      if (vflag_atom)
+        cg_peratom[levels-1]->reverse_comm(this,REVERSE_AD_PERATOM);
+    }
+  }
+
+  // prolongate energy/virial from coarser grid to finer grid
+  // reverse communicate from ghost grid points to get full sum
+
+  for (int n=levels-2; n>=0; n--) {
+    if (!active_flag[n]) continue;
     prolongation(n);
 
     current_level = n;
@@ -199,25 +227,24 @@ void MSMCGOMP::compute(int eflag, int vflag)
   // to fill ghost cells surrounding their 3d bricks
 
   current_level = 0;
-
-  cg[0]->forward_comm(this,FORWARD_AD);
+  cg_all->forward_comm(this,FORWARD_AD);
 
   // extra per-atom energy/virial communication
 
   if (vflag_atom)
-    cg_peratom[0]->forward_comm(this,FORWARD_AD_PERATOM);
+    cg_peratom_all->forward_comm(this,FORWARD_AD_PERATOM);
 
   // calculate the force on my particles (interpolation)
 
   fieldforce();
 
-  // calculate the per-atom energy for my particles
+  // calculate the per-atom energy/virial for my particles
 
   if (evflag_atom) fieldforce_peratom();
 
-  const double qscale = force->qqrd2e * scale;
+  // total long-range energy
 
-  // Total long-range energy
+  const double qscale = force->qqrd2e * scale;
 
   if (eflag_global) {
     double energy_all;
@@ -229,7 +256,7 @@ void MSMCGOMP::compute(int eflag, int vflag)
     energy *= 0.5*qscale;
   }
 
-  // Total long-range virial
+  // total long-range virial
 
   if (vflag_global) {
     double virial_all[6];
@@ -352,7 +379,7 @@ void MSMCGOMP::make_rho()
     dy = ny - (x[i][1]-boxlo[1])*delyinv[0];
     dz = nz - (x[i][2]-boxlo[2])*delzinv[0];
 
-    compute_phis_and_dphis(dx,dy,dz);
+    compute_phis(dx,dy,dz);
 
     z0 = q[i];
     for (n = nlower; n <= nupper; n++) {