Adding changes to 2 FFT PPPM so that it runs faster in NPT mode. This is due to computing self-force pre-coeffs once that used to be computed after volume changes. These pre-coeffs are not dependent on box size/shape.

git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@8762 f3b2605a-c512-4ea7-a41b-209d697bcdaa
This commit is contained in:
pscrozi 2012-09-10 23:10:31 +00:00
parent c58dc68076
commit 509a0bb16f
2 changed files with 88 additions and 76 deletions

View File

@ -84,6 +84,8 @@ PPPM::PPPM(LAMMPS *lmp, int narg, char **arg) : KSpace(lmp, narg, arg)
fkx = fky = fkz = NULL;
buf1 = buf2 = buf3 = buf4 = NULL;
sf_precoeff1 = sf_precoeff2 = sf_precoeff3 = sf_precoeff4 = sf_precoeff5 = sf_precoeff6 = NULL;
density_A_brick = density_B_brick = NULL;
density_A_fft = density_B_fft = NULL;
@ -369,6 +371,7 @@ void PPPM::init()
// pre-compute 1d charge distribution coefficients
compute_gf_denom();
if (differentiation_flag == 1) compute_sf_precoeff();
compute_rho_coeff();
}
@ -455,7 +458,6 @@ void PPPM::setup()
if (differentiation_flag == 1) {
compute_gf_ad();
compute_sf_coeff();
} else {
compute_gf_ik();
}
@ -612,6 +614,14 @@ void PPPM::allocate()
if (differentiation_flag == 1) {
memory->create3d_offset(u_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
nxlo_out,nxhi_out,"pppm:u_brick");
memory->create(sf_precoeff1,nfft_both,"pppm:sf_precoeff1");
memory->create(sf_precoeff2,nfft_both,"pppm:sf_precoeff2");
memory->create(sf_precoeff3,nfft_both,"pppm:sf_precoeff3");
memory->create(sf_precoeff4,nfft_both,"pppm:sf_precoeff4");
memory->create(sf_precoeff5,nfft_both,"pppm:sf_precoeff5");
memory->create(sf_precoeff6,nfft_both,"pppm:sf_precoeff6");
} else {
memory->create3d_offset(vdx_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
nxlo_out,nxhi_out,"pppm:vdx_brick");
@ -689,6 +699,12 @@ void PPPM::deallocate()
memory->destroy3d_offset(density_brick,nzlo_out,nylo_out,nxlo_out);
if (differentiation_flag == 1) {
memory->destroy3d_offset(u_brick,nzlo_out,nylo_out,nxlo_out);
memory->destroy(sf_precoeff1);
memory->destroy(sf_precoeff2);
memory->destroy(sf_precoeff3);
memory->destroy(sf_precoeff4);
memory->destroy(sf_precoeff5);
memory->destroy(sf_precoeff6);
} else {
memory->destroy3d_offset(vdx_brick,nzlo_out,nylo_out,nxlo_out);
memory->destroy3d_offset(vdy_brick,nzlo_out,nylo_out,nxlo_out);
@ -1478,6 +1494,8 @@ void PPPM::compute_gf_ad()
double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz;
double numerator,denominator;
for (i == 0; i <= 5; i++) sf_coeff[i] = 0.0;
n = 0;
for (m = nzlo_fft; m <= nzhi_fft; m++) {
mper = m - nz_pppm*(2*m/nz_pppm);
@ -1517,40 +1535,55 @@ void PPPM::compute_gf_ad()
if (sqk != 0.0) {
numerator = 4.0*MY_PI/sqk;
denominator = gf_denom(snx2,sny2,snz2);
greensfn[n++] = numerator*sx*sy*sz*wx*wy*wz/denominator;
} else greensfn[n++] = 0.0;
greensfn[n] = numerator*sx*sy*sz*wx*wy*wz/denominator;
sf_coeff[0] += sf_precoeff1[n]*greensfn[n];
sf_coeff[1] += sf_precoeff2[n]*greensfn[n];
sf_coeff[2] += sf_precoeff3[n]*greensfn[n];
sf_coeff[3] += sf_precoeff4[n]*greensfn[n];
sf_coeff[4] += sf_precoeff5[n]*greensfn[n];
sf_coeff[5] += sf_precoeff6[n]*greensfn[n++];
} else {
greensfn[n] = 0.0;
sf_coeff[0] += sf_precoeff1[n]*greensfn[n];
sf_coeff[1] += sf_precoeff2[n]*greensfn[n];
sf_coeff[2] += sf_precoeff3[n]*greensfn[n];
sf_coeff[3] += sf_precoeff4[n]*greensfn[n];
sf_coeff[4] += sf_precoeff5[n]*greensfn[n];
sf_coeff[5] += sf_precoeff6[n]*greensfn[n++];
}
}
}
}
// Compute the coefficients for the self-force correction
double prex, prey, prez;
prex = prey = prez = MY_PI/volume;
prex *= nx_pppm/xprd;
prey *= ny_pppm/yprd;
prez *= nz_pppm/zprd_slab;
sf_coeff[0] *= prex;
sf_coeff[1] *= prex*2;
sf_coeff[2] *= prey;
sf_coeff[3] *= prey*2;
sf_coeff[4] *= prez;
sf_coeff[5] *= prez*2;
// communicate values with other procs
double tmp[6];
MPI_Allreduce(sf_coeff,tmp,6,MPI_DOUBLE,MPI_SUM,world);
for (n = 0; n < 6; n++) sf_coeff[n] = tmp[n];
}
/* ----------------------------------------------------------------------
compute self force coefficients for ad-differentiation scheme
------------------------------------------------------------------------- */
void PPPM::compute_sf_coeff()
void PPPM::compute_sf_precoeff()
{
int i,j,k,l,m,n;
double *prd;
// volume-dependent factors
// adjust z dimension for 2d slab PPPM
// z dimension for 3d PPPM is zprd since slab_volfactor = 1.0
if (triclinic == 0) prd = domain->prd;
else prd = domain->prd_lamda;
double xprd = prd[0];
double yprd = prd[1];
double zprd = prd[2];
double zprd_slab = zprd*slab_volfactor;
volume = xprd * yprd * zprd_slab;
double unitkx = (2.0*MY_PI/xprd);
double unitky = (2.0*MY_PI/yprd);
double unitkz = (2.0*MY_PI/zprd_slab);
int i,k,l,m,n,nb;
int nx,ny,nz,kper,lper,mper;
double argx,argy,argz;
double wx0[5],wy0[5],wz0[5],wx1[5],wy1[5],wz1[5],wx2[5],wy2[5],wz2[5];
@ -1558,11 +1591,7 @@ void PPPM::compute_sf_coeff()
double u0,u1,u2,u3,u4,u5,u6;
double sum1,sum2,sum3,sum4,sum5,sum6;
int nb = 2;
double form = 1.0;
for (n = 0; n < 6; n++) sf_coeff[n] = 0.0;
nb = 2;
n = 0;
for (m = nzlo_fft; m <= nzhi_fft; m++) {
mper = m - nz_pppm*(2*m/nz_pppm);
@ -1576,43 +1605,43 @@ void PPPM::compute_sf_coeff()
sum1 = sum2 = sum3 = sum4 = sum5 = sum6 = 0.0;
for (i = -nb; i <= nb; i++) {
qx0 = unitkx*(kper+nx_pppm*i);
qx1 = unitkx*(kper+nx_pppm*(i+1));
qx2 = unitkx*(kper+nx_pppm*(i+2));
qx0 = 2.0*MY_PI*(kper+nx_pppm*i);
qx1 = 2.0*MY_PI*(kper+nx_pppm*(i+1));
qx2 = 2.0*MY_PI*(kper+nx_pppm*(i+2));
wx0[i+2] = 1.0;
wx1[i+2] = 1.0;
wx2[i+2] = 1.0;
argx = 0.5*qx0*xprd/nx_pppm;
argx = 0.5*qx0/nx_pppm;
if (argx != 0.0) wx0[i+2] = pow(sin(argx)/argx,order);
argx = 0.5*qx1*xprd/nx_pppm;
argx = 0.5*qx1/nx_pppm;
if (argx != 0.0) wx1[i+2] = pow(sin(argx)/argx,order);
argx = 0.5*qx2*xprd/nx_pppm;
argx = 0.5*qx2/nx_pppm;
if (argx != 0.0) wx2[i+2] = pow(sin(argx)/argx,order);
qy0 = unitky*(lper+ny_pppm*i);
qy1 = unitky*(lper+ny_pppm*(i+1));
qy2 = unitky*(lper+ny_pppm*(i+2));
qy0 = 2.0*MY_PI*(lper+ny_pppm*i);
qy1 = 2.0*MY_PI*(lper+ny_pppm*(i+1));
qy2 = 2.0*MY_PI*(lper+ny_pppm*(i+2));
wy0[i+2] = 1.0;
wy1[i+2] = 1.0;
wy2[i+2] = 1.0;
argy = 0.5*qy0*yprd/ny_pppm;
argy = 0.5*qy0/ny_pppm;
if (argy != 0.0) wy0[i+2] = pow(sin(argy)/argy,order);
argy = 0.5*qy1*yprd/ny_pppm;
argy = 0.5*qy1/ny_pppm;
if (argy != 0.0) wy1[i+2] = pow(sin(argy)/argy,order);
argy = 0.5*qy2*yprd/ny_pppm;
argy = 0.5*qy2/ny_pppm;
if (argy != 0.0) wy2[i+2] = pow(sin(argy)/argy,order);
qz0 = unitkz*(mper+nz_pppm*i);
qz1 = unitkz*(mper+nz_pppm*(i+1));
qz2 = unitkz*(mper+nz_pppm*(i+2));
qz0 = 2.0*MY_PI*(mper+nz_pppm*i);
qz1 = 2.0*MY_PI*(mper+nz_pppm*(i+1));
qz2 = 2.0*MY_PI*(mper+nz_pppm*(i+2));
wz0[i+2] = 1.0;
wz1[i+2] = 1.0;
wz2[i+2] = 1.0;
argz = 0.5*qz0*zprd_slab/nz_pppm;
argz = 0.5*qz0/nz_pppm;
if (argz != 0.0) wz0[i+2] = pow(sin(argz)/argz,order);
argz = 0.5*qz1*zprd_slab/nz_pppm;
argz = 0.5*qz1/nz_pppm;
if (argz != 0.0) wz1[i+2] = pow(sin(argz)/argz,order);
argz = 0.5*qz2*zprd_slab/nz_pppm;
argz = 0.5*qz2/nz_pppm;
if (argz != 0.0) wz2[i+2] = pow(sin(argz)/argz,order);
}
@ -1637,39 +1666,21 @@ void PPPM::compute_sf_coeff()
}
}
// multiplication with Green's function
// store values
sf_coeff[0] += sum1 * greensfn[n];
sf_coeff[1] += sum2 * greensfn[n];
sf_coeff[2] += sum3 * greensfn[n];
sf_coeff[3] += sum4 * greensfn[n];
sf_coeff[4] += sum5 * greensfn[n];
sf_coeff[5] += sum6 * greensfn[n++];
sf_precoeff1[n] = sum1;
sf_precoeff2[n] = sum2;
sf_precoeff3[n] = sum3;
sf_precoeff4[n] = sum4;
sf_precoeff5[n] = sum5;
sf_precoeff6[n++] = sum6;
}
}
}
// perform multiplication with prefactors
double prex, prey, prez;
prex = prey = prez = MY_PI/volume;
prex *= nx_pppm/xprd;
prey *= ny_pppm/yprd;
prez *= nz_pppm/zprd_slab;
sf_coeff[0] *= prex;
sf_coeff[1] *= prex*2;
sf_coeff[2] *= prey;
sf_coeff[3] *= prey*2;
sf_coeff[4] *= prez;
sf_coeff[5] *= prez*2;
// communicate values with other procs
double tmp[6];
MPI_Allreduce(sf_coeff,tmp,6,MPI_DOUBLE,MPI_SUM,world);
for (n = 0; n < 6; n++) sf_coeff[n] = tmp[n];
}
/* ----------------------------------------------------------------------
ghost-swap to accumulate full density in brick decomposition
remap density from 3d brick decomposition to FFT decomposition

View File

@ -81,6 +81,7 @@ class PPPM : public KSpace {
double *gf_b;
FFT_SCALAR **rho1d,**rho_coeff,**drho1d,**drho_coeff;
double *sf_precoeff1, *sf_precoeff2, *sf_precoeff3, *sf_precoeff4, *sf_precoeff5, *sf_precoeff6;
double sf_coeff[6]; // coefficients for calculating ad self-forces
double **acons;
@ -121,7 +122,7 @@ class PPPM : public KSpace {
void compute_gf_denom();
void compute_gf_ik();
void compute_gf_ad();
void compute_sf_coeff();
void compute_sf_precoeff();
virtual void particle_map();
virtual void make_rho();