From 2bcf10827cc2239544e149a4f2203c2e88e8f109 Mon Sep 17 00:00:00 2001 From: sjplimp Date: Thu, 6 Mar 2014 15:28:27 +0000 Subject: [PATCH] git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@11588 f3b2605a-c512-4ea7-a41b-209d697bcdaa --- src/KSPACE/ewald.cpp | 2984 +++---- src/KSPACE/ewald_disp.cpp | 2950 +++---- src/KSPACE/pppm.cpp | 7002 +++++++-------- src/KSPACE/pppm_disp.cpp | 16418 ++++++++++++++++++------------------ src/KSPACE/pppm_old.cpp | 5726 ++++++------- 5 files changed, 17540 insertions(+), 17540 deletions(-) diff --git a/src/KSPACE/ewald.cpp b/src/KSPACE/ewald.cpp index f750c2cbf3..a684ce80a5 100644 --- a/src/KSPACE/ewald.cpp +++ b/src/KSPACE/ewald.cpp @@ -1,1492 +1,1492 @@ -/* ---------------------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - Copyright (2003) Sandia Corporation. Under the terms of Contract - DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains - certain rights in this software. This software is distributed under - the GNU General Public License. - - See the README file in the top-level LAMMPS directory. 
-------------------------------------------------------------------------- */ - -/* ---------------------------------------------------------------------- - Contributing authors: Roy Pollock (LLNL), Paul Crozier (SNL) - per-atom energy/virial added by German Samolyuk (ORNL), Stan Moore (BYU) - group/group energy/force added by Stan Moore (BYU) - triclinic added by Stan Moore (SNL) -------------------------------------------------------------------------- */ - -#include "mpi.h" -#include "stdlib.h" -#include "stdio.h" -#include "string.h" -#include "math.h" -#include "ewald.h" -#include "atom.h" -#include "comm.h" -#include "force.h" -#include "pair.h" -#include "domain.h" -#include "math_const.h" -#include "memory.h" -#include "error.h" - -using namespace LAMMPS_NS; -using namespace MathConst; - -#define SMALL 0.00001 - -/* ---------------------------------------------------------------------- */ - -Ewald::Ewald(LAMMPS *lmp, int narg, char **arg) : KSpace(lmp, narg, arg) -{ - if (narg != 1) error->all(FLERR,"Illegal kspace_style ewald command"); - - ewaldflag = 1; - group_group_enable = 1; - group_allocate_flag = 0; - - accuracy_relative = fabs(force->numeric(FLERR,arg[0])); - - kmax = 0; - kxvecs = kyvecs = kzvecs = NULL; - ug = NULL; - eg = vg = NULL; - sfacrl = sfacim = sfacrl_all = sfacim_all = NULL; - - nmax = 0; - ek = NULL; - cs = sn = NULL; - - kcount = 0; -} - -/* ---------------------------------------------------------------------- - free all memory -------------------------------------------------------------------------- */ - -Ewald::~Ewald() -{ - deallocate(); - if (group_allocate_flag) deallocate_groups(); - memory->destroy(ek); - memory->destroy3d_offset(cs,-kmax_created); - memory->destroy3d_offset(sn,-kmax_created); -} - -/* ---------------------------------------------------------------------- */ - -void Ewald::init() -{ - if (comm->me == 0) { - if (screen) fprintf(screen,"Ewald initialization ...\n"); - if (logfile) fprintf(logfile,"Ewald 
initialization ...\n"); - } - - // error check - - triclinic_check(); - if (domain->dimension == 2) - error->all(FLERR,"Cannot use Ewald with 2d simulation"); - - if (!atom->q_flag) error->all(FLERR,"Kspace style requires atom attribute q"); - - if (slabflag == 0 && domain->nonperiodic > 0) - error->all(FLERR,"Cannot use nonperiodic boundaries with Ewald"); - if (slabflag) { - if (domain->xperiodic != 1 || domain->yperiodic != 1 || - domain->boundary[2][0] != 1 || domain->boundary[2][1] != 1) - error->all(FLERR,"Incorrect boundaries with slab Ewald"); - if (domain->triclinic) - error->all(FLERR,"Cannot (yet) use Ewald with triclinic box " - "and slab correction"); - } - - // extract short-range Coulombic cutoff from pair style - - scale = 1.0; - - pair_check(); - - int itmp; - double *p_cutoff = (double *) force->pair->extract("cut_coul",itmp); - if (p_cutoff == NULL) - error->all(FLERR,"KSpace style is incompatible with Pair style"); - double cutoff = *p_cutoff; - - qsum = qsqsum = 0.0; - for (int i = 0; i < atom->nlocal; i++) { - qsum += atom->q[i]; - qsqsum += atom->q[i]*atom->q[i]; - } - - double tmp; - MPI_Allreduce(&qsum,&tmp,1,MPI_DOUBLE,MPI_SUM,world); - qsum = tmp; - MPI_Allreduce(&qsqsum,&tmp,1,MPI_DOUBLE,MPI_SUM,world); - qsqsum = tmp; - - if (qsqsum == 0.0) - error->all(FLERR,"Cannot use kspace solver on system with no charge"); - if (fabs(qsum) > SMALL && comm->me == 0) { - char str[128]; - sprintf(str,"System is not charge neutral, net charge = %g",qsum); - error->warning(FLERR,str); - } - - // set accuracy (force units) from accuracy_relative or accuracy_absolute - - if (accuracy_absolute >= 0.0) accuracy = accuracy_absolute; - else accuracy = accuracy_relative * two_charge_force; - - // setup K-space resolution - - q2 = qsqsum * force->qqrd2e; - bigint natoms = atom->natoms; - - triclinic = domain->triclinic; - - // use xprd,yprd,zprd even if triclinic so grid size is the same - // adjust z dimension for 2d slab Ewald - // 3d Ewald just uses zprd 
since slab_volfactor = 1.0 - - double xprd = domain->xprd; - double yprd = domain->yprd; - double zprd = domain->zprd; - double zprd_slab = zprd*slab_volfactor; - - // make initial g_ewald estimate - // based on desired accuracy and real space cutoff - // fluid-occupied volume used to estimate real-space error - // zprd used rather than zprd_slab - - if (!gewaldflag) { - if (accuracy <= 0.0) - error->all(FLERR,"KSpace accuracy must be > 0"); - g_ewald = accuracy*sqrt(natoms*cutoff*xprd*yprd*zprd) / (2.0*q2); - if (g_ewald >= 1.0) g_ewald = (1.35 - 0.15*log(accuracy))/cutoff; - else g_ewald = sqrt(-log(g_ewald)) / cutoff; - } - - // setup Ewald coefficients so can print stats - - setup(); - - // final RMS accuracy - - double lprx = rms(kxmax_orig,xprd,natoms,q2); - double lpry = rms(kymax_orig,yprd,natoms,q2); - double lprz = rms(kzmax_orig,zprd_slab,natoms,q2); - double lpr = sqrt(lprx*lprx + lpry*lpry + lprz*lprz) / sqrt(3.0); - double q2_over_sqrt = q2 / sqrt(natoms*cutoff*xprd*yprd*zprd_slab); - double spr = 2.0 *q2_over_sqrt * exp(-g_ewald*g_ewald*cutoff*cutoff); - double tpr = estimate_table_accuracy(q2_over_sqrt,spr); - double estimated_accuracy = sqrt(lpr*lpr + spr*spr + tpr*tpr); - - // stats - - if (comm->me == 0) { - if (screen) { - fprintf(screen," G vector (1/distance) = %g\n",g_ewald); - fprintf(screen," estimated absolute RMS force accuracy = %g\n", - estimated_accuracy); - fprintf(screen," estimated relative force accuracy = %g\n", - estimated_accuracy/two_charge_force); - fprintf(screen," KSpace vectors: actual max1d max3d = %d %d %d\n", - kcount,kmax,kmax3d); - fprintf(screen," kxmax kymax kzmax = %d %d %d\n", - kxmax,kymax,kzmax); - } - if (logfile) { - fprintf(logfile," G vector (1/distance) = %g\n",g_ewald); - fprintf(logfile," estimated absolute RMS force accuracy = %g\n", - estimated_accuracy); - fprintf(logfile," estimated relative force accuracy = %g\n", - estimated_accuracy/two_charge_force); - fprintf(logfile," KSpace vectors: actual max1d 
max3d = %d %d %d\n", - kcount,kmax,kmax3d); - fprintf(logfile," kxmax kymax kzmax = %d %d %d\n", - kxmax,kymax,kzmax); - } - } -} - -/* ---------------------------------------------------------------------- - adjust Ewald coeffs, called initially and whenever volume has changed -------------------------------------------------------------------------- */ - -void Ewald::setup() -{ - // volume-dependent factors - - double xprd = domain->xprd; - double yprd = domain->yprd; - double zprd = domain->zprd; - - // adjustment of z dimension for 2d slab Ewald - // 3d Ewald just uses zprd since slab_volfactor = 1.0 - - double zprd_slab = zprd*slab_volfactor; - volume = xprd * yprd * zprd_slab; - - unitk[0] = 2.0*MY_PI/xprd; - unitk[1] = 2.0*MY_PI/yprd; - unitk[2] = 2.0*MY_PI/zprd_slab; - - int kmax_old = kmax; - - if (kewaldflag == 0) { - - // determine kmax - // function of current box size, accuracy, G_ewald (short-range cutoff) - - bigint natoms = atom->natoms; - double err; - kxmax = 1; - kymax = 1; - kzmax = 1; - - err = rms(kxmax,xprd,natoms,q2); - while (err > accuracy) { - kxmax++; - err = rms(kxmax,xprd,natoms,q2); - } - - err = rms(kymax,yprd,natoms,q2); - while (err > accuracy) { - kymax++; - err = rms(kymax,yprd,natoms,q2); - } - - err = rms(kzmax,zprd_slab,natoms,q2); - while (err > accuracy) { - kzmax++; - err = rms(kzmax,zprd_slab,natoms,q2); - } - - kmax = MAX(kxmax,kymax); - kmax = MAX(kmax,kzmax); - kmax3d = 4*kmax*kmax*kmax + 6*kmax*kmax + 3*kmax; - - double gsqxmx = unitk[0]*unitk[0]*kxmax*kxmax; - double gsqymx = unitk[1]*unitk[1]*kymax*kymax; - double gsqzmx = unitk[2]*unitk[2]*kzmax*kzmax; - gsqmx = MAX(gsqxmx,gsqymx); - gsqmx = MAX(gsqmx,gsqzmx); - - kxmax_orig = kxmax; - kymax_orig = kymax; - kzmax_orig = kzmax; - - // scale lattice vectors for triclinic skew - - if (triclinic) { - double tmp[3]; - tmp[0] = kxmax/xprd; - tmp[1] = kymax/yprd; - tmp[2] = kzmax/zprd; - lamda2xT(&tmp[0],&tmp[0]); - kxmax = MAX(1,static_cast(tmp[0])); - kymax = 
MAX(1,static_cast(tmp[1])); - kzmax = MAX(1,static_cast(tmp[2])); - - kmax = MAX(kxmax,kymax); - kmax = MAX(kmax,kzmax); - kmax3d = 4*kmax*kmax*kmax + 6*kmax*kmax + 3*kmax; - } - - } else { - - kxmax = kx_ewald; - kymax = ky_ewald; - kzmax = kz_ewald; - - kxmax_orig = kxmax; - kymax_orig = kymax; - kzmax_orig = kzmax; - - kmax = MAX(kxmax,kymax); - kmax = MAX(kmax,kzmax); - kmax3d = 4*kmax*kmax*kmax + 6*kmax*kmax + 3*kmax; - - double gsqxmx = unitk[0]*unitk[0]*kxmax*kxmax; - double gsqymx = unitk[1]*unitk[1]*kymax*kymax; - double gsqzmx = unitk[2]*unitk[2]*kzmax*kzmax; - gsqmx = MAX(gsqxmx,gsqymx); - gsqmx = MAX(gsqmx,gsqzmx); - } - - gsqmx *= 1.00001; - - // if size has grown, reallocate k-dependent and nlocal-dependent arrays - - if (kmax > kmax_old) { - deallocate(); - allocate(); - group_allocate_flag = 0; - - memory->destroy(ek); - memory->destroy3d_offset(cs,-kmax_created); - memory->destroy3d_offset(sn,-kmax_created); - nmax = atom->nmax; - memory->create(ek,nmax,3,"ewald:ek"); - memory->create3d_offset(cs,-kmax,kmax,3,nmax,"ewald:cs"); - memory->create3d_offset(sn,-kmax,kmax,3,nmax,"ewald:sn"); - kmax_created = kmax; - } - - // pre-compute Ewald coefficients - - if (triclinic == 0) - coeffs(); - else - coeffs_triclinic(); -} - -/* ---------------------------------------------------------------------- - compute RMS accuracy for a dimension -------------------------------------------------------------------------- */ - -double Ewald::rms(int km, double prd, bigint natoms, double q2) -{ - double value = 2.0*q2*g_ewald/prd * - sqrt(1.0/(MY_PI*km*natoms)) * - exp(-MY_PI*MY_PI*km*km/(g_ewald*g_ewald*prd*prd)); - - return value; -} - -/* ---------------------------------------------------------------------- - compute the Ewald long-range force, energy, virial -------------------------------------------------------------------------- */ - -void Ewald::compute(int eflag, int vflag) -{ - int i,j,k; - - // set energy/virial flags - - if (eflag || vflag) 
ev_setup(eflag,vflag); - else evflag = evflag_atom = eflag_global = vflag_global = - eflag_atom = vflag_atom = 0; - - // extend size of per-atom arrays if necessary - - if (atom->nlocal > nmax) { - memory->destroy(ek); - memory->destroy3d_offset(cs,-kmax_created); - memory->destroy3d_offset(sn,-kmax_created); - nmax = atom->nmax; - memory->create(ek,nmax,3,"ewald:ek"); - memory->create3d_offset(cs,-kmax,kmax,3,nmax,"ewald:cs"); - memory->create3d_offset(sn,-kmax,kmax,3,nmax,"ewald:sn"); - kmax_created = kmax; - } - - // partial structure factors on each processor - // total structure factor by summing over procs - - if (triclinic == 0) - eik_dot_r(); - else - eik_dot_r_triclinic(); - - MPI_Allreduce(sfacrl,sfacrl_all,kcount,MPI_DOUBLE,MPI_SUM,world); - MPI_Allreduce(sfacim,sfacim_all,kcount,MPI_DOUBLE,MPI_SUM,world); - - // K-space portion of electric field - // double loop over K-vectors and local atoms - // perform per-atom calculations if needed - - double **f = atom->f; - double *q = atom->q; - int nlocal = atom->nlocal; - - int kx,ky,kz; - double cypz,sypz,exprl,expim,partial,partial_peratom; - - for (i = 0; i < nlocal; i++) { - ek[i][0] = 0.0; - ek[i][1] = 0.0; - ek[i][2] = 0.0; - } - - for (k = 0; k < kcount; k++) { - kx = kxvecs[k]; - ky = kyvecs[k]; - kz = kzvecs[k]; - - for (i = 0; i < nlocal; i++) { - cypz = cs[ky][1][i]*cs[kz][2][i] - sn[ky][1][i]*sn[kz][2][i]; - sypz = sn[ky][1][i]*cs[kz][2][i] + cs[ky][1][i]*sn[kz][2][i]; - exprl = cs[kx][0][i]*cypz - sn[kx][0][i]*sypz; - expim = sn[kx][0][i]*cypz + cs[kx][0][i]*sypz; - partial = expim*sfacrl_all[k] - exprl*sfacim_all[k]; - ek[i][0] += partial*eg[k][0]; - ek[i][1] += partial*eg[k][1]; - ek[i][2] += partial*eg[k][2]; - - if (evflag_atom) { - partial_peratom = exprl*sfacrl_all[k] + expim*sfacim_all[k]; - if (eflag_atom) eatom[i] += q[i]*ug[k]*partial_peratom; - if (vflag_atom) - for (j = 0; j < 6; j++) - vatom[i][j] += ug[k]*vg[k][j]*partial_peratom; - } - } - } - - // convert E-field to force - - const 
double qscale = force->qqrd2e * scale; - - for (i = 0; i < nlocal; i++) { - f[i][0] += qscale * q[i]*ek[i][0]; - f[i][1] += qscale * q[i]*ek[i][1]; - if (slabflag != 2) f[i][2] += qscale * q[i]*ek[i][2]; - } - - // global energy - - if (eflag_global) { - for (k = 0; k < kcount; k++) - energy += ug[k] * (sfacrl_all[k]*sfacrl_all[k] + - sfacim_all[k]*sfacim_all[k]); - energy -= g_ewald*qsqsum/MY_PIS + - MY_PI2*qsum*qsum / (g_ewald*g_ewald*volume); - energy *= qscale; - } - - // global virial - - if (vflag_global) { - double uk; - for (k = 0; k < kcount; k++) { - uk = ug[k] * (sfacrl_all[k]*sfacrl_all[k] + sfacim_all[k]*sfacim_all[k]); - for (j = 0; j < 6; j++) virial[j] += uk*vg[k][j]; - } - for (j = 0; j < 6; j++) virial[j] *= qscale; - } - - // per-atom energy/virial - // energy includes self-energy correction - - if (evflag_atom) { - if (eflag_atom) { - for (i = 0; i < nlocal; i++) { - eatom[i] -= g_ewald*q[i]*q[i]/MY_PIS + MY_PI2*q[i]*qsum / - (g_ewald*g_ewald*volume); - eatom[i] *= qscale; - } - } - - if (vflag_atom) - for (i = 0; i < nlocal; i++) - for (j = 0; j < 6; j++) vatom[i][j] *= q[i]*qscale; - } - - // 2d slab correction - - if (slabflag == 1) slabcorr(); -} - -/* ---------------------------------------------------------------------- */ - -void Ewald::eik_dot_r() -{ - int i,k,l,m,n,ic; - double cstr1,sstr1,cstr2,sstr2,cstr3,sstr3,cstr4,sstr4; - double sqk,clpm,slpm; - - double **x = atom->x; - double *q = atom->q; - int nlocal = atom->nlocal; - - n = 0; - - // (k,0,0), (0,l,0), (0,0,m) - - for (ic = 0; ic < 3; ic++) { - sqk = unitk[ic]*unitk[ic]; - if (sqk <= gsqmx) { - cstr1 = 0.0; - sstr1 = 0.0; - for (i = 0; i < nlocal; i++) { - cs[0][ic][i] = 1.0; - sn[0][ic][i] = 0.0; - cs[1][ic][i] = cos(unitk[ic]*x[i][ic]); - sn[1][ic][i] = sin(unitk[ic]*x[i][ic]); - cs[-1][ic][i] = cs[1][ic][i]; - sn[-1][ic][i] = -sn[1][ic][i]; - cstr1 += q[i]*cs[1][ic][i]; - sstr1 += q[i]*sn[1][ic][i]; - } - sfacrl[n] = cstr1; - sfacim[n++] = sstr1; - } - } - - for (m = 2; m <= 
kmax; m++) { - for (ic = 0; ic < 3; ic++) { - sqk = m*unitk[ic] * m*unitk[ic]; - if (sqk <= gsqmx) { - cstr1 = 0.0; - sstr1 = 0.0; - for (i = 0; i < nlocal; i++) { - cs[m][ic][i] = cs[m-1][ic][i]*cs[1][ic][i] - - sn[m-1][ic][i]*sn[1][ic][i]; - sn[m][ic][i] = sn[m-1][ic][i]*cs[1][ic][i] + - cs[m-1][ic][i]*sn[1][ic][i]; - cs[-m][ic][i] = cs[m][ic][i]; - sn[-m][ic][i] = -sn[m][ic][i]; - cstr1 += q[i]*cs[m][ic][i]; - sstr1 += q[i]*sn[m][ic][i]; - } - sfacrl[n] = cstr1; - sfacim[n++] = sstr1; - } - } - } - - // 1 = (k,l,0), 2 = (k,-l,0) - - for (k = 1; k <= kxmax; k++) { - for (l = 1; l <= kymax; l++) { - sqk = (k*unitk[0] * k*unitk[0]) + (l*unitk[1] * l*unitk[1]); - if (sqk <= gsqmx) { - cstr1 = 0.0; - sstr1 = 0.0; - cstr2 = 0.0; - sstr2 = 0.0; - for (i = 0; i < nlocal; i++) { - cstr1 += q[i]*(cs[k][0][i]*cs[l][1][i] - sn[k][0][i]*sn[l][1][i]); - sstr1 += q[i]*(sn[k][0][i]*cs[l][1][i] + cs[k][0][i]*sn[l][1][i]); - cstr2 += q[i]*(cs[k][0][i]*cs[l][1][i] + sn[k][0][i]*sn[l][1][i]); - sstr2 += q[i]*(sn[k][0][i]*cs[l][1][i] - cs[k][0][i]*sn[l][1][i]); - } - sfacrl[n] = cstr1; - sfacim[n++] = sstr1; - sfacrl[n] = cstr2; - sfacim[n++] = sstr2; - } - } - } - - // 1 = (0,l,m), 2 = (0,l,-m) - - for (l = 1; l <= kymax; l++) { - for (m = 1; m <= kzmax; m++) { - sqk = (l*unitk[1] * l*unitk[1]) + (m*unitk[2] * m*unitk[2]); - if (sqk <= gsqmx) { - cstr1 = 0.0; - sstr1 = 0.0; - cstr2 = 0.0; - sstr2 = 0.0; - for (i = 0; i < nlocal; i++) { - cstr1 += q[i]*(cs[l][1][i]*cs[m][2][i] - sn[l][1][i]*sn[m][2][i]); - sstr1 += q[i]*(sn[l][1][i]*cs[m][2][i] + cs[l][1][i]*sn[m][2][i]); - cstr2 += q[i]*(cs[l][1][i]*cs[m][2][i] + sn[l][1][i]*sn[m][2][i]); - sstr2 += q[i]*(sn[l][1][i]*cs[m][2][i] - cs[l][1][i]*sn[m][2][i]); - } - sfacrl[n] = cstr1; - sfacim[n++] = sstr1; - sfacrl[n] = cstr2; - sfacim[n++] = sstr2; - } - } - } - - // 1 = (k,0,m), 2 = (k,0,-m) - - for (k = 1; k <= kxmax; k++) { - for (m = 1; m <= kzmax; m++) { - sqk = (k*unitk[0] * k*unitk[0]) + (m*unitk[2] * m*unitk[2]); - if (sqk <= 
gsqmx) { - cstr1 = 0.0; - sstr1 = 0.0; - cstr2 = 0.0; - sstr2 = 0.0; - for (i = 0; i < nlocal; i++) { - cstr1 += q[i]*(cs[k][0][i]*cs[m][2][i] - sn[k][0][i]*sn[m][2][i]); - sstr1 += q[i]*(sn[k][0][i]*cs[m][2][i] + cs[k][0][i]*sn[m][2][i]); - cstr2 += q[i]*(cs[k][0][i]*cs[m][2][i] + sn[k][0][i]*sn[m][2][i]); - sstr2 += q[i]*(sn[k][0][i]*cs[m][2][i] - cs[k][0][i]*sn[m][2][i]); - } - sfacrl[n] = cstr1; - sfacim[n++] = sstr1; - sfacrl[n] = cstr2; - sfacim[n++] = sstr2; - } - } - } - - // 1 = (k,l,m), 2 = (k,-l,m), 3 = (k,l,-m), 4 = (k,-l,-m) - - for (k = 1; k <= kxmax; k++) { - for (l = 1; l <= kymax; l++) { - for (m = 1; m <= kzmax; m++) { - sqk = (k*unitk[0] * k*unitk[0]) + (l*unitk[1] * l*unitk[1]) + - (m*unitk[2] * m*unitk[2]); - if (sqk <= gsqmx) { - cstr1 = 0.0; - sstr1 = 0.0; - cstr2 = 0.0; - sstr2 = 0.0; - cstr3 = 0.0; - sstr3 = 0.0; - cstr4 = 0.0; - sstr4 = 0.0; - for (i = 0; i < nlocal; i++) { - clpm = cs[l][1][i]*cs[m][2][i] - sn[l][1][i]*sn[m][2][i]; - slpm = sn[l][1][i]*cs[m][2][i] + cs[l][1][i]*sn[m][2][i]; - cstr1 += q[i]*(cs[k][0][i]*clpm - sn[k][0][i]*slpm); - sstr1 += q[i]*(sn[k][0][i]*clpm + cs[k][0][i]*slpm); - - clpm = cs[l][1][i]*cs[m][2][i] + sn[l][1][i]*sn[m][2][i]; - slpm = -sn[l][1][i]*cs[m][2][i] + cs[l][1][i]*sn[m][2][i]; - cstr2 += q[i]*(cs[k][0][i]*clpm - sn[k][0][i]*slpm); - sstr2 += q[i]*(sn[k][0][i]*clpm + cs[k][0][i]*slpm); - - clpm = cs[l][1][i]*cs[m][2][i] + sn[l][1][i]*sn[m][2][i]; - slpm = sn[l][1][i]*cs[m][2][i] - cs[l][1][i]*sn[m][2][i]; - cstr3 += q[i]*(cs[k][0][i]*clpm - sn[k][0][i]*slpm); - sstr3 += q[i]*(sn[k][0][i]*clpm + cs[k][0][i]*slpm); - - clpm = cs[l][1][i]*cs[m][2][i] - sn[l][1][i]*sn[m][2][i]; - slpm = -sn[l][1][i]*cs[m][2][i] - cs[l][1][i]*sn[m][2][i]; - cstr4 += q[i]*(cs[k][0][i]*clpm - sn[k][0][i]*slpm); - sstr4 += q[i]*(sn[k][0][i]*clpm + cs[k][0][i]*slpm); - } - sfacrl[n] = cstr1; - sfacim[n++] = sstr1; - sfacrl[n] = cstr2; - sfacim[n++] = sstr2; - sfacrl[n] = cstr3; - sfacim[n++] = sstr3; - sfacrl[n] = cstr4; - 
sfacim[n++] = sstr4; - } - } - } - } -} - -/* ---------------------------------------------------------------------- */ - -void Ewald::eik_dot_r_triclinic() -{ - int i,k,l,m,n,ic; - double cstr1,sstr1; - double sqk,clpm,slpm; - - double **x = atom->x; - double *q = atom->q; - int nlocal = atom->nlocal; - - double unitk_lamda[3]; - - double max_kvecs[3]; - max_kvecs[0] = kxmax; - max_kvecs[1] = kymax; - max_kvecs[2] = kzmax; - - // (k,0,0), (0,l,0), (0,0,m) - - for (ic = 0; ic < 3; ic++) { - unitk_lamda[0] = 0.0; - unitk_lamda[1] = 0.0; - unitk_lamda[2] = 0.0; - unitk_lamda[ic] = 2.0*MY_PI; - x2lamdaT(&unitk_lamda[0],&unitk_lamda[0]); - sqk = unitk_lamda[ic]*unitk_lamda[ic]; - if (sqk <= gsqmx) { - for (i = 0; i < nlocal; i++) { - cs[0][ic][i] = 1.0; - sn[0][ic][i] = 0.0; - cs[1][ic][i] = cos(unitk_lamda[0]*x[i][0] + unitk_lamda[1]*x[i][1] + unitk_lamda[2]*x[i][2]); - sn[1][ic][i] = sin(unitk_lamda[0]*x[i][0] + unitk_lamda[1]*x[i][1] + unitk_lamda[2]*x[i][2]); - cs[-1][ic][i] = cs[1][ic][i]; - sn[-1][ic][i] = -sn[1][ic][i]; - } - } - } - - for (ic = 0; ic < 3; ic++) { - for (m = 2; m <= max_kvecs[ic]; m++) { - unitk_lamda[0] = 0.0; - unitk_lamda[1] = 0.0; - unitk_lamda[2] = 0.0; - unitk_lamda[ic] = 2.0*MY_PI*m; - x2lamdaT(&unitk_lamda[0],&unitk_lamda[0]); - sqk = unitk_lamda[ic]*unitk_lamda[ic]; - for (i = 0; i < nlocal; i++) { - cs[m][ic][i] = cs[m-1][ic][i]*cs[1][ic][i] - - sn[m-1][ic][i]*sn[1][ic][i]; - sn[m][ic][i] = sn[m-1][ic][i]*cs[1][ic][i] + - cs[m-1][ic][i]*sn[1][ic][i]; - cs[-m][ic][i] = cs[m][ic][i]; - sn[-m][ic][i] = -sn[m][ic][i]; - } - } - } - - for (n = 0; n < kcount; n++) { - k = kxvecs[n]; - l = kyvecs[n]; - m = kzvecs[n]; - cstr1 = 0.0; - sstr1 = 0.0; - for (i = 0; i < nlocal; i++) { - clpm = cs[l][1][i]*cs[m][2][i] - sn[l][1][i]*sn[m][2][i]; - slpm = sn[l][1][i]*cs[m][2][i] + cs[l][1][i]*sn[m][2][i]; - cstr1 += q[i]*(cs[k][0][i]*clpm - sn[k][0][i]*slpm); - sstr1 += q[i]*(sn[k][0][i]*clpm + cs[k][0][i]*slpm); - } - sfacrl[n] = cstr1; - sfacim[n] = 
sstr1; - } -} - -/* ---------------------------------------------------------------------- - pre-compute coefficients for each Ewald K-vector -------------------------------------------------------------------------- */ - -void Ewald::coeffs() -{ - int k,l,m; - double sqk,vterm; - - double g_ewald_sq_inv = 1.0 / (g_ewald*g_ewald); - double preu = 4.0*MY_PI/volume; - - kcount = 0; - - // (k,0,0), (0,l,0), (0,0,m) - - for (m = 1; m <= kmax; m++) { - sqk = (m*unitk[0]) * (m*unitk[0]); - if (sqk <= gsqmx) { - kxvecs[kcount] = m; - kyvecs[kcount] = 0; - kzvecs[kcount] = 0; - ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk; - eg[kcount][0] = 2.0*unitk[0]*m*ug[kcount]; - eg[kcount][1] = 0.0; - eg[kcount][2] = 0.0; - vterm = -2.0*(1.0/sqk + 0.25*g_ewald_sq_inv); - vg[kcount][0] = 1.0 + vterm*(unitk[0]*m)*(unitk[0]*m); - vg[kcount][1] = 1.0; - vg[kcount][2] = 1.0; - vg[kcount][3] = 0.0; - vg[kcount][4] = 0.0; - vg[kcount][5] = 0.0; - kcount++; - } - sqk = (m*unitk[1]) * (m*unitk[1]); - if (sqk <= gsqmx) { - kxvecs[kcount] = 0; - kyvecs[kcount] = m; - kzvecs[kcount] = 0; - ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk; - eg[kcount][0] = 0.0; - eg[kcount][1] = 2.0*unitk[1]*m*ug[kcount]; - eg[kcount][2] = 0.0; - vterm = -2.0*(1.0/sqk + 0.25*g_ewald_sq_inv); - vg[kcount][0] = 1.0; - vg[kcount][1] = 1.0 + vterm*(unitk[1]*m)*(unitk[1]*m); - vg[kcount][2] = 1.0; - vg[kcount][3] = 0.0; - vg[kcount][4] = 0.0; - vg[kcount][5] = 0.0; - kcount++; - } - sqk = (m*unitk[2]) * (m*unitk[2]); - if (sqk <= gsqmx) { - kxvecs[kcount] = 0; - kyvecs[kcount] = 0; - kzvecs[kcount] = m; - ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk; - eg[kcount][0] = 0.0; - eg[kcount][1] = 0.0; - eg[kcount][2] = 2.0*unitk[2]*m*ug[kcount]; - vterm = -2.0*(1.0/sqk + 0.25*g_ewald_sq_inv); - vg[kcount][0] = 1.0; - vg[kcount][1] = 1.0; - vg[kcount][2] = 1.0 + vterm*(unitk[2]*m)*(unitk[2]*m); - vg[kcount][3] = 0.0; - vg[kcount][4] = 0.0; - vg[kcount][5] = 0.0; - kcount++; - } - } - - // 1 = (k,l,0), 2 = 
(k,-l,0) - - for (k = 1; k <= kxmax; k++) { - for (l = 1; l <= kymax; l++) { - sqk = (unitk[0]*k) * (unitk[0]*k) + (unitk[1]*l) * (unitk[1]*l); - if (sqk <= gsqmx) { - kxvecs[kcount] = k; - kyvecs[kcount] = l; - kzvecs[kcount] = 0; - ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk; - eg[kcount][0] = 2.0*unitk[0]*k*ug[kcount]; - eg[kcount][1] = 2.0*unitk[1]*l*ug[kcount]; - eg[kcount][2] = 0.0; - vterm = -2.0*(1.0/sqk + 0.25*g_ewald_sq_inv); - vg[kcount][0] = 1.0 + vterm*(unitk[0]*k)*(unitk[0]*k); - vg[kcount][1] = 1.0 + vterm*(unitk[1]*l)*(unitk[1]*l); - vg[kcount][2] = 1.0; - vg[kcount][3] = vterm*unitk[0]*k*unitk[1]*l; - vg[kcount][4] = 0.0; - vg[kcount][5] = 0.0; - kcount++; - - kxvecs[kcount] = k; - kyvecs[kcount] = -l; - kzvecs[kcount] = 0; - ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk; - eg[kcount][0] = 2.0*unitk[0]*k*ug[kcount]; - eg[kcount][1] = -2.0*unitk[1]*l*ug[kcount]; - eg[kcount][2] = 0.0; - vg[kcount][0] = 1.0 + vterm*(unitk[0]*k)*(unitk[0]*k); - vg[kcount][1] = 1.0 + vterm*(unitk[1]*l)*(unitk[1]*l); - vg[kcount][2] = 1.0; - vg[kcount][3] = -vterm*unitk[0]*k*unitk[1]*l; - vg[kcount][4] = 0.0; - vg[kcount][5] = 0.0; - kcount++;; - } - } - } - - // 1 = (0,l,m), 2 = (0,l,-m) - - for (l = 1; l <= kymax; l++) { - for (m = 1; m <= kzmax; m++) { - sqk = (unitk[1]*l) * (unitk[1]*l) + (unitk[2]*m) * (unitk[2]*m); - if (sqk <= gsqmx) { - kxvecs[kcount] = 0; - kyvecs[kcount] = l; - kzvecs[kcount] = m; - ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk; - eg[kcount][0] = 0.0; - eg[kcount][1] = 2.0*unitk[1]*l*ug[kcount]; - eg[kcount][2] = 2.0*unitk[2]*m*ug[kcount]; - vterm = -2.0*(1.0/sqk + 0.25*g_ewald_sq_inv); - vg[kcount][0] = 1.0; - vg[kcount][1] = 1.0 + vterm*(unitk[1]*l)*(unitk[1]*l); - vg[kcount][2] = 1.0 + vterm*(unitk[2]*m)*(unitk[2]*m); - vg[kcount][3] = 0.0; - vg[kcount][4] = 0.0; - vg[kcount][5] = vterm*unitk[1]*l*unitk[2]*m; - kcount++; - - kxvecs[kcount] = 0; - kyvecs[kcount] = l; - kzvecs[kcount] = -m; - ug[kcount] = 
preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk; - eg[kcount][0] = 0.0; - eg[kcount][1] = 2.0*unitk[1]*l*ug[kcount]; - eg[kcount][2] = -2.0*unitk[2]*m*ug[kcount]; - vg[kcount][0] = 1.0; - vg[kcount][1] = 1.0 + vterm*(unitk[1]*l)*(unitk[1]*l); - vg[kcount][2] = 1.0 + vterm*(unitk[2]*m)*(unitk[2]*m); - vg[kcount][3] = 0.0; - vg[kcount][4] = 0.0; - vg[kcount][5] = -vterm*unitk[1]*l*unitk[2]*m; - kcount++; - } - } - } - - // 1 = (k,0,m), 2 = (k,0,-m) - - for (k = 1; k <= kxmax; k++) { - for (m = 1; m <= kzmax; m++) { - sqk = (unitk[0]*k) * (unitk[0]*k) + (unitk[2]*m) * (unitk[2]*m); - if (sqk <= gsqmx) { - kxvecs[kcount] = k; - kyvecs[kcount] = 0; - kzvecs[kcount] = m; - ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk; - eg[kcount][0] = 2.0*unitk[0]*k*ug[kcount]; - eg[kcount][1] = 0.0; - eg[kcount][2] = 2.0*unitk[2]*m*ug[kcount]; - vterm = -2.0*(1.0/sqk + 0.25*g_ewald_sq_inv); - vg[kcount][0] = 1.0 + vterm*(unitk[0]*k)*(unitk[0]*k); - vg[kcount][1] = 1.0; - vg[kcount][2] = 1.0 + vterm*(unitk[2]*m)*(unitk[2]*m); - vg[kcount][3] = 0.0; - vg[kcount][4] = vterm*unitk[0]*k*unitk[2]*m; - vg[kcount][5] = 0.0; - kcount++; - - kxvecs[kcount] = k; - kyvecs[kcount] = 0; - kzvecs[kcount] = -m; - ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk; - eg[kcount][0] = 2.0*unitk[0]*k*ug[kcount]; - eg[kcount][1] = 0.0; - eg[kcount][2] = -2.0*unitk[2]*m*ug[kcount]; - vg[kcount][0] = 1.0 + vterm*(unitk[0]*k)*(unitk[0]*k); - vg[kcount][1] = 1.0; - vg[kcount][2] = 1.0 + vterm*(unitk[2]*m)*(unitk[2]*m); - vg[kcount][3] = 0.0; - vg[kcount][4] = -vterm*unitk[0]*k*unitk[2]*m; - vg[kcount][5] = 0.0; - kcount++; - } - } - } - - // 1 = (k,l,m), 2 = (k,-l,m), 3 = (k,l,-m), 4 = (k,-l,-m) - - for (k = 1; k <= kxmax; k++) { - for (l = 1; l <= kymax; l++) { - for (m = 1; m <= kzmax; m++) { - sqk = (unitk[0]*k) * (unitk[0]*k) + (unitk[1]*l) * (unitk[1]*l) + - (unitk[2]*m) * (unitk[2]*m); - if (sqk <= gsqmx) { - kxvecs[kcount] = k; - kyvecs[kcount] = l; - kzvecs[kcount] = m; - ug[kcount] = 
preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk; - eg[kcount][0] = 2.0*unitk[0]*k*ug[kcount]; - eg[kcount][1] = 2.0*unitk[1]*l*ug[kcount]; - eg[kcount][2] = 2.0*unitk[2]*m*ug[kcount]; - vterm = -2.0*(1.0/sqk + 0.25*g_ewald_sq_inv); - vg[kcount][0] = 1.0 + vterm*(unitk[0]*k)*(unitk[0]*k); - vg[kcount][1] = 1.0 + vterm*(unitk[1]*l)*(unitk[1]*l); - vg[kcount][2] = 1.0 + vterm*(unitk[2]*m)*(unitk[2]*m); - vg[kcount][3] = vterm*unitk[0]*k*unitk[1]*l; - vg[kcount][4] = vterm*unitk[0]*k*unitk[2]*m; - vg[kcount][5] = vterm*unitk[1]*l*unitk[2]*m; - kcount++; - - kxvecs[kcount] = k; - kyvecs[kcount] = -l; - kzvecs[kcount] = m; - ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk; - eg[kcount][0] = 2.0*unitk[0]*k*ug[kcount]; - eg[kcount][1] = -2.0*unitk[1]*l*ug[kcount]; - eg[kcount][2] = 2.0*unitk[2]*m*ug[kcount]; - vg[kcount][0] = 1.0 + vterm*(unitk[0]*k)*(unitk[0]*k); - vg[kcount][1] = 1.0 + vterm*(unitk[1]*l)*(unitk[1]*l); - vg[kcount][2] = 1.0 + vterm*(unitk[2]*m)*(unitk[2]*m); - vg[kcount][3] = -vterm*unitk[0]*k*unitk[1]*l; - vg[kcount][4] = vterm*unitk[0]*k*unitk[2]*m; - vg[kcount][5] = -vterm*unitk[1]*l*unitk[2]*m; - kcount++; - - kxvecs[kcount] = k; - kyvecs[kcount] = l; - kzvecs[kcount] = -m; - ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk; - eg[kcount][0] = 2.0*unitk[0]*k*ug[kcount]; - eg[kcount][1] = 2.0*unitk[1]*l*ug[kcount]; - eg[kcount][2] = -2.0*unitk[2]*m*ug[kcount]; - vg[kcount][0] = 1.0 + vterm*(unitk[0]*k)*(unitk[0]*k); - vg[kcount][1] = 1.0 + vterm*(unitk[1]*l)*(unitk[1]*l); - vg[kcount][2] = 1.0 + vterm*(unitk[2]*m)*(unitk[2]*m); - vg[kcount][3] = vterm*unitk[0]*k*unitk[1]*l; - vg[kcount][4] = -vterm*unitk[0]*k*unitk[2]*m; - vg[kcount][5] = -vterm*unitk[1]*l*unitk[2]*m; - kcount++; - - kxvecs[kcount] = k; - kyvecs[kcount] = -l; - kzvecs[kcount] = -m; - ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk; - eg[kcount][0] = 2.0*unitk[0]*k*ug[kcount]; - eg[kcount][1] = -2.0*unitk[1]*l*ug[kcount]; - eg[kcount][2] = -2.0*unitk[2]*m*ug[kcount]; - vg[kcount][0] = 
1.0 + vterm*(unitk[0]*k)*(unitk[0]*k); - vg[kcount][1] = 1.0 + vterm*(unitk[1]*l)*(unitk[1]*l); - vg[kcount][2] = 1.0 + vterm*(unitk[2]*m)*(unitk[2]*m); - vg[kcount][3] = -vterm*unitk[0]*k*unitk[1]*l; - vg[kcount][4] = -vterm*unitk[0]*k*unitk[2]*m; - vg[kcount][5] = vterm*unitk[1]*l*unitk[2]*m; - kcount++; - } - } - } - } -} - -/* ---------------------------------------------------------------------- - pre-compute coefficients for each Ewald K-vector for a triclinic - system -------------------------------------------------------------------------- */ - -void Ewald::coeffs_triclinic() -{ - int k,l,m; - double sqk,vterm; - - double g_ewald_sq_inv = 1.0 / (g_ewald*g_ewald); - double preu = 4.0*MY_PI/volume; - - double unitk_lamda[3]; - - kcount = 0; - - // 1 = (k,l,m), 2 = (k,-l,m), 3 = (k,l,-m), 4 = (k,-l,-m) - - for (k = 1; k <= kxmax; k++) { - for (l = -kymax; l <= kymax; l++) { - for (m = -kzmax; m <= kzmax; m++) { - unitk_lamda[0] = 2.0*MY_PI*k; - unitk_lamda[1] = 2.0*MY_PI*l; - unitk_lamda[2] = 2.0*MY_PI*m; - x2lamdaT(&unitk_lamda[0],&unitk_lamda[0]); - sqk = unitk_lamda[0]*unitk_lamda[0] + unitk_lamda[1]*unitk_lamda[1] + - unitk_lamda[2]*unitk_lamda[2]; - if (sqk <= gsqmx) { - kxvecs[kcount] = k; - kyvecs[kcount] = l; - kzvecs[kcount] = m; - ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk; - eg[kcount][0] = 2.0*unitk_lamda[0]*ug[kcount]; - eg[kcount][1] = 2.0*unitk_lamda[1]*ug[kcount]; - eg[kcount][2] = 2.0*unitk_lamda[2]*ug[kcount]; - vterm = -2.0*(1.0/sqk + 0.25*g_ewald_sq_inv); - vg[kcount][0] = 1.0 + vterm*unitk_lamda[0]*unitk_lamda[0]; - vg[kcount][1] = 1.0 + vterm*unitk_lamda[1]*unitk_lamda[1]; - vg[kcount][2] = 1.0 + vterm*unitk_lamda[2]*unitk_lamda[2]; - vg[kcount][3] = vterm*unitk_lamda[0]*unitk_lamda[1]; - vg[kcount][4] = vterm*unitk_lamda[0]*unitk_lamda[2]; - vg[kcount][5] = vterm*unitk_lamda[1]*unitk_lamda[2]; - kcount++; - } - } - } - } - - // 1 = (0,l,m), 2 = (0,l,-m) - - for (l = 1; l <= kymax; l++) { - for (m = -kzmax; m <= kzmax; m++) { - 
unitk_lamda[0] = 0.0; - unitk_lamda[1] = 2.0*MY_PI*l; - unitk_lamda[2] = 2.0*MY_PI*m; - x2lamdaT(&unitk_lamda[0],&unitk_lamda[0]); - sqk = unitk_lamda[1]*unitk_lamda[1] + unitk_lamda[2]*unitk_lamda[2]; - if (sqk <= gsqmx) { - kxvecs[kcount] = 0; - kyvecs[kcount] = l; - kzvecs[kcount] = m; - ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk; - eg[kcount][0] = 0.0; - eg[kcount][1] = 2.0*unitk_lamda[1]*ug[kcount]; - eg[kcount][2] = 2.0*unitk_lamda[2]*ug[kcount]; - vterm = -2.0*(1.0/sqk + 0.25*g_ewald_sq_inv); - vg[kcount][0] = 1.0; - vg[kcount][1] = 1.0 + vterm*unitk_lamda[1]*unitk_lamda[1]; - vg[kcount][2] = 1.0 + vterm*unitk_lamda[2]*unitk_lamda[2]; - vg[kcount][3] = 0.0; - vg[kcount][4] = 0.0; - vg[kcount][5] = vterm*unitk_lamda[1]*unitk_lamda[2]; - kcount++; - } - } - } - - // (0,0,m) - - for (m = 1; m <= kmax; m++) { - unitk_lamda[0] = 0.0; - unitk_lamda[1] = 0.0; - unitk_lamda[2] = 2.0*MY_PI*m; - x2lamdaT(&unitk_lamda[0],&unitk_lamda[0]); - sqk = unitk_lamda[2]*unitk_lamda[2]; - if (sqk <= gsqmx) { - kxvecs[kcount] = 0; - kyvecs[kcount] = 0; - kzvecs[kcount] = m; - ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk; - eg[kcount][0] = 0.0; - eg[kcount][1] = 0.0; - eg[kcount][2] = 2.0*unitk_lamda[2]*ug[kcount]; - vterm = -2.0*(1.0/sqk + 0.25*g_ewald_sq_inv); - vg[kcount][0] = 1.0; - vg[kcount][1] = 1.0; - vg[kcount][2] = 1.0 + vterm*unitk_lamda[2]*unitk_lamda[2]; - vg[kcount][3] = 0.0; - vg[kcount][4] = 0.0; - vg[kcount][5] = 0.0; - kcount++; - } - } -} - -/* ---------------------------------------------------------------------- - allocate memory that depends on # of K-vectors -------------------------------------------------------------------------- */ - -void Ewald::allocate() -{ - kxvecs = new int[kmax3d]; - kyvecs = new int[kmax3d]; - kzvecs = new int[kmax3d]; - - ug = new double[kmax3d]; - memory->create(eg,kmax3d,3,"ewald:eg"); - memory->create(vg,kmax3d,6,"ewald:vg"); - - sfacrl = new double[kmax3d]; - sfacim = new double[kmax3d]; - sfacrl_all = new 
double[kmax3d]; - sfacim_all = new double[kmax3d]; -} - -/* ---------------------------------------------------------------------- - deallocate memory that depends on # of K-vectors -------------------------------------------------------------------------- */ - -void Ewald::deallocate() -{ - delete [] kxvecs; - delete [] kyvecs; - delete [] kzvecs; - - delete [] ug; - memory->destroy(eg); - memory->destroy(vg); - - delete [] sfacrl; - delete [] sfacim; - delete [] sfacrl_all; - delete [] sfacim_all; -} - -/* ---------------------------------------------------------------------- - Slab-geometry correction term to dampen inter-slab interactions between - periodically repeating slabs. Yields good approximation to 2D Ewald if - adequate empty space is left between repeating slabs (J. Chem. Phys. - 111, 3155). Slabs defined here to be parallel to the xy plane. Also - extended to non-neutral systems (J. Chem. Phys. 131, 094107). -------------------------------------------------------------------------- */ - -void Ewald::slabcorr() -{ - // compute local contribution to global dipole moment - - double *q = atom->q; - double **x = atom->x; - double zprd = domain->zprd; - int nlocal = atom->nlocal; - - double dipole = 0.0; - for (int i = 0; i < nlocal; i++) dipole += q[i]*x[i][2]; - - // sum local contributions to get global dipole moment - - double dipole_all; - MPI_Allreduce(&dipole,&dipole_all,1,MPI_DOUBLE,MPI_SUM,world); - - // need to make non-neutral systems and/or - // per-atom energy translationally invariant - - double dipole_r2 = 0.0; - if (eflag_atom || fabs(qsum) > SMALL) { - for (int i = 0; i < nlocal; i++) - dipole_r2 += q[i]*x[i][2]*x[i][2]; - - // sum local contributions - - double tmp; - MPI_Allreduce(&dipole_r2,&tmp,1,MPI_DOUBLE,MPI_SUM,world); - dipole_r2 = tmp; - } - - // compute corrections - - const double e_slabcorr = MY_2PI*(dipole_all*dipole_all - - qsum*dipole_r2 - qsum*qsum*zprd*zprd/12.0)/volume; - const double qscale = force->qqrd2e * scale; - - 
if (eflag_global) energy += qscale * e_slabcorr; - - // per-atom energy - - if (eflag_atom) { - double efact = qscale * MY_2PI/volume; - for (int i = 0; i < nlocal; i++) - eatom[i] += efact * q[i]*(x[i][2]*dipole_all - 0.5*(dipole_r2 + - qsum*x[i][2]*x[i][2]) - qsum*zprd*zprd/12.0); - } - - // add on force corrections - - double ffact = qscale * (-4.0*MY_PI/volume); - double **f = atom->f; - - for (int i = 0; i < nlocal; i++) f[i][2] += ffact * q[i]*(dipole_all - qsum*x[i][2]); -} - -/* ---------------------------------------------------------------------- - memory usage of local arrays -------------------------------------------------------------------------- */ - -double Ewald::memory_usage() -{ - double bytes = 3 * kmax3d * sizeof(int); - bytes += (1 + 3 + 6) * kmax3d * sizeof(double); - bytes += 4 * kmax3d * sizeof(double); - bytes += nmax*3 * sizeof(double); - bytes += 2 * (2*kmax+1)*3*nmax * sizeof(double); - return bytes; -} - -/* ---------------------------------------------------------------------- - group-group interactions - ------------------------------------------------------------------------- */ - -/* ---------------------------------------------------------------------- - compute the Ewald total long-range force and energy for groups A and B - ------------------------------------------------------------------------- */ - -void Ewald::compute_group_group(int groupbit_A, int groupbit_B, int AA_flag) -{ - if (slabflag && triclinic) - error->all(FLERR,"Cannot (yet) use K-space slab " - "correction with compute group/group for triclinic systems"); - - int i,k; - - if (!group_allocate_flag) { - allocate_groups(); - group_allocate_flag = 1; - } - - e2group = 0.0; //energy - f2group[0] = 0.0; //force in x-direction - f2group[1] = 0.0; //force in y-direction - f2group[2] = 0.0; //force in z-direction - - // partial and total structure factors for groups A and B - - for (k = 0; k < kcount; k++) { - - // group A - - sfacrl_A[k] = 0.0; - sfacim_A[k] = 0.0; - 
sfacrl_A_all[k] = 0.0; - sfacim_A_all[k] = 0; - - // group B - - sfacrl_B[k] = 0.0; - sfacim_B[k] = 0.0; - sfacrl_B_all[k] = 0.0; - sfacim_B_all[k] = 0.0; - } - - double *q = atom->q; - int nlocal = atom->nlocal; - int *mask = atom->mask; - - int kx,ky,kz; - double cypz,sypz,exprl,expim; - - // partial structure factors for groups A and B on each processor - - for (k = 0; k < kcount; k++) { - kx = kxvecs[k]; - ky = kyvecs[k]; - kz = kzvecs[k]; - - for (i = 0; i < nlocal; i++) { - - if (!((mask[i] & groupbit_A) && (mask[i] & groupbit_B))) - if (AA_flag) continue; - - if ((mask[i] & groupbit_A) || (mask[i] & groupbit_B)) { - - cypz = cs[ky][1][i]*cs[kz][2][i] - sn[ky][1][i]*sn[kz][2][i]; - sypz = sn[ky][1][i]*cs[kz][2][i] + cs[ky][1][i]*sn[kz][2][i]; - exprl = cs[kx][0][i]*cypz - sn[kx][0][i]*sypz; - expim = sn[kx][0][i]*cypz + cs[kx][0][i]*sypz; - - // group A - - if (mask[i] & groupbit_A) { - sfacrl_A[k] += q[i]*exprl; - sfacim_A[k] += q[i]*expim; - } - - // group B - - if (mask[i] & groupbit_B) { - sfacrl_B[k] += q[i]*exprl; - sfacim_B[k] += q[i]*expim; - } - } - } - } - - // total structure factor by summing over procs - - MPI_Allreduce(sfacrl_A,sfacrl_A_all,kcount,MPI_DOUBLE,MPI_SUM,world); - MPI_Allreduce(sfacim_A,sfacim_A_all,kcount,MPI_DOUBLE,MPI_SUM,world); - - MPI_Allreduce(sfacrl_B,sfacrl_B_all,kcount,MPI_DOUBLE,MPI_SUM,world); - MPI_Allreduce(sfacim_B,sfacim_B_all,kcount,MPI_DOUBLE,MPI_SUM,world); - - const double qscale = force->qqrd2e * scale; - double partial_group; - - // total group A <--> group B energy - // self and boundary correction terms are in compute_group_group.cpp - - for (k = 0; k < kcount; k++) { - partial_group = sfacrl_A_all[k]*sfacrl_B_all[k] + - sfacim_A_all[k]*sfacim_B_all[k]; - e2group += ug[k]*partial_group; - } - - e2group *= qscale; - - // total group A <--> group B force - - for (k = 0; k < kcount; k++) { - partial_group = sfacim_A_all[k]*sfacrl_B_all[k] - - sfacrl_A_all[k]*sfacim_B_all[k]; - f2group[0] += 
eg[k][0]*partial_group; - f2group[1] += eg[k][1]*partial_group; - if (slabflag != 2) f2group[2] += eg[k][2]*partial_group; - } - - f2group[0] *= qscale; - f2group[1] *= qscale; - f2group[2] *= qscale; - - // 2d slab correction - - if (slabflag == 1) - slabcorr_groups(groupbit_A, groupbit_B, AA_flag); -} - -/* ---------------------------------------------------------------------- - Slab-geometry correction term to dampen inter-slab interactions between - periodically repeating slabs. Yields good approximation to 2D Ewald if - adequate empty space is left between repeating slabs (J. Chem. Phys. - 111, 3155). Slabs defined here to be parallel to the xy plane. Also - extended to non-neutral systems (J. Chem. Phys. 131, 094107). -------------------------------------------------------------------------- */ - -void Ewald::slabcorr_groups(int groupbit_A, int groupbit_B, int AA_flag) -{ - // compute local contribution to global dipole moment - - double *q = atom->q; - double **x = atom->x; - double zprd = domain->zprd; - int *mask = atom->mask; - int nlocal = atom->nlocal; - - double qsum_A = 0.0; - double qsum_B = 0.0; - double dipole_A = 0.0; - double dipole_B = 0.0; - double dipole_r2_A = 0.0; - double dipole_r2_B = 0.0; - - for (int i = 0; i < nlocal; i++) { - if (!((mask[i] & groupbit_A) && (mask[i] & groupbit_B))) - if (AA_flag) continue; - - if (mask[i] & groupbit_A) { - qsum_A += q[i]; - dipole_A += q[i]*x[i][2]; - dipole_r2_A += q[i]*x[i][2]*x[i][2]; - } - - if (mask[i] & groupbit_B) { - qsum_B += q[i]; - dipole_B += q[i]*x[i][2]; - dipole_r2_B += q[i]*x[i][2]*x[i][2]; - } - } - - // sum local contributions to get total charge and global dipole moment - // for each group - - double tmp; - MPI_Allreduce(&qsum_A,&tmp,1,MPI_DOUBLE,MPI_SUM,world); - qsum_A = tmp; - - MPI_Allreduce(&qsum_B,&tmp,1,MPI_DOUBLE,MPI_SUM,world); - qsum_B = tmp; - - MPI_Allreduce(&dipole_A,&tmp,1,MPI_DOUBLE,MPI_SUM,world); - dipole_A = tmp; - - 
MPI_Allreduce(&dipole_B,&tmp,1,MPI_DOUBLE,MPI_SUM,world); - dipole_B = tmp; - - MPI_Allreduce(&dipole_r2_A,&tmp,1,MPI_DOUBLE,MPI_SUM,world); - dipole_r2_A = tmp; - - MPI_Allreduce(&dipole_r2_B,&tmp,1,MPI_DOUBLE,MPI_SUM,world); - dipole_r2_B = tmp; - - // compute corrections - - const double qscale = force->qqrd2e * scale; - const double efact = qscale * MY_2PI/volume; - - e2group += efact * (dipole_A*dipole_B - 0.5*(qsum_A*dipole_r2_B + - qsum_B*dipole_r2_A) - qsum_A*qsum_B*zprd*zprd/12.0); - - // add on force corrections - - const double ffact = qscale * (-4.0*MY_PI/volume); - f2group[2] += ffact * (qsum_A*dipole_B - qsum_B*dipole_A); -} - -/* ---------------------------------------------------------------------- - allocate group-group memory that depends on # of K-vectors -------------------------------------------------------------------------- */ - -void Ewald::allocate_groups() -{ - // group A - - sfacrl_A = new double[kmax3d]; - sfacim_A = new double[kmax3d]; - sfacrl_A_all = new double[kmax3d]; - sfacim_A_all = new double[kmax3d]; - - // group B - - sfacrl_B = new double[kmax3d]; - sfacim_B = new double[kmax3d]; - sfacrl_B_all = new double[kmax3d]; - sfacim_B_all = new double[kmax3d]; -} - -/* ---------------------------------------------------------------------- - deallocate group-group memory that depends on # of K-vectors -------------------------------------------------------------------------- */ - -void Ewald::deallocate_groups() -{ - // group A - - delete [] sfacrl_A; - delete [] sfacim_A; - delete [] sfacrl_A_all; - delete [] sfacim_A_all; - - // group B - - delete [] sfacrl_B; - delete [] sfacim_B; - delete [] sfacrl_B_all; - delete [] sfacim_B_all; -} +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. 
Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing authors: Roy Pollock (LLNL), Paul Crozier (SNL) + per-atom energy/virial added by German Samolyuk (ORNL), Stan Moore (BYU) + group/group energy/force added by Stan Moore (BYU) + triclinic added by Stan Moore (SNL) +------------------------------------------------------------------------- */ + +#include "mpi.h" +#include "stdlib.h" +#include "stdio.h" +#include "string.h" +#include "math.h" +#include "ewald.h" +#include "atom.h" +#include "comm.h" +#include "force.h" +#include "pair.h" +#include "domain.h" +#include "math_const.h" +#include "memory.h" +#include "error.h" + +using namespace LAMMPS_NS; +using namespace MathConst; + +#define SMALL 0.00001 + +/* ---------------------------------------------------------------------- */ + +Ewald::Ewald(LAMMPS *lmp, int narg, char **arg) : KSpace(lmp, narg, arg) +{ + if (narg != 1) error->all(FLERR,"Illegal kspace_style ewald command"); + + ewaldflag = 1; + group_group_enable = 1; + group_allocate_flag = 0; + + accuracy_relative = fabs(force->numeric(FLERR,arg[0])); + + kmax = 0; + kxvecs = kyvecs = kzvecs = NULL; + ug = NULL; + eg = vg = NULL; + sfacrl = sfacim = sfacrl_all = sfacim_all = NULL; + + nmax = 0; + ek = NULL; + cs = sn = NULL; + + kcount = 0; +} + +/* ---------------------------------------------------------------------- + free all memory +------------------------------------------------------------------------- */ + +Ewald::~Ewald() +{ + deallocate(); + if (group_allocate_flag) deallocate_groups(); + memory->destroy(ek); + memory->destroy3d_offset(cs,-kmax_created); + 
memory->destroy3d_offset(sn,-kmax_created); +} + +/* ---------------------------------------------------------------------- */ + +void Ewald::init() +{ + if (comm->me == 0) { + if (screen) fprintf(screen,"Ewald initialization ...\n"); + if (logfile) fprintf(logfile,"Ewald initialization ...\n"); + } + + // error check + + triclinic_check(); + if (domain->dimension == 2) + error->all(FLERR,"Cannot use Ewald with 2d simulation"); + + if (!atom->q_flag) error->all(FLERR,"Kspace style requires atom attribute q"); + + if (slabflag == 0 && domain->nonperiodic > 0) + error->all(FLERR,"Cannot use nonperiodic boundaries with Ewald"); + if (slabflag) { + if (domain->xperiodic != 1 || domain->yperiodic != 1 || + domain->boundary[2][0] != 1 || domain->boundary[2][1] != 1) + error->all(FLERR,"Incorrect boundaries with slab Ewald"); + if (domain->triclinic) + error->all(FLERR,"Cannot (yet) use Ewald with triclinic box " + "and slab correction"); + } + + // extract short-range Coulombic cutoff from pair style + + scale = 1.0; + + pair_check(); + + int itmp; + double *p_cutoff = (double *) force->pair->extract("cut_coul",itmp); + if (p_cutoff == NULL) + error->all(FLERR,"KSpace style is incompatible with Pair style"); + double cutoff = *p_cutoff; + + qsum = qsqsum = 0.0; + for (int i = 0; i < atom->nlocal; i++) { + qsum += atom->q[i]; + qsqsum += atom->q[i]*atom->q[i]; + } + + double tmp; + MPI_Allreduce(&qsum,&tmp,1,MPI_DOUBLE,MPI_SUM,world); + qsum = tmp; + MPI_Allreduce(&qsqsum,&tmp,1,MPI_DOUBLE,MPI_SUM,world); + qsqsum = tmp; + + if (qsqsum == 0.0) + error->all(FLERR,"Cannot use kspace solver on system with no charge"); + if (fabs(qsum) > SMALL && comm->me == 0) { + char str[128]; + sprintf(str,"System is not charge neutral, net charge = %g",qsum); + error->warning(FLERR,str); + } + + // set accuracy (force units) from accuracy_relative or accuracy_absolute + + if (accuracy_absolute >= 0.0) accuracy = accuracy_absolute; + else accuracy = accuracy_relative * two_charge_force; + 
+ // setup K-space resolution + + q2 = qsqsum * force->qqrd2e; + bigint natoms = atom->natoms; + + triclinic = domain->triclinic; + + // use xprd,yprd,zprd even if triclinic so grid size is the same + // adjust z dimension for 2d slab Ewald + // 3d Ewald just uses zprd since slab_volfactor = 1.0 + + double xprd = domain->xprd; + double yprd = domain->yprd; + double zprd = domain->zprd; + double zprd_slab = zprd*slab_volfactor; + + // make initial g_ewald estimate + // based on desired accuracy and real space cutoff + // fluid-occupied volume used to estimate real-space error + // zprd used rather than zprd_slab + + if (!gewaldflag) { + if (accuracy <= 0.0) + error->all(FLERR,"KSpace accuracy must be > 0"); + g_ewald = accuracy*sqrt(natoms*cutoff*xprd*yprd*zprd) / (2.0*q2); + if (g_ewald >= 1.0) g_ewald = (1.35 - 0.15*log(accuracy))/cutoff; + else g_ewald = sqrt(-log(g_ewald)) / cutoff; + } + + // setup Ewald coefficients so can print stats + + setup(); + + // final RMS accuracy + + double lprx = rms(kxmax_orig,xprd,natoms,q2); + double lpry = rms(kymax_orig,yprd,natoms,q2); + double lprz = rms(kzmax_orig,zprd_slab,natoms,q2); + double lpr = sqrt(lprx*lprx + lpry*lpry + lprz*lprz) / sqrt(3.0); + double q2_over_sqrt = q2 / sqrt(natoms*cutoff*xprd*yprd*zprd_slab); + double spr = 2.0 *q2_over_sqrt * exp(-g_ewald*g_ewald*cutoff*cutoff); + double tpr = estimate_table_accuracy(q2_over_sqrt,spr); + double estimated_accuracy = sqrt(lpr*lpr + spr*spr + tpr*tpr); + + // stats + + if (comm->me == 0) { + if (screen) { + fprintf(screen," G vector (1/distance) = %g\n",g_ewald); + fprintf(screen," estimated absolute RMS force accuracy = %g\n", + estimated_accuracy); + fprintf(screen," estimated relative force accuracy = %g\n", + estimated_accuracy/two_charge_force); + fprintf(screen," KSpace vectors: actual max1d max3d = %d %d %d\n", + kcount,kmax,kmax3d); + fprintf(screen," kxmax kymax kzmax = %d %d %d\n", + kxmax,kymax,kzmax); + } + if (logfile) { + fprintf(logfile," G vector 
(1/distance) = %g\n",g_ewald); + fprintf(logfile," estimated absolute RMS force accuracy = %g\n", + estimated_accuracy); + fprintf(logfile," estimated relative force accuracy = %g\n", + estimated_accuracy/two_charge_force); + fprintf(logfile," KSpace vectors: actual max1d max3d = %d %d %d\n", + kcount,kmax,kmax3d); + fprintf(logfile," kxmax kymax kzmax = %d %d %d\n", + kxmax,kymax,kzmax); + } + } +} + +/* ---------------------------------------------------------------------- + adjust Ewald coeffs, called initially and whenever volume has changed +------------------------------------------------------------------------- */ + +void Ewald::setup() +{ + // volume-dependent factors + + double xprd = domain->xprd; + double yprd = domain->yprd; + double zprd = domain->zprd; + + // adjustment of z dimension for 2d slab Ewald + // 3d Ewald just uses zprd since slab_volfactor = 1.0 + + double zprd_slab = zprd*slab_volfactor; + volume = xprd * yprd * zprd_slab; + + unitk[0] = 2.0*MY_PI/xprd; + unitk[1] = 2.0*MY_PI/yprd; + unitk[2] = 2.0*MY_PI/zprd_slab; + + int kmax_old = kmax; + + if (kewaldflag == 0) { + + // determine kmax + // function of current box size, accuracy, G_ewald (short-range cutoff) + + bigint natoms = atom->natoms; + double err; + kxmax = 1; + kymax = 1; + kzmax = 1; + + err = rms(kxmax,xprd,natoms,q2); + while (err > accuracy) { + kxmax++; + err = rms(kxmax,xprd,natoms,q2); + } + + err = rms(kymax,yprd,natoms,q2); + while (err > accuracy) { + kymax++; + err = rms(kymax,yprd,natoms,q2); + } + + err = rms(kzmax,zprd_slab,natoms,q2); + while (err > accuracy) { + kzmax++; + err = rms(kzmax,zprd_slab,natoms,q2); + } + + kmax = MAX(kxmax,kymax); + kmax = MAX(kmax,kzmax); + kmax3d = 4*kmax*kmax*kmax + 6*kmax*kmax + 3*kmax; + + double gsqxmx = unitk[0]*unitk[0]*kxmax*kxmax; + double gsqymx = unitk[1]*unitk[1]*kymax*kymax; + double gsqzmx = unitk[2]*unitk[2]*kzmax*kzmax; + gsqmx = MAX(gsqxmx,gsqymx); + gsqmx = MAX(gsqmx,gsqzmx); + + kxmax_orig = kxmax; + kymax_orig = 
kymax; + kzmax_orig = kzmax; + + // scale lattice vectors for triclinic skew + + if (triclinic) { + double tmp[3]; + tmp[0] = kxmax/xprd; + tmp[1] = kymax/yprd; + tmp[2] = kzmax/zprd; + lamda2xT(&tmp[0],&tmp[0]); + kxmax = MAX(1,static_cast(tmp[0])); + kymax = MAX(1,static_cast(tmp[1])); + kzmax = MAX(1,static_cast(tmp[2])); + + kmax = MAX(kxmax,kymax); + kmax = MAX(kmax,kzmax); + kmax3d = 4*kmax*kmax*kmax + 6*kmax*kmax + 3*kmax; + } + + } else { + + kxmax = kx_ewald; + kymax = ky_ewald; + kzmax = kz_ewald; + + kxmax_orig = kxmax; + kymax_orig = kymax; + kzmax_orig = kzmax; + + kmax = MAX(kxmax,kymax); + kmax = MAX(kmax,kzmax); + kmax3d = 4*kmax*kmax*kmax + 6*kmax*kmax + 3*kmax; + + double gsqxmx = unitk[0]*unitk[0]*kxmax*kxmax; + double gsqymx = unitk[1]*unitk[1]*kymax*kymax; + double gsqzmx = unitk[2]*unitk[2]*kzmax*kzmax; + gsqmx = MAX(gsqxmx,gsqymx); + gsqmx = MAX(gsqmx,gsqzmx); + } + + gsqmx *= 1.00001; + + // if size has grown, reallocate k-dependent and nlocal-dependent arrays + + if (kmax > kmax_old) { + deallocate(); + allocate(); + group_allocate_flag = 0; + + memory->destroy(ek); + memory->destroy3d_offset(cs,-kmax_created); + memory->destroy3d_offset(sn,-kmax_created); + nmax = atom->nmax; + memory->create(ek,nmax,3,"ewald:ek"); + memory->create3d_offset(cs,-kmax,kmax,3,nmax,"ewald:cs"); + memory->create3d_offset(sn,-kmax,kmax,3,nmax,"ewald:sn"); + kmax_created = kmax; + } + + // pre-compute Ewald coefficients + + if (triclinic == 0) + coeffs(); + else + coeffs_triclinic(); +} + +/* ---------------------------------------------------------------------- + compute RMS accuracy for a dimension +------------------------------------------------------------------------- */ + +double Ewald::rms(int km, double prd, bigint natoms, double q2) +{ + double value = 2.0*q2*g_ewald/prd * + sqrt(1.0/(MY_PI*km*natoms)) * + exp(-MY_PI*MY_PI*km*km/(g_ewald*g_ewald*prd*prd)); + + return value; +} + +/* ---------------------------------------------------------------------- 
+ compute the Ewald long-range force, energy, virial +------------------------------------------------------------------------- */ + +void Ewald::compute(int eflag, int vflag) +{ + int i,j,k; + + // set energy/virial flags + + if (eflag || vflag) ev_setup(eflag,vflag); + else evflag = evflag_atom = eflag_global = vflag_global = + eflag_atom = vflag_atom = 0; + + // extend size of per-atom arrays if necessary + + if (atom->nlocal > nmax) { + memory->destroy(ek); + memory->destroy3d_offset(cs,-kmax_created); + memory->destroy3d_offset(sn,-kmax_created); + nmax = atom->nmax; + memory->create(ek,nmax,3,"ewald:ek"); + memory->create3d_offset(cs,-kmax,kmax,3,nmax,"ewald:cs"); + memory->create3d_offset(sn,-kmax,kmax,3,nmax,"ewald:sn"); + kmax_created = kmax; + } + + // partial structure factors on each processor + // total structure factor by summing over procs + + if (triclinic == 0) + eik_dot_r(); + else + eik_dot_r_triclinic(); + + MPI_Allreduce(sfacrl,sfacrl_all,kcount,MPI_DOUBLE,MPI_SUM,world); + MPI_Allreduce(sfacim,sfacim_all,kcount,MPI_DOUBLE,MPI_SUM,world); + + // K-space portion of electric field + // double loop over K-vectors and local atoms + // perform per-atom calculations if needed + + double **f = atom->f; + double *q = atom->q; + int nlocal = atom->nlocal; + + int kx,ky,kz; + double cypz,sypz,exprl,expim,partial,partial_peratom; + + for (i = 0; i < nlocal; i++) { + ek[i][0] = 0.0; + ek[i][1] = 0.0; + ek[i][2] = 0.0; + } + + for (k = 0; k < kcount; k++) { + kx = kxvecs[k]; + ky = kyvecs[k]; + kz = kzvecs[k]; + + for (i = 0; i < nlocal; i++) { + cypz = cs[ky][1][i]*cs[kz][2][i] - sn[ky][1][i]*sn[kz][2][i]; + sypz = sn[ky][1][i]*cs[kz][2][i] + cs[ky][1][i]*sn[kz][2][i]; + exprl = cs[kx][0][i]*cypz - sn[kx][0][i]*sypz; + expim = sn[kx][0][i]*cypz + cs[kx][0][i]*sypz; + partial = expim*sfacrl_all[k] - exprl*sfacim_all[k]; + ek[i][0] += partial*eg[k][0]; + ek[i][1] += partial*eg[k][1]; + ek[i][2] += partial*eg[k][2]; + + if (evflag_atom) { + partial_peratom = 
exprl*sfacrl_all[k] + expim*sfacim_all[k]; + if (eflag_atom) eatom[i] += q[i]*ug[k]*partial_peratom; + if (vflag_atom) + for (j = 0; j < 6; j++) + vatom[i][j] += ug[k]*vg[k][j]*partial_peratom; + } + } + } + + // convert E-field to force + + const double qscale = force->qqrd2e * scale; + + for (i = 0; i < nlocal; i++) { + f[i][0] += qscale * q[i]*ek[i][0]; + f[i][1] += qscale * q[i]*ek[i][1]; + if (slabflag != 2) f[i][2] += qscale * q[i]*ek[i][2]; + } + + // global energy + + if (eflag_global) { + for (k = 0; k < kcount; k++) + energy += ug[k] * (sfacrl_all[k]*sfacrl_all[k] + + sfacim_all[k]*sfacim_all[k]); + energy -= g_ewald*qsqsum/MY_PIS + + MY_PI2*qsum*qsum / (g_ewald*g_ewald*volume); + energy *= qscale; + } + + // global virial + + if (vflag_global) { + double uk; + for (k = 0; k < kcount; k++) { + uk = ug[k] * (sfacrl_all[k]*sfacrl_all[k] + sfacim_all[k]*sfacim_all[k]); + for (j = 0; j < 6; j++) virial[j] += uk*vg[k][j]; + } + for (j = 0; j < 6; j++) virial[j] *= qscale; + } + + // per-atom energy/virial + // energy includes self-energy correction + + if (evflag_atom) { + if (eflag_atom) { + for (i = 0; i < nlocal; i++) { + eatom[i] -= g_ewald*q[i]*q[i]/MY_PIS + MY_PI2*q[i]*qsum / + (g_ewald*g_ewald*volume); + eatom[i] *= qscale; + } + } + + if (vflag_atom) + for (i = 0; i < nlocal; i++) + for (j = 0; j < 6; j++) vatom[i][j] *= q[i]*qscale; + } + + // 2d slab correction + + if (slabflag == 1) slabcorr(); +} + +/* ---------------------------------------------------------------------- */ + +void Ewald::eik_dot_r() +{ + int i,k,l,m,n,ic; + double cstr1,sstr1,cstr2,sstr2,cstr3,sstr3,cstr4,sstr4; + double sqk,clpm,slpm; + + double **x = atom->x; + double *q = atom->q; + int nlocal = atom->nlocal; + + n = 0; + + // (k,0,0), (0,l,0), (0,0,m) + + for (ic = 0; ic < 3; ic++) { + sqk = unitk[ic]*unitk[ic]; + if (sqk <= gsqmx) { + cstr1 = 0.0; + sstr1 = 0.0; + for (i = 0; i < nlocal; i++) { + cs[0][ic][i] = 1.0; + sn[0][ic][i] = 0.0; + cs[1][ic][i] = 
cos(unitk[ic]*x[i][ic]); + sn[1][ic][i] = sin(unitk[ic]*x[i][ic]); + cs[-1][ic][i] = cs[1][ic][i]; + sn[-1][ic][i] = -sn[1][ic][i]; + cstr1 += q[i]*cs[1][ic][i]; + sstr1 += q[i]*sn[1][ic][i]; + } + sfacrl[n] = cstr1; + sfacim[n++] = sstr1; + } + } + + for (m = 2; m <= kmax; m++) { + for (ic = 0; ic < 3; ic++) { + sqk = m*unitk[ic] * m*unitk[ic]; + if (sqk <= gsqmx) { + cstr1 = 0.0; + sstr1 = 0.0; + for (i = 0; i < nlocal; i++) { + cs[m][ic][i] = cs[m-1][ic][i]*cs[1][ic][i] - + sn[m-1][ic][i]*sn[1][ic][i]; + sn[m][ic][i] = sn[m-1][ic][i]*cs[1][ic][i] + + cs[m-1][ic][i]*sn[1][ic][i]; + cs[-m][ic][i] = cs[m][ic][i]; + sn[-m][ic][i] = -sn[m][ic][i]; + cstr1 += q[i]*cs[m][ic][i]; + sstr1 += q[i]*sn[m][ic][i]; + } + sfacrl[n] = cstr1; + sfacim[n++] = sstr1; + } + } + } + + // 1 = (k,l,0), 2 = (k,-l,0) + + for (k = 1; k <= kxmax; k++) { + for (l = 1; l <= kymax; l++) { + sqk = (k*unitk[0] * k*unitk[0]) + (l*unitk[1] * l*unitk[1]); + if (sqk <= gsqmx) { + cstr1 = 0.0; + sstr1 = 0.0; + cstr2 = 0.0; + sstr2 = 0.0; + for (i = 0; i < nlocal; i++) { + cstr1 += q[i]*(cs[k][0][i]*cs[l][1][i] - sn[k][0][i]*sn[l][1][i]); + sstr1 += q[i]*(sn[k][0][i]*cs[l][1][i] + cs[k][0][i]*sn[l][1][i]); + cstr2 += q[i]*(cs[k][0][i]*cs[l][1][i] + sn[k][0][i]*sn[l][1][i]); + sstr2 += q[i]*(sn[k][0][i]*cs[l][1][i] - cs[k][0][i]*sn[l][1][i]); + } + sfacrl[n] = cstr1; + sfacim[n++] = sstr1; + sfacrl[n] = cstr2; + sfacim[n++] = sstr2; + } + } + } + + // 1 = (0,l,m), 2 = (0,l,-m) + + for (l = 1; l <= kymax; l++) { + for (m = 1; m <= kzmax; m++) { + sqk = (l*unitk[1] * l*unitk[1]) + (m*unitk[2] * m*unitk[2]); + if (sqk <= gsqmx) { + cstr1 = 0.0; + sstr1 = 0.0; + cstr2 = 0.0; + sstr2 = 0.0; + for (i = 0; i < nlocal; i++) { + cstr1 += q[i]*(cs[l][1][i]*cs[m][2][i] - sn[l][1][i]*sn[m][2][i]); + sstr1 += q[i]*(sn[l][1][i]*cs[m][2][i] + cs[l][1][i]*sn[m][2][i]); + cstr2 += q[i]*(cs[l][1][i]*cs[m][2][i] + sn[l][1][i]*sn[m][2][i]); + sstr2 += q[i]*(sn[l][1][i]*cs[m][2][i] - cs[l][1][i]*sn[m][2][i]); + } + 
sfacrl[n] = cstr1; + sfacim[n++] = sstr1; + sfacrl[n] = cstr2; + sfacim[n++] = sstr2; + } + } + } + + // 1 = (k,0,m), 2 = (k,0,-m) + + for (k = 1; k <= kxmax; k++) { + for (m = 1; m <= kzmax; m++) { + sqk = (k*unitk[0] * k*unitk[0]) + (m*unitk[2] * m*unitk[2]); + if (sqk <= gsqmx) { + cstr1 = 0.0; + sstr1 = 0.0; + cstr2 = 0.0; + sstr2 = 0.0; + for (i = 0; i < nlocal; i++) { + cstr1 += q[i]*(cs[k][0][i]*cs[m][2][i] - sn[k][0][i]*sn[m][2][i]); + sstr1 += q[i]*(sn[k][0][i]*cs[m][2][i] + cs[k][0][i]*sn[m][2][i]); + cstr2 += q[i]*(cs[k][0][i]*cs[m][2][i] + sn[k][0][i]*sn[m][2][i]); + sstr2 += q[i]*(sn[k][0][i]*cs[m][2][i] - cs[k][0][i]*sn[m][2][i]); + } + sfacrl[n] = cstr1; + sfacim[n++] = sstr1; + sfacrl[n] = cstr2; + sfacim[n++] = sstr2; + } + } + } + + // 1 = (k,l,m), 2 = (k,-l,m), 3 = (k,l,-m), 4 = (k,-l,-m) + + for (k = 1; k <= kxmax; k++) { + for (l = 1; l <= kymax; l++) { + for (m = 1; m <= kzmax; m++) { + sqk = (k*unitk[0] * k*unitk[0]) + (l*unitk[1] * l*unitk[1]) + + (m*unitk[2] * m*unitk[2]); + if (sqk <= gsqmx) { + cstr1 = 0.0; + sstr1 = 0.0; + cstr2 = 0.0; + sstr2 = 0.0; + cstr3 = 0.0; + sstr3 = 0.0; + cstr4 = 0.0; + sstr4 = 0.0; + for (i = 0; i < nlocal; i++) { + clpm = cs[l][1][i]*cs[m][2][i] - sn[l][1][i]*sn[m][2][i]; + slpm = sn[l][1][i]*cs[m][2][i] + cs[l][1][i]*sn[m][2][i]; + cstr1 += q[i]*(cs[k][0][i]*clpm - sn[k][0][i]*slpm); + sstr1 += q[i]*(sn[k][0][i]*clpm + cs[k][0][i]*slpm); + + clpm = cs[l][1][i]*cs[m][2][i] + sn[l][1][i]*sn[m][2][i]; + slpm = -sn[l][1][i]*cs[m][2][i] + cs[l][1][i]*sn[m][2][i]; + cstr2 += q[i]*(cs[k][0][i]*clpm - sn[k][0][i]*slpm); + sstr2 += q[i]*(sn[k][0][i]*clpm + cs[k][0][i]*slpm); + + clpm = cs[l][1][i]*cs[m][2][i] + sn[l][1][i]*sn[m][2][i]; + slpm = sn[l][1][i]*cs[m][2][i] - cs[l][1][i]*sn[m][2][i]; + cstr3 += q[i]*(cs[k][0][i]*clpm - sn[k][0][i]*slpm); + sstr3 += q[i]*(sn[k][0][i]*clpm + cs[k][0][i]*slpm); + + clpm = cs[l][1][i]*cs[m][2][i] - sn[l][1][i]*sn[m][2][i]; + slpm = -sn[l][1][i]*cs[m][2][i] - 
cs[l][1][i]*sn[m][2][i]; + cstr4 += q[i]*(cs[k][0][i]*clpm - sn[k][0][i]*slpm); + sstr4 += q[i]*(sn[k][0][i]*clpm + cs[k][0][i]*slpm); + } + sfacrl[n] = cstr1; + sfacim[n++] = sstr1; + sfacrl[n] = cstr2; + sfacim[n++] = sstr2; + sfacrl[n] = cstr3; + sfacim[n++] = sstr3; + sfacrl[n] = cstr4; + sfacim[n++] = sstr4; + } + } + } + } +} + +/* ---------------------------------------------------------------------- */ + +void Ewald::eik_dot_r_triclinic() +{ + int i,k,l,m,n,ic; + double cstr1,sstr1; + double sqk,clpm,slpm; + + double **x = atom->x; + double *q = atom->q; + int nlocal = atom->nlocal; + + double unitk_lamda[3]; + + double max_kvecs[3]; + max_kvecs[0] = kxmax; + max_kvecs[1] = kymax; + max_kvecs[2] = kzmax; + + // (k,0,0), (0,l,0), (0,0,m) + + for (ic = 0; ic < 3; ic++) { + unitk_lamda[0] = 0.0; + unitk_lamda[1] = 0.0; + unitk_lamda[2] = 0.0; + unitk_lamda[ic] = 2.0*MY_PI; + x2lamdaT(&unitk_lamda[0],&unitk_lamda[0]); + sqk = unitk_lamda[ic]*unitk_lamda[ic]; + if (sqk <= gsqmx) { + for (i = 0; i < nlocal; i++) { + cs[0][ic][i] = 1.0; + sn[0][ic][i] = 0.0; + cs[1][ic][i] = cos(unitk_lamda[0]*x[i][0] + unitk_lamda[1]*x[i][1] + unitk_lamda[2]*x[i][2]); + sn[1][ic][i] = sin(unitk_lamda[0]*x[i][0] + unitk_lamda[1]*x[i][1] + unitk_lamda[2]*x[i][2]); + cs[-1][ic][i] = cs[1][ic][i]; + sn[-1][ic][i] = -sn[1][ic][i]; + } + } + } + + for (ic = 0; ic < 3; ic++) { + for (m = 2; m <= max_kvecs[ic]; m++) { + unitk_lamda[0] = 0.0; + unitk_lamda[1] = 0.0; + unitk_lamda[2] = 0.0; + unitk_lamda[ic] = 2.0*MY_PI*m; + x2lamdaT(&unitk_lamda[0],&unitk_lamda[0]); + sqk = unitk_lamda[ic]*unitk_lamda[ic]; + for (i = 0; i < nlocal; i++) { + cs[m][ic][i] = cs[m-1][ic][i]*cs[1][ic][i] - + sn[m-1][ic][i]*sn[1][ic][i]; + sn[m][ic][i] = sn[m-1][ic][i]*cs[1][ic][i] + + cs[m-1][ic][i]*sn[1][ic][i]; + cs[-m][ic][i] = cs[m][ic][i]; + sn[-m][ic][i] = -sn[m][ic][i]; + } + } + } + + for (n = 0; n < kcount; n++) { + k = kxvecs[n]; + l = kyvecs[n]; + m = kzvecs[n]; + cstr1 = 0.0; + sstr1 = 0.0; + for 
(i = 0; i < nlocal; i++) { + clpm = cs[l][1][i]*cs[m][2][i] - sn[l][1][i]*sn[m][2][i]; + slpm = sn[l][1][i]*cs[m][2][i] + cs[l][1][i]*sn[m][2][i]; + cstr1 += q[i]*(cs[k][0][i]*clpm - sn[k][0][i]*slpm); + sstr1 += q[i]*(sn[k][0][i]*clpm + cs[k][0][i]*slpm); + } + sfacrl[n] = cstr1; + sfacim[n] = sstr1; + } +} + +/* ---------------------------------------------------------------------- + pre-compute coefficients for each Ewald K-vector +------------------------------------------------------------------------- */ + +void Ewald::coeffs() +{ + int k,l,m; + double sqk,vterm; + + double g_ewald_sq_inv = 1.0 / (g_ewald*g_ewald); + double preu = 4.0*MY_PI/volume; + + kcount = 0; + + // (k,0,0), (0,l,0), (0,0,m) + + for (m = 1; m <= kmax; m++) { + sqk = (m*unitk[0]) * (m*unitk[0]); + if (sqk <= gsqmx) { + kxvecs[kcount] = m; + kyvecs[kcount] = 0; + kzvecs[kcount] = 0; + ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk; + eg[kcount][0] = 2.0*unitk[0]*m*ug[kcount]; + eg[kcount][1] = 0.0; + eg[kcount][2] = 0.0; + vterm = -2.0*(1.0/sqk + 0.25*g_ewald_sq_inv); + vg[kcount][0] = 1.0 + vterm*(unitk[0]*m)*(unitk[0]*m); + vg[kcount][1] = 1.0; + vg[kcount][2] = 1.0; + vg[kcount][3] = 0.0; + vg[kcount][4] = 0.0; + vg[kcount][5] = 0.0; + kcount++; + } + sqk = (m*unitk[1]) * (m*unitk[1]); + if (sqk <= gsqmx) { + kxvecs[kcount] = 0; + kyvecs[kcount] = m; + kzvecs[kcount] = 0; + ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk; + eg[kcount][0] = 0.0; + eg[kcount][1] = 2.0*unitk[1]*m*ug[kcount]; + eg[kcount][2] = 0.0; + vterm = -2.0*(1.0/sqk + 0.25*g_ewald_sq_inv); + vg[kcount][0] = 1.0; + vg[kcount][1] = 1.0 + vterm*(unitk[1]*m)*(unitk[1]*m); + vg[kcount][2] = 1.0; + vg[kcount][3] = 0.0; + vg[kcount][4] = 0.0; + vg[kcount][5] = 0.0; + kcount++; + } + sqk = (m*unitk[2]) * (m*unitk[2]); + if (sqk <= gsqmx) { + kxvecs[kcount] = 0; + kyvecs[kcount] = 0; + kzvecs[kcount] = m; + ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk; + eg[kcount][0] = 0.0; + eg[kcount][1] = 0.0; + 
eg[kcount][2] = 2.0*unitk[2]*m*ug[kcount]; + vterm = -2.0*(1.0/sqk + 0.25*g_ewald_sq_inv); + vg[kcount][0] = 1.0; + vg[kcount][1] = 1.0; + vg[kcount][2] = 1.0 + vterm*(unitk[2]*m)*(unitk[2]*m); + vg[kcount][3] = 0.0; + vg[kcount][4] = 0.0; + vg[kcount][5] = 0.0; + kcount++; + } + } + + // 1 = (k,l,0), 2 = (k,-l,0) + + for (k = 1; k <= kxmax; k++) { + for (l = 1; l <= kymax; l++) { + sqk = (unitk[0]*k) * (unitk[0]*k) + (unitk[1]*l) * (unitk[1]*l); + if (sqk <= gsqmx) { + kxvecs[kcount] = k; + kyvecs[kcount] = l; + kzvecs[kcount] = 0; + ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk; + eg[kcount][0] = 2.0*unitk[0]*k*ug[kcount]; + eg[kcount][1] = 2.0*unitk[1]*l*ug[kcount]; + eg[kcount][2] = 0.0; + vterm = -2.0*(1.0/sqk + 0.25*g_ewald_sq_inv); + vg[kcount][0] = 1.0 + vterm*(unitk[0]*k)*(unitk[0]*k); + vg[kcount][1] = 1.0 + vterm*(unitk[1]*l)*(unitk[1]*l); + vg[kcount][2] = 1.0; + vg[kcount][3] = vterm*unitk[0]*k*unitk[1]*l; + vg[kcount][4] = 0.0; + vg[kcount][5] = 0.0; + kcount++; + + kxvecs[kcount] = k; + kyvecs[kcount] = -l; + kzvecs[kcount] = 0; + ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk; + eg[kcount][0] = 2.0*unitk[0]*k*ug[kcount]; + eg[kcount][1] = -2.0*unitk[1]*l*ug[kcount]; + eg[kcount][2] = 0.0; + vg[kcount][0] = 1.0 + vterm*(unitk[0]*k)*(unitk[0]*k); + vg[kcount][1] = 1.0 + vterm*(unitk[1]*l)*(unitk[1]*l); + vg[kcount][2] = 1.0; + vg[kcount][3] = -vterm*unitk[0]*k*unitk[1]*l; + vg[kcount][4] = 0.0; + vg[kcount][5] = 0.0; + kcount++;; + } + } + } + + // 1 = (0,l,m), 2 = (0,l,-m) + + for (l = 1; l <= kymax; l++) { + for (m = 1; m <= kzmax; m++) { + sqk = (unitk[1]*l) * (unitk[1]*l) + (unitk[2]*m) * (unitk[2]*m); + if (sqk <= gsqmx) { + kxvecs[kcount] = 0; + kyvecs[kcount] = l; + kzvecs[kcount] = m; + ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk; + eg[kcount][0] = 0.0; + eg[kcount][1] = 2.0*unitk[1]*l*ug[kcount]; + eg[kcount][2] = 2.0*unitk[2]*m*ug[kcount]; + vterm = -2.0*(1.0/sqk + 0.25*g_ewald_sq_inv); + vg[kcount][0] = 1.0; + 
vg[kcount][1] = 1.0 + vterm*(unitk[1]*l)*(unitk[1]*l); + vg[kcount][2] = 1.0 + vterm*(unitk[2]*m)*(unitk[2]*m); + vg[kcount][3] = 0.0; + vg[kcount][4] = 0.0; + vg[kcount][5] = vterm*unitk[1]*l*unitk[2]*m; + kcount++; + + kxvecs[kcount] = 0; + kyvecs[kcount] = l; + kzvecs[kcount] = -m; + ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk; + eg[kcount][0] = 0.0; + eg[kcount][1] = 2.0*unitk[1]*l*ug[kcount]; + eg[kcount][2] = -2.0*unitk[2]*m*ug[kcount]; + vg[kcount][0] = 1.0; + vg[kcount][1] = 1.0 + vterm*(unitk[1]*l)*(unitk[1]*l); + vg[kcount][2] = 1.0 + vterm*(unitk[2]*m)*(unitk[2]*m); + vg[kcount][3] = 0.0; + vg[kcount][4] = 0.0; + vg[kcount][5] = -vterm*unitk[1]*l*unitk[2]*m; + kcount++; + } + } + } + + // 1 = (k,0,m), 2 = (k,0,-m) + + for (k = 1; k <= kxmax; k++) { + for (m = 1; m <= kzmax; m++) { + sqk = (unitk[0]*k) * (unitk[0]*k) + (unitk[2]*m) * (unitk[2]*m); + if (sqk <= gsqmx) { + kxvecs[kcount] = k; + kyvecs[kcount] = 0; + kzvecs[kcount] = m; + ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk; + eg[kcount][0] = 2.0*unitk[0]*k*ug[kcount]; + eg[kcount][1] = 0.0; + eg[kcount][2] = 2.0*unitk[2]*m*ug[kcount]; + vterm = -2.0*(1.0/sqk + 0.25*g_ewald_sq_inv); + vg[kcount][0] = 1.0 + vterm*(unitk[0]*k)*(unitk[0]*k); + vg[kcount][1] = 1.0; + vg[kcount][2] = 1.0 + vterm*(unitk[2]*m)*(unitk[2]*m); + vg[kcount][3] = 0.0; + vg[kcount][4] = vterm*unitk[0]*k*unitk[2]*m; + vg[kcount][5] = 0.0; + kcount++; + + kxvecs[kcount] = k; + kyvecs[kcount] = 0; + kzvecs[kcount] = -m; + ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk; + eg[kcount][0] = 2.0*unitk[0]*k*ug[kcount]; + eg[kcount][1] = 0.0; + eg[kcount][2] = -2.0*unitk[2]*m*ug[kcount]; + vg[kcount][0] = 1.0 + vterm*(unitk[0]*k)*(unitk[0]*k); + vg[kcount][1] = 1.0; + vg[kcount][2] = 1.0 + vterm*(unitk[2]*m)*(unitk[2]*m); + vg[kcount][3] = 0.0; + vg[kcount][4] = -vterm*unitk[0]*k*unitk[2]*m; + vg[kcount][5] = 0.0; + kcount++; + } + } + } + + // 1 = (k,l,m), 2 = (k,-l,m), 3 = (k,l,-m), 4 = (k,-l,-m) + + for (k = 1; k <= 
kxmax; k++) { + for (l = 1; l <= kymax; l++) { + for (m = 1; m <= kzmax; m++) { + sqk = (unitk[0]*k) * (unitk[0]*k) + (unitk[1]*l) * (unitk[1]*l) + + (unitk[2]*m) * (unitk[2]*m); + if (sqk <= gsqmx) { + kxvecs[kcount] = k; + kyvecs[kcount] = l; + kzvecs[kcount] = m; + ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk; + eg[kcount][0] = 2.0*unitk[0]*k*ug[kcount]; + eg[kcount][1] = 2.0*unitk[1]*l*ug[kcount]; + eg[kcount][2] = 2.0*unitk[2]*m*ug[kcount]; + vterm = -2.0*(1.0/sqk + 0.25*g_ewald_sq_inv); + vg[kcount][0] = 1.0 + vterm*(unitk[0]*k)*(unitk[0]*k); + vg[kcount][1] = 1.0 + vterm*(unitk[1]*l)*(unitk[1]*l); + vg[kcount][2] = 1.0 + vterm*(unitk[2]*m)*(unitk[2]*m); + vg[kcount][3] = vterm*unitk[0]*k*unitk[1]*l; + vg[kcount][4] = vterm*unitk[0]*k*unitk[2]*m; + vg[kcount][5] = vterm*unitk[1]*l*unitk[2]*m; + kcount++; + + kxvecs[kcount] = k; + kyvecs[kcount] = -l; + kzvecs[kcount] = m; + ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk; + eg[kcount][0] = 2.0*unitk[0]*k*ug[kcount]; + eg[kcount][1] = -2.0*unitk[1]*l*ug[kcount]; + eg[kcount][2] = 2.0*unitk[2]*m*ug[kcount]; + vg[kcount][0] = 1.0 + vterm*(unitk[0]*k)*(unitk[0]*k); + vg[kcount][1] = 1.0 + vterm*(unitk[1]*l)*(unitk[1]*l); + vg[kcount][2] = 1.0 + vterm*(unitk[2]*m)*(unitk[2]*m); + vg[kcount][3] = -vterm*unitk[0]*k*unitk[1]*l; + vg[kcount][4] = vterm*unitk[0]*k*unitk[2]*m; + vg[kcount][5] = -vterm*unitk[1]*l*unitk[2]*m; + kcount++; + + kxvecs[kcount] = k; + kyvecs[kcount] = l; + kzvecs[kcount] = -m; + ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk; + eg[kcount][0] = 2.0*unitk[0]*k*ug[kcount]; + eg[kcount][1] = 2.0*unitk[1]*l*ug[kcount]; + eg[kcount][2] = -2.0*unitk[2]*m*ug[kcount]; + vg[kcount][0] = 1.0 + vterm*(unitk[0]*k)*(unitk[0]*k); + vg[kcount][1] = 1.0 + vterm*(unitk[1]*l)*(unitk[1]*l); + vg[kcount][2] = 1.0 + vterm*(unitk[2]*m)*(unitk[2]*m); + vg[kcount][3] = vterm*unitk[0]*k*unitk[1]*l; + vg[kcount][4] = -vterm*unitk[0]*k*unitk[2]*m; + vg[kcount][5] = -vterm*unitk[1]*l*unitk[2]*m; + 
kcount++; + + kxvecs[kcount] = k; + kyvecs[kcount] = -l; + kzvecs[kcount] = -m; + ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk; + eg[kcount][0] = 2.0*unitk[0]*k*ug[kcount]; + eg[kcount][1] = -2.0*unitk[1]*l*ug[kcount]; + eg[kcount][2] = -2.0*unitk[2]*m*ug[kcount]; + vg[kcount][0] = 1.0 + vterm*(unitk[0]*k)*(unitk[0]*k); + vg[kcount][1] = 1.0 + vterm*(unitk[1]*l)*(unitk[1]*l); + vg[kcount][2] = 1.0 + vterm*(unitk[2]*m)*(unitk[2]*m); + vg[kcount][3] = -vterm*unitk[0]*k*unitk[1]*l; + vg[kcount][4] = -vterm*unitk[0]*k*unitk[2]*m; + vg[kcount][5] = vterm*unitk[1]*l*unitk[2]*m; + kcount++; + } + } + } + } +} + +/* ---------------------------------------------------------------------- + pre-compute coefficients for each Ewald K-vector for a triclinic + system +------------------------------------------------------------------------- */ + +void Ewald::coeffs_triclinic() +{ + int k,l,m; + double sqk,vterm; + + double g_ewald_sq_inv = 1.0 / (g_ewald*g_ewald); + double preu = 4.0*MY_PI/volume; + + double unitk_lamda[3]; + + kcount = 0; + + // 1 = (k,l,m), 2 = (k,-l,m), 3 = (k,l,-m), 4 = (k,-l,-m) + + for (k = 1; k <= kxmax; k++) { + for (l = -kymax; l <= kymax; l++) { + for (m = -kzmax; m <= kzmax; m++) { + unitk_lamda[0] = 2.0*MY_PI*k; + unitk_lamda[1] = 2.0*MY_PI*l; + unitk_lamda[2] = 2.0*MY_PI*m; + x2lamdaT(&unitk_lamda[0],&unitk_lamda[0]); + sqk = unitk_lamda[0]*unitk_lamda[0] + unitk_lamda[1]*unitk_lamda[1] + + unitk_lamda[2]*unitk_lamda[2]; + if (sqk <= gsqmx) { + kxvecs[kcount] = k; + kyvecs[kcount] = l; + kzvecs[kcount] = m; + ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk; + eg[kcount][0] = 2.0*unitk_lamda[0]*ug[kcount]; + eg[kcount][1] = 2.0*unitk_lamda[1]*ug[kcount]; + eg[kcount][2] = 2.0*unitk_lamda[2]*ug[kcount]; + vterm = -2.0*(1.0/sqk + 0.25*g_ewald_sq_inv); + vg[kcount][0] = 1.0 + vterm*unitk_lamda[0]*unitk_lamda[0]; + vg[kcount][1] = 1.0 + vterm*unitk_lamda[1]*unitk_lamda[1]; + vg[kcount][2] = 1.0 + vterm*unitk_lamda[2]*unitk_lamda[2]; + 
vg[kcount][3] = vterm*unitk_lamda[0]*unitk_lamda[1]; + vg[kcount][4] = vterm*unitk_lamda[0]*unitk_lamda[2]; + vg[kcount][5] = vterm*unitk_lamda[1]*unitk_lamda[2]; + kcount++; + } + } + } + } + + // 1 = (0,l,m), 2 = (0,l,-m) + + for (l = 1; l <= kymax; l++) { + for (m = -kzmax; m <= kzmax; m++) { + unitk_lamda[0] = 0.0; + unitk_lamda[1] = 2.0*MY_PI*l; + unitk_lamda[2] = 2.0*MY_PI*m; + x2lamdaT(&unitk_lamda[0],&unitk_lamda[0]); + sqk = unitk_lamda[1]*unitk_lamda[1] + unitk_lamda[2]*unitk_lamda[2]; + if (sqk <= gsqmx) { + kxvecs[kcount] = 0; + kyvecs[kcount] = l; + kzvecs[kcount] = m; + ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk; + eg[kcount][0] = 0.0; + eg[kcount][1] = 2.0*unitk_lamda[1]*ug[kcount]; + eg[kcount][2] = 2.0*unitk_lamda[2]*ug[kcount]; + vterm = -2.0*(1.0/sqk + 0.25*g_ewald_sq_inv); + vg[kcount][0] = 1.0; + vg[kcount][1] = 1.0 + vterm*unitk_lamda[1]*unitk_lamda[1]; + vg[kcount][2] = 1.0 + vterm*unitk_lamda[2]*unitk_lamda[2]; + vg[kcount][3] = 0.0; + vg[kcount][4] = 0.0; + vg[kcount][5] = vterm*unitk_lamda[1]*unitk_lamda[2]; + kcount++; + } + } + } + + // (0,0,m) + + for (m = 1; m <= kmax; m++) { + unitk_lamda[0] = 0.0; + unitk_lamda[1] = 0.0; + unitk_lamda[2] = 2.0*MY_PI*m; + x2lamdaT(&unitk_lamda[0],&unitk_lamda[0]); + sqk = unitk_lamda[2]*unitk_lamda[2]; + if (sqk <= gsqmx) { + kxvecs[kcount] = 0; + kyvecs[kcount] = 0; + kzvecs[kcount] = m; + ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk; + eg[kcount][0] = 0.0; + eg[kcount][1] = 0.0; + eg[kcount][2] = 2.0*unitk_lamda[2]*ug[kcount]; + vterm = -2.0*(1.0/sqk + 0.25*g_ewald_sq_inv); + vg[kcount][0] = 1.0; + vg[kcount][1] = 1.0; + vg[kcount][2] = 1.0 + vterm*unitk_lamda[2]*unitk_lamda[2]; + vg[kcount][3] = 0.0; + vg[kcount][4] = 0.0; + vg[kcount][5] = 0.0; + kcount++; + } + } +} + +/* ---------------------------------------------------------------------- + allocate memory that depends on # of K-vectors +------------------------------------------------------------------------- */ + +void 
Ewald::allocate() +{ + kxvecs = new int[kmax3d]; + kyvecs = new int[kmax3d]; + kzvecs = new int[kmax3d]; + + ug = new double[kmax3d]; + memory->create(eg,kmax3d,3,"ewald:eg"); + memory->create(vg,kmax3d,6,"ewald:vg"); + + sfacrl = new double[kmax3d]; + sfacim = new double[kmax3d]; + sfacrl_all = new double[kmax3d]; + sfacim_all = new double[kmax3d]; +} + +/* ---------------------------------------------------------------------- + deallocate memory that depends on # of K-vectors +------------------------------------------------------------------------- */ + +void Ewald::deallocate() +{ + delete [] kxvecs; + delete [] kyvecs; + delete [] kzvecs; + + delete [] ug; + memory->destroy(eg); + memory->destroy(vg); + + delete [] sfacrl; + delete [] sfacim; + delete [] sfacrl_all; + delete [] sfacim_all; +} + +/* ---------------------------------------------------------------------- + Slab-geometry correction term to dampen inter-slab interactions between + periodically repeating slabs. Yields good approximation to 2D Ewald if + adequate empty space is left between repeating slabs (J. Chem. Phys. + 111, 3155). Slabs defined here to be parallel to the xy plane. Also + extended to non-neutral systems (J. Chem. Phys. 131, 094107). 
+------------------------------------------------------------------------- */ + +void Ewald::slabcorr() +{ + // compute local contribution to global dipole moment + + double *q = atom->q; + double **x = atom->x; + double zprd = domain->zprd; + int nlocal = atom->nlocal; + + double dipole = 0.0; + for (int i = 0; i < nlocal; i++) dipole += q[i]*x[i][2]; + + // sum local contributions to get global dipole moment + + double dipole_all; + MPI_Allreduce(&dipole,&dipole_all,1,MPI_DOUBLE,MPI_SUM,world); + + // need to make non-neutral systems and/or + // per-atom energy translationally invariant + + double dipole_r2 = 0.0; + if (eflag_atom || fabs(qsum) > SMALL) { + for (int i = 0; i < nlocal; i++) + dipole_r2 += q[i]*x[i][2]*x[i][2]; + + // sum local contributions + + double tmp; + MPI_Allreduce(&dipole_r2,&tmp,1,MPI_DOUBLE,MPI_SUM,world); + dipole_r2 = tmp; + } + + // compute corrections + + const double e_slabcorr = MY_2PI*(dipole_all*dipole_all - + qsum*dipole_r2 - qsum*qsum*zprd*zprd/12.0)/volume; + const double qscale = force->qqrd2e * scale; + + if (eflag_global) energy += qscale * e_slabcorr; + + // per-atom energy + + if (eflag_atom) { + double efact = qscale * MY_2PI/volume; + for (int i = 0; i < nlocal; i++) + eatom[i] += efact * q[i]*(x[i][2]*dipole_all - 0.5*(dipole_r2 + + qsum*x[i][2]*x[i][2]) - qsum*zprd*zprd/12.0); + } + + // add on force corrections + + double ffact = qscale * (-4.0*MY_PI/volume); + double **f = atom->f; + + for (int i = 0; i < nlocal; i++) f[i][2] += ffact * q[i]*(dipole_all - qsum*x[i][2]); +} + +/* ---------------------------------------------------------------------- + memory usage of local arrays +------------------------------------------------------------------------- */ + +double Ewald::memory_usage() +{ + double bytes = 3 * kmax3d * sizeof(int); + bytes += (1 + 3 + 6) * kmax3d * sizeof(double); + bytes += 4 * kmax3d * sizeof(double); + bytes += nmax*3 * sizeof(double); + bytes += 2 * (2*kmax+1)*3*nmax * sizeof(double); + return 
bytes; +} + +/* ---------------------------------------------------------------------- + group-group interactions + ------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + compute the Ewald total long-range force and energy for groups A and B + ------------------------------------------------------------------------- */ + +void Ewald::compute_group_group(int groupbit_A, int groupbit_B, int AA_flag) +{ + if (slabflag && triclinic) + error->all(FLERR,"Cannot (yet) use K-space slab " + "correction with compute group/group for triclinic systems"); + + int i,k; + + if (!group_allocate_flag) { + allocate_groups(); + group_allocate_flag = 1; + } + + e2group = 0.0; //energy + f2group[0] = 0.0; //force in x-direction + f2group[1] = 0.0; //force in y-direction + f2group[2] = 0.0; //force in z-direction + + // partial and total structure factors for groups A and B + + for (k = 0; k < kcount; k++) { + + // group A + + sfacrl_A[k] = 0.0; + sfacim_A[k] = 0.0; + sfacrl_A_all[k] = 0.0; + sfacim_A_all[k] = 0; + + // group B + + sfacrl_B[k] = 0.0; + sfacim_B[k] = 0.0; + sfacrl_B_all[k] = 0.0; + sfacim_B_all[k] = 0.0; + } + + double *q = atom->q; + int nlocal = atom->nlocal; + int *mask = atom->mask; + + int kx,ky,kz; + double cypz,sypz,exprl,expim; + + // partial structure factors for groups A and B on each processor + + for (k = 0; k < kcount; k++) { + kx = kxvecs[k]; + ky = kyvecs[k]; + kz = kzvecs[k]; + + for (i = 0; i < nlocal; i++) { + + if (!((mask[i] & groupbit_A) && (mask[i] & groupbit_B))) + if (AA_flag) continue; + + if ((mask[i] & groupbit_A) || (mask[i] & groupbit_B)) { + + cypz = cs[ky][1][i]*cs[kz][2][i] - sn[ky][1][i]*sn[kz][2][i]; + sypz = sn[ky][1][i]*cs[kz][2][i] + cs[ky][1][i]*sn[kz][2][i]; + exprl = cs[kx][0][i]*cypz - sn[kx][0][i]*sypz; + expim = sn[kx][0][i]*cypz + cs[kx][0][i]*sypz; + + // group A + + if (mask[i] & groupbit_A) { + sfacrl_A[k] += q[i]*exprl; + 
sfacim_A[k] += q[i]*expim; + } + + // group B + + if (mask[i] & groupbit_B) { + sfacrl_B[k] += q[i]*exprl; + sfacim_B[k] += q[i]*expim; + } + } + } + } + + // total structure factor by summing over procs + + MPI_Allreduce(sfacrl_A,sfacrl_A_all,kcount,MPI_DOUBLE,MPI_SUM,world); + MPI_Allreduce(sfacim_A,sfacim_A_all,kcount,MPI_DOUBLE,MPI_SUM,world); + + MPI_Allreduce(sfacrl_B,sfacrl_B_all,kcount,MPI_DOUBLE,MPI_SUM,world); + MPI_Allreduce(sfacim_B,sfacim_B_all,kcount,MPI_DOUBLE,MPI_SUM,world); + + const double qscale = force->qqrd2e * scale; + double partial_group; + + // total group A <--> group B energy + // self and boundary correction terms are in compute_group_group.cpp + + for (k = 0; k < kcount; k++) { + partial_group = sfacrl_A_all[k]*sfacrl_B_all[k] + + sfacim_A_all[k]*sfacim_B_all[k]; + e2group += ug[k]*partial_group; + } + + e2group *= qscale; + + // total group A <--> group B force + + for (k = 0; k < kcount; k++) { + partial_group = sfacim_A_all[k]*sfacrl_B_all[k] - + sfacrl_A_all[k]*sfacim_B_all[k]; + f2group[0] += eg[k][0]*partial_group; + f2group[1] += eg[k][1]*partial_group; + if (slabflag != 2) f2group[2] += eg[k][2]*partial_group; + } + + f2group[0] *= qscale; + f2group[1] *= qscale; + f2group[2] *= qscale; + + // 2d slab correction + + if (slabflag == 1) + slabcorr_groups(groupbit_A, groupbit_B, AA_flag); +} + +/* ---------------------------------------------------------------------- + Slab-geometry correction term to dampen inter-slab interactions between + periodically repeating slabs. Yields good approximation to 2D Ewald if + adequate empty space is left between repeating slabs (J. Chem. Phys. + 111, 3155). Slabs defined here to be parallel to the xy plane. Also + extended to non-neutral systems (J. Chem. Phys. 131, 094107). 
+------------------------------------------------------------------------- */ + +void Ewald::slabcorr_groups(int groupbit_A, int groupbit_B, int AA_flag) +{ + // compute local contribution to global dipole moment + + double *q = atom->q; + double **x = atom->x; + double zprd = domain->zprd; + int *mask = atom->mask; + int nlocal = atom->nlocal; + + double qsum_A = 0.0; + double qsum_B = 0.0; + double dipole_A = 0.0; + double dipole_B = 0.0; + double dipole_r2_A = 0.0; + double dipole_r2_B = 0.0; + + for (int i = 0; i < nlocal; i++) { + if (!((mask[i] & groupbit_A) && (mask[i] & groupbit_B))) + if (AA_flag) continue; + + if (mask[i] & groupbit_A) { + qsum_A += q[i]; + dipole_A += q[i]*x[i][2]; + dipole_r2_A += q[i]*x[i][2]*x[i][2]; + } + + if (mask[i] & groupbit_B) { + qsum_B += q[i]; + dipole_B += q[i]*x[i][2]; + dipole_r2_B += q[i]*x[i][2]*x[i][2]; + } + } + + // sum local contributions to get total charge and global dipole moment + // for each group + + double tmp; + MPI_Allreduce(&qsum_A,&tmp,1,MPI_DOUBLE,MPI_SUM,world); + qsum_A = tmp; + + MPI_Allreduce(&qsum_B,&tmp,1,MPI_DOUBLE,MPI_SUM,world); + qsum_B = tmp; + + MPI_Allreduce(&dipole_A,&tmp,1,MPI_DOUBLE,MPI_SUM,world); + dipole_A = tmp; + + MPI_Allreduce(&dipole_B,&tmp,1,MPI_DOUBLE,MPI_SUM,world); + dipole_B = tmp; + + MPI_Allreduce(&dipole_r2_A,&tmp,1,MPI_DOUBLE,MPI_SUM,world); + dipole_r2_A = tmp; + + MPI_Allreduce(&dipole_r2_B,&tmp,1,MPI_DOUBLE,MPI_SUM,world); + dipole_r2_B = tmp; + + // compute corrections + + const double qscale = force->qqrd2e * scale; + const double efact = qscale * MY_2PI/volume; + + e2group += efact * (dipole_A*dipole_B - 0.5*(qsum_A*dipole_r2_B + + qsum_B*dipole_r2_A) - qsum_A*qsum_B*zprd*zprd/12.0); + + // add on force corrections + + const double ffact = qscale * (-4.0*MY_PI/volume); + f2group[2] += ffact * (qsum_A*dipole_B - qsum_B*dipole_A); +} + +/* ---------------------------------------------------------------------- + allocate group-group memory that depends on # of 
K-vectors +------------------------------------------------------------------------- */ + +void Ewald::allocate_groups() +{ + // group A + + sfacrl_A = new double[kmax3d]; + sfacim_A = new double[kmax3d]; + sfacrl_A_all = new double[kmax3d]; + sfacim_A_all = new double[kmax3d]; + + // group B + + sfacrl_B = new double[kmax3d]; + sfacim_B = new double[kmax3d]; + sfacrl_B_all = new double[kmax3d]; + sfacim_B_all = new double[kmax3d]; +} + +/* ---------------------------------------------------------------------- + deallocate group-group memory that depends on # of K-vectors +------------------------------------------------------------------------- */ + +void Ewald::deallocate_groups() +{ + // group A + + delete [] sfacrl_A; + delete [] sfacim_A; + delete [] sfacrl_A_all; + delete [] sfacim_A_all; + + // group B + + delete [] sfacrl_B; + delete [] sfacim_B; + delete [] sfacrl_B_all; + delete [] sfacim_B_all; +} diff --git a/src/KSPACE/ewald_disp.cpp b/src/KSPACE/ewald_disp.cpp index ba88e40f14..39951b8b0c 100644 --- a/src/KSPACE/ewald_disp.cpp +++ b/src/KSPACE/ewald_disp.cpp @@ -1,1475 +1,1475 @@ -/* ---------------------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - Copyright (2003) Sandia Corporation. Under the terms of Contract - DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains - certain rights in this software. This software is distributed under - the GNU General Public License. - - See the README file in the top-level LAMMPS directory. 
-------------------------------------------------------------------------- */ - -/* ---------------------------------------------------------------------- - Contributing authors: Pieter in 't Veld (SNL), Stan Moore (SNL) -------------------------------------------------------------------------- */ - -#include "mpi.h" -#include "string.h" -#include "stdio.h" -#include "stdlib.h" -#include "math.h" -#include "ewald_disp.h" -#include "math_vector.h" -#include "math_const.h" -#include "math_special.h" -#include "atom.h" -#include "comm.h" -#include "force.h" -#include "pair.h" -#include "domain.h" -#include "memory.h" -#include "error.h" -#include "update.h" - -using namespace LAMMPS_NS; -using namespace MathConst; -using namespace MathSpecial; - -#define SMALL 0.00001 - -enum{GEOMETRIC,ARITHMETIC,SIXTHPOWER}; // same as in pair.h - -//#define DEBUG - -/* ---------------------------------------------------------------------- */ - -EwaldDisp::EwaldDisp(LAMMPS *lmp, int narg, char **arg) : KSpace(lmp, narg, arg) -{ - if (narg!=1) error->all(FLERR,"Illegal kspace_style ewald/n command"); - - ewaldflag = dispersionflag = dipoleflag = 1; - accuracy_relative = fabs(force->numeric(FLERR,arg[0])); - - memset(function, 0, EWALD_NORDER*sizeof(int)); - kenergy = kvirial = NULL; - cek_local = cek_global = NULL; - ekr_local = NULL; - hvec = NULL; - kvec = NULL; - B = NULL; - first_output = 0; - energy_self_peratom = NULL; - virial_self_peratom = NULL; - nmax = 0; - q2 = 0; - b2 = 0; - M2 = 0; -} - -/* ---------------------------------------------------------------------- */ - -EwaldDisp::~EwaldDisp() -{ - deallocate(); - deallocate_peratom(); - delete [] ekr_local; - delete [] B; -} - -/* --------------------------------------------------------------------- */ - -void EwaldDisp::init() -{ - nkvec = nkvec_max = nevec = nevec_max = 0; - nfunctions = nsums = sums = 0; - nbox = -1; - bytes = 0.0; - - if (!comm->me) { - if (screen) fprintf(screen,"EwaldDisp initialization ...\n"); - if 
(logfile) fprintf(logfile,"EwaldDisp initialization ...\n"); - } - - triclinic_check(); - if (domain->dimension == 2) - error->all(FLERR,"Cannot use EwaldDisp with 2d simulation"); - if (slabflag == 0 && domain->nonperiodic > 0) - error->all(FLERR,"Cannot use nonperiodic boundaries with EwaldDisp"); - if (slabflag == 1) { - if (domain->xperiodic != 1 || domain->yperiodic != 1 || - domain->boundary[2][0] != 1 || domain->boundary[2][1] != 1) - error->all(FLERR,"Incorrect boundaries with slab EwaldDisp"); - } - - scale = 1.0; - mumurd2e = force->qqrd2e; - dielectric = force->dielectric; - - int tmp; - Pair *pair = force->pair; - int *ptr = pair ? (int *) pair->extract("ewald_order",tmp) : NULL; - double *cutoff = pair ? (double *) pair->extract("cut_coul",tmp) : NULL; - if (!(ptr||cutoff)) - error->all(FLERR,"KSpace style is incompatible with Pair style"); - int ewald_order = ptr ? *((int *) ptr) : 1<<1; - int ewald_mix = ptr ? *((int *) pair->extract("ewald_mix",tmp)) : GEOMETRIC; - memset(function, 0, EWALD_NFUNCS*sizeof(int)); - for (int i=0; i<=EWALD_NORDER; ++i) // transcribe order - if (ewald_order&(1<all(FLERR, - "Unsupported mixing rule in kspace_style ewald/disp"); - default: - error->all(FLERR,"Unsupported order in kspace_style ewald/disp"); - } - nfunctions += function[k] = 1; - nsums += n[k]; - } - - if (!gewaldflag) g_ewald = 0.0; - pair->init(); // so B is defined - init_coeffs(); - init_coeff_sums(); - - double qsum, qsqsum, bsbsum; - qsum = qsqsum = bsbsum = 0.0; - if (function[0]) { - qsum = sum[0].x; - qsqsum = sum[0].x2; - } - - // turn off coulombic if no charge - - if (function[0] && qsqsum == 0.0) { - function[0] = 0; - nfunctions -= 1; - nsums -= 1; - } - - if (function[1]) bsbsum = sum[1].x2; - if (function[2]) bsbsum = sum[2].x2; - - if (function[3]) M2 = sum[9].x2; - - if (function[3] && strcmp(update->unit_style,"electron") == 0) - error->all(FLERR,"Cannot (yet) use 'electron' units with dipoles"); - - if (qsqsum == 0.0 && bsbsum == 0.0 && 
M2 == 0.0) - error->all(FLERR,"Cannot use Ewald/disp solver " - "on system with no charge, dipole, or LJ particles"); - if (fabs(qsum) > SMALL && comm->me == 0) { - char str[128]; - sprintf(str,"System is not charge neutral, net charge = %g",qsum); - error->warning(FLERR,str); - } - - if (!function[1] && !function[2]) - dispersionflag = 0; - - if (!function[3]) - dipoleflag = 0; - - pair_check(); - - // set accuracy (force units) from accuracy_relative or accuracy_absolute - - if (accuracy_absolute >= 0.0) accuracy = accuracy_absolute; - else accuracy = accuracy_relative * two_charge_force; - - // setup K-space resolution - - q2 = qsqsum * force->qqrd2e; - M2 *= mumurd2e; - b2 = bsbsum; //Are these units right? - bigint natoms = atom->natoms; - - if (!gewaldflag) { - if (function[0]) { - g_ewald = accuracy*sqrt(natoms*(*cutoff)*shape_det(domain->h)) / (2.0*q2); - if (g_ewald >= 1.0) g_ewald = (1.35 - 0.15*log(accuracy))/(*cutoff); - else g_ewald = sqrt(-log(g_ewald)) / (*cutoff); - } - else if (function[1] || function[2]) { - //Try Newton Solver - //Use old method to get guess - g_ewald = (1.35 - 0.15*log(accuracy))/ *cutoff; - - double g_ewald_new = - NewtonSolve(g_ewald,(*cutoff),natoms,shape_det(domain->h),b2); - if (g_ewald_new > 0.0) g_ewald = g_ewald_new; - else error->warning(FLERR,"Ewald/disp Newton solver failed, " - "using old method to estimate g_ewald"); - } else if (function[3]) { - //Try Newton Solver - //Use old method to get guess - g_ewald = (1.35 - 0.15*log(accuracy))/ *cutoff; - double g_ewald_new = - NewtonSolve(g_ewald,(*cutoff),natoms,shape_det(domain->h),M2); - if (g_ewald_new > 0.0) g_ewald = g_ewald_new; - else error->warning(FLERR,"Ewald/disp Newton solver failed, " - "using old method to estimate g_ewald"); - } - } - - if (!comm->me) { - if (screen) fprintf(screen, " G vector = %g\n", g_ewald); - if (logfile) fprintf(logfile, " G vector = %g\n", g_ewald); - } - - g_ewald_6 = g_ewald; - deallocate_peratom(); - peratom_allocate_flag = 0; -} 
- -/* ---------------------------------------------------------------------- - adjust EwaldDisp coeffs, called initially and whenever volume has changed -------------------------------------------------------------------------- */ - -void EwaldDisp::setup() -{ - volume = shape_det(domain->h)*slab_volfactor; - memcpy(unit, domain->h_inv, sizeof(shape)); - shape_scalar_mult(unit, 2.0*MY_PI); - unit[2] /= slab_volfactor; - - // int nbox_old = nbox, nkvec_old = nkvec; - - if (accuracy >= 1) { - nbox = 0; - error->all(FLERR,"KSpace accuracy too low"); - } - - bigint natoms = atom->natoms; - double err; - int kxmax = 1; - int kymax = 1; - int kzmax = 1; - err = rms(kxmax,domain->h[0],natoms,q2,b2,M2); - while (err > accuracy) { - kxmax++; - err = rms(kxmax,domain->h[0],natoms,q2,b2,M2); - } - err = rms(kymax,domain->h[1],natoms,q2,b2,M2); - while (err > accuracy) { - kymax++; - err = rms(kymax,domain->h[1],natoms,q2,b2,M2); - } - err = rms(kzmax,domain->h[2]*slab_volfactor,natoms,q2,b2,M2); - while (err > accuracy) { - kzmax++; - err = rms(kzmax,domain->h[2]*slab_volfactor,natoms,q2,b2,M2); - } - nbox = MAX(kxmax,kymax); - nbox = MAX(nbox,kzmax); - double gsqxmx = unit[0]*unit[0]*kxmax*kxmax; - double gsqymx = unit[1]*unit[1]*kymax*kymax; - double gsqzmx = unit[2]*unit[2]*kzmax*kzmax; - gsqmx = MAX(gsqxmx,gsqymx); - gsqmx = MAX(gsqmx,gsqzmx); - gsqmx *= 1.00001; - - reallocate(); - coefficients(); - init_coeffs(); - init_coeff_sums(); - init_self(); - - if (!(first_output||comm->me)) { - first_output = 1; - if (screen) fprintf(screen, - " vectors: nbox = %d, nkvec = %d\n", nbox, nkvec); - if (logfile) fprintf(logfile, - " vectors: nbox = %d, nkvec = %d\n", nbox, nkvec); - } -} - -/* ---------------------------------------------------------------------- - compute RMS accuracy for a dimension -------------------------------------------------------------------------- */ - -double EwaldDisp::rms(int km, double prd, bigint natoms, double q2, double b2, double M2) -{ - double 
value = 0.0; - - // Coulombic - - double g2 = g_ewald*g_ewald; - - value += 2.0*q2*g_ewald/prd * - sqrt(1.0/(MY_PI*km*natoms)) * - exp(-MY_PI*MY_PI*km*km/(g2*prd*prd)); - - // Lennard-Jones - - double g7 = g2*g2*g2*g_ewald; - - value += 4.0*b2*g7/3.0 * - sqrt(1.0/(MY_PI*natoms)) * - (exp(-MY_PI*MY_PI*km*km/(g2*prd*prd)) * - (MY_PI*km/(g_ewald*prd) + 1)); - - // dipole - - value += 8.0*MY_PI*M2/volume*g_ewald * - sqrt(2.0*MY_PI*km*km*km/(15.0*natoms)) * - exp(-pow(MY_PI*km/(g_ewald*prd),2.0)); - - return value; -} - -void EwaldDisp::reallocate() -{ - int ix, iy, iz; - int nkvec_max = nkvec; - vector h; - - nkvec = 0; - int *kflag = new int[(nbox+1)*(2*nbox+1)*(2*nbox+1)]; - int *flag = kflag; - - for (ix=0; ix<=nbox; ++ix) - for (iy=-nbox; iy<=nbox; ++iy) - for (iz=-nbox; iz<=nbox; ++iz) - if (!(ix||iy||iz)) *(flag++) = 0; - else if ((!ix)&&(iy<0)) *(flag++) = 0; - else if ((!(ix||iy))&&(iz<0)) *(flag++) = 0; // use symmetry - else { - h[0] = unit[0]*ix; - h[1] = unit[5]*ix+unit[1]*iy; - h[2] = unit[4]*ix+unit[3]*iy+unit[2]*iz; - if ((*(flag++) = h[0]*h[0]+h[1]*h[1]+h[2]*h[2]<=gsqmx)) ++nkvec; - } - - if (nkvec>nkvec_max) { - deallocate(); // free memory - hvec = new hvector[nkvec]; // hvec - bytes += (nkvec-nkvec_max)*sizeof(hvector); - kvec = new kvector[nkvec]; // kvec - bytes += (nkvec-nkvec_max)*sizeof(kvector); - kenergy = new double[nkvec*nfunctions]; // kenergy - bytes += (nkvec-nkvec_max)*nfunctions*sizeof(double); - kvirial = new double[6*nkvec*nfunctions]; // kvirial - bytes += 6*(nkvec-nkvec_max)*nfunctions*sizeof(double); - cek_local = new complex[nkvec*nsums]; // cek_local - bytes += (nkvec-nkvec_max)*nsums*sizeof(complex); - cek_global = new complex[nkvec*nsums]; // cek_global - bytes += (nkvec-nkvec_max)*nsums*sizeof(complex); - nkvec_max = nkvec; - } - - flag = kflag; // create index and - kvector *k = kvec; // wave vectors - hvector *hi = hvec; - for (ix=0; ix<=nbox; ++ix) - for (iy=-nbox; iy<=nbox; ++iy) - for (iz=-nbox; iz<=nbox; ++iz) - if 
(*(flag++)) { - hi->x = unit[0]*ix; - hi->y = unit[5]*ix+unit[1]*iy; - (hi++)->z = unit[4]*ix+unit[3]*iy+unit[2]*iz; - k->x = ix+nbox; k->y = iy+nbox; (k++)->z = iz+nbox; } - - delete [] kflag; -} - - -void EwaldDisp::reallocate_atoms() -{ - if (eflag_atom || vflag_atom) - if (atom->nlocal > nmax) { - deallocate_peratom(); - allocate_peratom(); - nmax = atom->nmax; - } - - if ((nevec = atom->nmax*(2*nbox+1))<=nevec_max) return; - delete [] ekr_local; - ekr_local = new cvector[nevec]; - bytes += (nevec-nevec_max)*sizeof(cvector); - nevec_max = nevec; -} - - -void EwaldDisp::allocate_peratom() -{ - memory->create(energy_self_peratom, - atom->nmax,EWALD_NFUNCS,"ewald/n:energy_self_peratom"); - memory->create(virial_self_peratom, - atom->nmax,EWALD_NFUNCS,"ewald/n:virial_self_peratom"); -} - - -void EwaldDisp::deallocate_peratom() // free memory -{ - memory->destroy(energy_self_peratom); - memory->destroy(virial_self_peratom); -} - - -void EwaldDisp::deallocate() // free memory -{ - delete [] hvec; hvec = NULL; - delete [] kvec; kvec = NULL; - delete [] kenergy; kenergy = NULL; - delete [] kvirial; kvirial = NULL; - delete [] cek_local; cek_local = NULL; - delete [] cek_global; cek_global = NULL; -} - - -void EwaldDisp::coefficients() -{ - vector h; - hvector *hi = hvec, *nh; - double eta2 = 0.25/(g_ewald*g_ewald); - double b1, b2, expb2, h1, h2, c1, c2; - double *ke = kenergy, *kv = kvirial; - int func0 = function[0], func12 = function[1]||function[2], - func3 = function[3]; - - for (nh = (hi = hvec)+nkvec; hintypes; - - if (function[1]) { // geometric 1/r^6 - double **b = (double **) force->pair->extract("B",tmp); - delete [] B; - B = new double[n+1]; - bytes += (n+1)*sizeof(double); - for (int i=0; i<=n; ++i) B[i] = sqrt(fabs(b[i][i])); - } - if (function[2]) { // arithmetic 1/r^6 - double **epsilon = (double **) force->pair->extract("epsilon",tmp); - double **sigma = (double **) force->pair->extract("sigma",tmp); - double eps_i, sigma_i, sigma_n, *bi = B = new 
double[7*n+7]; - double c[7] = { - 1.0, sqrt(6.0), sqrt(15.0), sqrt(20.0), sqrt(15.0), sqrt(6.0), 1.0}; - - if (!(epsilon&&sigma)) - error->all( - FLERR,"Epsilon or sigma reference not set by pair style in ewald/n"); - for (int i=0; i<=n; ++i) { - eps_i = sqrt(epsilon[i][i]); - sigma_i = sigma[i][i]; - sigma_n = 1.0; - for (int j=0; j<7; ++j) { - *(bi++) = sigma_n*eps_i*c[j]; sigma_n *= sigma_i; - } - } - } -} - -void EwaldDisp::init_coeff_sums() -{ - if (sums) return; // calculated only once - sums = 1; - - Sum sum_local[EWALD_MAX_NSUMS]; - - memset(sum_local, 0, EWALD_MAX_NSUMS*sizeof(Sum)); - if (function[0]) { // 1/r - double *q = atom->q, *qn = q+atom->nlocal; - for (double *i=q; itype, *ntype = type+atom->nlocal; - for (int *i=type; itype, *ntype = type+atom->nlocal; - for (int *i=type; imu) { // dipole - double *mu = atom->mu[0], *nmu = mu+4*atom->nlocal; - for (double *i = mu; i < nmu; i += 4) - sum_local[9].x2 += i[3]*i[3]; - } - MPI_Allreduce(sum_local, sum, 2*EWALD_MAX_NSUMS, MPI_DOUBLE, MPI_SUM, world); -} - - -void EwaldDisp::init_self() -{ - double g1 = g_ewald, g2 = g1*g1, g3 = g1*g2; - const double qscale = force->qqrd2e * scale; - - memset(energy_self, 0, EWALD_NFUNCS*sizeof(double)); // self energy - memset(virial_self, 0, EWALD_NFUNCS*sizeof(double)); - - if (function[0]) { // 1/r - virial_self[0] = -0.5*MY_PI*qscale/(g2*volume)*sum[0].x*sum[0].x; - energy_self[0] = sum[0].x2*qscale*g1/MY_PIS-virial_self[0]; - } - if (function[1]) { // geometric 1/r^6 - virial_self[1] = MY_PI*MY_PIS*g3/(6.0*volume)*sum[1].x*sum[1].x; - energy_self[1] = -sum[1].x2*g3*g3/12.0+virial_self[1]; - } - if (function[2]) { // arithmetic 1/r^6 - virial_self[2] = MY_PI*MY_PIS*g3/(48.0*volume)*(sum[2].x*sum[8].x+ - sum[3].x*sum[7].x+sum[4].x*sum[6].x+0.5*sum[5].x*sum[5].x); - energy_self[2] = -sum[2].x2*g3*g3/3.0+virial_self[2]; - } - if (function[3]) { // dipole - virial_self[3] = 0; // in surface - energy_self[3] = sum[9].x2*mumurd2e*2.0*g3/3.0/MY_PIS-virial_self[3]; - } 
-} - - -void EwaldDisp::init_self_peratom() -{ - if (!(vflag_atom || eflag_atom)) return; - - double g1 = g_ewald, g2 = g1*g1, g3 = g1*g2; - const double qscale = force->qqrd2e * scale; - double *energy = energy_self_peratom[0]; - double *virial = virial_self_peratom[0]; - int nlocal = atom->nlocal; - - memset(energy, 0, EWALD_NFUNCS*nlocal*sizeof(double)); - memset(virial, 0, EWALD_NFUNCS*nlocal*sizeof(double)); - - if (function[0]) { // 1/r - double *ei = energy; - double *vi = virial; - double ce = qscale*g1/MY_PIS; - double cv = -0.5*MY_PI*qscale/(g2*volume); - double *qi = atom->q, *qn = qi + nlocal; - for (; qi < qn; qi++, vi += EWALD_NFUNCS, ei += EWALD_NFUNCS) { - double q = *qi; - *vi = cv*q*sum[0].x; - *ei = ce*q*q-vi[0]; - } - } - if (function[1]) { // geometric 1/r^6 - double *ei = energy+1; - double *vi = virial+1; - double ce = -g3*g3/12.0; - double cv = MY_PI*MY_PIS*g3/(6.0*volume); - int *typei = atom->type, *typen = typei + atom->nlocal; - for (; typei < typen; typei++, vi += EWALD_NFUNCS, ei += EWALD_NFUNCS) { - double b = B[*typei]; - *vi = cv*b*sum[1].x; - *ei = ce*b*b+vi[0]; - } - } - if (function[2]) { // arithmetic 1/r^6 - double *bi; - double *ei = energy+2; - double *vi = virial+2; - double ce = -g3*g3/3.0; - double cv = 0.5*MY_PI*MY_PIS*g3/(48.0*volume); - int *typei = atom->type, *typen = typei + atom->nlocal; - for (; typei < typen; typei++, vi += EWALD_NFUNCS, ei += EWALD_NFUNCS) { - bi = B+7*typei[0]+7; - for (int k=2; k<9; ++k) *vi += cv*sum[k].x*(--bi)[0]; - - /* PJV 20120225: - should this be this instead? 
above implies an inverse dependence - seems to be the above way in original; i recall having tested - arithmetic mixing in the conception phase, but an extra test would - be prudent (pattern repeats in multiple functions below) - - bi = B+7*typei[0]; - for (int k=2; k<9; ++k) *vi += cv*sum[k].x*(bi++)[0]; - - */ - - *ei = ce*bi[0]*bi[6]+vi[0]; - } - } - if (function[3]&&atom->mu) { // dipole - double *ei = energy+3; - double *vi = virial+3; - double *imu = atom->mu[0], *nmu = imu+4*atom->nlocal; - double ce = mumurd2e*2.0*g3/3.0/MY_PIS; - for (; imu < nmu; imu += 4, vi += EWALD_NFUNCS, ei += EWALD_NFUNCS) { - *vi = 0; // in surface - *ei = ce*imu[3]*imu[3]-vi[0]; - } - } -} - - -/* ---------------------------------------------------------------------- - compute the EwaldDisp long-range force, energy, virial -------------------------------------------------------------------------- */ - -void EwaldDisp::compute(int eflag, int vflag) -{ - if (!nbox) return; - - // set energy/virial flags - // invoke allocate_peratom() if needed for first time - - if (eflag || vflag) ev_setup(eflag,vflag); - else evflag = eflag_global = vflag_global = eflag_atom = vflag_atom = 0; - - if (!peratom_allocate_flag && (eflag_atom || vflag_atom)) { - allocate_peratom(); - peratom_allocate_flag = 1; - nmax = atom->nmax; - } - - reallocate_atoms(); - init_self_peratom(); - compute_ek(); - compute_force(); - //compute_surface(); // assume conducting metal (tinfoil) boundary conditions - compute_energy(); - compute_energy_peratom(); - compute_virial(); - compute_virial_dipole(); - compute_virial_peratom(); -} - - -void EwaldDisp::compute_ek() -{ - cvector *ekr = ekr_local; - int lbytes = (2*nbox+1)*sizeof(cvector); - hvector *h = NULL; - kvector *k, *nk = kvec+nkvec; - cvector *z = new cvector[2*nbox+1]; - cvector z1, *zx, *zy, *zz, *zn = z+2*nbox; - complex *cek, zxyz, zxy = COMPLEX_NULL, cx = COMPLEX_NULL; - vector mui; - double *x = atom->x[0], *xn = x+3*atom->nlocal, *q = atom->q, qi = 0.0; 
- double bi = 0.0, ci[7]; - double *mu = atom->mu ? atom->mu[0] : NULL; - int i, kx, ky, n = nkvec*nsums, *type = atom->type, tri = domain->triclinic; - int func[EWALD_NFUNCS]; - - memcpy(func, function, EWALD_NFUNCS*sizeof(int)); - memset(cek_local, 0, n*sizeof(complex)); // reset sums - while (xx, 1, 0); C_SET(zz->y, 1, 0); C_SET(zz->z, 1, 0); // z[0] - if (tri) { // triclinic z[1] - C_ANGLE(z1.x, unit[0]*x[0]+unit[5]*x[1]+unit[4]*x[2]); - C_ANGLE(z1.y, unit[1]*x[1]+unit[3]*x[2]); - C_ANGLE(z1.z, x[2]*unit[2]); x += 3; - } - else { // orthogonal z[1] - C_ANGLE(z1.x, *(x++)*unit[0]); - C_ANGLE(z1.y, *(x++)*unit[1]); - C_ANGLE(z1.z, *(x++)*unit[2]); - } - for (; zzx, zz->x, z1.x); // 3D k-vector - C_RMULT(zy->y, zz->y, z1.y); C_CONJ(zx->y, zy->y); - C_RMULT(zy->z, zz->z, z1.z); C_CONJ(zx->z, zy->z); - } - kx = ky = -1; - cek = cek_local; - if (func[0]) qi = *(q++); - if (func[1]) bi = B[*type]; - if (func[2]) memcpy(ci, B+7*type[0], 7*sizeof(double)); - if (func[3]) { - memcpy(mui, mu, sizeof(vector)); - mu += 4; - h = hvec; - } - for (k=kvec; ky) { // based on order in - if (kx!=k->x) cx = z[kx = k->x].x; // reallocate - C_RMULT(zxy, z[ky = k->y].y, cx); - } - C_RMULT(zxyz, z[k->z].z, zxy); - if (func[0]) { - cek->re += zxyz.re*qi; (cek++)->im += zxyz.im*qi; - } - if (func[1]) { - cek->re += zxyz.re*bi; (cek++)->im += zxyz.im*bi; - } - if (func[2]) for (i=0; i<7; ++i) { - cek->re += zxyz.re*ci[i]; (cek++)->im += zxyz.im*ci[i]; - } - if (func[3]) { - register double muk = mui[0]*h->x+mui[1]*h->y+mui[2]*h->z; ++h; - cek->re += zxyz.re*muk; (cek++)->im += zxyz.im*muk; - } - } - ekr = (cvector *) ((char *) memcpy(ekr, z, lbytes)+lbytes); - ++type; - } - MPI_Allreduce(cek_local, cek_global, 2*n, MPI_DOUBLE, MPI_SUM, world); - - delete [] z; -} - - -void EwaldDisp::compute_force() -{ - kvector *k; - hvector *h, *nh; - cvector *z = ekr_local; - vector sum[EWALD_MAX_NSUMS], mui = COMPLEX_NULL; - complex *cek, zc, zx = COMPLEX_NULL, zxy = COMPLEX_NULL; - complex *cek_coul; 
- double *f = atom->f[0], *fn = f+3*atom->nlocal, *q = atom->q, *t = NULL; - double *mu = atom->mu ? atom->mu[0] : NULL; - const double qscale = force->qqrd2e * scale; - double *ke, c[EWALD_NFUNCS] = { - 8.0*MY_PI*qscale/volume, 2.0*MY_PI*MY_PIS/(12.0*volume), - 2.0*MY_PI*MY_PIS/(192.0*volume), 8.0*MY_PI*mumurd2e/volume}; - double kt = 4.0*cube(g_ewald)/3.0/MY_PIS/c[3]; - int i, kx, ky, lbytes = (2*nbox+1)*sizeof(cvector), *type = atom->type; - int func[EWALD_NFUNCS]; - - if (atom->torque) t = atom->torque[0]; - memcpy(func, function, EWALD_NFUNCS*sizeof(int)); - memset(sum, 0, EWALD_MAX_NSUMS*sizeof(vector)); // fj = -dE/dr = - for (; fy) { // based on order in - if (kx!=k->x) zx = z[kx = k->x].x; // reallocate - C_RMULT(zxy, z[ky = k->y].y, zx); - } - C_CRMULT(zc, z[k->z].z, zxy); - if (func[0]) { // 1/r - register double im = *(ke++)*(zc.im*cek->re+cek->im*zc.re); - if (func[3]) cek_coul = cek; - ++cek; - sum[0][0] += h->x*im; sum[0][1] += h->y*im; sum[0][2] += h->z*im; - } - if (func[1]) { // geometric 1/r^6 - register double im = *(ke++)*(zc.im*cek->re+cek->im*zc.re); ++cek; - sum[1][0] += h->x*im; sum[1][1] += h->y*im; sum[1][2] += h->z*im; - } - if (func[2]) { // arithmetic 1/r^6 - register double im, c = *(ke++); - for (i=2; i<9; ++i) { - im = c*(zc.im*cek->re+cek->im*zc.re); ++cek; - sum[i][0] += h->x*im; sum[i][1] += h->y*im; sum[i][2] += h->z*im; - } - } - if (func[3]) { // dipole - register double im = *(ke)*(zc.im*cek->re+ - cek->im*zc.re)*(mui[0]*h->x+mui[1]*h->y+mui[2]*h->z); - register double im2 = *(ke)*(zc.re*cek->re- - cek->im*zc.im); - sum[9][0] += h->x*im; sum[9][1] += h->y*im; sum[9][2] += h->z*im; - t[0] += -mui[1]*h->z*im2 + mui[2]*h->y*im2; // torque - t[1] += -mui[2]*h->x*im2 + mui[0]*h->z*im2; - t[2] += -mui[0]*h->y*im2 + mui[1]*h->x*im2; - if (func[0]) { // charge-dipole - register double qi = *(q)*c[0]; - im = - *(ke)*(zc.re*cek_coul->re - - cek_coul->im*zc.im)*(mui[0]*h->x+mui[1]*h->y+mui[2]*h->z); - im += *(ke)*(zc.re*cek->re - 
cek->im*zc.im)*qi; - sum[9][0] += h->x*im; sum[9][1] += h->y*im; sum[9][2] += h->z*im; - - im2 = *(ke)*(zc.re*cek_coul->im + cek_coul->re*zc.im); - im2 += -*(ke)*(zc.re*cek->im - cek->im*zc.re); - t[0] += -mui[1]*h->z*im2 + mui[2]*h->y*im2; // torque - t[1] += -mui[2]*h->x*im2 + mui[0]*h->z*im2; - t[2] += -mui[0]*h->y*im2 + mui[1]*h->x*im2; - } - ++cek; - ke++; - } - } - if (func[0]) { // 1/r - register double qi = *(q++)*c[0]; - f[0] -= sum[0][0]*qi; f[1] -= sum[0][1]*qi; f[2] -= sum[0][2]*qi; - } - if (func[1]) { // geometric 1/r^6 - register double bi = B[*type]*c[1]; - f[0] -= sum[1][0]*bi; f[1] -= sum[1][1]*bi; f[2] -= sum[1][2]*bi; - } - if (func[2]) { // arithmetic 1/r^6 - register double *bi = B+7*type[0]+7; - for (i=2; i<9; ++i) { - register double c2 = (--bi)[0]*c[2]; - f[0] -= sum[i][0]*c2; f[1] -= sum[i][1]*c2; f[2] -= sum[i][2]*c2; - } - } - if (func[3]) { // dipole - f[0] -= sum[9][0]; f[1] -= sum[9][1]; f[2] -= sum[9][2]; - } - z = (cvector *) ((char *) z+lbytes); - ++type; - t += 3; - } -} - - -void EwaldDisp::compute_surface() -{ - // assume conducting metal (tinfoil) boundary conditions, so this function is - // not called because dielectric at the boundary --> infinity, which makes all - // the terms here zero. 
- - if (!function[3]) return; - if (!atom->mu) return; - - vector sum_local = VECTOR_NULL, sum_total; - memset(sum_local, 0, sizeof(vector)); - double *i, *n, *mu = atom->mu[0]; - - for (n = (i = mu) + 4*atom->nlocal; i < n; ++i) { - sum_local[0] += (i++)[0]; - sum_local[1] += (i++)[0]; - sum_local[2] += (i++)[0]; - } - MPI_Allreduce(sum_local, sum_total, 3, MPI_DOUBLE, MPI_SUM, world); - - virial_self[3] = - mumurd2e*(2.0*MY_PI*vec_dot(sum_total,sum_total)/(2.0*dielectric+1)/volume); - energy_self[3] -= virial_self[3]; - - if (!(vflag_atom || eflag_atom)) return; - - double *ei = energy_self_peratom[0]+3; - double *vi = virial_self_peratom[0]+3; - double cv = 2.0*mumurd2e*MY_PI/(2.0*dielectric+1)/volume; - - for (i = mu; i < n; i += 4, ei += EWALD_NFUNCS, vi += EWALD_NFUNCS) { - *vi = cv*(i[0]*sum_total[0]+i[1]*sum_total[1]+i[2]*sum_total[2]); - *ei -= *vi; - } -} - - -void EwaldDisp::compute_energy() -{ - energy = 0.0; - if (!eflag_global) return; - - complex *cek = cek_global; - complex *cek_coul; - double *ke = kenergy; - const double qscale = force->qqrd2e * scale; - double c[EWALD_NFUNCS] = { - 4.0*MY_PI*qscale/volume, 2.0*MY_PI*MY_PIS/(24.0*volume), - 2.0*MY_PI*MY_PIS/(192.0*volume), 4.0*MY_PI*mumurd2e/volume}; - double sum[EWALD_NFUNCS]; - int func[EWALD_NFUNCS]; - - memcpy(func, function, EWALD_NFUNCS*sizeof(int)); - memset(sum, 0, EWALD_NFUNCS*sizeof(double)); // reset sums - for (int k=0; kre*cek->re+cek->im*cek->im); - if (func[3]) cek_coul = cek; - ++cek; - } - if (func[1]) { // geometric 1/r^6 - sum[1] += *(ke++)*(cek->re*cek->re+cek->im*cek->im); ++cek; } - if (func[2]) { // arithmetic 1/r^6 - register double r = - (cek[0].re*cek[6].re+cek[0].im*cek[6].im)+ - (cek[1].re*cek[5].re+cek[1].im*cek[5].im)+ - (cek[2].re*cek[4].re+cek[2].im*cek[4].im)+ - 0.5*(cek[3].re*cek[3].re+cek[3].im*cek[3].im); cek += 7; - sum[2] += *(ke++)*r; - } - if (func[3]) { // dipole - sum[3] += *(ke)*(cek->re*cek->re+cek->im*cek->im); - if (func[0]) { // charge-dipole - sum[3] 
+= *(ke)*2.0*(cek->re*cek_coul->im - cek->im*cek_coul->re); - } - ke++; - ++cek; - } - } - for (int k=0; kq; - double *eatomj = eatom; - double *mu = atom->mu ? atom->mu[0] : NULL; - const double qscale = force->qqrd2e * scale; - double *ke = kenergy; - double c[EWALD_NFUNCS] = { - 4.0*MY_PI*qscale/volume, 2.0*MY_PI*MY_PIS/(24.0*volume), - 2.0*MY_PI*MY_PIS/(192.0*volume), 4.0*MY_PI*mumurd2e/volume}; - int i, kx, ky, lbytes = (2*nbox+1)*sizeof(cvector), *type = atom->type; - int func[EWALD_NFUNCS]; - - memcpy(func, function, EWALD_NFUNCS*sizeof(int)); - for (int j = 0; j < atom->nlocal; j++, ++eatomj) { - k = kvec; - kx = ky = -1; - ke = kenergy; - cek = cek_global; - memset(sum, 0, EWALD_MAX_NSUMS*sizeof(double)); - if (func[3]) { - register double di = c[3]; - mui[0] = di*(mu++)[0]; mui[1] = di*(mu++)[0]; mui[2] = di*(mu++)[0]; - mu++; - } - for (nh = (h = hvec)+nkvec; hy) { // based on order in - if (kx!=k->x) zx = z[kx = k->x].x; // reallocate - C_RMULT(zxy, z[ky = k->y].y, zx); - } - C_CRMULT(zc, z[k->z].z, zxy); - if (func[0]) { // 1/r - sum[0] += *(ke++)*(cek->re*zc.re - cek->im*zc.im); - if (func[3]) cek_coul = cek; - ++cek; - } - if (func[1]) { // geometric 1/r^6 - sum[1] += *(ke++)*(cek->re*zc.re - cek->im*zc.im); ++cek; } - if (func[2]) { // arithmetic 1/r^6 - register double im, c = *(ke++); - for (i=2; i<9; ++i) { - im = c*(cek->re*zc.re - cek->im*zc.im); ++cek; - sum[i] += im; - } - } - if (func[3]) { // dipole - double muk = (mui[0]*h->x+mui[1]*h->y+mui[2]*h->z); - sum[9] += *(ke)*(cek->re*zc.re - cek->im*zc.im)*muk; - if (func[0]) { // charge-dipole - register double qj = *(q)*c[0]; - sum[9] += *(ke)*(cek_coul->im*zc.re + cek_coul->re*zc.im)*muk; - sum[9] -= *(ke)*(cek->re*zc.im + cek->im*zc.re)*qj; - } - ++cek; - ke++; - } - } - - if (func[0]) { // 1/r - register double qj = *(q++)*c[0]; - *eatomj += sum[0]*qj - energy_self_peratom[j][0]; - } - if (func[1]) { // geometric 1/r^6 - register double bj = B[*type]*c[1]; - *eatomj += sum[1]*bj - 
energy_self_peratom[j][1]; - } - if (func[2]) { // arithmetic 1/r^6 - register double *bj = B+7*type[0]+7; - for (i=2; i<9; ++i) { - register double c2 = (--bj)[0]*c[2]; - *eatomj += 0.5*sum[i]*c2; - } - *eatomj -= energy_self_peratom[j][2]; - } - if (func[3]) { // dipole - *eatomj += sum[9] - energy_self_peratom[j][3]; - } - z = (cvector *) ((char *) z+lbytes); - ++type; - } -} - - -#define swap(a, b) { register double t = a; a= b; b = t; } - -void EwaldDisp::compute_virial() -{ - memset(virial, 0, sizeof(shape)); - if (!vflag_global) return; - - complex *cek = cek_global; - complex *cek_coul; - double *kv = kvirial; - const double qscale = force->qqrd2e * scale; - double c[EWALD_NFUNCS] = { - 4.0*MY_PI*qscale/volume, 2.0*MY_PI*MY_PIS/(24.0*volume), - 2.0*MY_PI*MY_PIS/(192.0*volume), 4.0*MY_PI*mumurd2e/volume}; - shape sum[EWALD_NFUNCS]; - int func[EWALD_NFUNCS]; - - memcpy(func, function, EWALD_NFUNCS*sizeof(int)); - memset(sum, 0, EWALD_NFUNCS*sizeof(shape)); - for (int k=0; kre*cek->re+cek->im*cek->im; - if (func[3]) cek_coul = cek; - ++cek; - sum[0][0] += *(kv++)*r; sum[0][1] += *(kv++)*r; sum[0][2] += *(kv++)*r; - sum[0][3] += *(kv++)*r; sum[0][4] += *(kv++)*r; sum[0][5] += *(kv++)*r; - } - if (func[1]) { // geometric 1/r^6 - register double r = cek->re*cek->re+cek->im*cek->im; ++cek; - sum[1][0] += *(kv++)*r; sum[1][1] += *(kv++)*r; sum[1][2] += *(kv++)*r; - sum[1][3] += *(kv++)*r; sum[1][4] += *(kv++)*r; sum[1][5] += *(kv++)*r; - } - if (func[2]) { // arithmetic 1/r^6 - register double r = - (cek[0].re*cek[6].re+cek[0].im*cek[6].im)+ - (cek[1].re*cek[5].re+cek[1].im*cek[5].im)+ - (cek[2].re*cek[4].re+cek[2].im*cek[4].im)+ - 0.5*(cek[3].re*cek[3].re+cek[3].im*cek[3].im); cek += 7; - sum[2][0] += *(kv++)*r; sum[2][1] += *(kv++)*r; sum[2][2] += *(kv++)*r; - sum[2][3] += *(kv++)*r; sum[2][4] += *(kv++)*r; sum[2][5] += *(kv++)*r; - } - if (func[3]) { - register double r = cek->re*cek->re+cek->im*cek->im; - sum[3][0] += *(kv++)*r; sum[3][1] += *(kv++)*r; 
sum[3][2] += *(kv++)*r; - sum[3][3] += *(kv++)*r; sum[3][4] += *(kv++)*r; sum[3][5] += *(kv++)*r; - if (func[0]) { // charge-dipole - kv -= 6; - register double r = 2.0*(cek->re*cek_coul->im - cek->im*cek_coul->re); - sum[3][0] += *(kv++)*r; sum[3][1] += *(kv++)*r; sum[3][2] += *(kv++)*r; - sum[3][3] += *(kv++)*r; sum[3][4] += *(kv++)*r; sum[3][5] += *(kv++)*r; - } - ++cek; - } - } - for (int k=0; kmu ? atom->mu[0] : NULL; - double *vatomj = NULL; - if (vflag_atom && vatom) vatomj = vatom[0]; - const double qscale = force->qqrd2e * scale; - double *ke, c[EWALD_NFUNCS] = { - 8.0*MY_PI*qscale/volume, 2.0*MY_PI*MY_PIS/(12.0*volume), - 2.0*MY_PI*MY_PIS/(192.0*volume), 8.0*MY_PI*mumurd2e/volume}; - double kt = 4.0*cube(g_ewald)/3.0/MY_PIS/c[3]; - int i, kx, ky, lbytes = (2*nbox+1)*sizeof(cvector), *type = atom->type; - int func[EWALD_NFUNCS]; - - memcpy(func, function, EWALD_NFUNCS*sizeof(int)); - memset(&sum[0], 0, 6*sizeof(double)); - memset(&sum_total[0], 0, 6*sizeof(double)); - for (int j = 0; j < atom->nlocal; j++) { - k = kvec; - kx = ky = -1; - ke = kenergy; - cek = cek_global; - memset(&sum[0], 0, 6*sizeof(double)); - if (func[3]) { - register double di = c[3]; - mui[0] = di*(mu++)[0]; mui[1] = di*(mu++)[0]; mui[2] = di*(mu++)[0]; - mu++; - } - for (nh = (h = hvec)+nkvec; hy) { // based on order in - if (kx!=k->x) zx = z[kx = k->x].x; // reallocate - C_RMULT(zxy, z[ky = k->y].y, zx); - } - C_CRMULT(zc, z[k->z].z, zxy); - double im = 0.0; - if (func[0]) { // 1/r - ke++; - if (func[3]) cek_coul = cek; - ++cek; - } - if (func[1]) { // geometric 1/r^6 - ke++; - ++cek; - } - if (func[2]) { // arithmetic 1/r^6 - ke++; - for (i=2; i<9; ++i) { - ++cek; - } - } - if (func[3]) { // dipole - im = *(ke)*(zc.re*cek->re - cek->im*zc.im); - if (func[0]) { // charge-dipole - im += *(ke)*(zc.im*cek_coul->re + cek_coul->im*zc.re); - } - sum[0] -= mui[0]*h->x*im; - sum[1] -= mui[1]*h->y*im; - sum[2] -= mui[2]*h->z*im; - sum[3] -= mui[0]*h->y*im; - sum[4] -= mui[0]*h->z*im; - 
sum[5] -= mui[1]*h->z*im; - ++cek; - ke++; - } - } - - if (vflag_global) - for (int n = 0; n < 6; n++) - sum_total[n] -= sum[n]; - - if (vflag_atom) - for (int n = 0; n < 6; n++) - vatomj[n] -= sum[n]; - - z = (cvector *) ((char *) z+lbytes); - ++type; - if (vflag_atom) vatomj += 6; - } - - if (vflag_global) { - MPI_Allreduce(&sum_total[0],&sum[0],6,MPI_DOUBLE,MPI_SUM,world); - for (int n = 0; n < 6; n++) - virial[n] += sum[n]; - } - -} - -void EwaldDisp::compute_virial_peratom() -{ - if (!vflag_atom) return; - - kvector *k; - hvector *h, *nh; - cvector *z = ekr_local; - vector mui = VECTOR_NULL; - complex *cek, zc = COMPLEX_NULL, zx = COMPLEX_NULL, zxy = COMPLEX_NULL; - complex *cek_coul; - double *kv; - double *q = atom->q; - double *vatomj = vatom ? vatom[0] : NULL; - double *mu = atom->mu ? atom->mu[0] : NULL; - const double qscale = force->qqrd2e * scale; - double c[EWALD_NFUNCS] = { - 4.0*MY_PI*qscale/volume, 2.0*MY_PI*MY_PIS/(24.0*volume), - 2.0*MY_PI*MY_PIS/(192.0*volume), 4.0*MY_PI*mumurd2e/volume}; - shape sum[EWALD_MAX_NSUMS]; - int func[EWALD_NFUNCS]; - - memcpy(func, function, EWALD_NFUNCS*sizeof(int)); - int i, kx, ky, lbytes = (2*nbox+1)*sizeof(cvector), *type = atom->type; - for (int j = 0; j < atom->nlocal; j++) { - k = kvec; - kx = ky = -1; - kv = kvirial; - cek = cek_global; - memset(sum, 0, EWALD_MAX_NSUMS*sizeof(shape)); - if (func[3]) { - register double di = c[3]; - mui[0] = di*(mu++)[0]; mui[1] = di*(mu++)[0]; mui[2] = di*(mu++)[0]; - mu++; - } - for (nh = (h = hvec)+nkvec; hy) { // based on order in - if (kx!=k->x) zx = z[kx = k->x].x; // reallocate - C_RMULT(zxy, z[ky = k->y].y, zx); - } - C_CRMULT(zc, z[k->z].z, zxy); - if (func[0]) { // 1/r - if (func[3]) cek_coul = cek; - register double r = cek->re*zc.re - cek->im*zc.im; ++cek; - sum[0][0] += *(kv++)*r; - sum[0][1] += *(kv++)*r; - sum[0][2] += *(kv++)*r; - sum[0][3] += *(kv++)*r; - sum[0][4] += *(kv++)*r; - sum[0][5] += *(kv++)*r; - } - if (func[1]) { // geometric 1/r^6 - register 
double r = cek->re*zc.re - cek->im*zc.im; ++cek; - sum[1][0] += *(kv++)*r; - sum[1][1] += *(kv++)*r; - sum[1][2] += *(kv++)*r; - sum[1][3] += *(kv++)*r; - sum[1][4] += *(kv++)*r; - sum[1][5] += *(kv++)*r; - } - if (func[2]) { // arithmetic 1/r^6 - register double r; - for (i=2; i<9; ++i) { - r = cek->re*zc.re - cek->im*zc.im; ++cek; - sum[i][0] += *(kv++)*r; - sum[i][1] += *(kv++)*r; - sum[i][2] += *(kv++)*r; - sum[i][3] += *(kv++)*r; - sum[i][4] += *(kv++)*r; - sum[i][5] += *(kv++)*r; - kv -= 6; - } - kv += 6; - } - if (func[3]) { // dipole - double muk = (mui[0]*h->x+mui[1]*h->y+mui[2]*h->z); - register double - r = (cek->re*zc.re - cek->im*zc.im)*muk; - sum[9][0] += *(kv++)*r; - sum[9][1] += *(kv++)*r; - sum[9][2] += *(kv++)*r; - sum[9][3] += *(kv++)*r; - sum[9][4] += *(kv++)*r; - sum[9][5] += *(kv++)*r; - if (func[0]) { // charge-dipole - kv -= 6; - register double qj = *(q)*c[0]; - r = (cek_coul->im*zc.re + cek_coul->re*zc.im)*muk; - r += -(cek->re*zc.im + cek->im*zc.re)*qj; - sum[9][0] += *(kv++)*r; sum[9][1] += *(kv++)*r; sum[9][2] += *(kv++)*r; - sum[9][3] += *(kv++)*r; sum[9][4] += *(kv++)*r; sum[9][5] += *(kv++)*r; - } - ++cek; - } - } - - if (func[0]) { // 1/r - register double qi = *(q++)*c[0]; - for (int n = 0; n < 6; n++) vatomj[n] += sum[0][n]*qi; - } - if (func[1]) { // geometric 1/r^6 - register double bi = B[*type]*c[1]; - for (int n = 0; n < 6; n++) vatomj[n] += sum[1][n]*bi; - } - if (func[2]) { // arithmetic 1/r^6 - register double *bj = B+7*type[0]+7; - for (i=2; i<9; ++i) { - register double c2 = (--bj)[0]*c[2]; - for (int n = 0; n < 6; n++) vatomj[n] += 0.5*sum[i][n]*c2; - } - } - if (func[3]) { // dipole - for (int n = 0; n < 6; n++) vatomj[n] += sum[9][n]; - } - - for (int k=0; kq; - double **x = atom->x; - double zprd = domain->zprd; - int nlocal = atom->nlocal; - - double qsum = 0.0; - if (function[0]) qsum = sum[0].x; - - double dipole = 0.0; - for (int i = 0; i < nlocal; i++) dipole += q[i]*x[i][2]; - - if (function[3] && atom->mu) { - 
double **mu = atom->mu; - for (int i = 0; i < nlocal; i++) dipole += mu[i][2]; - } - - // sum local contributions to get global dipole moment - - double dipole_all; - MPI_Allreduce(&dipole,&dipole_all,1,MPI_DOUBLE,MPI_SUM,world); - - // need to make non-neutral systems and/or - // per-atom energy translationally invariant - - double dipole_r2 = 0.0; - if (eflag_atom || fabs(qsum) > SMALL) { - - if (function[3] && atom->mu) - error->all(FLERR,"Cannot (yet) use kspace slab correction with " - "long-range dipoles and non-neutral systems or per-atom energy"); - - for (int i = 0; i < nlocal; i++) - dipole_r2 += q[i]*x[i][2]*x[i][2]; - - // sum local contributions - - double tmp; - MPI_Allreduce(&dipole_r2,&tmp,1,MPI_DOUBLE,MPI_SUM,world); - dipole_r2 = tmp; - } - - // compute corrections - - const double e_slabcorr = MY_2PI*(dipole_all*dipole_all - - qsum*dipole_r2 - qsum*qsum*zprd*zprd/12.0)/volume; - const double qscale = force->qqrd2e * scale; - - if (eflag_global) energy += qscale * e_slabcorr; - - // per-atom energy - - if (eflag_atom) { - double efact = qscale * MY_2PI/volume; - for (int i = 0; i < nlocal; i++) - eatom[i] += efact * q[i]*(x[i][2]*dipole_all - 0.5*(dipole_r2 + - qsum*x[i][2]*x[i][2]) - qsum*zprd*zprd/12.0); - } - - // add on force corrections - - double ffact = qscale * (-4.0*MY_PI/volume); - double **f = atom->f; - - for (int i = 0; i < nlocal; i++) f[i][2] += ffact * q[i]*(dipole_all - qsum*x[i][2]); - - // add on torque corrections - - if (function[3] && atom->mu && atom->torque) { - double **mu = atom->mu; - double **torque = atom->torque; - for (int i = 0; i < nlocal; i++) { - torque[i][0] += ffact * dipole_all * mu[i][1]; - torque[i][1] += -ffact * dipole_all * mu[i][0]; - } - } -} - -/* ---------------------------------------------------------------------- - Newton solver used to find g_ewald for LJ systems - ------------------------------------------------------------------------- */ - -double EwaldDisp::NewtonSolve(double x, double Rc, - 
bigint natoms, double vol, double b2) -{ - double dx,tol; - int maxit; - - maxit = 10000; //Maximum number of iterations - tol = 0.00001; //Convergence tolerance - - //Begin algorithm - - for (int i = 0; i < maxit; i++) { - dx = f(x,Rc,natoms,vol,b2) / derivf(x,Rc,natoms,vol,b2); - x = x - dx; //Update x - if (fabs(dx) < tol) return x; - if (x < 0 || x != x) // solver failed - return -1; - } - return -1; -} - -/* ---------------------------------------------------------------------- - Calculate f(x) - ------------------------------------------------------------------------- */ - -double EwaldDisp::f(double x, double Rc, bigint natoms, double vol, double b2) -{ - double a = Rc*x; - double f = 0.0; - - if (function[1] || function[2]) { // LJ - f = (4.0*MY_PI*b2*powint(x,4)/vol/sqrt((double)natoms)*erfc(a) * - (6.0*powint(a,-5) + 6.0*powint(a,-3) + 3.0/a + a) - accuracy); - } else { // dipole - double rg2 = a*a; - double rg4 = rg2*rg2; - double rg6 = rg4*rg2; - double Cc = 4.0*rg4 + 6.0*rg2 + 3.0; - double Dc = 8.0*rg6 + 20.0*rg4 + 30.0*rg2 + 15.0; - f = (b2/(sqrt(vol*powint(x,4)*powint(Rc,9)*natoms)) * - sqrt(13.0/6.0*Cc*Cc + 2.0/15.0*Dc*Dc - 13.0/15.0*Cc*Dc) * - exp(-rg2)) - accuracy; - } - - return f; -} - -/* ---------------------------------------------------------------------- - Calculate numerical derivative f'(x) - ------------------------------------------------------------------------- */ - -double EwaldDisp::derivf(double x, double Rc, - bigint natoms, double vol, double b2) -{ - double h = 0.000001; //Derivative step-size - return (f(x + h,Rc,natoms,vol,b2) - f(x,Rc,natoms,vol,b2)) / h; -} +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. 
Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing authors: Pieter in 't Veld (SNL), Stan Moore (SNL) +------------------------------------------------------------------------- */ + +#include "mpi.h" +#include "string.h" +#include "stdio.h" +#include "stdlib.h" +#include "math.h" +#include "ewald_disp.h" +#include "math_vector.h" +#include "math_const.h" +#include "math_special.h" +#include "atom.h" +#include "comm.h" +#include "force.h" +#include "pair.h" +#include "domain.h" +#include "memory.h" +#include "error.h" +#include "update.h" + +using namespace LAMMPS_NS; +using namespace MathConst; +using namespace MathSpecial; + +#define SMALL 0.00001 + +enum{GEOMETRIC,ARITHMETIC,SIXTHPOWER}; // same as in pair.h + +//#define DEBUG + +/* ---------------------------------------------------------------------- */ + +EwaldDisp::EwaldDisp(LAMMPS *lmp, int narg, char **arg) : KSpace(lmp, narg, arg) +{ + if (narg!=1) error->all(FLERR,"Illegal kspace_style ewald/n command"); + + ewaldflag = dispersionflag = dipoleflag = 1; + accuracy_relative = fabs(force->numeric(FLERR,arg[0])); + + memset(function, 0, EWALD_NORDER*sizeof(int)); + kenergy = kvirial = NULL; + cek_local = cek_global = NULL; + ekr_local = NULL; + hvec = NULL; + kvec = NULL; + B = NULL; + first_output = 0; + energy_self_peratom = NULL; + virial_self_peratom = NULL; + nmax = 0; + q2 = 0; + b2 = 0; + M2 = 0; +} + +/* ---------------------------------------------------------------------- */ + +EwaldDisp::~EwaldDisp() +{ + deallocate(); + deallocate_peratom(); + delete [] ekr_local; + delete [] B; +} + +/* --------------------------------------------------------------------- */ + +void EwaldDisp::init() +{ + nkvec = 
nkvec_max = nevec = nevec_max = 0; + nfunctions = nsums = sums = 0; + nbox = -1; + bytes = 0.0; + + if (!comm->me) { + if (screen) fprintf(screen,"EwaldDisp initialization ...\n"); + if (logfile) fprintf(logfile,"EwaldDisp initialization ...\n"); + } + + triclinic_check(); + if (domain->dimension == 2) + error->all(FLERR,"Cannot use EwaldDisp with 2d simulation"); + if (slabflag == 0 && domain->nonperiodic > 0) + error->all(FLERR,"Cannot use nonperiodic boundaries with EwaldDisp"); + if (slabflag == 1) { + if (domain->xperiodic != 1 || domain->yperiodic != 1 || + domain->boundary[2][0] != 1 || domain->boundary[2][1] != 1) + error->all(FLERR,"Incorrect boundaries with slab EwaldDisp"); + } + + scale = 1.0; + mumurd2e = force->qqrd2e; + dielectric = force->dielectric; + + int tmp; + Pair *pair = force->pair; + int *ptr = pair ? (int *) pair->extract("ewald_order",tmp) : NULL; + double *cutoff = pair ? (double *) pair->extract("cut_coul",tmp) : NULL; + if (!(ptr||cutoff)) + error->all(FLERR,"KSpace style is incompatible with Pair style"); + int ewald_order = ptr ? *((int *) ptr) : 1<<1; + int ewald_mix = ptr ? 
*((int *) pair->extract("ewald_mix",tmp)) : GEOMETRIC; + memset(function, 0, EWALD_NFUNCS*sizeof(int)); + for (int i=0; i<=EWALD_NORDER; ++i) // transcribe order + if (ewald_order&(1<all(FLERR, + "Unsupported mixing rule in kspace_style ewald/disp"); + default: + error->all(FLERR,"Unsupported order in kspace_style ewald/disp"); + } + nfunctions += function[k] = 1; + nsums += n[k]; + } + + if (!gewaldflag) g_ewald = 0.0; + pair->init(); // so B is defined + init_coeffs(); + init_coeff_sums(); + + double qsum, qsqsum, bsbsum; + qsum = qsqsum = bsbsum = 0.0; + if (function[0]) { + qsum = sum[0].x; + qsqsum = sum[0].x2; + } + + // turn off coulombic if no charge + + if (function[0] && qsqsum == 0.0) { + function[0] = 0; + nfunctions -= 1; + nsums -= 1; + } + + if (function[1]) bsbsum = sum[1].x2; + if (function[2]) bsbsum = sum[2].x2; + + if (function[3]) M2 = sum[9].x2; + + if (function[3] && strcmp(update->unit_style,"electron") == 0) + error->all(FLERR,"Cannot (yet) use 'electron' units with dipoles"); + + if (qsqsum == 0.0 && bsbsum == 0.0 && M2 == 0.0) + error->all(FLERR,"Cannot use Ewald/disp solver " + "on system with no charge, dipole, or LJ particles"); + if (fabs(qsum) > SMALL && comm->me == 0) { + char str[128]; + sprintf(str,"System is not charge neutral, net charge = %g",qsum); + error->warning(FLERR,str); + } + + if (!function[1] && !function[2]) + dispersionflag = 0; + + if (!function[3]) + dipoleflag = 0; + + pair_check(); + + // set accuracy (force units) from accuracy_relative or accuracy_absolute + + if (accuracy_absolute >= 0.0) accuracy = accuracy_absolute; + else accuracy = accuracy_relative * two_charge_force; + + // setup K-space resolution + + q2 = qsqsum * force->qqrd2e; + M2 *= mumurd2e; + b2 = bsbsum; //Are these units right? 
+ bigint natoms = atom->natoms; + + if (!gewaldflag) { + if (function[0]) { + g_ewald = accuracy*sqrt(natoms*(*cutoff)*shape_det(domain->h)) / (2.0*q2); + if (g_ewald >= 1.0) g_ewald = (1.35 - 0.15*log(accuracy))/(*cutoff); + else g_ewald = sqrt(-log(g_ewald)) / (*cutoff); + } + else if (function[1] || function[2]) { + //Try Newton Solver + //Use old method to get guess + g_ewald = (1.35 - 0.15*log(accuracy))/ *cutoff; + + double g_ewald_new = + NewtonSolve(g_ewald,(*cutoff),natoms,shape_det(domain->h),b2); + if (g_ewald_new > 0.0) g_ewald = g_ewald_new; + else error->warning(FLERR,"Ewald/disp Newton solver failed, " + "using old method to estimate g_ewald"); + } else if (function[3]) { + //Try Newton Solver + //Use old method to get guess + g_ewald = (1.35 - 0.15*log(accuracy))/ *cutoff; + double g_ewald_new = + NewtonSolve(g_ewald,(*cutoff),natoms,shape_det(domain->h),M2); + if (g_ewald_new > 0.0) g_ewald = g_ewald_new; + else error->warning(FLERR,"Ewald/disp Newton solver failed, " + "using old method to estimate g_ewald"); + } + } + + if (!comm->me) { + if (screen) fprintf(screen, " G vector = %g\n", g_ewald); + if (logfile) fprintf(logfile, " G vector = %g\n", g_ewald); + } + + g_ewald_6 = g_ewald; + deallocate_peratom(); + peratom_allocate_flag = 0; +} + +/* ---------------------------------------------------------------------- + adjust EwaldDisp coeffs, called initially and whenever volume has changed +------------------------------------------------------------------------- */ + +void EwaldDisp::setup() +{ + volume = shape_det(domain->h)*slab_volfactor; + memcpy(unit, domain->h_inv, sizeof(shape)); + shape_scalar_mult(unit, 2.0*MY_PI); + unit[2] /= slab_volfactor; + + // int nbox_old = nbox, nkvec_old = nkvec; + + if (accuracy >= 1) { + nbox = 0; + error->all(FLERR,"KSpace accuracy too low"); + } + + bigint natoms = atom->natoms; + double err; + int kxmax = 1; + int kymax = 1; + int kzmax = 1; + err = rms(kxmax,domain->h[0],natoms,q2,b2,M2); + while (err > 
accuracy) { + kxmax++; + err = rms(kxmax,domain->h[0],natoms,q2,b2,M2); + } + err = rms(kymax,domain->h[1],natoms,q2,b2,M2); + while (err > accuracy) { + kymax++; + err = rms(kymax,domain->h[1],natoms,q2,b2,M2); + } + err = rms(kzmax,domain->h[2]*slab_volfactor,natoms,q2,b2,M2); + while (err > accuracy) { + kzmax++; + err = rms(kzmax,domain->h[2]*slab_volfactor,natoms,q2,b2,M2); + } + nbox = MAX(kxmax,kymax); + nbox = MAX(nbox,kzmax); + double gsqxmx = unit[0]*unit[0]*kxmax*kxmax; + double gsqymx = unit[1]*unit[1]*kymax*kymax; + double gsqzmx = unit[2]*unit[2]*kzmax*kzmax; + gsqmx = MAX(gsqxmx,gsqymx); + gsqmx = MAX(gsqmx,gsqzmx); + gsqmx *= 1.00001; + + reallocate(); + coefficients(); + init_coeffs(); + init_coeff_sums(); + init_self(); + + if (!(first_output||comm->me)) { + first_output = 1; + if (screen) fprintf(screen, + " vectors: nbox = %d, nkvec = %d\n", nbox, nkvec); + if (logfile) fprintf(logfile, + " vectors: nbox = %d, nkvec = %d\n", nbox, nkvec); + } +} + +/* ---------------------------------------------------------------------- + compute RMS accuracy for a dimension +------------------------------------------------------------------------- */ + +double EwaldDisp::rms(int km, double prd, bigint natoms, double q2, double b2, double M2) +{ + double value = 0.0; + + // Coulombic + + double g2 = g_ewald*g_ewald; + + value += 2.0*q2*g_ewald/prd * + sqrt(1.0/(MY_PI*km*natoms)) * + exp(-MY_PI*MY_PI*km*km/(g2*prd*prd)); + + // Lennard-Jones + + double g7 = g2*g2*g2*g_ewald; + + value += 4.0*b2*g7/3.0 * + sqrt(1.0/(MY_PI*natoms)) * + (exp(-MY_PI*MY_PI*km*km/(g2*prd*prd)) * + (MY_PI*km/(g_ewald*prd) + 1)); + + // dipole + + value += 8.0*MY_PI*M2/volume*g_ewald * + sqrt(2.0*MY_PI*km*km*km/(15.0*natoms)) * + exp(-pow(MY_PI*km/(g_ewald*prd),2.0)); + + return value; +} + +void EwaldDisp::reallocate() +{ + int ix, iy, iz; + int nkvec_max = nkvec; + vector h; + + nkvec = 0; + int *kflag = new int[(nbox+1)*(2*nbox+1)*(2*nbox+1)]; + int *flag = kflag; + + for (ix=0; 
ix<=nbox; ++ix) + for (iy=-nbox; iy<=nbox; ++iy) + for (iz=-nbox; iz<=nbox; ++iz) + if (!(ix||iy||iz)) *(flag++) = 0; + else if ((!ix)&&(iy<0)) *(flag++) = 0; + else if ((!(ix||iy))&&(iz<0)) *(flag++) = 0; // use symmetry + else { + h[0] = unit[0]*ix; + h[1] = unit[5]*ix+unit[1]*iy; + h[2] = unit[4]*ix+unit[3]*iy+unit[2]*iz; + if ((*(flag++) = h[0]*h[0]+h[1]*h[1]+h[2]*h[2]<=gsqmx)) ++nkvec; + } + + if (nkvec>nkvec_max) { + deallocate(); // free memory + hvec = new hvector[nkvec]; // hvec + bytes += (nkvec-nkvec_max)*sizeof(hvector); + kvec = new kvector[nkvec]; // kvec + bytes += (nkvec-nkvec_max)*sizeof(kvector); + kenergy = new double[nkvec*nfunctions]; // kenergy + bytes += (nkvec-nkvec_max)*nfunctions*sizeof(double); + kvirial = new double[6*nkvec*nfunctions]; // kvirial + bytes += 6*(nkvec-nkvec_max)*nfunctions*sizeof(double); + cek_local = new complex[nkvec*nsums]; // cek_local + bytes += (nkvec-nkvec_max)*nsums*sizeof(complex); + cek_global = new complex[nkvec*nsums]; // cek_global + bytes += (nkvec-nkvec_max)*nsums*sizeof(complex); + nkvec_max = nkvec; + } + + flag = kflag; // create index and + kvector *k = kvec; // wave vectors + hvector *hi = hvec; + for (ix=0; ix<=nbox; ++ix) + for (iy=-nbox; iy<=nbox; ++iy) + for (iz=-nbox; iz<=nbox; ++iz) + if (*(flag++)) { + hi->x = unit[0]*ix; + hi->y = unit[5]*ix+unit[1]*iy; + (hi++)->z = unit[4]*ix+unit[3]*iy+unit[2]*iz; + k->x = ix+nbox; k->y = iy+nbox; (k++)->z = iz+nbox; } + + delete [] kflag; +} + + +void EwaldDisp::reallocate_atoms() +{ + if (eflag_atom || vflag_atom) + if (atom->nlocal > nmax) { + deallocate_peratom(); + allocate_peratom(); + nmax = atom->nmax; + } + + if ((nevec = atom->nmax*(2*nbox+1))<=nevec_max) return; + delete [] ekr_local; + ekr_local = new cvector[nevec]; + bytes += (nevec-nevec_max)*sizeof(cvector); + nevec_max = nevec; +} + + +void EwaldDisp::allocate_peratom() +{ + memory->create(energy_self_peratom, + atom->nmax,EWALD_NFUNCS,"ewald/n:energy_self_peratom"); + 
memory->create(virial_self_peratom, + atom->nmax,EWALD_NFUNCS,"ewald/n:virial_self_peratom"); +} + + +void EwaldDisp::deallocate_peratom() // free memory +{ + memory->destroy(energy_self_peratom); + memory->destroy(virial_self_peratom); +} + + +void EwaldDisp::deallocate() // free memory +{ + delete [] hvec; hvec = NULL; + delete [] kvec; kvec = NULL; + delete [] kenergy; kenergy = NULL; + delete [] kvirial; kvirial = NULL; + delete [] cek_local; cek_local = NULL; + delete [] cek_global; cek_global = NULL; +} + + +void EwaldDisp::coefficients() +{ + vector h; + hvector *hi = hvec, *nh; + double eta2 = 0.25/(g_ewald*g_ewald); + double b1, b2, expb2, h1, h2, c1, c2; + double *ke = kenergy, *kv = kvirial; + int func0 = function[0], func12 = function[1]||function[2], + func3 = function[3]; + + for (nh = (hi = hvec)+nkvec; hintypes; + + if (function[1]) { // geometric 1/r^6 + double **b = (double **) force->pair->extract("B",tmp); + delete [] B; + B = new double[n+1]; + bytes += (n+1)*sizeof(double); + for (int i=0; i<=n; ++i) B[i] = sqrt(fabs(b[i][i])); + } + if (function[2]) { // arithmetic 1/r^6 + double **epsilon = (double **) force->pair->extract("epsilon",tmp); + double **sigma = (double **) force->pair->extract("sigma",tmp); + double eps_i, sigma_i, sigma_n, *bi = B = new double[7*n+7]; + double c[7] = { + 1.0, sqrt(6.0), sqrt(15.0), sqrt(20.0), sqrt(15.0), sqrt(6.0), 1.0}; + + if (!(epsilon&&sigma)) + error->all( + FLERR,"Epsilon or sigma reference not set by pair style in ewald/n"); + for (int i=0; i<=n; ++i) { + eps_i = sqrt(epsilon[i][i]); + sigma_i = sigma[i][i]; + sigma_n = 1.0; + for (int j=0; j<7; ++j) { + *(bi++) = sigma_n*eps_i*c[j]; sigma_n *= sigma_i; + } + } + } +} + +void EwaldDisp::init_coeff_sums() +{ + if (sums) return; // calculated only once + sums = 1; + + Sum sum_local[EWALD_MAX_NSUMS]; + + memset(sum_local, 0, EWALD_MAX_NSUMS*sizeof(Sum)); + if (function[0]) { // 1/r + double *q = atom->q, *qn = q+atom->nlocal; + for (double *i=q; itype, 
*ntype = type+atom->nlocal; + for (int *i=type; itype, *ntype = type+atom->nlocal; + for (int *i=type; imu) { // dipole + double *mu = atom->mu[0], *nmu = mu+4*atom->nlocal; + for (double *i = mu; i < nmu; i += 4) + sum_local[9].x2 += i[3]*i[3]; + } + MPI_Allreduce(sum_local, sum, 2*EWALD_MAX_NSUMS, MPI_DOUBLE, MPI_SUM, world); +} + + +void EwaldDisp::init_self() +{ + double g1 = g_ewald, g2 = g1*g1, g3 = g1*g2; + const double qscale = force->qqrd2e * scale; + + memset(energy_self, 0, EWALD_NFUNCS*sizeof(double)); // self energy + memset(virial_self, 0, EWALD_NFUNCS*sizeof(double)); + + if (function[0]) { // 1/r + virial_self[0] = -0.5*MY_PI*qscale/(g2*volume)*sum[0].x*sum[0].x; + energy_self[0] = sum[0].x2*qscale*g1/MY_PIS-virial_self[0]; + } + if (function[1]) { // geometric 1/r^6 + virial_self[1] = MY_PI*MY_PIS*g3/(6.0*volume)*sum[1].x*sum[1].x; + energy_self[1] = -sum[1].x2*g3*g3/12.0+virial_self[1]; + } + if (function[2]) { // arithmetic 1/r^6 + virial_self[2] = MY_PI*MY_PIS*g3/(48.0*volume)*(sum[2].x*sum[8].x+ + sum[3].x*sum[7].x+sum[4].x*sum[6].x+0.5*sum[5].x*sum[5].x); + energy_self[2] = -sum[2].x2*g3*g3/3.0+virial_self[2]; + } + if (function[3]) { // dipole + virial_self[3] = 0; // in surface + energy_self[3] = sum[9].x2*mumurd2e*2.0*g3/3.0/MY_PIS-virial_self[3]; + } +} + + +void EwaldDisp::init_self_peratom() +{ + if (!(vflag_atom || eflag_atom)) return; + + double g1 = g_ewald, g2 = g1*g1, g3 = g1*g2; + const double qscale = force->qqrd2e * scale; + double *energy = energy_self_peratom[0]; + double *virial = virial_self_peratom[0]; + int nlocal = atom->nlocal; + + memset(energy, 0, EWALD_NFUNCS*nlocal*sizeof(double)); + memset(virial, 0, EWALD_NFUNCS*nlocal*sizeof(double)); + + if (function[0]) { // 1/r + double *ei = energy; + double *vi = virial; + double ce = qscale*g1/MY_PIS; + double cv = -0.5*MY_PI*qscale/(g2*volume); + double *qi = atom->q, *qn = qi + nlocal; + for (; qi < qn; qi++, vi += EWALD_NFUNCS, ei += EWALD_NFUNCS) { + double q = *qi; + *vi 
= cv*q*sum[0].x; + *ei = ce*q*q-vi[0]; + } + } + if (function[1]) { // geometric 1/r^6 + double *ei = energy+1; + double *vi = virial+1; + double ce = -g3*g3/12.0; + double cv = MY_PI*MY_PIS*g3/(6.0*volume); + int *typei = atom->type, *typen = typei + atom->nlocal; + for (; typei < typen; typei++, vi += EWALD_NFUNCS, ei += EWALD_NFUNCS) { + double b = B[*typei]; + *vi = cv*b*sum[1].x; + *ei = ce*b*b+vi[0]; + } + } + if (function[2]) { // arithmetic 1/r^6 + double *bi; + double *ei = energy+2; + double *vi = virial+2; + double ce = -g3*g3/3.0; + double cv = 0.5*MY_PI*MY_PIS*g3/(48.0*volume); + int *typei = atom->type, *typen = typei + atom->nlocal; + for (; typei < typen; typei++, vi += EWALD_NFUNCS, ei += EWALD_NFUNCS) { + bi = B+7*typei[0]+7; + for (int k=2; k<9; ++k) *vi += cv*sum[k].x*(--bi)[0]; + + /* PJV 20120225: + should this be this instead? above implies an inverse dependence + seems to be the above way in original; i recall having tested + arithmetic mixing in the conception phase, but an extra test would + be prudent (pattern repeats in multiple functions below) + + bi = B+7*typei[0]; + for (int k=2; k<9; ++k) *vi += cv*sum[k].x*(bi++)[0]; + + */ + + *ei = ce*bi[0]*bi[6]+vi[0]; + } + } + if (function[3]&&atom->mu) { // dipole + double *ei = energy+3; + double *vi = virial+3; + double *imu = atom->mu[0], *nmu = imu+4*atom->nlocal; + double ce = mumurd2e*2.0*g3/3.0/MY_PIS; + for (; imu < nmu; imu += 4, vi += EWALD_NFUNCS, ei += EWALD_NFUNCS) { + *vi = 0; // in surface + *ei = ce*imu[3]*imu[3]-vi[0]; + } + } +} + + +/* ---------------------------------------------------------------------- + compute the EwaldDisp long-range force, energy, virial +------------------------------------------------------------------------- */ + +void EwaldDisp::compute(int eflag, int vflag) +{ + if (!nbox) return; + + // set energy/virial flags + // invoke allocate_peratom() if needed for first time + + if (eflag || vflag) ev_setup(eflag,vflag); + else evflag = eflag_global = 
vflag_global = eflag_atom = vflag_atom = 0; + + if (!peratom_allocate_flag && (eflag_atom || vflag_atom)) { + allocate_peratom(); + peratom_allocate_flag = 1; + nmax = atom->nmax; + } + + reallocate_atoms(); + init_self_peratom(); + compute_ek(); + compute_force(); + //compute_surface(); // assume conducting metal (tinfoil) boundary conditions + compute_energy(); + compute_energy_peratom(); + compute_virial(); + compute_virial_dipole(); + compute_virial_peratom(); +} + + +void EwaldDisp::compute_ek() +{ + cvector *ekr = ekr_local; + int lbytes = (2*nbox+1)*sizeof(cvector); + hvector *h = NULL; + kvector *k, *nk = kvec+nkvec; + cvector *z = new cvector[2*nbox+1]; + cvector z1, *zx, *zy, *zz, *zn = z+2*nbox; + complex *cek, zxyz, zxy = COMPLEX_NULL, cx = COMPLEX_NULL; + vector mui; + double *x = atom->x[0], *xn = x+3*atom->nlocal, *q = atom->q, qi = 0.0; + double bi = 0.0, ci[7]; + double *mu = atom->mu ? atom->mu[0] : NULL; + int i, kx, ky, n = nkvec*nsums, *type = atom->type, tri = domain->triclinic; + int func[EWALD_NFUNCS]; + + memcpy(func, function, EWALD_NFUNCS*sizeof(int)); + memset(cek_local, 0, n*sizeof(complex)); // reset sums + while (xx, 1, 0); C_SET(zz->y, 1, 0); C_SET(zz->z, 1, 0); // z[0] + if (tri) { // triclinic z[1] + C_ANGLE(z1.x, unit[0]*x[0]+unit[5]*x[1]+unit[4]*x[2]); + C_ANGLE(z1.y, unit[1]*x[1]+unit[3]*x[2]); + C_ANGLE(z1.z, x[2]*unit[2]); x += 3; + } + else { // orthogonal z[1] + C_ANGLE(z1.x, *(x++)*unit[0]); + C_ANGLE(z1.y, *(x++)*unit[1]); + C_ANGLE(z1.z, *(x++)*unit[2]); + } + for (; zzx, zz->x, z1.x); // 3D k-vector + C_RMULT(zy->y, zz->y, z1.y); C_CONJ(zx->y, zy->y); + C_RMULT(zy->z, zz->z, z1.z); C_CONJ(zx->z, zy->z); + } + kx = ky = -1; + cek = cek_local; + if (func[0]) qi = *(q++); + if (func[1]) bi = B[*type]; + if (func[2]) memcpy(ci, B+7*type[0], 7*sizeof(double)); + if (func[3]) { + memcpy(mui, mu, sizeof(vector)); + mu += 4; + h = hvec; + } + for (k=kvec; ky) { // based on order in + if (kx!=k->x) cx = z[kx = k->x].x; // 
reallocate + C_RMULT(zxy, z[ky = k->y].y, cx); + } + C_RMULT(zxyz, z[k->z].z, zxy); + if (func[0]) { + cek->re += zxyz.re*qi; (cek++)->im += zxyz.im*qi; + } + if (func[1]) { + cek->re += zxyz.re*bi; (cek++)->im += zxyz.im*bi; + } + if (func[2]) for (i=0; i<7; ++i) { + cek->re += zxyz.re*ci[i]; (cek++)->im += zxyz.im*ci[i]; + } + if (func[3]) { + register double muk = mui[0]*h->x+mui[1]*h->y+mui[2]*h->z; ++h; + cek->re += zxyz.re*muk; (cek++)->im += zxyz.im*muk; + } + } + ekr = (cvector *) ((char *) memcpy(ekr, z, lbytes)+lbytes); + ++type; + } + MPI_Allreduce(cek_local, cek_global, 2*n, MPI_DOUBLE, MPI_SUM, world); + + delete [] z; +} + + +void EwaldDisp::compute_force() +{ + kvector *k; + hvector *h, *nh; + cvector *z = ekr_local; + vector sum[EWALD_MAX_NSUMS], mui = COMPLEX_NULL; + complex *cek, zc, zx = COMPLEX_NULL, zxy = COMPLEX_NULL; + complex *cek_coul; + double *f = atom->f[0], *fn = f+3*atom->nlocal, *q = atom->q, *t = NULL; + double *mu = atom->mu ? atom->mu[0] : NULL; + const double qscale = force->qqrd2e * scale; + double *ke, c[EWALD_NFUNCS] = { + 8.0*MY_PI*qscale/volume, 2.0*MY_PI*MY_PIS/(12.0*volume), + 2.0*MY_PI*MY_PIS/(192.0*volume), 8.0*MY_PI*mumurd2e/volume}; + double kt = 4.0*cube(g_ewald)/3.0/MY_PIS/c[3]; + int i, kx, ky, lbytes = (2*nbox+1)*sizeof(cvector), *type = atom->type; + int func[EWALD_NFUNCS]; + + if (atom->torque) t = atom->torque[0]; + memcpy(func, function, EWALD_NFUNCS*sizeof(int)); + memset(sum, 0, EWALD_MAX_NSUMS*sizeof(vector)); // fj = -dE/dr = + for (; fy) { // based on order in + if (kx!=k->x) zx = z[kx = k->x].x; // reallocate + C_RMULT(zxy, z[ky = k->y].y, zx); + } + C_CRMULT(zc, z[k->z].z, zxy); + if (func[0]) { // 1/r + register double im = *(ke++)*(zc.im*cek->re+cek->im*zc.re); + if (func[3]) cek_coul = cek; + ++cek; + sum[0][0] += h->x*im; sum[0][1] += h->y*im; sum[0][2] += h->z*im; + } + if (func[1]) { // geometric 1/r^6 + register double im = *(ke++)*(zc.im*cek->re+cek->im*zc.re); ++cek; + sum[1][0] += h->x*im; 
sum[1][1] += h->y*im; sum[1][2] += h->z*im; + } + if (func[2]) { // arithmetic 1/r^6 + register double im, c = *(ke++); + for (i=2; i<9; ++i) { + im = c*(zc.im*cek->re+cek->im*zc.re); ++cek; + sum[i][0] += h->x*im; sum[i][1] += h->y*im; sum[i][2] += h->z*im; + } + } + if (func[3]) { // dipole + register double im = *(ke)*(zc.im*cek->re+ + cek->im*zc.re)*(mui[0]*h->x+mui[1]*h->y+mui[2]*h->z); + register double im2 = *(ke)*(zc.re*cek->re- + cek->im*zc.im); + sum[9][0] += h->x*im; sum[9][1] += h->y*im; sum[9][2] += h->z*im; + t[0] += -mui[1]*h->z*im2 + mui[2]*h->y*im2; // torque + t[1] += -mui[2]*h->x*im2 + mui[0]*h->z*im2; + t[2] += -mui[0]*h->y*im2 + mui[1]*h->x*im2; + if (func[0]) { // charge-dipole + register double qi = *(q)*c[0]; + im = - *(ke)*(zc.re*cek_coul->re - + cek_coul->im*zc.im)*(mui[0]*h->x+mui[1]*h->y+mui[2]*h->z); + im += *(ke)*(zc.re*cek->re - cek->im*zc.im)*qi; + sum[9][0] += h->x*im; sum[9][1] += h->y*im; sum[9][2] += h->z*im; + + im2 = *(ke)*(zc.re*cek_coul->im + cek_coul->re*zc.im); + im2 += -*(ke)*(zc.re*cek->im - cek->im*zc.re); + t[0] += -mui[1]*h->z*im2 + mui[2]*h->y*im2; // torque + t[1] += -mui[2]*h->x*im2 + mui[0]*h->z*im2; + t[2] += -mui[0]*h->y*im2 + mui[1]*h->x*im2; + } + ++cek; + ke++; + } + } + if (func[0]) { // 1/r + register double qi = *(q++)*c[0]; + f[0] -= sum[0][0]*qi; f[1] -= sum[0][1]*qi; f[2] -= sum[0][2]*qi; + } + if (func[1]) { // geometric 1/r^6 + register double bi = B[*type]*c[1]; + f[0] -= sum[1][0]*bi; f[1] -= sum[1][1]*bi; f[2] -= sum[1][2]*bi; + } + if (func[2]) { // arithmetic 1/r^6 + register double *bi = B+7*type[0]+7; + for (i=2; i<9; ++i) { + register double c2 = (--bi)[0]*c[2]; + f[0] -= sum[i][0]*c2; f[1] -= sum[i][1]*c2; f[2] -= sum[i][2]*c2; + } + } + if (func[3]) { // dipole + f[0] -= sum[9][0]; f[1] -= sum[9][1]; f[2] -= sum[9][2]; + } + z = (cvector *) ((char *) z+lbytes); + ++type; + t += 3; + } +} + + +void EwaldDisp::compute_surface() +{ + // assume conducting metal (tinfoil) boundary conditions, so 
this function is + // not called because dielectric at the boundary --> infinity, which makes all + // the terms here zero. + + if (!function[3]) return; + if (!atom->mu) return; + + vector sum_local = VECTOR_NULL, sum_total; + memset(sum_local, 0, sizeof(vector)); + double *i, *n, *mu = atom->mu[0]; + + for (n = (i = mu) + 4*atom->nlocal; i < n; ++i) { + sum_local[0] += (i++)[0]; + sum_local[1] += (i++)[0]; + sum_local[2] += (i++)[0]; + } + MPI_Allreduce(sum_local, sum_total, 3, MPI_DOUBLE, MPI_SUM, world); + + virial_self[3] = + mumurd2e*(2.0*MY_PI*vec_dot(sum_total,sum_total)/(2.0*dielectric+1)/volume); + energy_self[3] -= virial_self[3]; + + if (!(vflag_atom || eflag_atom)) return; + + double *ei = energy_self_peratom[0]+3; + double *vi = virial_self_peratom[0]+3; + double cv = 2.0*mumurd2e*MY_PI/(2.0*dielectric+1)/volume; + + for (i = mu; i < n; i += 4, ei += EWALD_NFUNCS, vi += EWALD_NFUNCS) { + *vi = cv*(i[0]*sum_total[0]+i[1]*sum_total[1]+i[2]*sum_total[2]); + *ei -= *vi; + } +} + + +void EwaldDisp::compute_energy() +{ + energy = 0.0; + if (!eflag_global) return; + + complex *cek = cek_global; + complex *cek_coul; + double *ke = kenergy; + const double qscale = force->qqrd2e * scale; + double c[EWALD_NFUNCS] = { + 4.0*MY_PI*qscale/volume, 2.0*MY_PI*MY_PIS/(24.0*volume), + 2.0*MY_PI*MY_PIS/(192.0*volume), 4.0*MY_PI*mumurd2e/volume}; + double sum[EWALD_NFUNCS]; + int func[EWALD_NFUNCS]; + + memcpy(func, function, EWALD_NFUNCS*sizeof(int)); + memset(sum, 0, EWALD_NFUNCS*sizeof(double)); // reset sums + for (int k=0; kre*cek->re+cek->im*cek->im); + if (func[3]) cek_coul = cek; + ++cek; + } + if (func[1]) { // geometric 1/r^6 + sum[1] += *(ke++)*(cek->re*cek->re+cek->im*cek->im); ++cek; } + if (func[2]) { // arithmetic 1/r^6 + register double r = + (cek[0].re*cek[6].re+cek[0].im*cek[6].im)+ + (cek[1].re*cek[5].re+cek[1].im*cek[5].im)+ + (cek[2].re*cek[4].re+cek[2].im*cek[4].im)+ + 0.5*(cek[3].re*cek[3].re+cek[3].im*cek[3].im); cek += 7; + sum[2] += *(ke++)*r; + 
} + if (func[3]) { // dipole + sum[3] += *(ke)*(cek->re*cek->re+cek->im*cek->im); + if (func[0]) { // charge-dipole + sum[3] += *(ke)*2.0*(cek->re*cek_coul->im - cek->im*cek_coul->re); + } + ke++; + ++cek; + } + } + for (int k=0; kq; + double *eatomj = eatom; + double *mu = atom->mu ? atom->mu[0] : NULL; + const double qscale = force->qqrd2e * scale; + double *ke = kenergy; + double c[EWALD_NFUNCS] = { + 4.0*MY_PI*qscale/volume, 2.0*MY_PI*MY_PIS/(24.0*volume), + 2.0*MY_PI*MY_PIS/(192.0*volume), 4.0*MY_PI*mumurd2e/volume}; + int i, kx, ky, lbytes = (2*nbox+1)*sizeof(cvector), *type = atom->type; + int func[EWALD_NFUNCS]; + + memcpy(func, function, EWALD_NFUNCS*sizeof(int)); + for (int j = 0; j < atom->nlocal; j++, ++eatomj) { + k = kvec; + kx = ky = -1; + ke = kenergy; + cek = cek_global; + memset(sum, 0, EWALD_MAX_NSUMS*sizeof(double)); + if (func[3]) { + register double di = c[3]; + mui[0] = di*(mu++)[0]; mui[1] = di*(mu++)[0]; mui[2] = di*(mu++)[0]; + mu++; + } + for (nh = (h = hvec)+nkvec; hy) { // based on order in + if (kx!=k->x) zx = z[kx = k->x].x; // reallocate + C_RMULT(zxy, z[ky = k->y].y, zx); + } + C_CRMULT(zc, z[k->z].z, zxy); + if (func[0]) { // 1/r + sum[0] += *(ke++)*(cek->re*zc.re - cek->im*zc.im); + if (func[3]) cek_coul = cek; + ++cek; + } + if (func[1]) { // geometric 1/r^6 + sum[1] += *(ke++)*(cek->re*zc.re - cek->im*zc.im); ++cek; } + if (func[2]) { // arithmetic 1/r^6 + register double im, c = *(ke++); + for (i=2; i<9; ++i) { + im = c*(cek->re*zc.re - cek->im*zc.im); ++cek; + sum[i] += im; + } + } + if (func[3]) { // dipole + double muk = (mui[0]*h->x+mui[1]*h->y+mui[2]*h->z); + sum[9] += *(ke)*(cek->re*zc.re - cek->im*zc.im)*muk; + if (func[0]) { // charge-dipole + register double qj = *(q)*c[0]; + sum[9] += *(ke)*(cek_coul->im*zc.re + cek_coul->re*zc.im)*muk; + sum[9] -= *(ke)*(cek->re*zc.im + cek->im*zc.re)*qj; + } + ++cek; + ke++; + } + } + + if (func[0]) { // 1/r + register double qj = *(q++)*c[0]; + *eatomj += sum[0]*qj - 
energy_self_peratom[j][0]; + } + if (func[1]) { // geometric 1/r^6 + register double bj = B[*type]*c[1]; + *eatomj += sum[1]*bj - energy_self_peratom[j][1]; + } + if (func[2]) { // arithmetic 1/r^6 + register double *bj = B+7*type[0]+7; + for (i=2; i<9; ++i) { + register double c2 = (--bj)[0]*c[2]; + *eatomj += 0.5*sum[i]*c2; + } + *eatomj -= energy_self_peratom[j][2]; + } + if (func[3]) { // dipole + *eatomj += sum[9] - energy_self_peratom[j][3]; + } + z = (cvector *) ((char *) z+lbytes); + ++type; + } +} + + +#define swap(a, b) { register double t = a; a= b; b = t; } + +void EwaldDisp::compute_virial() +{ + memset(virial, 0, sizeof(shape)); + if (!vflag_global) return; + + complex *cek = cek_global; + complex *cek_coul; + double *kv = kvirial; + const double qscale = force->qqrd2e * scale; + double c[EWALD_NFUNCS] = { + 4.0*MY_PI*qscale/volume, 2.0*MY_PI*MY_PIS/(24.0*volume), + 2.0*MY_PI*MY_PIS/(192.0*volume), 4.0*MY_PI*mumurd2e/volume}; + shape sum[EWALD_NFUNCS]; + int func[EWALD_NFUNCS]; + + memcpy(func, function, EWALD_NFUNCS*sizeof(int)); + memset(sum, 0, EWALD_NFUNCS*sizeof(shape)); + for (int k=0; kre*cek->re+cek->im*cek->im; + if (func[3]) cek_coul = cek; + ++cek; + sum[0][0] += *(kv++)*r; sum[0][1] += *(kv++)*r; sum[0][2] += *(kv++)*r; + sum[0][3] += *(kv++)*r; sum[0][4] += *(kv++)*r; sum[0][5] += *(kv++)*r; + } + if (func[1]) { // geometric 1/r^6 + register double r = cek->re*cek->re+cek->im*cek->im; ++cek; + sum[1][0] += *(kv++)*r; sum[1][1] += *(kv++)*r; sum[1][2] += *(kv++)*r; + sum[1][3] += *(kv++)*r; sum[1][4] += *(kv++)*r; sum[1][5] += *(kv++)*r; + } + if (func[2]) { // arithmetic 1/r^6 + register double r = + (cek[0].re*cek[6].re+cek[0].im*cek[6].im)+ + (cek[1].re*cek[5].re+cek[1].im*cek[5].im)+ + (cek[2].re*cek[4].re+cek[2].im*cek[4].im)+ + 0.5*(cek[3].re*cek[3].re+cek[3].im*cek[3].im); cek += 7; + sum[2][0] += *(kv++)*r; sum[2][1] += *(kv++)*r; sum[2][2] += *(kv++)*r; + sum[2][3] += *(kv++)*r; sum[2][4] += *(kv++)*r; sum[2][5] += *(kv++)*r; + } 
+ if (func[3]) { + register double r = cek->re*cek->re+cek->im*cek->im; + sum[3][0] += *(kv++)*r; sum[3][1] += *(kv++)*r; sum[3][2] += *(kv++)*r; + sum[3][3] += *(kv++)*r; sum[3][4] += *(kv++)*r; sum[3][5] += *(kv++)*r; + if (func[0]) { // charge-dipole + kv -= 6; + register double r = 2.0*(cek->re*cek_coul->im - cek->im*cek_coul->re); + sum[3][0] += *(kv++)*r; sum[3][1] += *(kv++)*r; sum[3][2] += *(kv++)*r; + sum[3][3] += *(kv++)*r; sum[3][4] += *(kv++)*r; sum[3][5] += *(kv++)*r; + } + ++cek; + } + } + for (int k=0; kmu ? atom->mu[0] : NULL; + double *vatomj = NULL; + if (vflag_atom && vatom) vatomj = vatom[0]; + const double qscale = force->qqrd2e * scale; + double *ke, c[EWALD_NFUNCS] = { + 8.0*MY_PI*qscale/volume, 2.0*MY_PI*MY_PIS/(12.0*volume), + 2.0*MY_PI*MY_PIS/(192.0*volume), 8.0*MY_PI*mumurd2e/volume}; + double kt = 4.0*cube(g_ewald)/3.0/MY_PIS/c[3]; + int i, kx, ky, lbytes = (2*nbox+1)*sizeof(cvector), *type = atom->type; + int func[EWALD_NFUNCS]; + + memcpy(func, function, EWALD_NFUNCS*sizeof(int)); + memset(&sum[0], 0, 6*sizeof(double)); + memset(&sum_total[0], 0, 6*sizeof(double)); + for (int j = 0; j < atom->nlocal; j++) { + k = kvec; + kx = ky = -1; + ke = kenergy; + cek = cek_global; + memset(&sum[0], 0, 6*sizeof(double)); + if (func[3]) { + register double di = c[3]; + mui[0] = di*(mu++)[0]; mui[1] = di*(mu++)[0]; mui[2] = di*(mu++)[0]; + mu++; + } + for (nh = (h = hvec)+nkvec; hy) { // based on order in + if (kx!=k->x) zx = z[kx = k->x].x; // reallocate + C_RMULT(zxy, z[ky = k->y].y, zx); + } + C_CRMULT(zc, z[k->z].z, zxy); + double im = 0.0; + if (func[0]) { // 1/r + ke++; + if (func[3]) cek_coul = cek; + ++cek; + } + if (func[1]) { // geometric 1/r^6 + ke++; + ++cek; + } + if (func[2]) { // arithmetic 1/r^6 + ke++; + for (i=2; i<9; ++i) { + ++cek; + } + } + if (func[3]) { // dipole + im = *(ke)*(zc.re*cek->re - cek->im*zc.im); + if (func[0]) { // charge-dipole + im += *(ke)*(zc.im*cek_coul->re + cek_coul->im*zc.re); + } + sum[0] -= 
mui[0]*h->x*im; + sum[1] -= mui[1]*h->y*im; + sum[2] -= mui[2]*h->z*im; + sum[3] -= mui[0]*h->y*im; + sum[4] -= mui[0]*h->z*im; + sum[5] -= mui[1]*h->z*im; + ++cek; + ke++; + } + } + + if (vflag_global) + for (int n = 0; n < 6; n++) + sum_total[n] -= sum[n]; + + if (vflag_atom) + for (int n = 0; n < 6; n++) + vatomj[n] -= sum[n]; + + z = (cvector *) ((char *) z+lbytes); + ++type; + if (vflag_atom) vatomj += 6; + } + + if (vflag_global) { + MPI_Allreduce(&sum_total[0],&sum[0],6,MPI_DOUBLE,MPI_SUM,world); + for (int n = 0; n < 6; n++) + virial[n] += sum[n]; + } + +} + +void EwaldDisp::compute_virial_peratom() +{ + if (!vflag_atom) return; + + kvector *k; + hvector *h, *nh; + cvector *z = ekr_local; + vector mui = VECTOR_NULL; + complex *cek, zc = COMPLEX_NULL, zx = COMPLEX_NULL, zxy = COMPLEX_NULL; + complex *cek_coul; + double *kv; + double *q = atom->q; + double *vatomj = vatom ? vatom[0] : NULL; + double *mu = atom->mu ? atom->mu[0] : NULL; + const double qscale = force->qqrd2e * scale; + double c[EWALD_NFUNCS] = { + 4.0*MY_PI*qscale/volume, 2.0*MY_PI*MY_PIS/(24.0*volume), + 2.0*MY_PI*MY_PIS/(192.0*volume), 4.0*MY_PI*mumurd2e/volume}; + shape sum[EWALD_MAX_NSUMS]; + int func[EWALD_NFUNCS]; + + memcpy(func, function, EWALD_NFUNCS*sizeof(int)); + int i, kx, ky, lbytes = (2*nbox+1)*sizeof(cvector), *type = atom->type; + for (int j = 0; j < atom->nlocal; j++) { + k = kvec; + kx = ky = -1; + kv = kvirial; + cek = cek_global; + memset(sum, 0, EWALD_MAX_NSUMS*sizeof(shape)); + if (func[3]) { + register double di = c[3]; + mui[0] = di*(mu++)[0]; mui[1] = di*(mu++)[0]; mui[2] = di*(mu++)[0]; + mu++; + } + for (nh = (h = hvec)+nkvec; hy) { // based on order in + if (kx!=k->x) zx = z[kx = k->x].x; // reallocate + C_RMULT(zxy, z[ky = k->y].y, zx); + } + C_CRMULT(zc, z[k->z].z, zxy); + if (func[0]) { // 1/r + if (func[3]) cek_coul = cek; + register double r = cek->re*zc.re - cek->im*zc.im; ++cek; + sum[0][0] += *(kv++)*r; + sum[0][1] += *(kv++)*r; + sum[0][2] += *(kv++)*r; + 
sum[0][3] += *(kv++)*r; + sum[0][4] += *(kv++)*r; + sum[0][5] += *(kv++)*r; + } + if (func[1]) { // geometric 1/r^6 + register double r = cek->re*zc.re - cek->im*zc.im; ++cek; + sum[1][0] += *(kv++)*r; + sum[1][1] += *(kv++)*r; + sum[1][2] += *(kv++)*r; + sum[1][3] += *(kv++)*r; + sum[1][4] += *(kv++)*r; + sum[1][5] += *(kv++)*r; + } + if (func[2]) { // arithmetic 1/r^6 + register double r; + for (i=2; i<9; ++i) { + r = cek->re*zc.re - cek->im*zc.im; ++cek; + sum[i][0] += *(kv++)*r; + sum[i][1] += *(kv++)*r; + sum[i][2] += *(kv++)*r; + sum[i][3] += *(kv++)*r; + sum[i][4] += *(kv++)*r; + sum[i][5] += *(kv++)*r; + kv -= 6; + } + kv += 6; + } + if (func[3]) { // dipole + double muk = (mui[0]*h->x+mui[1]*h->y+mui[2]*h->z); + register double + r = (cek->re*zc.re - cek->im*zc.im)*muk; + sum[9][0] += *(kv++)*r; + sum[9][1] += *(kv++)*r; + sum[9][2] += *(kv++)*r; + sum[9][3] += *(kv++)*r; + sum[9][4] += *(kv++)*r; + sum[9][5] += *(kv++)*r; + if (func[0]) { // charge-dipole + kv -= 6; + register double qj = *(q)*c[0]; + r = (cek_coul->im*zc.re + cek_coul->re*zc.im)*muk; + r += -(cek->re*zc.im + cek->im*zc.re)*qj; + sum[9][0] += *(kv++)*r; sum[9][1] += *(kv++)*r; sum[9][2] += *(kv++)*r; + sum[9][3] += *(kv++)*r; sum[9][4] += *(kv++)*r; sum[9][5] += *(kv++)*r; + } + ++cek; + } + } + + if (func[0]) { // 1/r + register double qi = *(q++)*c[0]; + for (int n = 0; n < 6; n++) vatomj[n] += sum[0][n]*qi; + } + if (func[1]) { // geometric 1/r^6 + register double bi = B[*type]*c[1]; + for (int n = 0; n < 6; n++) vatomj[n] += sum[1][n]*bi; + } + if (func[2]) { // arithmetic 1/r^6 + register double *bj = B+7*type[0]+7; + for (i=2; i<9; ++i) { + register double c2 = (--bj)[0]*c[2]; + for (int n = 0; n < 6; n++) vatomj[n] += 0.5*sum[i][n]*c2; + } + } + if (func[3]) { // dipole + for (int n = 0; n < 6; n++) vatomj[n] += sum[9][n]; + } + + for (int k=0; kq; + double **x = atom->x; + double zprd = domain->zprd; + int nlocal = atom->nlocal; + + double qsum = 0.0; + if (function[0]) qsum = 
sum[0].x; + + double dipole = 0.0; + for (int i = 0; i < nlocal; i++) dipole += q[i]*x[i][2]; + + if (function[3] && atom->mu) { + double **mu = atom->mu; + for (int i = 0; i < nlocal; i++) dipole += mu[i][2]; + } + + // sum local contributions to get global dipole moment + + double dipole_all; + MPI_Allreduce(&dipole,&dipole_all,1,MPI_DOUBLE,MPI_SUM,world); + + // need to make non-neutral systems and/or + // per-atom energy translationally invariant + + double dipole_r2 = 0.0; + if (eflag_atom || fabs(qsum) > SMALL) { + + if (function[3] && atom->mu) + error->all(FLERR,"Cannot (yet) use kspace slab correction with " + "long-range dipoles and non-neutral systems or per-atom energy"); + + for (int i = 0; i < nlocal; i++) + dipole_r2 += q[i]*x[i][2]*x[i][2]; + + // sum local contributions + + double tmp; + MPI_Allreduce(&dipole_r2,&tmp,1,MPI_DOUBLE,MPI_SUM,world); + dipole_r2 = tmp; + } + + // compute corrections + + const double e_slabcorr = MY_2PI*(dipole_all*dipole_all - + qsum*dipole_r2 - qsum*qsum*zprd*zprd/12.0)/volume; + const double qscale = force->qqrd2e * scale; + + if (eflag_global) energy += qscale * e_slabcorr; + + // per-atom energy + + if (eflag_atom) { + double efact = qscale * MY_2PI/volume; + for (int i = 0; i < nlocal; i++) + eatom[i] += efact * q[i]*(x[i][2]*dipole_all - 0.5*(dipole_r2 + + qsum*x[i][2]*x[i][2]) - qsum*zprd*zprd/12.0); + } + + // add on force corrections + + double ffact = qscale * (-4.0*MY_PI/volume); + double **f = atom->f; + + for (int i = 0; i < nlocal; i++) f[i][2] += ffact * q[i]*(dipole_all - qsum*x[i][2]); + + // add on torque corrections + + if (function[3] && atom->mu && atom->torque) { + double **mu = atom->mu; + double **torque = atom->torque; + for (int i = 0; i < nlocal; i++) { + torque[i][0] += ffact * dipole_all * mu[i][1]; + torque[i][1] += -ffact * dipole_all * mu[i][0]; + } + } +} + +/* ---------------------------------------------------------------------- + Newton solver used to find g_ewald for LJ systems + 
------------------------------------------------------------------------- */ + +double EwaldDisp::NewtonSolve(double x, double Rc, + bigint natoms, double vol, double b2) +{ + double dx,tol; + int maxit; + + maxit = 10000; //Maximum number of iterations + tol = 0.00001; //Convergence tolerance + + //Begin algorithm + + for (int i = 0; i < maxit; i++) { + dx = f(x,Rc,natoms,vol,b2) / derivf(x,Rc,natoms,vol,b2); + x = x - dx; //Update x + if (fabs(dx) < tol) return x; + if (x < 0 || x != x) // solver failed + return -1; + } + return -1; +} + +/* ---------------------------------------------------------------------- + Calculate f(x) + ------------------------------------------------------------------------- */ + +double EwaldDisp::f(double x, double Rc, bigint natoms, double vol, double b2) +{ + double a = Rc*x; + double f = 0.0; + + if (function[1] || function[2]) { // LJ + f = (4.0*MY_PI*b2*powint(x,4)/vol/sqrt((double)natoms)*erfc(a) * + (6.0*powint(a,-5) + 6.0*powint(a,-3) + 3.0/a + a) - accuracy); + } else { // dipole + double rg2 = a*a; + double rg4 = rg2*rg2; + double rg6 = rg4*rg2; + double Cc = 4.0*rg4 + 6.0*rg2 + 3.0; + double Dc = 8.0*rg6 + 20.0*rg4 + 30.0*rg2 + 15.0; + f = (b2/(sqrt(vol*powint(x,4)*powint(Rc,9)*natoms)) * + sqrt(13.0/6.0*Cc*Cc + 2.0/15.0*Dc*Dc - 13.0/15.0*Cc*Dc) * + exp(-rg2)) - accuracy; + } + + return f; +} + +/* ---------------------------------------------------------------------- + Calculate numerical derivative f'(x) + ------------------------------------------------------------------------- */ + +double EwaldDisp::derivf(double x, double Rc, + bigint natoms, double vol, double b2) +{ + double h = 0.000001; //Derivative step-size + return (f(x + h,Rc,natoms,vol,b2) - f(x,Rc,natoms,vol,b2)) / h; +} diff --git a/src/KSPACE/pppm.cpp b/src/KSPACE/pppm.cpp index 9c5db42ad8..c3b54559a0 100644 --- a/src/KSPACE/pppm.cpp +++ b/src/KSPACE/pppm.cpp @@ -1,3501 +1,3501 @@ -/* ---------------------------------------------------------------------- 
- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - Copyright (2003) Sandia Corporation. Under the terms of Contract - DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains - certain rights in this software. This software is distributed under - the GNU General Public License. - - See the README file in the top-level LAMMPS directory. -------------------------------------------------------------------------- */ - -/* ---------------------------------------------------------------------- - Contributing authors: Roy Pollock (LLNL), Paul Crozier (SNL) - per-atom energy/virial & group/group energy/force added by Stan Moore (BYU) - analytic diff (2 FFT) option added by Rolf Isele-Holder (Aachen University) - triclinic added by Stan Moore (SNL) -------------------------------------------------------------------------- */ - -#include "lmptype.h" -#include "mpi.h" -#include "string.h" -#include "stdio.h" -#include "stdlib.h" -#include "math.h" -#include "pppm.h" -#include "atom.h" -#include "comm.h" -#include "commgrid.h" -#include "neighbor.h" -#include "force.h" -#include "pair.h" -#include "bond.h" -#include "angle.h" -#include "domain.h" -#include "fft3d_wrap.h" -#include "remap_wrap.h" -#include "memory.h" -#include "error.h" - -#include "math_const.h" -#include "math_special.h" - -using namespace LAMMPS_NS; -using namespace MathConst; -using namespace MathSpecial; - -#define MAXORDER 7 -#define OFFSET 16384 -#define SMALL 0.00001 -#define LARGE 10000.0 -#define EPS_HOC 1.0e-7 - -enum{REVERSE_RHO}; -enum{FORWARD_IK,FORWARD_AD,FORWARD_IK_PERATOM,FORWARD_AD_PERATOM}; - -#ifdef FFT_SINGLE -#define ZEROF 0.0f -#define ONEF 1.0f -#else -#define ZEROF 0.0 -#define ONEF 1.0 -#endif - -/* ---------------------------------------------------------------------- */ - -PPPM::PPPM(LAMMPS *lmp, int narg, char **arg) : KSpace(lmp, narg, arg) -{ - if (narg 
< 1) error->all(FLERR,"Illegal kspace_style pppm command"); - - pppmflag = 1; - group_group_enable = 1; - - accuracy_relative = fabs(force->numeric(FLERR,arg[0])); - - nfactors = 3; - factors = new int[nfactors]; - factors[0] = 2; - factors[1] = 3; - factors[2] = 5; - - MPI_Comm_rank(world,&me); - MPI_Comm_size(world,&nprocs); - - density_brick = vdx_brick = vdy_brick = vdz_brick = NULL; - density_fft = NULL; - u_brick = NULL; - v0_brick = v1_brick = v2_brick = v3_brick = v4_brick = v5_brick = NULL; - greensfn = NULL; - work1 = work2 = NULL; - vg = NULL; - fkx = fky = fkz = NULL; - - sf_precoeff1 = sf_precoeff2 = sf_precoeff3 = - sf_precoeff4 = sf_precoeff5 = sf_precoeff6 = NULL; - - density_A_brick = density_B_brick = NULL; - density_A_fft = density_B_fft = NULL; - - gf_b = NULL; - rho1d = rho_coeff = drho1d = drho_coeff = NULL; - - fft1 = fft2 = NULL; - remap = NULL; - cg = NULL; - cg_peratom = NULL; - - nmax = 0; - part2grid = NULL; - - peratom_allocate_flag = 0; - group_allocate_flag = 0; - - // define acons coefficients for estimation of kspace errors - // see JCP 109, pg 7698 for derivation of coefficients - // higher order coefficients may be computed if needed - - memory->create(acons,8,7,"pppm:acons"); - acons[1][0] = 2.0 / 3.0; - acons[2][0] = 1.0 / 50.0; - acons[2][1] = 5.0 / 294.0; - acons[3][0] = 1.0 / 588.0; - acons[3][1] = 7.0 / 1440.0; - acons[3][2] = 21.0 / 3872.0; - acons[4][0] = 1.0 / 4320.0; - acons[4][1] = 3.0 / 1936.0; - acons[4][2] = 7601.0 / 2271360.0; - acons[4][3] = 143.0 / 28800.0; - acons[5][0] = 1.0 / 23232.0; - acons[5][1] = 7601.0 / 13628160.0; - acons[5][2] = 143.0 / 69120.0; - acons[5][3] = 517231.0 / 106536960.0; - acons[5][4] = 106640677.0 / 11737571328.0; - acons[6][0] = 691.0 / 68140800.0; - acons[6][1] = 13.0 / 57600.0; - acons[6][2] = 47021.0 / 35512320.0; - acons[6][3] = 9694607.0 / 2095994880.0; - acons[6][4] = 733191589.0 / 59609088000.0; - acons[6][5] = 326190917.0 / 11700633600.0; - acons[7][0] = 1.0 / 345600.0; - 
acons[7][1] = 3617.0 / 35512320.0; - acons[7][2] = 745739.0 / 838397952.0; - acons[7][3] = 56399353.0 / 12773376000.0; - acons[7][4] = 25091609.0 / 1560084480.0; - acons[7][5] = 1755948832039.0 / 36229939200000.0; - acons[7][6] = 4887769399.0 / 37838389248.0; -} - -/* ---------------------------------------------------------------------- - free all memory -------------------------------------------------------------------------- */ - -PPPM::~PPPM() -{ - delete [] factors; - deallocate(); - if (peratom_allocate_flag) deallocate_peratom(); - if (group_allocate_flag) deallocate_groups(); - memory->destroy(part2grid); - memory->destroy(acons); -} - -/* ---------------------------------------------------------------------- - called once before run -------------------------------------------------------------------------- */ - -void PPPM::init() -{ - if (me == 0) { - if (screen) fprintf(screen,"PPPM initialization ...\n"); - if (logfile) fprintf(logfile,"PPPM initialization ...\n"); - } - - // error check - - triclinic_check(); - if (domain->triclinic && differentiation_flag == 1) - error->all(FLERR,"Cannot (yet) use PPPM with triclinic box " - "and kspace_modify diff ad"); - if (domain->triclinic && slabflag) - error->all(FLERR,"Cannot (yet) use PPPM with triclinic box and " - "slab correction"); - if (domain->dimension == 2) error->all(FLERR, - "Cannot use PPPM with 2d simulation"); - - if (!atom->q_flag) error->all(FLERR,"Kspace style requires atom attribute q"); - - if (slabflag == 0 && domain->nonperiodic > 0) - error->all(FLERR,"Cannot use nonperiodic boundaries with PPPM"); - if (slabflag) { - if (domain->xperiodic != 1 || domain->yperiodic != 1 || - domain->boundary[2][0] != 1 || domain->boundary[2][1] != 1) - error->all(FLERR,"Incorrect boundaries with slab PPPM"); - } - - if (order < 2 || order > MAXORDER) { - char str[128]; - sprintf(str,"PPPM order cannot be < 2 or > than %d",MAXORDER); - error->all(FLERR,str); - } - - // extract short-range Coulombic cutoff 
from pair style - - triclinic = domain->triclinic; - scale = 1.0; - - pair_check(); - - int itmp = 0; - double *p_cutoff = (double *) force->pair->extract("cut_coul",itmp); - if (p_cutoff == NULL) - error->all(FLERR,"KSpace style is incompatible with Pair style"); - cutoff = *p_cutoff; - - // if kspace is TIP4P, extract TIP4P params from pair style - // bond/angle are not yet init(), so insure equilibrium request is valid - - qdist = 0.0; - - if (tip4pflag) { - double *p_qdist = (double *) force->pair->extract("qdist",itmp); - int *p_typeO = (int *) force->pair->extract("typeO",itmp); - int *p_typeH = (int *) force->pair->extract("typeH",itmp); - int *p_typeA = (int *) force->pair->extract("typeA",itmp); - int *p_typeB = (int *) force->pair->extract("typeB",itmp); - if (!p_qdist || !p_typeO || !p_typeH || !p_typeA || !p_typeB) - error->all(FLERR,"KSpace style is incompatible with Pair style"); - qdist = *p_qdist; - typeO = *p_typeO; - typeH = *p_typeH; - int typeA = *p_typeA; - int typeB = *p_typeB; - - if (force->angle == NULL || force->bond == NULL || - force->angle->setflag == NULL || force->bond->setflag == NULL) - error->all(FLERR,"Bond and angle potentials must be defined for TIP4P"); - if (typeA < 1 || typeA > atom->nangletypes || - force->angle->setflag[typeA] == 0) - error->all(FLERR,"Bad TIP4P angle type for PPPM/TIP4P"); - if (typeB < 1 || typeB > atom->nbondtypes || - force->bond->setflag[typeB] == 0) - error->all(FLERR,"Bad TIP4P bond type for PPPM/TIP4P"); - double theta = force->angle->equilibrium_angle(typeA); - double blen = force->bond->equilibrium_distance(typeB); - alpha = qdist / (cos(0.5*theta) * blen); - if (domain->triclinic) - error->all(FLERR,"Cannot (yet) use PPPM with triclinic box and TIP4P"); - } - - // compute qsum & qsqsum and warn if not charge-neutral - - qsum = qsqsum = 0.0; - for (int i = 0; i < atom->nlocal; i++) { - qsum += atom->q[i]; - qsqsum += atom->q[i]*atom->q[i]; - } - - double tmp; - 
MPI_Allreduce(&qsum,&tmp,1,MPI_DOUBLE,MPI_SUM,world); - qsum = tmp; - MPI_Allreduce(&qsqsum,&tmp,1,MPI_DOUBLE,MPI_SUM,world); - qsqsum = tmp; - q2 = qsqsum * force->qqrd2e; - - if (qsqsum == 0.0) - error->all(FLERR,"Cannot use kspace solver on system with no charge"); - if (fabs(qsum) > SMALL && me == 0) { - char str[128]; - sprintf(str,"System is not charge neutral, net charge = %g",qsum); - error->warning(FLERR,str); - } - - // set accuracy (force units) from accuracy_relative or accuracy_absolute - - if (accuracy_absolute >= 0.0) accuracy = accuracy_absolute; - else accuracy = accuracy_relative * two_charge_force; - - // free all arrays previously allocated - - deallocate(); - if (peratom_allocate_flag) deallocate_peratom(); - if (group_allocate_flag) deallocate_groups(); - - // setup FFT grid resolution and g_ewald - // normally one iteration thru while loop is all that is required - // if grid stencil does not extend beyond neighbor proc - // or overlap is allowed, then done - // else reduce order and try again - - int (*procneigh)[2] = comm->procneigh; - - CommGrid *cgtmp = NULL; - int iteration = 0; - - while (order >= minorder) { - if (iteration && me == 0) - error->warning(FLERR,"Reducing PPPM order b/c stencil extends " - "beyond nearest neighbor processor"); - - if (stagger_flag && !differentiation_flag) compute_gf_denom(); - set_grid_global(); - set_grid_local(); - if (overlap_allowed) break; - - cgtmp = new CommGrid(lmp,world,1,1, - nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in, - nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out, - procneigh[0][0],procneigh[0][1],procneigh[1][0], - procneigh[1][1],procneigh[2][0],procneigh[2][1]); - cgtmp->ghost_notify(); - if (!cgtmp->ghost_overlap()) break; - delete cgtmp; - - order--; - iteration++; - } - - if (order < minorder) error->all(FLERR,"PPPM order < minimum allowed order"); - if (!overlap_allowed && cgtmp->ghost_overlap()) - error->all(FLERR,"PPPM grid stencil extends " - "beyond nearest neighbor 
processor"); - if (cgtmp) delete cgtmp; - - // adjust g_ewald - - if (!gewaldflag) adjust_gewald(); - - // calculate the final accuracy - - double estimated_accuracy = final_accuracy(); - - // print stats - - int ngrid_max,nfft_both_max; - MPI_Allreduce(&ngrid,&ngrid_max,1,MPI_INT,MPI_MAX,world); - MPI_Allreduce(&nfft_both,&nfft_both_max,1,MPI_INT,MPI_MAX,world); - - if (me == 0) { - -#ifdef FFT_SINGLE - const char fft_prec[] = "single"; -#else - const char fft_prec[] = "double"; -#endif - - if (screen) { - fprintf(screen," G vector (1/distance) = %g\n",g_ewald); - fprintf(screen," grid = %d %d %d\n",nx_pppm,ny_pppm,nz_pppm); - fprintf(screen," stencil order = %d\n",order); - fprintf(screen," estimated absolute RMS force accuracy = %g\n", - estimated_accuracy); - fprintf(screen," estimated relative force accuracy = %g\n", - estimated_accuracy/two_charge_force); - fprintf(screen," using %s precision FFTs\n",fft_prec); - fprintf(screen," 3d grid and FFT values/proc = %d %d\n", - ngrid_max,nfft_both_max); - } - if (logfile) { - fprintf(logfile," G vector (1/distance) = %g\n",g_ewald); - fprintf(logfile," grid = %d %d %d\n",nx_pppm,ny_pppm,nz_pppm); - fprintf(logfile," stencil order = %d\n",order); - fprintf(logfile," estimated absolute RMS force accuracy = %g\n", - estimated_accuracy); - fprintf(logfile," estimated relative force accuracy = %g\n", - estimated_accuracy/two_charge_force); - fprintf(logfile," using %s precision FFTs\n",fft_prec); - fprintf(logfile," 3d grid and FFT values/proc = %d %d\n", - ngrid_max,nfft_both_max); - } - } - - // allocate K-space dependent memory - // don't invoke allocate peratom() or group(), will be allocated when needed - - allocate(); - cg->ghost_notify(); - cg->setup(); - - // pre-compute Green's function denomiator expansion - // pre-compute 1d charge distribution coefficients - - compute_gf_denom(); - if (differentiation_flag == 1) compute_sf_precoeff(); - compute_rho_coeff(); -} - -/* 
---------------------------------------------------------------------- - adjust PPPM coeffs, called initially and whenever volume has changed -------------------------------------------------------------------------- */ - -void PPPM::setup() -{ - if (triclinic) { - setup_triclinic(); - return; - } - - int i,j,k,n; - double *prd; - - // volume-dependent factors - // adjust z dimension for 2d slab PPPM - // z dimension for 3d PPPM is zprd since slab_volfactor = 1.0 - - if (triclinic == 0) prd = domain->prd; - else prd = domain->prd_lamda; - - double xprd = prd[0]; - double yprd = prd[1]; - double zprd = prd[2]; - double zprd_slab = zprd*slab_volfactor; - volume = xprd * yprd * zprd_slab; - - delxinv = nx_pppm/xprd; - delyinv = ny_pppm/yprd; - delzinv = nz_pppm/zprd_slab; - - delvolinv = delxinv*delyinv*delzinv; - - double unitkx = (MY_2PI/xprd); - double unitky = (MY_2PI/yprd); - double unitkz = (MY_2PI/zprd_slab); - - // fkx,fky,fkz for my FFT grid pts - - double per; - - for (i = nxlo_fft; i <= nxhi_fft; i++) { - per = i - nx_pppm*(2*i/nx_pppm); - fkx[i] = unitkx*per; - } - - for (i = nylo_fft; i <= nyhi_fft; i++) { - per = i - ny_pppm*(2*i/ny_pppm); - fky[i] = unitky*per; - } - - for (i = nzlo_fft; i <= nzhi_fft; i++) { - per = i - nz_pppm*(2*i/nz_pppm); - fkz[i] = unitkz*per; - } - - // virial coefficients - - double sqk,vterm; - - n = 0; - for (k = nzlo_fft; k <= nzhi_fft; k++) { - for (j = nylo_fft; j <= nyhi_fft; j++) { - for (i = nxlo_fft; i <= nxhi_fft; i++) { - sqk = fkx[i]*fkx[i] + fky[j]*fky[j] + fkz[k]*fkz[k]; - if (sqk == 0.0) { - vg[n][0] = 0.0; - vg[n][1] = 0.0; - vg[n][2] = 0.0; - vg[n][3] = 0.0; - vg[n][4] = 0.0; - vg[n][5] = 0.0; - } else { - vterm = -2.0 * (1.0/sqk + 0.25/(g_ewald*g_ewald)); - vg[n][0] = 1.0 + vterm*fkx[i]*fkx[i]; - vg[n][1] = 1.0 + vterm*fky[j]*fky[j]; - vg[n][2] = 1.0 + vterm*fkz[k]*fkz[k]; - vg[n][3] = vterm*fkx[i]*fky[j]; - vg[n][4] = vterm*fkx[i]*fkz[k]; - vg[n][5] = vterm*fky[j]*fkz[k]; - } - n++; - } - } - } - - if 
(differentiation_flag == 1) compute_gf_ad(); - else compute_gf_ik(); -} - -/* ---------------------------------------------------------------------- - adjust PPPM coeffs, called initially and whenever volume has changed - for a triclinic system -------------------------------------------------------------------------- */ - -void PPPM::setup_triclinic() -{ - int i,j,k,n; - double *prd; - - // volume-dependent factors - // adjust z dimension for 2d slab PPPM - // z dimension for 3d PPPM is zprd since slab_volfactor = 1.0 - - prd = domain->prd; - - double xprd = prd[0]; - double yprd = prd[1]; - double zprd = prd[2]; - double zprd_slab = zprd*slab_volfactor; - volume = xprd * yprd * zprd_slab; - - // use lamda (0-1) coordinates - - delxinv = nx_pppm; - delyinv = ny_pppm; - delzinv = nz_pppm; - delvolinv = delxinv*delyinv*delzinv/volume; - - // fkx,fky,fkz for my FFT grid pts - - double per_i,per_j,per_k; - - n = 0; - for (k = nzlo_fft; k <= nzhi_fft; k++) { - per_k = k - nz_pppm*(2*k/nz_pppm); - for (j = nylo_fft; j <= nyhi_fft; j++) { - per_j = j - ny_pppm*(2*j/ny_pppm); - for (i = nxlo_fft; i <= nxhi_fft; i++) { - per_i = i - nx_pppm*(2*i/nx_pppm); - - double unitk_lamda[3]; - unitk_lamda[0] = 2.0*MY_PI*per_i; - unitk_lamda[1] = 2.0*MY_PI*per_j; - unitk_lamda[2] = 2.0*MY_PI*per_k; - x2lamdaT(&unitk_lamda[0],&unitk_lamda[0]); - fkx[n] = unitk_lamda[0]; - fky[n] = unitk_lamda[1]; - fkz[n] = unitk_lamda[2]; - n++; - } - } - } - - // virial coefficients - - double sqk,vterm; - - for (n = 0; n < nfft; n++) { - sqk = fkx[n]*fkx[n] + fky[n]*fky[n] + fkz[n]*fkz[n]; - if (sqk == 0.0) { - vg[n][0] = 0.0; - vg[n][1] = 0.0; - vg[n][2] = 0.0; - vg[n][3] = 0.0; - vg[n][4] = 0.0; - vg[n][5] = 0.0; - } else { - vterm = -2.0 * (1.0/sqk + 0.25/(g_ewald*g_ewald)); - vg[n][0] = 1.0 + vterm*fkx[n]*fkx[n]; - vg[n][1] = 1.0 + vterm*fky[n]*fky[n]; - vg[n][2] = 1.0 + vterm*fkz[n]*fkz[n]; - vg[n][3] = vterm*fkx[n]*fky[n]; - vg[n][4] = vterm*fkx[n]*fkz[n]; - vg[n][5] = vterm*fky[n]*fkz[n]; - 
} - } - - compute_gf_ik_triclinic(); -} - -/* ---------------------------------------------------------------------- - reset local grid arrays and communication stencils - called by fix balance b/c it changed sizes of processor sub-domains -------------------------------------------------------------------------- */ - -void PPPM::setup_grid() -{ - // free all arrays previously allocated - - deallocate(); - if (peratom_allocate_flag) deallocate_peratom(); - if (group_allocate_flag) deallocate_groups(); - - // reset portion of global grid that each proc owns - - set_grid_local(); - - // reallocate K-space dependent memory - // check if grid communication is now overlapping if not allowed - // don't invoke allocate peratom() or group(), will be allocated when needed - - allocate(); - - cg->ghost_notify(); - if (overlap_allowed == 0 && cg->ghost_overlap()) - error->all(FLERR,"PPPM grid stencil extends " - "beyond nearest neighbor processor"); - cg->setup(); - - // pre-compute Green's function denomiator expansion - // pre-compute 1d charge distribution coefficients - - compute_gf_denom(); - if (differentiation_flag == 1) compute_sf_precoeff(); - compute_rho_coeff(); - - // pre-compute volume-dependent coeffs - - setup(); -} - -/* ---------------------------------------------------------------------- - compute the PPPM long-range force, energy, virial -------------------------------------------------------------------------- */ - -void PPPM::compute(int eflag, int vflag) -{ - int i,j; - - // set energy/virial flags - // invoke allocate_peratom() if needed for first time - - if (eflag || vflag) ev_setup(eflag,vflag); - else evflag = evflag_atom = eflag_global = vflag_global = - eflag_atom = vflag_atom = 0; - - if (evflag_atom && !peratom_allocate_flag) { - allocate_peratom(); - cg_peratom->ghost_notify(); - cg_peratom->setup(); - } - - // convert atoms from box to lamda coords - - if (triclinic == 0) boxlo = domain->boxlo; - else { - boxlo = domain->boxlo_lamda; - 
domain->x2lamda(atom->nlocal); - } - - // extend size of per-atom arrays if necessary - - if (atom->nlocal > nmax) { - memory->destroy(part2grid); - nmax = atom->nmax; - memory->create(part2grid,nmax,3,"pppm:part2grid"); - } - - // find grid points for all my particles - // map my particle charge onto my local 3d density grid - - particle_map(); - make_rho(); - - // all procs communicate density values from their ghost cells - // to fully sum contribution in their 3d bricks - // remap from 3d decomposition to FFT decomposition - - cg->reverse_comm(this,REVERSE_RHO); - brick2fft(); - - // compute potential gradient on my FFT grid and - // portion of e_long on this proc's FFT grid - // return gradients (electric fields) in 3d brick decomposition - // also performs per-atom calculations via poisson_peratom() - - poisson(); - - // all procs communicate E-field values - // to fill ghost cells surrounding their 3d bricks - - if (differentiation_flag == 1) cg->forward_comm(this,FORWARD_AD); - else cg->forward_comm(this,FORWARD_IK); - - // extra per-atom energy/virial communication - - if (evflag_atom) { - if (differentiation_flag == 1 && vflag_atom) - cg_peratom->forward_comm(this,FORWARD_AD_PERATOM); - else if (differentiation_flag == 0) - cg_peratom->forward_comm(this,FORWARD_IK_PERATOM); - } - - // calculate the force on my particles - - fieldforce(); - - // extra per-atom energy/virial communication - - if (evflag_atom) fieldforce_peratom(); - - // sum global energy across procs and add in volume-dependent term - - const double qscale = force->qqrd2e * scale; - - if (eflag_global) { - double energy_all; - MPI_Allreduce(&energy,&energy_all,1,MPI_DOUBLE,MPI_SUM,world); - energy = energy_all; - - energy *= 0.5*volume; - energy -= g_ewald*qsqsum/MY_PIS + - MY_PI2*qsum*qsum / (g_ewald*g_ewald*volume); - energy *= qscale; - } - - // sum global virial across procs - - if (vflag_global) { - double virial_all[6]; - MPI_Allreduce(virial,virial_all,6,MPI_DOUBLE,MPI_SUM,world); - 
for (i = 0; i < 6; i++) virial[i] = 0.5*qscale*volume*virial_all[i]; - } - - // per-atom energy/virial - // energy includes self-energy correction - // notal accounts for TIP4P tallying eatom/vatom for ghost atoms - - if (evflag_atom) { - double *q = atom->q; - int nlocal = atom->nlocal; - int ntotal = nlocal; - if (tip4pflag) ntotal += atom->nghost; - - if (eflag_atom) { - for (i = 0; i < nlocal; i++) { - eatom[i] *= 0.5; - eatom[i] -= g_ewald*q[i]*q[i]/MY_PIS + MY_PI2*q[i]*qsum / - (g_ewald*g_ewald*volume); - eatom[i] *= qscale; - } - for (i = nlocal; i < ntotal; i++) eatom[i] *= 0.5*qscale; - } - - if (vflag_atom) { - for (i = 0; i < ntotal; i++) - for (j = 0; j < 6; j++) vatom[i][j] *= 0.5*qscale; - } - } - - // 2d slab correction - - if (slabflag == 1) slabcorr(); - - // convert atoms back from lamda to box coords - - if (triclinic) domain->lamda2x(atom->nlocal); -} - -/* ---------------------------------------------------------------------- - allocate memory that depends on # of K-vectors and order -------------------------------------------------------------------------- */ - -void PPPM::allocate() -{ - memory->create3d_offset(density_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, - nxlo_out,nxhi_out,"pppm:density_brick"); - - memory->create(density_fft,nfft_both,"pppm:density_fft"); - memory->create(greensfn,nfft_both,"pppm:greensfn"); - memory->create(work1,2*nfft_both,"pppm:work1"); - memory->create(work2,2*nfft_both,"pppm:work2"); - memory->create(vg,nfft_both,6,"pppm:vg"); - - if (triclinic == 0) { - memory->create1d_offset(fkx,nxlo_fft,nxhi_fft,"pppm:fkx"); - memory->create1d_offset(fky,nylo_fft,nyhi_fft,"pppm:fky"); - memory->create1d_offset(fkz,nzlo_fft,nzhi_fft,"pppm:fkz"); - } else { - memory->create(fkx,nfft_both,"pppm:fkx"); - memory->create(fky,nfft_both,"pppm:fky"); - memory->create(fkz,nfft_both,"pppm:fkz"); - } - - if (differentiation_flag == 1) { - memory->create3d_offset(u_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, - 
nxlo_out,nxhi_out,"pppm:u_brick"); - - memory->create(sf_precoeff1,nfft_both,"pppm:sf_precoeff1"); - memory->create(sf_precoeff2,nfft_both,"pppm:sf_precoeff2"); - memory->create(sf_precoeff3,nfft_both,"pppm:sf_precoeff3"); - memory->create(sf_precoeff4,nfft_both,"pppm:sf_precoeff4"); - memory->create(sf_precoeff5,nfft_both,"pppm:sf_precoeff5"); - memory->create(sf_precoeff6,nfft_both,"pppm:sf_precoeff6"); - - } else { - memory->create3d_offset(vdx_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, - nxlo_out,nxhi_out,"pppm:vdx_brick"); - memory->create3d_offset(vdy_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, - nxlo_out,nxhi_out,"pppm:vdy_brick"); - memory->create3d_offset(vdz_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, - nxlo_out,nxhi_out,"pppm:vdz_brick"); - } - - // summation coeffs - - order_allocated = order; - if (!stagger_flag) memory->create(gf_b,order,"pppm:gf_b"); - memory->create2d_offset(rho1d,3,-order/2,order/2,"pppm:rho1d"); - memory->create2d_offset(drho1d,3,-order/2,order/2,"pppm:drho1d"); - memory->create2d_offset(rho_coeff,order,(1-order)/2,order/2,"pppm:rho_coeff"); - memory->create2d_offset(drho_coeff,order,(1-order)/2,order/2, - "pppm:drho_coeff"); - - // create 2 FFTs and a Remap - // 1st FFT keeps data in FFT decompostion - // 2nd FFT returns data in 3d brick decomposition - // remap takes data from 3d brick to FFT decomposition - - int tmp; - - fft1 = new FFT3d(lmp,world,nx_pppm,ny_pppm,nz_pppm, - nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft, - nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft, - 0,0,&tmp); - - fft2 = new FFT3d(lmp,world,nx_pppm,ny_pppm,nz_pppm, - nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft, - nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in, - 0,0,&tmp); - - remap = new Remap(lmp,world, - nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in, - nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft, - 1,0,0,FFT_PRECISION); - - // create ghost grid object for rho and electric field communication - - int (*procneigh)[2] = 
comm->procneigh; - - if (differentiation_flag == 1) - cg = new CommGrid(lmp,world,1,1, - nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in, - nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out, - procneigh[0][0],procneigh[0][1],procneigh[1][0], - procneigh[1][1],procneigh[2][0],procneigh[2][1]); - else - cg = new CommGrid(lmp,world,3,1, - nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in, - nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out, - procneigh[0][0],procneigh[0][1],procneigh[1][0], - procneigh[1][1],procneigh[2][0],procneigh[2][1]); -} - -/* ---------------------------------------------------------------------- - deallocate memory that depends on # of K-vectors and order -------------------------------------------------------------------------- */ - -void PPPM::deallocate() -{ - memory->destroy3d_offset(density_brick,nzlo_out,nylo_out,nxlo_out); - - if (differentiation_flag == 1) { - memory->destroy3d_offset(u_brick,nzlo_out,nylo_out,nxlo_out); - memory->destroy(sf_precoeff1); - memory->destroy(sf_precoeff2); - memory->destroy(sf_precoeff3); - memory->destroy(sf_precoeff4); - memory->destroy(sf_precoeff5); - memory->destroy(sf_precoeff6); - } else { - memory->destroy3d_offset(vdx_brick,nzlo_out,nylo_out,nxlo_out); - memory->destroy3d_offset(vdy_brick,nzlo_out,nylo_out,nxlo_out); - memory->destroy3d_offset(vdz_brick,nzlo_out,nylo_out,nxlo_out); - } - - memory->destroy(density_fft); - memory->destroy(greensfn); - memory->destroy(work1); - memory->destroy(work2); - memory->destroy(vg); - - if (triclinic == 0) { - memory->destroy1d_offset(fkx,nxlo_fft); - memory->destroy1d_offset(fky,nylo_fft); - memory->destroy1d_offset(fkz,nzlo_fft); - } else { - memory->destroy(fkx); - memory->destroy(fky); - memory->destroy(fkz); - } - - memory->destroy(gf_b); - if (stagger_flag) gf_b = NULL; - memory->destroy2d_offset(rho1d,-order_allocated/2); - memory->destroy2d_offset(drho1d,-order_allocated/2); - memory->destroy2d_offset(rho_coeff,(1-order_allocated)/2); - 
memory->destroy2d_offset(drho_coeff,(1-order_allocated)/2); - - delete fft1; - delete fft2; - delete remap; - delete cg; -} - -/* ---------------------------------------------------------------------- - allocate per-atom memory that depends on # of K-vectors and order -------------------------------------------------------------------------- */ - -void PPPM::allocate_peratom() -{ - peratom_allocate_flag = 1; - - if (differentiation_flag != 1) - memory->create3d_offset(u_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, - nxlo_out,nxhi_out,"pppm:u_brick"); - - memory->create3d_offset(v0_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, - nxlo_out,nxhi_out,"pppm:v0_brick"); - - memory->create3d_offset(v1_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, - nxlo_out,nxhi_out,"pppm:v1_brick"); - memory->create3d_offset(v2_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, - nxlo_out,nxhi_out,"pppm:v2_brick"); - memory->create3d_offset(v3_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, - nxlo_out,nxhi_out,"pppm:v3_brick"); - memory->create3d_offset(v4_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, - nxlo_out,nxhi_out,"pppm:v4_brick"); - memory->create3d_offset(v5_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, - nxlo_out,nxhi_out,"pppm:v5_brick"); - - // create ghost grid object for rho and electric field communication - - int (*procneigh)[2] = comm->procneigh; - - if (differentiation_flag == 1) - cg_peratom = - new CommGrid(lmp,world,6,1, - nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in, - nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out, - procneigh[0][0],procneigh[0][1],procneigh[1][0], - procneigh[1][1],procneigh[2][0],procneigh[2][1]); - else - cg_peratom = - new CommGrid(lmp,world,7,1, - nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in, - nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out, - procneigh[0][0],procneigh[0][1],procneigh[1][0], - procneigh[1][1],procneigh[2][0],procneigh[2][1]); -} - -/* ---------------------------------------------------------------------- - deallocate per-atom memory 
that depends on # of K-vectors and order -------------------------------------------------------------------------- */ - -void PPPM::deallocate_peratom() -{ - peratom_allocate_flag = 0; - - memory->destroy3d_offset(v0_brick,nzlo_out,nylo_out,nxlo_out); - memory->destroy3d_offset(v1_brick,nzlo_out,nylo_out,nxlo_out); - memory->destroy3d_offset(v2_brick,nzlo_out,nylo_out,nxlo_out); - memory->destroy3d_offset(v3_brick,nzlo_out,nylo_out,nxlo_out); - memory->destroy3d_offset(v4_brick,nzlo_out,nylo_out,nxlo_out); - memory->destroy3d_offset(v5_brick,nzlo_out,nylo_out,nxlo_out); - - if (differentiation_flag != 1) - memory->destroy3d_offset(u_brick,nzlo_out,nylo_out,nxlo_out); - - delete cg_peratom; -} - -/* ---------------------------------------------------------------------- - set global size of PPPM grid = nx,ny,nz_pppm - used for charge accumulation, FFTs, and electric field interpolation -------------------------------------------------------------------------- */ - -void PPPM::set_grid_global() -{ - // use xprd,yprd,zprd (even if triclinic, and then scale later) - // adjust z dimension for 2d slab PPPM - // 3d PPPM just uses zprd since slab_volfactor = 1.0 - - double xprd = domain->xprd; - double yprd = domain->yprd; - double zprd = domain->zprd; - double zprd_slab = zprd*slab_volfactor; - - // make initial g_ewald estimate - // based on desired accuracy and real space cutoff - // fluid-occupied volume used to estimate real-space error - // zprd used rather than zprd_slab - - double h; - bigint natoms = atom->natoms; - - if (!gewaldflag) { - if (accuracy <= 0.0) - error->all(FLERR,"KSpace accuracy must be > 0"); - g_ewald = accuracy*sqrt(natoms*cutoff*xprd*yprd*zprd) / (2.0*q2); - if (g_ewald >= 1.0) g_ewald = (1.35 - 0.15*log(accuracy))/cutoff; - else g_ewald = sqrt(-log(g_ewald)) / cutoff; - } - - // set optimal nx_pppm,ny_pppm,nz_pppm based on order and accuracy - // nz_pppm uses extended zprd_slab instead of zprd - // reduce it until accuracy target is met - - if 
(!gridflag) { - - if (differentiation_flag == 1 || stagger_flag) { - - h = h_x = h_y = h_z = 4.0/g_ewald; - int count = 0; - while (1) { - - // set grid dimension - nx_pppm = static_cast (xprd/h_x); - ny_pppm = static_cast (yprd/h_y); - nz_pppm = static_cast (zprd_slab/h_z); - - if (nx_pppm <= 1) nx_pppm = 2; - if (ny_pppm <= 1) ny_pppm = 2; - if (nz_pppm <= 1) nz_pppm = 2; - - //set local grid dimension - int npey_fft,npez_fft; - if (nz_pppm >= nprocs) { - npey_fft = 1; - npez_fft = nprocs; - } else procs2grid2d(nprocs,ny_pppm,nz_pppm,&npey_fft,&npez_fft); - - int me_y = me % npey_fft; - int me_z = me / npey_fft; - - nxlo_fft = 0; - nxhi_fft = nx_pppm - 1; - nylo_fft = me_y*ny_pppm/npey_fft; - nyhi_fft = (me_y+1)*ny_pppm/npey_fft - 1; - nzlo_fft = me_z*nz_pppm/npez_fft; - nzhi_fft = (me_z+1)*nz_pppm/npez_fft - 1; - - double df_kspace = compute_df_kspace(); - - count++; - - // break loop if the accuracy has been reached or - // too many loops have been performed - - if (df_kspace <= accuracy) break; - if (count > 500) error->all(FLERR, "Could not compute grid size"); - h *= 0.95; - h_x = h_y = h_z = h; - } - - } else { - - double err; - h_x = h_y = h_z = 1.0/g_ewald; - - nx_pppm = static_cast (xprd/h_x) + 1; - ny_pppm = static_cast (yprd/h_y) + 1; - nz_pppm = static_cast (zprd_slab/h_z) + 1; - - err = estimate_ik_error(h_x,xprd,natoms); - while (err > accuracy) { - err = estimate_ik_error(h_x,xprd,natoms); - nx_pppm++; - h_x = xprd/nx_pppm; - } - - err = estimate_ik_error(h_y,yprd,natoms); - while (err > accuracy) { - err = estimate_ik_error(h_y,yprd,natoms); - ny_pppm++; - h_y = yprd/ny_pppm; - } - - err = estimate_ik_error(h_z,zprd_slab,natoms); - while (err > accuracy) { - err = estimate_ik_error(h_z,zprd_slab,natoms); - nz_pppm++; - h_z = zprd_slab/nz_pppm; - } - } - - // scale grid for triclinic skew - - if (triclinic) { - double tmp[3]; - tmp[0] = nx_pppm/xprd; - tmp[1] = ny_pppm/yprd; - tmp[2] = nz_pppm/zprd; - lamda2xT(&tmp[0],&tmp[0]); - nx_pppm = 
static_cast(tmp[0]) + 1; - ny_pppm = static_cast(tmp[1]) + 1; - nz_pppm = static_cast(tmp[2]) + 1; - } - } - - // boost grid size until it is factorable - - while (!factorable(nx_pppm)) nx_pppm++; - while (!factorable(ny_pppm)) ny_pppm++; - while (!factorable(nz_pppm)) nz_pppm++; - - if (triclinic == 0) { - h_x = xprd/nx_pppm; - h_y = yprd/ny_pppm; - h_z = zprd_slab/nz_pppm; - } else { - double tmp[3]; - tmp[0] = nx_pppm; - tmp[1] = ny_pppm; - tmp[2] = nz_pppm; - x2lamdaT(&tmp[0],&tmp[0]); - h_x = 1.0/tmp[0]; - h_y = 1.0/tmp[1]; - h_z = 1.0/tmp[2]; - } - - if (nx_pppm >= OFFSET || ny_pppm >= OFFSET || nz_pppm >= OFFSET) - error->all(FLERR,"PPPM grid is too large"); -} - -/* ---------------------------------------------------------------------- - check if all factors of n are in list of factors - return 1 if yes, 0 if no -------------------------------------------------------------------------- */ - -int PPPM::factorable(int n) -{ - int i; - - while (n > 1) { - for (i = 0; i < nfactors; i++) { - if (n % factors[i] == 0) { - n /= factors[i]; - break; - } - } - if (i == nfactors) return 0; - } - - return 1; -} - -/* ---------------------------------------------------------------------- - compute estimated kspace force error -------------------------------------------------------------------------- */ - -double PPPM::compute_df_kspace() -{ - double xprd = domain->xprd; - double yprd = domain->yprd; - double zprd = domain->zprd; - double zprd_slab = zprd*slab_volfactor; - bigint natoms = atom->natoms; - double df_kspace = 0.0; - if (differentiation_flag == 1 || stagger_flag) { - double qopt = compute_qopt(); - df_kspace = sqrt(qopt/natoms)*q2/(xprd*yprd*zprd_slab); - } else { - double lprx = estimate_ik_error(h_x,xprd,natoms); - double lpry = estimate_ik_error(h_y,yprd,natoms); - double lprz = estimate_ik_error(h_z,zprd_slab,natoms); - df_kspace = sqrt(lprx*lprx + lpry*lpry + lprz*lprz) / sqrt(3.0); - } - return df_kspace; -} - -/* 
---------------------------------------------------------------------- - compute qopt -------------------------------------------------------------------------- */ - -double PPPM::compute_qopt() -{ - double qopt = 0.0; - double *prd = domain->prd; - - const double xprd = prd[0]; - const double yprd = prd[1]; - const double zprd = prd[2]; - const double zprd_slab = zprd*slab_volfactor; - volume = xprd * yprd * zprd_slab; - - const double unitkx = (MY_2PI/xprd); - const double unitky = (MY_2PI/yprd); - const double unitkz = (MY_2PI/zprd_slab); - - double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz; - double u1, u2, sqk; - double sum1,sum2,sum3,sum4,dot2; - - int k,l,m,nx,ny,nz; - const int twoorder = 2*order; - - for (m = nzlo_fft; m <= nzhi_fft; m++) { - const int mper = m - nz_pppm*(2*m/nz_pppm); - - for (l = nylo_fft; l <= nyhi_fft; l++) { - const int lper = l - ny_pppm*(2*l/ny_pppm); - - for (k = nxlo_fft; k <= nxhi_fft; k++) { - const int kper = k - nx_pppm*(2*k/nx_pppm); - - sqk = square(unitkx*kper) + square(unitky*lper) + square(unitkz*mper); - - if (sqk != 0.0) { - - sum1 = 0.0; - sum2 = 0.0; - sum3 = 0.0; - sum4 = 0.0; - for (nx = -2; nx <= 2; nx++) { - qx = unitkx*(kper+nx_pppm*nx); - sx = exp(-0.25*square(qx/g_ewald)); - argx = 0.5*qx*xprd/nx_pppm; - wx = powsinxx(argx,twoorder); - qx *= qx; - - for (ny = -2; ny <= 2; ny++) { - qy = unitky*(lper+ny_pppm*ny); - sy = exp(-0.25*square(qy/g_ewald)); - argy = 0.5*qy*yprd/ny_pppm; - wy = powsinxx(argy,twoorder); - qy *= qy; - - for (nz = -2; nz <= 2; nz++) { - qz = unitkz*(mper+nz_pppm*nz); - sz = exp(-0.25*square(qz/g_ewald)); - argz = 0.5*qz*zprd_slab/nz_pppm; - wz = powsinxx(argz,twoorder); - qz *= qz; - - dot2 = qx+qy+qz; - u1 = sx*sy*sz; - u2 = wx*wy*wz; - sum1 += u1*u1/dot2*MY_4PI*MY_4PI; - sum2 += u1 * u2 * MY_4PI; - sum3 += u2; - sum4 += dot2*u2; - } - } - } - sum2 *= sum2; - qopt += sum1 - sum2/(sum3*sum4); - } - } - } - } - double qopt_all; - MPI_Allreduce(&qopt,&qopt_all,1,MPI_DOUBLE,MPI_SUM,world); - 
return qopt_all; -} - -/* ---------------------------------------------------------------------- - estimate kspace force error for ik method -------------------------------------------------------------------------- */ - -double PPPM::estimate_ik_error(double h, double prd, bigint natoms) -{ - double sum = 0.0; - for (int m = 0; m < order; m++) - sum += acons[order][m] * pow(h*g_ewald,2.0*m); - double value = q2 * pow(h*g_ewald,(double)order) * - sqrt(g_ewald*prd*sqrt(MY_2PI)*sum/natoms) / (prd*prd); - - return value; -} - -/* ---------------------------------------------------------------------- - adjust the g_ewald parameter to near its optimal value - using a Newton-Raphson solver -------------------------------------------------------------------------- */ - -void PPPM::adjust_gewald() -{ - double dx; - - for (int i = 0; i < LARGE; i++) { - dx = newton_raphson_f() / derivf(); - g_ewald -= dx; - if (fabs(newton_raphson_f()) < SMALL) return; - } - - char str[128]; - sprintf(str, "Could not compute g_ewald"); - error->all(FLERR, str); -} - -/* ---------------------------------------------------------------------- - Calculate f(x) using Newton-Raphson solver - ------------------------------------------------------------------------- */ - -double PPPM::newton_raphson_f() -{ - double xprd = domain->xprd; - double yprd = domain->yprd; - double zprd = domain->zprd; - bigint natoms = atom->natoms; - - double df_rspace = 2.0*q2*exp(-g_ewald*g_ewald*cutoff*cutoff) / - sqrt(natoms*cutoff*xprd*yprd*zprd); - - double df_kspace = compute_df_kspace(); - - return df_rspace - df_kspace; -} - -/* ---------------------------------------------------------------------- - Calculate numerical derivative f'(x) using forward difference - [f(x + h) - f(x)] / h - ------------------------------------------------------------------------- */ - -double PPPM::derivf() -{ - double h = 0.000001; //Derivative step-size - double df,f1,f2,g_ewald_old; - - f1 = newton_raphson_f(); - g_ewald_old = 
g_ewald; - g_ewald += h; - f2 = newton_raphson_f(); - g_ewald = g_ewald_old; - df = (f2 - f1)/h; - - return df; -} - -/* ---------------------------------------------------------------------- - Calculate the final estimate of the accuracy -------------------------------------------------------------------------- */ - -double PPPM::final_accuracy() -{ - double xprd = domain->xprd; - double yprd = domain->yprd; - double zprd = domain->zprd; - double zprd_slab = zprd*slab_volfactor; - bigint natoms = atom->natoms; - - double df_kspace = compute_df_kspace(); - double q2_over_sqrt = q2 / sqrt(natoms*cutoff*xprd*yprd*zprd); - double df_rspace = 2.0 * q2_over_sqrt * exp(-g_ewald*g_ewald*cutoff*cutoff); - double df_table = estimate_table_accuracy(q2_over_sqrt,df_rspace); - double estimated_accuracy = sqrt(df_kspace*df_kspace + df_rspace*df_rspace + - df_table*df_table); - - return estimated_accuracy; -} - -/* ---------------------------------------------------------------------- - set local subset of PPPM/FFT grid that I own - n xyz lo/hi in = 3d brick that I own (inclusive) - n xyz lo/hi out = 3d brick + ghost cells in 6 directions (inclusive) - n xyz lo/hi fft = FFT columns that I own (all of x dim, 2d decomp in yz) -------------------------------------------------------------------------- */ - -void PPPM::set_grid_local() -{ - // global indices of PPPM grid range from 0 to N-1 - // nlo_in,nhi_in = lower/upper limits of the 3d sub-brick of - // global PPPM grid that I own without ghost cells - // for slab PPPM, assign z grid as if it were not extended - - nxlo_in = static_cast (comm->xsplit[comm->myloc[0]] * nx_pppm); - nxhi_in = static_cast (comm->xsplit[comm->myloc[0]+1] * nx_pppm) - 1; - - nylo_in = static_cast (comm->ysplit[comm->myloc[1]] * ny_pppm); - nyhi_in = static_cast (comm->ysplit[comm->myloc[1]+1] * ny_pppm) - 1; - - nzlo_in = static_cast - (comm->zsplit[comm->myloc[2]] * nz_pppm/slab_volfactor); - nzhi_in = static_cast - (comm->zsplit[comm->myloc[2]+1] * 
nz_pppm/slab_volfactor) - 1; - - // nlower,nupper = stencil size for mapping particles to PPPM grid - - nlower = -(order-1)/2; - nupper = order/2; - - // shift values for particle <-> grid mapping - // add/subtract OFFSET to avoid int(-0.75) = 0 when want it to be -1 - - if (order % 2) shift = OFFSET + 0.5; - else shift = OFFSET; - if (order % 2) shiftone = 0.0; - else shiftone = 0.5; - - // nlo_out,nhi_out = lower/upper limits of the 3d sub-brick of - // global PPPM grid that my particles can contribute charge to - // effectively nlo_in,nhi_in + ghost cells - // nlo,nhi = global coords of grid pt to "lower left" of smallest/largest - // position a particle in my box can be at - // dist[3] = particle position bound = subbox + skin/2.0 + qdist - // qdist = offset due to TIP4P fictitious charge - // convert to triclinic if necessary - // nlo_out,nhi_out = nlo,nhi + stencil size for particle mapping - // for slab PPPM, assign z grid as if it were not extended - - double *prd,*sublo,*subhi; - - if (triclinic == 0) { - prd = domain->prd; - boxlo = domain->boxlo; - sublo = domain->sublo; - subhi = domain->subhi; - } else { - prd = domain->prd_lamda; - boxlo = domain->boxlo_lamda; - sublo = domain->sublo_lamda; - subhi = domain->subhi_lamda; - } - - double xprd = prd[0]; - double yprd = prd[1]; - double zprd = prd[2]; - double zprd_slab = zprd*slab_volfactor; - - double dist[3]; - double cuthalf = 0.5*neighbor->skin + qdist; - if (triclinic == 0) dist[0] = dist[1] = dist[2] = cuthalf; - else kspacebbox(cuthalf,&dist[0]); - - int nlo,nhi; - - nlo = static_cast ((sublo[0]-dist[0]-boxlo[0]) * - nx_pppm/xprd + shift) - OFFSET; - nhi = static_cast ((subhi[0]+dist[0]-boxlo[0]) * - nx_pppm/xprd + shift) - OFFSET; - nxlo_out = nlo + nlower; - nxhi_out = nhi + nupper; - - nlo = static_cast ((sublo[1]-dist[1]-boxlo[1]) * - ny_pppm/yprd + shift) - OFFSET; - nhi = static_cast ((subhi[1]+dist[1]-boxlo[1]) * - ny_pppm/yprd + shift) - OFFSET; - nylo_out = nlo + nlower; - nyhi_out = nhi 
+ nupper; - - nlo = static_cast ((sublo[2]-dist[2]-boxlo[2]) * - nz_pppm/zprd_slab + shift) - OFFSET; - nhi = static_cast ((subhi[2]+dist[2]-boxlo[2]) * - nz_pppm/zprd_slab + shift) - OFFSET; - nzlo_out = nlo + nlower; - nzhi_out = nhi + nupper; - - if (stagger_flag) { - nxhi_out++; - nyhi_out++; - nzhi_out++; - } - - // for slab PPPM, change the grid boundary for processors at +z end - // to include the empty volume between periodically repeating slabs - // for slab PPPM, want charge data communicated from -z proc to +z proc, - // but not vice versa, also want field data communicated from +z proc to - // -z proc, but not vice versa - // this is accomplished by nzhi_in = nzhi_out on +z end (no ghost cells) - // also insure no other procs use ghost cells beyond +z limit - - if (slabflag == 1) { - if (comm->myloc[2] == comm->procgrid[2]-1) - nzhi_in = nzhi_out = nz_pppm - 1; - nzhi_out = MIN(nzhi_out,nz_pppm-1); - } - - // decomposition of FFT mesh - // global indices range from 0 to N-1 - // proc owns entire x-dimension, clumps of columns in y,z dimensions - // npey_fft,npez_fft = # of procs in y,z dims - // if nprocs is small enough, proc can own 1 or more entire xy planes, - // else proc owns 2d sub-blocks of yz plane - // me_y,me_z = which proc (0-npe_fft-1) I am in y,z dimensions - // nlo_fft,nhi_fft = lower/upper limit of the section - // of the global FFT mesh that I own - - int npey_fft,npez_fft; - if (nz_pppm >= nprocs) { - npey_fft = 1; - npez_fft = nprocs; - } else procs2grid2d(nprocs,ny_pppm,nz_pppm,&npey_fft,&npez_fft); - - int me_y = me % npey_fft; - int me_z = me / npey_fft; - - nxlo_fft = 0; - nxhi_fft = nx_pppm - 1; - nylo_fft = me_y*ny_pppm/npey_fft; - nyhi_fft = (me_y+1)*ny_pppm/npey_fft - 1; - nzlo_fft = me_z*nz_pppm/npez_fft; - nzhi_fft = (me_z+1)*nz_pppm/npez_fft - 1; - - // PPPM grid pts owned by this proc, including ghosts - - ngrid = (nxhi_out-nxlo_out+1) * (nyhi_out-nylo_out+1) * - (nzhi_out-nzlo_out+1); - - // FFT grids owned by this proc, 
without ghosts - // nfft = FFT points in FFT decomposition on this proc - // nfft_brick = FFT points in 3d brick-decomposition on this proc - // nfft_both = greater of 2 values - - nfft = (nxhi_fft-nxlo_fft+1) * (nyhi_fft-nylo_fft+1) * - (nzhi_fft-nzlo_fft+1); - int nfft_brick = (nxhi_in-nxlo_in+1) * (nyhi_in-nylo_in+1) * - (nzhi_in-nzlo_in+1); - nfft_both = MAX(nfft,nfft_brick); -} - -/* ---------------------------------------------------------------------- - pre-compute Green's function denominator expansion coeffs, Gamma(2n) -------------------------------------------------------------------------- */ - -void PPPM::compute_gf_denom() -{ - int k,l,m; - - for (l = 1; l < order; l++) gf_b[l] = 0.0; - gf_b[0] = 1.0; - - for (m = 1; m < order; m++) { - for (l = m; l > 0; l--) - gf_b[l] = 4.0 * (gf_b[l]*(l-m)*(l-m-0.5)-gf_b[l-1]*(l-m-1)*(l-m-1)); - gf_b[0] = 4.0 * (gf_b[0]*(l-m)*(l-m-0.5)); - } - - bigint ifact = 1; - for (k = 1; k < 2*order; k++) ifact *= k; - double gaminv = 1.0/ifact; - for (l = 0; l < order; l++) gf_b[l] *= gaminv; -} - -/* ---------------------------------------------------------------------- - pre-compute modified (Hockney-Eastwood) Coulomb Green's function -------------------------------------------------------------------------- */ - -void PPPM::compute_gf_ik() -{ - const double * const prd = domain->prd; - - const double xprd = prd[0]; - const double yprd = prd[1]; - const double zprd = prd[2]; - const double zprd_slab = zprd*slab_volfactor; - const double unitkx = (MY_2PI/xprd); - const double unitky = (MY_2PI/yprd); - const double unitkz = (MY_2PI/zprd_slab); - - double snx,sny,snz; - double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz; - double sum1,dot1,dot2; - double numerator,denominator; - double sqk; - - int k,l,m,n,nx,ny,nz,kper,lper,mper; - - const int nbx = static_cast ((g_ewald*xprd/(MY_PI*nx_pppm)) * - pow(-log(EPS_HOC),0.25)); - const int nby = static_cast ((g_ewald*yprd/(MY_PI*ny_pppm)) * - pow(-log(EPS_HOC),0.25)); - const int 
nbz = static_cast ((g_ewald*zprd_slab/(MY_PI*nz_pppm)) * - pow(-log(EPS_HOC),0.25)); - const int twoorder = 2*order; - - n = 0; - for (m = nzlo_fft; m <= nzhi_fft; m++) { - mper = m - nz_pppm*(2*m/nz_pppm); - snz = square(sin(0.5*unitkz*mper*zprd_slab/nz_pppm)); - - for (l = nylo_fft; l <= nyhi_fft; l++) { - lper = l - ny_pppm*(2*l/ny_pppm); - sny = square(sin(0.5*unitky*lper*yprd/ny_pppm)); - - for (k = nxlo_fft; k <= nxhi_fft; k++) { - kper = k - nx_pppm*(2*k/nx_pppm); - snx = square(sin(0.5*unitkx*kper*xprd/nx_pppm)); - - sqk = square(unitkx*kper) + square(unitky*lper) + square(unitkz*mper); - - if (sqk != 0.0) { - numerator = 12.5663706/sqk; - denominator = gf_denom(snx,sny,snz); - sum1 = 0.0; - - for (nx = -nbx; nx <= nbx; nx++) { - qx = unitkx*(kper+nx_pppm*nx); - sx = exp(-0.25*square(qx/g_ewald)); - argx = 0.5*qx*xprd/nx_pppm; - wx = powsinxx(argx,twoorder); - - for (ny = -nby; ny <= nby; ny++) { - qy = unitky*(lper+ny_pppm*ny); - sy = exp(-0.25*square(qy/g_ewald)); - argy = 0.5*qy*yprd/ny_pppm; - wy = powsinxx(argy,twoorder); - - for (nz = -nbz; nz <= nbz; nz++) { - qz = unitkz*(mper+nz_pppm*nz); - sz = exp(-0.25*square(qz/g_ewald)); - argz = 0.5*qz*zprd_slab/nz_pppm; - wz = powsinxx(argz,twoorder); - - dot1 = unitkx*kper*qx + unitky*lper*qy + unitkz*mper*qz; - dot2 = qx*qx+qy*qy+qz*qz; - sum1 += (dot1/dot2) * sx*sy*sz * wx*wy*wz; - } - } - } - greensfn[n++] = numerator*sum1/denominator; - } else greensfn[n++] = 0.0; - } - } - } -} - -/* ---------------------------------------------------------------------- - pre-compute modified (Hockney-Eastwood) Coulomb Green's function - for a triclinic system -------------------------------------------------------------------------- */ - -void PPPM::compute_gf_ik_triclinic() -{ - double snx,sny,snz; - double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz; - double sum1,dot1,dot2; - double numerator,denominator; - double sqk; - - int k,l,m,n,nx,ny,nz,kper,lper,mper; - - double tmp[3]; - tmp[0] = (g_ewald/(MY_PI*nx_pppm)) * 
pow(-log(EPS_HOC),0.25); - tmp[1] = (g_ewald/(MY_PI*ny_pppm)) * pow(-log(EPS_HOC),0.25); - tmp[2] = (g_ewald/(MY_PI*nz_pppm)) * pow(-log(EPS_HOC),0.25); - lamda2xT(&tmp[0],&tmp[0]); - const int nbx = static_cast (tmp[0]); - const int nby = static_cast (tmp[1]); - const int nbz = static_cast (tmp[2]); - - const int twoorder = 2*order; - - n = 0; - for (m = nzlo_fft; m <= nzhi_fft; m++) { - mper = m - nz_pppm*(2*m/nz_pppm); - snz = square(sin(MY_PI*mper/nz_pppm)); - - for (l = nylo_fft; l <= nyhi_fft; l++) { - lper = l - ny_pppm*(2*l/ny_pppm); - sny = square(sin(MY_PI*lper/ny_pppm)); - - for (k = nxlo_fft; k <= nxhi_fft; k++) { - kper = k - nx_pppm*(2*k/nx_pppm); - snx = square(sin(MY_PI*kper/nx_pppm)); - - double unitk_lamda[3]; - unitk_lamda[0] = 2.0*MY_PI*kper; - unitk_lamda[1] = 2.0*MY_PI*lper; - unitk_lamda[2] = 2.0*MY_PI*mper; - x2lamdaT(&unitk_lamda[0],&unitk_lamda[0]); - - sqk = square(unitk_lamda[0]) + square(unitk_lamda[1]) + square(unitk_lamda[2]); - - if (sqk != 0.0) { - numerator = 12.5663706/sqk; - denominator = gf_denom(snx,sny,snz); - sum1 = 0.0; - - for (nx = -nbx; nx <= nbx; nx++) { - argx = MY_PI*kper/nx_pppm + MY_PI*nx; - wx = powsinxx(argx,twoorder); - - for (ny = -nby; ny <= nby; ny++) { - argy = MY_PI*lper/ny_pppm + MY_PI*ny; - wy = powsinxx(argy,twoorder); - - for (nz = -nbz; nz <= nbz; nz++) { - argz = MY_PI*mper/nz_pppm + MY_PI*nz; - wz = powsinxx(argz,twoorder); - - double b[3]; - b[0] = 2.0*MY_PI*nx_pppm*nx; - b[1] = 2.0*MY_PI*ny_pppm*ny; - b[2] = 2.0*MY_PI*nz_pppm*nz; - x2lamdaT(&b[0],&b[0]); - - qx = unitk_lamda[0]+b[0]; - sx = exp(-0.25*square(qx/g_ewald)); - - qy = unitk_lamda[1]+b[1]; - sy = exp(-0.25*square(qy/g_ewald)); - - qz = unitk_lamda[2]+b[2]; - sz = exp(-0.25*square(qz/g_ewald)); - - dot1 = unitk_lamda[0]*qx + unitk_lamda[1]*qy + unitk_lamda[2]*qz; - dot2 = qx*qx+qy*qy+qz*qz; - sum1 += (dot1/dot2) * sx*sy*sz * wx*wy*wz; - } - } - } - greensfn[n++] = numerator*sum1/denominator; - } else greensfn[n++] = 0.0; - } - } - } -} - 
-/* ---------------------------------------------------------------------- - compute optimized Green's function for energy calculation -------------------------------------------------------------------------- */ - -void PPPM::compute_gf_ad() -{ - const double * const prd = domain->prd; - - const double xprd = prd[0]; - const double yprd = prd[1]; - const double zprd = prd[2]; - const double zprd_slab = zprd*slab_volfactor; - const double unitkx = (MY_2PI/xprd); - const double unitky = (MY_2PI/yprd); - const double unitkz = (MY_2PI/zprd_slab); - - double snx,sny,snz,sqk; - double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz; - double numerator,denominator; - int k,l,m,n,kper,lper,mper; - - const int twoorder = 2*order; - - for (int i = 0; i < 6; i++) sf_coeff[i] = 0.0; - - n = 0; - for (m = nzlo_fft; m <= nzhi_fft; m++) { - mper = m - nz_pppm*(2*m/nz_pppm); - qz = unitkz*mper; - snz = square(sin(0.5*qz*zprd_slab/nz_pppm)); - sz = exp(-0.25*square(qz/g_ewald)); - argz = 0.5*qz*zprd_slab/nz_pppm; - wz = powsinxx(argz,twoorder); - - for (l = nylo_fft; l <= nyhi_fft; l++) { - lper = l - ny_pppm*(2*l/ny_pppm); - qy = unitky*lper; - sny = square(sin(0.5*qy*yprd/ny_pppm)); - sy = exp(-0.25*square(qy/g_ewald)); - argy = 0.5*qy*yprd/ny_pppm; - wy = powsinxx(argy,twoorder); - - for (k = nxlo_fft; k <= nxhi_fft; k++) { - kper = k - nx_pppm*(2*k/nx_pppm); - qx = unitkx*kper; - snx = square(sin(0.5*qx*xprd/nx_pppm)); - sx = exp(-0.25*square(qx/g_ewald)); - argx = 0.5*qx*xprd/nx_pppm; - wx = powsinxx(argx,twoorder); - - sqk = qx*qx + qy*qy + qz*qz; - - if (sqk != 0.0) { - numerator = MY_4PI/sqk; - denominator = gf_denom(snx,sny,snz); - greensfn[n] = numerator*sx*sy*sz*wx*wy*wz/denominator; - sf_coeff[0] += sf_precoeff1[n]*greensfn[n]; - sf_coeff[1] += sf_precoeff2[n]*greensfn[n]; - sf_coeff[2] += sf_precoeff3[n]*greensfn[n]; - sf_coeff[3] += sf_precoeff4[n]*greensfn[n]; - sf_coeff[4] += sf_precoeff5[n]*greensfn[n]; - sf_coeff[5] += sf_precoeff6[n]*greensfn[n]; - n++; - } else { - 
greensfn[n] = 0.0; - sf_coeff[0] += sf_precoeff1[n]*greensfn[n]; - sf_coeff[1] += sf_precoeff2[n]*greensfn[n]; - sf_coeff[2] += sf_precoeff3[n]*greensfn[n]; - sf_coeff[3] += sf_precoeff4[n]*greensfn[n]; - sf_coeff[4] += sf_precoeff5[n]*greensfn[n]; - sf_coeff[5] += sf_precoeff6[n]*greensfn[n]; - n++; - } - } - } - } - - // compute the coefficients for the self-force correction - - double prex, prey, prez; - prex = prey = prez = MY_PI/volume; - prex *= nx_pppm/xprd; - prey *= ny_pppm/yprd; - prez *= nz_pppm/zprd_slab; - sf_coeff[0] *= prex; - sf_coeff[1] *= prex*2; - sf_coeff[2] *= prey; - sf_coeff[3] *= prey*2; - sf_coeff[4] *= prez; - sf_coeff[5] *= prez*2; - - // communicate values with other procs - - double tmp[6]; - MPI_Allreduce(sf_coeff,tmp,6,MPI_DOUBLE,MPI_SUM,world); - for (n = 0; n < 6; n++) sf_coeff[n] = tmp[n]; -} - -/* ---------------------------------------------------------------------- - compute self force coefficients for ad-differentiation scheme -------------------------------------------------------------------------- */ - -void PPPM::compute_sf_precoeff() -{ - int i,k,l,m,n; - int nx,ny,nz,kper,lper,mper; - double wx0[5],wy0[5],wz0[5],wx1[5],wy1[5],wz1[5],wx2[5],wy2[5],wz2[5]; - double qx0,qy0,qz0,qx1,qy1,qz1,qx2,qy2,qz2; - double u0,u1,u2,u3,u4,u5,u6; - double sum1,sum2,sum3,sum4,sum5,sum6; - - n = 0; - for (m = nzlo_fft; m <= nzhi_fft; m++) { - mper = m - nz_pppm*(2*m/nz_pppm); - - for (l = nylo_fft; l <= nyhi_fft; l++) { - lper = l - ny_pppm*(2*l/ny_pppm); - - for (k = nxlo_fft; k <= nxhi_fft; k++) { - kper = k - nx_pppm*(2*k/nx_pppm); - - sum1 = sum2 = sum3 = sum4 = sum5 = sum6 = 0.0; - for (i = 0; i < 5; i++) { - - qx0 = MY_2PI*(kper+nx_pppm*(i-2)); - qx1 = MY_2PI*(kper+nx_pppm*(i-1)); - qx2 = MY_2PI*(kper+nx_pppm*(i )); - wx0[i] = powsinxx(0.5*qx0/nx_pppm,order); - wx1[i] = powsinxx(0.5*qx1/nx_pppm,order); - wx2[i] = powsinxx(0.5*qx2/nx_pppm,order); - - qy0 = MY_2PI*(lper+ny_pppm*(i-2)); - qy1 = MY_2PI*(lper+ny_pppm*(i-1)); - qy2 = 
MY_2PI*(lper+ny_pppm*(i )); - wy0[i] = powsinxx(0.5*qy0/ny_pppm,order); - wy1[i] = powsinxx(0.5*qy1/ny_pppm,order); - wy2[i] = powsinxx(0.5*qy2/ny_pppm,order); - - qz0 = MY_2PI*(mper+nz_pppm*(i-2)); - qz1 = MY_2PI*(mper+nz_pppm*(i-1)); - qz2 = MY_2PI*(mper+nz_pppm*(i )); - - wz0[i] = powsinxx(0.5*qz0/nz_pppm,order); - wz1[i] = powsinxx(0.5*qz1/nz_pppm,order); - wz2[i] = powsinxx(0.5*qz2/nz_pppm,order); - } - - for (nx = 0; nx < 5; nx++) { - for (ny = 0; ny < 5; ny++) { - for (nz = 0; nz < 5; nz++) { - u0 = wx0[nx]*wy0[ny]*wz0[nz]; - u1 = wx1[nx]*wy0[ny]*wz0[nz]; - u2 = wx2[nx]*wy0[ny]*wz0[nz]; - u3 = wx0[nx]*wy1[ny]*wz0[nz]; - u4 = wx0[nx]*wy2[ny]*wz0[nz]; - u5 = wx0[nx]*wy0[ny]*wz1[nz]; - u6 = wx0[nx]*wy0[ny]*wz2[nz]; - - sum1 += u0*u1; - sum2 += u0*u2; - sum3 += u0*u3; - sum4 += u0*u4; - sum5 += u0*u5; - sum6 += u0*u6; - } - } - } - - // store values - - sf_precoeff1[n] = sum1; - sf_precoeff2[n] = sum2; - sf_precoeff3[n] = sum3; - sf_precoeff4[n] = sum4; - sf_precoeff5[n] = sum5; - sf_precoeff6[n++] = sum6; - } - } - } -} - -/* ---------------------------------------------------------------------- - find center grid pt for each of my particles - check that full stencil for the particle will fit in my 3d brick - store central grid pt indices in part2grid array -------------------------------------------------------------------------- */ - -void PPPM::particle_map() -{ - int nx,ny,nz; - - double **x = atom->x; - int nlocal = atom->nlocal; - - int flag = 0; - for (int i = 0; i < nlocal; i++) { - - // (nx,ny,nz) = global coords of grid pt to "lower left" of charge - // current particle coord can be outside global and local box - // add/subtract OFFSET to avoid int(-0.75) = 0 when want it to be -1 - - nx = static_cast ((x[i][0]-boxlo[0])*delxinv+shift) - OFFSET; - ny = static_cast ((x[i][1]-boxlo[1])*delyinv+shift) - OFFSET; - nz = static_cast ((x[i][2]-boxlo[2])*delzinv+shift) - OFFSET; - - part2grid[i][0] = nx; - part2grid[i][1] = ny; - part2grid[i][2] = nz; - - // 
check that entire stencil around nx,ny,nz will fit in my 3d brick - - if (nx+nlower < nxlo_out || nx+nupper > nxhi_out || - ny+nlower < nylo_out || ny+nupper > nyhi_out || - nz+nlower < nzlo_out || nz+nupper > nzhi_out) - flag = 1; - } - - if (flag) error->one(FLERR,"Out of range atoms - cannot compute PPPM"); -} - -/* ---------------------------------------------------------------------- - create discretized "density" on section of global grid due to my particles - density(x,y,z) = charge "density" at grid points of my 3d brick - (nxlo:nxhi,nylo:nyhi,nzlo:nzhi) is extent of my brick (including ghosts) - in global grid -------------------------------------------------------------------------- */ - -void PPPM::make_rho() -{ - int l,m,n,nx,ny,nz,mx,my,mz; - FFT_SCALAR dx,dy,dz,x0,y0,z0; - - // clear 3d density array - - memset(&(density_brick[nzlo_out][nylo_out][nxlo_out]),0, - ngrid*sizeof(FFT_SCALAR)); - - // loop over my charges, add their contribution to nearby grid points - // (nx,ny,nz) = global coords of grid pt to "lower left" of charge - // (dx,dy,dz) = distance to "lower left" grid pt - // (mx,my,mz) = global coords of moving stencil pt - - double *q = atom->q; - double **x = atom->x; - int nlocal = atom->nlocal; - - for (int i = 0; i < nlocal; i++) { - - nx = part2grid[i][0]; - ny = part2grid[i][1]; - nz = part2grid[i][2]; - dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv; - dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv; - dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv; - - compute_rho1d(dx,dy,dz); - - z0 = delvolinv * q[i]; - for (n = nlower; n <= nupper; n++) { - mz = n+nz; - y0 = z0*rho1d[2][n]; - for (m = nlower; m <= nupper; m++) { - my = m+ny; - x0 = y0*rho1d[1][m]; - for (l = nlower; l <= nupper; l++) { - mx = l+nx; - density_brick[mz][my][mx] += x0*rho1d[0][l]; - } - } - } - } -} - -/* ---------------------------------------------------------------------- - remap density from 3d brick decomposition to FFT decomposition 
-------------------------------------------------------------------------- */ - -void PPPM::brick2fft() -{ - int n,ix,iy,iz; - - // copy grabs inner portion of density from 3d brick - // remap could be done as pre-stage of FFT, - // but this works optimally on only double values, not complex values - - n = 0; - for (iz = nzlo_in; iz <= nzhi_in; iz++) - for (iy = nylo_in; iy <= nyhi_in; iy++) - for (ix = nxlo_in; ix <= nxhi_in; ix++) - density_fft[n++] = density_brick[iz][iy][ix]; - - remap->perform(density_fft,density_fft,work1); -} - -/* ---------------------------------------------------------------------- - FFT-based Poisson solver -------------------------------------------------------------------------- */ - -void PPPM::poisson() -{ - if (differentiation_flag == 1) poisson_ad(); - else poisson_ik(); -} - -/* ---------------------------------------------------------------------- - FFT-based Poisson solver for ik -------------------------------------------------------------------------- */ - -void PPPM::poisson_ik() -{ - int i,j,k,n; - double eng; - - // transform charge density (r -> k) - - n = 0; - for (i = 0; i < nfft; i++) { - work1[n++] = density_fft[i]; - work1[n++] = ZEROF; - } - - fft1->compute(work1,work1,1); - - // global energy and virial contribution - - double scaleinv = 1.0/(nx_pppm*ny_pppm*nz_pppm); - double s2 = scaleinv*scaleinv; - - if (eflag_global || vflag_global) { - if (vflag_global) { - n = 0; - for (i = 0; i < nfft; i++) { - eng = s2 * greensfn[i] * (work1[n]*work1[n] + work1[n+1]*work1[n+1]); - for (j = 0; j < 6; j++) virial[j] += eng*vg[i][j]; - if (eflag_global) energy += eng; - n += 2; - } - } else { - n = 0; - for (i = 0; i < nfft; i++) { - energy += - s2 * greensfn[i] * (work1[n]*work1[n] + work1[n+1]*work1[n+1]); - n += 2; - } - } - } - - // scale by 1/total-grid-pts to get rho(k) - // multiply by Green's function to get V(k) - - n = 0; - for (i = 0; i < nfft; i++) { - work1[n++] *= scaleinv * greensfn[i]; - work1[n++] *= scaleinv 
* greensfn[i]; - } - - // extra FFTs for per-atom energy/virial - - if (evflag_atom) poisson_peratom(); - - // triclinic system - - if (triclinic) { - poisson_ik_triclinic(); - return; - } - - // compute gradients of V(r) in each of 3 dims by transformimg -ik*V(k) - // FFT leaves data in 3d brick decomposition - // copy it into inner portion of vdx,vdy,vdz arrays - - // x direction gradient - - n = 0; - for (k = nzlo_fft; k <= nzhi_fft; k++) - for (j = nylo_fft; j <= nyhi_fft; j++) - for (i = nxlo_fft; i <= nxhi_fft; i++) { - work2[n] = fkx[i]*work1[n+1]; - work2[n+1] = -fkx[i]*work1[n]; - n += 2; - } - - fft2->compute(work2,work2,-1); - - n = 0; - for (k = nzlo_in; k <= nzhi_in; k++) - for (j = nylo_in; j <= nyhi_in; j++) - for (i = nxlo_in; i <= nxhi_in; i++) { - vdx_brick[k][j][i] = work2[n]; - n += 2; - } - - // y direction gradient - - n = 0; - for (k = nzlo_fft; k <= nzhi_fft; k++) - for (j = nylo_fft; j <= nyhi_fft; j++) - for (i = nxlo_fft; i <= nxhi_fft; i++) { - work2[n] = fky[j]*work1[n+1]; - work2[n+1] = -fky[j]*work1[n]; - n += 2; - } - - fft2->compute(work2,work2,-1); - - n = 0; - for (k = nzlo_in; k <= nzhi_in; k++) - for (j = nylo_in; j <= nyhi_in; j++) - for (i = nxlo_in; i <= nxhi_in; i++) { - vdy_brick[k][j][i] = work2[n]; - n += 2; - } - - // z direction gradient - - n = 0; - for (k = nzlo_fft; k <= nzhi_fft; k++) - for (j = nylo_fft; j <= nyhi_fft; j++) - for (i = nxlo_fft; i <= nxhi_fft; i++) { - work2[n] = fkz[k]*work1[n+1]; - work2[n+1] = -fkz[k]*work1[n]; - n += 2; - } - - fft2->compute(work2,work2,-1); - - n = 0; - for (k = nzlo_in; k <= nzhi_in; k++) - for (j = nylo_in; j <= nyhi_in; j++) - for (i = nxlo_in; i <= nxhi_in; i++) { - vdz_brick[k][j][i] = work2[n]; - n += 2; - } -} - -/* ---------------------------------------------------------------------- - FFT-based Poisson solver for ik for a triclinic system -------------------------------------------------------------------------- */ - -void PPPM::poisson_ik_triclinic() -{ - int 
i,j,k,n; - - // compute gradients of V(r) in each of 3 dims by transformimg -ik*V(k) - // FFT leaves data in 3d brick decomposition - // copy it into inner portion of vdx,vdy,vdz arrays - - // x direction gradient - - n = 0; - for (i = 0; i < nfft; i++) { - work2[n] = fkx[i]*work1[n+1]; - work2[n+1] = -fkx[i]*work1[n]; - n += 2; - } - - fft2->compute(work2,work2,-1); - - n = 0; - for (k = nzlo_in; k <= nzhi_in; k++) - for (j = nylo_in; j <= nyhi_in; j++) - for (i = nxlo_in; i <= nxhi_in; i++) { - vdx_brick[k][j][i] = work2[n]; - n += 2; - } - - // y direction gradient - - n = 0; - for (i = 0; i < nfft; i++) { - work2[n] = fky[i]*work1[n+1]; - work2[n+1] = -fky[i]*work1[n]; - n += 2; - } - - fft2->compute(work2,work2,-1); - - n = 0; - for (k = nzlo_in; k <= nzhi_in; k++) - for (j = nylo_in; j <= nyhi_in; j++) - for (i = nxlo_in; i <= nxhi_in; i++) { - vdy_brick[k][j][i] = work2[n]; - n += 2; - } - - // z direction gradient - - n = 0; - for (i = 0; i < nfft; i++) { - work2[n] = fkz[i]*work1[n+1]; - work2[n+1] = -fkz[i]*work1[n]; - n += 2; - } - - fft2->compute(work2,work2,-1); - - n = 0; - for (k = nzlo_in; k <= nzhi_in; k++) - for (j = nylo_in; j <= nyhi_in; j++) - for (i = nxlo_in; i <= nxhi_in; i++) { - vdz_brick[k][j][i] = work2[n]; - n += 2; - } -} - -/* ---------------------------------------------------------------------- - FFT-based Poisson solver for ad -------------------------------------------------------------------------- */ - -void PPPM::poisson_ad() -{ - int i,j,k,n; - double eng; - - // transform charge density (r -> k) - - n = 0; - for (i = 0; i < nfft; i++) { - work1[n++] = density_fft[i]; - work1[n++] = ZEROF; - } - - fft1->compute(work1,work1,1); - - // global energy and virial contribution - - double scaleinv = 1.0/(nx_pppm*ny_pppm*nz_pppm); - double s2 = scaleinv*scaleinv; - - if (eflag_global || vflag_global) { - if (vflag_global) { - n = 0; - for (i = 0; i < nfft; i++) { - eng = s2 * greensfn[i] * (work1[n]*work1[n] + work1[n+1]*work1[n+1]); 
- for (j = 0; j < 6; j++) virial[j] += eng*vg[i][j]; - if (eflag_global) energy += eng; - n += 2; - } - } else { - n = 0; - for (i = 0; i < nfft; i++) { - energy += - s2 * greensfn[i] * (work1[n]*work1[n] + work1[n+1]*work1[n+1]); - n += 2; - } - } - } - - // scale by 1/total-grid-pts to get rho(k) - // multiply by Green's function to get V(k) - - n = 0; - for (i = 0; i < nfft; i++) { - work1[n++] *= scaleinv * greensfn[i]; - work1[n++] *= scaleinv * greensfn[i]; - } - - // extra FFTs for per-atom energy/virial - - if (vflag_atom) poisson_peratom(); - - n = 0; - for (i = 0; i < nfft; i++) { - work2[n] = work1[n]; - work2[n+1] = work1[n+1]; - n += 2; - } - - fft2->compute(work2,work2,-1); - - n = 0; - for (k = nzlo_in; k <= nzhi_in; k++) - for (j = nylo_in; j <= nyhi_in; j++) - for (i = nxlo_in; i <= nxhi_in; i++) { - u_brick[k][j][i] = work2[n]; - n += 2; - } -} - -/* ---------------------------------------------------------------------- - FFT-based Poisson solver for per-atom energy/virial -------------------------------------------------------------------------- */ - -void PPPM::poisson_peratom() -{ - int i,j,k,n; - - // energy - - if (eflag_atom && differentiation_flag != 1) { - n = 0; - for (i = 0; i < nfft; i++) { - work2[n] = work1[n]; - work2[n+1] = work1[n+1]; - n += 2; - } - - fft2->compute(work2,work2,-1); - - n = 0; - for (k = nzlo_in; k <= nzhi_in; k++) - for (j = nylo_in; j <= nyhi_in; j++) - for (i = nxlo_in; i <= nxhi_in; i++) { - u_brick[k][j][i] = work2[n]; - n += 2; - } - } - - // 6 components of virial in v0 thru v5 - - if (!vflag_atom) return; - - n = 0; - for (i = 0; i < nfft; i++) { - work2[n] = work1[n]*vg[i][0]; - work2[n+1] = work1[n+1]*vg[i][0]; - n += 2; - } - - fft2->compute(work2,work2,-1); - - n = 0; - for (k = nzlo_in; k <= nzhi_in; k++) - for (j = nylo_in; j <= nyhi_in; j++) - for (i = nxlo_in; i <= nxhi_in; i++) { - v0_brick[k][j][i] = work2[n]; - n += 2; - } - - n = 0; - for (i = 0; i < nfft; i++) { - work2[n] = work1[n]*vg[i][1]; 
- work2[n+1] = work1[n+1]*vg[i][1]; - n += 2; - } - - fft2->compute(work2,work2,-1); - - n = 0; - for (k = nzlo_in; k <= nzhi_in; k++) - for (j = nylo_in; j <= nyhi_in; j++) - for (i = nxlo_in; i <= nxhi_in; i++) { - v1_brick[k][j][i] = work2[n]; - n += 2; - } - - n = 0; - for (i = 0; i < nfft; i++) { - work2[n] = work1[n]*vg[i][2]; - work2[n+1] = work1[n+1]*vg[i][2]; - n += 2; - } - - fft2->compute(work2,work2,-1); - - n = 0; - for (k = nzlo_in; k <= nzhi_in; k++) - for (j = nylo_in; j <= nyhi_in; j++) - for (i = nxlo_in; i <= nxhi_in; i++) { - v2_brick[k][j][i] = work2[n]; - n += 2; - } - - n = 0; - for (i = 0; i < nfft; i++) { - work2[n] = work1[n]*vg[i][3]; - work2[n+1] = work1[n+1]*vg[i][3]; - n += 2; - } - - fft2->compute(work2,work2,-1); - - n = 0; - for (k = nzlo_in; k <= nzhi_in; k++) - for (j = nylo_in; j <= nyhi_in; j++) - for (i = nxlo_in; i <= nxhi_in; i++) { - v3_brick[k][j][i] = work2[n]; - n += 2; - } - - n = 0; - for (i = 0; i < nfft; i++) { - work2[n] = work1[n]*vg[i][4]; - work2[n+1] = work1[n+1]*vg[i][4]; - n += 2; - } - - fft2->compute(work2,work2,-1); - - n = 0; - for (k = nzlo_in; k <= nzhi_in; k++) - for (j = nylo_in; j <= nyhi_in; j++) - for (i = nxlo_in; i <= nxhi_in; i++) { - v4_brick[k][j][i] = work2[n]; - n += 2; - } - - n = 0; - for (i = 0; i < nfft; i++) { - work2[n] = work1[n]*vg[i][5]; - work2[n+1] = work1[n+1]*vg[i][5]; - n += 2; - } - - fft2->compute(work2,work2,-1); - - n = 0; - for (k = nzlo_in; k <= nzhi_in; k++) - for (j = nylo_in; j <= nyhi_in; j++) - for (i = nxlo_in; i <= nxhi_in; i++) { - v5_brick[k][j][i] = work2[n]; - n += 2; - } -} - -/* ---------------------------------------------------------------------- - interpolate from grid to get electric field & force on my particles -------------------------------------------------------------------------- */ - -void PPPM::fieldforce() -{ - if (differentiation_flag == 1) fieldforce_ad(); - else fieldforce_ik(); -} - -/* 
---------------------------------------------------------------------- - interpolate from grid to get electric field & force on my particles for ik -------------------------------------------------------------------------- */ - -void PPPM::fieldforce_ik() -{ - int i,l,m,n,nx,ny,nz,mx,my,mz; - FFT_SCALAR dx,dy,dz,x0,y0,z0; - FFT_SCALAR ekx,eky,ekz; - - // loop over my charges, interpolate electric field from nearby grid points - // (nx,ny,nz) = global coords of grid pt to "lower left" of charge - // (dx,dy,dz) = distance to "lower left" grid pt - // (mx,my,mz) = global coords of moving stencil pt - // ek = 3 components of E-field on particle - - double *q = atom->q; - double **x = atom->x; - double **f = atom->f; - - int nlocal = atom->nlocal; - - for (i = 0; i < nlocal; i++) { - nx = part2grid[i][0]; - ny = part2grid[i][1]; - nz = part2grid[i][2]; - dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv; - dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv; - dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv; - - compute_rho1d(dx,dy,dz); - - ekx = eky = ekz = ZEROF; - for (n = nlower; n <= nupper; n++) { - mz = n+nz; - z0 = rho1d[2][n]; - for (m = nlower; m <= nupper; m++) { - my = m+ny; - y0 = z0*rho1d[1][m]; - for (l = nlower; l <= nupper; l++) { - mx = l+nx; - x0 = y0*rho1d[0][l]; - ekx -= x0*vdx_brick[mz][my][mx]; - eky -= x0*vdy_brick[mz][my][mx]; - ekz -= x0*vdz_brick[mz][my][mx]; - } - } - } - - // convert E-field to force - - const double qfactor = force->qqrd2e * scale * q[i]; - f[i][0] += qfactor*ekx; - f[i][1] += qfactor*eky; - if (slabflag != 2) f[i][2] += qfactor*ekz; - } -} - -/* ---------------------------------------------------------------------- - interpolate from grid to get electric field & force on my particles for ad -------------------------------------------------------------------------- */ - -void PPPM::fieldforce_ad() -{ - int i,l,m,n,nx,ny,nz,mx,my,mz; - FFT_SCALAR dx,dy,dz; - FFT_SCALAR ekx,eky,ekz; - double s1,s2,s3; - double sf = 0.0; - double *prd; - 
- prd = domain->prd; - double xprd = prd[0]; - double yprd = prd[1]; - double zprd = prd[2]; - - double hx_inv = nx_pppm/xprd; - double hy_inv = ny_pppm/yprd; - double hz_inv = nz_pppm/zprd; - - // loop over my charges, interpolate electric field from nearby grid points - // (nx,ny,nz) = global coords of grid pt to "lower left" of charge - // (dx,dy,dz) = distance to "lower left" grid pt - // (mx,my,mz) = global coords of moving stencil pt - // ek = 3 components of E-field on particle - - double *q = atom->q; - double **x = atom->x; - double **f = atom->f; - - int nlocal = atom->nlocal; - - for (i = 0; i < nlocal; i++) { - nx = part2grid[i][0]; - ny = part2grid[i][1]; - nz = part2grid[i][2]; - dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv; - dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv; - dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv; - - compute_rho1d(dx,dy,dz); - compute_drho1d(dx,dy,dz); - - ekx = eky = ekz = ZEROF; - for (n = nlower; n <= nupper; n++) { - mz = n+nz; - for (m = nlower; m <= nupper; m++) { - my = m+ny; - for (l = nlower; l <= nupper; l++) { - mx = l+nx; - ekx += drho1d[0][l]*rho1d[1][m]*rho1d[2][n]*u_brick[mz][my][mx]; - eky += rho1d[0][l]*drho1d[1][m]*rho1d[2][n]*u_brick[mz][my][mx]; - ekz += rho1d[0][l]*rho1d[1][m]*drho1d[2][n]*u_brick[mz][my][mx]; - } - } - } - ekx *= hx_inv; - eky *= hy_inv; - ekz *= hz_inv; - - // convert E-field to force and substract self forces - - const double qfactor = force->qqrd2e * scale; - - s1 = x[i][0]*hx_inv; - s2 = x[i][1]*hy_inv; - s3 = x[i][2]*hz_inv; - sf = sf_coeff[0]*sin(2*MY_PI*s1); - sf += sf_coeff[1]*sin(4*MY_PI*s1); - sf *= 2*q[i]*q[i]; - f[i][0] += qfactor*(ekx*q[i] - sf); - - sf = sf_coeff[2]*sin(2*MY_PI*s2); - sf += sf_coeff[3]*sin(4*MY_PI*s2); - sf *= 2*q[i]*q[i]; - f[i][1] += qfactor*(eky*q[i] - sf); - - - sf = sf_coeff[4]*sin(2*MY_PI*s3); - sf += sf_coeff[5]*sin(4*MY_PI*s3); - sf *= 2*q[i]*q[i]; - if (slabflag != 2) f[i][2] += qfactor*(ekz*q[i] - sf); - } -} - -/* 
---------------------------------------------------------------------- - interpolate from grid to get per-atom energy/virial -------------------------------------------------------------------------- */ - -void PPPM::fieldforce_peratom() -{ - int i,l,m,n,nx,ny,nz,mx,my,mz; - FFT_SCALAR dx,dy,dz,x0,y0,z0; - FFT_SCALAR u,v0,v1,v2,v3,v4,v5; - - // loop over my charges, interpolate from nearby grid points - // (nx,ny,nz) = global coords of grid pt to "lower left" of charge - // (dx,dy,dz) = distance to "lower left" grid pt - // (mx,my,mz) = global coords of moving stencil pt - - double *q = atom->q; - double **x = atom->x; - - int nlocal = atom->nlocal; - - for (i = 0; i < nlocal; i++) { - nx = part2grid[i][0]; - ny = part2grid[i][1]; - nz = part2grid[i][2]; - dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv; - dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv; - dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv; - - compute_rho1d(dx,dy,dz); - - u = v0 = v1 = v2 = v3 = v4 = v5 = ZEROF; - for (n = nlower; n <= nupper; n++) { - mz = n+nz; - z0 = rho1d[2][n]; - for (m = nlower; m <= nupper; m++) { - my = m+ny; - y0 = z0*rho1d[1][m]; - for (l = nlower; l <= nupper; l++) { - mx = l+nx; - x0 = y0*rho1d[0][l]; - if (eflag_atom) u += x0*u_brick[mz][my][mx]; - if (vflag_atom) { - v0 += x0*v0_brick[mz][my][mx]; - v1 += x0*v1_brick[mz][my][mx]; - v2 += x0*v2_brick[mz][my][mx]; - v3 += x0*v3_brick[mz][my][mx]; - v4 += x0*v4_brick[mz][my][mx]; - v5 += x0*v5_brick[mz][my][mx]; - } - } - } - } - - if (eflag_atom) eatom[i] += q[i]*u; - if (vflag_atom) { - vatom[i][0] += q[i]*v0; - vatom[i][1] += q[i]*v1; - vatom[i][2] += q[i]*v2; - vatom[i][3] += q[i]*v3; - vatom[i][4] += q[i]*v4; - vatom[i][5] += q[i]*v5; - } - } -} - -/* ---------------------------------------------------------------------- - pack own values to buf to send to another proc -------------------------------------------------------------------------- */ - -void PPPM::pack_forward(int flag, FFT_SCALAR *buf, int nlist, int *list) -{ 
- int n = 0; - - if (flag == FORWARD_IK) { - FFT_SCALAR *xsrc = &vdx_brick[nzlo_out][nylo_out][nxlo_out]; - FFT_SCALAR *ysrc = &vdy_brick[nzlo_out][nylo_out][nxlo_out]; - FFT_SCALAR *zsrc = &vdz_brick[nzlo_out][nylo_out][nxlo_out]; - for (int i = 0; i < nlist; i++) { - buf[n++] = xsrc[list[i]]; - buf[n++] = ysrc[list[i]]; - buf[n++] = zsrc[list[i]]; - } - } else if (flag == FORWARD_AD) { - FFT_SCALAR *src = &u_brick[nzlo_out][nylo_out][nxlo_out]; - for (int i = 0; i < nlist; i++) - buf[i] = src[list[i]]; - } else if (flag == FORWARD_IK_PERATOM) { - FFT_SCALAR *esrc = &u_brick[nzlo_out][nylo_out][nxlo_out]; - FFT_SCALAR *v0src = &v0_brick[nzlo_out][nylo_out][nxlo_out]; - FFT_SCALAR *v1src = &v1_brick[nzlo_out][nylo_out][nxlo_out]; - FFT_SCALAR *v2src = &v2_brick[nzlo_out][nylo_out][nxlo_out]; - FFT_SCALAR *v3src = &v3_brick[nzlo_out][nylo_out][nxlo_out]; - FFT_SCALAR *v4src = &v4_brick[nzlo_out][nylo_out][nxlo_out]; - FFT_SCALAR *v5src = &v5_brick[nzlo_out][nylo_out][nxlo_out]; - for (int i = 0; i < nlist; i++) { - if (eflag_atom) buf[n++] = esrc[list[i]]; - if (vflag_atom) { - buf[n++] = v0src[list[i]]; - buf[n++] = v1src[list[i]]; - buf[n++] = v2src[list[i]]; - buf[n++] = v3src[list[i]]; - buf[n++] = v4src[list[i]]; - buf[n++] = v5src[list[i]]; - } - } - } else if (flag == FORWARD_AD_PERATOM) { - FFT_SCALAR *v0src = &v0_brick[nzlo_out][nylo_out][nxlo_out]; - FFT_SCALAR *v1src = &v1_brick[nzlo_out][nylo_out][nxlo_out]; - FFT_SCALAR *v2src = &v2_brick[nzlo_out][nylo_out][nxlo_out]; - FFT_SCALAR *v3src = &v3_brick[nzlo_out][nylo_out][nxlo_out]; - FFT_SCALAR *v4src = &v4_brick[nzlo_out][nylo_out][nxlo_out]; - FFT_SCALAR *v5src = &v5_brick[nzlo_out][nylo_out][nxlo_out]; - for (int i = 0; i < nlist; i++) { - buf[n++] = v0src[list[i]]; - buf[n++] = v1src[list[i]]; - buf[n++] = v2src[list[i]]; - buf[n++] = v3src[list[i]]; - buf[n++] = v4src[list[i]]; - buf[n++] = v5src[list[i]]; - } - } -} - -/* ---------------------------------------------------------------------- - 
unpack another proc's own values from buf and set own ghost values -------------------------------------------------------------------------- */ - -void PPPM::unpack_forward(int flag, FFT_SCALAR *buf, int nlist, int *list) -{ - int n = 0; - - if (flag == FORWARD_IK) { - FFT_SCALAR *xdest = &vdx_brick[nzlo_out][nylo_out][nxlo_out]; - FFT_SCALAR *ydest = &vdy_brick[nzlo_out][nylo_out][nxlo_out]; - FFT_SCALAR *zdest = &vdz_brick[nzlo_out][nylo_out][nxlo_out]; - for (int i = 0; i < nlist; i++) { - xdest[list[i]] = buf[n++]; - ydest[list[i]] = buf[n++]; - zdest[list[i]] = buf[n++]; - } - } else if (flag == FORWARD_AD) { - FFT_SCALAR *dest = &u_brick[nzlo_out][nylo_out][nxlo_out]; - for (int i = 0; i < nlist; i++) - dest[list[i]] = buf[i]; - } else if (flag == FORWARD_IK_PERATOM) { - FFT_SCALAR *esrc = &u_brick[nzlo_out][nylo_out][nxlo_out]; - FFT_SCALAR *v0src = &v0_brick[nzlo_out][nylo_out][nxlo_out]; - FFT_SCALAR *v1src = &v1_brick[nzlo_out][nylo_out][nxlo_out]; - FFT_SCALAR *v2src = &v2_brick[nzlo_out][nylo_out][nxlo_out]; - FFT_SCALAR *v3src = &v3_brick[nzlo_out][nylo_out][nxlo_out]; - FFT_SCALAR *v4src = &v4_brick[nzlo_out][nylo_out][nxlo_out]; - FFT_SCALAR *v5src = &v5_brick[nzlo_out][nylo_out][nxlo_out]; - for (int i = 0; i < nlist; i++) { - if (eflag_atom) esrc[list[i]] = buf[n++]; - if (vflag_atom) { - v0src[list[i]] = buf[n++]; - v1src[list[i]] = buf[n++]; - v2src[list[i]] = buf[n++]; - v3src[list[i]] = buf[n++]; - v4src[list[i]] = buf[n++]; - v5src[list[i]] = buf[n++]; - } - } - } else if (flag == FORWARD_AD_PERATOM) { - FFT_SCALAR *v0src = &v0_brick[nzlo_out][nylo_out][nxlo_out]; - FFT_SCALAR *v1src = &v1_brick[nzlo_out][nylo_out][nxlo_out]; - FFT_SCALAR *v2src = &v2_brick[nzlo_out][nylo_out][nxlo_out]; - FFT_SCALAR *v3src = &v3_brick[nzlo_out][nylo_out][nxlo_out]; - FFT_SCALAR *v4src = &v4_brick[nzlo_out][nylo_out][nxlo_out]; - FFT_SCALAR *v5src = &v5_brick[nzlo_out][nylo_out][nxlo_out]; - for (int i = 0; i < nlist; i++) { - v0src[list[i]] = buf[n++]; - 
v1src[list[i]] = buf[n++]; - v2src[list[i]] = buf[n++]; - v3src[list[i]] = buf[n++]; - v4src[list[i]] = buf[n++]; - v5src[list[i]] = buf[n++]; - } - } -} - -/* ---------------------------------------------------------------------- - pack ghost values into buf to send to another proc -------------------------------------------------------------------------- */ - -void PPPM::pack_reverse(int flag, FFT_SCALAR *buf, int nlist, int *list) -{ - if (flag == REVERSE_RHO) { - FFT_SCALAR *src = &density_brick[nzlo_out][nylo_out][nxlo_out]; - for (int i = 0; i < nlist; i++) - buf[i] = src[list[i]]; - } -} - -/* ---------------------------------------------------------------------- - unpack another proc's ghost values from buf and add to own values -------------------------------------------------------------------------- */ - -void PPPM::unpack_reverse(int flag, FFT_SCALAR *buf, int nlist, int *list) -{ - if (flag == REVERSE_RHO) { - FFT_SCALAR *dest = &density_brick[nzlo_out][nylo_out][nxlo_out]; - for (int i = 0; i < nlist; i++) - dest[list[i]] += buf[i]; - } -} - -/* ---------------------------------------------------------------------- - map nprocs to NX by NY grid as PX by PY procs - return optimal px,py -------------------------------------------------------------------------- */ - -void PPPM::procs2grid2d(int nprocs, int nx, int ny, int *px, int *py) -{ - // loop thru all possible factorizations of nprocs - // surf = surface area of largest proc sub-domain - // innermost if test minimizes surface area and surface/volume ratio - - int bestsurf = 2 * (nx + ny); - int bestboxx = 0; - int bestboxy = 0; - - int boxx,boxy,surf,ipx,ipy; - - ipx = 1; - while (ipx <= nprocs) { - if (nprocs % ipx == 0) { - ipy = nprocs/ipx; - boxx = nx/ipx; - if (nx % ipx) boxx++; - boxy = ny/ipy; - if (ny % ipy) boxy++; - surf = boxx + boxy; - if (surf < bestsurf || - (surf == bestsurf && boxx*boxy > bestboxx*bestboxy)) { - bestsurf = surf; - bestboxx = boxx; - bestboxy = boxy; - *px = ipx; - 
*py = ipy; - } - } - ipx++; - } -} - -/* ---------------------------------------------------------------------- - charge assignment into rho1d - dx,dy,dz = distance of particle from "lower left" grid point -------------------------------------------------------------------------- */ - -void PPPM::compute_rho1d(const FFT_SCALAR &dx, const FFT_SCALAR &dy, - const FFT_SCALAR &dz) -{ - int k,l; - FFT_SCALAR r1,r2,r3; - - for (k = (1-order)/2; k <= order/2; k++) { - r1 = r2 = r3 = ZEROF; - - for (l = order-1; l >= 0; l--) { - r1 = rho_coeff[l][k] + r1*dx; - r2 = rho_coeff[l][k] + r2*dy; - r3 = rho_coeff[l][k] + r3*dz; - } - rho1d[0][k] = r1; - rho1d[1][k] = r2; - rho1d[2][k] = r3; - } -} - -/* ---------------------------------------------------------------------- - charge assignment into drho1d - dx,dy,dz = distance of particle from "lower left" grid point -------------------------------------------------------------------------- */ - -void PPPM::compute_drho1d(const FFT_SCALAR &dx, const FFT_SCALAR &dy, - const FFT_SCALAR &dz) -{ - int k,l; - FFT_SCALAR r1,r2,r3; - - for (k = (1-order)/2; k <= order/2; k++) { - r1 = r2 = r3 = ZEROF; - - for (l = order-2; l >= 0; l--) { - r1 = drho_coeff[l][k] + r1*dx; - r2 = drho_coeff[l][k] + r2*dy; - r3 = drho_coeff[l][k] + r3*dz; - } - drho1d[0][k] = r1; - drho1d[1][k] = r2; - drho1d[2][k] = r3; - } -} - -/* ---------------------------------------------------------------------- - generate coeffients for the weight function of order n - - (n-1) - Wn(x) = Sum wn(k,x) , Sum is over every other integer - k=-(n-1) - For k=-(n-1),-(n-1)+2, ....., (n-1)-2,n-1 - k is odd integers if n is even and even integers if n is odd - --- - | n-1 - | Sum a(l,j)*(x-k/2)**l if abs(x-k/2) < 1/2 - wn(k,x) = < l=0 - | - | 0 otherwise - --- - a coeffients are packed into the array rho_coeff to eliminate zeros - rho_coeff(l,((k+mod(n+1,2))/2) = a(l,k) -------------------------------------------------------------------------- */ - -void 
PPPM::compute_rho_coeff() -{ - int j,k,l,m; - FFT_SCALAR s; - - FFT_SCALAR **a; - memory->create2d_offset(a,order,-order,order,"pppm:a"); - - for (k = -order; k <= order; k++) - for (l = 0; l < order; l++) - a[l][k] = 0.0; - - a[0][0] = 1.0; - for (j = 1; j < order; j++) { - for (k = -j; k <= j; k += 2) { - s = 0.0; - for (l = 0; l < j; l++) { - a[l+1][k] = (a[l][k+1]-a[l][k-1]) / (l+1); -#ifdef FFT_SINGLE - s += powf(0.5,(float) l+1) * - (a[l][k-1] + powf(-1.0,(float) l) * a[l][k+1]) / (l+1); -#else - s += pow(0.5,(double) l+1) * - (a[l][k-1] + pow(-1.0,(double) l) * a[l][k+1]) / (l+1); -#endif - } - a[0][k] = s; - } - } - - m = (1-order)/2; - for (k = -(order-1); k < order; k += 2) { - for (l = 0; l < order; l++) - rho_coeff[l][m] = a[l][k]; - for (l = 1; l < order; l++) - drho_coeff[l-1][m] = l*a[l][k]; - m++; - } - - memory->destroy2d_offset(a,-order); -} - -/* ---------------------------------------------------------------------- - Slab-geometry correction term to dampen inter-slab interactions between - periodically repeating slabs. Yields good approximation to 2D Ewald if - adequate empty space is left between repeating slabs (J. Chem. Phys. - 111, 3155). Slabs defined here to be parallel to the xy plane. Also - extended to non-neutral systems (J. Chem. Phys. 131, 094107). 
-------------------------------------------------------------------------- */ - -void PPPM::slabcorr() -{ - // compute local contribution to global dipole moment - - double *q = atom->q; - double **x = atom->x; - double zprd = domain->zprd; - int nlocal = atom->nlocal; - - double dipole = 0.0; - for (int i = 0; i < nlocal; i++) dipole += q[i]*x[i][2]; - - // sum local contributions to get global dipole moment - - double dipole_all; - MPI_Allreduce(&dipole,&dipole_all,1,MPI_DOUBLE,MPI_SUM,world); - - // need to make non-neutral systems and/or - // per-atom energy translationally invariant - - double dipole_r2 = 0.0; - if (eflag_atom || fabs(qsum) > SMALL) { - for (int i = 0; i < nlocal; i++) - dipole_r2 += q[i]*x[i][2]*x[i][2]; - - // sum local contributions - - double tmp; - MPI_Allreduce(&dipole_r2,&tmp,1,MPI_DOUBLE,MPI_SUM,world); - dipole_r2 = tmp; - } - - // compute corrections - - const double e_slabcorr = MY_2PI*(dipole_all*dipole_all - - qsum*dipole_r2 - qsum*qsum*zprd*zprd/12.0)/volume; - const double qscale = force->qqrd2e * scale; - - if (eflag_global) energy += qscale * e_slabcorr; - - // per-atom energy - - if (eflag_atom) { - double efact = qscale * MY_2PI/volume; - for (int i = 0; i < nlocal; i++) - eatom[i] += efact * q[i]*(x[i][2]*dipole_all - 0.5*(dipole_r2 + - qsum*x[i][2]*x[i][2]) - qsum*zprd*zprd/12.0); - } - - // add on force corrections - - double ffact = qscale * (-4.0*MY_PI/volume); - double **f = atom->f; - - for (int i = 0; i < nlocal; i++) f[i][2] += ffact * q[i]*(dipole_all - qsum*x[i][2]); -} - -/* ---------------------------------------------------------------------- - perform and time the 1d FFTs required for N timesteps -------------------------------------------------------------------------- */ - -int PPPM::timing_1d(int n, double &time1d) -{ - double time1,time2; - - for (int i = 0; i < 2*nfft_both; i++) work1[i] = ZEROF; - - MPI_Barrier(world); - time1 = MPI_Wtime(); - - for (int i = 0; i < n; i++) { - 
fft1->timing1d(work1,nfft_both,1); - fft2->timing1d(work1,nfft_both,-1); - if (differentiation_flag != 1) { - fft2->timing1d(work1,nfft_both,-1); - fft2->timing1d(work1,nfft_both,-1); - } - } - - MPI_Barrier(world); - time2 = MPI_Wtime(); - time1d = time2 - time1; - - if (differentiation_flag) return 2; - return 4; -} - -/* ---------------------------------------------------------------------- - perform and time the 3d FFTs required for N timesteps -------------------------------------------------------------------------- */ - -int PPPM::timing_3d(int n, double &time3d) -{ - double time1,time2; - - for (int i = 0; i < 2*nfft_both; i++) work1[i] = ZEROF; - - MPI_Barrier(world); - time1 = MPI_Wtime(); - - for (int i = 0; i < n; i++) { - fft1->compute(work1,work1,1); - fft2->compute(work1,work1,-1); - if (differentiation_flag != 1) { - fft2->compute(work1,work1,-1); - fft2->compute(work1,work1,-1); - } - } - - MPI_Barrier(world); - time2 = MPI_Wtime(); - time3d = time2 - time1; - - if (differentiation_flag) return 2; - return 4; -} - -/* ---------------------------------------------------------------------- - memory usage of local arrays -------------------------------------------------------------------------- */ - -double PPPM::memory_usage() -{ - double bytes = nmax*3 * sizeof(double); - int nbrick = (nxhi_out-nxlo_out+1) * (nyhi_out-nylo_out+1) * - (nzhi_out-nzlo_out+1); - if (differentiation_flag == 1) { - bytes += 2 * nbrick * sizeof(FFT_SCALAR); - } else { - bytes += 4 * nbrick * sizeof(FFT_SCALAR); - } - if (triclinic) bytes += 3 * nfft_both * sizeof(double); - bytes += 6 * nfft_both * sizeof(double); - bytes += nfft_both * sizeof(double); - bytes += nfft_both*5 * sizeof(FFT_SCALAR); - - if (peratom_allocate_flag) - bytes += 6 * nbrick * sizeof(FFT_SCALAR); - - if (group_allocate_flag) { - bytes += 2 * nbrick * sizeof(FFT_SCALAR); - bytes += 2 * nfft_both * sizeof(FFT_SCALAR);; - } - - bytes += cg->memory_usage(); - - return bytes; -} - -/* 
---------------------------------------------------------------------- - group-group interactions - ------------------------------------------------------------------------- */ - -/* ---------------------------------------------------------------------- - compute the PPPM total long-range force and energy for groups A and B - ------------------------------------------------------------------------- */ - -void PPPM::compute_group_group(int groupbit_A, int groupbit_B, int AA_flag) -{ - if (slabflag && triclinic) - error->all(FLERR,"Cannot (yet) use K-space slab " - "correction with compute group/group for triclinic systems"); - - if (differentiation_flag) - error->all(FLERR,"Cannot (yet) use kspace_modify " - "diff ad with compute group/group"); - - if (!group_allocate_flag) allocate_groups(); - - // convert atoms from box to lamda coords - - if (triclinic == 0) boxlo = domain->boxlo; - else { - boxlo = domain->boxlo_lamda; - domain->x2lamda(atom->nlocal); - } - - e2group = 0.0; //energy - f2group[0] = 0.0; //force in x-direction - f2group[1] = 0.0; //force in y-direction - f2group[2] = 0.0; //force in z-direction - - // map my particle charge onto my local 3d density grid - - make_rho_groups(groupbit_A,groupbit_B,AA_flag); - - // all procs communicate density values from their ghost cells - // to fully sum contribution in their 3d bricks - // remap from 3d decomposition to FFT decomposition - - // temporarily store and switch pointers so we can - // use brick2fft() for groups A and B (without - // writing an additional function) - - FFT_SCALAR ***density_brick_real = density_brick; - FFT_SCALAR *density_fft_real = density_fft; - - // group A - - density_brick = density_A_brick; - density_fft = density_A_fft; - - cg->reverse_comm(this,REVERSE_RHO); - brick2fft(); - - // group B - - density_brick = density_B_brick; - density_fft = density_B_fft; - - cg->reverse_comm(this,REVERSE_RHO); - brick2fft(); - - // switch back pointers - - density_brick = density_brick_real; - 
density_fft = density_fft_real; - - // compute potential gradient on my FFT grid and - // portion of group-group energy/force on this proc's FFT grid - - poisson_groups(AA_flag); - - const double qscale = force->qqrd2e * scale; - - // total group A <--> group B energy - // self and boundary correction terms are in compute_group_group.cpp - - double e2group_all; - MPI_Allreduce(&e2group,&e2group_all,1,MPI_DOUBLE,MPI_SUM,world); - e2group = e2group_all; - - e2group *= qscale*0.5*volume; - - // total group A <--> group B force - - double f2group_all[3]; - MPI_Allreduce(f2group,f2group_all,3,MPI_DOUBLE,MPI_SUM,world); - - f2group[0] = qscale*volume*f2group_all[0]; - f2group[1] = qscale*volume*f2group_all[1]; - if (slabflag != 2) f2group[2] = qscale*volume*f2group_all[2]; - - // convert atoms back from lamda to box coords - - if (triclinic) domain->lamda2x(atom->nlocal); - - if (slabflag == 1) - slabcorr_groups(groupbit_A, groupbit_B, AA_flag); -} - -/* ---------------------------------------------------------------------- - allocate group-group memory that depends on # of K-vectors and order - ------------------------------------------------------------------------- */ - -void PPPM::allocate_groups() -{ - group_allocate_flag = 1; - - memory->create3d_offset(density_A_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, - nxlo_out,nxhi_out,"pppm:density_A_brick"); - memory->create3d_offset(density_B_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, - nxlo_out,nxhi_out,"pppm:density_B_brick"); - memory->create(density_A_fft,nfft_both,"pppm:density_A_fft"); - memory->create(density_B_fft,nfft_both,"pppm:density_B_fft"); -} - -/* ---------------------------------------------------------------------- - deallocate group-group memory that depends on # of K-vectors and order - ------------------------------------------------------------------------- */ - -void PPPM::deallocate_groups() -{ - group_allocate_flag = 0; - - memory->destroy3d_offset(density_A_brick,nzlo_out,nylo_out,nxlo_out); - 
memory->destroy3d_offset(density_B_brick,nzlo_out,nylo_out,nxlo_out); - memory->destroy(density_A_fft); - memory->destroy(density_B_fft); -} - -/* ---------------------------------------------------------------------- - create discretized "density" on section of global grid due to my particles - density(x,y,z) = charge "density" at grid points of my 3d brick - (nxlo:nxhi,nylo:nyhi,nzlo:nzhi) is extent of my brick (including ghosts) - in global grid for group-group interactions - ------------------------------------------------------------------------- */ - -void PPPM::make_rho_groups(int groupbit_A, int groupbit_B, int AA_flag) -{ - int l,m,n,nx,ny,nz,mx,my,mz; - FFT_SCALAR dx,dy,dz,x0,y0,z0; - - // clear 3d density arrays - - memset(&(density_A_brick[nzlo_out][nylo_out][nxlo_out]),0, - ngrid*sizeof(FFT_SCALAR)); - - memset(&(density_B_brick[nzlo_out][nylo_out][nxlo_out]),0, - ngrid*sizeof(FFT_SCALAR)); - - // loop over my charges, add their contribution to nearby grid points - // (nx,ny,nz) = global coords of grid pt to "lower left" of charge - // (dx,dy,dz) = distance to "lower left" grid pt - // (mx,my,mz) = global coords of moving stencil pt - - double *q = atom->q; - double **x = atom->x; - int nlocal = atom->nlocal; - int *mask = atom->mask; - - for (int i = 0; i < nlocal; i++) { - - if (!((mask[i] & groupbit_A) && (mask[i] & groupbit_B))) - if (AA_flag) continue; - - if ((mask[i] & groupbit_A) || (mask[i] & groupbit_B)) { - - nx = part2grid[i][0]; - ny = part2grid[i][1]; - nz = part2grid[i][2]; - dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv; - dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv; - dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv; - - compute_rho1d(dx,dy,dz); - - z0 = delvolinv * q[i]; - for (n = nlower; n <= nupper; n++) { - mz = n+nz; - y0 = z0*rho1d[2][n]; - for (m = nlower; m <= nupper; m++) { - my = m+ny; - x0 = y0*rho1d[1][m]; - for (l = nlower; l <= nupper; l++) { - mx = l+nx; - - // group A - - if (mask[i] & groupbit_A) - 
density_A_brick[mz][my][mx] += x0*rho1d[0][l]; - - // group B - - if (mask[i] & groupbit_B) - density_B_brick[mz][my][mx] += x0*rho1d[0][l]; - } - } - } - } - } -} - -/* ---------------------------------------------------------------------- - FFT-based Poisson solver for group-group interactions - ------------------------------------------------------------------------- */ - -void PPPM::poisson_groups(int AA_flag) -{ - int i,j,k,n; - - // reuse memory (already declared) - - FFT_SCALAR *work_A = work1; - FFT_SCALAR *work_B = work2; - - // transform charge density (r -> k) - - // group A - - n = 0; - for (i = 0; i < nfft; i++) { - work_A[n++] = density_A_fft[i]; - work_A[n++] = ZEROF; - } - - fft1->compute(work_A,work_A,1); - - // group B - - n = 0; - for (i = 0; i < nfft; i++) { - work_B[n++] = density_B_fft[i]; - work_B[n++] = ZEROF; - } - - fft1->compute(work_B,work_B,1); - - // group-group energy and force contribution, - // keep everything in reciprocal space so - // no inverse FFTs needed - - double scaleinv = 1.0/(nx_pppm*ny_pppm*nz_pppm); - double s2 = scaleinv*scaleinv; - - // energy - - n = 0; - for (i = 0; i < nfft; i++) { - e2group += s2 * greensfn[i] * - (work_A[n]*work_B[n] + work_A[n+1]*work_B[n+1]); - n += 2; - } - - if (AA_flag) return; - - - // multiply by Green's function and s2 - // (only for work_A so it is not squared below) - - n = 0; - for (i = 0; i < nfft; i++) { - work_A[n++] *= s2 * greensfn[i]; - work_A[n++] *= s2 * greensfn[i]; - } - - // triclinic system - - if (triclinic) { - poisson_groups_triclinic(); - return; - } - - double partial_group; - - // force, x direction - - n = 0; - for (k = nzlo_fft; k <= nzhi_fft; k++) - for (j = nylo_fft; j <= nyhi_fft; j++) - for (i = nxlo_fft; i <= nxhi_fft; i++) { - partial_group = work_A[n+1]*work_B[n] - work_A[n]*work_B[n+1]; - f2group[0] += fkx[i] * partial_group; - n += 2; - } - - // force, y direction - - n = 0; - for (k = nzlo_fft; k <= nzhi_fft; k++) - for (j = nylo_fft; j <= nyhi_fft; j++) - 
for (i = nxlo_fft; i <= nxhi_fft; i++) { - partial_group = work_A[n+1]*work_B[n] - work_A[n]*work_B[n+1]; - f2group[1] += fky[j] * partial_group; - n += 2; - } - - // force, z direction - - n = 0; - for (k = nzlo_fft; k <= nzhi_fft; k++) - for (j = nylo_fft; j <= nyhi_fft; j++) - for (i = nxlo_fft; i <= nxhi_fft; i++) { - partial_group = work_A[n+1]*work_B[n] - work_A[n]*work_B[n+1]; - f2group[2] += fkz[k] * partial_group; - n += 2; - } -} - -/* ---------------------------------------------------------------------- - FFT-based Poisson solver for group-group interactions - for a triclinic system - ------------------------------------------------------------------------- */ - -void PPPM::poisson_groups_triclinic() -{ - int i,j,k,n; - - // reuse memory (already declared) - - FFT_SCALAR *work_A = work1; - FFT_SCALAR *work_B = work2; - - double partial_group; - - // force, x direction - - n = 0; - for (i = 0; i < nfft; i++) { - partial_group = work_A[n+1]*work_B[n] - work_A[n]*work_B[n+1]; - f2group[0] += fkx[i] * partial_group; - n += 2; - } - - // force, y direction - - n = 0; - for (i = 0; i < nfft; i++) { - partial_group = work_A[n+1]*work_B[n] - work_A[n]*work_B[n+1]; - f2group[1] += fky[i] * partial_group; - n += 2; - } - - // force, z direction - - n = 0; - for (i = 0; i < nfft; i++) { - partial_group = work_A[n+1]*work_B[n] - work_A[n]*work_B[n+1]; - f2group[2] += fkz[i] * partial_group; - n += 2; - } -} - -/* ---------------------------------------------------------------------- - Slab-geometry correction term to dampen inter-slab interactions between - periodically repeating slabs. Yields good approximation to 2D Ewald if - adequate empty space is left between repeating slabs (J. Chem. Phys. - 111, 3155). Slabs defined here to be parallel to the xy plane. Also - extended to non-neutral systems (J. Chem. Phys. 131, 094107). 
-------------------------------------------------------------------------- */ - -void PPPM::slabcorr_groups(int groupbit_A, int groupbit_B, int AA_flag) -{ - // compute local contribution to global dipole moment - - double *q = atom->q; - double **x = atom->x; - double zprd = domain->zprd; - int *mask = atom->mask; - int nlocal = atom->nlocal; - - double qsum_A = 0.0; - double qsum_B = 0.0; - double dipole_A = 0.0; - double dipole_B = 0.0; - double dipole_r2_A = 0.0; - double dipole_r2_B = 0.0; - - for (int i = 0; i < nlocal; i++) { - if (!((mask[i] & groupbit_A) && (mask[i] & groupbit_B))) - if (AA_flag) continue; - - if (mask[i] & groupbit_A) { - qsum_A += q[i]; - dipole_A += q[i]*x[i][2]; - dipole_r2_A += q[i]*x[i][2]*x[i][2]; - } - - if (mask[i] & groupbit_B) { - qsum_B += q[i]; - dipole_B += q[i]*x[i][2]; - dipole_r2_B += q[i]*x[i][2]*x[i][2]; - } - } - - // sum local contributions to get total charge and global dipole moment - // for each group - - double tmp; - MPI_Allreduce(&qsum_A,&tmp,1,MPI_DOUBLE,MPI_SUM,world); - qsum_A = tmp; - - MPI_Allreduce(&qsum_B,&tmp,1,MPI_DOUBLE,MPI_SUM,world); - qsum_B = tmp; - - MPI_Allreduce(&dipole_A,&tmp,1,MPI_DOUBLE,MPI_SUM,world); - dipole_A = tmp; - - MPI_Allreduce(&dipole_B,&tmp,1,MPI_DOUBLE,MPI_SUM,world); - dipole_B = tmp; - - MPI_Allreduce(&dipole_r2_A,&tmp,1,MPI_DOUBLE,MPI_SUM,world); - dipole_r2_A = tmp; - - MPI_Allreduce(&dipole_r2_B,&tmp,1,MPI_DOUBLE,MPI_SUM,world); - dipole_r2_B = tmp; - - // compute corrections - - const double qscale = force->qqrd2e * scale; - const double efact = qscale * MY_2PI/volume; - - e2group += efact * (dipole_A*dipole_B - 0.5*(qsum_A*dipole_r2_B + - qsum_B*dipole_r2_A) - qsum_A*qsum_B*zprd*zprd/12.0); - - // add on force corrections - - const double ffact = qscale * (-4.0*MY_PI/volume); - f2group[2] += ffact * (qsum_A*dipole_B - qsum_B*dipole_A); -} +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel 
Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing authors: Roy Pollock (LLNL), Paul Crozier (SNL) + per-atom energy/virial & group/group energy/force added by Stan Moore (BYU) + analytic diff (2 FFT) option added by Rolf Isele-Holder (Aachen University) + triclinic added by Stan Moore (SNL) +------------------------------------------------------------------------- */ + +#include "lmptype.h" +#include "mpi.h" +#include "string.h" +#include "stdio.h" +#include "stdlib.h" +#include "math.h" +#include "pppm.h" +#include "atom.h" +#include "comm.h" +#include "commgrid.h" +#include "neighbor.h" +#include "force.h" +#include "pair.h" +#include "bond.h" +#include "angle.h" +#include "domain.h" +#include "fft3d_wrap.h" +#include "remap_wrap.h" +#include "memory.h" +#include "error.h" + +#include "math_const.h" +#include "math_special.h" + +using namespace LAMMPS_NS; +using namespace MathConst; +using namespace MathSpecial; + +#define MAXORDER 7 +#define OFFSET 16384 +#define SMALL 0.00001 +#define LARGE 10000.0 +#define EPS_HOC 1.0e-7 + +enum{REVERSE_RHO}; +enum{FORWARD_IK,FORWARD_AD,FORWARD_IK_PERATOM,FORWARD_AD_PERATOM}; + +#ifdef FFT_SINGLE +#define ZEROF 0.0f +#define ONEF 1.0f +#else +#define ZEROF 0.0 +#define ONEF 1.0 +#endif + +/* ---------------------------------------------------------------------- */ + +PPPM::PPPM(LAMMPS *lmp, int narg, char **arg) : KSpace(lmp, narg, arg) +{ + if (narg < 1) error->all(FLERR,"Illegal kspace_style pppm 
command"); + + pppmflag = 1; + group_group_enable = 1; + + accuracy_relative = fabs(force->numeric(FLERR,arg[0])); + + nfactors = 3; + factors = new int[nfactors]; + factors[0] = 2; + factors[1] = 3; + factors[2] = 5; + + MPI_Comm_rank(world,&me); + MPI_Comm_size(world,&nprocs); + + density_brick = vdx_brick = vdy_brick = vdz_brick = NULL; + density_fft = NULL; + u_brick = NULL; + v0_brick = v1_brick = v2_brick = v3_brick = v4_brick = v5_brick = NULL; + greensfn = NULL; + work1 = work2 = NULL; + vg = NULL; + fkx = fky = fkz = NULL; + + sf_precoeff1 = sf_precoeff2 = sf_precoeff3 = + sf_precoeff4 = sf_precoeff5 = sf_precoeff6 = NULL; + + density_A_brick = density_B_brick = NULL; + density_A_fft = density_B_fft = NULL; + + gf_b = NULL; + rho1d = rho_coeff = drho1d = drho_coeff = NULL; + + fft1 = fft2 = NULL; + remap = NULL; + cg = NULL; + cg_peratom = NULL; + + nmax = 0; + part2grid = NULL; + + peratom_allocate_flag = 0; + group_allocate_flag = 0; + + // define acons coefficients for estimation of kspace errors + // see JCP 109, pg 7698 for derivation of coefficients + // higher order coefficients may be computed if needed + + memory->create(acons,8,7,"pppm:acons"); + acons[1][0] = 2.0 / 3.0; + acons[2][0] = 1.0 / 50.0; + acons[2][1] = 5.0 / 294.0; + acons[3][0] = 1.0 / 588.0; + acons[3][1] = 7.0 / 1440.0; + acons[3][2] = 21.0 / 3872.0; + acons[4][0] = 1.0 / 4320.0; + acons[4][1] = 3.0 / 1936.0; + acons[4][2] = 7601.0 / 2271360.0; + acons[4][3] = 143.0 / 28800.0; + acons[5][0] = 1.0 / 23232.0; + acons[5][1] = 7601.0 / 13628160.0; + acons[5][2] = 143.0 / 69120.0; + acons[5][3] = 517231.0 / 106536960.0; + acons[5][4] = 106640677.0 / 11737571328.0; + acons[6][0] = 691.0 / 68140800.0; + acons[6][1] = 13.0 / 57600.0; + acons[6][2] = 47021.0 / 35512320.0; + acons[6][3] = 9694607.0 / 2095994880.0; + acons[6][4] = 733191589.0 / 59609088000.0; + acons[6][5] = 326190917.0 / 11700633600.0; + acons[7][0] = 1.0 / 345600.0; + acons[7][1] = 3617.0 / 35512320.0; + acons[7][2] = 
745739.0 / 838397952.0; + acons[7][3] = 56399353.0 / 12773376000.0; + acons[7][4] = 25091609.0 / 1560084480.0; + acons[7][5] = 1755948832039.0 / 36229939200000.0; + acons[7][6] = 4887769399.0 / 37838389248.0; +} + +/* ---------------------------------------------------------------------- + free all memory +------------------------------------------------------------------------- */ + +PPPM::~PPPM() +{ + delete [] factors; + deallocate(); + if (peratom_allocate_flag) deallocate_peratom(); + if (group_allocate_flag) deallocate_groups(); + memory->destroy(part2grid); + memory->destroy(acons); +} + +/* ---------------------------------------------------------------------- + called once before run +------------------------------------------------------------------------- */ + +void PPPM::init() +{ + if (me == 0) { + if (screen) fprintf(screen,"PPPM initialization ...\n"); + if (logfile) fprintf(logfile,"PPPM initialization ...\n"); + } + + // error check + + triclinic_check(); + if (domain->triclinic && differentiation_flag == 1) + error->all(FLERR,"Cannot (yet) use PPPM with triclinic box " + "and kspace_modify diff ad"); + if (domain->triclinic && slabflag) + error->all(FLERR,"Cannot (yet) use PPPM with triclinic box and " + "slab correction"); + if (domain->dimension == 2) error->all(FLERR, + "Cannot use PPPM with 2d simulation"); + + if (!atom->q_flag) error->all(FLERR,"Kspace style requires atom attribute q"); + + if (slabflag == 0 && domain->nonperiodic > 0) + error->all(FLERR,"Cannot use nonperiodic boundaries with PPPM"); + if (slabflag) { + if (domain->xperiodic != 1 || domain->yperiodic != 1 || + domain->boundary[2][0] != 1 || domain->boundary[2][1] != 1) + error->all(FLERR,"Incorrect boundaries with slab PPPM"); + } + + if (order < 2 || order > MAXORDER) { + char str[128]; + sprintf(str,"PPPM order cannot be < 2 or > than %d",MAXORDER); + error->all(FLERR,str); + } + + // extract short-range Coulombic cutoff from pair style + + triclinic = domain->triclinic; 
+ scale = 1.0; + + pair_check(); + + int itmp = 0; + double *p_cutoff = (double *) force->pair->extract("cut_coul",itmp); + if (p_cutoff == NULL) + error->all(FLERR,"KSpace style is incompatible with Pair style"); + cutoff = *p_cutoff; + + // if kspace is TIP4P, extract TIP4P params from pair style + // bond/angle are not yet init(), so insure equilibrium request is valid + + qdist = 0.0; + + if (tip4pflag) { + double *p_qdist = (double *) force->pair->extract("qdist",itmp); + int *p_typeO = (int *) force->pair->extract("typeO",itmp); + int *p_typeH = (int *) force->pair->extract("typeH",itmp); + int *p_typeA = (int *) force->pair->extract("typeA",itmp); + int *p_typeB = (int *) force->pair->extract("typeB",itmp); + if (!p_qdist || !p_typeO || !p_typeH || !p_typeA || !p_typeB) + error->all(FLERR,"KSpace style is incompatible with Pair style"); + qdist = *p_qdist; + typeO = *p_typeO; + typeH = *p_typeH; + int typeA = *p_typeA; + int typeB = *p_typeB; + + if (force->angle == NULL || force->bond == NULL || + force->angle->setflag == NULL || force->bond->setflag == NULL) + error->all(FLERR,"Bond and angle potentials must be defined for TIP4P"); + if (typeA < 1 || typeA > atom->nangletypes || + force->angle->setflag[typeA] == 0) + error->all(FLERR,"Bad TIP4P angle type for PPPM/TIP4P"); + if (typeB < 1 || typeB > atom->nbondtypes || + force->bond->setflag[typeB] == 0) + error->all(FLERR,"Bad TIP4P bond type for PPPM/TIP4P"); + double theta = force->angle->equilibrium_angle(typeA); + double blen = force->bond->equilibrium_distance(typeB); + alpha = qdist / (cos(0.5*theta) * blen); + if (domain->triclinic) + error->all(FLERR,"Cannot (yet) use PPPM with triclinic box and TIP4P"); + } + + // compute qsum & qsqsum and warn if not charge-neutral + + qsum = qsqsum = 0.0; + for (int i = 0; i < atom->nlocal; i++) { + qsum += atom->q[i]; + qsqsum += atom->q[i]*atom->q[i]; + } + + double tmp; + MPI_Allreduce(&qsum,&tmp,1,MPI_DOUBLE,MPI_SUM,world); + qsum = tmp; + 
MPI_Allreduce(&qsqsum,&tmp,1,MPI_DOUBLE,MPI_SUM,world); + qsqsum = tmp; + q2 = qsqsum * force->qqrd2e; + + if (qsqsum == 0.0) + error->all(FLERR,"Cannot use kspace solver on system with no charge"); + if (fabs(qsum) > SMALL && me == 0) { + char str[128]; + sprintf(str,"System is not charge neutral, net charge = %g",qsum); + error->warning(FLERR,str); + } + + // set accuracy (force units) from accuracy_relative or accuracy_absolute + + if (accuracy_absolute >= 0.0) accuracy = accuracy_absolute; + else accuracy = accuracy_relative * two_charge_force; + + // free all arrays previously allocated + + deallocate(); + if (peratom_allocate_flag) deallocate_peratom(); + if (group_allocate_flag) deallocate_groups(); + + // setup FFT grid resolution and g_ewald + // normally one iteration thru while loop is all that is required + // if grid stencil does not extend beyond neighbor proc + // or overlap is allowed, then done + // else reduce order and try again + + int (*procneigh)[2] = comm->procneigh; + + CommGrid *cgtmp = NULL; + int iteration = 0; + + while (order >= minorder) { + if (iteration && me == 0) + error->warning(FLERR,"Reducing PPPM order b/c stencil extends " + "beyond nearest neighbor processor"); + + if (stagger_flag && !differentiation_flag) compute_gf_denom(); + set_grid_global(); + set_grid_local(); + if (overlap_allowed) break; + + cgtmp = new CommGrid(lmp,world,1,1, + nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in, + nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out, + procneigh[0][0],procneigh[0][1],procneigh[1][0], + procneigh[1][1],procneigh[2][0],procneigh[2][1]); + cgtmp->ghost_notify(); + if (!cgtmp->ghost_overlap()) break; + delete cgtmp; + + order--; + iteration++; + } + + if (order < minorder) error->all(FLERR,"PPPM order < minimum allowed order"); + if (!overlap_allowed && cgtmp->ghost_overlap()) + error->all(FLERR,"PPPM grid stencil extends " + "beyond nearest neighbor processor"); + if (cgtmp) delete cgtmp; + + // adjust g_ewald + + if 
(!gewaldflag) adjust_gewald(); + + // calculate the final accuracy + + double estimated_accuracy = final_accuracy(); + + // print stats + + int ngrid_max,nfft_both_max; + MPI_Allreduce(&ngrid,&ngrid_max,1,MPI_INT,MPI_MAX,world); + MPI_Allreduce(&nfft_both,&nfft_both_max,1,MPI_INT,MPI_MAX,world); + + if (me == 0) { + +#ifdef FFT_SINGLE + const char fft_prec[] = "single"; +#else + const char fft_prec[] = "double"; +#endif + + if (screen) { + fprintf(screen," G vector (1/distance) = %g\n",g_ewald); + fprintf(screen," grid = %d %d %d\n",nx_pppm,ny_pppm,nz_pppm); + fprintf(screen," stencil order = %d\n",order); + fprintf(screen," estimated absolute RMS force accuracy = %g\n", + estimated_accuracy); + fprintf(screen," estimated relative force accuracy = %g\n", + estimated_accuracy/two_charge_force); + fprintf(screen," using %s precision FFTs\n",fft_prec); + fprintf(screen," 3d grid and FFT values/proc = %d %d\n", + ngrid_max,nfft_both_max); + } + if (logfile) { + fprintf(logfile," G vector (1/distance) = %g\n",g_ewald); + fprintf(logfile," grid = %d %d %d\n",nx_pppm,ny_pppm,nz_pppm); + fprintf(logfile," stencil order = %d\n",order); + fprintf(logfile," estimated absolute RMS force accuracy = %g\n", + estimated_accuracy); + fprintf(logfile," estimated relative force accuracy = %g\n", + estimated_accuracy/two_charge_force); + fprintf(logfile," using %s precision FFTs\n",fft_prec); + fprintf(logfile," 3d grid and FFT values/proc = %d %d\n", + ngrid_max,nfft_both_max); + } + } + + // allocate K-space dependent memory + // don't invoke allocate peratom() or group(), will be allocated when needed + + allocate(); + cg->ghost_notify(); + cg->setup(); + + // pre-compute Green's function denomiator expansion + // pre-compute 1d charge distribution coefficients + + compute_gf_denom(); + if (differentiation_flag == 1) compute_sf_precoeff(); + compute_rho_coeff(); +} + +/* ---------------------------------------------------------------------- + adjust PPPM coeffs, called initially 
and whenever volume has changed +------------------------------------------------------------------------- */ + +void PPPM::setup() +{ + if (triclinic) { + setup_triclinic(); + return; + } + + int i,j,k,n; + double *prd; + + // volume-dependent factors + // adjust z dimension for 2d slab PPPM + // z dimension for 3d PPPM is zprd since slab_volfactor = 1.0 + + if (triclinic == 0) prd = domain->prd; + else prd = domain->prd_lamda; + + double xprd = prd[0]; + double yprd = prd[1]; + double zprd = prd[2]; + double zprd_slab = zprd*slab_volfactor; + volume = xprd * yprd * zprd_slab; + + delxinv = nx_pppm/xprd; + delyinv = ny_pppm/yprd; + delzinv = nz_pppm/zprd_slab; + + delvolinv = delxinv*delyinv*delzinv; + + double unitkx = (MY_2PI/xprd); + double unitky = (MY_2PI/yprd); + double unitkz = (MY_2PI/zprd_slab); + + // fkx,fky,fkz for my FFT grid pts + + double per; + + for (i = nxlo_fft; i <= nxhi_fft; i++) { + per = i - nx_pppm*(2*i/nx_pppm); + fkx[i] = unitkx*per; + } + + for (i = nylo_fft; i <= nyhi_fft; i++) { + per = i - ny_pppm*(2*i/ny_pppm); + fky[i] = unitky*per; + } + + for (i = nzlo_fft; i <= nzhi_fft; i++) { + per = i - nz_pppm*(2*i/nz_pppm); + fkz[i] = unitkz*per; + } + + // virial coefficients + + double sqk,vterm; + + n = 0; + for (k = nzlo_fft; k <= nzhi_fft; k++) { + for (j = nylo_fft; j <= nyhi_fft; j++) { + for (i = nxlo_fft; i <= nxhi_fft; i++) { + sqk = fkx[i]*fkx[i] + fky[j]*fky[j] + fkz[k]*fkz[k]; + if (sqk == 0.0) { + vg[n][0] = 0.0; + vg[n][1] = 0.0; + vg[n][2] = 0.0; + vg[n][3] = 0.0; + vg[n][4] = 0.0; + vg[n][5] = 0.0; + } else { + vterm = -2.0 * (1.0/sqk + 0.25/(g_ewald*g_ewald)); + vg[n][0] = 1.0 + vterm*fkx[i]*fkx[i]; + vg[n][1] = 1.0 + vterm*fky[j]*fky[j]; + vg[n][2] = 1.0 + vterm*fkz[k]*fkz[k]; + vg[n][3] = vterm*fkx[i]*fky[j]; + vg[n][4] = vterm*fkx[i]*fkz[k]; + vg[n][5] = vterm*fky[j]*fkz[k]; + } + n++; + } + } + } + + if (differentiation_flag == 1) compute_gf_ad(); + else compute_gf_ik(); +} + +/* 
---------------------------------------------------------------------- + adjust PPPM coeffs, called initially and whenever volume has changed + for a triclinic system +------------------------------------------------------------------------- */ + +void PPPM::setup_triclinic() +{ + int i,j,k,n; + double *prd; + + // volume-dependent factors + // adjust z dimension for 2d slab PPPM + // z dimension for 3d PPPM is zprd since slab_volfactor = 1.0 + + prd = domain->prd; + + double xprd = prd[0]; + double yprd = prd[1]; + double zprd = prd[2]; + double zprd_slab = zprd*slab_volfactor; + volume = xprd * yprd * zprd_slab; + + // use lamda (0-1) coordinates + + delxinv = nx_pppm; + delyinv = ny_pppm; + delzinv = nz_pppm; + delvolinv = delxinv*delyinv*delzinv/volume; + + // fkx,fky,fkz for my FFT grid pts + + double per_i,per_j,per_k; + + n = 0; + for (k = nzlo_fft; k <= nzhi_fft; k++) { + per_k = k - nz_pppm*(2*k/nz_pppm); + for (j = nylo_fft; j <= nyhi_fft; j++) { + per_j = j - ny_pppm*(2*j/ny_pppm); + for (i = nxlo_fft; i <= nxhi_fft; i++) { + per_i = i - nx_pppm*(2*i/nx_pppm); + + double unitk_lamda[3]; + unitk_lamda[0] = 2.0*MY_PI*per_i; + unitk_lamda[1] = 2.0*MY_PI*per_j; + unitk_lamda[2] = 2.0*MY_PI*per_k; + x2lamdaT(&unitk_lamda[0],&unitk_lamda[0]); + fkx[n] = unitk_lamda[0]; + fky[n] = unitk_lamda[1]; + fkz[n] = unitk_lamda[2]; + n++; + } + } + } + + // virial coefficients + + double sqk,vterm; + + for (n = 0; n < nfft; n++) { + sqk = fkx[n]*fkx[n] + fky[n]*fky[n] + fkz[n]*fkz[n]; + if (sqk == 0.0) { + vg[n][0] = 0.0; + vg[n][1] = 0.0; + vg[n][2] = 0.0; + vg[n][3] = 0.0; + vg[n][4] = 0.0; + vg[n][5] = 0.0; + } else { + vterm = -2.0 * (1.0/sqk + 0.25/(g_ewald*g_ewald)); + vg[n][0] = 1.0 + vterm*fkx[n]*fkx[n]; + vg[n][1] = 1.0 + vterm*fky[n]*fky[n]; + vg[n][2] = 1.0 + vterm*fkz[n]*fkz[n]; + vg[n][3] = vterm*fkx[n]*fky[n]; + vg[n][4] = vterm*fkx[n]*fkz[n]; + vg[n][5] = vterm*fky[n]*fkz[n]; + } + } + + compute_gf_ik_triclinic(); +} + +/* 
---------------------------------------------------------------------- + reset local grid arrays and communication stencils + called by fix balance b/c it changed sizes of processor sub-domains +------------------------------------------------------------------------- */ + +void PPPM::setup_grid() +{ + // free all arrays previously allocated + + deallocate(); + if (peratom_allocate_flag) deallocate_peratom(); + if (group_allocate_flag) deallocate_groups(); + + // reset portion of global grid that each proc owns + + set_grid_local(); + + // reallocate K-space dependent memory + // check if grid communication is now overlapping if not allowed + // don't invoke allocate peratom() or group(), will be allocated when needed + + allocate(); + + cg->ghost_notify(); + if (overlap_allowed == 0 && cg->ghost_overlap()) + error->all(FLERR,"PPPM grid stencil extends " + "beyond nearest neighbor processor"); + cg->setup(); + + // pre-compute Green's function denomiator expansion + // pre-compute 1d charge distribution coefficients + + compute_gf_denom(); + if (differentiation_flag == 1) compute_sf_precoeff(); + compute_rho_coeff(); + + // pre-compute volume-dependent coeffs + + setup(); +} + +/* ---------------------------------------------------------------------- + compute the PPPM long-range force, energy, virial +------------------------------------------------------------------------- */ + +void PPPM::compute(int eflag, int vflag) +{ + int i,j; + + // set energy/virial flags + // invoke allocate_peratom() if needed for first time + + if (eflag || vflag) ev_setup(eflag,vflag); + else evflag = evflag_atom = eflag_global = vflag_global = + eflag_atom = vflag_atom = 0; + + if (evflag_atom && !peratom_allocate_flag) { + allocate_peratom(); + cg_peratom->ghost_notify(); + cg_peratom->setup(); + } + + // convert atoms from box to lamda coords + + if (triclinic == 0) boxlo = domain->boxlo; + else { + boxlo = domain->boxlo_lamda; + domain->x2lamda(atom->nlocal); + } + + // extend 
size of per-atom arrays if necessary + + if (atom->nlocal > nmax) { + memory->destroy(part2grid); + nmax = atom->nmax; + memory->create(part2grid,nmax,3,"pppm:part2grid"); + } + + // find grid points for all my particles + // map my particle charge onto my local 3d density grid + + particle_map(); + make_rho(); + + // all procs communicate density values from their ghost cells + // to fully sum contribution in their 3d bricks + // remap from 3d decomposition to FFT decomposition + + cg->reverse_comm(this,REVERSE_RHO); + brick2fft(); + + // compute potential gradient on my FFT grid and + // portion of e_long on this proc's FFT grid + // return gradients (electric fields) in 3d brick decomposition + // also performs per-atom calculations via poisson_peratom() + + poisson(); + + // all procs communicate E-field values + // to fill ghost cells surrounding their 3d bricks + + if (differentiation_flag == 1) cg->forward_comm(this,FORWARD_AD); + else cg->forward_comm(this,FORWARD_IK); + + // extra per-atom energy/virial communication + + if (evflag_atom) { + if (differentiation_flag == 1 && vflag_atom) + cg_peratom->forward_comm(this,FORWARD_AD_PERATOM); + else if (differentiation_flag == 0) + cg_peratom->forward_comm(this,FORWARD_IK_PERATOM); + } + + // calculate the force on my particles + + fieldforce(); + + // extra per-atom energy/virial communication + + if (evflag_atom) fieldforce_peratom(); + + // sum global energy across procs and add in volume-dependent term + + const double qscale = force->qqrd2e * scale; + + if (eflag_global) { + double energy_all; + MPI_Allreduce(&energy,&energy_all,1,MPI_DOUBLE,MPI_SUM,world); + energy = energy_all; + + energy *= 0.5*volume; + energy -= g_ewald*qsqsum/MY_PIS + + MY_PI2*qsum*qsum / (g_ewald*g_ewald*volume); + energy *= qscale; + } + + // sum global virial across procs + + if (vflag_global) { + double virial_all[6]; + MPI_Allreduce(virial,virial_all,6,MPI_DOUBLE,MPI_SUM,world); + for (i = 0; i < 6; i++) virial[i] = 
0.5*qscale*volume*virial_all[i]; + } + + // per-atom energy/virial + // energy includes self-energy correction + // notal accounts for TIP4P tallying eatom/vatom for ghost atoms + + if (evflag_atom) { + double *q = atom->q; + int nlocal = atom->nlocal; + int ntotal = nlocal; + if (tip4pflag) ntotal += atom->nghost; + + if (eflag_atom) { + for (i = 0; i < nlocal; i++) { + eatom[i] *= 0.5; + eatom[i] -= g_ewald*q[i]*q[i]/MY_PIS + MY_PI2*q[i]*qsum / + (g_ewald*g_ewald*volume); + eatom[i] *= qscale; + } + for (i = nlocal; i < ntotal; i++) eatom[i] *= 0.5*qscale; + } + + if (vflag_atom) { + for (i = 0; i < ntotal; i++) + for (j = 0; j < 6; j++) vatom[i][j] *= 0.5*qscale; + } + } + + // 2d slab correction + + if (slabflag == 1) slabcorr(); + + // convert atoms back from lamda to box coords + + if (triclinic) domain->lamda2x(atom->nlocal); +} + +/* ---------------------------------------------------------------------- + allocate memory that depends on # of K-vectors and order +------------------------------------------------------------------------- */ + +void PPPM::allocate() +{ + memory->create3d_offset(density_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, + nxlo_out,nxhi_out,"pppm:density_brick"); + + memory->create(density_fft,nfft_both,"pppm:density_fft"); + memory->create(greensfn,nfft_both,"pppm:greensfn"); + memory->create(work1,2*nfft_both,"pppm:work1"); + memory->create(work2,2*nfft_both,"pppm:work2"); + memory->create(vg,nfft_both,6,"pppm:vg"); + + if (triclinic == 0) { + memory->create1d_offset(fkx,nxlo_fft,nxhi_fft,"pppm:fkx"); + memory->create1d_offset(fky,nylo_fft,nyhi_fft,"pppm:fky"); + memory->create1d_offset(fkz,nzlo_fft,nzhi_fft,"pppm:fkz"); + } else { + memory->create(fkx,nfft_both,"pppm:fkx"); + memory->create(fky,nfft_both,"pppm:fky"); + memory->create(fkz,nfft_both,"pppm:fkz"); + } + + if (differentiation_flag == 1) { + memory->create3d_offset(u_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, + nxlo_out,nxhi_out,"pppm:u_brick"); + + 
memory->create(sf_precoeff1,nfft_both,"pppm:sf_precoeff1"); + memory->create(sf_precoeff2,nfft_both,"pppm:sf_precoeff2"); + memory->create(sf_precoeff3,nfft_both,"pppm:sf_precoeff3"); + memory->create(sf_precoeff4,nfft_both,"pppm:sf_precoeff4"); + memory->create(sf_precoeff5,nfft_both,"pppm:sf_precoeff5"); + memory->create(sf_precoeff6,nfft_both,"pppm:sf_precoeff6"); + + } else { + memory->create3d_offset(vdx_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, + nxlo_out,nxhi_out,"pppm:vdx_brick"); + memory->create3d_offset(vdy_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, + nxlo_out,nxhi_out,"pppm:vdy_brick"); + memory->create3d_offset(vdz_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, + nxlo_out,nxhi_out,"pppm:vdz_brick"); + } + + // summation coeffs + + order_allocated = order; + if (!stagger_flag) memory->create(gf_b,order,"pppm:gf_b"); + memory->create2d_offset(rho1d,3,-order/2,order/2,"pppm:rho1d"); + memory->create2d_offset(drho1d,3,-order/2,order/2,"pppm:drho1d"); + memory->create2d_offset(rho_coeff,order,(1-order)/2,order/2,"pppm:rho_coeff"); + memory->create2d_offset(drho_coeff,order,(1-order)/2,order/2, + "pppm:drho_coeff"); + + // create 2 FFTs and a Remap + // 1st FFT keeps data in FFT decompostion + // 2nd FFT returns data in 3d brick decomposition + // remap takes data from 3d brick to FFT decomposition + + int tmp; + + fft1 = new FFT3d(lmp,world,nx_pppm,ny_pppm,nz_pppm, + nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft, + nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft, + 0,0,&tmp); + + fft2 = new FFT3d(lmp,world,nx_pppm,ny_pppm,nz_pppm, + nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft, + nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in, + 0,0,&tmp); + + remap = new Remap(lmp,world, + nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in, + nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft, + 1,0,0,FFT_PRECISION); + + // create ghost grid object for rho and electric field communication + + int (*procneigh)[2] = comm->procneigh; + + if 
(differentiation_flag == 1) + cg = new CommGrid(lmp,world,1,1, + nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in, + nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out, + procneigh[0][0],procneigh[0][1],procneigh[1][0], + procneigh[1][1],procneigh[2][0],procneigh[2][1]); + else + cg = new CommGrid(lmp,world,3,1, + nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in, + nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out, + procneigh[0][0],procneigh[0][1],procneigh[1][0], + procneigh[1][1],procneigh[2][0],procneigh[2][1]); +} + +/* ---------------------------------------------------------------------- + deallocate memory that depends on # of K-vectors and order +------------------------------------------------------------------------- */ + +void PPPM::deallocate() +{ + memory->destroy3d_offset(density_brick,nzlo_out,nylo_out,nxlo_out); + + if (differentiation_flag == 1) { + memory->destroy3d_offset(u_brick,nzlo_out,nylo_out,nxlo_out); + memory->destroy(sf_precoeff1); + memory->destroy(sf_precoeff2); + memory->destroy(sf_precoeff3); + memory->destroy(sf_precoeff4); + memory->destroy(sf_precoeff5); + memory->destroy(sf_precoeff6); + } else { + memory->destroy3d_offset(vdx_brick,nzlo_out,nylo_out,nxlo_out); + memory->destroy3d_offset(vdy_brick,nzlo_out,nylo_out,nxlo_out); + memory->destroy3d_offset(vdz_brick,nzlo_out,nylo_out,nxlo_out); + } + + memory->destroy(density_fft); + memory->destroy(greensfn); + memory->destroy(work1); + memory->destroy(work2); + memory->destroy(vg); + + if (triclinic == 0) { + memory->destroy1d_offset(fkx,nxlo_fft); + memory->destroy1d_offset(fky,nylo_fft); + memory->destroy1d_offset(fkz,nzlo_fft); + } else { + memory->destroy(fkx); + memory->destroy(fky); + memory->destroy(fkz); + } + + memory->destroy(gf_b); + if (stagger_flag) gf_b = NULL; + memory->destroy2d_offset(rho1d,-order_allocated/2); + memory->destroy2d_offset(drho1d,-order_allocated/2); + memory->destroy2d_offset(rho_coeff,(1-order_allocated)/2); + 
memory->destroy2d_offset(drho_coeff,(1-order_allocated)/2); + + delete fft1; + delete fft2; + delete remap; + delete cg; +} + +/* ---------------------------------------------------------------------- + allocate per-atom memory that depends on # of K-vectors and order +------------------------------------------------------------------------- */ + +void PPPM::allocate_peratom() +{ + peratom_allocate_flag = 1; + + if (differentiation_flag != 1) + memory->create3d_offset(u_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, + nxlo_out,nxhi_out,"pppm:u_brick"); + + memory->create3d_offset(v0_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, + nxlo_out,nxhi_out,"pppm:v0_brick"); + + memory->create3d_offset(v1_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, + nxlo_out,nxhi_out,"pppm:v1_brick"); + memory->create3d_offset(v2_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, + nxlo_out,nxhi_out,"pppm:v2_brick"); + memory->create3d_offset(v3_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, + nxlo_out,nxhi_out,"pppm:v3_brick"); + memory->create3d_offset(v4_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, + nxlo_out,nxhi_out,"pppm:v4_brick"); + memory->create3d_offset(v5_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, + nxlo_out,nxhi_out,"pppm:v5_brick"); + + // create ghost grid object for rho and electric field communication + + int (*procneigh)[2] = comm->procneigh; + + if (differentiation_flag == 1) + cg_peratom = + new CommGrid(lmp,world,6,1, + nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in, + nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out, + procneigh[0][0],procneigh[0][1],procneigh[1][0], + procneigh[1][1],procneigh[2][0],procneigh[2][1]); + else + cg_peratom = + new CommGrid(lmp,world,7,1, + nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in, + nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out, + procneigh[0][0],procneigh[0][1],procneigh[1][0], + procneigh[1][1],procneigh[2][0],procneigh[2][1]); +} + +/* ---------------------------------------------------------------------- + deallocate per-atom memory 
that depends on # of K-vectors and order +------------------------------------------------------------------------- */ + +void PPPM::deallocate_peratom() +{ + peratom_allocate_flag = 0; + + memory->destroy3d_offset(v0_brick,nzlo_out,nylo_out,nxlo_out); + memory->destroy3d_offset(v1_brick,nzlo_out,nylo_out,nxlo_out); + memory->destroy3d_offset(v2_brick,nzlo_out,nylo_out,nxlo_out); + memory->destroy3d_offset(v3_brick,nzlo_out,nylo_out,nxlo_out); + memory->destroy3d_offset(v4_brick,nzlo_out,nylo_out,nxlo_out); + memory->destroy3d_offset(v5_brick,nzlo_out,nylo_out,nxlo_out); + + if (differentiation_flag != 1) + memory->destroy3d_offset(u_brick,nzlo_out,nylo_out,nxlo_out); + + delete cg_peratom; +} + +/* ---------------------------------------------------------------------- + set global size of PPPM grid = nx,ny,nz_pppm + used for charge accumulation, FFTs, and electric field interpolation +------------------------------------------------------------------------- */ + +void PPPM::set_grid_global() +{ + // use xprd,yprd,zprd (even if triclinic, and then scale later) + // adjust z dimension for 2d slab PPPM + // 3d PPPM just uses zprd since slab_volfactor = 1.0 + + double xprd = domain->xprd; + double yprd = domain->yprd; + double zprd = domain->zprd; + double zprd_slab = zprd*slab_volfactor; + + // make initial g_ewald estimate + // based on desired accuracy and real space cutoff + // fluid-occupied volume used to estimate real-space error + // zprd used rather than zprd_slab + + double h; + bigint natoms = atom->natoms; + + if (!gewaldflag) { + if (accuracy <= 0.0) + error->all(FLERR,"KSpace accuracy must be > 0"); + g_ewald = accuracy*sqrt(natoms*cutoff*xprd*yprd*zprd) / (2.0*q2); + if (g_ewald >= 1.0) g_ewald = (1.35 - 0.15*log(accuracy))/cutoff; + else g_ewald = sqrt(-log(g_ewald)) / cutoff; + } + + // set optimal nx_pppm,ny_pppm,nz_pppm based on order and accuracy + // nz_pppm uses extended zprd_slab instead of zprd + // reduce it until accuracy target is met + + if 
(!gridflag) { + + if (differentiation_flag == 1 || stagger_flag) { + + h = h_x = h_y = h_z = 4.0/g_ewald; + int count = 0; + while (1) { + + // set grid dimension + nx_pppm = static_cast (xprd/h_x); + ny_pppm = static_cast (yprd/h_y); + nz_pppm = static_cast (zprd_slab/h_z); + + if (nx_pppm <= 1) nx_pppm = 2; + if (ny_pppm <= 1) ny_pppm = 2; + if (nz_pppm <= 1) nz_pppm = 2; + + //set local grid dimension + int npey_fft,npez_fft; + if (nz_pppm >= nprocs) { + npey_fft = 1; + npez_fft = nprocs; + } else procs2grid2d(nprocs,ny_pppm,nz_pppm,&npey_fft,&npez_fft); + + int me_y = me % npey_fft; + int me_z = me / npey_fft; + + nxlo_fft = 0; + nxhi_fft = nx_pppm - 1; + nylo_fft = me_y*ny_pppm/npey_fft; + nyhi_fft = (me_y+1)*ny_pppm/npey_fft - 1; + nzlo_fft = me_z*nz_pppm/npez_fft; + nzhi_fft = (me_z+1)*nz_pppm/npez_fft - 1; + + double df_kspace = compute_df_kspace(); + + count++; + + // break loop if the accuracy has been reached or + // too many loops have been performed + + if (df_kspace <= accuracy) break; + if (count > 500) error->all(FLERR, "Could not compute grid size"); + h *= 0.95; + h_x = h_y = h_z = h; + } + + } else { + + double err; + h_x = h_y = h_z = 1.0/g_ewald; + + nx_pppm = static_cast (xprd/h_x) + 1; + ny_pppm = static_cast (yprd/h_y) + 1; + nz_pppm = static_cast (zprd_slab/h_z) + 1; + + err = estimate_ik_error(h_x,xprd,natoms); + while (err > accuracy) { + err = estimate_ik_error(h_x,xprd,natoms); + nx_pppm++; + h_x = xprd/nx_pppm; + } + + err = estimate_ik_error(h_y,yprd,natoms); + while (err > accuracy) { + err = estimate_ik_error(h_y,yprd,natoms); + ny_pppm++; + h_y = yprd/ny_pppm; + } + + err = estimate_ik_error(h_z,zprd_slab,natoms); + while (err > accuracy) { + err = estimate_ik_error(h_z,zprd_slab,natoms); + nz_pppm++; + h_z = zprd_slab/nz_pppm; + } + } + + // scale grid for triclinic skew + + if (triclinic) { + double tmp[3]; + tmp[0] = nx_pppm/xprd; + tmp[1] = ny_pppm/yprd; + tmp[2] = nz_pppm/zprd; + lamda2xT(&tmp[0],&tmp[0]); + nx_pppm = 
static_cast(tmp[0]) + 1; + ny_pppm = static_cast(tmp[1]) + 1; + nz_pppm = static_cast(tmp[2]) + 1; + } + } + + // boost grid size until it is factorable + + while (!factorable(nx_pppm)) nx_pppm++; + while (!factorable(ny_pppm)) ny_pppm++; + while (!factorable(nz_pppm)) nz_pppm++; + + if (triclinic == 0) { + h_x = xprd/nx_pppm; + h_y = yprd/ny_pppm; + h_z = zprd_slab/nz_pppm; + } else { + double tmp[3]; + tmp[0] = nx_pppm; + tmp[1] = ny_pppm; + tmp[2] = nz_pppm; + x2lamdaT(&tmp[0],&tmp[0]); + h_x = 1.0/tmp[0]; + h_y = 1.0/tmp[1]; + h_z = 1.0/tmp[2]; + } + + if (nx_pppm >= OFFSET || ny_pppm >= OFFSET || nz_pppm >= OFFSET) + error->all(FLERR,"PPPM grid is too large"); +} + +/* ---------------------------------------------------------------------- + check if all factors of n are in list of factors + return 1 if yes, 0 if no +------------------------------------------------------------------------- */ + +int PPPM::factorable(int n) +{ + int i; + + while (n > 1) { + for (i = 0; i < nfactors; i++) { + if (n % factors[i] == 0) { + n /= factors[i]; + break; + } + } + if (i == nfactors) return 0; + } + + return 1; +} + +/* ---------------------------------------------------------------------- + compute estimated kspace force error +------------------------------------------------------------------------- */ + +double PPPM::compute_df_kspace() +{ + double xprd = domain->xprd; + double yprd = domain->yprd; + double zprd = domain->zprd; + double zprd_slab = zprd*slab_volfactor; + bigint natoms = atom->natoms; + double df_kspace = 0.0; + if (differentiation_flag == 1 || stagger_flag) { + double qopt = compute_qopt(); + df_kspace = sqrt(qopt/natoms)*q2/(xprd*yprd*zprd_slab); + } else { + double lprx = estimate_ik_error(h_x,xprd,natoms); + double lpry = estimate_ik_error(h_y,yprd,natoms); + double lprz = estimate_ik_error(h_z,zprd_slab,natoms); + df_kspace = sqrt(lprx*lprx + lpry*lpry + lprz*lprz) / sqrt(3.0); + } + return df_kspace; +} + +/* 
---------------------------------------------------------------------- + compute qopt +------------------------------------------------------------------------- */ + +double PPPM::compute_qopt() +{ + double qopt = 0.0; + double *prd = domain->prd; + + const double xprd = prd[0]; + const double yprd = prd[1]; + const double zprd = prd[2]; + const double zprd_slab = zprd*slab_volfactor; + volume = xprd * yprd * zprd_slab; + + const double unitkx = (MY_2PI/xprd); + const double unitky = (MY_2PI/yprd); + const double unitkz = (MY_2PI/zprd_slab); + + double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz; + double u1, u2, sqk; + double sum1,sum2,sum3,sum4,dot2; + + int k,l,m,nx,ny,nz; + const int twoorder = 2*order; + + for (m = nzlo_fft; m <= nzhi_fft; m++) { + const int mper = m - nz_pppm*(2*m/nz_pppm); + + for (l = nylo_fft; l <= nyhi_fft; l++) { + const int lper = l - ny_pppm*(2*l/ny_pppm); + + for (k = nxlo_fft; k <= nxhi_fft; k++) { + const int kper = k - nx_pppm*(2*k/nx_pppm); + + sqk = square(unitkx*kper) + square(unitky*lper) + square(unitkz*mper); + + if (sqk != 0.0) { + + sum1 = 0.0; + sum2 = 0.0; + sum3 = 0.0; + sum4 = 0.0; + for (nx = -2; nx <= 2; nx++) { + qx = unitkx*(kper+nx_pppm*nx); + sx = exp(-0.25*square(qx/g_ewald)); + argx = 0.5*qx*xprd/nx_pppm; + wx = powsinxx(argx,twoorder); + qx *= qx; + + for (ny = -2; ny <= 2; ny++) { + qy = unitky*(lper+ny_pppm*ny); + sy = exp(-0.25*square(qy/g_ewald)); + argy = 0.5*qy*yprd/ny_pppm; + wy = powsinxx(argy,twoorder); + qy *= qy; + + for (nz = -2; nz <= 2; nz++) { + qz = unitkz*(mper+nz_pppm*nz); + sz = exp(-0.25*square(qz/g_ewald)); + argz = 0.5*qz*zprd_slab/nz_pppm; + wz = powsinxx(argz,twoorder); + qz *= qz; + + dot2 = qx+qy+qz; + u1 = sx*sy*sz; + u2 = wx*wy*wz; + sum1 += u1*u1/dot2*MY_4PI*MY_4PI; + sum2 += u1 * u2 * MY_4PI; + sum3 += u2; + sum4 += dot2*u2; + } + } + } + sum2 *= sum2; + qopt += sum1 - sum2/(sum3*sum4); + } + } + } + } + double qopt_all; + MPI_Allreduce(&qopt,&qopt_all,1,MPI_DOUBLE,MPI_SUM,world); + 
return qopt_all; +} + +/* ---------------------------------------------------------------------- + estimate kspace force error for ik method +------------------------------------------------------------------------- */ + +double PPPM::estimate_ik_error(double h, double prd, bigint natoms) +{ + double sum = 0.0; + for (int m = 0; m < order; m++) + sum += acons[order][m] * pow(h*g_ewald,2.0*m); + double value = q2 * pow(h*g_ewald,(double)order) * + sqrt(g_ewald*prd*sqrt(MY_2PI)*sum/natoms) / (prd*prd); + + return value; +} + +/* ---------------------------------------------------------------------- + adjust the g_ewald parameter to near its optimal value + using a Newton-Raphson solver +------------------------------------------------------------------------- */ + +void PPPM::adjust_gewald() +{ + double dx; + + for (int i = 0; i < LARGE; i++) { + dx = newton_raphson_f() / derivf(); + g_ewald -= dx; + if (fabs(newton_raphson_f()) < SMALL) return; + } + + char str[128]; + sprintf(str, "Could not compute g_ewald"); + error->all(FLERR, str); +} + +/* ---------------------------------------------------------------------- + Calculate f(x) using Newton-Raphson solver + ------------------------------------------------------------------------- */ + +double PPPM::newton_raphson_f() +{ + double xprd = domain->xprd; + double yprd = domain->yprd; + double zprd = domain->zprd; + bigint natoms = atom->natoms; + + double df_rspace = 2.0*q2*exp(-g_ewald*g_ewald*cutoff*cutoff) / + sqrt(natoms*cutoff*xprd*yprd*zprd); + + double df_kspace = compute_df_kspace(); + + return df_rspace - df_kspace; +} + +/* ---------------------------------------------------------------------- + Calculate numerical derivative f'(x) using forward difference + [f(x + h) - f(x)] / h + ------------------------------------------------------------------------- */ + +double PPPM::derivf() +{ + double h = 0.000001; //Derivative step-size + double df,f1,f2,g_ewald_old; + + f1 = newton_raphson_f(); + g_ewald_old = 
g_ewald; + g_ewald += h; + f2 = newton_raphson_f(); + g_ewald = g_ewald_old; + df = (f2 - f1)/h; + + return df; +} + +/* ---------------------------------------------------------------------- + Calculate the final estimate of the accuracy +------------------------------------------------------------------------- */ + +double PPPM::final_accuracy() +{ + double xprd = domain->xprd; + double yprd = domain->yprd; + double zprd = domain->zprd; + double zprd_slab = zprd*slab_volfactor; + bigint natoms = atom->natoms; + + double df_kspace = compute_df_kspace(); + double q2_over_sqrt = q2 / sqrt(natoms*cutoff*xprd*yprd*zprd); + double df_rspace = 2.0 * q2_over_sqrt * exp(-g_ewald*g_ewald*cutoff*cutoff); + double df_table = estimate_table_accuracy(q2_over_sqrt,df_rspace); + double estimated_accuracy = sqrt(df_kspace*df_kspace + df_rspace*df_rspace + + df_table*df_table); + + return estimated_accuracy; +} + +/* ---------------------------------------------------------------------- + set local subset of PPPM/FFT grid that I own + n xyz lo/hi in = 3d brick that I own (inclusive) + n xyz lo/hi out = 3d brick + ghost cells in 6 directions (inclusive) + n xyz lo/hi fft = FFT columns that I own (all of x dim, 2d decomp in yz) +------------------------------------------------------------------------- */ + +void PPPM::set_grid_local() +{ + // global indices of PPPM grid range from 0 to N-1 + // nlo_in,nhi_in = lower/upper limits of the 3d sub-brick of + // global PPPM grid that I own without ghost cells + // for slab PPPM, assign z grid as if it were not extended + + nxlo_in = static_cast (comm->xsplit[comm->myloc[0]] * nx_pppm); + nxhi_in = static_cast (comm->xsplit[comm->myloc[0]+1] * nx_pppm) - 1; + + nylo_in = static_cast (comm->ysplit[comm->myloc[1]] * ny_pppm); + nyhi_in = static_cast (comm->ysplit[comm->myloc[1]+1] * ny_pppm) - 1; + + nzlo_in = static_cast + (comm->zsplit[comm->myloc[2]] * nz_pppm/slab_volfactor); + nzhi_in = static_cast + (comm->zsplit[comm->myloc[2]+1] * 
nz_pppm/slab_volfactor) - 1; + + // nlower,nupper = stencil size for mapping particles to PPPM grid + + nlower = -(order-1)/2; + nupper = order/2; + + // shift values for particle <-> grid mapping + // add/subtract OFFSET to avoid int(-0.75) = 0 when want it to be -1 + + if (order % 2) shift = OFFSET + 0.5; + else shift = OFFSET; + if (order % 2) shiftone = 0.0; + else shiftone = 0.5; + + // nlo_out,nhi_out = lower/upper limits of the 3d sub-brick of + // global PPPM grid that my particles can contribute charge to + // effectively nlo_in,nhi_in + ghost cells + // nlo,nhi = global coords of grid pt to "lower left" of smallest/largest + // position a particle in my box can be at + // dist[3] = particle position bound = subbox + skin/2.0 + qdist + // qdist = offset due to TIP4P fictitious charge + // convert to triclinic if necessary + // nlo_out,nhi_out = nlo,nhi + stencil size for particle mapping + // for slab PPPM, assign z grid as if it were not extended + + double *prd,*sublo,*subhi; + + if (triclinic == 0) { + prd = domain->prd; + boxlo = domain->boxlo; + sublo = domain->sublo; + subhi = domain->subhi; + } else { + prd = domain->prd_lamda; + boxlo = domain->boxlo_lamda; + sublo = domain->sublo_lamda; + subhi = domain->subhi_lamda; + } + + double xprd = prd[0]; + double yprd = prd[1]; + double zprd = prd[2]; + double zprd_slab = zprd*slab_volfactor; + + double dist[3]; + double cuthalf = 0.5*neighbor->skin + qdist; + if (triclinic == 0) dist[0] = dist[1] = dist[2] = cuthalf; + else kspacebbox(cuthalf,&dist[0]); + + int nlo,nhi; + + nlo = static_cast ((sublo[0]-dist[0]-boxlo[0]) * + nx_pppm/xprd + shift) - OFFSET; + nhi = static_cast ((subhi[0]+dist[0]-boxlo[0]) * + nx_pppm/xprd + shift) - OFFSET; + nxlo_out = nlo + nlower; + nxhi_out = nhi + nupper; + + nlo = static_cast ((sublo[1]-dist[1]-boxlo[1]) * + ny_pppm/yprd + shift) - OFFSET; + nhi = static_cast ((subhi[1]+dist[1]-boxlo[1]) * + ny_pppm/yprd + shift) - OFFSET; + nylo_out = nlo + nlower; + nyhi_out = nhi 
+ nupper; + + nlo = static_cast ((sublo[2]-dist[2]-boxlo[2]) * + nz_pppm/zprd_slab + shift) - OFFSET; + nhi = static_cast ((subhi[2]+dist[2]-boxlo[2]) * + nz_pppm/zprd_slab + shift) - OFFSET; + nzlo_out = nlo + nlower; + nzhi_out = nhi + nupper; + + if (stagger_flag) { + nxhi_out++; + nyhi_out++; + nzhi_out++; + } + + // for slab PPPM, change the grid boundary for processors at +z end + // to include the empty volume between periodically repeating slabs + // for slab PPPM, want charge data communicated from -z proc to +z proc, + // but not vice versa, also want field data communicated from +z proc to + // -z proc, but not vice versa + // this is accomplished by nzhi_in = nzhi_out on +z end (no ghost cells) + // also insure no other procs use ghost cells beyond +z limit + + if (slabflag == 1) { + if (comm->myloc[2] == comm->procgrid[2]-1) + nzhi_in = nzhi_out = nz_pppm - 1; + nzhi_out = MIN(nzhi_out,nz_pppm-1); + } + + // decomposition of FFT mesh + // global indices range from 0 to N-1 + // proc owns entire x-dimension, clumps of columns in y,z dimensions + // npey_fft,npez_fft = # of procs in y,z dims + // if nprocs is small enough, proc can own 1 or more entire xy planes, + // else proc owns 2d sub-blocks of yz plane + // me_y,me_z = which proc (0-npe_fft-1) I am in y,z dimensions + // nlo_fft,nhi_fft = lower/upper limit of the section + // of the global FFT mesh that I own + + int npey_fft,npez_fft; + if (nz_pppm >= nprocs) { + npey_fft = 1; + npez_fft = nprocs; + } else procs2grid2d(nprocs,ny_pppm,nz_pppm,&npey_fft,&npez_fft); + + int me_y = me % npey_fft; + int me_z = me / npey_fft; + + nxlo_fft = 0; + nxhi_fft = nx_pppm - 1; + nylo_fft = me_y*ny_pppm/npey_fft; + nyhi_fft = (me_y+1)*ny_pppm/npey_fft - 1; + nzlo_fft = me_z*nz_pppm/npez_fft; + nzhi_fft = (me_z+1)*nz_pppm/npez_fft - 1; + + // PPPM grid pts owned by this proc, including ghosts + + ngrid = (nxhi_out-nxlo_out+1) * (nyhi_out-nylo_out+1) * + (nzhi_out-nzlo_out+1); + + // FFT grids owned by this proc, 
without ghosts + // nfft = FFT points in FFT decomposition on this proc + // nfft_brick = FFT points in 3d brick-decomposition on this proc + // nfft_both = greater of 2 values + + nfft = (nxhi_fft-nxlo_fft+1) * (nyhi_fft-nylo_fft+1) * + (nzhi_fft-nzlo_fft+1); + int nfft_brick = (nxhi_in-nxlo_in+1) * (nyhi_in-nylo_in+1) * + (nzhi_in-nzlo_in+1); + nfft_both = MAX(nfft,nfft_brick); +} + +/* ---------------------------------------------------------------------- + pre-compute Green's function denominator expansion coeffs, Gamma(2n) +------------------------------------------------------------------------- */ + +void PPPM::compute_gf_denom() +{ + int k,l,m; + + for (l = 1; l < order; l++) gf_b[l] = 0.0; + gf_b[0] = 1.0; + + for (m = 1; m < order; m++) { + for (l = m; l > 0; l--) + gf_b[l] = 4.0 * (gf_b[l]*(l-m)*(l-m-0.5)-gf_b[l-1]*(l-m-1)*(l-m-1)); + gf_b[0] = 4.0 * (gf_b[0]*(l-m)*(l-m-0.5)); + } + + bigint ifact = 1; + for (k = 1; k < 2*order; k++) ifact *= k; + double gaminv = 1.0/ifact; + for (l = 0; l < order; l++) gf_b[l] *= gaminv; +} + +/* ---------------------------------------------------------------------- + pre-compute modified (Hockney-Eastwood) Coulomb Green's function +------------------------------------------------------------------------- */ + +void PPPM::compute_gf_ik() +{ + const double * const prd = domain->prd; + + const double xprd = prd[0]; + const double yprd = prd[1]; + const double zprd = prd[2]; + const double zprd_slab = zprd*slab_volfactor; + const double unitkx = (MY_2PI/xprd); + const double unitky = (MY_2PI/yprd); + const double unitkz = (MY_2PI/zprd_slab); + + double snx,sny,snz; + double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz; + double sum1,dot1,dot2; + double numerator,denominator; + double sqk; + + int k,l,m,n,nx,ny,nz,kper,lper,mper; + + const int nbx = static_cast ((g_ewald*xprd/(MY_PI*nx_pppm)) * + pow(-log(EPS_HOC),0.25)); + const int nby = static_cast ((g_ewald*yprd/(MY_PI*ny_pppm)) * + pow(-log(EPS_HOC),0.25)); + const int 
nbz = static_cast ((g_ewald*zprd_slab/(MY_PI*nz_pppm)) * + pow(-log(EPS_HOC),0.25)); + const int twoorder = 2*order; + + n = 0; + for (m = nzlo_fft; m <= nzhi_fft; m++) { + mper = m - nz_pppm*(2*m/nz_pppm); + snz = square(sin(0.5*unitkz*mper*zprd_slab/nz_pppm)); + + for (l = nylo_fft; l <= nyhi_fft; l++) { + lper = l - ny_pppm*(2*l/ny_pppm); + sny = square(sin(0.5*unitky*lper*yprd/ny_pppm)); + + for (k = nxlo_fft; k <= nxhi_fft; k++) { + kper = k - nx_pppm*(2*k/nx_pppm); + snx = square(sin(0.5*unitkx*kper*xprd/nx_pppm)); + + sqk = square(unitkx*kper) + square(unitky*lper) + square(unitkz*mper); + + if (sqk != 0.0) { + numerator = 12.5663706/sqk; + denominator = gf_denom(snx,sny,snz); + sum1 = 0.0; + + for (nx = -nbx; nx <= nbx; nx++) { + qx = unitkx*(kper+nx_pppm*nx); + sx = exp(-0.25*square(qx/g_ewald)); + argx = 0.5*qx*xprd/nx_pppm; + wx = powsinxx(argx,twoorder); + + for (ny = -nby; ny <= nby; ny++) { + qy = unitky*(lper+ny_pppm*ny); + sy = exp(-0.25*square(qy/g_ewald)); + argy = 0.5*qy*yprd/ny_pppm; + wy = powsinxx(argy,twoorder); + + for (nz = -nbz; nz <= nbz; nz++) { + qz = unitkz*(mper+nz_pppm*nz); + sz = exp(-0.25*square(qz/g_ewald)); + argz = 0.5*qz*zprd_slab/nz_pppm; + wz = powsinxx(argz,twoorder); + + dot1 = unitkx*kper*qx + unitky*lper*qy + unitkz*mper*qz; + dot2 = qx*qx+qy*qy+qz*qz; + sum1 += (dot1/dot2) * sx*sy*sz * wx*wy*wz; + } + } + } + greensfn[n++] = numerator*sum1/denominator; + } else greensfn[n++] = 0.0; + } + } + } +} + +/* ---------------------------------------------------------------------- + pre-compute modified (Hockney-Eastwood) Coulomb Green's function + for a triclinic system +------------------------------------------------------------------------- */ + +void PPPM::compute_gf_ik_triclinic() +{ + double snx,sny,snz; + double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz; + double sum1,dot1,dot2; + double numerator,denominator; + double sqk; + + int k,l,m,n,nx,ny,nz,kper,lper,mper; + + double tmp[3]; + tmp[0] = (g_ewald/(MY_PI*nx_pppm)) * 
pow(-log(EPS_HOC),0.25); + tmp[1] = (g_ewald/(MY_PI*ny_pppm)) * pow(-log(EPS_HOC),0.25); + tmp[2] = (g_ewald/(MY_PI*nz_pppm)) * pow(-log(EPS_HOC),0.25); + lamda2xT(&tmp[0],&tmp[0]); + const int nbx = static_cast (tmp[0]); + const int nby = static_cast (tmp[1]); + const int nbz = static_cast (tmp[2]); + + const int twoorder = 2*order; + + n = 0; + for (m = nzlo_fft; m <= nzhi_fft; m++) { + mper = m - nz_pppm*(2*m/nz_pppm); + snz = square(sin(MY_PI*mper/nz_pppm)); + + for (l = nylo_fft; l <= nyhi_fft; l++) { + lper = l - ny_pppm*(2*l/ny_pppm); + sny = square(sin(MY_PI*lper/ny_pppm)); + + for (k = nxlo_fft; k <= nxhi_fft; k++) { + kper = k - nx_pppm*(2*k/nx_pppm); + snx = square(sin(MY_PI*kper/nx_pppm)); + + double unitk_lamda[3]; + unitk_lamda[0] = 2.0*MY_PI*kper; + unitk_lamda[1] = 2.0*MY_PI*lper; + unitk_lamda[2] = 2.0*MY_PI*mper; + x2lamdaT(&unitk_lamda[0],&unitk_lamda[0]); + + sqk = square(unitk_lamda[0]) + square(unitk_lamda[1]) + square(unitk_lamda[2]); + + if (sqk != 0.0) { + numerator = 12.5663706/sqk; + denominator = gf_denom(snx,sny,snz); + sum1 = 0.0; + + for (nx = -nbx; nx <= nbx; nx++) { + argx = MY_PI*kper/nx_pppm + MY_PI*nx; + wx = powsinxx(argx,twoorder); + + for (ny = -nby; ny <= nby; ny++) { + argy = MY_PI*lper/ny_pppm + MY_PI*ny; + wy = powsinxx(argy,twoorder); + + for (nz = -nbz; nz <= nbz; nz++) { + argz = MY_PI*mper/nz_pppm + MY_PI*nz; + wz = powsinxx(argz,twoorder); + + double b[3]; + b[0] = 2.0*MY_PI*nx_pppm*nx; + b[1] = 2.0*MY_PI*ny_pppm*ny; + b[2] = 2.0*MY_PI*nz_pppm*nz; + x2lamdaT(&b[0],&b[0]); + + qx = unitk_lamda[0]+b[0]; + sx = exp(-0.25*square(qx/g_ewald)); + + qy = unitk_lamda[1]+b[1]; + sy = exp(-0.25*square(qy/g_ewald)); + + qz = unitk_lamda[2]+b[2]; + sz = exp(-0.25*square(qz/g_ewald)); + + dot1 = unitk_lamda[0]*qx + unitk_lamda[1]*qy + unitk_lamda[2]*qz; + dot2 = qx*qx+qy*qy+qz*qz; + sum1 += (dot1/dot2) * sx*sy*sz * wx*wy*wz; + } + } + } + greensfn[n++] = numerator*sum1/denominator; + } else greensfn[n++] = 0.0; + } + } + } +} + 
+/* ---------------------------------------------------------------------- + compute optimized Green's function for energy calculation +------------------------------------------------------------------------- */ + +void PPPM::compute_gf_ad() +{ + const double * const prd = domain->prd; + + const double xprd = prd[0]; + const double yprd = prd[1]; + const double zprd = prd[2]; + const double zprd_slab = zprd*slab_volfactor; + const double unitkx = (MY_2PI/xprd); + const double unitky = (MY_2PI/yprd); + const double unitkz = (MY_2PI/zprd_slab); + + double snx,sny,snz,sqk; + double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz; + double numerator,denominator; + int k,l,m,n,kper,lper,mper; + + const int twoorder = 2*order; + + for (int i = 0; i < 6; i++) sf_coeff[i] = 0.0; + + n = 0; + for (m = nzlo_fft; m <= nzhi_fft; m++) { + mper = m - nz_pppm*(2*m/nz_pppm); + qz = unitkz*mper; + snz = square(sin(0.5*qz*zprd_slab/nz_pppm)); + sz = exp(-0.25*square(qz/g_ewald)); + argz = 0.5*qz*zprd_slab/nz_pppm; + wz = powsinxx(argz,twoorder); + + for (l = nylo_fft; l <= nyhi_fft; l++) { + lper = l - ny_pppm*(2*l/ny_pppm); + qy = unitky*lper; + sny = square(sin(0.5*qy*yprd/ny_pppm)); + sy = exp(-0.25*square(qy/g_ewald)); + argy = 0.5*qy*yprd/ny_pppm; + wy = powsinxx(argy,twoorder); + + for (k = nxlo_fft; k <= nxhi_fft; k++) { + kper = k - nx_pppm*(2*k/nx_pppm); + qx = unitkx*kper; + snx = square(sin(0.5*qx*xprd/nx_pppm)); + sx = exp(-0.25*square(qx/g_ewald)); + argx = 0.5*qx*xprd/nx_pppm; + wx = powsinxx(argx,twoorder); + + sqk = qx*qx + qy*qy + qz*qz; + + if (sqk != 0.0) { + numerator = MY_4PI/sqk; + denominator = gf_denom(snx,sny,snz); + greensfn[n] = numerator*sx*sy*sz*wx*wy*wz/denominator; + sf_coeff[0] += sf_precoeff1[n]*greensfn[n]; + sf_coeff[1] += sf_precoeff2[n]*greensfn[n]; + sf_coeff[2] += sf_precoeff3[n]*greensfn[n]; + sf_coeff[3] += sf_precoeff4[n]*greensfn[n]; + sf_coeff[4] += sf_precoeff5[n]*greensfn[n]; + sf_coeff[5] += sf_precoeff6[n]*greensfn[n]; + n++; + } else { + 
greensfn[n] = 0.0; + sf_coeff[0] += sf_precoeff1[n]*greensfn[n]; + sf_coeff[1] += sf_precoeff2[n]*greensfn[n]; + sf_coeff[2] += sf_precoeff3[n]*greensfn[n]; + sf_coeff[3] += sf_precoeff4[n]*greensfn[n]; + sf_coeff[4] += sf_precoeff5[n]*greensfn[n]; + sf_coeff[5] += sf_precoeff6[n]*greensfn[n]; + n++; + } + } + } + } + + // compute the coefficients for the self-force correction + + double prex, prey, prez; + prex = prey = prez = MY_PI/volume; + prex *= nx_pppm/xprd; + prey *= ny_pppm/yprd; + prez *= nz_pppm/zprd_slab; + sf_coeff[0] *= prex; + sf_coeff[1] *= prex*2; + sf_coeff[2] *= prey; + sf_coeff[3] *= prey*2; + sf_coeff[4] *= prez; + sf_coeff[5] *= prez*2; + + // communicate values with other procs + + double tmp[6]; + MPI_Allreduce(sf_coeff,tmp,6,MPI_DOUBLE,MPI_SUM,world); + for (n = 0; n < 6; n++) sf_coeff[n] = tmp[n]; +} + +/* ---------------------------------------------------------------------- + compute self force coefficients for ad-differentiation scheme +------------------------------------------------------------------------- */ + +void PPPM::compute_sf_precoeff() +{ + int i,k,l,m,n; + int nx,ny,nz,kper,lper,mper; + double wx0[5],wy0[5],wz0[5],wx1[5],wy1[5],wz1[5],wx2[5],wy2[5],wz2[5]; + double qx0,qy0,qz0,qx1,qy1,qz1,qx2,qy2,qz2; + double u0,u1,u2,u3,u4,u5,u6; + double sum1,sum2,sum3,sum4,sum5,sum6; + + n = 0; + for (m = nzlo_fft; m <= nzhi_fft; m++) { + mper = m - nz_pppm*(2*m/nz_pppm); + + for (l = nylo_fft; l <= nyhi_fft; l++) { + lper = l - ny_pppm*(2*l/ny_pppm); + + for (k = nxlo_fft; k <= nxhi_fft; k++) { + kper = k - nx_pppm*(2*k/nx_pppm); + + sum1 = sum2 = sum3 = sum4 = sum5 = sum6 = 0.0; + for (i = 0; i < 5; i++) { + + qx0 = MY_2PI*(kper+nx_pppm*(i-2)); + qx1 = MY_2PI*(kper+nx_pppm*(i-1)); + qx2 = MY_2PI*(kper+nx_pppm*(i )); + wx0[i] = powsinxx(0.5*qx0/nx_pppm,order); + wx1[i] = powsinxx(0.5*qx1/nx_pppm,order); + wx2[i] = powsinxx(0.5*qx2/nx_pppm,order); + + qy0 = MY_2PI*(lper+ny_pppm*(i-2)); + qy1 = MY_2PI*(lper+ny_pppm*(i-1)); + qy2 = 
MY_2PI*(lper+ny_pppm*(i )); + wy0[i] = powsinxx(0.5*qy0/ny_pppm,order); + wy1[i] = powsinxx(0.5*qy1/ny_pppm,order); + wy2[i] = powsinxx(0.5*qy2/ny_pppm,order); + + qz0 = MY_2PI*(mper+nz_pppm*(i-2)); + qz1 = MY_2PI*(mper+nz_pppm*(i-1)); + qz2 = MY_2PI*(mper+nz_pppm*(i )); + + wz0[i] = powsinxx(0.5*qz0/nz_pppm,order); + wz1[i] = powsinxx(0.5*qz1/nz_pppm,order); + wz2[i] = powsinxx(0.5*qz2/nz_pppm,order); + } + + for (nx = 0; nx < 5; nx++) { + for (ny = 0; ny < 5; ny++) { + for (nz = 0; nz < 5; nz++) { + u0 = wx0[nx]*wy0[ny]*wz0[nz]; + u1 = wx1[nx]*wy0[ny]*wz0[nz]; + u2 = wx2[nx]*wy0[ny]*wz0[nz]; + u3 = wx0[nx]*wy1[ny]*wz0[nz]; + u4 = wx0[nx]*wy2[ny]*wz0[nz]; + u5 = wx0[nx]*wy0[ny]*wz1[nz]; + u6 = wx0[nx]*wy0[ny]*wz2[nz]; + + sum1 += u0*u1; + sum2 += u0*u2; + sum3 += u0*u3; + sum4 += u0*u4; + sum5 += u0*u5; + sum6 += u0*u6; + } + } + } + + // store values + + sf_precoeff1[n] = sum1; + sf_precoeff2[n] = sum2; + sf_precoeff3[n] = sum3; + sf_precoeff4[n] = sum4; + sf_precoeff5[n] = sum5; + sf_precoeff6[n++] = sum6; + } + } + } +} + +/* ---------------------------------------------------------------------- + find center grid pt for each of my particles + check that full stencil for the particle will fit in my 3d brick + store central grid pt indices in part2grid array +------------------------------------------------------------------------- */ + +void PPPM::particle_map() +{ + int nx,ny,nz; + + double **x = atom->x; + int nlocal = atom->nlocal; + + int flag = 0; + for (int i = 0; i < nlocal; i++) { + + // (nx,ny,nz) = global coords of grid pt to "lower left" of charge + // current particle coord can be outside global and local box + // add/subtract OFFSET to avoid int(-0.75) = 0 when want it to be -1 + + nx = static_cast ((x[i][0]-boxlo[0])*delxinv+shift) - OFFSET; + ny = static_cast ((x[i][1]-boxlo[1])*delyinv+shift) - OFFSET; + nz = static_cast ((x[i][2]-boxlo[2])*delzinv+shift) - OFFSET; + + part2grid[i][0] = nx; + part2grid[i][1] = ny; + part2grid[i][2] = nz; + + // 
check that entire stencil around nx,ny,nz will fit in my 3d brick + + if (nx+nlower < nxlo_out || nx+nupper > nxhi_out || + ny+nlower < nylo_out || ny+nupper > nyhi_out || + nz+nlower < nzlo_out || nz+nupper > nzhi_out) + flag = 1; + } + + if (flag) error->one(FLERR,"Out of range atoms - cannot compute PPPM"); +} + +/* ---------------------------------------------------------------------- + create discretized "density" on section of global grid due to my particles + density(x,y,z) = charge "density" at grid points of my 3d brick + (nxlo:nxhi,nylo:nyhi,nzlo:nzhi) is extent of my brick (including ghosts) + in global grid +------------------------------------------------------------------------- */ + +void PPPM::make_rho() +{ + int l,m,n,nx,ny,nz,mx,my,mz; + FFT_SCALAR dx,dy,dz,x0,y0,z0; + + // clear 3d density array + + memset(&(density_brick[nzlo_out][nylo_out][nxlo_out]),0, + ngrid*sizeof(FFT_SCALAR)); + + // loop over my charges, add their contribution to nearby grid points + // (nx,ny,nz) = global coords of grid pt to "lower left" of charge + // (dx,dy,dz) = distance to "lower left" grid pt + // (mx,my,mz) = global coords of moving stencil pt + + double *q = atom->q; + double **x = atom->x; + int nlocal = atom->nlocal; + + for (int i = 0; i < nlocal; i++) { + + nx = part2grid[i][0]; + ny = part2grid[i][1]; + nz = part2grid[i][2]; + dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv; + dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv; + dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv; + + compute_rho1d(dx,dy,dz); + + z0 = delvolinv * q[i]; + for (n = nlower; n <= nupper; n++) { + mz = n+nz; + y0 = z0*rho1d[2][n]; + for (m = nlower; m <= nupper; m++) { + my = m+ny; + x0 = y0*rho1d[1][m]; + for (l = nlower; l <= nupper; l++) { + mx = l+nx; + density_brick[mz][my][mx] += x0*rho1d[0][l]; + } + } + } + } +} + +/* ---------------------------------------------------------------------- + remap density from 3d brick decomposition to FFT decomposition 
+------------------------------------------------------------------------- */ + +void PPPM::brick2fft() +{ + int n,ix,iy,iz; + + // copy grabs inner portion of density from 3d brick + // remap could be done as pre-stage of FFT, + // but this works optimally on only double values, not complex values + + n = 0; + for (iz = nzlo_in; iz <= nzhi_in; iz++) + for (iy = nylo_in; iy <= nyhi_in; iy++) + for (ix = nxlo_in; ix <= nxhi_in; ix++) + density_fft[n++] = density_brick[iz][iy][ix]; + + remap->perform(density_fft,density_fft,work1); +} + +/* ---------------------------------------------------------------------- + FFT-based Poisson solver +------------------------------------------------------------------------- */ + +void PPPM::poisson() +{ + if (differentiation_flag == 1) poisson_ad(); + else poisson_ik(); +} + +/* ---------------------------------------------------------------------- + FFT-based Poisson solver for ik +------------------------------------------------------------------------- */ + +void PPPM::poisson_ik() +{ + int i,j,k,n; + double eng; + + // transform charge density (r -> k) + + n = 0; + for (i = 0; i < nfft; i++) { + work1[n++] = density_fft[i]; + work1[n++] = ZEROF; + } + + fft1->compute(work1,work1,1); + + // global energy and virial contribution + + double scaleinv = 1.0/(nx_pppm*ny_pppm*nz_pppm); + double s2 = scaleinv*scaleinv; + + if (eflag_global || vflag_global) { + if (vflag_global) { + n = 0; + for (i = 0; i < nfft; i++) { + eng = s2 * greensfn[i] * (work1[n]*work1[n] + work1[n+1]*work1[n+1]); + for (j = 0; j < 6; j++) virial[j] += eng*vg[i][j]; + if (eflag_global) energy += eng; + n += 2; + } + } else { + n = 0; + for (i = 0; i < nfft; i++) { + energy += + s2 * greensfn[i] * (work1[n]*work1[n] + work1[n+1]*work1[n+1]); + n += 2; + } + } + } + + // scale by 1/total-grid-pts to get rho(k) + // multiply by Green's function to get V(k) + + n = 0; + for (i = 0; i < nfft; i++) { + work1[n++] *= scaleinv * greensfn[i]; + work1[n++] *= scaleinv 
* greensfn[i]; + } + + // extra FFTs for per-atom energy/virial + + if (evflag_atom) poisson_peratom(); + + // triclinic system + + if (triclinic) { + poisson_ik_triclinic(); + return; + } + + // compute gradients of V(r) in each of 3 dims by transformimg -ik*V(k) + // FFT leaves data in 3d brick decomposition + // copy it into inner portion of vdx,vdy,vdz arrays + + // x direction gradient + + n = 0; + for (k = nzlo_fft; k <= nzhi_fft; k++) + for (j = nylo_fft; j <= nyhi_fft; j++) + for (i = nxlo_fft; i <= nxhi_fft; i++) { + work2[n] = fkx[i]*work1[n+1]; + work2[n+1] = -fkx[i]*work1[n]; + n += 2; + } + + fft2->compute(work2,work2,-1); + + n = 0; + for (k = nzlo_in; k <= nzhi_in; k++) + for (j = nylo_in; j <= nyhi_in; j++) + for (i = nxlo_in; i <= nxhi_in; i++) { + vdx_brick[k][j][i] = work2[n]; + n += 2; + } + + // y direction gradient + + n = 0; + for (k = nzlo_fft; k <= nzhi_fft; k++) + for (j = nylo_fft; j <= nyhi_fft; j++) + for (i = nxlo_fft; i <= nxhi_fft; i++) { + work2[n] = fky[j]*work1[n+1]; + work2[n+1] = -fky[j]*work1[n]; + n += 2; + } + + fft2->compute(work2,work2,-1); + + n = 0; + for (k = nzlo_in; k <= nzhi_in; k++) + for (j = nylo_in; j <= nyhi_in; j++) + for (i = nxlo_in; i <= nxhi_in; i++) { + vdy_brick[k][j][i] = work2[n]; + n += 2; + } + + // z direction gradient + + n = 0; + for (k = nzlo_fft; k <= nzhi_fft; k++) + for (j = nylo_fft; j <= nyhi_fft; j++) + for (i = nxlo_fft; i <= nxhi_fft; i++) { + work2[n] = fkz[k]*work1[n+1]; + work2[n+1] = -fkz[k]*work1[n]; + n += 2; + } + + fft2->compute(work2,work2,-1); + + n = 0; + for (k = nzlo_in; k <= nzhi_in; k++) + for (j = nylo_in; j <= nyhi_in; j++) + for (i = nxlo_in; i <= nxhi_in; i++) { + vdz_brick[k][j][i] = work2[n]; + n += 2; + } +} + +/* ---------------------------------------------------------------------- + FFT-based Poisson solver for ik for a triclinic system +------------------------------------------------------------------------- */ + +void PPPM::poisson_ik_triclinic() +{ + int 
i,j,k,n; + + // compute gradients of V(r) in each of 3 dims by transformimg -ik*V(k) + // FFT leaves data in 3d brick decomposition + // copy it into inner portion of vdx,vdy,vdz arrays + + // x direction gradient + + n = 0; + for (i = 0; i < nfft; i++) { + work2[n] = fkx[i]*work1[n+1]; + work2[n+1] = -fkx[i]*work1[n]; + n += 2; + } + + fft2->compute(work2,work2,-1); + + n = 0; + for (k = nzlo_in; k <= nzhi_in; k++) + for (j = nylo_in; j <= nyhi_in; j++) + for (i = nxlo_in; i <= nxhi_in; i++) { + vdx_brick[k][j][i] = work2[n]; + n += 2; + } + + // y direction gradient + + n = 0; + for (i = 0; i < nfft; i++) { + work2[n] = fky[i]*work1[n+1]; + work2[n+1] = -fky[i]*work1[n]; + n += 2; + } + + fft2->compute(work2,work2,-1); + + n = 0; + for (k = nzlo_in; k <= nzhi_in; k++) + for (j = nylo_in; j <= nyhi_in; j++) + for (i = nxlo_in; i <= nxhi_in; i++) { + vdy_brick[k][j][i] = work2[n]; + n += 2; + } + + // z direction gradient + + n = 0; + for (i = 0; i < nfft; i++) { + work2[n] = fkz[i]*work1[n+1]; + work2[n+1] = -fkz[i]*work1[n]; + n += 2; + } + + fft2->compute(work2,work2,-1); + + n = 0; + for (k = nzlo_in; k <= nzhi_in; k++) + for (j = nylo_in; j <= nyhi_in; j++) + for (i = nxlo_in; i <= nxhi_in; i++) { + vdz_brick[k][j][i] = work2[n]; + n += 2; + } +} + +/* ---------------------------------------------------------------------- + FFT-based Poisson solver for ad +------------------------------------------------------------------------- */ + +void PPPM::poisson_ad() +{ + int i,j,k,n; + double eng; + + // transform charge density (r -> k) + + n = 0; + for (i = 0; i < nfft; i++) { + work1[n++] = density_fft[i]; + work1[n++] = ZEROF; + } + + fft1->compute(work1,work1,1); + + // global energy and virial contribution + + double scaleinv = 1.0/(nx_pppm*ny_pppm*nz_pppm); + double s2 = scaleinv*scaleinv; + + if (eflag_global || vflag_global) { + if (vflag_global) { + n = 0; + for (i = 0; i < nfft; i++) { + eng = s2 * greensfn[i] * (work1[n]*work1[n] + work1[n+1]*work1[n+1]); 
+ for (j = 0; j < 6; j++) virial[j] += eng*vg[i][j]; + if (eflag_global) energy += eng; + n += 2; + } + } else { + n = 0; + for (i = 0; i < nfft; i++) { + energy += + s2 * greensfn[i] * (work1[n]*work1[n] + work1[n+1]*work1[n+1]); + n += 2; + } + } + } + + // scale by 1/total-grid-pts to get rho(k) + // multiply by Green's function to get V(k) + + n = 0; + for (i = 0; i < nfft; i++) { + work1[n++] *= scaleinv * greensfn[i]; + work1[n++] *= scaleinv * greensfn[i]; + } + + // extra FFTs for per-atom energy/virial + + if (vflag_atom) poisson_peratom(); + + n = 0; + for (i = 0; i < nfft; i++) { + work2[n] = work1[n]; + work2[n+1] = work1[n+1]; + n += 2; + } + + fft2->compute(work2,work2,-1); + + n = 0; + for (k = nzlo_in; k <= nzhi_in; k++) + for (j = nylo_in; j <= nyhi_in; j++) + for (i = nxlo_in; i <= nxhi_in; i++) { + u_brick[k][j][i] = work2[n]; + n += 2; + } +} + +/* ---------------------------------------------------------------------- + FFT-based Poisson solver for per-atom energy/virial +------------------------------------------------------------------------- */ + +void PPPM::poisson_peratom() +{ + int i,j,k,n; + + // energy + + if (eflag_atom && differentiation_flag != 1) { + n = 0; + for (i = 0; i < nfft; i++) { + work2[n] = work1[n]; + work2[n+1] = work1[n+1]; + n += 2; + } + + fft2->compute(work2,work2,-1); + + n = 0; + for (k = nzlo_in; k <= nzhi_in; k++) + for (j = nylo_in; j <= nyhi_in; j++) + for (i = nxlo_in; i <= nxhi_in; i++) { + u_brick[k][j][i] = work2[n]; + n += 2; + } + } + + // 6 components of virial in v0 thru v5 + + if (!vflag_atom) return; + + n = 0; + for (i = 0; i < nfft; i++) { + work2[n] = work1[n]*vg[i][0]; + work2[n+1] = work1[n+1]*vg[i][0]; + n += 2; + } + + fft2->compute(work2,work2,-1); + + n = 0; + for (k = nzlo_in; k <= nzhi_in; k++) + for (j = nylo_in; j <= nyhi_in; j++) + for (i = nxlo_in; i <= nxhi_in; i++) { + v0_brick[k][j][i] = work2[n]; + n += 2; + } + + n = 0; + for (i = 0; i < nfft; i++) { + work2[n] = work1[n]*vg[i][1]; 
+ work2[n+1] = work1[n+1]*vg[i][1]; + n += 2; + } + + fft2->compute(work2,work2,-1); + + n = 0; + for (k = nzlo_in; k <= nzhi_in; k++) + for (j = nylo_in; j <= nyhi_in; j++) + for (i = nxlo_in; i <= nxhi_in; i++) { + v1_brick[k][j][i] = work2[n]; + n += 2; + } + + n = 0; + for (i = 0; i < nfft; i++) { + work2[n] = work1[n]*vg[i][2]; + work2[n+1] = work1[n+1]*vg[i][2]; + n += 2; + } + + fft2->compute(work2,work2,-1); + + n = 0; + for (k = nzlo_in; k <= nzhi_in; k++) + for (j = nylo_in; j <= nyhi_in; j++) + for (i = nxlo_in; i <= nxhi_in; i++) { + v2_brick[k][j][i] = work2[n]; + n += 2; + } + + n = 0; + for (i = 0; i < nfft; i++) { + work2[n] = work1[n]*vg[i][3]; + work2[n+1] = work1[n+1]*vg[i][3]; + n += 2; + } + + fft2->compute(work2,work2,-1); + + n = 0; + for (k = nzlo_in; k <= nzhi_in; k++) + for (j = nylo_in; j <= nyhi_in; j++) + for (i = nxlo_in; i <= nxhi_in; i++) { + v3_brick[k][j][i] = work2[n]; + n += 2; + } + + n = 0; + for (i = 0; i < nfft; i++) { + work2[n] = work1[n]*vg[i][4]; + work2[n+1] = work1[n+1]*vg[i][4]; + n += 2; + } + + fft2->compute(work2,work2,-1); + + n = 0; + for (k = nzlo_in; k <= nzhi_in; k++) + for (j = nylo_in; j <= nyhi_in; j++) + for (i = nxlo_in; i <= nxhi_in; i++) { + v4_brick[k][j][i] = work2[n]; + n += 2; + } + + n = 0; + for (i = 0; i < nfft; i++) { + work2[n] = work1[n]*vg[i][5]; + work2[n+1] = work1[n+1]*vg[i][5]; + n += 2; + } + + fft2->compute(work2,work2,-1); + + n = 0; + for (k = nzlo_in; k <= nzhi_in; k++) + for (j = nylo_in; j <= nyhi_in; j++) + for (i = nxlo_in; i <= nxhi_in; i++) { + v5_brick[k][j][i] = work2[n]; + n += 2; + } +} + +/* ---------------------------------------------------------------------- + interpolate from grid to get electric field & force on my particles +------------------------------------------------------------------------- */ + +void PPPM::fieldforce() +{ + if (differentiation_flag == 1) fieldforce_ad(); + else fieldforce_ik(); +} + +/* 
---------------------------------------------------------------------- + interpolate from grid to get electric field & force on my particles for ik +------------------------------------------------------------------------- */ + +void PPPM::fieldforce_ik() +{ + int i,l,m,n,nx,ny,nz,mx,my,mz; + FFT_SCALAR dx,dy,dz,x0,y0,z0; + FFT_SCALAR ekx,eky,ekz; + + // loop over my charges, interpolate electric field from nearby grid points + // (nx,ny,nz) = global coords of grid pt to "lower left" of charge + // (dx,dy,dz) = distance to "lower left" grid pt + // (mx,my,mz) = global coords of moving stencil pt + // ek = 3 components of E-field on particle + + double *q = atom->q; + double **x = atom->x; + double **f = atom->f; + + int nlocal = atom->nlocal; + + for (i = 0; i < nlocal; i++) { + nx = part2grid[i][0]; + ny = part2grid[i][1]; + nz = part2grid[i][2]; + dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv; + dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv; + dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv; + + compute_rho1d(dx,dy,dz); + + ekx = eky = ekz = ZEROF; + for (n = nlower; n <= nupper; n++) { + mz = n+nz; + z0 = rho1d[2][n]; + for (m = nlower; m <= nupper; m++) { + my = m+ny; + y0 = z0*rho1d[1][m]; + for (l = nlower; l <= nupper; l++) { + mx = l+nx; + x0 = y0*rho1d[0][l]; + ekx -= x0*vdx_brick[mz][my][mx]; + eky -= x0*vdy_brick[mz][my][mx]; + ekz -= x0*vdz_brick[mz][my][mx]; + } + } + } + + // convert E-field to force + + const double qfactor = force->qqrd2e * scale * q[i]; + f[i][0] += qfactor*ekx; + f[i][1] += qfactor*eky; + if (slabflag != 2) f[i][2] += qfactor*ekz; + } +} + +/* ---------------------------------------------------------------------- + interpolate from grid to get electric field & force on my particles for ad +------------------------------------------------------------------------- */ + +void PPPM::fieldforce_ad() +{ + int i,l,m,n,nx,ny,nz,mx,my,mz; + FFT_SCALAR dx,dy,dz; + FFT_SCALAR ekx,eky,ekz; + double s1,s2,s3; + double sf = 0.0; + double *prd; + 
+ prd = domain->prd; + double xprd = prd[0]; + double yprd = prd[1]; + double zprd = prd[2]; + + double hx_inv = nx_pppm/xprd; + double hy_inv = ny_pppm/yprd; + double hz_inv = nz_pppm/zprd; + + // loop over my charges, interpolate electric field from nearby grid points + // (nx,ny,nz) = global coords of grid pt to "lower left" of charge + // (dx,dy,dz) = distance to "lower left" grid pt + // (mx,my,mz) = global coords of moving stencil pt + // ek = 3 components of E-field on particle + + double *q = atom->q; + double **x = atom->x; + double **f = atom->f; + + int nlocal = atom->nlocal; + + for (i = 0; i < nlocal; i++) { + nx = part2grid[i][0]; + ny = part2grid[i][1]; + nz = part2grid[i][2]; + dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv; + dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv; + dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv; + + compute_rho1d(dx,dy,dz); + compute_drho1d(dx,dy,dz); + + ekx = eky = ekz = ZEROF; + for (n = nlower; n <= nupper; n++) { + mz = n+nz; + for (m = nlower; m <= nupper; m++) { + my = m+ny; + for (l = nlower; l <= nupper; l++) { + mx = l+nx; + ekx += drho1d[0][l]*rho1d[1][m]*rho1d[2][n]*u_brick[mz][my][mx]; + eky += rho1d[0][l]*drho1d[1][m]*rho1d[2][n]*u_brick[mz][my][mx]; + ekz += rho1d[0][l]*rho1d[1][m]*drho1d[2][n]*u_brick[mz][my][mx]; + } + } + } + ekx *= hx_inv; + eky *= hy_inv; + ekz *= hz_inv; + + // convert E-field to force and substract self forces + + const double qfactor = force->qqrd2e * scale; + + s1 = x[i][0]*hx_inv; + s2 = x[i][1]*hy_inv; + s3 = x[i][2]*hz_inv; + sf = sf_coeff[0]*sin(2*MY_PI*s1); + sf += sf_coeff[1]*sin(4*MY_PI*s1); + sf *= 2*q[i]*q[i]; + f[i][0] += qfactor*(ekx*q[i] - sf); + + sf = sf_coeff[2]*sin(2*MY_PI*s2); + sf += sf_coeff[3]*sin(4*MY_PI*s2); + sf *= 2*q[i]*q[i]; + f[i][1] += qfactor*(eky*q[i] - sf); + + + sf = sf_coeff[4]*sin(2*MY_PI*s3); + sf += sf_coeff[5]*sin(4*MY_PI*s3); + sf *= 2*q[i]*q[i]; + if (slabflag != 2) f[i][2] += qfactor*(ekz*q[i] - sf); + } +} + +/* 
---------------------------------------------------------------------- + interpolate from grid to get per-atom energy/virial +------------------------------------------------------------------------- */ + +void PPPM::fieldforce_peratom() +{ + int i,l,m,n,nx,ny,nz,mx,my,mz; + FFT_SCALAR dx,dy,dz,x0,y0,z0; + FFT_SCALAR u,v0,v1,v2,v3,v4,v5; + + // loop over my charges, interpolate from nearby grid points + // (nx,ny,nz) = global coords of grid pt to "lower left" of charge + // (dx,dy,dz) = distance to "lower left" grid pt + // (mx,my,mz) = global coords of moving stencil pt + + double *q = atom->q; + double **x = atom->x; + + int nlocal = atom->nlocal; + + for (i = 0; i < nlocal; i++) { + nx = part2grid[i][0]; + ny = part2grid[i][1]; + nz = part2grid[i][2]; + dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv; + dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv; + dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv; + + compute_rho1d(dx,dy,dz); + + u = v0 = v1 = v2 = v3 = v4 = v5 = ZEROF; + for (n = nlower; n <= nupper; n++) { + mz = n+nz; + z0 = rho1d[2][n]; + for (m = nlower; m <= nupper; m++) { + my = m+ny; + y0 = z0*rho1d[1][m]; + for (l = nlower; l <= nupper; l++) { + mx = l+nx; + x0 = y0*rho1d[0][l]; + if (eflag_atom) u += x0*u_brick[mz][my][mx]; + if (vflag_atom) { + v0 += x0*v0_brick[mz][my][mx]; + v1 += x0*v1_brick[mz][my][mx]; + v2 += x0*v2_brick[mz][my][mx]; + v3 += x0*v3_brick[mz][my][mx]; + v4 += x0*v4_brick[mz][my][mx]; + v5 += x0*v5_brick[mz][my][mx]; + } + } + } + } + + if (eflag_atom) eatom[i] += q[i]*u; + if (vflag_atom) { + vatom[i][0] += q[i]*v0; + vatom[i][1] += q[i]*v1; + vatom[i][2] += q[i]*v2; + vatom[i][3] += q[i]*v3; + vatom[i][4] += q[i]*v4; + vatom[i][5] += q[i]*v5; + } + } +} + +/* ---------------------------------------------------------------------- + pack own values to buf to send to another proc +------------------------------------------------------------------------- */ + +void PPPM::pack_forward(int flag, FFT_SCALAR *buf, int nlist, int *list) +{ 
+ int n = 0; + + if (flag == FORWARD_IK) { + FFT_SCALAR *xsrc = &vdx_brick[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *ysrc = &vdy_brick[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *zsrc = &vdz_brick[nzlo_out][nylo_out][nxlo_out]; + for (int i = 0; i < nlist; i++) { + buf[n++] = xsrc[list[i]]; + buf[n++] = ysrc[list[i]]; + buf[n++] = zsrc[list[i]]; + } + } else if (flag == FORWARD_AD) { + FFT_SCALAR *src = &u_brick[nzlo_out][nylo_out][nxlo_out]; + for (int i = 0; i < nlist; i++) + buf[i] = src[list[i]]; + } else if (flag == FORWARD_IK_PERATOM) { + FFT_SCALAR *esrc = &u_brick[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *v0src = &v0_brick[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *v1src = &v1_brick[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *v2src = &v2_brick[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *v3src = &v3_brick[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *v4src = &v4_brick[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *v5src = &v5_brick[nzlo_out][nylo_out][nxlo_out]; + for (int i = 0; i < nlist; i++) { + if (eflag_atom) buf[n++] = esrc[list[i]]; + if (vflag_atom) { + buf[n++] = v0src[list[i]]; + buf[n++] = v1src[list[i]]; + buf[n++] = v2src[list[i]]; + buf[n++] = v3src[list[i]]; + buf[n++] = v4src[list[i]]; + buf[n++] = v5src[list[i]]; + } + } + } else if (flag == FORWARD_AD_PERATOM) { + FFT_SCALAR *v0src = &v0_brick[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *v1src = &v1_brick[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *v2src = &v2_brick[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *v3src = &v3_brick[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *v4src = &v4_brick[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *v5src = &v5_brick[nzlo_out][nylo_out][nxlo_out]; + for (int i = 0; i < nlist; i++) { + buf[n++] = v0src[list[i]]; + buf[n++] = v1src[list[i]]; + buf[n++] = v2src[list[i]]; + buf[n++] = v3src[list[i]]; + buf[n++] = v4src[list[i]]; + buf[n++] = v5src[list[i]]; + } + } +} + +/* ---------------------------------------------------------------------- + 
unpack another proc's own values from buf and set own ghost values +------------------------------------------------------------------------- */ + +void PPPM::unpack_forward(int flag, FFT_SCALAR *buf, int nlist, int *list) +{ + int n = 0; + + if (flag == FORWARD_IK) { + FFT_SCALAR *xdest = &vdx_brick[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *ydest = &vdy_brick[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *zdest = &vdz_brick[nzlo_out][nylo_out][nxlo_out]; + for (int i = 0; i < nlist; i++) { + xdest[list[i]] = buf[n++]; + ydest[list[i]] = buf[n++]; + zdest[list[i]] = buf[n++]; + } + } else if (flag == FORWARD_AD) { + FFT_SCALAR *dest = &u_brick[nzlo_out][nylo_out][nxlo_out]; + for (int i = 0; i < nlist; i++) + dest[list[i]] = buf[i]; + } else if (flag == FORWARD_IK_PERATOM) { + FFT_SCALAR *esrc = &u_brick[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *v0src = &v0_brick[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *v1src = &v1_brick[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *v2src = &v2_brick[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *v3src = &v3_brick[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *v4src = &v4_brick[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *v5src = &v5_brick[nzlo_out][nylo_out][nxlo_out]; + for (int i = 0; i < nlist; i++) { + if (eflag_atom) esrc[list[i]] = buf[n++]; + if (vflag_atom) { + v0src[list[i]] = buf[n++]; + v1src[list[i]] = buf[n++]; + v2src[list[i]] = buf[n++]; + v3src[list[i]] = buf[n++]; + v4src[list[i]] = buf[n++]; + v5src[list[i]] = buf[n++]; + } + } + } else if (flag == FORWARD_AD_PERATOM) { + FFT_SCALAR *v0src = &v0_brick[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *v1src = &v1_brick[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *v2src = &v2_brick[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *v3src = &v3_brick[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *v4src = &v4_brick[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *v5src = &v5_brick[nzlo_out][nylo_out][nxlo_out]; + for (int i = 0; i < nlist; i++) { + v0src[list[i]] = buf[n++]; + 
v1src[list[i]] = buf[n++]; + v2src[list[i]] = buf[n++]; + v3src[list[i]] = buf[n++]; + v4src[list[i]] = buf[n++]; + v5src[list[i]] = buf[n++]; + } + } +} + +/* ---------------------------------------------------------------------- + pack ghost values into buf to send to another proc +------------------------------------------------------------------------- */ + +void PPPM::pack_reverse(int flag, FFT_SCALAR *buf, int nlist, int *list) +{ + if (flag == REVERSE_RHO) { + FFT_SCALAR *src = &density_brick[nzlo_out][nylo_out][nxlo_out]; + for (int i = 0; i < nlist; i++) + buf[i] = src[list[i]]; + } +} + +/* ---------------------------------------------------------------------- + unpack another proc's ghost values from buf and add to own values +------------------------------------------------------------------------- */ + +void PPPM::unpack_reverse(int flag, FFT_SCALAR *buf, int nlist, int *list) +{ + if (flag == REVERSE_RHO) { + FFT_SCALAR *dest = &density_brick[nzlo_out][nylo_out][nxlo_out]; + for (int i = 0; i < nlist; i++) + dest[list[i]] += buf[i]; + } +} + +/* ---------------------------------------------------------------------- + map nprocs to NX by NY grid as PX by PY procs - return optimal px,py +------------------------------------------------------------------------- */ + +void PPPM::procs2grid2d(int nprocs, int nx, int ny, int *px, int *py) +{ + // loop thru all possible factorizations of nprocs + // surf = surface area of largest proc sub-domain + // innermost if test minimizes surface area and surface/volume ratio + + int bestsurf = 2 * (nx + ny); + int bestboxx = 0; + int bestboxy = 0; + + int boxx,boxy,surf,ipx,ipy; + + ipx = 1; + while (ipx <= nprocs) { + if (nprocs % ipx == 0) { + ipy = nprocs/ipx; + boxx = nx/ipx; + if (nx % ipx) boxx++; + boxy = ny/ipy; + if (ny % ipy) boxy++; + surf = boxx + boxy; + if (surf < bestsurf || + (surf == bestsurf && boxx*boxy > bestboxx*bestboxy)) { + bestsurf = surf; + bestboxx = boxx; + bestboxy = boxy; + *px = ipx; + 
*py = ipy; + } + } + ipx++; + } +} + +/* ---------------------------------------------------------------------- + charge assignment into rho1d + dx,dy,dz = distance of particle from "lower left" grid point +------------------------------------------------------------------------- */ + +void PPPM::compute_rho1d(const FFT_SCALAR &dx, const FFT_SCALAR &dy, + const FFT_SCALAR &dz) +{ + int k,l; + FFT_SCALAR r1,r2,r3; + + for (k = (1-order)/2; k <= order/2; k++) { + r1 = r2 = r3 = ZEROF; + + for (l = order-1; l >= 0; l--) { + r1 = rho_coeff[l][k] + r1*dx; + r2 = rho_coeff[l][k] + r2*dy; + r3 = rho_coeff[l][k] + r3*dz; + } + rho1d[0][k] = r1; + rho1d[1][k] = r2; + rho1d[2][k] = r3; + } +} + +/* ---------------------------------------------------------------------- + charge assignment into drho1d + dx,dy,dz = distance of particle from "lower left" grid point +------------------------------------------------------------------------- */ + +void PPPM::compute_drho1d(const FFT_SCALAR &dx, const FFT_SCALAR &dy, + const FFT_SCALAR &dz) +{ + int k,l; + FFT_SCALAR r1,r2,r3; + + for (k = (1-order)/2; k <= order/2; k++) { + r1 = r2 = r3 = ZEROF; + + for (l = order-2; l >= 0; l--) { + r1 = drho_coeff[l][k] + r1*dx; + r2 = drho_coeff[l][k] + r2*dy; + r3 = drho_coeff[l][k] + r3*dz; + } + drho1d[0][k] = r1; + drho1d[1][k] = r2; + drho1d[2][k] = r3; + } +} + +/* ---------------------------------------------------------------------- + generate coeffients for the weight function of order n + + (n-1) + Wn(x) = Sum wn(k,x) , Sum is over every other integer + k=-(n-1) + For k=-(n-1),-(n-1)+2, ....., (n-1)-2,n-1 + k is odd integers if n is even and even integers if n is odd + --- + | n-1 + | Sum a(l,j)*(x-k/2)**l if abs(x-k/2) < 1/2 + wn(k,x) = < l=0 + | + | 0 otherwise + --- + a coeffients are packed into the array rho_coeff to eliminate zeros + rho_coeff(l,((k+mod(n+1,2))/2) = a(l,k) +------------------------------------------------------------------------- */ + +void 
PPPM::compute_rho_coeff() +{ + int j,k,l,m; + FFT_SCALAR s; + + FFT_SCALAR **a; + memory->create2d_offset(a,order,-order,order,"pppm:a"); + + for (k = -order; k <= order; k++) + for (l = 0; l < order; l++) + a[l][k] = 0.0; + + a[0][0] = 1.0; + for (j = 1; j < order; j++) { + for (k = -j; k <= j; k += 2) { + s = 0.0; + for (l = 0; l < j; l++) { + a[l+1][k] = (a[l][k+1]-a[l][k-1]) / (l+1); +#ifdef FFT_SINGLE + s += powf(0.5,(float) l+1) * + (a[l][k-1] + powf(-1.0,(float) l) * a[l][k+1]) / (l+1); +#else + s += pow(0.5,(double) l+1) * + (a[l][k-1] + pow(-1.0,(double) l) * a[l][k+1]) / (l+1); +#endif + } + a[0][k] = s; + } + } + + m = (1-order)/2; + for (k = -(order-1); k < order; k += 2) { + for (l = 0; l < order; l++) + rho_coeff[l][m] = a[l][k]; + for (l = 1; l < order; l++) + drho_coeff[l-1][m] = l*a[l][k]; + m++; + } + + memory->destroy2d_offset(a,-order); +} + +/* ---------------------------------------------------------------------- + Slab-geometry correction term to dampen inter-slab interactions between + periodically repeating slabs. Yields good approximation to 2D Ewald if + adequate empty space is left between repeating slabs (J. Chem. Phys. + 111, 3155). Slabs defined here to be parallel to the xy plane. Also + extended to non-neutral systems (J. Chem. Phys. 131, 094107). 
+------------------------------------------------------------------------- */ + +void PPPM::slabcorr() +{ + // compute local contribution to global dipole moment + + double *q = atom->q; + double **x = atom->x; + double zprd = domain->zprd; + int nlocal = atom->nlocal; + + double dipole = 0.0; + for (int i = 0; i < nlocal; i++) dipole += q[i]*x[i][2]; + + // sum local contributions to get global dipole moment + + double dipole_all; + MPI_Allreduce(&dipole,&dipole_all,1,MPI_DOUBLE,MPI_SUM,world); + + // need to make non-neutral systems and/or + // per-atom energy translationally invariant + + double dipole_r2 = 0.0; + if (eflag_atom || fabs(qsum) > SMALL) { + for (int i = 0; i < nlocal; i++) + dipole_r2 += q[i]*x[i][2]*x[i][2]; + + // sum local contributions + + double tmp; + MPI_Allreduce(&dipole_r2,&tmp,1,MPI_DOUBLE,MPI_SUM,world); + dipole_r2 = tmp; + } + + // compute corrections + + const double e_slabcorr = MY_2PI*(dipole_all*dipole_all - + qsum*dipole_r2 - qsum*qsum*zprd*zprd/12.0)/volume; + const double qscale = force->qqrd2e * scale; + + if (eflag_global) energy += qscale * e_slabcorr; + + // per-atom energy + + if (eflag_atom) { + double efact = qscale * MY_2PI/volume; + for (int i = 0; i < nlocal; i++) + eatom[i] += efact * q[i]*(x[i][2]*dipole_all - 0.5*(dipole_r2 + + qsum*x[i][2]*x[i][2]) - qsum*zprd*zprd/12.0); + } + + // add on force corrections + + double ffact = qscale * (-4.0*MY_PI/volume); + double **f = atom->f; + + for (int i = 0; i < nlocal; i++) f[i][2] += ffact * q[i]*(dipole_all - qsum*x[i][2]); +} + +/* ---------------------------------------------------------------------- + perform and time the 1d FFTs required for N timesteps +------------------------------------------------------------------------- */ + +int PPPM::timing_1d(int n, double &time1d) +{ + double time1,time2; + + for (int i = 0; i < 2*nfft_both; i++) work1[i] = ZEROF; + + MPI_Barrier(world); + time1 = MPI_Wtime(); + + for (int i = 0; i < n; i++) { + 
fft1->timing1d(work1,nfft_both,1); + fft2->timing1d(work1,nfft_both,-1); + if (differentiation_flag != 1) { + fft2->timing1d(work1,nfft_both,-1); + fft2->timing1d(work1,nfft_both,-1); + } + } + + MPI_Barrier(world); + time2 = MPI_Wtime(); + time1d = time2 - time1; + + if (differentiation_flag) return 2; + return 4; +} + +/* ---------------------------------------------------------------------- + perform and time the 3d FFTs required for N timesteps +------------------------------------------------------------------------- */ + +int PPPM::timing_3d(int n, double &time3d) +{ + double time1,time2; + + for (int i = 0; i < 2*nfft_both; i++) work1[i] = ZEROF; + + MPI_Barrier(world); + time1 = MPI_Wtime(); + + for (int i = 0; i < n; i++) { + fft1->compute(work1,work1,1); + fft2->compute(work1,work1,-1); + if (differentiation_flag != 1) { + fft2->compute(work1,work1,-1); + fft2->compute(work1,work1,-1); + } + } + + MPI_Barrier(world); + time2 = MPI_Wtime(); + time3d = time2 - time1; + + if (differentiation_flag) return 2; + return 4; +} + +/* ---------------------------------------------------------------------- + memory usage of local arrays +------------------------------------------------------------------------- */ + +double PPPM::memory_usage() +{ + double bytes = nmax*3 * sizeof(double); + int nbrick = (nxhi_out-nxlo_out+1) * (nyhi_out-nylo_out+1) * + (nzhi_out-nzlo_out+1); + if (differentiation_flag == 1) { + bytes += 2 * nbrick * sizeof(FFT_SCALAR); + } else { + bytes += 4 * nbrick * sizeof(FFT_SCALAR); + } + if (triclinic) bytes += 3 * nfft_both * sizeof(double); + bytes += 6 * nfft_both * sizeof(double); + bytes += nfft_both * sizeof(double); + bytes += nfft_both*5 * sizeof(FFT_SCALAR); + + if (peratom_allocate_flag) + bytes += 6 * nbrick * sizeof(FFT_SCALAR); + + if (group_allocate_flag) { + bytes += 2 * nbrick * sizeof(FFT_SCALAR); + bytes += 2 * nfft_both * sizeof(FFT_SCALAR);; + } + + bytes += cg->memory_usage(); + + return bytes; +} + +/* 
---------------------------------------------------------------------- + group-group interactions + ------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + compute the PPPM total long-range force and energy for groups A and B + ------------------------------------------------------------------------- */ + +void PPPM::compute_group_group(int groupbit_A, int groupbit_B, int AA_flag) +{ + if (slabflag && triclinic) + error->all(FLERR,"Cannot (yet) use K-space slab " + "correction with compute group/group for triclinic systems"); + + if (differentiation_flag) + error->all(FLERR,"Cannot (yet) use kspace_modify " + "diff ad with compute group/group"); + + if (!group_allocate_flag) allocate_groups(); + + // convert atoms from box to lamda coords + + if (triclinic == 0) boxlo = domain->boxlo; + else { + boxlo = domain->boxlo_lamda; + domain->x2lamda(atom->nlocal); + } + + e2group = 0.0; //energy + f2group[0] = 0.0; //force in x-direction + f2group[1] = 0.0; //force in y-direction + f2group[2] = 0.0; //force in z-direction + + // map my particle charge onto my local 3d density grid + + make_rho_groups(groupbit_A,groupbit_B,AA_flag); + + // all procs communicate density values from their ghost cells + // to fully sum contribution in their 3d bricks + // remap from 3d decomposition to FFT decomposition + + // temporarily store and switch pointers so we can + // use brick2fft() for groups A and B (without + // writing an additional function) + + FFT_SCALAR ***density_brick_real = density_brick; + FFT_SCALAR *density_fft_real = density_fft; + + // group A + + density_brick = density_A_brick; + density_fft = density_A_fft; + + cg->reverse_comm(this,REVERSE_RHO); + brick2fft(); + + // group B + + density_brick = density_B_brick; + density_fft = density_B_fft; + + cg->reverse_comm(this,REVERSE_RHO); + brick2fft(); + + // switch back pointers + + density_brick = density_brick_real; + 
density_fft = density_fft_real; + + // compute potential gradient on my FFT grid and + // portion of group-group energy/force on this proc's FFT grid + + poisson_groups(AA_flag); + + const double qscale = force->qqrd2e * scale; + + // total group A <--> group B energy + // self and boundary correction terms are in compute_group_group.cpp + + double e2group_all; + MPI_Allreduce(&e2group,&e2group_all,1,MPI_DOUBLE,MPI_SUM,world); + e2group = e2group_all; + + e2group *= qscale*0.5*volume; + + // total group A <--> group B force + + double f2group_all[3]; + MPI_Allreduce(f2group,f2group_all,3,MPI_DOUBLE,MPI_SUM,world); + + f2group[0] = qscale*volume*f2group_all[0]; + f2group[1] = qscale*volume*f2group_all[1]; + if (slabflag != 2) f2group[2] = qscale*volume*f2group_all[2]; + + // convert atoms back from lamda to box coords + + if (triclinic) domain->lamda2x(atom->nlocal); + + if (slabflag == 1) + slabcorr_groups(groupbit_A, groupbit_B, AA_flag); +} + +/* ---------------------------------------------------------------------- + allocate group-group memory that depends on # of K-vectors and order + ------------------------------------------------------------------------- */ + +void PPPM::allocate_groups() +{ + group_allocate_flag = 1; + + memory->create3d_offset(density_A_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, + nxlo_out,nxhi_out,"pppm:density_A_brick"); + memory->create3d_offset(density_B_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, + nxlo_out,nxhi_out,"pppm:density_B_brick"); + memory->create(density_A_fft,nfft_both,"pppm:density_A_fft"); + memory->create(density_B_fft,nfft_both,"pppm:density_B_fft"); +} + +/* ---------------------------------------------------------------------- + deallocate group-group memory that depends on # of K-vectors and order + ------------------------------------------------------------------------- */ + +void PPPM::deallocate_groups() +{ + group_allocate_flag = 0; + + memory->destroy3d_offset(density_A_brick,nzlo_out,nylo_out,nxlo_out); + 
memory->destroy3d_offset(density_B_brick,nzlo_out,nylo_out,nxlo_out); + memory->destroy(density_A_fft); + memory->destroy(density_B_fft); +} + +/* ---------------------------------------------------------------------- + create discretized "density" on section of global grid due to my particles + density(x,y,z) = charge "density" at grid points of my 3d brick + (nxlo:nxhi,nylo:nyhi,nzlo:nzhi) is extent of my brick (including ghosts) + in global grid for group-group interactions + ------------------------------------------------------------------------- */ + +void PPPM::make_rho_groups(int groupbit_A, int groupbit_B, int AA_flag) +{ + int l,m,n,nx,ny,nz,mx,my,mz; + FFT_SCALAR dx,dy,dz,x0,y0,z0; + + // clear 3d density arrays + + memset(&(density_A_brick[nzlo_out][nylo_out][nxlo_out]),0, + ngrid*sizeof(FFT_SCALAR)); + + memset(&(density_B_brick[nzlo_out][nylo_out][nxlo_out]),0, + ngrid*sizeof(FFT_SCALAR)); + + // loop over my charges, add their contribution to nearby grid points + // (nx,ny,nz) = global coords of grid pt to "lower left" of charge + // (dx,dy,dz) = distance to "lower left" grid pt + // (mx,my,mz) = global coords of moving stencil pt + + double *q = atom->q; + double **x = atom->x; + int nlocal = atom->nlocal; + int *mask = atom->mask; + + for (int i = 0; i < nlocal; i++) { + + if (!((mask[i] & groupbit_A) && (mask[i] & groupbit_B))) + if (AA_flag) continue; + + if ((mask[i] & groupbit_A) || (mask[i] & groupbit_B)) { + + nx = part2grid[i][0]; + ny = part2grid[i][1]; + nz = part2grid[i][2]; + dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv; + dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv; + dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv; + + compute_rho1d(dx,dy,dz); + + z0 = delvolinv * q[i]; + for (n = nlower; n <= nupper; n++) { + mz = n+nz; + y0 = z0*rho1d[2][n]; + for (m = nlower; m <= nupper; m++) { + my = m+ny; + x0 = y0*rho1d[1][m]; + for (l = nlower; l <= nupper; l++) { + mx = l+nx; + + // group A + + if (mask[i] & groupbit_A) + 
density_A_brick[mz][my][mx] += x0*rho1d[0][l]; + + // group B + + if (mask[i] & groupbit_B) + density_B_brick[mz][my][mx] += x0*rho1d[0][l]; + } + } + } + } + } +} + +/* ---------------------------------------------------------------------- + FFT-based Poisson solver for group-group interactions + ------------------------------------------------------------------------- */ + +void PPPM::poisson_groups(int AA_flag) +{ + int i,j,k,n; + + // reuse memory (already declared) + + FFT_SCALAR *work_A = work1; + FFT_SCALAR *work_B = work2; + + // transform charge density (r -> k) + + // group A + + n = 0; + for (i = 0; i < nfft; i++) { + work_A[n++] = density_A_fft[i]; + work_A[n++] = ZEROF; + } + + fft1->compute(work_A,work_A,1); + + // group B + + n = 0; + for (i = 0; i < nfft; i++) { + work_B[n++] = density_B_fft[i]; + work_B[n++] = ZEROF; + } + + fft1->compute(work_B,work_B,1); + + // group-group energy and force contribution, + // keep everything in reciprocal space so + // no inverse FFTs needed + + double scaleinv = 1.0/(nx_pppm*ny_pppm*nz_pppm); + double s2 = scaleinv*scaleinv; + + // energy + + n = 0; + for (i = 0; i < nfft; i++) { + e2group += s2 * greensfn[i] * + (work_A[n]*work_B[n] + work_A[n+1]*work_B[n+1]); + n += 2; + } + + if (AA_flag) return; + + + // multiply by Green's function and s2 + // (only for work_A so it is not squared below) + + n = 0; + for (i = 0; i < nfft; i++) { + work_A[n++] *= s2 * greensfn[i]; + work_A[n++] *= s2 * greensfn[i]; + } + + // triclinic system + + if (triclinic) { + poisson_groups_triclinic(); + return; + } + + double partial_group; + + // force, x direction + + n = 0; + for (k = nzlo_fft; k <= nzhi_fft; k++) + for (j = nylo_fft; j <= nyhi_fft; j++) + for (i = nxlo_fft; i <= nxhi_fft; i++) { + partial_group = work_A[n+1]*work_B[n] - work_A[n]*work_B[n+1]; + f2group[0] += fkx[i] * partial_group; + n += 2; + } + + // force, y direction + + n = 0; + for (k = nzlo_fft; k <= nzhi_fft; k++) + for (j = nylo_fft; j <= nyhi_fft; j++) + 
for (i = nxlo_fft; i <= nxhi_fft; i++) { + partial_group = work_A[n+1]*work_B[n] - work_A[n]*work_B[n+1]; + f2group[1] += fky[j] * partial_group; + n += 2; + } + + // force, z direction + + n = 0; + for (k = nzlo_fft; k <= nzhi_fft; k++) + for (j = nylo_fft; j <= nyhi_fft; j++) + for (i = nxlo_fft; i <= nxhi_fft; i++) { + partial_group = work_A[n+1]*work_B[n] - work_A[n]*work_B[n+1]; + f2group[2] += fkz[k] * partial_group; + n += 2; + } +} + +/* ---------------------------------------------------------------------- + FFT-based Poisson solver for group-group interactions + for a triclinic system + ------------------------------------------------------------------------- */ + +void PPPM::poisson_groups_triclinic() +{ + int i,j,k,n; + + // reuse memory (already declared) + + FFT_SCALAR *work_A = work1; + FFT_SCALAR *work_B = work2; + + double partial_group; + + // force, x direction + + n = 0; + for (i = 0; i < nfft; i++) { + partial_group = work_A[n+1]*work_B[n] - work_A[n]*work_B[n+1]; + f2group[0] += fkx[i] * partial_group; + n += 2; + } + + // force, y direction + + n = 0; + for (i = 0; i < nfft; i++) { + partial_group = work_A[n+1]*work_B[n] - work_A[n]*work_B[n+1]; + f2group[1] += fky[i] * partial_group; + n += 2; + } + + // force, z direction + + n = 0; + for (i = 0; i < nfft; i++) { + partial_group = work_A[n+1]*work_B[n] - work_A[n]*work_B[n+1]; + f2group[2] += fkz[i] * partial_group; + n += 2; + } +} + +/* ---------------------------------------------------------------------- + Slab-geometry correction term to dampen inter-slab interactions between + periodically repeating slabs. Yields good approximation to 2D Ewald if + adequate empty space is left between repeating slabs (J. Chem. Phys. + 111, 3155). Slabs defined here to be parallel to the xy plane. Also + extended to non-neutral systems (J. Chem. Phys. 131, 094107). 
+------------------------------------------------------------------------- */ + +void PPPM::slabcorr_groups(int groupbit_A, int groupbit_B, int AA_flag) +{ + // compute local contribution to global dipole moment + + double *q = atom->q; + double **x = atom->x; + double zprd = domain->zprd; + int *mask = atom->mask; + int nlocal = atom->nlocal; + + double qsum_A = 0.0; + double qsum_B = 0.0; + double dipole_A = 0.0; + double dipole_B = 0.0; + double dipole_r2_A = 0.0; + double dipole_r2_B = 0.0; + + for (int i = 0; i < nlocal; i++) { + if (!((mask[i] & groupbit_A) && (mask[i] & groupbit_B))) + if (AA_flag) continue; + + if (mask[i] & groupbit_A) { + qsum_A += q[i]; + dipole_A += q[i]*x[i][2]; + dipole_r2_A += q[i]*x[i][2]*x[i][2]; + } + + if (mask[i] & groupbit_B) { + qsum_B += q[i]; + dipole_B += q[i]*x[i][2]; + dipole_r2_B += q[i]*x[i][2]*x[i][2]; + } + } + + // sum local contributions to get total charge and global dipole moment + // for each group + + double tmp; + MPI_Allreduce(&qsum_A,&tmp,1,MPI_DOUBLE,MPI_SUM,world); + qsum_A = tmp; + + MPI_Allreduce(&qsum_B,&tmp,1,MPI_DOUBLE,MPI_SUM,world); + qsum_B = tmp; + + MPI_Allreduce(&dipole_A,&tmp,1,MPI_DOUBLE,MPI_SUM,world); + dipole_A = tmp; + + MPI_Allreduce(&dipole_B,&tmp,1,MPI_DOUBLE,MPI_SUM,world); + dipole_B = tmp; + + MPI_Allreduce(&dipole_r2_A,&tmp,1,MPI_DOUBLE,MPI_SUM,world); + dipole_r2_A = tmp; + + MPI_Allreduce(&dipole_r2_B,&tmp,1,MPI_DOUBLE,MPI_SUM,world); + dipole_r2_B = tmp; + + // compute corrections + + const double qscale = force->qqrd2e * scale; + const double efact = qscale * MY_2PI/volume; + + e2group += efact * (dipole_A*dipole_B - 0.5*(qsum_A*dipole_r2_B + + qsum_B*dipole_r2_A) - qsum_A*qsum_B*zprd*zprd/12.0); + + // add on force corrections + + const double ffact = qscale * (-4.0*MY_PI/volume); + f2group[2] += ffact * (qsum_A*dipole_B - qsum_B*dipole_A); +} diff --git a/src/KSPACE/pppm_disp.cpp b/src/KSPACE/pppm_disp.cpp index 426dbf0e7b..a15cf38515 100755 --- a/src/KSPACE/pppm_disp.cpp +++ 
b/src/KSPACE/pppm_disp.cpp @@ -1,8209 +1,8209 @@ -/* ---------------------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - Copyright (2003) Sandia Corporation. Under the terms of Contract - DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains - certain rights in this software. This software is distributed under - the GNU General Public License. - - See the README file in the top-level LAMMPS directory. -------------------------------------------------------------------------- */ - -/* ---------------------------------------------------------------------- - Contributing authors: Rolf Isele-Holder (Aachen University) - Paul Crozier (SNL) -------------------------------------------------------------------------- */ - -#include "lmptype.h" -#include "mpi.h" -#include "string.h" -#include "stdio.h" -#include "stdlib.h" -#include "math.h" -#include "pppm_disp.h" -#include "math_const.h" -#include "atom.h" -#include "comm.h" -#include "commgrid.h" -#include "neighbor.h" -#include "force.h" -#include "pair.h" -#include "bond.h" -#include "angle.h" -#include "domain.h" -#include "fft3d_wrap.h" -#include "remap_wrap.h" -#include "memory.h" -#include "error.h" - -using namespace LAMMPS_NS; -using namespace MathConst; - -#define MAXORDER 7 -#define OFFSET 16384 -#define SMALL 0.00001 -#define LARGE 10000.0 -#define EPS_HOC 1.0e-7 - -enum{GEOMETRIC,ARITHMETIC,SIXTHPOWER}; -enum{REVERSE_RHO, REVERSE_RHO_G, REVERSE_RHO_A, REVERSE_RHO_NONE}; -enum{FORWARD_IK, FORWARD_AD, FORWARD_IK_PERATOM, FORWARD_AD_PERATOM, - FORWARD_IK_G, FORWARD_AD_G, FORWARD_IK_PERATOM_G, FORWARD_AD_PERATOM_G, - FORWARD_IK_A, FORWARD_AD_A, FORWARD_IK_PERATOM_A, FORWARD_AD_PERATOM_A, - FORWARD_IK_NONE, FORWARD_AD_NONE, FORWARD_IK_PERATOM_NONE, FORWARD_AD_PERATOM_NONE}; - - -#ifdef FFT_SINGLE -#define ZEROF 0.0f -#define ONEF 
1.0f -#else -#define ZEROF 0.0 -#define ONEF 1.0 -#endif - -/* ---------------------------------------------------------------------- */ - -PPPMDisp::PPPMDisp(LAMMPS *lmp, int narg, char **arg) : KSpace(lmp, narg, arg) -{ - if (narg < 1) error->all(FLERR,"Illegal kspace_style pppm/disp command"); - - triclinic_support = 0; - pppmflag = dispersionflag = 1; - accuracy_relative = fabs(force->numeric(FLERR,arg[0])); - - nfactors = 3; - factors = new int[nfactors]; - factors[0] = 2; - factors[1] = 3; - factors[2] = 5; - - MPI_Comm_rank(world,&me); - MPI_Comm_size(world,&nprocs); - - csumflag = 0; - B = NULL; - cii = NULL; - csumi = NULL; - peratom_allocate_flag = 0; - - density_brick = vdx_brick = vdy_brick = vdz_brick = NULL; - density_fft = NULL; - u_brick = v0_brick = v1_brick = v2_brick = v3_brick = - v4_brick = v5_brick = NULL; - - density_brick_g = vdx_brick_g = vdy_brick_g = vdz_brick_g = NULL; - density_fft_g = NULL; - u_brick_g = v0_brick_g = v1_brick_g = v2_brick_g = v3_brick_g = - v4_brick_g = v5_brick_g = NULL; - - density_brick_a0 = vdx_brick_a0 = vdy_brick_a0 = vdz_brick_a0 = NULL; - density_fft_a0 = NULL; - u_brick_a0 = v0_brick_a0 = v1_brick_a0 = v2_brick_a0 = v3_brick_a0 = - v4_brick_a0 = v5_brick_a0 = NULL; - - density_brick_a1 = vdx_brick_a1 = vdy_brick_a1 = vdz_brick_a1 = NULL; - density_fft_a1 = NULL; - u_brick_a1 = v0_brick_a1 = v1_brick_a1 = v2_brick_a1 = v3_brick_a1 = - v4_brick_a1 = v5_brick_a1 = NULL; - - density_brick_a2 = vdx_brick_a2 = vdy_brick_a2 = vdz_brick_a2 = NULL; - density_fft_a2 = NULL; - u_brick_a2 = v0_brick_a2 = v1_brick_a2 = v2_brick_a2 = v3_brick_a2 = - v4_brick_a2 = v5_brick_a2 = NULL; - - density_brick_a3 = vdx_brick_a3 = vdy_brick_a3 = vdz_brick_a3 = NULL; - density_fft_a3 = NULL; - u_brick_a3 = v0_brick_a3 = v1_brick_a3 = v2_brick_a3 = v3_brick_a3 = - v4_brick_a3 = v5_brick_a3 = NULL; - - density_brick_a4 = vdx_brick_a4 = vdy_brick_a4 = vdz_brick_a4 = NULL; - density_fft_a4 = NULL; - u_brick_a4 = v0_brick_a4 = v1_brick_a4 = 
v2_brick_a4 = v3_brick_a4 = - v4_brick_a4 = v5_brick_a4 = NULL; - - density_brick_a5 = vdx_brick_a5 = vdy_brick_a5 = vdz_brick_a5 = NULL; - density_fft_a5 = NULL; - u_brick_a5 = v0_brick_a5 = v1_brick_a5 = v2_brick_a5 = v3_brick_a5 = - v4_brick_a5 = v5_brick_a5 = NULL; - - density_brick_a6 = vdx_brick_a6 = vdy_brick_a6 = vdz_brick_a6 = NULL; - density_fft_a6 = NULL; - u_brick_a6 = v0_brick_a6 = v1_brick_a6 = v2_brick_a6 = v3_brick_a6 = - v4_brick_a6 = v5_brick_a6 = NULL; - - density_brick_none = vdx_brick_none = vdy_brick_none = vdz_brick_none = NULL; - density_fft_none = NULL; - u_brick_none = v0_brick_none = v1_brick_none = v2_brick_none = v3_brick_none = - v4_brick_none = v5_brick_none = NULL; - - greensfn = NULL; - greensfn_6 = NULL; - work1 = work2 = NULL; - work1_6 = work2_6 = NULL; - vg = NULL; - vg2 = NULL; - vg_6 = NULL; - vg2_6 = NULL; - fkx = fky = fkz = NULL; - fkx2 = fky2 = fkz2 = NULL; - fkx_6 = fky_6 = fkz_6 = NULL; - fkx2_6 = fky2_6 = fkz2_6 = NULL; - - sf_precoeff1 = sf_precoeff2 = sf_precoeff3 = sf_precoeff4 = - sf_precoeff5 = sf_precoeff6 = NULL; - sf_precoeff1_6 = sf_precoeff2_6 = sf_precoeff3_6 = sf_precoeff4_6 = - sf_precoeff5_6 = sf_precoeff6_6 = NULL; - - gf_b = NULL; - gf_b_6 = NULL; - rho1d = rho_coeff = NULL; - drho1d = drho_coeff = NULL; - rho1d_6 = rho_coeff_6 = NULL; - drho1d_6 = drho_coeff_6 = NULL; - fft1 = fft2 = NULL; - fft1_6 = fft2_6 = NULL; - remap = NULL; - remap_6 = NULL; - - nmax = 0; - part2grid = NULL; - part2grid_6 = NULL; - - cg = NULL; - cg_peratom = NULL; - cg_6 = NULL; - cg_peratom_6 = NULL; - - memset(function, 0, EWALD_FUNCS*sizeof(int)); -} - -/* ---------------------------------------------------------------------- - free all memory -------------------------------------------------------------------------- */ - -PPPMDisp::~PPPMDisp() -{ - delete [] factors; - delete [] B; - B = NULL; - delete [] cii; - cii = NULL; - delete [] csumi; - csumi = NULL; - deallocate(); - deallocate_peratom(); - 
memory->destroy(part2grid); - memory->destroy(part2grid_6); - part2grid = part2grid_6 = NULL; -} - -/* ---------------------------------------------------------------------- - called once before run -------------------------------------------------------------------------- */ - -void PPPMDisp::init() -{ - if (me == 0) { - if (screen) fprintf(screen,"PPPMDisp initialization ...\n"); - if (logfile) fprintf(logfile,"PPPMDisp initialization ...\n"); - } - - triclinic_check(); - if (domain->dimension == 2) - error->all(FLERR,"Cannot use PPPMDisp with 2d simulation"); - - if (slabflag == 0 && domain->nonperiodic > 0) - error->all(FLERR,"Cannot use nonperiodic boundaries with PPPMDisp"); - if (slabflag == 1) { - if (domain->xperiodic != 1 || domain->yperiodic != 1 || - domain->boundary[2][0] != 1 || domain->boundary[2][1] != 1) - error->all(FLERR,"Incorrect boundaries with slab PPPMDisp"); - } - - if (order > MAXORDER || order_6 > MAXORDER) { - char str[128]; - sprintf(str,"PPPMDisp coulomb order cannot be greater than %d",MAXORDER); - error->all(FLERR,str); - } - - // free all arrays previously allocated - - deallocate(); - deallocate_peratom(); - - // set scale - - scale = 1.0; - - triclinic = domain->triclinic; - - // check whether cutoff and pair style are set - - pair_check(); - - int tmp; - Pair *pair = force->pair; - int *ptr = pair ? (int *) pair->extract("ewald_order",tmp) : NULL; - double *p_cutoff = pair ? (double *) pair->extract("cut_coul",tmp) : NULL; - double *p_cutoff_lj = pair ? (double *) pair->extract("cut_LJ",tmp) : NULL; - if (!(ptr||*p_cutoff||*p_cutoff_lj)) - error->all(FLERR,"KSpace style is incompatible with Pair style"); - cutoff = *p_cutoff; - cutoff_lj = *p_cutoff_lj; - - double tmp2; - MPI_Allreduce(&cutoff, &tmp2,1,MPI_DOUBLE,MPI_SUM,world); - - // check out which types of potentials will have to be calculated - - int ewald_order = ptr ? *((int *) ptr) : 1<<1; - int ewald_mix = ptr ? 
*((int *) pair->extract("ewald_mix",tmp)) : GEOMETRIC; - memset(function, 0, EWALD_FUNCS*sizeof(int)); - for (int i=0; i<=EWALD_MAXORDER; ++i) // transcribe order - if (ewald_order&(1<pair_style); - error->all(FLERR,str); - } - function[k] = 1; - } - - - // warn, if function[0] is not set but charge attribute is set! - if (!function[0] && atom->q_flag && me == 0) { - char str[128]; - sprintf(str, "Charges are set, but coulombic solver is not used"); - error->warning(FLERR, str); - } - - // compute qsum & qsqsum, if function[0] is set, print error if no charges are set or warn if not charge-neutral - - if (function[0]) { - if (!atom->q_flag) - error->all(FLERR,"Kspace style with selected options " - "requires atom attribute q"); - - qsum = qsqsum = 0.0; - for (int i = 0; i < atom->nlocal; i++) { - qsum += atom->q[i]; - qsqsum += atom->q[i]*atom->q[i]; - - } - - double tmp; - MPI_Allreduce(&qsum,&tmp,1,MPI_DOUBLE,MPI_SUM,world); - qsum = tmp; - MPI_Allreduce(&qsqsum,&tmp,1,MPI_DOUBLE,MPI_SUM,world); - qsqsum = tmp; - - if (qsqsum == 0.0) - error->all(FLERR,"Cannot use kspace solver with selected options " - "on system with no charge"); - if (fabs(qsum) > SMALL && me == 0) { - char str[128]; - sprintf(str,"System is not charge neutral, net charge = %g",qsum); - error->warning(FLERR,str); - } - } - - // if kspace is TIP4P, extract TIP4P params from pair style - // bond/angle are not yet init(), so insure equilibrium request is valid - - qdist = 0.0; - - if (tip4pflag) { - int itmp; - double *p_qdist = (double *) force->pair->extract("qdist",itmp); - int *p_typeO = (int *) force->pair->extract("typeO",itmp); - int *p_typeH = (int *) force->pair->extract("typeH",itmp); - int *p_typeA = (int *) force->pair->extract("typeA",itmp); - int *p_typeB = (int *) force->pair->extract("typeB",itmp); - if (!p_qdist || !p_typeO || !p_typeH || !p_typeA || !p_typeB) - error->all(FLERR,"KSpace style is incompatible with Pair style"); - qdist = *p_qdist; - typeO = *p_typeO; - typeH = 
*p_typeH; - int typeA = *p_typeA; - int typeB = *p_typeB; - - if (force->angle == NULL || force->bond == NULL) - error->all(FLERR,"Bond and angle potentials must be defined for TIP4P"); - if (typeA < 1 || typeA > atom->nangletypes || - force->angle->setflag[typeA] == 0) - error->all(FLERR,"Bad TIP4P angle type for PPPMDisp/TIP4P"); - if (typeB < 1 || typeB > atom->nbondtypes || - force->bond->setflag[typeB] == 0) - error->all(FLERR,"Bad TIP4P bond type for PPPMDisp/TIP4P"); - double theta = force->angle->equilibrium_angle(typeA); - double blen = force->bond->equilibrium_distance(typeB); - alpha = qdist / (cos(0.5*theta) * blen); - } - - - // initialize the pair style to get the coefficients - neighrequest_flag = 0; - pair->init(); - neighrequest_flag = 1; - init_coeffs(); - - //if g_ewald and g_ewald_6 have not been specified, set some initial value - // to avoid problems when calculating the energies! - - if (!gewaldflag) g_ewald = 1; - if (!gewaldflag_6) g_ewald_6 = 1; - - // set accuracy (force units) from accuracy_relative or accuracy_absolute - - if (accuracy_absolute >= 0.0) accuracy = accuracy_absolute; - else accuracy = accuracy_relative * two_charge_force; - - int (*procneigh)[2] = comm->procneigh; - - int iteration = 0; - if (function[0]) { - CommGrid *cgtmp = NULL; - while (order >= minorder) { - - if (iteration && me == 0) - error->warning(FLERR,"Reducing PPPMDisp Coulomb order " - "b/c stencil extends beyond neighbor processor"); - iteration++; - - // set grid for dispersion interaction and coulomb interactions - - set_grid(); - - if (nx_pppm >= OFFSET || ny_pppm >= OFFSET || nz_pppm >= OFFSET) - error->all(FLERR,"PPPMDisp Coulomb grid is too large"); - - set_fft_parameters(nx_pppm, ny_pppm, nz_pppm, - nxlo_fft, nylo_fft, nzlo_fft, - nxhi_fft, nyhi_fft, nzhi_fft, - nxlo_in, nylo_in, nzlo_in, - nxhi_in, nyhi_in, nzhi_in, - nxlo_out, nylo_out, nzlo_out, - nxhi_out, nyhi_out, nzhi_out, - nlower, nupper, - ngrid, nfft, nfft_both, - shift, shiftone, order); 
- - if (overlap_allowed) break; - - cgtmp = new CommGrid(lmp, world,1,1, - nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in, - nxlo_out,nxhi_out,nylo_out,nyhi_out, - nzlo_out,nzhi_out, - procneigh[0][0],procneigh[0][1],procneigh[1][0], - procneigh[1][1],procneigh[2][0],procneigh[2][1]); - cgtmp->ghost_notify(); - if (!cgtmp->ghost_overlap()) break; - delete cgtmp; - - order--; - } - - if (order < minorder) - error->all(FLERR, - "Coulomb PPPMDisp order has been reduced below minorder"); - if (cgtmp) delete cgtmp; - - // adjust g_ewald - - if (!gewaldflag) adjust_gewald(); - - // calculate the final accuracy - - double acc = final_accuracy(); - - // print stats - - int ngrid_max,nfft_both_max,nbuf_max; - MPI_Allreduce(&ngrid,&ngrid_max,1,MPI_INT,MPI_MAX,world); - MPI_Allreduce(&nfft_both,&nfft_both_max,1,MPI_INT,MPI_MAX,world); - - if (me == 0) { - #ifdef FFT_SINGLE - const char fft_prec[] = "single"; - #else - const char fft_prec[] = "double"; - #endif - - if (screen) { - fprintf(screen," Coulomb G vector (1/distance)= %g\n",g_ewald); - fprintf(screen," Coulomb grid = %d %d %d\n",nx_pppm,ny_pppm,nz_pppm); - fprintf(screen," Coulomb stencil order = %d\n",order); - fprintf(screen," Coulomb estimated absolute RMS force accuracy = %g\n", - acc); - fprintf(screen," Coulomb estimated relative force accuracy = %g\n", - acc/two_charge_force); - fprintf(screen," using %s precision FFTs\n",fft_prec); - fprintf(screen," 3d grid and FFT values/proc = %d %d\n", - ngrid_max, nfft_both_max); - } - if (logfile) { - fprintf(logfile," Coulomb G vector (1/distance) = %g\n",g_ewald); - fprintf(logfile," Coulomb grid = %d %d %d\n",nx_pppm,ny_pppm,nz_pppm); - fprintf(logfile," Coulomb stencil order = %d\n",order); - fprintf(logfile, - " Coulomb estimated absolute RMS force accuracy = %g\n", - acc); - fprintf(logfile," Coulomb estimated relative force accuracy = %g\n", - acc/two_charge_force); - fprintf(logfile," using %s precision FFTs\n",fft_prec); - fprintf(logfile," 3d grid and FFT 
values/proc = %d %d\n", - ngrid_max, nfft_both_max); - } - } - } - - iteration = 0; - if (function[1] + function[2] + function[3]) { - CommGrid *cgtmp = NULL; - while (order_6 >= minorder) { - - if (iteration && me == 0) - error->warning(FLERR,"Reducing PPPMDisp dispersion order " - "b/c stencil extends beyond neighbor processor"); - iteration++; - - set_grid_6(); - - if (nx_pppm_6 >= OFFSET || ny_pppm_6 >= OFFSET || nz_pppm_6 >= OFFSET) - error->all(FLERR,"PPPMDisp Dispersion grid is too large"); - - set_fft_parameters(nx_pppm_6, ny_pppm_6, nz_pppm_6, - nxlo_fft_6, nylo_fft_6, nzlo_fft_6, - nxhi_fft_6, nyhi_fft_6, nzhi_fft_6, - nxlo_in_6, nylo_in_6, nzlo_in_6, - nxhi_in_6, nyhi_in_6, nzhi_in_6, - nxlo_out_6, nylo_out_6, nzlo_out_6, - nxhi_out_6, nyhi_out_6, nzhi_out_6, - nlower_6, nupper_6, - ngrid_6, nfft_6, nfft_both_6, - shift_6, shiftone_6, order_6); - - if (overlap_allowed) break; - - cgtmp = new CommGrid(lmp,world,1,1, - nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6, - nzlo_in_6,nzhi_in_6, - nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6, - nzlo_out_6,nzhi_out_6, - procneigh[0][0],procneigh[0][1],procneigh[1][0], - procneigh[1][1],procneigh[2][0],procneigh[2][1]); - cgtmp->ghost_notify(); - if (!cgtmp->ghost_overlap()) break; - delete cgtmp; - order_6--; - } - - if (order_6 < minorder) - error->all(FLERR,"Dispersion PPPMDisp order has been " - "reduced below minorder"); - if (cgtmp) delete cgtmp; - - // adjust g_ewald_6 - - if (!gewaldflag_6 && accuracy_kspace_6 == accuracy_real_6) - adjust_gewald_6(); - - // calculate the final accuracy - - double acc, acc_real, acc_kspace; - final_accuracy_6(acc, acc_real, acc_kspace); - - - // print stats - - int ngrid_max,nfft_both_max,nbuf_max; - MPI_Allreduce(&ngrid_6,&ngrid_max,1,MPI_INT,MPI_MAX,world); - MPI_Allreduce(&nfft_both_6,&nfft_both_max,1,MPI_INT,MPI_MAX,world); - - if (me == 0) { - #ifdef FFT_SINGLE - const char fft_prec[] = "single"; - #else - const char fft_prec[] = "double"; - #endif - - if (screen) { - 
fprintf(screen," Dispersion G vector (1/distance)= %g\n",g_ewald_6); - fprintf(screen," Dispersion grid = %d %d %d\n", - nx_pppm_6,ny_pppm_6,nz_pppm_6); - fprintf(screen," Dispersion stencil order = %d\n",order_6); - fprintf(screen," Dispersion estimated absolute " - "RMS force accuracy = %g\n",acc); - fprintf(screen," Dispersion estimated absolute " - "real space RMS force accuracy = %g\n",acc_real); - fprintf(screen," Dispersion estimated absolute " - "kspace RMS force accuracy = %g\n",acc_kspace); - fprintf(screen," Dispersion estimated relative force accuracy = %g\n", - acc/two_charge_force); - fprintf(screen," using %s precision FFTs\n",fft_prec); - fprintf(screen," 3d grid and FFT values/proc dispersion = %d %d\n", - ngrid_max,nfft_both_max); - } - if (logfile) { - fprintf(logfile," Dispersion G vector (1/distance) = %g\n",g_ewald_6); - fprintf(logfile," Dispersion grid = %d %d %d\n", - nx_pppm_6,ny_pppm_6,nz_pppm_6); - fprintf(logfile," Dispersion stencil order = %d\n",order_6); - fprintf(logfile," Dispersion estimated absolute " - "RMS force accuracy = %g\n",acc); - fprintf(logfile," Dispersion estimated absolute " - "real space RMS force accuracy = %g\n",acc_real); - fprintf(logfile," Dispersion estimated absolute " - "kspace RMS force accuracy = %g\n",acc_kspace); - fprintf(logfile," Disperion estimated relative force accuracy = %g\n", - acc/two_charge_force); - fprintf(logfile," using %s precision FFTs\n",fft_prec); - fprintf(logfile," 3d grid and FFT values/proc dispersion = %d %d\n", - ngrid_max,nfft_both_max); - } - } - } - - // allocate K-space dependent memory - - allocate(); - - // pre-compute Green's function denomiator expansion - // pre-compute 1d charge distribution coefficients - - if (function[0]) { - compute_gf_denom(gf_b, order); - compute_rho_coeff(rho_coeff, drho_coeff, order); - cg->ghost_notify(); - cg->setup(); - if (differentiation_flag == 1) - compute_sf_precoeff(nx_pppm, ny_pppm, nz_pppm, order, - nxlo_fft, nylo_fft, nzlo_fft, - 
nxhi_fft, nyhi_fft, nzhi_fft, - sf_precoeff1, sf_precoeff2, sf_precoeff3, - sf_precoeff4, sf_precoeff5, sf_precoeff6); - } - if (function[1] + function[2] + function[3]) { - compute_gf_denom(gf_b_6, order_6); - compute_rho_coeff(rho_coeff_6, drho_coeff_6, order_6); - cg_6->ghost_notify(); - cg_6->setup(); - if (differentiation_flag == 1) - compute_sf_precoeff(nx_pppm_6, ny_pppm_6, nz_pppm_6, order_6, - nxlo_fft_6, nylo_fft_6, nzlo_fft_6, - nxhi_fft_6, nyhi_fft_6, nzhi_fft_6, - sf_precoeff1_6, sf_precoeff2_6, sf_precoeff3_6, - sf_precoeff4_6, sf_precoeff5_6, sf_precoeff6_6); - } - -} - -/* ---------------------------------------------------------------------- - adjust PPPM coeffs, called initially and whenever volume has changed -------------------------------------------------------------------------- */ - -void PPPMDisp::setup() -{ - double *prd; - - // volume-dependent factors - // adjust z dimension for 2d slab PPPM - // z dimension for 3d PPPM is zprd since slab_volfactor = 1.0 - - if (triclinic == 0) prd = domain->prd; - else prd = domain->prd_lamda; - - double xprd = prd[0]; - double yprd = prd[1]; - double zprd = prd[2]; - double zprd_slab = zprd*slab_volfactor; - volume = xprd * yprd * zprd_slab; - - // compute fkx,fky,fkz for my FFT grid pts - - double unitkx = (2.0*MY_PI/xprd); - double unitky = (2.0*MY_PI/yprd); - double unitkz = (2.0*MY_PI/zprd_slab); - - //compute the virial coefficients and green functions - if (function[0]){ - - delxinv = nx_pppm/xprd; - delyinv = ny_pppm/yprd; - delzinv = nz_pppm/zprd_slab; - - delvolinv = delxinv*delyinv*delzinv; - - double per; - int i, j, k, n; - - for (i = nxlo_fft; i <= nxhi_fft; i++) { - per = i - nx_pppm*(2*i/nx_pppm); - fkx[i] = unitkx*per; - j = (nx_pppm - i) % nx_pppm; - per = j - nx_pppm*(2*j/nx_pppm); - fkx2[i] = unitkx*per; - } - - for (i = nylo_fft; i <= nyhi_fft; i++) { - per = i - ny_pppm*(2*i/ny_pppm); - fky[i] = unitky*per; - j = (ny_pppm - i) % ny_pppm; - per = j - ny_pppm*(2*j/ny_pppm); - fky2[i] 
= unitky*per; - } - - for (i = nzlo_fft; i <= nzhi_fft; i++) { - per = i - nz_pppm*(2*i/nz_pppm); - fkz[i] = unitkz*per; - j = (nz_pppm - i) % nz_pppm; - per = j - nz_pppm*(2*j/nz_pppm); - fkz2[i] = unitkz*per; - } - - double sqk,vterm; - double gew2inv = 1/(g_ewald*g_ewald); - n = 0; - for (k = nzlo_fft; k <= nzhi_fft; k++) { - for (j = nylo_fft; j <= nyhi_fft; j++) { - for (i = nxlo_fft; i <= nxhi_fft; i++) { - sqk = fkx[i]*fkx[i] + fky[j]*fky[j] + fkz[k]*fkz[k]; - if (sqk == 0.0) { - vg[n][0] = 0.0; - vg[n][1] = 0.0; - vg[n][2] = 0.0; - vg[n][3] = 0.0; - vg[n][4] = 0.0; - vg[n][5] = 0.0; - } else { - vterm = -2.0 * (1.0/sqk + 0.25*gew2inv); - vg[n][0] = 1.0 + vterm*fkx[i]*fkx[i]; - vg[n][1] = 1.0 + vterm*fky[j]*fky[j]; - vg[n][2] = 1.0 + vterm*fkz[k]*fkz[k]; - vg[n][3] = vterm*fkx[i]*fky[j]; - vg[n][4] = vterm*fkx[i]*fkz[k]; - vg[n][5] = vterm*fky[j]*fkz[k]; - vg2[n][0] = vterm*0.5*(fkx[i]*fky[j] + fkx2[i]*fky2[j]); - vg2[n][1] = vterm*0.5*(fkx[i]*fkz[k] + fkx2[i]*fkz2[k]); - vg2[n][2] = vterm*0.5*(fky[j]*fkz[k] + fky2[j]*fkz2[k]); - } - n++; - } - } - } - compute_gf(); - if (differentiation_flag == 1) compute_sf_coeff(); - } - - if (function[1] + function[2] + function[3]) { - delxinv_6 = nx_pppm_6/xprd; - delyinv_6 = ny_pppm_6/yprd; - delzinv_6 = nz_pppm_6/zprd_slab; - delvolinv_6 = delxinv_6*delyinv_6*delzinv_6; - - double per; - int i, j, k, n; - for (i = nxlo_fft_6; i <= nxhi_fft_6; i++) { - per = i - nx_pppm_6*(2*i/nx_pppm_6); - fkx_6[i] = unitkx*per; - j = (nx_pppm_6 - i) % nx_pppm_6; - per = j - nx_pppm_6*(2*j/nx_pppm_6); - fkx2_6[i] = unitkx*per; - } - for (i = nylo_fft_6; i <= nyhi_fft_6; i++) { - per = i - ny_pppm_6*(2*i/ny_pppm_6); - fky_6[i] = unitky*per; - j = (ny_pppm_6 - i) % ny_pppm_6; - per = j - ny_pppm_6*(2*j/ny_pppm_6); - fky2_6[i] = unitky*per; - } - for (i = nzlo_fft_6; i <= nzhi_fft_6; i++) { - per = i - nz_pppm_6*(2*i/nz_pppm_6); - fkz_6[i] = unitkz*per; - j = (nz_pppm_6 - i) % nz_pppm_6; - per = j - nz_pppm_6*(2*j/nz_pppm_6); - 
fkz2_6[i] = unitkz*per; - } - double sqk,vterm; - long double erft, expt,nom, denom; - long double b, bs, bt; - double rtpi = sqrt(MY_PI); - double gewinv = 1/g_ewald_6; - n = 0; - for (k = nzlo_fft_6; k <= nzhi_fft_6; k++) { - for (j = nylo_fft_6; j <= nyhi_fft_6; j++) { - for (i = nxlo_fft_6; i <= nxhi_fft_6; i++) { - sqk = fkx_6[i]*fkx_6[i] + fky_6[j]*fky_6[j] + fkz_6[k]*fkz_6[k]; - if (sqk == 0.0) { - vg_6[n][0] = 0.0; - vg_6[n][1] = 0.0; - vg_6[n][2] = 0.0; - vg_6[n][3] = 0.0; - vg_6[n][4] = 0.0; - vg_6[n][5] = 0.0; - } else { - b = 0.5*sqrt(sqk)*gewinv; - bs = b*b; - bt = bs*b; - erft = 2*bt*rtpi*erfc(b); - expt = exp(-bs); - nom = erft - 2*bs*expt; - denom = nom + expt; - if (denom == 0) vterm = 3.0/sqk; - else vterm = 3.0*nom/(sqk*denom); - vg_6[n][0] = 1.0 + vterm*fkx_6[i]*fkx_6[i]; - vg_6[n][1] = 1.0 + vterm*fky_6[j]*fky_6[j]; - vg_6[n][2] = 1.0 + vterm*fkz_6[k]*fkz_6[k]; - vg_6[n][3] = vterm*fkx_6[i]*fky_6[j]; - vg_6[n][4] = vterm*fkx_6[i]*fkz_6[k]; - vg_6[n][5] = vterm*fky_6[j]*fkz_6[k]; - vg2_6[n][0] = vterm*0.5*(fkx_6[i]*fky_6[j] + fkx2_6[i]*fky2_6[j]); - vg2_6[n][1] = vterm*0.5*(fkx_6[i]*fkz_6[k] + fkx2_6[i]*fkz2_6[k]); - vg2_6[n][2] = vterm*0.5*(fky_6[j]*fkz_6[k] + fky2_6[j]*fkz2_6[k]); - } - n++; - } - } - } - compute_gf_6(); - if (differentiation_flag == 1) compute_sf_coeff_6(); - } -} - -/* ---------------------------------------------------------------------- - reset local grid arrays and communication stencils - called by fix balance b/c it changed sizes of processor sub-domains -------------------------------------------------------------------------- */ - -void PPPMDisp::setup_grid() -{ - // free all arrays previously allocated - - deallocate(); - deallocate_peratom(); - - // reset portion of global grid that each proc owns - - if (function[0]) - set_fft_parameters(nx_pppm, ny_pppm, nz_pppm, - nxlo_fft, nylo_fft, nzlo_fft, - nxhi_fft, nyhi_fft, nzhi_fft, - nxlo_in, nylo_in, nzlo_in, - nxhi_in, nyhi_in, nzhi_in, - nxlo_out, nylo_out, nzlo_out, 
- nxhi_out, nyhi_out, nzhi_out, - nlower, nupper, - ngrid, nfft, nfft_both, - shift, shiftone, order); - - if (function[1] + function[2] + function[3]) - set_fft_parameters(nx_pppm_6, ny_pppm_6, nz_pppm_6, - nxlo_fft_6, nylo_fft_6, nzlo_fft_6, - nxhi_fft_6, nyhi_fft_6, nzhi_fft_6, - nxlo_in_6, nylo_in_6, nzlo_in_6, - nxhi_in_6, nyhi_in_6, nzhi_in_6, - nxlo_out_6, nylo_out_6, nzlo_out_6, - nxhi_out_6, nyhi_out_6, nzhi_out_6, - nlower_6, nupper_6, - ngrid_6, nfft_6, nfft_both_6, - shift_6, shiftone_6, order_6); - - // reallocate K-space dependent memory - // check if grid communication is now overlapping if not allowed - // don't invoke allocate_peratom(), compute() will allocate when needed - - allocate(); - - if (function[0]) { - cg->ghost_notify(); - if (overlap_allowed == 0 && cg->ghost_overlap()) - error->all(FLERR,"PPPM grid stencil extends " - "beyond nearest neighbor processor"); - cg->setup(); - } - if (function[1] + function[2] + function[3]) { - cg_6->ghost_notify(); - if (overlap_allowed == 0 && cg_6->ghost_overlap()) - error->all(FLERR,"PPPM grid stencil extends " - "beyond nearest neighbor processor"); - cg_6->setup(); - } - - // pre-compute Green's function denomiator expansion - // pre-compute 1d charge distribution coefficients - - if (function[0]) { - compute_gf_denom(gf_b, order); - compute_rho_coeff(rho_coeff, drho_coeff, order); - if (differentiation_flag == 1) - compute_sf_precoeff(nx_pppm, ny_pppm, nz_pppm, order, - nxlo_fft, nylo_fft, nzlo_fft, - nxhi_fft, nyhi_fft, nzhi_fft, - sf_precoeff1, sf_precoeff2, sf_precoeff3, - sf_precoeff4, sf_precoeff5, sf_precoeff6); - } - if (function[1] + function[2] + function[3]) { - compute_gf_denom(gf_b_6, order_6); - compute_rho_coeff(rho_coeff_6, drho_coeff_6, order_6); - if (differentiation_flag == 1) - compute_sf_precoeff(nx_pppm_6, ny_pppm_6, nz_pppm_6, order_6, - nxlo_fft_6, nylo_fft_6, nzlo_fft_6, - nxhi_fft_6, nyhi_fft_6, nzhi_fft_6, - sf_precoeff1_6, sf_precoeff2_6, sf_precoeff3_6, - sf_precoeff4_6, 
sf_precoeff5_6, sf_precoeff6_6); - } - - // pre-compute volume-dependent coeffs - - setup(); -} - -/* ---------------------------------------------------------------------- - compute the PPPM long-range force, energy, virial -------------------------------------------------------------------------- */ - -void PPPMDisp::compute(int eflag, int vflag) -{ - - int i; - // convert atoms from box to lamda coords - - if (eflag || vflag) ev_setup(eflag,vflag); - else evflag = evflag_atom = eflag_global = vflag_global = - eflag_atom = vflag_atom = 0; - - if (evflag_atom && !peratom_allocate_flag) { - allocate_peratom(); - if (function[0]) { - cg_peratom->ghost_notify(); - cg_peratom->setup(); - } - if (function[1] + function[2] + function[3]) { - cg_peratom_6->ghost_notify(); - cg_peratom_6->setup(); - } - peratom_allocate_flag = 1; - } - - if (triclinic == 0) boxlo = domain->boxlo; - else { - boxlo = domain->boxlo_lamda; - domain->x2lamda(atom->nlocal); - } - // extend size of per-atom arrays if necessary - - if (atom->nlocal > nmax) { - - if (function[0]) memory->destroy(part2grid); - if (function[1] + function[2] + function[3]) memory->destroy(part2grid_6); - nmax = atom->nmax; - if (function[0]) memory->create(part2grid,nmax,3,"pppm/disp:part2grid"); - if (function[1] + function[2] + function[3]) - memory->create(part2grid_6,nmax,3,"pppm/disp:part2grid_6"); - } - - - energy = 0.0; - energy_1 = 0.0; - energy_6 = 0.0; - if (vflag) for (i = 0; i < 6; i++) virial_6[i] = virial_1[i] = 0.0; - - // find grid points for all my particles - // distribute partcles' charges/dispersion coefficients on the grid - // communication between processors and remapping two fft - // Solution of poissons equation in k-space and backtransformation - // communication between processors - // calculation of forces - - if (function[0]) { - - //perfrom calculations for coulomb interactions only - - particle_map_c(delxinv, delyinv, delzinv, shift, part2grid, nupper, nlower, - nxlo_out, nylo_out, 
nzlo_out, nxhi_out, nyhi_out, nzhi_out); - - make_rho_c(); - - cg->reverse_comm(this,REVERSE_RHO); - - brick2fft(nxlo_in, nylo_in, nzlo_in, nxhi_in, nyhi_in, nzhi_in, - density_brick, density_fft, work1,remap); - - if (differentiation_flag == 1) { - - poisson_ad(work1, work2, density_fft, fft1, fft2, - nx_pppm, ny_pppm, nz_pppm, nfft, - nxlo_fft, nylo_fft, nzlo_fft, nxhi_fft, nyhi_fft, nzhi_fft, - nxlo_in, nylo_in, nzlo_in, nxhi_in, nyhi_in, nzhi_in, - energy_1, greensfn, - virial_1, vg,vg2, - u_brick, v0_brick, v1_brick, v2_brick, v3_brick, v4_brick, v5_brick); - - cg->forward_comm(this,FORWARD_AD); - - fieldforce_c_ad(); - - if (vflag_atom) cg_peratom->forward_comm(this, FORWARD_AD_PERATOM); - - } else { - poisson_ik(work1, work2, density_fft, fft1, fft2, - nx_pppm, ny_pppm, nz_pppm, nfft, - nxlo_fft, nylo_fft, nzlo_fft, nxhi_fft, nyhi_fft, nzhi_fft, - nxlo_in, nylo_in, nzlo_in, nxhi_in, nyhi_in, nzhi_in, - energy_1, greensfn, - fkx, fky, fkz,fkx2, fky2, fkz2, - vdx_brick, vdy_brick, vdz_brick, virial_1, vg,vg2, - u_brick, v0_brick, v1_brick, v2_brick, v3_brick, v4_brick, v5_brick); - - cg->forward_comm(this, FORWARD_IK); - - fieldforce_c_ik(); - - if (evflag_atom) cg_peratom->forward_comm(this, FORWARD_IK_PERATOM); - } - if (evflag_atom) fieldforce_c_peratom(); - } - - if (function[1]) { - //perfrom calculations for geometric mixing - particle_map(delxinv_6, delyinv_6, delzinv_6, shift_6, part2grid_6, nupper_6, nlower_6, - nxlo_out_6, nylo_out_6, nzlo_out_6, nxhi_out_6, nyhi_out_6, nzhi_out_6); - make_rho_g(); - - - cg_6->reverse_comm(this, REVERSE_RHO_G); - - brick2fft(nxlo_in_6, nylo_in_6, nzlo_in_6, nxhi_in_6, nyhi_in_6, nzhi_in_6, - density_brick_g, density_fft_g, work1_6,remap_6); - - if (differentiation_flag == 1) { - - poisson_ad(work1_6, work2_6, density_fft_g, fft1_6, fft2_6, - nx_pppm_6, ny_pppm_6, nz_pppm_6, nfft_6, - nxlo_fft_6, nylo_fft_6, nzlo_fft_6, nxhi_fft_6, nyhi_fft_6, nzhi_fft_6, - nxlo_in_6, nylo_in_6, nzlo_in_6, nxhi_in_6, nyhi_in_6, 
nzhi_in_6, - energy_6, greensfn_6, - virial_6, vg_6, vg2_6, - u_brick_g, v0_brick_g, v1_brick_g, v2_brick_g, v3_brick_g, v4_brick_g, v5_brick_g); - - cg_6->forward_comm(this,FORWARD_AD_G); - - fieldforce_g_ad(); - - if (vflag_atom) cg_peratom_6->forward_comm(this,FORWARD_AD_PERATOM_G); - - } else { - poisson_ik(work1_6, work2_6, density_fft_g, fft1_6, fft2_6, - nx_pppm_6, ny_pppm_6, nz_pppm_6, nfft_6, - nxlo_fft_6, nylo_fft_6, nzlo_fft_6, nxhi_fft_6, nyhi_fft_6, nzhi_fft_6, - nxlo_in_6, nylo_in_6, nzlo_in_6, nxhi_in_6, nyhi_in_6, nzhi_in_6, - energy_6, greensfn_6, - fkx_6, fky_6, fkz_6,fkx2_6, fky2_6, fkz2_6, - vdx_brick_g, vdy_brick_g, vdz_brick_g, virial_6, vg_6, vg2_6, - u_brick_g, v0_brick_g, v1_brick_g, v2_brick_g, v3_brick_g, v4_brick_g, v5_brick_g); - - cg_6->forward_comm(this,FORWARD_IK_G); - - fieldforce_g_ik(); - - - if (evflag_atom) cg_peratom_6->forward_comm(this, FORWARD_IK_PERATOM_G); - } - if (evflag_atom) fieldforce_g_peratom(); - } - - if (function[2]) { - //perform calculations for arithmetic mixing - particle_map(delxinv_6, delyinv_6, delzinv_6, shift_6, part2grid_6, nupper_6, nlower_6, - nxlo_out_6, nylo_out_6, nzlo_out_6, nxhi_out_6, nyhi_out_6, nzhi_out_6); - make_rho_a(); - - cg_6->reverse_comm(this, REVERSE_RHO_A); - - brick2fft_a(); - - if ( differentiation_flag == 1) { - - poisson_ad(work1_6, work2_6, density_fft_a3, fft1_6, fft2_6, - nx_pppm_6, ny_pppm_6, nz_pppm_6, nfft_6, - nxlo_fft_6, nylo_fft_6, nzlo_fft_6, nxhi_fft_6, nyhi_fft_6, nzhi_fft_6, - nxlo_in_6, nylo_in_6, nzlo_in_6, nxhi_in_6, nyhi_in_6, nzhi_in_6, - energy_6, greensfn_6, - virial_6, vg_6, vg2_6, - u_brick_a3, v0_brick_a3, v1_brick_a3, v2_brick_a3, v3_brick_a3, v4_brick_a3, v5_brick_a3); - poisson_2s_ad(density_fft_a0, density_fft_a6, - u_brick_a0, v0_brick_a0, v1_brick_a0, v2_brick_a0, v3_brick_a0, v4_brick_a0, v5_brick_a0, - u_brick_a6, v0_brick_a6, v1_brick_a6, v2_brick_a6, v3_brick_a6, v4_brick_a6, v5_brick_a6); - poisson_2s_ad(density_fft_a1, density_fft_a5, - 
u_brick_a1, v0_brick_a1, v1_brick_a1, v2_brick_a1, v3_brick_a1, v4_brick_a1, v5_brick_a1, - u_brick_a5, v0_brick_a5, v1_brick_a5, v2_brick_a5, v3_brick_a5, v4_brick_a5, v5_brick_a5); - poisson_2s_ad(density_fft_a2, density_fft_a4, - u_brick_a2, v0_brick_a2, v1_brick_a2, v2_brick_a2, v3_brick_a2, v4_brick_a2, v5_brick_a2, - u_brick_a4, v0_brick_a4, v1_brick_a4, v2_brick_a4, v3_brick_a4, v4_brick_a4, v5_brick_a4); - - cg_6->forward_comm(this, FORWARD_AD_A); - - fieldforce_a_ad(); - - if (evflag_atom) cg_peratom_6->forward_comm(this, FORWARD_AD_PERATOM_A); - - } else { - - poisson_ik(work1_6, work2_6, density_fft_a3, fft1_6, fft2_6, - nx_pppm_6, ny_pppm_6, nz_pppm_6, nfft_6, - nxlo_fft_6, nylo_fft_6, nzlo_fft_6, nxhi_fft_6, nyhi_fft_6, nzhi_fft_6, - nxlo_in_6, nylo_in_6, nzlo_in_6, nxhi_in_6, nyhi_in_6, nzhi_in_6, - energy_6, greensfn_6, - fkx_6, fky_6, fkz_6,fkx2_6, fky2_6, fkz2_6, - vdx_brick_a3, vdy_brick_a3, vdz_brick_a3, virial_6, vg_6, vg2_6, - u_brick_a3, v0_brick_a3, v1_brick_a3, v2_brick_a3, v3_brick_a3, v4_brick_a3, v5_brick_a3); - poisson_2s_ik(density_fft_a0, density_fft_a6, - vdx_brick_a0, vdy_brick_a0, vdz_brick_a0, - vdx_brick_a6, vdy_brick_a6, vdz_brick_a6, - u_brick_a0, v0_brick_a0, v1_brick_a0, v2_brick_a0, v3_brick_a0, v4_brick_a0, v5_brick_a0, - u_brick_a6, v0_brick_a6, v1_brick_a6, v2_brick_a6, v3_brick_a6, v4_brick_a6, v5_brick_a6); - poisson_2s_ik(density_fft_a1, density_fft_a5, - vdx_brick_a1, vdy_brick_a1, vdz_brick_a1, - vdx_brick_a5, vdy_brick_a5, vdz_brick_a5, - u_brick_a1, v0_brick_a1, v1_brick_a1, v2_brick_a1, v3_brick_a1, v4_brick_a1, v5_brick_a1, - u_brick_a5, v0_brick_a5, v1_brick_a5, v2_brick_a5, v3_brick_a5, v4_brick_a5, v5_brick_a5); - poisson_2s_ik(density_fft_a2, density_fft_a4, - vdx_brick_a2, vdy_brick_a2, vdz_brick_a2, - vdx_brick_a4, vdy_brick_a4, vdz_brick_a4, - u_brick_a2, v0_brick_a2, v1_brick_a2, v2_brick_a2, v3_brick_a2, v4_brick_a2, v5_brick_a2, - u_brick_a4, v0_brick_a4, v1_brick_a4, v2_brick_a4, v3_brick_a4, 
v4_brick_a4, v5_brick_a4); - - cg_6->forward_comm(this, FORWARD_IK_A); - - fieldforce_a_ik(); - - if (evflag_atom) cg_peratom_6->forward_comm(this, FORWARD_IK_PERATOM_A); - } - if (evflag_atom) fieldforce_a_peratom(); - } - - if (function[3]) { - //perfrom calculations if no mixing rule applies - particle_map(delxinv_6, delyinv_6, delzinv_6, shift_6, part2grid_6, nupper_6, nlower_6, - nxlo_out_6, nylo_out_6, nzlo_out_6, nxhi_out_6, nyhi_out_6, nzhi_out_6); - - make_rho_none(); - - cg_6->reverse_comm(this, REVERSE_RHO_NONE); - - brick2fft_none(); - - if (differentiation_flag == 1) { - - int n = 0; - for (int k = 0; kforward_comm(this,FORWARD_AD_NONE); - - fieldforce_none_ad(); - - if (vflag_atom) cg_peratom_6->forward_comm(this,FORWARD_AD_PERATOM_NONE); - - } else { - int n = 0; - for (int k = 0; kforward_comm(this,FORWARD_IK_NONE); - - fieldforce_none_ik(); - - - if (evflag_atom) cg_peratom_6->forward_comm(this, FORWARD_IK_PERATOM_NONE); - } - if (evflag_atom) fieldforce_none_peratom(); - } - - // sum energy across procs and add in volume-dependent term - - const double qscale = force->qqrd2e * scale; - if (eflag_global) { - double energy_all; - MPI_Allreduce(&energy_1,&energy_all,1,MPI_DOUBLE,MPI_SUM,world); - energy_1 = energy_all; - MPI_Allreduce(&energy_6,&energy_all,1,MPI_DOUBLE,MPI_SUM,world); - energy_6 = energy_all; - - energy_1 *= 0.5*volume; - energy_6 *= 0.5*volume; - - energy_1 -= g_ewald*qsqsum/MY_PIS + - MY_PI2*qsum*qsum / (g_ewald*g_ewald*volume); - energy_6 += - MY_PI*MY_PIS/(6*volume)*pow(g_ewald_6,3)*csumij + - 1.0/12.0*pow(g_ewald_6,6)*csum; - energy_1 *= qscale; - } - - // sum virial across procs - - if (vflag_global) { - double virial_all[6]; - MPI_Allreduce(virial_1,virial_all,6,MPI_DOUBLE,MPI_SUM,world); - for (i = 0; i < 6; i++) virial[i] = 0.5*qscale*volume*virial_all[i]; - MPI_Allreduce(virial_6,virial_all,6,MPI_DOUBLE,MPI_SUM,world); - for (i = 0; i < 6; i++) virial[i] += 0.5*volume*virial_all[i]; - if 
(function[1]+function[2]+function[3]){ - double a = MY_PI*MY_PIS/(6*volume)*pow(g_ewald_6,3)*csumij; - virial[0] -= a; - virial[1] -= a; - virial[2] -= a; - } - } - - if (eflag_atom) { - if (function[0]) { - double *q = atom->q; - for (i = 0; i < atom->nlocal; i++) { - eatom[i] -= qscale*g_ewald*q[i]*q[i]/MY_PIS + qscale*MY_PI2*q[i]*qsum / (g_ewald*g_ewald*volume); //coulomb self energy correction - } - } - if (function[1] + function[2] + function[3]) { - int tmp; - for (i = 0; i < atom->nlocal; i++) { - tmp = atom->type[i]; - eatom[i] += - MY_PI*MY_PIS/(6*volume)*pow(g_ewald_6,3)*csumi[tmp] + - 1.0/12.0*pow(g_ewald_6,6)*cii[tmp]; - } - } - } - - if (vflag_atom) { - if (function[1] + function[2] + function[3]) { - int tmp; - for (i = 0; i < atom->nlocal; i++) { - tmp = atom->type[i]; - for (int n = 0; n < 3; n++) vatom[i][n] -= MY_PI*MY_PIS/(6*volume)*pow(g_ewald_6,3)*csumi[tmp]; //dispersion self virial correction - } - } - } - - - // 2d slab correction - - if (slabflag) slabcorr(eflag); - if (function[0]) energy += energy_1; - if (function[1] + function[2] + function[3]) energy += energy_6; - - // convert atoms back from lamda to box coords - - if (triclinic) domain->lamda2x(atom->nlocal); -} - -/* ---------------------------------------------------------------------- - initialize coefficients needed for the dispersion density on the grids -------------------------------------------------------------------------- */ - -void PPPMDisp::init_coeffs() // local pair coeffs -{ - int tmp; - int n = atom->ntypes; - int converged; - delete [] B; - if (function[3] + function[2]) { // no mixing rule or arithmetic - if (function[2] && me == 0) { - if (screen) fprintf(screen," Optimizing splitting of Dispersion coefficients\n"); - if (logfile) fprintf(logfile," Optimizing splitting of Dispersion coefficients\n"); - } - // get dispersion coefficients - double **b = (double **) force->pair->extract("B",tmp); - // allocate data for eigenvalue decomposition - double **A; - double 
**Q; - memory->create(A,n,n,"pppm/disp:A"); - memory->create(Q,n,n,"pppm/disp:Q"); - // fill coefficients to matrix a - for (int i = 1; i <= n; i++) - for (int j = 1; j <= n; j++) - A[i-1][j-1] = b[i][j]; - // transform q to a unity matrix - for (int i = 0; i < n; i++) - for (int j = 0; j < n; j++) - Q[i][j] = 0.0; - for (int i = 0; i < n; i++) - Q[i][i] = 1.0; - // perfrom eigenvalue decomposition with QR algorithm - converged = qr_alg(A,Q,n); - if (function[3] && !converged) { - error->all(FLERR,"Matrix factorization to split dispersion coefficients failed"); - } - // determine number of used eigenvalues - // based on maximum allowed number or cutoff criterion - // sort eigenvalues according to their size with bubble sort - double t; - for (int i = 0; i < n; i++) { - for (int j = 0; j < n-1-i; j++) { - if (fabs(A[j][j]) < fabs(A[j+1][j+1])) { - t = A[j][j]; - A[j][j] = A[j+1][j+1]; - A[j+1][j+1] = t; - for (int k = 0; k < n; k++) { - t = Q[k][j]; - Q[k][j] = Q[k][j+1]; - Q[k][j+1] = t; - } - } - } - } - - // check which eigenvalue is the first that is smaller - // than a specified tolerance - // check how many are maximum allowed by the user - double amax = fabs(A[0][0]); - double acrit = amax*splittol; - double bmax = 0; - double err = 0; - nsplit = 0; - for (int i = 0; i < n; i++) { - if (fabs(A[i][i]) > acrit) nsplit++; - else { - bmax = fabs(A[i][i]); - break; - } - } - - err = bmax/amax; - if (err > 1.0e-4) { - char str[128]; - sprintf(str,"Error in splitting of dispersion coeffs is estimated %g",err); - error->warning(FLERR, str); - } - // set B - B = new double[nsplit*n+nsplit]; - for (int i = 0; i< nsplit; i++) { - B[i] = A[i][i]; - for (int j = 0; j < n; j++) { - B[nsplit*(j+1) + i] = Q[j][i]; - } - } - - nsplit_alloc = nsplit; - if (nsplit%2 == 1) nsplit_alloc = nsplit + 1; - // check if the function should preferably be [1] or [2] or [3] - if (nsplit == 1) { - delete [] B; - function[3] = 0; - function[2] = 0; - function[1] = 1; - if (me == 0) { - if 
(screen) fprintf(screen," Using geometric mixing for reciprocal space\n"); - if (logfile) fprintf(logfile," Using geometric mixing for reciprocal space\n"); - } - } - if (function[2] && nsplit <= 6) { - if (me == 0) { - if (screen) fprintf(screen," Using %d instead of 7 structure factors\n",nsplit); - if (logfile) fprintf(logfile," Using %d instead of 7 structure factors\n",nsplit); - } - function[3] = 1; - function[2] = 0; - } - if (function[2] && (nsplit > 6)) { - if (me == 0) { - if (screen) fprintf(screen," Using 7 structure factors\n"); - if (logfile) fprintf(logfile," Using 7 structure factors\n"); - } - delete [] B; - } - if (function[3]) { - if (me == 0) { - if (screen) fprintf(screen," Using %d structure factors\n",nsplit); - if (logfile) fprintf(logfile," Using %d structure factors\n",nsplit); - } - if (nsplit > 9) error->warning(FLERR, "Simulations might be very slow because of large number of structure factors"); - } - - memory->destroy(A); - memory->destroy(Q); - } - if (function[1]) { // geometric 1/r^6 - double **b = (double **) force->pair->extract("B",tmp); - B = new double[n+1]; - for (int i=0; i<=n; ++i) B[i] = sqrt(fabs(b[i][i])); - } - if (function[2]) { // arithmetic 1/r^6 - //cannot use epsilon, because this has not been set yet - double **epsilon = (double **) force->pair->extract("epsilon",tmp); - //cannot use sigma, because this has not been set yet - double **sigma = (double **) force->pair->extract("sigma",tmp); - if (!(epsilon&&sigma)) - error->all(FLERR,"Epsilon or sigma reference not set by pair style in PPPMDisp"); - double eps_i, sigma_i, sigma_n, *bi = B = new double[7*n+7]; - double c[7] = { - 1.0, sqrt(6.0), sqrt(15.0), sqrt(20.0), sqrt(15.0), sqrt(6.0), 1.0}; - for (int i=0; i<=n; ++i) { - eps_i = sqrt(epsilon[i][i]); - sigma_i = sigma[i][i]; - sigma_n = 1.0; - for (int j=0; j<7; ++j) { - *(bi++) = sigma_n*eps_i*c[j]*0.25; - sigma_n *= sigma_i; - } - } - } -} - -/* 
---------------------------------------------------------------------- - Eigenvalue decomposition of a real, symmetric matrix with the QR - method (includes transpformation to Tridiagonal Matrix + Wilkinson - shift) -------------------------------------------------------------------------- */ - -int PPPMDisp::qr_alg(double **A, double **Q, int n) -{ - int converged = 0; - double an1, an, bn1, d, mue; - // allocate some memory for the required operations - double **A0,**Qi,**C,**D,**E; - // make a copy of A for convergence check - memory->create(A0,n,n,"pppm/disp:A0"); - for (int i = 0; i < n; i++) - for (int j = 0; j < n; j++) - A0[i][j] = A[i][j]; - - // allocate an auxiliary matrix Qi - memory->create(Qi,n,n,"pppm/disp:Qi"); - - // alllocate an auxillary matrices for the matrix multiplication - memory->create(C,n,n,"pppm/disp:C"); - memory->create(D,n,n,"pppm/disp:D"); - memory->create(E,n,n,"pppm/disp:E"); - - // transform Matrix A to Tridiagonal form - hessenberg(A,Q,n); - - // start loop for the matrix factorization - int count = 0; - int countmax = 100000; - while (1) { - // make a Wilkinson shift - an1 = A[n-2][n-2]; - an = A[n-1][n-1]; - bn1 = A[n-2][n-1]; - d = (an1-an)/2; - mue = an + d - copysign(1.,d)*sqrt(d*d + bn1*bn1); - for (int i = 0; i < n; i++) - A[i][i] -= mue; - - // perform a QR factorization for a tridiagonal matrix A - qr_tri(Qi,A,n); - - // update the matrices - mmult(A,Qi,C,n); - mmult(Q,Qi,C,n); - - // backward Wilkinson shift - for (int i = 0; i < n; i++) - A[i][i] += mue; - - // check the convergence - converged = check_convergence(A,Q,A0,C,D,E,n); - if (converged) break; - count = count + 1; - if (count == countmax) break; - } - - // free allocated memory - memory->destroy(Qi); - memory->destroy(A0); - memory->destroy(C); - memory->destroy(D); - memory->destroy(E); - - return converged; -} - -/* ---------------------------------------------------------------------- - Transform a Matrix to Hessenberg form (for symmetric Matrices, the - 
result will be a tridiagonal matrix) -------------------------------------------------------------------------- */ - -void PPPMDisp::hessenberg(double **A, double **Q, int n) -{ - double r,a,b,c,s,x1,x2; - for (int i = 0; i < n-1; i++) { - for (int j = i+2; j < n; j++) { - // compute coeffs for the rotation matrix - a = A[i+1][i]; - b = A[j][i]; - r = sqrt(a*a + b*b); - c = a/r; - s = b/r; - // update the entries of A with multiplication from the left - for (int k = 0; k < n; k++) { - x1 = A[i+1][k]; - x2 = A[j][k]; - A[i+1][k] = c*x1 + s*x2; - A[j][k] = -s*x1 + c*x2; - } - // update the entries of A and Q with a multiplication from the right - for (int k = 0; k < n; k++) { - x1 = A[k][i+1]; - x2 = A[k][j]; - A[k][i+1] = c*x1 + s*x2; - A[k][j] = -s*x1 + c*x2; - x1 = Q[k][i+1]; - x2 = Q[k][j]; - Q[k][i+1] = c*x1 + s*x2; - Q[k][j] = -s*x1 + c*x2; - } - } - } -} - -/* ---------------------------------------------------------------------- - QR factorization for a tridiagonal matrix; Result of the factorization - is stored in A and Qi -------------------------------------------------------------------------- */ - -void PPPMDisp::qr_tri(double** Qi,double** A,int n) -{ - double r,a,b,c,s,x1,x2; - int j,k,k0,kmax; - // make Qi a unity matrix - for (int i = 0; i < n; i++) - for (int j = 0; j < n; j++) - Qi[i][j] = 0.0; - for (int i = 0; i < n; i++) - Qi[i][i] = 1.0; - // loop over main diagonal and first of diagonal of A - for (int i = 0; i < n-1; i++) { - j = i+1; - // coefficients of the rotation matrix - a = A[i][i]; - b = A[j][i]; - r = sqrt(a*a + b*b); - c = a/r; - s = b/r; - // update the entries of A and Q - k0 = (i-1>0)?i-1:0; //min(i-1,0); - kmax = (i+3A0[i][j])?Bmax:A0[i][j]; //max(Bmax,A0[i][j]); - double epsabs = eps*Bmax; - - // reconstruct the original matrix - // store the diagonal elements in D - for (int i = 0; i < n; i++) - for (int j = 0; j < n; j++) - D[i][j] = 0.0; - for (int i = 0; i < n; i++) - D[i][i] = A[i][i]; - // store matrix Q in E - for (int i 
= 0; i < n; i++) - for (int j = 0; j < n; j++) - E[i][j] = Q[i][j]; - // E = Q*A - mmult(E,D,C,n); - // store transpose of Q in D - for (int i = 0; i < n; i++) - for (int j = 0; j < n; j++) - D[i][j] = Q[j][i]; - // E = Q*A*Q.t - mmult(E,D,C,n); - - //compare the original matrix and the final matrix - for (int i = 0; i < n; i++) { - for (int j = 0; j < n; j++) { - diff = A0[i][j] - E[i][j]; - epsmax = (epsmax>fabs(diff))?epsmax:fabs(diff);//max(epsmax,fabs(diff)); - } - } - if (epsmax > epsabs) converged = 0; - return converged; -} - -/* ---------------------------------------------------------------------- - allocate memory that depends on # of K-vectors and order -------------------------------------------------------------------------- */ - -void PPPMDisp::allocate() -{ - - int (*procneigh)[2] = comm->procneigh; - - if (function[0]) { - memory->create(work1,2*nfft_both,"pppm/disp:work1"); - memory->create(work2,2*nfft_both,"pppm/disp:work2"); - - memory->create1d_offset(fkx,nxlo_fft,nxhi_fft,"pppm/disp:fkx"); - memory->create1d_offset(fky,nylo_fft,nyhi_fft,"pppm/disp:fky"); - memory->create1d_offset(fkz,nzlo_fft,nzhi_fft,"pppm/disp:fkz"); - - memory->create1d_offset(fkx2,nxlo_fft,nxhi_fft,"pppm/disp:fkx2"); - memory->create1d_offset(fky2,nylo_fft,nyhi_fft,"pppm/disp:fky2"); - memory->create1d_offset(fkz2,nzlo_fft,nzhi_fft,"pppm/disp:fkz2"); - - - memory->create(gf_b,order,"pppm/disp:gf_b"); - memory->create2d_offset(rho1d,3,-order/2,order/2,"pppm/disp:rho1d"); - memory->create2d_offset(rho_coeff,order,(1-order)/2,order/2,"pppm/disp:rho_coeff"); - memory->create2d_offset(drho1d,3,-order/2,order/2,"pppm/disp:rho1d"); - memory->create2d_offset(drho_coeff,order,(1-order)/2,order/2,"pppm/disp:drho_coeff"); - - memory->create(greensfn,nfft_both,"pppm/disp:greensfn"); - memory->create(vg,nfft_both,6,"pppm/disp:vg"); - memory->create(vg2,nfft_both,3,"pppm/disp:vg2"); - - memory->create3d_offset(density_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, - 
nxlo_out,nxhi_out,"pppm/disp:density_brick"); - if ( differentiation_flag == 1) { - memory->create3d_offset(u_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, - nxlo_out,nxhi_out,"pppm/disp:u_brick"); - memory->create(sf_precoeff1,nfft_both,"pppm/disp:sf_precoeff1"); - memory->create(sf_precoeff2,nfft_both,"pppm/disp:sf_precoeff2"); - memory->create(sf_precoeff3,nfft_both,"pppm/disp:sf_precoeff3"); - memory->create(sf_precoeff4,nfft_both,"pppm/disp:sf_precoeff4"); - memory->create(sf_precoeff5,nfft_both,"pppm/disp:sf_precoeff5"); - memory->create(sf_precoeff6,nfft_both,"pppm/disp:sf_precoeff6"); - - } else { - memory->create3d_offset(vdx_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, - nxlo_out,nxhi_out,"pppm/disp:vdx_brick"); - memory->create3d_offset(vdy_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, - nxlo_out,nxhi_out,"pppm/disp:vdy_brick"); - memory->create3d_offset(vdz_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, - nxlo_out,nxhi_out,"pppm/disp:vdz_brick"); - } - memory->create(density_fft,nfft_both,"pppm/disp:density_fft"); - - int tmp; - - fft1 = new FFT3d(lmp,world,nx_pppm,ny_pppm,nz_pppm, - nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft, - nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft, - 0,0,&tmp); - - fft2 = new FFT3d(lmp,world,nx_pppm,ny_pppm,nz_pppm, - nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft, - nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in, - 0,0,&tmp); - - remap = new Remap(lmp,world, - nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in, - nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft, - 1,0,0,FFT_PRECISION); - - // create ghost grid object for rho and electric field communication - - if (differentiation_flag == 1) - cg = new CommGrid(lmp,world,1,1, - nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in, - nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out, - procneigh[0][0],procneigh[0][1],procneigh[1][0], - procneigh[1][1],procneigh[2][0],procneigh[2][1]); - else - cg = new CommGrid(lmp,world,3,1, - 
nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in, - nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out, - procneigh[0][0],procneigh[0][1],procneigh[1][0], - procneigh[1][1],procneigh[2][0],procneigh[2][1]); - } - - if (function[1]) { - memory->create(work1_6,2*nfft_both_6,"pppm/disp:work1_6"); - memory->create(work2_6,2*nfft_both_6,"pppm/disp:work2_6"); - - memory->create1d_offset(fkx_6,nxlo_fft_6,nxhi_fft_6,"pppm/disp:fkx_6"); - memory->create1d_offset(fky_6,nylo_fft_6,nyhi_fft_6,"pppm/disp:fky_6"); - memory->create1d_offset(fkz_6,nzlo_fft_6,nzhi_fft_6,"pppm/disp:fkz_6"); - - memory->create1d_offset(fkx2_6,nxlo_fft_6,nxhi_fft_6,"pppm/disp:fkx2_6"); - memory->create1d_offset(fky2_6,nylo_fft_6,nyhi_fft_6,"pppm/disp:fky2_6"); - memory->create1d_offset(fkz2_6,nzlo_fft_6,nzhi_fft_6,"pppm/disp:fkz2_6"); - - memory->create(gf_b_6,order_6,"pppm/disp:gf_b_6"); - memory->create2d_offset(rho1d_6,3,-order_6/2,order_6/2,"pppm/disp:rho1d_6"); - memory->create2d_offset(rho_coeff_6,order_6,(1-order_6)/2,order_6/2,"pppm/disp:rho_coeff_6"); - memory->create2d_offset(drho1d_6,3,-order_6/2,order_6/2,"pppm/disp:drho1d_6"); - memory->create2d_offset(drho_coeff_6,order_6,(1-order_6)/2,order_6/2,"pppm/disp:drho_coeff_6"); - - memory->create(greensfn_6,nfft_both_6,"pppm/disp:greensfn_6"); - memory->create(vg_6,nfft_both_6,6,"pppm/disp:vg_6"); - memory->create(vg2_6,nfft_both_6,3,"pppm/disp:vg2_6"); - - memory->create3d_offset(density_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:density_brick_g"); - if ( differentiation_flag == 1) { - memory->create3d_offset(u_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_g"); - - memory->create(sf_precoeff1_6,nfft_both_6,"pppm/disp:sf_precoeff1_6"); - memory->create(sf_precoeff2_6,nfft_both_6,"pppm/disp:sf_precoeff2_6"); - memory->create(sf_precoeff3_6,nfft_both_6,"pppm/disp:sf_precoeff3_6"); - 
memory->create(sf_precoeff4_6,nfft_both_6,"pppm/disp:sf_precoeff4_6"); - memory->create(sf_precoeff5_6,nfft_both_6,"pppm/disp:sf_precoeff5_6"); - memory->create(sf_precoeff6_6,nfft_both_6,"pppm/disp:sf_precoeff6_6"); - - } else { - memory->create3d_offset(vdx_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:vdx_brick_g"); - memory->create3d_offset(vdy_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:vdy_brick_g"); - memory->create3d_offset(vdz_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:vdz_brick_g"); - } - memory->create(density_fft_g,nfft_both_6,"pppm/disp:density_fft_g"); - - - int tmp; - - fft1_6 = new FFT3d(lmp,world,nx_pppm_6,ny_pppm_6,nz_pppm_6, - nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6, - nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6, - 0,0,&tmp); - - fft2_6 = new FFT3d(lmp,world,nx_pppm_6,ny_pppm_6,nz_pppm_6, - nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6, - nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6, - 0,0,&tmp); - - remap_6 = new Remap(lmp,world, - nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6, - nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6, - 1,0,0,FFT_PRECISION); - - // create ghost grid object for rho and electric field communication - - if (differentiation_flag == 1) - cg_6 = new CommGrid(lmp,world,1,1, - nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6, - nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6, - procneigh[0][0],procneigh[0][1],procneigh[1][0], - procneigh[1][1],procneigh[2][0],procneigh[2][1]); - else - cg_6 = new CommGrid(lmp,world,3,1, - nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6, - nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6, - procneigh[0][0],procneigh[0][1],procneigh[1][0], - procneigh[1][1],procneigh[2][0],procneigh[2][1]); - } - 
- if (function[2]) { - memory->create(work1_6,2*nfft_both_6,"pppm/disp:work1_6"); - memory->create(work2_6,2*nfft_both_6,"pppm/disp:work2_6"); - - memory->create1d_offset(fkx_6,nxlo_fft_6,nxhi_fft_6,"pppm/disp:fkx_6"); - memory->create1d_offset(fky_6,nylo_fft_6,nyhi_fft_6,"pppm/disp:fky_6"); - memory->create1d_offset(fkz_6,nzlo_fft_6,nzhi_fft_6,"pppm/disp:fkz_6"); - - memory->create1d_offset(fkx2_6,nxlo_fft_6,nxhi_fft_6,"pppm/disp:fkx2_6"); - memory->create1d_offset(fky2_6,nylo_fft_6,nyhi_fft_6,"pppm/disp:fky2_6"); - memory->create1d_offset(fkz2_6,nzlo_fft_6,nzhi_fft_6,"pppm/disp:fkz2_6"); - - memory->create(gf_b_6,order_6,"pppm/disp:gf_b_6"); - memory->create2d_offset(rho1d_6,3,-order_6/2,order_6/2,"pppm/disp:rho1d_6"); - memory->create2d_offset(rho_coeff_6,order_6,(1-order_6)/2,order_6/2,"pppm/disp:rho_coeff_6"); - memory->create2d_offset(drho1d_6,3,-order_6/2,order_6/2,"pppm/disp:drho1d_6"); - memory->create2d_offset(drho_coeff_6,order_6,(1-order_6)/2,order_6/2,"pppm/disp:drho_coeff_6"); - - memory->create(greensfn_6,nfft_both_6,"pppm/disp:greensfn_6"); - memory->create(vg_6,nfft_both_6,6,"pppm/disp:vg_6"); - memory->create(vg2_6,nfft_both_6,3,"pppm/disp:vg2_6"); - - memory->create3d_offset(density_brick_a0,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:density_brick_a0"); - memory->create3d_offset(density_brick_a1,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:density_brick_a1"); - memory->create3d_offset(density_brick_a2,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:density_brick_a2"); - memory->create3d_offset(density_brick_a3,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:density_brick_a3"); - memory->create3d_offset(density_brick_a4,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:density_brick_a4"); - memory->create3d_offset(density_brick_a5,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - 
nxlo_out_6,nxhi_out_6,"pppm/disp:density_brick_a5"); - memory->create3d_offset(density_brick_a6,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:density_brick_a6"); - - memory->create(density_fft_a0,nfft_both_6,"pppm/disp:density_fft_a0"); - memory->create(density_fft_a1,nfft_both_6,"pppm/disp:density_fft_a1"); - memory->create(density_fft_a2,nfft_both_6,"pppm/disp:density_fft_a2"); - memory->create(density_fft_a3,nfft_both_6,"pppm/disp:density_fft_a3"); - memory->create(density_fft_a4,nfft_both_6,"pppm/disp:density_fft_a4"); - memory->create(density_fft_a5,nfft_both_6,"pppm/disp:density_fft_a5"); - memory->create(density_fft_a6,nfft_both_6,"pppm/disp:density_fft_a6"); - - - if ( differentiation_flag == 1 ) { - memory->create3d_offset(u_brick_a0,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a0"); - memory->create3d_offset(u_brick_a1,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a1"); - memory->create3d_offset(u_brick_a2,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a2"); - memory->create3d_offset(u_brick_a3,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a3"); - memory->create3d_offset(u_brick_a4,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a4"); - memory->create3d_offset(u_brick_a5,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a5"); - memory->create3d_offset(u_brick_a6,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a6"); - - memory->create(sf_precoeff1_6,nfft_both_6,"pppm/disp:sf_precoeff1_6"); - memory->create(sf_precoeff2_6,nfft_both_6,"pppm/disp:sf_precoeff2_6"); - memory->create(sf_precoeff3_6,nfft_both_6,"pppm/disp:sf_precoeff3_6"); - memory->create(sf_precoeff4_6,nfft_both_6,"pppm/disp:sf_precoeff4_6"); - 
memory->create(sf_precoeff5_6,nfft_both_6,"pppm/disp:sf_precoeff5_6"); - memory->create(sf_precoeff6_6,nfft_both_6,"pppm/disp:sf_precoeff6_6"); - - } else { - - memory->create3d_offset(vdx_brick_a0,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:vdx_brick_a0"); - memory->create3d_offset(vdy_brick_a0,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:vdy_brick_a0"); - memory->create3d_offset(vdz_brick_a0,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:vdz_brick_a0"); - - memory->create3d_offset(vdx_brick_a1,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:vdx_brick_a1"); - memory->create3d_offset(vdy_brick_a1,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:vdy_brick_a1"); - memory->create3d_offset(vdz_brick_a1,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:vdz_brick_a1"); - - memory->create3d_offset(vdx_brick_a2,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:vdx_brick_a2"); - memory->create3d_offset(vdy_brick_a2,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:vdy_brick_a2"); - memory->create3d_offset(vdz_brick_a2,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:vdz_brick_a2"); - - memory->create3d_offset(vdx_brick_a3,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:vdx_brick_a3"); - memory->create3d_offset(vdy_brick_a3,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:vdy_brick_a3"); - memory->create3d_offset(vdz_brick_a3,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:vdz_brick_a3"); - - memory->create3d_offset(vdx_brick_a4,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:vdx_brick_a4"); - 
memory->create3d_offset(vdy_brick_a4,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:vdy_brick_a4"); - memory->create3d_offset(vdz_brick_a4,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:vdz_brick_a4"); - - memory->create3d_offset(vdx_brick_a5,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:vdx_brick_a5"); - memory->create3d_offset(vdy_brick_a5,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:vdy_brick_a5"); - memory->create3d_offset(vdz_brick_a5,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:vdz_brick_a5"); - - memory->create3d_offset(vdx_brick_a6,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:vdx_brick_a6"); - memory->create3d_offset(vdy_brick_a6,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:vdy_brick_a6"); - memory->create3d_offset(vdz_brick_a6,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:vdz_brick_a6"); - } - - - - int tmp; - - fft1_6 = new FFT3d(lmp,world,nx_pppm_6,ny_pppm_6,nz_pppm_6, - nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6, - nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6, - 0,0,&tmp); - - fft2_6 = new FFT3d(lmp,world,nx_pppm_6,ny_pppm_6,nz_pppm_6, - nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6, - nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6, - 0,0,&tmp); - - remap_6 = new Remap(lmp,world, - nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6, - nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6, - 1,0,0,FFT_PRECISION); - - // create ghost grid object for rho and electric field communication - - - if (differentiation_flag == 1) - cg_6 = new CommGrid(lmp,world,7,7, - nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6, - nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6, - 
procneigh[0][0],procneigh[0][1],procneigh[1][0], - procneigh[1][1],procneigh[2][0],procneigh[2][1]); - else - cg_6 = new CommGrid(lmp,world,21,7, - nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6, - nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6, - procneigh[0][0],procneigh[0][1],procneigh[1][0], - procneigh[1][1],procneigh[2][0],procneigh[2][1]); - } - - if (function[3]) { - memory->create(work1_6,2*nfft_both_6,"pppm/disp:work1_6"); - memory->create(work2_6,2*nfft_both_6,"pppm/disp:work2_6"); - - memory->create1d_offset(fkx_6,nxlo_fft_6,nxhi_fft_6,"pppm/disp:fkx_6"); - memory->create1d_offset(fky_6,nylo_fft_6,nyhi_fft_6,"pppm/disp:fky_6"); - memory->create1d_offset(fkz_6,nzlo_fft_6,nzhi_fft_6,"pppm/disp:fkz_6"); - - memory->create1d_offset(fkx2_6,nxlo_fft_6,nxhi_fft_6,"pppm/disp:fkx2_6"); - memory->create1d_offset(fky2_6,nylo_fft_6,nyhi_fft_6,"pppm/disp:fky2_6"); - memory->create1d_offset(fkz2_6,nzlo_fft_6,nzhi_fft_6,"pppm/disp:fkz2_6"); - - memory->create(gf_b_6,order_6,"pppm/disp:gf_b_6"); - memory->create2d_offset(rho1d_6,3,-order_6/2,order_6/2,"pppm/disp:rho1d_6"); - memory->create2d_offset(rho_coeff_6,order_6,(1-order_6)/2,order_6/2,"pppm/disp:rho_coeff_6"); - memory->create2d_offset(drho1d_6,3,-order_6/2,order_6/2,"pppm/disp:drho1d_6"); - memory->create2d_offset(drho_coeff_6,order_6,(1-order_6)/2,order_6/2,"pppm/disp:drho_coeff_6"); - - memory->create(greensfn_6,nfft_both_6,"pppm/disp:greensfn_6"); - memory->create(vg_6,nfft_both_6,6,"pppm/disp:vg_6"); - memory->create(vg2_6,nfft_both_6,3,"pppm/disp:vg2_6"); - - memory->create4d_offset(density_brick_none,nsplit_alloc,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:density_brick_none"); - if ( differentiation_flag == 1) { - memory->create4d_offset(u_brick_none,nsplit_alloc,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_none"); - - memory->create(sf_precoeff1_6,nfft_both_6,"pppm/disp:sf_precoeff1_6"); - 
memory->create(sf_precoeff2_6,nfft_both_6,"pppm/disp:sf_precoeff2_6"); - memory->create(sf_precoeff3_6,nfft_both_6,"pppm/disp:sf_precoeff3_6"); - memory->create(sf_precoeff4_6,nfft_both_6,"pppm/disp:sf_precoeff4_6"); - memory->create(sf_precoeff5_6,nfft_both_6,"pppm/disp:sf_precoeff5_6"); - memory->create(sf_precoeff6_6,nfft_both_6,"pppm/disp:sf_precoeff6_6"); - - } else { - memory->create4d_offset(vdx_brick_none,nsplit_alloc,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:vdx_brick_none"); - memory->create4d_offset(vdy_brick_none,nsplit_alloc,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:vdy_brick_none"); - memory->create4d_offset(vdz_brick_none,nsplit_alloc,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:vdz_brick_none"); - } - memory->create(density_fft_none,nsplit_alloc,nfft_both_6,"pppm/disp:density_fft_none"); - - - int tmp; - - fft1_6 = new FFT3d(lmp,world,nx_pppm_6,ny_pppm_6,nz_pppm_6, - nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6, - nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6, - 0,0,&tmp); - - fft2_6 = new FFT3d(lmp,world,nx_pppm_6,ny_pppm_6,nz_pppm_6, - nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6, - nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6, - 0,0,&tmp); - - remap_6 = new Remap(lmp,world, - nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6, - nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6, - 1,0,0,FFT_PRECISION); - - // create ghost grid object for rho and electric field communication - - if (differentiation_flag == 1) - cg_6 = new CommGrid(lmp,world,nsplit_alloc,nsplit_alloc, - nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6, - nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6, - procneigh[0][0],procneigh[0][1],procneigh[1][0], - procneigh[1][1],procneigh[2][0],procneigh[2][1]); - else - cg_6 = new 
CommGrid(lmp,world,3*nsplit_alloc,nsplit_alloc, - nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6, - nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6, - procneigh[0][0],procneigh[0][1],procneigh[1][0], - procneigh[1][1],procneigh[2][0],procneigh[2][1]); - } - -} - -/* ---------------------------------------------------------------------- - allocate memory that depends on # of K-vectors and order - for per atom calculations -------------------------------------------------------------------------- */ - -void PPPMDisp::allocate_peratom() -{ - - int (*procneigh)[2] = comm->procneigh; - - if (function[0]) { - - if (differentiation_flag != 1) - memory->create3d_offset(u_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, - nxlo_out,nxhi_out,"pppm/disp:u_brick"); - - memory->create3d_offset(v0_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, - nxlo_out,nxhi_out,"pppm/disp:v0_brick"); - memory->create3d_offset(v1_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, - nxlo_out,nxhi_out,"pppm/disp:v1_brick"); - memory->create3d_offset(v2_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, - nxlo_out,nxhi_out,"pppm/disp:v2_brick"); - memory->create3d_offset(v3_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, - nxlo_out,nxhi_out,"pppm/disp:v3_brick"); - memory->create3d_offset(v4_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, - nxlo_out,nxhi_out,"pppm/disp:v4_brick"); - memory->create3d_offset(v5_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, - nxlo_out,nxhi_out,"pppm/disp:v5_brick"); - - // create ghost grid object for rho and electric field communication - - if (differentiation_flag == 1) - cg_peratom = - new CommGrid(lmp,world,6,1, - nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in, - nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out, - procneigh[0][0],procneigh[0][1],procneigh[1][0], - procneigh[1][1],procneigh[2][0],procneigh[2][1]); - else - cg_peratom = - new CommGrid(lmp,world,7,1, - nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in, - 
nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out, - procneigh[0][0],procneigh[0][1],procneigh[1][0], - procneigh[1][1],procneigh[2][0],procneigh[2][1]); - - } - - - if (function[1]) { - - if ( differentiation_flag != 1 ) - memory->create3d_offset(u_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_g"); - - memory->create3d_offset(v0_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:v0_brick_g"); - memory->create3d_offset(v1_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:v1_brick_g"); - memory->create3d_offset(v2_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:v2_brick_g"); - memory->create3d_offset(v3_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:v3_brick_g"); - memory->create3d_offset(v4_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:v4_brick_g"); - memory->create3d_offset(v5_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:v5_brick_g"); - - // create ghost grid object for rho and electric field communication - - if (differentiation_flag == 1) - cg_peratom_6 = - new CommGrid(lmp,world,6,1, - nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6, - nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6, - procneigh[0][0],procneigh[0][1],procneigh[1][0], - procneigh[1][1],procneigh[2][0],procneigh[2][1]); - else - cg_peratom_6 = - new CommGrid(lmp,world,7,1, - nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6, - nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6, - procneigh[0][0],procneigh[0][1],procneigh[1][0], - procneigh[1][1],procneigh[2][0],procneigh[2][1]); - - } - - if (function[2]) { - - if ( differentiation_flag != 1 ) { - memory->create3d_offset(u_brick_a0,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - 
nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a0"); - memory->create3d_offset(u_brick_a1,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a1"); - memory->create3d_offset(u_brick_a2,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a2"); - memory->create3d_offset(u_brick_a3,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a3"); - memory->create3d_offset(u_brick_a4,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a4"); - memory->create3d_offset(u_brick_a5,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a5"); - memory->create3d_offset(u_brick_a6,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a6"); - } - - memory->create3d_offset(v0_brick_a0,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:v0_brick_a0"); - memory->create3d_offset(v1_brick_a0,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:v1_brick_a0"); - memory->create3d_offset(v2_brick_a0,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:v2_brick_a0"); - memory->create3d_offset(v3_brick_a0,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:v3_brick_a0"); - memory->create3d_offset(v4_brick_a0,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:v4_brick_a0"); - memory->create3d_offset(v5_brick_a0,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:v5_brick_a0"); - - memory->create3d_offset(v0_brick_a1,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:v0_brick_a1"); - memory->create3d_offset(v1_brick_a1,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:v1_brick_a1"); - memory->create3d_offset(v2_brick_a1,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - 
nxlo_out_6,nxhi_out_6,"pppm/disp:v2_brick_a1"); - memory->create3d_offset(v3_brick_a1,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:v3_brick_a1"); - memory->create3d_offset(v4_brick_a1,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:v4_brick_a1"); - memory->create3d_offset(v5_brick_a1,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:v5_brick_a1"); - - memory->create3d_offset(v0_brick_a2,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:v0_brick_a2"); - memory->create3d_offset(v1_brick_a2,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:v1_brick_a2"); - memory->create3d_offset(v2_brick_a2,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:v2_brick_a2"); - memory->create3d_offset(v3_brick_a2,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:v3_brick_a2"); - memory->create3d_offset(v4_brick_a2,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:v4_brick_a2"); - memory->create3d_offset(v5_brick_a2,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:v5_brick_a2"); - - memory->create3d_offset(v0_brick_a3,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:v0_brick_a3"); - memory->create3d_offset(v1_brick_a3,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:v1_brick_a3"); - memory->create3d_offset(v2_brick_a3,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:v2_brick_a3"); - memory->create3d_offset(v3_brick_a3,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:v3_brick_a3"); - memory->create3d_offset(v4_brick_a3,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:v4_brick_a3"); - memory->create3d_offset(v5_brick_a3,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - 
nxlo_out_6,nxhi_out_6,"pppm/disp:v5_brick_a3"); - - memory->create3d_offset(v0_brick_a4,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:v0_brick_a4"); - memory->create3d_offset(v1_brick_a4,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:v1_brick_a4"); - memory->create3d_offset(v2_brick_a4,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:v2_brick_a4"); - memory->create3d_offset(v3_brick_a4,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:v3_brick_a4"); - memory->create3d_offset(v4_brick_a4,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:v4_brick_a4"); - memory->create3d_offset(v5_brick_a4,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:v5_brick_a4"); - - memory->create3d_offset(v0_brick_a5,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:v0_brick_a5"); - memory->create3d_offset(v1_brick_a5,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:v1_brick_a5"); - memory->create3d_offset(v2_brick_a5,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:v2_brick_a5"); - memory->create3d_offset(v3_brick_a5,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:v3_brick_a5"); - memory->create3d_offset(v4_brick_a5,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:v4_brick_a5"); - memory->create3d_offset(v5_brick_a5,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:v5_brick_a5"); - - memory->create3d_offset(v0_brick_a6,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:v0_brick_a6"); - memory->create3d_offset(v1_brick_a6,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:v1_brick_a6"); - memory->create3d_offset(v2_brick_a6,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, 
- nxlo_out_6,nxhi_out_6,"pppm/disp:v2_brick_a6"); - memory->create3d_offset(v3_brick_a6,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:v3_brick_a6"); - memory->create3d_offset(v4_brick_a6,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:v4_brick_a6"); - memory->create3d_offset(v5_brick_a6,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:v5_brick_a6"); - - // create ghost grid object for rho and electric field communication - - if (differentiation_flag == 1) - cg_peratom_6 = - new CommGrid(lmp,world,42,1, - nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6, - nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6, - procneigh[0][0],procneigh[0][1],procneigh[1][0], - procneigh[1][1],procneigh[2][0],procneigh[2][1]); - else - cg_peratom_6 = - new CommGrid(lmp,world,49,1, - nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6, - nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6, - procneigh[0][0],procneigh[0][1],procneigh[1][0], - procneigh[1][1],procneigh[2][0],procneigh[2][1]); - - } - - if (function[3]) { - - if ( differentiation_flag != 1 ) - memory->create4d_offset(u_brick_none,nsplit_alloc,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_none"); - - memory->create4d_offset(v0_brick_none,nsplit_alloc,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:v0_brick_none"); - memory->create4d_offset(v1_brick_none,nsplit_alloc,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:v1_brick_none"); - memory->create4d_offset(v2_brick_none,nsplit_alloc,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:v2_brick_none"); - memory->create4d_offset(v3_brick_none,nsplit_alloc,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:v3_brick_none"); - 
memory->create4d_offset(v4_brick_none,nsplit_alloc,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:v4_brick_none"); - memory->create4d_offset(v5_brick_none,nsplit_alloc,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, - nxlo_out_6,nxhi_out_6,"pppm/disp:v5_brick_none"); - - // create ghost grid object for rho and electric field communication - - if (differentiation_flag == 1) - cg_peratom_6 = - new CommGrid(lmp,world,6*nsplit_alloc,1, - nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6, - nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6, - procneigh[0][0],procneigh[0][1],procneigh[1][0], - procneigh[1][1],procneigh[2][0],procneigh[2][1]); - else - cg_peratom_6 = - new CommGrid(lmp,world,7*nsplit_alloc,1, - nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6, - nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6, - procneigh[0][0],procneigh[0][1],procneigh[1][0], - procneigh[1][1],procneigh[2][0],procneigh[2][1]); - - } -} - - -/* ---------------------------------------------------------------------- - deallocate memory that depends on # of K-vectors and order -------------------------------------------------------------------------- */ - -void PPPMDisp::deallocate() -{ - memory->destroy3d_offset(density_brick,nzlo_out,nylo_out,nxlo_out); - memory->destroy3d_offset(vdx_brick,nzlo_out,nylo_out,nxlo_out); - memory->destroy3d_offset(vdy_brick,nzlo_out,nylo_out,nxlo_out); - memory->destroy3d_offset(vdz_brick,nzlo_out,nylo_out,nxlo_out); - memory->destroy(density_fft); - density_brick = vdx_brick = vdy_brick = vdz_brick = NULL; - density_fft = NULL; - - memory->destroy3d_offset(density_brick_g,nzlo_out_6,nylo_out_6,nxlo_out_6); - memory->destroy3d_offset(vdx_brick_g,nzlo_out_6,nylo_out_6,nxlo_out_6); - memory->destroy3d_offset(vdy_brick_g,nzlo_out_6,nylo_out_6,nxlo_out_6); - memory->destroy3d_offset(vdz_brick_g,nzlo_out_6,nylo_out_6,nxlo_out_6); - memory->destroy(density_fft_g); - density_brick_g 
= vdx_brick_g = vdy_brick_g = vdz_brick_g = NULL; - density_fft_g = NULL; - - memory->destroy3d_offset(density_brick_a0,nzlo_out_6,nylo_out_6,nxlo_out_6); - memory->destroy3d_offset(vdx_brick_a0,nzlo_out_6,nylo_out_6,nxlo_out_6); - memory->destroy3d_offset(vdy_brick_a0,nzlo_out_6,nylo_out_6,nxlo_out_6); - memory->destroy3d_offset(vdz_brick_a0,nzlo_out_6,nylo_out_6,nxlo_out_6); - memory->destroy(density_fft_a0); - density_brick_a0 = vdx_brick_a0 = vdy_brick_a0 = vdz_brick_a0 = NULL; - density_fft_a0 = NULL; - - memory->destroy3d_offset(density_brick_a1,nzlo_out_6,nylo_out_6,nxlo_out_6); - memory->destroy3d_offset(vdx_brick_a1,nzlo_out_6,nylo_out_6,nxlo_out_6); - memory->destroy3d_offset(vdy_brick_a1,nzlo_out_6,nylo_out_6,nxlo_out_6); - memory->destroy3d_offset(vdz_brick_a1,nzlo_out_6,nylo_out_6,nxlo_out_6); - memory->destroy(density_fft_a1); - density_brick_a1 = vdx_brick_a1 = vdy_brick_a1 = vdz_brick_a1 = NULL; - density_fft_a1 = NULL; - - memory->destroy3d_offset(density_brick_a2,nzlo_out_6,nylo_out_6,nxlo_out_6); - memory->destroy3d_offset(vdx_brick_a2,nzlo_out_6,nylo_out_6,nxlo_out_6); - memory->destroy3d_offset(vdy_brick_a2,nzlo_out_6,nylo_out_6,nxlo_out_6); - memory->destroy3d_offset(vdz_brick_a2,nzlo_out_6,nylo_out_6,nxlo_out_6); - memory->destroy(density_fft_a2); - density_brick_a2 = vdx_brick_a2 = vdy_brick_a2 = vdz_brick_a2 = NULL; - density_fft_a2 = NULL; - - memory->destroy3d_offset(density_brick_a3,nzlo_out_6,nylo_out_6,nxlo_out_6); - memory->destroy3d_offset(vdx_brick_a3,nzlo_out_6,nylo_out_6,nxlo_out_6); - memory->destroy3d_offset(vdy_brick_a3,nzlo_out_6,nylo_out_6,nxlo_out_6); - memory->destroy3d_offset(vdz_brick_a3,nzlo_out_6,nylo_out_6,nxlo_out_6); - memory->destroy(density_fft_a3); - density_brick_a3 = vdx_brick_a3 = vdy_brick_a3 = vdz_brick_a3 = NULL; - density_fft_a3 = NULL; - - memory->destroy3d_offset(density_brick_a4,nzlo_out_6,nylo_out_6,nxlo_out_6); - memory->destroy3d_offset(vdx_brick_a4,nzlo_out_6,nylo_out_6,nxlo_out_6); - 
memory->destroy3d_offset(vdy_brick_a4,nzlo_out_6,nylo_out_6,nxlo_out_6); - memory->destroy3d_offset(vdz_brick_a4,nzlo_out_6,nylo_out_6,nxlo_out_6); - memory->destroy(density_fft_a4); - density_brick_a4 = vdx_brick_a4 = vdy_brick_a4 = vdz_brick_a4 = NULL; - density_fft_a4 = NULL; - - memory->destroy3d_offset(density_brick_a5,nzlo_out_6,nylo_out_6,nxlo_out_6); - memory->destroy3d_offset(vdx_brick_a5,nzlo_out_6,nylo_out_6,nxlo_out_6); - memory->destroy3d_offset(vdy_brick_a5,nzlo_out_6,nylo_out_6,nxlo_out_6); - memory->destroy3d_offset(vdz_brick_a5,nzlo_out_6,nylo_out_6,nxlo_out_6); - memory->destroy(density_fft_a5); - density_brick_a5 = vdx_brick_a5 = vdy_brick_a5 = vdz_brick_a5 = NULL; - density_fft_a5 = NULL; - - memory->destroy3d_offset(density_brick_a6,nzlo_out_6,nylo_out_6,nxlo_out_6); - memory->destroy3d_offset(vdx_brick_a6,nzlo_out_6,nylo_out_6,nxlo_out_6); - memory->destroy3d_offset(vdy_brick_a6,nzlo_out_6,nylo_out_6,nxlo_out_6); - memory->destroy3d_offset(vdz_brick_a6,nzlo_out_6,nylo_out_6,nxlo_out_6); - memory->destroy(density_fft_a6); - density_brick_a6 = vdx_brick_a6 = vdy_brick_a6 = vdz_brick_a6 = NULL; - density_fft_a6 = NULL; - - memory->destroy4d_offset(density_brick_none,nzlo_out_6,nylo_out_6,nxlo_out_6); - memory->destroy4d_offset(vdx_brick_none,nzlo_out_6,nylo_out_6,nxlo_out_6); - memory->destroy4d_offset(vdy_brick_none,nzlo_out_6,nylo_out_6,nxlo_out_6); - memory->destroy4d_offset(vdz_brick_none,nzlo_out_6,nylo_out_6,nxlo_out_6); - memory->destroy(density_fft_none); - density_brick_none = vdx_brick_none = vdy_brick_none = vdz_brick_none = NULL; - density_fft_none = NULL; - - memory->destroy(sf_precoeff1); - memory->destroy(sf_precoeff2); - memory->destroy(sf_precoeff3); - memory->destroy(sf_precoeff4); - memory->destroy(sf_precoeff5); - memory->destroy(sf_precoeff6); - sf_precoeff1 = sf_precoeff2 = sf_precoeff3 = sf_precoeff4 = sf_precoeff5 = sf_precoeff6 = NULL; - - memory->destroy(sf_precoeff1_6); - memory->destroy(sf_precoeff2_6); - 
memory->destroy(sf_precoeff3_6); - memory->destroy(sf_precoeff4_6); - memory->destroy(sf_precoeff5_6); - memory->destroy(sf_precoeff6_6); - sf_precoeff1_6 = sf_precoeff2_6 = sf_precoeff3_6 = sf_precoeff4_6 = sf_precoeff5_6 = sf_precoeff6_6 = NULL; - - memory->destroy(greensfn); - memory->destroy(greensfn_6); - memory->destroy(work1); - memory->destroy(work2); - memory->destroy(work1_6); - memory->destroy(work2_6); - memory->destroy(vg); - memory->destroy(vg2); - memory->destroy(vg_6); - memory->destroy(vg2_6); - greensfn = greensfn_6 = NULL; - work1 = work2 = work1_6 = work2_6 = NULL; - vg = vg2 = vg_6 = vg2_6 = NULL; - - memory->destroy1d_offset(fkx,nxlo_fft); - memory->destroy1d_offset(fky,nylo_fft); - memory->destroy1d_offset(fkz,nzlo_fft); - fkx = fky = fkz = NULL; - - memory->destroy1d_offset(fkx2,nxlo_fft); - memory->destroy1d_offset(fky2,nylo_fft); - memory->destroy1d_offset(fkz2,nzlo_fft); - fkx2 = fky2 = fkz2 = NULL; - - memory->destroy1d_offset(fkx_6,nxlo_fft_6); - memory->destroy1d_offset(fky_6,nylo_fft_6); - memory->destroy1d_offset(fkz_6,nzlo_fft_6); - fkx_6 = fky_6 = fkz_6 = NULL; - - memory->destroy1d_offset(fkx2_6,nxlo_fft_6); - memory->destroy1d_offset(fky2_6,nylo_fft_6); - memory->destroy1d_offset(fkz2_6,nzlo_fft_6); - fkx2_6 = fky2_6 = fkz2_6 = NULL; - - - memory->destroy(gf_b); - memory->destroy2d_offset(rho1d,-order/2); - memory->destroy2d_offset(rho_coeff,(1-order)/2); - memory->destroy2d_offset(drho1d,-order/2); - memory->destroy2d_offset(drho_coeff, (1-order)/2); - gf_b = NULL; - rho1d = rho_coeff = drho1d = drho_coeff = NULL; - - memory->destroy(gf_b_6); - memory->destroy2d_offset(rho1d_6,-order_6/2); - memory->destroy2d_offset(rho_coeff_6,(1-order_6)/2); - memory->destroy2d_offset(drho1d_6,-order_6/2); - memory->destroy2d_offset(drho_coeff_6,(1-order_6)/2); - gf_b_6 = NULL; - rho1d_6 = rho_coeff_6 = drho1d_6 = drho_coeff_6 = NULL; - - delete fft1; - delete fft2; - delete remap; - delete cg; - fft1 = fft2 = NULL; - remap = NULL; - cg = 
NULL; - - delete fft1_6; - delete fft2_6; - delete remap_6; - delete cg_6; - fft1_6 = fft2_6 = NULL; - remap_6 = NULL; - cg_6 = NULL; -} - - -/* ---------------------------------------------------------------------- - deallocate memory that depends on # of K-vectors and order - for per atom calculations -------------------------------------------------------------------------- */ - -void PPPMDisp::deallocate_peratom() -{ - peratom_allocate_flag = 0; - - memory->destroy3d_offset(u_brick, nzlo_out, nylo_out, nxlo_out); - memory->destroy3d_offset(v0_brick, nzlo_out, nylo_out, nxlo_out); - memory->destroy3d_offset(v1_brick, nzlo_out, nylo_out, nxlo_out); - memory->destroy3d_offset(v2_brick, nzlo_out, nylo_out, nxlo_out); - memory->destroy3d_offset(v3_brick, nzlo_out, nylo_out, nxlo_out); - memory->destroy3d_offset(v4_brick, nzlo_out, nylo_out, nxlo_out); - memory->destroy3d_offset(v5_brick, nzlo_out, nylo_out, nxlo_out); - u_brick = v0_brick = v1_brick = v2_brick = v3_brick = v4_brick = v5_brick = NULL; - - memory->destroy3d_offset(u_brick_g, nzlo_out_6, nylo_out_6, nxlo_out_6); - memory->destroy3d_offset(v0_brick_g, nzlo_out_6, nylo_out_6, nxlo_out_6); - memory->destroy3d_offset(v1_brick_g, nzlo_out_6, nylo_out_6, nxlo_out_6); - memory->destroy3d_offset(v2_brick_g, nzlo_out_6, nylo_out_6, nxlo_out_6); - memory->destroy3d_offset(v3_brick_g, nzlo_out_6, nylo_out_6, nxlo_out_6); - memory->destroy3d_offset(v4_brick_g, nzlo_out_6, nylo_out_6, nxlo_out_6); - memory->destroy3d_offset(v5_brick_g, nzlo_out_6, nylo_out_6, nxlo_out_6); - u_brick_g = v0_brick_g = v1_brick_g = v2_brick_g = v3_brick_g = v4_brick_g = v5_brick_g = NULL; - - memory->destroy3d_offset(u_brick_a0, nzlo_out_6, nylo_out_6, nxlo_out_6); - memory->destroy3d_offset(v0_brick_a0, nzlo_out_6, nylo_out_6, nxlo_out_6); - memory->destroy3d_offset(v1_brick_a0, nzlo_out_6, nylo_out_6, nxlo_out_6); - memory->destroy3d_offset(v2_brick_a0, nzlo_out_6, nylo_out_6, nxlo_out_6); - memory->destroy3d_offset(v3_brick_a0, 
nzlo_out_6, nylo_out_6, nxlo_out_6); - memory->destroy3d_offset(v4_brick_a0, nzlo_out_6, nylo_out_6, nxlo_out_6); - memory->destroy3d_offset(v5_brick_a0, nzlo_out_6, nylo_out_6, nxlo_out_6); - u_brick_a0 = v0_brick_a0 = v1_brick_a0 = v2_brick_a0 = v3_brick_a0 = v4_brick_a0 = v5_brick_a0 = NULL; - - memory->destroy3d_offset(u_brick_a1, nzlo_out_6, nylo_out_6, nxlo_out_6); - memory->destroy3d_offset(v0_brick_a1, nzlo_out_6, nylo_out_6, nxlo_out_6); - memory->destroy3d_offset(v1_brick_a1, nzlo_out_6, nylo_out_6, nxlo_out_6); - memory->destroy3d_offset(v2_brick_a1, nzlo_out_6, nylo_out_6, nxlo_out_6); - memory->destroy3d_offset(v3_brick_a1, nzlo_out_6, nylo_out_6, nxlo_out_6); - memory->destroy3d_offset(v4_brick_a1, nzlo_out_6, nylo_out_6, nxlo_out_6); - memory->destroy3d_offset(v5_brick_a1, nzlo_out_6, nylo_out_6, nxlo_out_6); - u_brick_a1 = v0_brick_a1 = v1_brick_a1 = v2_brick_a1 = v3_brick_a1 = v4_brick_a1 = v5_brick_a1 = NULL; - - memory->destroy3d_offset(u_brick_a2, nzlo_out_6, nylo_out_6, nxlo_out_6); - memory->destroy3d_offset(v0_brick_a2, nzlo_out_6, nylo_out_6, nxlo_out_6); - memory->destroy3d_offset(v1_brick_a2, nzlo_out_6, nylo_out_6, nxlo_out_6); - memory->destroy3d_offset(v2_brick_a2, nzlo_out_6, nylo_out_6, nxlo_out_6); - memory->destroy3d_offset(v3_brick_a2, nzlo_out_6, nylo_out_6, nxlo_out_6); - memory->destroy3d_offset(v4_brick_a2, nzlo_out_6, nylo_out_6, nxlo_out_6); - memory->destroy3d_offset(v5_brick_a2, nzlo_out_6, nylo_out_6, nxlo_out_6); - u_brick_a2 = v0_brick_a2 = v1_brick_a2 = v2_brick_a2 = v3_brick_a2 = v4_brick_a2 = v5_brick_a2 = NULL; - - memory->destroy3d_offset(u_brick_a3, nzlo_out_6, nylo_out_6, nxlo_out_6); - memory->destroy3d_offset(v0_brick_a3, nzlo_out_6, nylo_out_6, nxlo_out_6); - memory->destroy3d_offset(v1_brick_a3, nzlo_out_6, nylo_out_6, nxlo_out_6); - memory->destroy3d_offset(v2_brick_a3, nzlo_out_6, nylo_out_6, nxlo_out_6); - memory->destroy3d_offset(v3_brick_a3, nzlo_out_6, nylo_out_6, nxlo_out_6); - 
memory->destroy3d_offset(v4_brick_a3, nzlo_out_6, nylo_out_6, nxlo_out_6); - memory->destroy3d_offset(v5_brick_a3, nzlo_out_6, nylo_out_6, nxlo_out_6); - u_brick_a3 = v0_brick_a3 = v1_brick_a3 = v2_brick_a3 = v3_brick_a3 = v4_brick_a3 = v5_brick_a3 = NULL; - - memory->destroy3d_offset(u_brick_a4, nzlo_out_6, nylo_out_6, nxlo_out_6); - memory->destroy3d_offset(v0_brick_a4, nzlo_out_6, nylo_out_6, nxlo_out_6); - memory->destroy3d_offset(v1_brick_a4, nzlo_out_6, nylo_out_6, nxlo_out_6); - memory->destroy3d_offset(v2_brick_a4, nzlo_out_6, nylo_out_6, nxlo_out_6); - memory->destroy3d_offset(v3_brick_a4, nzlo_out_6, nylo_out_6, nxlo_out_6); - memory->destroy3d_offset(v4_brick_a4, nzlo_out_6, nylo_out_6, nxlo_out_6); - memory->destroy3d_offset(v5_brick_a4, nzlo_out_6, nylo_out_6, nxlo_out_6); - u_brick_a4 = v0_brick_a4 = v1_brick_a4 = v2_brick_a4 = v3_brick_a4 = v4_brick_a4 = v5_brick_a4 = NULL; - - memory->destroy3d_offset(u_brick_a5, nzlo_out_6, nylo_out_6, nxlo_out_6); - memory->destroy3d_offset(v0_brick_a5, nzlo_out_6, nylo_out_6, nxlo_out_6); - memory->destroy3d_offset(v1_brick_a5, nzlo_out_6, nylo_out_6, nxlo_out_6); - memory->destroy3d_offset(v2_brick_a5, nzlo_out_6, nylo_out_6, nxlo_out_6); - memory->destroy3d_offset(v3_brick_a5, nzlo_out_6, nylo_out_6, nxlo_out_6); - memory->destroy3d_offset(v4_brick_a5, nzlo_out_6, nylo_out_6, nxlo_out_6); - memory->destroy3d_offset(v5_brick_a5, nzlo_out_6, nylo_out_6, nxlo_out_6); - u_brick_a5 = v0_brick_a5 = v1_brick_a5 = v2_brick_a5 = v3_brick_a5 = v4_brick_a5 = v5_brick_a5 = NULL; - - memory->destroy3d_offset(u_brick_a6, nzlo_out_6, nylo_out_6, nxlo_out_6); - memory->destroy3d_offset(v0_brick_a6, nzlo_out_6, nylo_out_6, nxlo_out_6); - memory->destroy3d_offset(v1_brick_a6, nzlo_out_6, nylo_out_6, nxlo_out_6); - memory->destroy3d_offset(v2_brick_a6, nzlo_out_6, nylo_out_6, nxlo_out_6); - memory->destroy3d_offset(v3_brick_a6, nzlo_out_6, nylo_out_6, nxlo_out_6); - memory->destroy3d_offset(v4_brick_a6, nzlo_out_6, nylo_out_6, 
nxlo_out_6); - memory->destroy3d_offset(v5_brick_a6, nzlo_out_6, nylo_out_6, nxlo_out_6); - u_brick_a6 = v0_brick_a6 = v1_brick_a6 = v2_brick_a6 = v3_brick_a6 = v4_brick_a6 = v5_brick_a6 = NULL; - - memory->destroy4d_offset(u_brick_none, nzlo_out_6, nylo_out_6, nxlo_out_6); - memory->destroy4d_offset(v0_brick_none, nzlo_out_6, nylo_out_6, nxlo_out_6); - memory->destroy4d_offset(v1_brick_none, nzlo_out_6, nylo_out_6, nxlo_out_6); - memory->destroy4d_offset(v2_brick_none, nzlo_out_6, nylo_out_6, nxlo_out_6); - memory->destroy4d_offset(v3_brick_none, nzlo_out_6, nylo_out_6, nxlo_out_6); - memory->destroy4d_offset(v4_brick_none, nzlo_out_6, nylo_out_6, nxlo_out_6); - memory->destroy4d_offset(v5_brick_none, nzlo_out_6, nylo_out_6, nxlo_out_6); - u_brick_none = v0_brick_none = v1_brick_none = v2_brick_none = v3_brick_none = v4_brick_none = v5_brick_none = NULL; - - delete cg_peratom; - delete cg_peratom_6; - cg_peratom = cg_peratom_6 = NULL; -} - -/* ---------------------------------------------------------------------- - set size of FFT grid (nx,ny,nz_pppm) and g_ewald - for Coulomb interactions -------------------------------------------------------------------------- */ - -void PPPMDisp::set_grid() -{ - double q2 = qsqsum * force->qqrd2e; - - // use xprd,yprd,zprd even if triclinic so grid size is the same - // adjust z dimension for 2d slab PPPM - // 3d PPPM just uses zprd since slab_volfactor = 1.0 - - double xprd = domain->xprd; - double yprd = domain->yprd; - double zprd = domain->zprd; - double zprd_slab = zprd*slab_volfactor; - - // make initial g_ewald estimate - // based on desired accuracy and real space cutoff - // fluid-occupied volume used to estimate real-space error - // zprd used rather than zprd_slab - - double h, h_x,h_y,h_z; - bigint natoms = atom->natoms; - - if (!gewaldflag) { - g_ewald = accuracy*sqrt(natoms*cutoff*xprd*yprd*zprd) / (2.0*q2); - if (g_ewald >= 1.0) - error->all(FLERR,"KSpace accuracy too large to estimate G vector"); - g_ewald = 
sqrt(-log(g_ewald)) / cutoff; - } - - // set optimal nx_pppm,ny_pppm,nz_pppm based on order and accuracy - // nz_pppm uses extended zprd_slab instead of zprd - // reduce it until accuracy target is met - - if (!gridflag) { - h = h_x = h_y = h_z = 4.0/g_ewald; - int count = 0; - while (1) { - - // set grid dimension - nx_pppm = static_cast (xprd/h_x); - ny_pppm = static_cast (yprd/h_y); - nz_pppm = static_cast (zprd_slab/h_z); - - if (nx_pppm <= 1) nx_pppm = 2; - if (ny_pppm <= 1) ny_pppm = 2; - if (nz_pppm <= 1) nz_pppm = 2; - - //set local grid dimension - int npey_fft,npez_fft; - if (nz_pppm >= nprocs) { - npey_fft = 1; - npez_fft = nprocs; - } else procs2grid2d(nprocs,ny_pppm,nz_pppm,&npey_fft,&npez_fft); - - int me_y = me % npey_fft; - int me_z = me / npey_fft; - - nxlo_fft = 0; - nxhi_fft = nx_pppm - 1; - nylo_fft = me_y*ny_pppm/npey_fft; - nyhi_fft = (me_y+1)*ny_pppm/npey_fft - 1; - nzlo_fft = me_z*nz_pppm/npez_fft; - nzhi_fft = (me_z+1)*nz_pppm/npez_fft - 1; - - double qopt = compute_qopt(); - - double dfkspace = sqrt(qopt/natoms)*q2/(xprd*yprd*zprd_slab); - - count++; - - // break loop if the accuracy has been reached or too many loops have been performed - if (dfkspace <= accuracy) break; - if (count > 500) error->all(FLERR, "Could not compute grid size for Coulomb interaction"); - h *= 0.95; - h_x = h_y = h_z = h; - } - } - - // boost grid size until it is factorable - - while (!factorable(nx_pppm)) nx_pppm++; - while (!factorable(ny_pppm)) ny_pppm++; - while (!factorable(nz_pppm)) nz_pppm++; -} - -/* ---------------------------------------------------------------------- - set the FFT parameters -------------------------------------------------------------------------- */ - -void PPPMDisp::set_fft_parameters(int& nx_p,int& ny_p,int& nz_p, - int& nxlo_f,int& nylo_f,int& nzlo_f, - int& nxhi_f,int& nyhi_f,int& nzhi_f, - int& nxlo_i,int& nylo_i,int& nzlo_i, - int& nxhi_i,int& nyhi_i,int& nzhi_i, - int& nxlo_o,int& nylo_o,int& nzlo_o, - int& nxhi_o,int& 
nyhi_o,int& nzhi_o, - int& nlow, int& nupp, - int& ng, int& nf, int& nfb, - double& sft,double& sftone, int& ord) -{ - // global indices of PPPM grid range from 0 to N-1 - // nlo_in,nhi_in = lower/upper limits of the 3d sub-brick of - // global PPPM grid that I own without ghost cells - // for slab PPPM, assign z grid as if it were not extended - - nxlo_i = static_cast (comm->xsplit[comm->myloc[0]] * nx_p); - nxhi_i = static_cast (comm->xsplit[comm->myloc[0]+1] * nx_p) - 1; - - nylo_i = static_cast (comm->ysplit[comm->myloc[1]] * ny_p); - nyhi_i = static_cast (comm->ysplit[comm->myloc[1]+1] * ny_p) - 1; - - nzlo_i = static_cast - (comm->zsplit[comm->myloc[2]] * nz_p/slab_volfactor); - nzhi_i = static_cast - (comm->zsplit[comm->myloc[2]+1] * nz_p/slab_volfactor) - 1; - - - // nlow,nupp = stencil size for mapping particles to PPPM grid - - nlow = -(ord-1)/2; - nupp = ord/2; - - // sft values for particle <-> grid mapping - // add/subtract OFFSET to avoid int(-0.75) = 0 when want it to be -1 - - if (ord % 2) sft = OFFSET + 0.5; - else sft = OFFSET; - if (ord % 2) sftone = 0.0; - else sftone = 0.5; - - // nlo_out,nhi_out = lower/upper limits of the 3d sub-brick of - // global PPPM grid that my particles can contribute charge to - // effectively nlo_in,nhi_in + ghost cells - // nlo,nhi = global coords of grid pt to "lower left" of smallest/largest - // position a particle in my box can be at - // dist[3] = particle position bound = subbox + skin/2.0 + qdist - // qdist = offset due to TIP4P fictitious charge - // convert to triclinic if necessary - // nlo_out,nhi_out = nlo,nhi + stencil size for particle mapping - // for slab PPPM, assign z grid as if it were not extended - - double *prd,*sublo,*subhi; - - if (triclinic == 0) { - prd = domain->prd; - boxlo = domain->boxlo; - sublo = domain->sublo; - subhi = domain->subhi; - } else { - prd = domain->prd_lamda; - boxlo = domain->boxlo_lamda; - sublo = domain->sublo_lamda; - subhi = domain->subhi_lamda; - } - - double xprd 
= prd[0]; - double yprd = prd[1]; - double zprd = prd[2]; - double zprd_slab = zprd*slab_volfactor; - - double dist[3]; - double cuthalf = 0.5*neighbor->skin + qdist; - if (triclinic == 0) dist[0] = dist[1] = dist[2] = cuthalf; - else { - dist[0] = cuthalf/domain->prd[0]; - dist[1] = cuthalf/domain->prd[1]; - dist[2] = cuthalf/domain->prd[2]; - } - - int nlo,nhi; - - nlo = static_cast ((sublo[0]-dist[0]-boxlo[0]) * - nx_p/xprd + sft) - OFFSET; - nhi = static_cast ((subhi[0]+dist[0]-boxlo[0]) * - nx_p/xprd + sft) - OFFSET; - nxlo_o = nlo + nlow; - nxhi_o = nhi + nupp; - - nlo = static_cast ((sublo[1]-dist[1]-boxlo[1]) * - ny_p/yprd + sft) - OFFSET; - nhi = static_cast ((subhi[1]+dist[1]-boxlo[1]) * - ny_p/yprd + sft) - OFFSET; - nylo_o = nlo + nlow; - nyhi_o = nhi + nupp; - - nlo = static_cast ((sublo[2]-dist[2]-boxlo[2]) * - nz_p/zprd_slab + sft) - OFFSET; - nhi = static_cast ((subhi[2]+dist[2]-boxlo[2]) * - nz_p/zprd_slab + sft) - OFFSET; - nzlo_o = nlo + nlow; - nzhi_o = nhi + nupp; - - // for slab PPPM, change the grid boundary for processors at +z end - // to include the empty volume between periodically repeating slabs - // for slab PPPM, want charge data communicated from -z proc to +z proc, - // but not vice versa, also want field data communicated from +z proc to - // -z proc, but not vice versa - // this is accomplished by nzhi_i = nzhi_o on +z end (no ghost cells) - - if (slabflag && (comm->myloc[2] == comm->procgrid[2]-1)) { - nzhi_i = nz_p - 1; - nzhi_o = nz_p - 1; - } - - // decomposition of FFT mesh - // global indices range from 0 to N-1 - // proc owns entire x-dimension, clump of columns in y,z dimensions - // npey_fft,npez_fft = # of procs in y,z dims - // if nprocs is small enough, proc can own 1 or more entire xy planes, - // else proc owns 2d sub-blocks of yz plane - // me_y,me_z = which proc (0-npe_fft-1) I am in y,z dimensions - // nlo_fft,nhi_fft = lower/upper limit of the section - // of the global FFT mesh that I own - - int 
npey_fft,npez_fft; - if (nz_p >= nprocs) { - npey_fft = 1; - npez_fft = nprocs; - } else procs2grid2d(nprocs,ny_p,nz_p,&npey_fft,&npez_fft); - - int me_y = me % npey_fft; - int me_z = me / npey_fft; - - nxlo_f = 0; - nxhi_f = nx_p - 1; - nylo_f = me_y*ny_p/npey_fft; - nyhi_f = (me_y+1)*ny_p/npey_fft - 1; - nzlo_f = me_z*nz_p/npez_fft; - nzhi_f = (me_z+1)*nz_p/npez_fft - 1; - - // PPPM grid for this proc, including ghosts - - ng = (nxhi_o-nxlo_o+1) * (nyhi_o-nylo_o+1) * - (nzhi_o-nzlo_o+1); - - // FFT arrays on this proc, without ghosts - // nfft = FFT points in FFT decomposition on this proc - // nfft_brick = FFT points in 3d brick-decomposition on this proc - // nfft_both = greater of 2 values - - nf = (nxhi_f-nxlo_f+1) * (nyhi_f-nylo_f+1) * - (nzhi_f-nzlo_f+1); - int nfft_brick = (nxhi_i-nxlo_i+1) * (nyhi_i-nylo_i+1) * - (nzhi_i-nzlo_i+1); - nfb = MAX(nf,nfft_brick); - -} - -/* ---------------------------------------------------------------------- - check if all factors of n are in list of factors - return 1 if yes, 0 if no -------------------------------------------------------------------------- */ - -int PPPMDisp::factorable(int n) -{ - int i; - - while (n > 1) { - for (i = 0; i < nfactors; i++) { - if (n % factors[i] == 0) { - n /= factors[i]; - break; - } - } - if (i == nfactors) return 0; - } - - return 1; -} - -/* ---------------------------------------------------------------------- - pre-compute Green's function denominator expansion coeffs, Gamma(2n) -------------------------------------------------------------------------- */ -void PPPMDisp::adjust_gewald() -{ - - // Use Newton solver to find g_ewald - - double dx; - - // Begin algorithm - - for (int i = 0; i < LARGE; i++) { - dx = f() / derivf(); - g_ewald -= dx; //Update g_ewald - if (fabs(f()) < SMALL) return; - } - - // Failed to converge - - char str[128]; - sprintf(str, "Could not compute g_ewald"); - error->all(FLERR, str); - -} - -/* 
---------------------------------------------------------------------- - Calculate f(x) - ------------------------------------------------------------------------- */ - -double PPPMDisp::f() -{ - double df_rspace, df_kspace; - double q2 = qsqsum * force->qqrd2e; - double xprd = domain->xprd; - double yprd = domain->yprd; - double zprd = domain->zprd; - double zprd_slab = zprd*slab_volfactor; - bigint natoms = atom->natoms; - - df_rspace = 2.0*q2*exp(-g_ewald*g_ewald*cutoff*cutoff) / - sqrt(natoms*cutoff*xprd*yprd*zprd); - - double qopt = compute_qopt(); - df_kspace = sqrt(qopt/natoms)*q2/(xprd*yprd*zprd_slab); - - return df_rspace - df_kspace; -} - -/* ---------------------------------------------------------------------- - Calculate numerical derivative f'(x) using forward difference - [f(x + h) - f(x)] / h - ------------------------------------------------------------------------- */ - -double PPPMDisp::derivf() -{ - double h = 0.000001; //Derivative step-size - double df,f1,f2,g_ewald_old; - - f1 = f(); - g_ewald_old = g_ewald; - g_ewald += h; - f2 = f(); - g_ewald = g_ewald_old; - df = (f2 - f1)/h; - - return df; -} - -/* ---------------------------------------------------------------------- - Calculate the final estimator for the accuracy -------------------------------------------------------------------------- */ - -double PPPMDisp::final_accuracy() -{ - double df_rspace, df_kspace; - double q2 = qsqsum * force->qqrd2e; - double xprd = domain->xprd; - double yprd = domain->yprd; - double zprd = domain->zprd; - double zprd_slab = zprd*slab_volfactor; - bigint natoms = atom->natoms; - df_rspace = 2.0*q2 * exp(-g_ewald*g_ewald*cutoff*cutoff) / - sqrt(natoms*cutoff*xprd*yprd*zprd); - - double qopt = compute_qopt(); - - df_kspace = sqrt(qopt/natoms)*q2/(xprd*yprd*zprd_slab); - - double acc = sqrt(df_rspace*df_rspace + df_kspace*df_kspace); - return acc; -} - -/* ---------------------------------------------------------------------- - Calculate the final estimator 
for the Dispersion accuracy -------------------------------------------------------------------------- */ - -void PPPMDisp::final_accuracy_6(double& acc, double& acc_real, double& acc_kspace) -{ - double df_rspace, df_kspace; - double xprd = domain->xprd; - double yprd = domain->yprd; - double zprd = domain->zprd; - double zprd_slab = zprd*slab_volfactor; - bigint natoms = atom->natoms; - acc_real = lj_rspace_error(); - - double qopt = compute_qopt_6(); - - acc_kspace = sqrt(qopt/natoms)*csum/(xprd*yprd*zprd_slab); - - acc = sqrt(acc_real*acc_real + acc_kspace*acc_kspace); - return; -} - -/* ---------------------------------------------------------------------- - Compute qopt for Coulomb interactions -------------------------------------------------------------------------- */ - -double PPPMDisp::compute_qopt() -{ - double qopt; - if (differentiation_flag == 1) { - qopt = compute_qopt_ad(); - } else { - qopt = compute_qopt_ik(); - } - double qopt_all; - MPI_Allreduce(&qopt,&qopt_all,1,MPI_DOUBLE,MPI_SUM,world); - return qopt_all; -} - -/* ---------------------------------------------------------------------- - Compute qopt for Dispersion interactions -------------------------------------------------------------------------- */ - -double PPPMDisp::compute_qopt_6() -{ - double qopt; - if (differentiation_flag == 1) { - qopt = compute_qopt_6_ad(); - } else { - qopt = compute_qopt_6_ik(); - } - double qopt_all; - MPI_Allreduce(&qopt,&qopt_all,1,MPI_DOUBLE,MPI_SUM,world); - return qopt_all; -} - -/* ---------------------------------------------------------------------- - Compute qopt for the ik differentiation scheme and Coulomb interaction -------------------------------------------------------------------------- */ - -double PPPMDisp::compute_qopt_ik() -{ - double qopt = 0.0; - int k,l,m; - double *prd; - - if (triclinic == 0) prd = domain->prd; - else prd = domain->prd_lamda; - - double xprd = prd[0]; - double yprd = prd[1]; - double zprd = prd[2]; - double zprd_slab 
= zprd*slab_volfactor; - - double unitkx = (2.0*MY_PI/xprd); - double unitky = (2.0*MY_PI/yprd); - double unitkz = (2.0*MY_PI/zprd_slab); - - int nx,ny,nz,kper,lper,mper; - double sqk, u2; - double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz; - double sum1,sum2, sum3,dot1,dot2; - - int nbx = 2; - int nby = 2; - int nbz = 2; - - for (m = nzlo_fft; m <= nzhi_fft; m++) { - mper = m - nz_pppm*(2*m/nz_pppm); - - for (l = nylo_fft; l <= nyhi_fft; l++) { - lper = l - ny_pppm*(2*l/ny_pppm); - - for (k = nxlo_fft; k <= nxhi_fft; k++) { - kper = k - nx_pppm*(2*k/nx_pppm); - - sqk = pow(unitkx*kper,2.0) + pow(unitky*lper,2.0) + - pow(unitkz*mper,2.0); - - if (sqk != 0.0) { - sum1 = 0.0; - sum2 = 0.0; - sum3 = 0.0; - for (nx = -nbx; nx <= nbx; nx++) { - qx = unitkx*(kper+nx_pppm*nx); - sx = exp(-0.25*pow(qx/g_ewald,2.0)); - wx = 1.0; - argx = 0.5*qx*xprd/nx_pppm; - if (argx != 0.0) wx = pow(sin(argx)/argx,order); - for (ny = -nby; ny <= nby; ny++) { - qy = unitky*(lper+ny_pppm*ny); - sy = exp(-0.25*pow(qy/g_ewald,2.0)); - wy = 1.0; - argy = 0.5*qy*yprd/ny_pppm; - if (argy != 0.0) wy = pow(sin(argy)/argy,order); - for (nz = -nbz; nz <= nbz; nz++) { - qz = unitkz*(mper+nz_pppm*nz); - sz = exp(-0.25*pow(qz/g_ewald,2.0)); - wz = 1.0; - argz = 0.5*qz*zprd_slab/nz_pppm; - if (argz != 0.0) wz = pow(sin(argz)/argz,order); - - dot1 = unitkx*kper*qx + unitky*lper*qy + unitkz*mper*qz; - dot2 = qx*qx+qy*qy+qz*qz; - u2 = pow(wx*wy*wz,2.0); - sum1 += sx*sy*sz*sx*sy*sz/dot2*4.0*4.0*MY_PI*MY_PI; - sum2 += u2*sx*sy*sz*4.0*MY_PI/dot2*dot1; - sum3 += u2; - } - } - } - sum2 *= sum2; - sum3 *= sum3*sqk; - qopt += sum1 -sum2/sum3; - } - } - } - } - return qopt; -} - -/* ---------------------------------------------------------------------- - Compute qopt for the ad differentiation scheme and Coulomb interaction -------------------------------------------------------------------------- */ - -double PPPMDisp::compute_qopt_ad() -{ - double qopt = 0.0; - int k,l,m; - double *prd; - - if (triclinic == 0) 
prd = domain->prd; - else prd = domain->prd_lamda; - - double xprd = prd[0]; - double yprd = prd[1]; - double zprd = prd[2]; - double zprd_slab = zprd*slab_volfactor; - - - double unitkx = (2.0*MY_PI/xprd); - double unitky = (2.0*MY_PI/yprd); - double unitkz = (2.0*MY_PI/zprd_slab); - - int nx,ny,nz,kper,lper,mper; - double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz; - double u2, sqk; - double sum1,sum2,sum3,sum4,dot2; - double numerator; - - int nbx = 2; - int nby = 2; - int nbz = 2; - double form = 1.0; - - for (m = nzlo_fft; m <= nzhi_fft; m++) { - mper = m - nz_pppm*(2*m/nz_pppm); - - for (l = nylo_fft; l <= nyhi_fft; l++) { - lper = l - ny_pppm*(2*l/ny_pppm); - - for (k = nxlo_fft; k <= nxhi_fft; k++) { - kper = k - nx_pppm*(2*k/nx_pppm); - - sqk = pow(unitkx*kper,2.0) + pow(unitky*lper,2.0) + - pow(unitkz*mper,2.0); - - if (sqk != 0.0) { - numerator = form*12.5663706; - - sum1 = 0.0; - sum2 = 0.0; - sum3 = 0.0; - sum4 = 0.0; - for (nx = -nbx; nx <= nbx; nx++) { - qx = unitkx*(kper+nx_pppm*nx); - sx = exp(-0.25*pow(qx/g_ewald,2.0)); - wx = 1.0; - argx = 0.5*qx*xprd/nx_pppm; - if (argx != 0.0) wx = pow(sin(argx)/argx,order); - for (ny = -nby; ny <= nby; ny++) { - qy = unitky*(lper+ny_pppm*ny); - sy = exp(-0.25*pow(qy/g_ewald,2.0)); - wy = 1.0; - argy = 0.5*qy*yprd/ny_pppm; - if (argy != 0.0) wy = pow(sin(argy)/argy,order); - for (nz = -nbz; nz <= nbz; nz++) { - qz = unitkz*(mper+nz_pppm*nz); - sz = exp(-0.25*pow(qz/g_ewald,2.0)); - wz = 1.0; - argz = 0.5*qz*zprd_slab/nz_pppm; - if (argz != 0.0) wz = pow(sin(argz)/argz,order); - - dot2 = qx*qx+qy*qy+qz*qz; - u2 = pow(wx*wy*wz,2.0); - sum1 += sx*sy*sz*sx*sy*sz/dot2*4.0*4.0*MY_PI*MY_PI; - sum2 += sx*sy*sz * u2*4.0*MY_PI; - sum3 += u2; - sum4 += dot2*u2; - } - } - } - sum2 *= sum2; - qopt += sum1 - sum2/(sum3*sum4); - } - } - } - } - return qopt; -} - -/* ---------------------------------------------------------------------- - Compute qopt for the ik differentiation scheme and Dispersion interaction 
-------------------------------------------------------------------------- */ - -double PPPMDisp::compute_qopt_6_ik() -{ - double qopt = 0.0; - int k,l,m,n; - double *prd; - - if (triclinic == 0) prd = domain->prd; - else prd = domain->prd_lamda; - - double xprd = prd[0]; - double yprd = prd[1]; - double zprd = prd[2]; - double zprd_slab = zprd*slab_volfactor; - - double unitkx = (2.0*MY_PI/xprd); - double unitky = (2.0*MY_PI/yprd); - double unitkz = (2.0*MY_PI/zprd_slab); - - int nx,ny,nz,kper,lper,mper; - double sqk, u2; - double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz; - double sum1,sum2, sum3; - double dot1,dot2, rtdot2, term; - double inv2ew = 2*g_ewald_6; - inv2ew = 1.0/inv2ew; - double rtpi = sqrt(MY_PI); - - int nbx = 2; - int nby = 2; - int nbz = 2; - - n = 0; - for (m = nzlo_fft_6; m <= nzhi_fft_6; m++) { - mper = m - nz_pppm_6*(2*m/nz_pppm_6); - - for (l = nylo_fft_6; l <= nyhi_fft_6; l++) { - lper = l - ny_pppm_6*(2*l/ny_pppm_6); - - for (k = nxlo_fft_6; k <= nxhi_fft_6; k++) { - kper = k - nx_pppm_6*(2*k/nx_pppm_6); - - sqk = pow(unitkx*kper,2.0) + pow(unitky*lper,2.0) + - pow(unitkz*mper,2.0); - - if (sqk != 0.0) { - sum1 = 0.0; - sum2 = 0.0; - sum3 = 0.0; - for (nx = -nbx; nx <= nbx; nx++) { - qx = unitkx*(kper+nx_pppm_6*nx); - sx = exp(-qx*qx*inv2ew*inv2ew); - wx = 1.0; - argx = 0.5*qx*xprd/nx_pppm_6; - if (argx != 0.0) wx = pow(sin(argx)/argx,order_6); - for (ny = -nby; ny <= nby; ny++) { - qy = unitky*(lper+ny_pppm_6*ny); - sy = exp(-qy*qy*inv2ew*inv2ew); - wy = 1.0; - argy = 0.5*qy*yprd/ny_pppm_6; - if (argy != 0.0) wy = pow(sin(argy)/argy,order_6); - for (nz = -nbz; nz <= nbz; nz++) { - qz = unitkz*(mper+nz_pppm_6*nz); - sz = exp(-qz*qz*inv2ew*inv2ew); - wz = 1.0; - argz = 0.5*qz*zprd_slab/nz_pppm_6; - if (argz != 0.0) wz = pow(sin(argz)/argz,order_6); - - dot1 = unitkx*kper*qx + unitky*lper*qy + unitkz*mper*qz; - dot2 = qx*qx+qy*qy+qz*qz; - rtdot2 = sqrt(dot2); - term = (1-2*dot2*inv2ew*inv2ew)*sx*sy*sz + - 
2*dot2*rtdot2*inv2ew*inv2ew*inv2ew*rtpi*erfc(rtdot2*inv2ew); - term *= g_ewald_6*g_ewald_6*g_ewald_6; - u2 = pow(wx*wy*wz,2.0); - sum1 += term*term*MY_PI*MY_PI*MY_PI/9.0 * dot2; - sum2 += -u2*term*MY_PI*rtpi/3.0*dot1; - sum3 += u2; - } - } - } - sum2 *= sum2; - sum3 *= sum3*sqk; - qopt += sum1 -sum2/sum3; - } - } - } - } - return qopt; -} - -/* ---------------------------------------------------------------------- - Compute qopt for the ad differentiation scheme and Dispersion interaction -------------------------------------------------------------------------- */ - -double PPPMDisp::compute_qopt_6_ad() -{ - double qopt = 0.0; - int k,l,m; - double *prd; - - if (triclinic == 0) prd = domain->prd; - else prd = domain->prd_lamda; - - double xprd = prd[0]; - double yprd = prd[1]; - double zprd = prd[2]; - double zprd_slab = zprd*slab_volfactor; - - double unitkx = (2.0*MY_PI/xprd); - double unitky = (2.0*MY_PI/yprd); - double unitkz = (2.0*MY_PI/zprd_slab); - - int nx,ny,nz,kper,lper,mper; - double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz; - double u2, sqk; - double sum1,sum2,sum3,sum4; - double dot2, rtdot2, term; - double inv2ew = 2*g_ewald_6; - inv2ew = 1/inv2ew; - double rtpi = sqrt(MY_PI); - - int nbx = 2; - int nby = 2; - int nbz = 2; - - for (m = nzlo_fft_6; m <= nzhi_fft_6; m++) { - mper = m - nz_pppm_6*(2*m/nz_pppm_6); - - for (l = nylo_fft_6; l <= nyhi_fft_6; l++) { - lper = l - ny_pppm_6*(2*l/ny_pppm_6); - - for (k = nxlo_fft_6; k <= nxhi_fft_6; k++) { - kper = k - nx_pppm_6*(2*k/nx_pppm_6); - - sqk = pow(unitkx*kper,2.0) + pow(unitky*lper,2.0) + - pow(unitkz*mper,2.0); - - if (sqk != 0.0) { - - sum1 = 0.0; - sum2 = 0.0; - sum3 = 0.0; - sum4 = 0.0; - for (nx = -nbx; nx <= nbx; nx++) { - qx = unitkx*(kper+nx_pppm_6*nx); - sx = exp(-qx*qx*inv2ew*inv2ew); - wx = 1.0; - argx = 0.5*qx*xprd/nx_pppm_6; - if (argx != 0.0) wx = pow(sin(argx)/argx,order_6); - for (ny = -nby; ny <= nby; ny++) { - qy = unitky*(lper+ny_pppm_6*ny); - sy = exp(-qy*qy*inv2ew*inv2ew); - 
wy = 1.0; - argy = 0.5*qy*yprd/ny_pppm_6; - if (argy != 0.0) wy = pow(sin(argy)/argy,order_6); - for (nz = -nbz; nz <= nbz; nz++) { - qz = unitkz*(mper+nz_pppm_6*nz); - sz = exp(-qz*qz*inv2ew*inv2ew); - wz = 1.0; - argz = 0.5*qz*zprd_slab/nz_pppm_6; - if (argz != 0.0) wz = pow(sin(argz)/argz,order_6); - - dot2 = qx*qx+qy*qy+qz*qz; - rtdot2 = sqrt(dot2); - term = (1-2*dot2*inv2ew*inv2ew)*sx*sy*sz + - 2*dot2*rtdot2*inv2ew*inv2ew*inv2ew*rtpi*erfc(rtdot2*inv2ew); - term *= g_ewald_6*g_ewald_6*g_ewald_6; - u2 = pow(wx*wy*wz,2.0); - sum1 += term*term*MY_PI*MY_PI*MY_PI/9.0 * dot2; - sum2 += -term*MY_PI*rtpi/3.0 * u2 * dot2; - sum3 += u2; - sum4 += dot2*u2; - } - } - } - sum2 *= sum2; - qopt += sum1 - sum2/(sum3*sum4); - } - } - } - } - return qopt; -} - -/* ---------------------------------------------------------------------- - set size of FFT grid and g_ewald_6 - for Dispersion interactions -------------------------------------------------------------------------- */ - -void PPPMDisp::set_grid_6() -{ - // Calculate csum - if (!csumflag) calc_csum(); - if (!gewaldflag_6) set_init_g6(); - if (!gridflag_6) set_n_pppm_6(); - while (!factorable(nx_pppm_6)) nx_pppm_6++; - while (!factorable(ny_pppm_6)) ny_pppm_6++; - while (!factorable(nz_pppm_6)) nz_pppm_6++; - -} - -/* ---------------------------------------------------------------------- - Calculate the sum of the squared dispersion coefficients and other - related quantities required for the calculations -------------------------------------------------------------------------- */ - -void PPPMDisp::calc_csum() -{ - csumij = 0.0; - csum = 0.0; - - int ntypes = atom->ntypes; - int i,j,k; - - delete [] cii; - cii = new double[ntypes +1]; - for (i = 0; i<=ntypes; i++) cii[i] = 0.0; - delete [] csumi; - csumi = new double[ntypes +1]; - for (i = 0; i<=ntypes; i++) csumi[i] = 0.0; - int *neach = new int[ntypes+1]; - for (i = 0; i<=ntypes; i++) neach[i] = 0; - - //the following variables are needed to distinguish between 
arithmetic - // and geometric mixing - - double mix1; // scales 20/16 to 4 - int mix2; // shifts the value to the sigma^3 value - int mix3; // shifts the value to the right atom type - if (function[1]) { - for (i = 1; i <= ntypes; i++) - cii[i] = B[i]*B[i]; - int tmp; - for (i = 0; i < atom->nlocal; i++) { - tmp = atom->type[i]; - neach[tmp]++; - csum += B[tmp]*B[tmp]; - } - } - if (function[2]) { - for (i = 1; i <= ntypes; i++) - cii[i] = 64.0/20.0*B[7*i+3]*B[7*i+3]; - int tmp; - for (i = 0; i < atom->nlocal; i++) { - tmp = atom->type[i]; - neach[tmp]++; - csum += 64.0/20.0*B[7*tmp+3]*B[7*tmp+3]; - } - } - if (function[3]) { - for (i = 1; i <= ntypes; i++) - for (j = 0; j < nsplit; j++) - cii[i] += B[j]*B[nsplit*i + j]*B[nsplit*i + j]; - int tmp; - for (i = 0; i < atom->nlocal; i++) { - tmp = atom->type[i]; - neach[tmp]++; - for (j = 0; j < nsplit; j++) - csum += B[j]*B[nsplit*tmp + j]*B[nsplit*tmp + j]; - } - } - - - double tmp2; - MPI_Allreduce(&csum,&tmp2,1,MPI_DOUBLE,MPI_SUM,world); - csum = tmp2; - csumflag = 1; - - int *neach_all = new int[ntypes+1]; - MPI_Allreduce(neach,neach_all,ntypes+1,MPI_INT,MPI_SUM,world); - - // copmute csumij and csumi - double d1, d2; - if (function[1]){ - for (i=1; i<=ntypes; i++) { - for (j=1; j<=ntypes; j++) { - csumi[i] += neach_all[j]*B[i]*B[j]; - d1 = neach_all[i]*B[i]; - d2 = neach_all[j]*B[j]; - csumij += d1*d2; - //csumij += neach_all[i]*neach_all[j]*B[i]*B[j]; - } - } - } - if (function[2]) { - for (i=1; i<=ntypes; i++) { - for (j=1; j<=ntypes; j++) { - for (k=0; k<=6; k++) { - csumi[i] += neach_all[j]*B[7*i + k]*B[7*(j+1)-k-1]; - d1 = neach_all[i]*B[7*i + k]; - d2 = neach_all[j]*B[7*(j+1)-k-1]; - csumij += d1*d2; - //csumij += neach_all[i]*neach_all[j]*B[7*i + k]*B[7*(j+1)-k-1]; - } - } - } - } - if (function[3]) { - for (i=1; i<=ntypes; i++) { - for (j=1; j<=ntypes; j++) { - for (k=0; kall(FLERR, str); - -} - -/* ---------------------------------------------------------------------- - Calculate f(x) for Dispersion 
interaction - ------------------------------------------------------------------------- */ - -double PPPMDisp::f_6() -{ - double df_rspace, df_kspace; - double *prd; - - if (triclinic == 0) prd = domain->prd; - else prd = domain->prd_lamda; - - double xprd = prd[0]; - double yprd = prd[1]; - double zprd = prd[2]; - double zprd_slab = zprd*slab_volfactor; - bigint natoms = atom->natoms; - - df_rspace = lj_rspace_error(); - - double qopt = compute_qopt_6(); - df_kspace = sqrt(qopt/natoms)*csum/(xprd*yprd*zprd_slab); - - return df_rspace - df_kspace; -} - -/* ---------------------------------------------------------------------- - Calculate numerical derivative f'(x) using forward difference - [f(x + h) - f(x)] / h - ------------------------------------------------------------------------- */ - -double PPPMDisp::derivf_6() -{ - double h = 0.000001; //Derivative step-size - double df,f1,f2,g_ewald_old; - - f1 = f_6(); - g_ewald_old = g_ewald_6; - g_ewald_6 += h; - f2 = f_6(); - g_ewald_6 = g_ewald_old; - df = (f2 - f1)/h; - - return df; -} - - -/* ---------------------------------------------------------------------- - calculate an initial value for g_ewald_6 - ---------------------------------------------------------------------- */ - -void PPPMDisp::set_init_g6() -{ - // use xprd,yprd,zprd even if triclinic so grid size is the same - // adjust z dimension for 2d slab PPPM - // 3d PPPM just uses zprd since slab_volfactor = 1.0 - - // make initial g_ewald estimate - // based on desired error and real space cutoff - - // compute initial value for df_real with g_ewald_6 = 1/cutoff_lj - // if df_real > 0, repeat divide g_ewald_6 by 2 until df_real < 0 - // else, repeat multiply g_ewald_6 by 2 until df_real > 0 - // perform bisection for the last two values of - double df_real; - double g_ewald_old; - double gmin, gmax; - - // check if there is a user defined accuracy - double acc_rspace = accuracy; - if (accuracy_real_6 > 0) acc_rspace = accuracy_real_6; - - g_ewald_6 = 
1.0/cutoff_lj; - df_real = lj_rspace_error() - acc_rspace; - int counter = 0; - if (df_real > 0) { - while (df_real > 0 && counter < LARGE) { - counter++; - g_ewald_old = g_ewald_6; - g_ewald_6 *= 2; - df_real = lj_rspace_error() - acc_rspace; - } - } - - if (df_real < 0) { - while (df_real < 0 && counter < LARGE) { - counter++; - g_ewald_old = g_ewald_6; - g_ewald_6 *= 0.5; - df_real = lj_rspace_error() - acc_rspace; - } - } - - if (counter >= LARGE-1) error->all(FLERR,"Cannot compute initial g_ewald_disp"); - - gmin = MIN(g_ewald_6, g_ewald_old); - gmax = MAX(g_ewald_6, g_ewald_old); - g_ewald_6 = gmin + 0.5*(gmax-gmin); - counter = 0; - while (gmax-gmin > SMALL && counter < LARGE) { - counter++; - df_real = lj_rspace_error() -acc_rspace; - if (df_real < 0) gmax = g_ewald_6; - else gmin = g_ewald_6; - g_ewald_6 = gmin + 0.5*(gmax-gmin); - } - if (counter >= LARGE-1) error->all(FLERR,"Cannot compute initial g_ewald_disp"); - -} - -/* ---------------------------------------------------------------------- - calculate nx_pppm, ny_pppm, nz_pppm for dispersion interaction - ---------------------------------------------------------------------- */ - -void PPPMDisp::set_n_pppm_6() -{ - bigint natoms = atom->natoms; - - double *prd; - - if (triclinic == 0) prd = domain->prd; - else prd = domain->prd_lamda; - - double xprd = prd[0]; - double yprd = prd[1]; - double zprd = prd[2]; - double zprd_slab = zprd*slab_volfactor; - double h, h_x,h_y,h_z; - - double acc_kspace = accuracy; - if (accuracy_kspace_6 > 0.0) acc_kspace = accuracy_kspace_6; - - // initial value for the grid spacing - h = h_x = h_y = h_z = 4.0/g_ewald_6; - // decrease grid spacing untill required precision is obtained - int count = 0; - while(1) { - - // set grid dimension - nx_pppm_6 = static_cast (xprd/h_x); - ny_pppm_6 = static_cast (yprd/h_y); - nz_pppm_6 = static_cast (zprd_slab/h_z); - - if (nx_pppm_6 <= 1) nx_pppm_6 = 2; - if (ny_pppm_6 <= 1) ny_pppm_6 = 2; - if (nz_pppm_6 <= 1) nz_pppm_6 = 2; - - 
//set local grid dimension - int npey_fft,npez_fft; - if (nz_pppm_6 >= nprocs) { - npey_fft = 1; - npez_fft = nprocs; - } else procs2grid2d(nprocs,ny_pppm_6,nz_pppm_6,&npey_fft,&npez_fft); - - int me_y = me % npey_fft; - int me_z = me / npey_fft; - - nxlo_fft_6 = 0; - nxhi_fft_6 = nx_pppm_6 - 1; - nylo_fft_6 = me_y*ny_pppm_6/npey_fft; - nyhi_fft_6 = (me_y+1)*ny_pppm_6/npey_fft - 1; - nzlo_fft_6 = me_z*nz_pppm_6/npez_fft; - nzhi_fft_6 = (me_z+1)*nz_pppm_6/npez_fft - 1; - - double qopt = compute_qopt_6(); - - double df_kspace = sqrt(qopt/natoms)*csum/(xprd*yprd*zprd_slab); - - count++; - - // break loop if the accuracy has been reached or too many loops have been performed - if (df_kspace <= acc_kspace) break; - if (count > 500) error->all(FLERR, "Could not compute grid size for Dispersion"); - h *= 0.95; - h_x = h_y = h_z = h; - } -} - -/* ---------------------------------------------------------------------- - calculate the real space error for dispersion interactions - ---------------------------------------------------------------------- */ - -double PPPMDisp::lj_rspace_error() -{ - bigint natoms = atom->natoms; - double xprd = domain->xprd; - double yprd = domain->yprd; - double zprd = domain->zprd; - double zprd_slab = zprd*slab_volfactor; - - double deltaf; - double rgs = (cutoff_lj*g_ewald_6); - rgs *= rgs; - double rgs_inv = 1.0/rgs; - deltaf = csum/sqrt(natoms*xprd*yprd*zprd_slab*cutoff_lj)*sqrt(MY_PI)*pow(g_ewald_6, 5)* - exp(-rgs)*(1+rgs_inv*(3+rgs_inv*(6+rgs_inv*6))); - return deltaf; -} - - -/* ---------------------------------------------------------------------- - Compyute the modified (hockney-eastwood) coulomb green function - ---------------------------------------------------------------------- */ - -void PPPMDisp::compute_gf() -{ - int k,l,m,n; - double *prd; - - if (triclinic == 0) prd = domain->prd; - else prd = domain->prd_lamda; - - double xprd = prd[0]; - double yprd = prd[1]; - double zprd = prd[2]; - double zprd_slab = zprd*slab_volfactor; 
- volume = xprd * yprd * zprd_slab; - - double unitkx = (2.0*MY_PI/xprd); - double unitky = (2.0*MY_PI/yprd); - double unitkz = (2.0*MY_PI/zprd_slab); - - int kper,lper,mper; - double snx,sny,snz,snx2,sny2,snz2; - double sqk; - double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz; - double numerator,denominator; - - - n = 0; - for (m = nzlo_fft; m <= nzhi_fft; m++) { - mper = m - nz_pppm*(2*m/nz_pppm); - qz = unitkz*mper; - snz = sin(0.5*qz*zprd_slab/nz_pppm); - snz2 = snz*snz; - sz = exp(-0.25*pow(qz/g_ewald,2.0)); - wz = 1.0; - argz = 0.5*qz*zprd_slab/nz_pppm; - if (argz != 0.0) wz = pow(sin(argz)/argz,order); - wz *= wz; - - for (l = nylo_fft; l <= nyhi_fft; l++) { - lper = l - ny_pppm*(2*l/ny_pppm); - qy = unitky*lper; - sny = sin(0.5*qy*yprd/ny_pppm); - sny2 = sny*sny; - sy = exp(-0.25*pow(qy/g_ewald,2.0)); - wy = 1.0; - argy = 0.5*qy*yprd/ny_pppm; - if (argy != 0.0) wy = pow(sin(argy)/argy,order); - wy *= wy; - - for (k = nxlo_fft; k <= nxhi_fft; k++) { - kper = k - nx_pppm*(2*k/nx_pppm); - qx = unitkx*kper; - snx = sin(0.5*qx*xprd/nx_pppm); - snx2 = snx*snx; - sx = exp(-0.25*pow(qx/g_ewald,2.0)); - wx = 1.0; - argx = 0.5*qx*xprd/nx_pppm; - if (argx != 0.0) wx = pow(sin(argx)/argx,order); - wx *= wx; - - sqk = pow(qx,2.0) + pow(qy,2.0) + pow(qz,2.0); - - if (sqk != 0.0) { - numerator = 4.0*MY_PI/sqk; - denominator = gf_denom(snx2,sny2,snz2, gf_b, order); - greensfn[n++] = numerator*sx*sy*sz*wx*wy*wz/denominator; - } else greensfn[n++] = 0.0; - } - } - } -} - -/* ---------------------------------------------------------------------- - compute self force coefficients for ad-differentiation scheme - and Coulomb interaction -------------------------------------------------------------------------- */ - -void PPPMDisp::compute_sf_precoeff(int nxp, int nyp, int nzp, int ord, - int nxlo_ft, int nylo_ft, int nzlo_ft, - int nxhi_ft, int nyhi_ft, int nzhi_ft, - double *sf_pre1, double *sf_pre2, double *sf_pre3, - double *sf_pre4, double *sf_pre5, double *sf_pre6) -{ - - 
int i,k,l,m,n; - double *prd; - - // volume-dependent factors - // adjust z dimension for 2d slab PPPM - // z dimension for 3d PPPM is zprd since slab_volfactor = 1.0 - - if (triclinic == 0) prd = domain->prd; - else prd = domain->prd_lamda; - - double xprd = prd[0]; - double yprd = prd[1]; - double zprd = prd[2]; - double zprd_slab = zprd*slab_volfactor; - - double unitkx = (2.0*MY_PI/xprd); - double unitky = (2.0*MY_PI/yprd); - double unitkz = (2.0*MY_PI/zprd_slab); - - int nx,ny,nz,kper,lper,mper; - double argx,argy,argz; - double wx0[5],wy0[5],wz0[5],wx1[5],wy1[5],wz1[5],wx2[5],wy2[5],wz2[5]; - double qx0,qy0,qz0,qx1,qy1,qz1,qx2,qy2,qz2; - double u0,u1,u2,u3,u4,u5,u6; - double sum1,sum2,sum3,sum4,sum5,sum6; - - int nb = 2; - - n = 0; - for (m = nzlo_ft; m <= nzhi_ft; m++) { - mper = m - nzp*(2*m/nzp); - - for (l = nylo_ft; l <= nyhi_ft; l++) { - lper = l - nyp*(2*l/nyp); - - for (k = nxlo_ft; k <= nxhi_ft; k++) { - kper = k - nxp*(2*k/nxp); - - sum1 = sum2 = sum3 = sum4 = sum5 = sum6 = 0.0; - for (i = -nb; i <= nb; i++) { - - qx0 = unitkx*(kper+nxp*i); - qx1 = unitkx*(kper+nxp*(i+1)); - qx2 = unitkx*(kper+nxp*(i+2)); - wx0[i+2] = 1.0; - wx1[i+2] = 1.0; - wx2[i+2] = 1.0; - argx = 0.5*qx0*xprd/nxp; - if (argx != 0.0) wx0[i+2] = pow(sin(argx)/argx,ord); - argx = 0.5*qx1*xprd/nxp; - if (argx != 0.0) wx1[i+2] = pow(sin(argx)/argx,ord); - argx = 0.5*qx2*xprd/nxp; - if (argx != 0.0) wx2[i+2] = pow(sin(argx)/argx,ord); - - qy0 = unitky*(lper+nyp*i); - qy1 = unitky*(lper+nyp*(i+1)); - qy2 = unitky*(lper+nyp*(i+2)); - wy0[i+2] = 1.0; - wy1[i+2] = 1.0; - wy2[i+2] = 1.0; - argy = 0.5*qy0*yprd/nyp; - if (argy != 0.0) wy0[i+2] = pow(sin(argy)/argy,ord); - argy = 0.5*qy1*yprd/nyp; - if (argy != 0.0) wy1[i+2] = pow(sin(argy)/argy,ord); - argy = 0.5*qy2*yprd/nyp; - if (argy != 0.0) wy2[i+2] = pow(sin(argy)/argy,ord); - - qz0 = unitkz*(mper+nzp*i); - qz1 = unitkz*(mper+nzp*(i+1)); - qz2 = unitkz*(mper+nzp*(i+2)); - wz0[i+2] = 1.0; - wz1[i+2] = 1.0; - wz2[i+2] = 1.0; - argz = 
0.5*qz0*zprd_slab/nzp; - if (argz != 0.0) wz0[i+2] = pow(sin(argz)/argz,ord); - argz = 0.5*qz1*zprd_slab/nzp; - if (argz != 0.0) wz1[i+2] = pow(sin(argz)/argz,ord); - argz = 0.5*qz2*zprd_slab/nzp; - if (argz != 0.0) wz2[i+2] = pow(sin(argz)/argz,ord); - } - - for (nx = 0; nx <= 4; nx++) { - for (ny = 0; ny <= 4; ny++) { - for (nz = 0; nz <= 4; nz++) { - u0 = wx0[nx]*wy0[ny]*wz0[nz]; - u1 = wx1[nx]*wy0[ny]*wz0[nz]; - u2 = wx2[nx]*wy0[ny]*wz0[nz]; - u3 = wx0[nx]*wy1[ny]*wz0[nz]; - u4 = wx0[nx]*wy2[ny]*wz0[nz]; - u5 = wx0[nx]*wy0[ny]*wz1[nz]; - u6 = wx0[nx]*wy0[ny]*wz2[nz]; - - sum1 += u0*u1; - sum2 += u0*u2; - sum3 += u0*u3; - sum4 += u0*u4; - sum5 += u0*u5; - sum6 += u0*u6; - } - } - } - - // store values - - sf_pre1[n] = sum1; - sf_pre2[n] = sum2; - sf_pre3[n] = sum3; - sf_pre4[n] = sum4; - sf_pre5[n] = sum5; - sf_pre6[n++] = sum6; - } - } - } -} - -/* ---------------------------------------------------------------------- - Compute the modified (hockney-eastwood) dispersion green function - ---------------------------------------------------------------------- */ - -void PPPMDisp::compute_gf_6() -{ - double *prd; - int k,l,m,n; - - // volume-dependent factors - // adjust z dimension for 2d slab PPPM - // z dimension for 3d PPPM is zprd since slab_volfactor = 1.0 - - if (triclinic == 0) prd = domain->prd; - else prd = domain->prd_lamda; - - double xprd = prd[0]; - double yprd = prd[1]; - double zprd = prd[2]; - double zprd_slab = zprd*slab_volfactor; - - double unitkx = (2.0*MY_PI/xprd); - double unitky = (2.0*MY_PI/yprd); - double unitkz = (2.0*MY_PI/zprd_slab); - - int kper,lper,mper; - double sqk; - double snx,sny,snz,snx2,sny2,snz2; - double argx,argy,argz,wx,wy,wz,sx,sy,sz; - double qx,qy,qz; - double rtsqk, term; - double numerator,denominator; - double inv2ew = 2*g_ewald_6; - inv2ew = 1/inv2ew; - double rtpi = sqrt(MY_PI); - - numerator = -MY_PI*rtpi*g_ewald_6*g_ewald_6*g_ewald_6/(3.0); - - n = 0; - for (m = nzlo_fft_6; m <= nzhi_fft_6; m++) { - mper = m - 
nz_pppm_6*(2*m/nz_pppm_6); - qz = unitkz*mper; - snz = sin(0.5*unitkz*mper*zprd_slab/nz_pppm_6); - snz2 = snz*snz; - sz = exp(-qz*qz*inv2ew*inv2ew); - wz = 1.0; - argz = 0.5*qz*zprd_slab/nz_pppm_6; - if (argz != 0.0) wz = pow(sin(argz)/argz,order_6); - wz *= wz; - - for (l = nylo_fft_6; l <= nyhi_fft_6; l++) { - lper = l - ny_pppm_6*(2*l/ny_pppm_6); - qy = unitky*lper; - sny = sin(0.5*unitky*lper*yprd/ny_pppm_6); - sny2 = sny*sny; - sy = exp(-qy*qy*inv2ew*inv2ew); - wy = 1.0; - argy = 0.5*qy*yprd/ny_pppm_6; - if (argy != 0.0) wy = pow(sin(argy)/argy,order_6); - wy *= wy; - - for (k = nxlo_fft_6; k <= nxhi_fft_6; k++) { - kper = k - nx_pppm_6*(2*k/nx_pppm_6); - qx = unitkx*kper; - snx = sin(0.5*unitkx*kper*xprd/nx_pppm_6); - snx2 = snx*snx; - sx = exp(-qx*qx*inv2ew*inv2ew); - wx = 1.0; - argx = 0.5*qx*xprd/nx_pppm_6; - if (argx != 0.0) wx = pow(sin(argx)/argx,order_6); - wx *= wx; - - sqk = pow(qx,2.0) + pow(qy,2.0) + pow(qz,2.0); - - if (sqk != 0.0) { - denominator = gf_denom(snx2,sny2,snz2, gf_b_6, order_6); - rtsqk = sqrt(sqk); - term = (1-2*sqk*inv2ew*inv2ew)*sx*sy*sz + - 2*sqk*rtsqk*inv2ew*inv2ew*inv2ew*rtpi*erfc(rtsqk*inv2ew); - greensfn_6[n++] = numerator*term*wx*wy*wz/denominator; - } else greensfn_6[n++] = 0.0; - } - } - } -} - -/* ---------------------------------------------------------------------- - compute self force coefficients for ad-differentiation scheme - and Coulomb interaction -------------------------------------------------------------------------- */ -void PPPMDisp::compute_sf_coeff() -{ - int i,k,l,m,n; - double *prd; - - if (triclinic == 0) prd = domain->prd; - else prd = domain->prd_lamda; - - double xprd = prd[0]; - double yprd = prd[1]; - double zprd = prd[2]; - double zprd_slab = zprd*slab_volfactor; - volume = xprd * yprd * zprd_slab; - - for (i = 0; i <= 5; i++) sf_coeff[i] = 0.0; - - n = 0; - for (m = nzlo_fft; m <= nzhi_fft; m++) { - for (l = nylo_fft; l <= nyhi_fft; l++) { - for (k = nxlo_fft; k <= nxhi_fft; k++) { - sf_coeff[0] 
+= sf_precoeff1[n]*greensfn[n]; - sf_coeff[1] += sf_precoeff2[n]*greensfn[n]; - sf_coeff[2] += sf_precoeff3[n]*greensfn[n]; - sf_coeff[3] += sf_precoeff4[n]*greensfn[n]; - sf_coeff[4] += sf_precoeff5[n]*greensfn[n]; - sf_coeff[5] += sf_precoeff6[n]*greensfn[n]; - ++n; - } - } - } - - // Compute the coefficients for the self-force correction - - double prex, prey, prez; - prex = prey = prez = MY_PI/volume; - prex *= nx_pppm/xprd; - prey *= ny_pppm/yprd; - prez *= nz_pppm/zprd_slab; - sf_coeff[0] *= prex; - sf_coeff[1] *= prex*2; - sf_coeff[2] *= prey; - sf_coeff[3] *= prey*2; - sf_coeff[4] *= prez; - sf_coeff[5] *= prez*2; - - // communicate values with other procs - - double tmp[6]; - MPI_Allreduce(sf_coeff,tmp,6,MPI_DOUBLE,MPI_SUM,world); - for (n = 0; n < 6; n++) sf_coeff[n] = tmp[n]; -} - -/* ---------------------------------------------------------------------- - compute self force coefficients for ad-differentiation scheme - and Dispersion interaction -------------------------------------------------------------------------- */ - -void PPPMDisp::compute_sf_coeff_6() -{ - int i,k,l,m,n; - double *prd; - - if (triclinic == 0) prd = domain->prd; - else prd = domain->prd_lamda; - - double xprd = prd[0]; - double yprd = prd[1]; - double zprd = prd[2]; - double zprd_slab = zprd*slab_volfactor; - volume = xprd * yprd * zprd_slab; - - for (i = 0; i <= 5; i++) sf_coeff_6[i] = 0.0; - - n = 0; - for (m = nzlo_fft_6; m <= nzhi_fft_6; m++) { - for (l = nylo_fft_6; l <= nyhi_fft_6; l++) { - for (k = nxlo_fft_6; k <= nxhi_fft_6; k++) { - sf_coeff_6[0] += sf_precoeff1_6[n]*greensfn_6[n]; - sf_coeff_6[1] += sf_precoeff2_6[n]*greensfn_6[n]; - sf_coeff_6[2] += sf_precoeff3_6[n]*greensfn_6[n]; - sf_coeff_6[3] += sf_precoeff4_6[n]*greensfn_6[n]; - sf_coeff_6[4] += sf_precoeff5_6[n]*greensfn_6[n]; - sf_coeff_6[5] += sf_precoeff6_6[n]*greensfn_6[n]; - ++n; - } - } - } - - - // perform multiplication with prefactors - - double prex, prey, prez; - prex = prey = prez = MY_PI/volume; - 
prex *= nx_pppm_6/xprd; - prey *= ny_pppm_6/yprd; - prez *= nz_pppm_6/zprd_slab; - sf_coeff_6[0] *= prex; - sf_coeff_6[1] *= prex*2; - sf_coeff_6[2] *= prey; - sf_coeff_6[3] *= prey*2; - sf_coeff_6[4] *= prez; - sf_coeff_6[5] *= prez*2; - - // communicate values with other procs - - double tmp[6]; - MPI_Allreduce(sf_coeff_6,tmp,6,MPI_DOUBLE,MPI_SUM,world); - for (n = 0; n < 6; n++) sf_coeff_6[n] = tmp[n]; - -} - -/* ---------------------------------------------------------------------- - denominator for Hockney-Eastwood Green's function - of x,y,z = sin(kx*deltax/2), etc - - inf n-1 - S(n,k) = Sum W(k+pi*j)**2 = Sum b(l)*(z*z)**l - j=-inf l=0 - - = -(z*z)**n /(2n-1)! * (d/dx)**(2n-1) cot(x) at z = sin(x) - gf_b = denominator expansion coeffs -------------------------------------------------------------------------- */ - -double PPPMDisp::gf_denom(double x, double y, double z, double *g_b, int ord) -{ - double sx,sy,sz; - sz = sy = sx = 0.0; - for (int l = ord-1; l >= 0; l--) { - sx = g_b[l] + sx*x; - sy = g_b[l] + sy*y; - sz = g_b[l] + sz*z; - } - double s = sx*sy*sz; - return s*s; -} - -/* ---------------------------------------------------------------------- - pre-compute Green's function denominator expansion coeffs, Gamma(2n) -------------------------------------------------------------------------- */ - -void PPPMDisp::compute_gf_denom(double* gf, int ord) -{ - int k,l,m; - - for (l = 1; l < ord; l++) gf[l] = 0.0; - gf[0] = 1.0; - - for (m = 1; m < ord; m++) { - for (l = m; l > 0; l--) - gf[l] = 4.0 * (gf[l]*(l-m)*(l-m-0.5)-gf[l-1]*(l-m-1)*(l-m-1)); - gf[0] = 4.0 * (gf[0]*(l-m)*(l-m-0.5)); - } - - bigint ifact = 1; - for (k = 1; k < 2*ord; k++) ifact *= k; - double gaminv = 1.0/ifact; - for (l = 0; l < ord; l++) gf[l] *= gaminv; -} - -/* ---------------------------------------------------------------------- - ghost-swap to accumulate full density in brick decomposition - remap density from 3d brick decomposition to FFTdecomposition - for coulomb interaction or 
dispersion interaction with geometric - mixing -------------------------------------------------------------------------- */ - -void PPPMDisp::brick2fft(int nxlo_i, int nylo_i, int nzlo_i, - int nxhi_i, int nyhi_i, int nzhi_i, - FFT_SCALAR*** dbrick, FFT_SCALAR* dfft, FFT_SCALAR* work, - LAMMPS_NS::Remap* rmp) -{ - int n,ix,iy,iz; - - // copy grabs inner portion of density from 3d brick - // remap could be done as pre-stage of FFT, - // but this works optimally on only double values, not complex values - - n = 0; - for (iz = nzlo_i; iz <= nzhi_i; iz++) - for (iy = nylo_i; iy <= nyhi_i; iy++) - for (ix = nxlo_i; ix <= nxhi_i; ix++) - dfft[n++] = dbrick[iz][iy][ix]; - - rmp->perform(dfft,dfft,work); -} - - -/* ---------------------------------------------------------------------- - ghost-swap to accumulate full density in brick decomposition - remap density from 3d brick decomposition to FFTdecomposition - for dispersion with arithmetic mixing rule -------------------------------------------------------------------------- */ - -void PPPMDisp::brick2fft_a() -{ - int n,ix,iy,iz; - - // copy grabs inner portion of density from 3d brick - // remap could be done as pre-stage of FFT, - // but this works optimally on only double values, not complex values - - n = 0; - for (iz = nzlo_in_6; iz <= nzhi_in_6; iz++) - for (iy = nylo_in_6; iy <= nyhi_in_6; iy++) - for (ix = nxlo_in_6; ix <= nxhi_in_6; ix++) { - density_fft_a0[n] = density_brick_a0[iz][iy][ix]; - density_fft_a1[n] = density_brick_a1[iz][iy][ix]; - density_fft_a2[n] = density_brick_a2[iz][iy][ix]; - density_fft_a3[n] = density_brick_a3[iz][iy][ix]; - density_fft_a4[n] = density_brick_a4[iz][iy][ix]; - density_fft_a5[n] = density_brick_a5[iz][iy][ix]; - density_fft_a6[n++] = density_brick_a6[iz][iy][ix]; - } - - remap_6->perform(density_fft_a0,density_fft_a0,work1_6); - remap_6->perform(density_fft_a1,density_fft_a1,work1_6); - remap_6->perform(density_fft_a2,density_fft_a2,work1_6); - 
remap_6->perform(density_fft_a3,density_fft_a3,work1_6); - remap_6->perform(density_fft_a4,density_fft_a4,work1_6); - remap_6->perform(density_fft_a5,density_fft_a5,work1_6); - remap_6->perform(density_fft_a6,density_fft_a6,work1_6); - -} - -/* ---------------------------------------------------------------------- - ghost-swap to accumulate full density in brick decomposition - remap density from 3d brick decomposition to FFTdecomposition - for dispersion with special case -------------------------------------------------------------------------- */ - -void PPPMDisp::brick2fft_none() -{ - int k,n,ix,iy,iz; - - // copy grabs inner portion of density from 3d brick - // remap could be done as pre-stage of FFT, - // but this works optimally on only double values, not complex values - - for (k = 0; kperform(density_fft_none[k],density_fft_none[k],work1_6); -} - -/* ---------------------------------------------------------------------- - find center grid pt for each of my particles - check that full stencil for the particle will fit in my 3d brick - store central grid pt indices in part2grid array -------------------------------------------------------------------------- */ - -void PPPMDisp::particle_map(double delx, double dely, double delz, - double sft, int** p2g, int nup, int nlow, - int nxlo, int nylo, int nzlo, - int nxhi, int nyhi, int nzhi) -{ - int nx,ny,nz; - - double **x = atom->x; - int nlocal = atom->nlocal; - - int flag = 0; - for (int i = 0; i < nlocal; i++) { - - // (nx,ny,nz) = global coords of grid pt to "lower left" of charge - // current particle coord can be outside global and local box - // add/subtract OFFSET to avoid int(-0.75) = 0 when want it to be -1 - - nx = static_cast ((x[i][0]-boxlo[0])*delx+sft) - OFFSET; - ny = static_cast ((x[i][1]-boxlo[1])*dely+sft) - OFFSET; - nz = static_cast ((x[i][2]-boxlo[2])*delz+sft) - OFFSET; - - p2g[i][0] = nx; - p2g[i][1] = ny; - p2g[i][2] = nz; - - // check that entire stencil around nx,ny,nz will fit in my 
3d brick - - if (nx+nlow < nxlo || nx+nup > nxhi || - ny+nlow < nylo || ny+nup > nyhi || - nz+nlow < nzlo || nz+nup > nzhi) - flag = 1; - } - - if (flag) error->one(FLERR,"Out of range atoms - cannot compute PPPMDisp"); -} - - -void PPPMDisp::particle_map_c(double delx, double dely, double delz, - double sft, int** p2g, int nup, int nlow, - int nxlo, int nylo, int nzlo, - int nxhi, int nyhi, int nzhi) -{ - particle_map(delx, dely, delz, sft, p2g, nup, nlow, - nxlo, nylo, nzlo, nxhi, nyhi, nzhi); -} - -/* ---------------------------------------------------------------------- - create discretized "density" on section of global grid due to my particles - density(x,y,z) = charge "density" at grid points of my 3d brick - (nxlo:nxhi,nylo:nyhi,nzlo:nzhi) is extent of my brick (including ghosts) - in global grid -------------------------------------------------------------------------- */ - -void PPPMDisp::make_rho_c() -{ - int l,m,n,nx,ny,nz,mx,my,mz; - FFT_SCALAR dx,dy,dz,x0,y0,z0; - - // clear 3d density array - - memset(&(density_brick[nzlo_out][nylo_out][nxlo_out]),0, - ngrid*sizeof(FFT_SCALAR)); - - // loop over my charges, add their contribution to nearby grid points - // (nx,ny,nz) = global coords of grid pt to "lower left" of charge - // (dx,dy,dz) = distance to "lower left" grid pt - // (mx,my,mz) = global coords of moving stencil pt - - double *q = atom->q; - double **x = atom->x; - int nlocal = atom->nlocal; - - for (int i = 0; i < nlocal; i++) { - - nx = part2grid[i][0]; - ny = part2grid[i][1]; - nz = part2grid[i][2]; - dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv; - dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv; - dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv; - - compute_rho1d(dx,dy,dz, order, rho_coeff, rho1d); - - z0 = delvolinv * q[i]; - for (n = nlower; n <= nupper; n++) { - mz = n+nz; - y0 = z0*rho1d[2][n]; - for (m = nlower; m <= nupper; m++) { - my = m+ny; - x0 = y0*rho1d[1][m]; - for (l = nlower; l <= nupper; l++) { - mx = l+nx; - 
density_brick[mz][my][mx] += x0*rho1d[0][l]; - } - } - } - } -} - -/* ---------------------------------------------------------------------- - create discretized "density" on section of global grid due to my particles - density(x,y,z) = dispersion "density" at grid points of my 3d brick - (nxlo:nxhi,nylo:nyhi,nzlo:nzhi) is extent of my brick (including ghosts) - in global grid --- geometric mixing -------------------------------------------------------------------------- */ - -void PPPMDisp::make_rho_g() -{ - int l,m,n,nx,ny,nz,mx,my,mz; - FFT_SCALAR dx,dy,dz,x0,y0,z0; - - // clear 3d density array - - memset(&(density_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]),0, - ngrid_6*sizeof(FFT_SCALAR)); - - // loop over my charges, add their contribution to nearby grid points - // (nx,ny,nz) = global coords of grid pt to "lower left" of charge - // (dx,dy,dz) = distance to "lower left" grid pt - // (mx,my,mz) = global coords of moving stencil pt - int type; - double **x = atom->x; - int nlocal = atom->nlocal; - - for (int i = 0; i < nlocal; i++) { - - nx = part2grid_6[i][0]; - ny = part2grid_6[i][1]; - nz = part2grid_6[i][2]; - dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6; - dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6; - dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6; - - compute_rho1d(dx,dy,dz, order_6, rho_coeff_6, rho1d_6); - type = atom->type[i]; - z0 = delvolinv_6 * B[type]; - for (n = nlower_6; n <= nupper_6; n++) { - mz = n+nz; - y0 = z0*rho1d_6[2][n]; - for (m = nlower_6; m <= nupper_6; m++) { - my = m+ny; - x0 = y0*rho1d_6[1][m]; - for (l = nlower_6; l <= nupper_6; l++) { - mx = l+nx; - density_brick_g[mz][my][mx] += x0*rho1d_6[0][l]; - } - } - } - } -} - - -/* ---------------------------------------------------------------------- - create discretized "density" on section of global grid due to my particles - density(x,y,z) = dispersion "density" at grid points of my 3d brick - (nxlo:nxhi,nylo:nyhi,nzlo:nzhi) is extent of my brick (including ghosts) 
- in global grid --- arithmetic mixing -------------------------------------------------------------------------- */ - -void PPPMDisp::make_rho_a() -{ - int l,m,n,nx,ny,nz,mx,my,mz; - FFT_SCALAR dx,dy,dz,x0,y0,z0,w; - - // clear 3d density array - - memset(&(density_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]),0, - ngrid_6*sizeof(FFT_SCALAR)); - memset(&(density_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]),0, - ngrid_6*sizeof(FFT_SCALAR)); - memset(&(density_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]),0, - ngrid_6*sizeof(FFT_SCALAR)); - memset(&(density_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]),0, - ngrid_6*sizeof(FFT_SCALAR)); - memset(&(density_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]),0, - ngrid_6*sizeof(FFT_SCALAR)); - memset(&(density_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]),0, - ngrid_6*sizeof(FFT_SCALAR)); - memset(&(density_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]),0, - ngrid_6*sizeof(FFT_SCALAR)); - - // loop over my particles, add their contribution to nearby grid points - // (nx,ny,nz) = global coords of grid pt to "lower left" of charge - // (dx,dy,dz) = distance to "lower left" grid pt - // (mx,my,mz) = global coords of moving stencil pt - int type; - double **x = atom->x; - int nlocal = atom->nlocal; - - for (int i = 0; i < nlocal; i++) { - - //do the following for all 4 grids - nx = part2grid_6[i][0]; - ny = part2grid_6[i][1]; - nz = part2grid_6[i][2]; - dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6; - dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6; - dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6; - compute_rho1d(dx,dy,dz, order_6, rho_coeff_6, rho1d_6); - type = atom->type[i]; - z0 = delvolinv_6; - for (n = nlower_6; n <= nupper_6; n++) { - mz = n+nz; - y0 = z0*rho1d_6[2][n]; - for (m = nlower_6; m <= nupper_6; m++) { - my = m+ny; - x0 = y0*rho1d_6[1][m]; - for (l = nlower_6; l <= nupper_6; l++) { - mx = l+nx; - w = x0*rho1d_6[0][l]; - density_brick_a0[mz][my][mx] += w*B[7*type]; - density_brick_a1[mz][my][mx] += 
w*B[7*type+1]; - density_brick_a2[mz][my][mx] += w*B[7*type+2]; - density_brick_a3[mz][my][mx] += w*B[7*type+3]; - density_brick_a4[mz][my][mx] += w*B[7*type+4]; - density_brick_a5[mz][my][mx] += w*B[7*type+5]; - density_brick_a6[mz][my][mx] += w*B[7*type+6]; - } - } - } - } -} - -/* ---------------------------------------------------------------------- - create discretized "density" on section of global grid due to my particles - density(x,y,z) = dispersion "density" at grid points of my 3d brick - (nxlo:nxhi,nylo:nyhi,nzlo:nzhi) is extent of my brick (including ghosts) - in global grid --- case when mixing rules don't apply -------------------------------------------------------------------------- */ - -void PPPMDisp::make_rho_none() -{ - int k,l,m,n,nx,ny,nz,mx,my,mz; - FFT_SCALAR dx,dy,dz,x0,y0,z0,w; - - // clear 3d density array - for (k = 0; k < nsplit_alloc; k++) - memset(&(density_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]),0, - ngrid_6*sizeof(FFT_SCALAR)); - - - // loop over my particles, add their contribution to nearby grid points - // (nx,ny,nz) = global coords of grid pt to "lower left" of charge - // (dx,dy,dz) = distance to "lower left" grid pt - // (mx,my,mz) = global coords of moving stencil pt - int type; - double **x = atom->x; - int nlocal = atom->nlocal; - - for (int i = 0; i < nlocal; i++) { - - //do the following for all 4 grids - nx = part2grid_6[i][0]; - ny = part2grid_6[i][1]; - nz = part2grid_6[i][2]; - dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6; - dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6; - dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6; - compute_rho1d(dx,dy,dz, order_6, rho_coeff_6, rho1d_6); - type = atom->type[i]; - z0 = delvolinv_6; - for (n = nlower_6; n <= nupper_6; n++) { - mz = n+nz; - y0 = z0*rho1d_6[2][n]; - for (m = nlower_6; m <= nupper_6; m++) { - my = m+ny; - x0 = y0*rho1d_6[1][m]; - for (l = nlower_6; l <= nupper_6; l++) { - mx = l+nx; - w = x0*rho1d_6[0][l]; - for (k = 0; k < nsplit; k++) - 
density_brick_none[k][mz][my][mx] += w*B[nsplit*type + k]; - } - } - } - } -} - - -/* ---------------------------------------------------------------------- - FFT-based Poisson solver for ik differentiation -------------------------------------------------------------------------- */ - -void PPPMDisp::poisson_ik(FFT_SCALAR* wk1, FFT_SCALAR* wk2, - FFT_SCALAR* dfft, LAMMPS_NS::FFT3d* ft1,LAMMPS_NS::FFT3d* ft2, - int nx_p, int ny_p, int nz_p, int nft, - int nxlo_ft, int nylo_ft, int nzlo_ft, - int nxhi_ft, int nyhi_ft, int nzhi_ft, - int nxlo_i, int nylo_i, int nzlo_i, - int nxhi_i, int nyhi_i, int nzhi_i, - double& egy, double* gfn, - double* kx, double* ky, double* kz, - double* kx2, double* ky2, double* kz2, - FFT_SCALAR*** vx_brick, FFT_SCALAR*** vy_brick, FFT_SCALAR*** vz_brick, - double* vir, double** vcoeff, double** vcoeff2, - FFT_SCALAR*** u_pa, FFT_SCALAR*** v0_pa, FFT_SCALAR*** v1_pa, FFT_SCALAR*** v2_pa, - FFT_SCALAR*** v3_pa, FFT_SCALAR*** v4_pa, FFT_SCALAR*** v5_pa) - - -{ - int i,j,k,n; - double eng; - - // transform charge/dispersion density (r -> k) - n = 0; - for (i = 0; i < nft; i++) { - wk1[n++] = dfft[i]; - wk1[n++] = ZEROF; - } - - ft1->compute(wk1,wk1,1); - - // if requested, compute energy and virial contribution - - double scaleinv = 1.0/(nx_p*ny_p*nz_p); - double s2 = scaleinv*scaleinv; - - if (eflag_global || vflag_global) { - if (vflag_global) { - n = 0; - for (i = 0; i < nft; i++) { - eng = s2 * gfn[i] * (wk1[n]*wk1[n] + wk1[n+1]*wk1[n+1]); - for (j = 0; j < 6; j++) vir[j] += eng*vcoeff[i][j]; - if (eflag_global) egy += eng; - n += 2; - } - } else { - n = 0; - for (i = 0; i < nft; i++) { - egy += - s2 * gfn[i] * (wk1[n]*wk1[n] + wk1[n+1]*wk1[n+1]); - n += 2; - } - } - } - - // scale by 1/total-grid-pts to get rho(k) - // multiply by Green's function to get V(k) - - n = 0; - for (i = 0; i < nft; i++) { - wk1[n++] *= scaleinv * gfn[i]; - wk1[n++] *= scaleinv * gfn[i]; - } - - // compute gradients of V(r) in each of 3 dims by transformimg 
-ik*V(k) - // FFT leaves data in 3d brick decomposition - // copy it into inner portion of vdx,vdy,vdz arrays - - // x & y direction gradient - - n = 0; - for (k = nzlo_ft; k <= nzhi_ft; k++) - for (j = nylo_ft; j <= nyhi_ft; j++) - for (i = nxlo_ft; i <= nxhi_ft; i++) { - wk2[n] = 0.5*(kx[i]-kx2[i])*wk1[n+1] + 0.5*(ky[j]-ky2[j])*wk1[n]; - wk2[n+1] = -0.5*(kx[i]-kx2[i])*wk1[n] + 0.5*(ky[j]-ky2[j])*wk1[n+1]; - n += 2; - } - - ft2->compute(wk2,wk2,-1); - - n = 0; - for (k = nzlo_i; k <= nzhi_i; k++) - for (j = nylo_i; j <= nyhi_i; j++) - for (i = nxlo_i; i <= nxhi_i; i++) { - vx_brick[k][j][i] = wk2[n++]; - vy_brick[k][j][i] = wk2[n++]; - } - - if (!eflag_atom) { - // z direction gradient only - - n = 0; - for (k = nzlo_ft; k <= nzhi_ft; k++) - for (j = nylo_ft; j <= nyhi_ft; j++) - for (i = nxlo_ft; i <= nxhi_ft; i++) { - wk2[n] = kz[k]*wk1[n+1]; - wk2[n+1] = -kz[k]*wk1[n]; - n += 2; - } - - ft2->compute(wk2,wk2,-1); - - - n = 0; - for (k = nzlo_i; k <= nzhi_i; k++) - for (j = nylo_i; j <= nyhi_i; j++) - for (i = nxlo_i; i <= nxhi_i; i++) { - vz_brick[k][j][i] = wk2[n]; - n += 2; - } - - } - - else { - // z direction gradient & per-atom energy - - n = 0; - for (k = nzlo_ft; k <= nzhi_ft; k++) - for (j = nylo_ft; j <= nyhi_ft; j++) - for (i = nxlo_ft; i <= nxhi_ft; i++) { - wk2[n] = 0.5*(kz[k]-kz2[k])*wk1[n+1] - wk1[n+1]; - wk2[n+1] = -0.5*(kz[k]-kz2[k])*wk1[n] + wk1[n]; - n += 2; - } - - ft2->compute(wk2,wk2,-1); - - n = 0; - for (k = nzlo_i; k <= nzhi_i; k++) - for (j = nylo_i; j <= nyhi_i; j++) - for (i = nxlo_i; i <= nxhi_i; i++) { - vz_brick[k][j][i] = wk2[n++]; - u_pa[k][j][i] = wk2[n++];; - } - } - - if (vflag_atom) poisson_peratom(wk1, wk2, ft2, vcoeff, vcoeff2, nft, - nxlo_i, nylo_i, nzlo_i, nxhi_i, nyhi_i, nzhi_i, - v0_pa, v1_pa, v2_pa, v3_pa, v4_pa, v5_pa); -} - -/* ---------------------------------------------------------------------- - FFT-based Poisson solver for ad differentiation 
-------------------------------------------------------------------------- */ - -void PPPMDisp::poisson_ad(FFT_SCALAR* wk1, FFT_SCALAR* wk2, - FFT_SCALAR* dfft, LAMMPS_NS::FFT3d* ft1,LAMMPS_NS::FFT3d* ft2, - int nx_p, int ny_p, int nz_p, int nft, - int nxlo_ft, int nylo_ft, int nzlo_ft, - int nxhi_ft, int nyhi_ft, int nzhi_ft, - int nxlo_i, int nylo_i, int nzlo_i, - int nxhi_i, int nyhi_i, int nzhi_i, - double& egy, double* gfn, - double* vir, double** vcoeff, double** vcoeff2, - FFT_SCALAR*** u_pa, FFT_SCALAR*** v0_pa, FFT_SCALAR*** v1_pa, FFT_SCALAR*** v2_pa, - FFT_SCALAR*** v3_pa, FFT_SCALAR*** v4_pa, FFT_SCALAR*** v5_pa) - - -{ - int i,j,k,n; - double eng; - - // transform charge/dispersion density (r -> k) - n = 0; - for (i = 0; i < nft; i++) { - wk1[n++] = dfft[i]; - wk1[n++] = ZEROF; - } - - ft1->compute(wk1,wk1,1); - - // if requested, compute energy and virial contribution - - double scaleinv = 1.0/(nx_p*ny_p*nz_p); - double s2 = scaleinv*scaleinv; - - if (eflag_global || vflag_global) { - if (vflag_global) { - n = 0; - for (i = 0; i < nft; i++) { - eng = s2 * gfn[i] * (wk1[n]*wk1[n] + wk1[n+1]*wk1[n+1]); - for (j = 0; j < 6; j++) vir[j] += eng*vcoeff[i][j]; - if (eflag_global) egy += eng; - n += 2; - } - } else { - n = 0; - for (i = 0; i < nft; i++) { - egy += - s2 * gfn[i] * (wk1[n]*wk1[n] + wk1[n+1]*wk1[n+1]); - n += 2; - } - } - } - - // scale by 1/total-grid-pts to get rho(k) - // multiply by Green's function to get V(k) - - n = 0; - for (i = 0; i < nft; i++) { - wk1[n++] *= scaleinv * gfn[i]; - wk1[n++] *= scaleinv * gfn[i]; - } - - - n = 0; - for (k = nzlo_ft; k <= nzhi_ft; k++) - for (j = nylo_ft; j <= nyhi_ft; j++) - for (i = nxlo_ft; i <= nxhi_ft; i++) { - wk2[n] = wk1[n]; - wk2[n+1] = wk1[n+1]; - n += 2; - } - - ft2->compute(wk2,wk2,-1); - - - n = 0; - for (k = nzlo_i; k <= nzhi_i; k++) - for (j = nylo_i; j <= nyhi_i; j++) - for (i = nxlo_i; i <= nxhi_i; i++) { - u_pa[k][j][i] = wk2[n++]; - n++; - } - - - if (vflag_atom) poisson_peratom(wk1, 
wk2, ft2, vcoeff, vcoeff2, nft, - nxlo_i, nylo_i, nzlo_i, nxhi_i, nyhi_i, nzhi_i, - v0_pa, v1_pa, v2_pa, v3_pa, v4_pa, v5_pa); - -} - -/* ---------------------------------------------------------------------- - Fourier Transform for per atom virial calculations -------------------------------------------------------------------------- */ - -void PPPMDisp:: poisson_peratom(FFT_SCALAR* wk1, FFT_SCALAR* wk2, LAMMPS_NS::FFT3d* ft2, - double** vcoeff, double** vcoeff2, int nft, - int nxlo_i, int nylo_i, int nzlo_i, - int nxhi_i, int nyhi_i, int nzhi_i, - FFT_SCALAR*** v0_pa, FFT_SCALAR*** v1_pa, FFT_SCALAR*** v2_pa, - FFT_SCALAR*** v3_pa, FFT_SCALAR*** v4_pa, FFT_SCALAR*** v5_pa) -{ - //v0 & v1 term - int n, i, j, k; - n = 0; - for (i = 0; i < nft; i++) { - wk2[n] = wk1[n]*vcoeff[i][0] - wk1[n+1]*vcoeff[i][1]; - wk2[n+1] = wk1[n+1]*vcoeff[i][0] + wk1[n]*vcoeff[i][1]; - n += 2; - } - - ft2->compute(wk2,wk2,-1); - - n = 0; - for (k = nzlo_i; k <= nzhi_i; k++) - for (j = nylo_i; j <= nyhi_i; j++) - for (i = nxlo_i; i <= nxhi_i; i++) { - v0_pa[k][j][i] = wk2[n++]; - v1_pa[k][j][i] = wk2[n++]; - } - - //v2 & v3 term - - n = 0; - for (i = 0; i < nft; i++) { - wk2[n] = wk1[n]*vcoeff[i][2] - wk1[n+1]*vcoeff2[i][0]; - wk2[n+1] = wk1[n+1]*vcoeff[i][2] + wk1[n]*vcoeff2[i][0]; - n += 2; - } - - ft2->compute(wk2,wk2,-1); - - n = 0; - for (k = nzlo_i; k <= nzhi_i; k++) - for (j = nylo_i; j <= nyhi_i; j++) - for (i = nxlo_i; i <= nxhi_i; i++) { - v2_pa[k][j][i] = wk2[n++]; - v3_pa[k][j][i] = wk2[n++]; - } - - //v4 & v5 term - - n = 0; - for (i = 0; i < nft; i++) { - wk2[n] = wk1[n]*vcoeff2[i][1] - wk1[n+1]*vcoeff2[i][2]; - wk2[n+1] = wk1[n+1]*vcoeff2[i][1] + wk1[n]*vcoeff2[i][2]; - n += 2; - } - - ft2->compute(wk2,wk2,-1); - - n = 0; - for (k = nzlo_i; k <= nzhi_i; k++) - for (j = nylo_i; j <= nyhi_i; j++) - for (i = nxlo_i; i <= nxhi_i; i++) { - v4_pa[k][j][i] = wk2[n++]; - v5_pa[k][j][i] = wk2[n++]; - } - -} - -/* 
---------------------------------------------------------------------- - Poisson solver for one mesh with 2 different dispersion densities - for ik scheme -------------------------------------------------------------------------- */ - -void PPPMDisp::poisson_2s_ik(FFT_SCALAR* dfft_1, FFT_SCALAR* dfft_2, - FFT_SCALAR*** vxbrick_1, FFT_SCALAR*** vybrick_1, FFT_SCALAR*** vzbrick_1, - FFT_SCALAR*** vxbrick_2, FFT_SCALAR*** vybrick_2, FFT_SCALAR*** vzbrick_2, - FFT_SCALAR*** u_pa_1, FFT_SCALAR*** v0_pa_1, FFT_SCALAR*** v1_pa_1, FFT_SCALAR*** v2_pa_1, - FFT_SCALAR*** v3_pa_1, FFT_SCALAR*** v4_pa_1, FFT_SCALAR*** v5_pa_1, - FFT_SCALAR*** u_pa_2, FFT_SCALAR*** v0_pa_2, FFT_SCALAR*** v1_pa_2, FFT_SCALAR*** v2_pa_2, - FFT_SCALAR*** v3_pa_2, FFT_SCALAR*** v4_pa_2, FFT_SCALAR*** v5_pa_2) - -{ - int i,j,k,n; - double eng; - - double scaleinv = 1.0/(nx_pppm_6*ny_pppm_6*nz_pppm_6); - - // transform charge/dispersion density (r -> k) - // only one tansform required when energies and pressures do not - // need to be calculated - if (eflag_global + vflag_global == 0) { - n = 0; - for (i = 0; i < nfft_6; i++) { - work1_6[n++] = dfft_1[i]; - work1_6[n++] = dfft_2[i]; - } - - fft1_6->compute(work1_6,work1_6,1); - } - // two transforms are required when energies and pressures are - // calculated - else { - n = 0; - for (i = 0; i < nfft_6; i++) { - work1_6[n] = dfft_1[i]; - work2_6[n++] = ZEROF; - work1_6[n] = ZEROF; - work2_6[n++] = dfft_2[i]; - } - - fft1_6->compute(work1_6,work1_6,1); - fft1_6->compute(work2_6,work2_6,1); - - double s2 = scaleinv*scaleinv; - - if (vflag_global) { - n = 0; - for (i = 0; i < nfft_6; i++) { - eng = 2 * s2 * greensfn_6[i] * (work1_6[n]*work2_6[n+1] - work1_6[n+1]*work2_6[n]); - for (j = 0; j < 6; j++) virial_6[j] += eng*vg_6[i][j]; - if (eflag_global)energy_6 += eng; - n += 2; - } - } else { - n = 0; - for (i = 0; i < nfft_6; i++) { - energy_6 += - 2 * s2 * greensfn_6[i] * (work1_6[n]*work2_6[n+1] - work1_6[n+1]*work2_6[n]); - n += 2; - } - } - // unify 
the two transformed vectors for efficient calculations later - for ( i = 0; i < 2*nfft_6; i++) { - work1_6[i] += work2_6[i]; - } - } - - n = 0; - for (i = 0; i < nfft_6; i++) { - work1_6[n++] *= scaleinv * greensfn_6[i]; - work1_6[n++] *= scaleinv * greensfn_6[i]; - } - - // compute gradients of V(r) in each of 3 dims by transformimg -ik*V(k) - // FFT leaves data in 3d brick decomposition - // copy it into inner portion of vdx,vdy,vdz arrays - - // x direction gradient - - n = 0; - for (k = nzlo_fft_6; k <= nzhi_fft_6; k++) - for (j = nylo_fft_6; j <= nyhi_fft_6; j++) - for (i = nxlo_fft_6; i <= nxhi_fft_6; i++) { - work2_6[n] = 0.5*(fkx_6[i]-fkx2_6[i])*work1_6[n+1]; - work2_6[n+1] = -0.5*(fkx_6[i]-fkx2_6[i])*work1_6[n]; - n += 2; - } - - fft2_6->compute(work2_6,work2_6,-1); - - n = 0; - for (k = nzlo_in_6; k <= nzhi_in_6; k++) - for (j = nylo_in_6; j <= nyhi_in_6; j++) - for (i = nxlo_in_6; i <= nxhi_in_6; i++) { - vxbrick_1[k][j][i] = work2_6[n++]; - vxbrick_2[k][j][i] = work2_6[n++]; - } - - // y direction gradient - - n = 0; - for (k = nzlo_fft_6; k <= nzhi_fft_6; k++) - for (j = nylo_fft_6; j <= nyhi_fft_6; j++) - for (i = nxlo_fft_6; i <= nxhi_fft_6; i++) { - work2_6[n] = 0.5*(fky_6[j]-fky2_6[j])*work1_6[n+1]; - work2_6[n+1] = -0.5*(fky_6[j]-fky2_6[j])*work1_6[n]; - n += 2; - } - - fft2_6->compute(work2_6,work2_6,-1); - - n = 0; - for (k = nzlo_in_6; k <= nzhi_in_6; k++) - for (j = nylo_in_6; j <= nyhi_in_6; j++) - for (i = nxlo_in_6; i <= nxhi_in_6; i++) { - vybrick_1[k][j][i] = work2_6[n++]; - vybrick_2[k][j][i] = work2_6[n++]; - } - - // z direction gradient - - n = 0; - for (k = nzlo_fft_6; k <= nzhi_fft_6; k++) - for (j = nylo_fft_6; j <= nyhi_fft_6; j++) - for (i = nxlo_fft_6; i <= nxhi_fft_6; i++) { - work2_6[n] = 0.5*(fkz_6[k]-fkz2_6[k])*work1_6[n+1]; - work2_6[n+1] = -0.5*(fkz_6[k]-fkz2_6[k])*work1_6[n]; - n += 2; - } - - fft2_6->compute(work2_6,work2_6,-1); - - n = 0; - for (k = nzlo_in_6; k <= nzhi_in_6; k++) - for (j = nylo_in_6; j <= nyhi_in_6; 
j++) - for (i = nxlo_in_6; i <= nxhi_in_6; i++) { - vzbrick_1[k][j][i] = work2_6[n++]; - vzbrick_2[k][j][i] = work2_6[n++]; - } - - //Per-atom energy - - if (eflag_atom) { - n = 0; - for (i = 0; i < nfft_6; i++) { - work2_6[n] = work1_6[n]; - work2_6[n+1] = work1_6[n+1]; - n += 2; - } - - fft2_6->compute(work2_6,work2_6,-1); - - n = 0; - for (k = nzlo_in_6; k <= nzhi_in_6; k++) - for (j = nylo_in_6; j <= nyhi_in_6; j++) - for (i = nxlo_in_6; i <= nxhi_in_6; i++) { - u_pa_1[k][j][i] = work2_6[n++]; - u_pa_2[k][j][i] = work2_6[n++]; - } - } - - if (vflag_atom) poisson_2s_peratom(v0_pa_1, v1_pa_1, v2_pa_1, v3_pa_1, v4_pa_1, v5_pa_1, - v0_pa_2, v1_pa_2, v2_pa_2, v3_pa_2, v4_pa_2, v5_pa_2); -} - - -/* ---------------------------------------------------------------------- - Poisson solver for one mesh with 2 different dispersion densities - for ik scheme -------------------------------------------------------------------------- */ - -void PPPMDisp::poisson_none_ik(int n1, int n2,FFT_SCALAR* dfft_1, FFT_SCALAR* dfft_2, - FFT_SCALAR*** vxbrick_1, FFT_SCALAR*** vybrick_1, FFT_SCALAR*** vzbrick_1, - FFT_SCALAR*** vxbrick_2, FFT_SCALAR*** vybrick_2, FFT_SCALAR*** vzbrick_2, - FFT_SCALAR**** u_pa, FFT_SCALAR**** v0_pa, FFT_SCALAR**** v1_pa, FFT_SCALAR**** v2_pa, - FFT_SCALAR**** v3_pa, FFT_SCALAR**** v4_pa, FFT_SCALAR**** v5_pa) -{ - int i,j,k,n; - double eng; - - double scaleinv = 1.0/(nx_pppm_6*ny_pppm_6*nz_pppm_6); - - // transform charge/dispersion density (r -> k) - // only one tansform required when energies and pressures do not - // need to be calculated - if (eflag_global + vflag_global == 0) { - n = 0; - for (i = 0; i < nfft_6; i++) { - work1_6[n++] = dfft_1[i]; - work1_6[n++] = dfft_2[i]; - } - - fft1_6->compute(work1_6,work1_6,1); - } - - - // two transforms are required when energies and pressures are - // calculated - else { - n = 0; - for (i = 0; i < nfft_6; i++) { - work1_6[n] = dfft_1[i]; - work2_6[n++] = ZEROF; - work1_6[n] = ZEROF; - work2_6[n++] = dfft_2[i]; 
- } - - - fft1_6->compute(work1_6,work1_6,1); - fft1_6->compute(work2_6,work2_6,1); - - double s2 = scaleinv*scaleinv; - - if (vflag_global) { - n = 0; - for (i = 0; i < nfft_6; i++) { - eng = s2 * greensfn_6[i] * (B[n1]*(work1_6[n]*work1_6[n] + work1_6[n+1]*work1_6[n+1]) + B[n2]*(work2_6[n]*work2_6[n] + work2_6[n+1]*work2_6[n+1])); - for (j = 0; j < 6; j++) virial_6[j] += eng*vg_6[i][j]; - if (eflag_global)energy_6 += eng; - n += 2; - } - } else { - n = 0; - for (i = 0; i < nfft_6; i++) { - energy_6 += - s2 * greensfn_6[i] * (B[n1]*(work1_6[n]*work1_6[n] + work1_6[n+1]*work1_6[n+1]) + B[n2]*(work2_6[n]*work2_6[n] + work2_6[n+1]*work2_6[n+1])); - n += 2; - } - } - // unify the two transformed vectors for efficient calculations later - for ( i = 0; i < 2*nfft_6; i++) { - work1_6[i] += work2_6[i]; - } - } - - n = 0; - for (i = 0; i < nfft_6; i++) { - work1_6[n++] *= scaleinv * greensfn_6[i]; - work1_6[n++] *= scaleinv * greensfn_6[i]; - } - - // compute gradients of V(r) in each of 3 dims by transformimg -ik*V(k) - // FFT leaves data in 3d brick decomposition - // copy it into inner portion of vdx,vdy,vdz arrays - - // x direction gradient - - n = 0; - for (k = nzlo_fft_6; k <= nzhi_fft_6; k++) - for (j = nylo_fft_6; j <= nyhi_fft_6; j++) - for (i = nxlo_fft_6; i <= nxhi_fft_6; i++) { - work2_6[n] = 0.5*(fkx_6[i]-fkx2_6[i])*work1_6[n+1]; - work2_6[n+1] = -0.5*(fkx_6[i]-fkx2_6[i])*work1_6[n]; - n += 2; - } - - fft2_6->compute(work2_6,work2_6,-1); - - n = 0; - for (k = nzlo_in_6; k <= nzhi_in_6; k++) - for (j = nylo_in_6; j <= nyhi_in_6; j++) - for (i = nxlo_in_6; i <= nxhi_in_6; i++) { - vxbrick_1[k][j][i] = B[n1]*work2_6[n++]; - vxbrick_2[k][j][i] = B[n2]*work2_6[n++]; - } - - // y direction gradient - - n = 0; - for (k = nzlo_fft_6; k <= nzhi_fft_6; k++) - for (j = nylo_fft_6; j <= nyhi_fft_6; j++) - for (i = nxlo_fft_6; i <= nxhi_fft_6; i++) { - work2_6[n] = 0.5*(fky_6[j]-fky2_6[j])*work1_6[n+1]; - work2_6[n+1] = -0.5*(fky_6[j]-fky2_6[j])*work1_6[n]; - n += 2; - } 
- - fft2_6->compute(work2_6,work2_6,-1); - - n = 0; - for (k = nzlo_in_6; k <= nzhi_in_6; k++) - for (j = nylo_in_6; j <= nyhi_in_6; j++) - for (i = nxlo_in_6; i <= nxhi_in_6; i++) { - vybrick_1[k][j][i] = B[n1]*work2_6[n++]; - vybrick_2[k][j][i] = B[n2]*work2_6[n++]; - } - - // z direction gradient - - n = 0; - for (k = nzlo_fft_6; k <= nzhi_fft_6; k++) - for (j = nylo_fft_6; j <= nyhi_fft_6; j++) - for (i = nxlo_fft_6; i <= nxhi_fft_6; i++) { - work2_6[n] = 0.5*(fkz_6[k]-fkz2_6[k])*work1_6[n+1]; - work2_6[n+1] = -0.5*(fkz_6[k]-fkz2_6[k])*work1_6[n]; - n += 2; - } - - fft2_6->compute(work2_6,work2_6,-1); - - n = 0; - for (k = nzlo_in_6; k <= nzhi_in_6; k++) - for (j = nylo_in_6; j <= nyhi_in_6; j++) - for (i = nxlo_in_6; i <= nxhi_in_6; i++) { - vzbrick_1[k][j][i] = B[n1]*work2_6[n++]; - vzbrick_2[k][j][i] = B[n2]*work2_6[n++]; - } - - //Per-atom energy - - if (eflag_atom) { - n = 0; - for (i = 0; i < nfft_6; i++) { - work2_6[n] = work1_6[n]; - work2_6[n+1] = work1_6[n+1]; - n += 2; - } - - fft2_6->compute(work2_6,work2_6,-1); - - n = 0; - for (k = nzlo_in_6; k <= nzhi_in_6; k++) - for (j = nylo_in_6; j <= nyhi_in_6; j++) - for (i = nxlo_in_6; i <= nxhi_in_6; i++) { - u_pa[n1][k][j][i] = B[n1]*work2_6[n++]; - u_pa[n2][k][j][i] = B[n2]*work2_6[n++]; - } - } - - if (vflag_atom) poisson_none_peratom(n1,n2, - v0_pa[n1], v1_pa[n1], v2_pa[n1], v3_pa[n1], v4_pa[n1], v5_pa[n1], - v0_pa[n2], v1_pa[n2], v2_pa[n2], v3_pa[n2], v4_pa[n2], v5_pa[n2]); -} - -/* ---------------------------------------------------------------------- - Poisson solver for one mesh with 2 different dispersion densities - for ad scheme -------------------------------------------------------------------------- */ - -void PPPMDisp::poisson_2s_ad(FFT_SCALAR* dfft_1, FFT_SCALAR* dfft_2, - FFT_SCALAR*** u_pa_1, FFT_SCALAR*** v0_pa_1, FFT_SCALAR*** v1_pa_1, FFT_SCALAR*** v2_pa_1, - FFT_SCALAR*** v3_pa_1, FFT_SCALAR*** v4_pa_1, FFT_SCALAR*** v5_pa_1, - FFT_SCALAR*** u_pa_2, FFT_SCALAR*** v0_pa_2, 
FFT_SCALAR*** v1_pa_2, FFT_SCALAR*** v2_pa_2, - FFT_SCALAR*** v3_pa_2, FFT_SCALAR*** v4_pa_2, FFT_SCALAR*** v5_pa_2) - -{ - int i,j,k,n; - double eng; - - double scaleinv = 1.0/(nx_pppm_6*ny_pppm_6*nz_pppm_6); - - // transform charge/dispersion density (r -> k) - // only one tansform required when energies and pressures do not - // need to be calculated - if (eflag_global + vflag_global == 0) { - n = 0; - for (i = 0; i < nfft_6; i++) { - work1_6[n++] = dfft_1[i]; - work1_6[n++] = dfft_2[i]; - } - - fft1_6->compute(work1_6,work1_6,1); - } - // two transforms are required when energies and pressures are - // calculated - else { - n = 0; - for (i = 0; i < nfft_6; i++) { - work1_6[n] = dfft_1[i]; - work2_6[n++] = ZEROF; - work1_6[n] = ZEROF; - work2_6[n++] = dfft_2[i]; - } - - fft1_6->compute(work1_6,work1_6,1); - fft1_6->compute(work2_6,work2_6,1); - - double s2 = scaleinv*scaleinv; - - if (vflag_global) { - n = 0; - for (i = 0; i < nfft_6; i++) { - eng = 2 * s2 * greensfn_6[i] * (work1_6[n]*work2_6[n+1] - work1_6[n+1]*work2_6[n]); - for (j = 0; j < 6; j++) virial_6[j] += eng*vg_6[i][j]; - if (eflag_global)energy_6 += eng; - n += 2; - } - } else { - n = 0; - for (i = 0; i < nfft_6; i++) { - energy_6 += - 2 * s2 * greensfn_6[i] * (work1_6[n]*work2_6[n+1] - work1_6[n+1]*work2_6[n]); - n += 2; - } - } - // unify the two transformed vectors for efficient calculations later - for ( i = 0; i < 2*nfft_6; i++) { - work1_6[i] += work2_6[i]; - } - } - - - n = 0; - for (i = 0; i < nfft_6; i++) { - work1_6[n++] *= scaleinv * greensfn_6[i]; - work1_6[n++] *= scaleinv * greensfn_6[i]; - } - - - n = 0; - for (i = 0; i < nfft_6; i++) { - work2_6[n] = work1_6[n]; - work2_6[n+1] = work1_6[n+1]; - n += 2; - } - - fft2_6->compute(work2_6,work2_6,-1); - - n = 0; - for (k = nzlo_in_6; k <= nzhi_in_6; k++) - for (j = nylo_in_6; j <= nyhi_in_6; j++) - for (i = nxlo_in_6; i <= nxhi_in_6; i++) { - u_pa_1[k][j][i] = work2_6[n++]; - u_pa_2[k][j][i] = work2_6[n++]; - } - - if (vflag_atom) 
poisson_2s_peratom(v0_pa_1, v1_pa_1, v2_pa_1, v3_pa_1, v4_pa_1, v5_pa_1, - v0_pa_2, v1_pa_2, v2_pa_2, v3_pa_2, v4_pa_2, v5_pa_2); -} - -/* ---------------------------------------------------------------------- - Poisson solver for one mesh with 2 different dispersion densities - for ad scheme -------------------------------------------------------------------------- */ - -void PPPMDisp::poisson_none_ad(int n1, int n2, FFT_SCALAR* dfft_1, FFT_SCALAR* dfft_2, - FFT_SCALAR*** u_pa_1, FFT_SCALAR*** u_pa_2, - FFT_SCALAR**** v0_pa, FFT_SCALAR**** v1_pa, FFT_SCALAR**** v2_pa, - FFT_SCALAR**** v3_pa, FFT_SCALAR**** v4_pa, FFT_SCALAR**** v5_pa) -{ - int i,j,k,n; - double eng; - - double scaleinv = 1.0/(nx_pppm_6*ny_pppm_6*nz_pppm_6); - - // transform charge/dispersion density (r -> k) - // only one tansform required when energies and pressures do not - // need to be calculated - if (eflag_global + vflag_global == 0) { - n = 0; - for (i = 0; i < nfft_6; i++) { - work1_6[n++] = dfft_1[i]; - work1_6[n++] = dfft_2[i]; - } - - fft1_6->compute(work1_6,work1_6,1); - } - // two transforms are required when energies and pressures are - // calculated - else { - n = 0; - for (i = 0; i < nfft_6; i++) { - work1_6[n] = dfft_1[i]; - work2_6[n++] = ZEROF; - work1_6[n] = ZEROF; - work2_6[n++] = dfft_2[i]; - } - - fft1_6->compute(work1_6,work1_6,1); - fft1_6->compute(work2_6,work2_6,1); - - double s2 = scaleinv*scaleinv; - - if (vflag_global) { - n = 0; - for (i = 0; i < nfft_6; i++) { - eng = s2 * greensfn_6[i] * (B[n1]*(work1_6[n]*work1_6[n] + work1_6[n+1]*work1_6[n+1]) + B[n2]*(work2_6[n]*work2_6[n] + work2_6[n+1]*work2_6[n+1])); - for (j = 0; j < 6; j++) virial_6[j] += eng*vg_6[i][j]; - if (eflag_global)energy_6 += eng; - n += 2; - } - } else { - n = 0; - for (i = 0; i < nfft_6; i++) { - energy_6 += - s2 * greensfn_6[i] * (B[n1]*(work1_6[n]*work1_6[n] + work1_6[n+1]*work1_6[n+1]) + B[n2]*(work2_6[n]*work2_6[n] + work2_6[n+1]*work2_6[n+1])); - n += 2; - } - } - // unify the two 
transformed vectors for efficient calculations later - for ( i = 0; i < 2*nfft_6; i++) { - work1_6[i] += work2_6[i]; - } - } - - - n = 0; - for (i = 0; i < nfft_6; i++) { - work1_6[n++] *= scaleinv * greensfn_6[i]; - work1_6[n++] *= scaleinv * greensfn_6[i]; - } - - - n = 0; - for (i = 0; i < nfft_6; i++) { - work2_6[n] = work1_6[n]; - work2_6[n+1] = work1_6[n+1]; - n += 2; - } - - fft2_6->compute(work2_6,work2_6,-1); - - n = 0; - for (k = nzlo_in_6; k <= nzhi_in_6; k++) - for (j = nylo_in_6; j <= nyhi_in_6; j++) - for (i = nxlo_in_6; i <= nxhi_in_6; i++) { - u_pa_1[k][j][i] = B[n1]*work2_6[n++]; - u_pa_2[k][j][i] = B[n2]*work2_6[n++]; - } - - if (vflag_atom) poisson_none_peratom(n1,n2, - v0_pa[n1], v1_pa[n1], v2_pa[n1], v3_pa[n1], v4_pa[n1], v5_pa[n1], - v0_pa[n2], v1_pa[n2], v2_pa[n2], v3_pa[n2], v4_pa[n2], v5_pa[n2]); -} - -/* ---------------------------------------------------------------------- - Fourier Transform for per atom virial calculations -------------------------------------------------------------------------- */ - -void PPPMDisp::poisson_2s_peratom(FFT_SCALAR*** v0_pa_1, FFT_SCALAR*** v1_pa_1, FFT_SCALAR*** v2_pa_1, - FFT_SCALAR*** v3_pa_1, FFT_SCALAR*** v4_pa_1, FFT_SCALAR*** v5_pa_1, - FFT_SCALAR*** v0_pa_2, FFT_SCALAR*** v1_pa_2, FFT_SCALAR*** v2_pa_2, - FFT_SCALAR*** v3_pa_2, FFT_SCALAR*** v4_pa_2, FFT_SCALAR*** v5_pa_2) -{ - //Compute first virial term v0 - int n, i, j, k; - - n = 0; - for (i = 0; i < nfft_6; i++) { - work2_6[n] = work1_6[n]*vg_6[i][0]; - work2_6[n+1] = work1_6[n+1]*vg_6[i][0]; - n += 2; - } - - fft2_6->compute(work2_6,work2_6,-1); - - n = 0; - for (k = nzlo_in_6; k <= nzhi_in_6; k++) - for (j = nylo_in_6; j <= nyhi_in_6; j++) - for (i = nxlo_in_6; i <= nxhi_in_6; i++) { - v0_pa_1[k][j][i] = work2_6[n++]; - v0_pa_2[k][j][i] = work2_6[n++]; - } - - //Compute second virial term v1 - - n = 0; - for (i = 0; i < nfft_6; i++) { - work2_6[n] = work1_6[n]*vg_6[i][1]; - work2_6[n+1] = work1_6[n+1]*vg_6[i][1]; - n += 2; - } - - 
fft2_6->compute(work2_6,work2_6,-1); - - n = 0; - for (k = nzlo_in_6; k <= nzhi_in_6; k++) - for (j = nylo_in_6; j <= nyhi_in_6; j++) - for (i = nxlo_in_6; i <= nxhi_in_6; i++) { - v1_pa_1[k][j][i] = work2_6[n++]; - v1_pa_2[k][j][i] = work2_6[n++]; - } - - //Compute third virial term v2 - - n = 0; - for (i = 0; i < nfft_6; i++) { - work2_6[n] = work1_6[n]*vg_6[i][2]; - work2_6[n+1] = work1_6[n+1]*vg_6[i][2]; - n += 2; - } - - fft2_6->compute(work2_6,work2_6,-1); - - n = 0; - for (k = nzlo_in_6; k <= nzhi_in_6; k++) - for (j = nylo_in_6; j <= nyhi_in_6; j++) - for (i = nxlo_in_6; i <= nxhi_in_6; i++) { - v2_pa_1[k][j][i] = work2_6[n++]; - v2_pa_2[k][j][i] = work2_6[n++]; - } - - //Compute fourth virial term v3 - - n = 0; - for (i = 0; i < nfft_6; i++) { - work2_6[n] = work1_6[n]*vg2_6[i][0]; - work2_6[n+1] = work1_6[n+1]*vg2_6[i][0]; - n += 2; - } - - fft2_6->compute(work2_6,work2_6,-1); - - n = 0; - for (k = nzlo_in_6; k <= nzhi_in_6; k++) - for (j = nylo_in_6; j <= nyhi_in_6; j++) - for (i = nxlo_in_6; i <= nxhi_in_6; i++) { - v3_pa_1[k][j][i] = work2_6[n++]; - v3_pa_2[k][j][i] = work2_6[n++]; - } - - //Compute fifth virial term v4 - - n = 0; - for (i = 0; i < nfft_6; i++) { - work2_6[n] = work1_6[n]*vg2_6[i][1]; - work2_6[n+1] = work1_6[n+1]*vg2_6[i][1]; - n += 2; - } - - fft2_6->compute(work2_6,work2_6,-1); - - n = 0; - for (k = nzlo_in_6; k <= nzhi_in_6; k++) - for (j = nylo_in_6; j <= nyhi_in_6; j++) - for (i = nxlo_in_6; i <= nxhi_in_6; i++) { - v4_pa_1[k][j][i] = work2_6[n++]; - v4_pa_2[k][j][i] = work2_6[n++]; - } - - //Compute last virial term v5 - - n = 0; - for (i = 0; i < nfft_6; i++) { - work2_6[n] = work1_6[n]*vg2_6[i][2]; - work2_6[n+1] = work1_6[n+1]*vg2_6[i][2]; - n += 2; - } - - fft2_6->compute(work2_6,work2_6,-1); - - n = 0; - for (k = nzlo_in_6; k <= nzhi_in_6; k++) - for (j = nylo_in_6; j <= nyhi_in_6; j++) - for (i = nxlo_in_6; i <= nxhi_in_6; i++) { - v5_pa_1[k][j][i] = work2_6[n++]; - v5_pa_2[k][j][i] = work2_6[n++]; - } -} - -/* 
---------------------------------------------------------------------- - Fourier Transform for per atom virial calculations -------------------------------------------------------------------------- */ - -void PPPMDisp::poisson_none_peratom(int n1, int n2, - FFT_SCALAR*** v0_pa_1, FFT_SCALAR*** v1_pa_1, FFT_SCALAR*** v2_pa_1, - FFT_SCALAR*** v3_pa_1, FFT_SCALAR*** v4_pa_1, FFT_SCALAR*** v5_pa_1, - FFT_SCALAR*** v0_pa_2, FFT_SCALAR*** v1_pa_2, FFT_SCALAR*** v2_pa_2, - FFT_SCALAR*** v3_pa_2, FFT_SCALAR*** v4_pa_2, FFT_SCALAR*** v5_pa_2) -{ - //Compute first virial term v0 - int n, i, j, k; - - n = 0; - for (i = 0; i < nfft_6; i++) { - work2_6[n] = work1_6[n]*vg_6[i][0]; - work2_6[n+1] = work1_6[n+1]*vg_6[i][0]; - n += 2; - } - - fft2_6->compute(work2_6,work2_6,-1); - - n = 0; - for (k = nzlo_in_6; k <= nzhi_in_6; k++) - for (j = nylo_in_6; j <= nyhi_in_6; j++) - for (i = nxlo_in_6; i <= nxhi_in_6; i++) { - v0_pa_1[k][j][i] = B[n1]*work2_6[n++]; - v0_pa_2[k][j][i] = B[n2]*work2_6[n++]; - } - - //Compute second virial term v1 - - n = 0; - for (i = 0; i < nfft_6; i++) { - work2_6[n] = work1_6[n]*vg_6[i][1]; - work2_6[n+1] = work1_6[n+1]*vg_6[i][1]; - n += 2; - } - - fft2_6->compute(work2_6,work2_6,-1); - - n = 0; - for (k = nzlo_in_6; k <= nzhi_in_6; k++) - for (j = nylo_in_6; j <= nyhi_in_6; j++) - for (i = nxlo_in_6; i <= nxhi_in_6; i++) { - v1_pa_1[k][j][i] = B[n1]*work2_6[n++]; - v1_pa_2[k][j][i] = B[n2]*work2_6[n++]; - } - - //Compute third virial term v2 - - n = 0; - for (i = 0; i < nfft_6; i++) { - work2_6[n] = work1_6[n]*vg_6[i][2]; - work2_6[n+1] = work1_6[n+1]*vg_6[i][2]; - n += 2; - } - - fft2_6->compute(work2_6,work2_6,-1); - - n = 0; - for (k = nzlo_in_6; k <= nzhi_in_6; k++) - for (j = nylo_in_6; j <= nyhi_in_6; j++) - for (i = nxlo_in_6; i <= nxhi_in_6; i++) { - v2_pa_1[k][j][i] = B[n1]*work2_6[n++]; - v2_pa_2[k][j][i] = B[n2]*work2_6[n++]; - } - - //Compute fourth virial term v3 - - n = 0; - for (i = 0; i < nfft_6; i++) { - work2_6[n] = 
work1_6[n]*vg2_6[i][0]; - work2_6[n+1] = work1_6[n+1]*vg2_6[i][0]; - n += 2; - } - - fft2_6->compute(work2_6,work2_6,-1); - - n = 0; - for (k = nzlo_in_6; k <= nzhi_in_6; k++) - for (j = nylo_in_6; j <= nyhi_in_6; j++) - for (i = nxlo_in_6; i <= nxhi_in_6; i++) { - v3_pa_1[k][j][i] = B[n1]*work2_6[n++]; - v3_pa_2[k][j][i] = B[n2]*work2_6[n++]; - } - - //Compute fifth virial term v4 - - n = 0; - for (i = 0; i < nfft_6; i++) { - work2_6[n] = work1_6[n]*vg2_6[i][1]; - work2_6[n+1] = work1_6[n+1]*vg2_6[i][1]; - n += 2; - } - - fft2_6->compute(work2_6,work2_6,-1); - - n = 0; - for (k = nzlo_in_6; k <= nzhi_in_6; k++) - for (j = nylo_in_6; j <= nyhi_in_6; j++) - for (i = nxlo_in_6; i <= nxhi_in_6; i++) { - v4_pa_1[k][j][i] = B[n1]*work2_6[n++]; - v4_pa_2[k][j][i] = B[n2]*work2_6[n++]; - } - - //Compute last virial term v5 - - n = 0; - for (i = 0; i < nfft_6; i++) { - work2_6[n] = work1_6[n]*vg2_6[i][2]; - work2_6[n+1] = work1_6[n+1]*vg2_6[i][2]; - n += 2; - } - - fft2_6->compute(work2_6,work2_6,-1); - - n = 0; - for (k = nzlo_in_6; k <= nzhi_in_6; k++) - for (j = nylo_in_6; j <= nyhi_in_6; j++) - for (i = nxlo_in_6; i <= nxhi_in_6; i++) { - v5_pa_1[k][j][i] = B[n1]*work2_6[n++]; - v5_pa_2[k][j][i] = B[n2]*work2_6[n++]; - } -} - -/* ---------------------------------------------------------------------- - interpolate from grid to get electric field & force on my particles - for ik scheme -------------------------------------------------------------------------- */ - -void PPPMDisp::fieldforce_c_ik() -{ - int i,l,m,n,nx,ny,nz,mx,my,mz; - FFT_SCALAR dx,dy,dz,x0,y0,z0; - FFT_SCALAR ekx,eky,ekz; - - // loop over my charges, interpolate electric field from nearby grid points - // (nx,ny,nz) = global coords of grid pt to "lower left" of charge - // (dx,dy,dz) = distance to "lower left" grid pt - // (mx,my,mz) = global coords of moving stencil pt - // ek = 3 components of E-field on particle - - double *q = atom->q; - double **x = atom->x; - double **f = atom->f; - - int nlocal = 
atom->nlocal; - - for (i = 0; i < nlocal; i++) { - nx = part2grid[i][0]; - ny = part2grid[i][1]; - nz = part2grid[i][2]; - dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv; - dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv; - dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv; - - compute_rho1d(dx,dy,dz, order, rho_coeff, rho1d); - - ekx = eky = ekz = ZEROF; - for (n = nlower; n <= nupper; n++) { - mz = n+nz; - z0 = rho1d[2][n]; - for (m = nlower; m <= nupper; m++) { - my = m+ny; - y0 = z0*rho1d[1][m]; - for (l = nlower; l <= nupper; l++) { - mx = l+nx; - x0 = y0*rho1d[0][l]; - ekx -= x0*vdx_brick[mz][my][mx]; - eky -= x0*vdy_brick[mz][my][mx]; - ekz -= x0*vdz_brick[mz][my][mx]; - } - } - } - - // convert E-field to force - - const double qfactor = force->qqrd2e * scale * q[i]; - f[i][0] += qfactor*ekx; - f[i][1] += qfactor*eky; - if (slabflag != 2) f[i][2] += qfactor*ekz; - } -} -/* ---------------------------------------------------------------------- - interpolate from grid to get electric field & force on my particles - for ad scheme -------------------------------------------------------------------------- */ - -void PPPMDisp::fieldforce_c_ad() -{ - int i,l,m,n,nx,ny,nz,mx,my,mz; - FFT_SCALAR dx,dy,dz; - FFT_SCALAR ekx,eky,ekz; - double s1,s2,s3; - double sf = 0.0; - - double *prd; - - if (triclinic == 0) prd = domain->prd; - else prd = domain->prd_lamda; - - double xprd = prd[0]; - double yprd = prd[1]; - double zprd = prd[2]; - double zprd_slab = zprd*slab_volfactor; - - double hx_inv = nx_pppm/xprd; - double hy_inv = ny_pppm/yprd; - double hz_inv = nz_pppm/zprd_slab; - - // loop over my charges, interpolate electric field from nearby grid points - // (nx,ny,nz) = global coords of grid pt to "lower left" of charge - // (dx,dy,dz) = distance to "lower left" grid pt - // (mx,my,mz) = global coords of moving stencil pt - // ek = 3 components of E-field on particle - - double *q = atom->q; - double **x = atom->x; - double **f = atom->f; - - int nlocal = atom->nlocal; - - 
for (i = 0; i < nlocal; i++) { - nx = part2grid[i][0]; - ny = part2grid[i][1]; - nz = part2grid[i][2]; - dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv; - dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv; - dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv; - - compute_rho1d(dx,dy,dz, order, rho_coeff, rho1d); - compute_drho1d(dx,dy,dz, order, drho_coeff, drho1d); - - ekx = eky = ekz = ZEROF; - for (n = nlower; n <= nupper; n++) { - mz = n+nz; - for (m = nlower; m <= nupper; m++) { - my = m+ny; - for (l = nlower; l <= nupper; l++) { - mx = l+nx; - ekx += drho1d[0][l]*rho1d[1][m]*rho1d[2][n]*u_brick[mz][my][mx]; - eky += rho1d[0][l]*drho1d[1][m]*rho1d[2][n]*u_brick[mz][my][mx]; - ekz += rho1d[0][l]*rho1d[1][m]*drho1d[2][n]*u_brick[mz][my][mx]; - } - } - } - ekx *= hx_inv; - eky *= hy_inv; - ekz *= hz_inv; - // convert E-field to force and substract self forces - const double qfactor = force->qqrd2e * scale; - - s1 = x[i][0]*hx_inv; - s2 = x[i][1]*hy_inv; - s3 = x[i][2]*hz_inv; - sf = sf_coeff[0]*sin(2*MY_PI*s1); - sf += sf_coeff[1]*sin(4*MY_PI*s1); - sf *= 2*q[i]*q[i]; - f[i][0] += qfactor*(ekx*q[i] - sf); - - sf = sf_coeff[2]*sin(2*MY_PI*s2); - sf += sf_coeff[3]*sin(4*MY_PI*s2); - sf *= 2*q[i]*q[i]; - f[i][1] += qfactor*(eky*q[i] - sf); - - - sf = sf_coeff[4]*sin(2*MY_PI*s3); - sf += sf_coeff[5]*sin(4*MY_PI*s3); - sf *= 2*q[i]*q[i]; - if (slabflag != 2) f[i][2] += qfactor*(ekz*q[i] - sf); - } -} - -/* ---------------------------------------------------------------------- - interpolate from grid to get electric field & force on my particles -------------------------------------------------------------------------- */ - -void PPPMDisp::fieldforce_c_peratom() -{ - int i,l,m,n,nx,ny,nz,mx,my,mz; - FFT_SCALAR dx,dy,dz,x0,y0,z0; - FFT_SCALAR u_pa,v0,v1,v2,v3,v4,v5; - - // loop over my charges, interpolate electric field from nearby grid points - // (nx,ny,nz) = global coords of grid pt to "lower left" of charge - // (dx,dy,dz) = distance to "lower left" grid pt - // (mx,my,mz) = 
global coords of moving stencil pt - // ek = 3 components of E-field on particle - - double *q = atom->q; - double **x = atom->x; - - int nlocal = atom->nlocal; - - for (i = 0; i < nlocal; i++) { - nx = part2grid[i][0]; - ny = part2grid[i][1]; - nz = part2grid[i][2]; - dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv; - dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv; - dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv; - - compute_rho1d(dx,dy,dz, order, rho_coeff, rho1d); - - u_pa = v0 = v1 = v2 = v3 = v4 = v5 = ZEROF; - for (n = nlower; n <= nupper; n++) { - mz = n+nz; - z0 = rho1d[2][n]; - for (m = nlower; m <= nupper; m++) { - my = m+ny; - y0 = z0*rho1d[1][m]; - for (l = nlower; l <= nupper; l++) { - mx = l+nx; - x0 = y0*rho1d[0][l]; - if (eflag_atom) u_pa += x0*u_brick[mz][my][mx]; - if (vflag_atom) { - v0 += x0*v0_brick[mz][my][mx]; - v1 += x0*v1_brick[mz][my][mx]; - v2 += x0*v2_brick[mz][my][mx]; - v3 += x0*v3_brick[mz][my][mx]; - v4 += x0*v4_brick[mz][my][mx]; - v5 += x0*v5_brick[mz][my][mx]; - } - } - } - } - - // convert E-field to force - - const double qfactor = 0.5*force->qqrd2e * scale * q[i]; - - if (eflag_atom) eatom[i] += u_pa*qfactor; - if (vflag_atom) { - vatom[i][0] += v0*qfactor; - vatom[i][1] += v1*qfactor; - vatom[i][2] += v2*qfactor; - vatom[i][3] += v3*qfactor; - vatom[i][4] += v4*qfactor; - vatom[i][5] += v5*qfactor; - } - } -} - -/* ---------------------------------------------------------------------- - interpolate from grid to get dispersion field & force on my particles - for geometric mixing rule -------------------------------------------------------------------------- */ - -void PPPMDisp::fieldforce_g_ik() -{ - int i,l,m,n,nx,ny,nz,mx,my,mz; - FFT_SCALAR dx,dy,dz,x0,y0,z0; - FFT_SCALAR ekx,eky,ekz; - - // loop over my charges, interpolate electric field from nearby grid points - // (nx,ny,nz) = global coords of grid pt to "lower left" of charge - // (dx,dy,dz) = distance to "lower left" grid pt - // (mx,my,mz) = global coords of moving 
stencil pt - // ek = 3 components of dispersion field on particle - - double **x = atom->x; - double **f = atom->f; - int type; - double lj; - - int nlocal = atom->nlocal; - - for (i = 0; i < nlocal; i++) { - nx = part2grid_6[i][0]; - ny = part2grid_6[i][1]; - nz = part2grid_6[i][2]; - dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6; - dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6; - dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6; - - compute_rho1d(dx,dy,dz, order_6, rho_coeff_6, rho1d_6); - - ekx = eky = ekz = ZEROF; - for (n = nlower_6; n <= nupper_6; n++) { - mz = n+nz; - z0 = rho1d_6[2][n]; - for (m = nlower_6; m <= nupper_6; m++) { - my = m+ny; - y0 = z0*rho1d_6[1][m]; - for (l = nlower_6; l <= nupper_6; l++) { - mx = l+nx; - x0 = y0*rho1d_6[0][l]; - ekx -= x0*vdx_brick_g[mz][my][mx]; - eky -= x0*vdy_brick_g[mz][my][mx]; - ekz -= x0*vdz_brick_g[mz][my][mx]; - } - } - } - - // convert E-field to force - type = atom->type[i]; - lj = B[type]; - f[i][0] += lj*ekx; - f[i][1] += lj*eky; - if (slabflag != 2) f[i][2] += lj*ekz; - } -} - -/* ---------------------------------------------------------------------- - interpolate from grid to get dispersion field & force on my particles - for geometric mixing rule for ad scheme -------------------------------------------------------------------------- */ - -void PPPMDisp::fieldforce_g_ad() -{ - int i,l,m,n,nx,ny,nz,mx,my,mz; - FFT_SCALAR dx,dy,dz; - FFT_SCALAR ekx,eky,ekz; - double s1,s2,s3; - double sf = 0.0; - double *prd; - - if (triclinic == 0) prd = domain->prd; - else prd = domain->prd_lamda; - - double xprd = prd[0]; - double yprd = prd[1]; - double zprd = prd[2]; - double zprd_slab = zprd*slab_volfactor; - - double hx_inv = nx_pppm_6/xprd; - double hy_inv = ny_pppm_6/yprd; - double hz_inv = nz_pppm_6/zprd_slab; - - // loop over my charges, interpolate electric field from nearby grid points - // (nx,ny,nz) = global coords of grid pt to "lower left" of charge - // (dx,dy,dz) = distance to "lower left" grid pt 
- // (mx,my,mz) = global coords of moving stencil pt - // ek = 3 components of dispersion field on particle - - double **x = atom->x; - double **f = atom->f; - int type; - double lj; - - int nlocal = atom->nlocal; - - - for (i = 0; i < nlocal; i++) { - nx = part2grid_6[i][0]; - ny = part2grid_6[i][1]; - nz = part2grid_6[i][2]; - dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6; - dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6; - dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6; - - compute_rho1d(dx,dy,dz, order_6, rho_coeff_6, rho1d_6); - compute_drho1d(dx,dy,dz, order_6, drho_coeff_6, drho1d_6); - - - ekx = eky = ekz = ZEROF; - for (n = nlower_6; n <= nupper_6; n++) { - mz = n+nz; - for (m = nlower_6; m <= nupper_6; m++) { - my = m+ny; - for (l = nlower_6; l <= nupper_6; l++) { - mx = l+nx; - ekx += drho1d_6[0][l]*rho1d_6[1][m]*rho1d_6[2][n]*u_brick_g[mz][my][mx]; - eky += rho1d_6[0][l]*drho1d_6[1][m]*rho1d_6[2][n]*u_brick_g[mz][my][mx]; - ekz += rho1d_6[0][l]*rho1d_6[1][m]*drho1d_6[2][n]*u_brick_g[mz][my][mx]; - } - } - } - ekx *= hx_inv; - eky *= hy_inv; - ekz *= hz_inv; - - // convert E-field to force - type = atom->type[i]; - lj = B[type]; - - s1 = x[i][0]*hx_inv; - s2 = x[i][1]*hy_inv; - s3 = x[i][2]*hz_inv; - - sf = sf_coeff_6[0]*sin(2*MY_PI*s1); - sf += sf_coeff_6[1]*sin(4*MY_PI*s1); - sf *= 2*lj*lj; - f[i][0] += ekx*lj - sf; - - sf = sf_coeff_6[2]*sin(2*MY_PI*s2); - sf += sf_coeff_6[3]*sin(4*MY_PI*s2); - sf *= 2*lj*lj; - f[i][1] += eky*lj - sf; - - - sf = sf_coeff_6[4]*sin(2*MY_PI*s3); - sf += sf_coeff_6[5]*sin(4*MY_PI*s3); - sf *= 2*lj*lj; - if (slabflag != 2) f[i][2] += ekz*lj - sf; - - } -} - -/* ---------------------------------------------------------------------- - interpolate from grid to get dispersion field & force on my particles - for geometric mixing rule for per atom quantities -------------------------------------------------------------------------- */ - -void PPPMDisp::fieldforce_g_peratom() -{ - int i,l,m,n,nx,ny,nz,mx,my,mz; - 
FFT_SCALAR dx,dy,dz,x0,y0,z0; - FFT_SCALAR u_pa,v0,v1,v2,v3,v4,v5; - - // loop over my charges, interpolate electric field from nearby grid points - // (nx,ny,nz) = global coords of grid pt to "lower left" of charge - // (dx,dy,dz) = distance to "lower left" grid pt - // (mx,my,mz) = global coords of moving stencil pt - // ek = 3 components of dispersion field on particle - - double **x = atom->x; - int type; - double lj; - - int nlocal = atom->nlocal; - - for (i = 0; i < nlocal; i++) { - nx = part2grid_6[i][0]; - ny = part2grid_6[i][1]; - nz = part2grid_6[i][2]; - dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6; - dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6; - dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6; - - compute_rho1d(dx,dy,dz, order_6, rho_coeff_6, rho1d_6); - - u_pa = v0 = v1 = v2 = v3 = v4 = v5 = ZEROF; - for (n = nlower_6; n <= nupper_6; n++) { - mz = n+nz; - z0 = rho1d_6[2][n]; - for (m = nlower_6; m <= nupper_6; m++) { - my = m+ny; - y0 = z0*rho1d_6[1][m]; - for (l = nlower_6; l <= nupper_6; l++) { - mx = l+nx; - x0 = y0*rho1d_6[0][l]; - if (eflag_atom) u_pa += x0*u_brick_g[mz][my][mx]; - if (vflag_atom) { - v0 += x0*v0_brick_g[mz][my][mx]; - v1 += x0*v1_brick_g[mz][my][mx]; - v2 += x0*v2_brick_g[mz][my][mx]; - v3 += x0*v3_brick_g[mz][my][mx]; - v4 += x0*v4_brick_g[mz][my][mx]; - v5 += x0*v5_brick_g[mz][my][mx]; - } - } - } - } - - // convert E-field to force - type = atom->type[i]; - lj = B[type]*0.5; - - if (eflag_atom) eatom[i] += u_pa*lj; - if (vflag_atom) { - vatom[i][0] += v0*lj; - vatom[i][1] += v1*lj; - vatom[i][2] += v2*lj; - vatom[i][3] += v3*lj; - vatom[i][4] += v4*lj; - vatom[i][5] += v5*lj; - } - } -} - -/* ---------------------------------------------------------------------- - interpolate from grid to get dispersion field & force on my particles - for arithmetic mixing rule and ik scheme -------------------------------------------------------------------------- */ - -void PPPMDisp::fieldforce_a_ik() -{ - int 
i,l,m,n,nx,ny,nz,mx,my,mz; - FFT_SCALAR dx,dy,dz,x0,y0,z0; - FFT_SCALAR ekx0, eky0, ekz0, ekx1, eky1, ekz1, ekx2, eky2, ekz2; - FFT_SCALAR ekx3, eky3, ekz3, ekx4, eky4, ekz4, ekx5, eky5, ekz5; - FFT_SCALAR ekx6, eky6, ekz6; - - // loop over my charges, interpolate electric field from nearby grid points - // (nx,ny,nz) = global coords of grid pt to "lower left" of charge - // (dx,dy,dz) = distance to "lower left" grid pt - // (mx,my,mz) = global coords of moving stencil pt - // ek = 3 components of dispersion field on particle - - double **x = atom->x; - double **f = atom->f; - int type; - double lj0, lj1, lj2, lj3, lj4, lj5, lj6; - - int nlocal = atom->nlocal; - - for (i = 0; i < nlocal; i++) { - - nx = part2grid_6[i][0]; - ny = part2grid_6[i][1]; - nz = part2grid_6[i][2]; - dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6; - dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6; - dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6; - compute_rho1d(dx,dy,dz, order_6, rho_coeff_6, rho1d_6); - ekx0 = eky0 = ekz0 = ZEROF; - ekx1 = eky1 = ekz1 = ZEROF; - ekx2 = eky2 = ekz2 = ZEROF; - ekx3 = eky3 = ekz3 = ZEROF; - ekx4 = eky4 = ekz4 = ZEROF; - ekx5 = eky5 = ekz5 = ZEROF; - ekx6 = eky6 = ekz6 = ZEROF; - for (n = nlower_6; n <= nupper_6; n++) { - mz = n+nz; - z0 = rho1d_6[2][n]; - for (m = nlower_6; m <= nupper_6; m++) { - my = m+ny; - y0 = z0*rho1d_6[1][m]; - for (l = nlower_6; l <= nupper_6; l++) { - mx = l+nx; - x0 = y0*rho1d_6[0][l]; - ekx0 -= x0*vdx_brick_a0[mz][my][mx]; - eky0 -= x0*vdy_brick_a0[mz][my][mx]; - ekz0 -= x0*vdz_brick_a0[mz][my][mx]; - ekx1 -= x0*vdx_brick_a1[mz][my][mx]; - eky1 -= x0*vdy_brick_a1[mz][my][mx]; - ekz1 -= x0*vdz_brick_a1[mz][my][mx]; - ekx2 -= x0*vdx_brick_a2[mz][my][mx]; - eky2 -= x0*vdy_brick_a2[mz][my][mx]; - ekz2 -= x0*vdz_brick_a2[mz][my][mx]; - ekx3 -= x0*vdx_brick_a3[mz][my][mx]; - eky3 -= x0*vdy_brick_a3[mz][my][mx]; - ekz3 -= x0*vdz_brick_a3[mz][my][mx]; - ekx4 -= x0*vdx_brick_a4[mz][my][mx]; - eky4 -= x0*vdy_brick_a4[mz][my][mx]; - 
ekz4 -= x0*vdz_brick_a4[mz][my][mx]; - ekx5 -= x0*vdx_brick_a5[mz][my][mx]; - eky5 -= x0*vdy_brick_a5[mz][my][mx]; - ekz5 -= x0*vdz_brick_a5[mz][my][mx]; - ekx6 -= x0*vdx_brick_a6[mz][my][mx]; - eky6 -= x0*vdy_brick_a6[mz][my][mx]; - ekz6 -= x0*vdz_brick_a6[mz][my][mx]; - } - } - } - // convert D-field to force - type = atom->type[i]; - lj0 = B[7*type+6]; - lj1 = B[7*type+5]; - lj2 = B[7*type+4]; - lj3 = B[7*type+3]; - lj4 = B[7*type+2]; - lj5 = B[7*type+1]; - lj6 = B[7*type]; - f[i][0] += lj0*ekx0 + lj1*ekx1 + lj2*ekx2 + lj3*ekx3 + lj4*ekx4 + lj5*ekx5 + lj6*ekx6; - f[i][1] += lj0*eky0 + lj1*eky1 + lj2*eky2 + lj3*eky3 + lj4*eky4 + lj5*eky5 + lj6*eky6; - if (slabflag != 2) f[i][2] += lj0*ekz0 + lj1*ekz1 + lj2*ekz2 + lj3*ekz3 + lj4*ekz4 + lj5*ekz5 + lj6*ekz6; - } -} - -/* ---------------------------------------------------------------------- - interpolate from grid to get dispersion field & force on my particles - for arithmetic mixing rule for the ad scheme -------------------------------------------------------------------------- */ - -void PPPMDisp::fieldforce_a_ad() -{ - int i,l,m,n,nx,ny,nz,mx,my,mz; - FFT_SCALAR dx,dy,dz,x0,y0,z0; - FFT_SCALAR ekx0, eky0, ekz0, ekx1, eky1, ekz1, ekx2, eky2, ekz2; - FFT_SCALAR ekx3, eky3, ekz3, ekx4, eky4, ekz4, ekx5, eky5, ekz5; - FFT_SCALAR ekx6, eky6, ekz6; - - double s1,s2,s3; - double sf = 0.0; - double *prd; - - if (triclinic == 0) prd = domain->prd; - else prd = domain->prd_lamda; - - double xprd = prd[0]; - double yprd = prd[1]; - double zprd = prd[2]; - double zprd_slab = zprd*slab_volfactor; - - double hx_inv = nx_pppm_6/xprd; - double hy_inv = ny_pppm_6/yprd; - double hz_inv = nz_pppm_6/zprd_slab; - - // loop over my charges, interpolate electric field from nearby grid points - // (nx,ny,nz) = global coords of grid pt to "lower left" of charge - // (dx,dy,dz) = distance to "lower left" grid pt - // (mx,my,mz) = global coords of moving stencil pt - // ek = 3 components of dispersion field on particle - - double **x = 
atom->x; - double **f = atom->f; - int type; - double lj0, lj1, lj2, lj3, lj4, lj5, lj6; - - int nlocal = atom->nlocal; - - for (i = 0; i < nlocal; i++) { - - nx = part2grid_6[i][0]; - ny = part2grid_6[i][1]; - nz = part2grid_6[i][2]; - dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6; - dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6; - dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6; - - compute_rho1d(dx,dy,dz, order_6, rho_coeff_6, rho1d_6); - compute_drho1d(dx,dy,dz, order_6, drho_coeff_6, drho1d_6); - - ekx0 = eky0 = ekz0 = ZEROF; - ekx1 = eky1 = ekz1 = ZEROF; - ekx2 = eky2 = ekz2 = ZEROF; - ekx3 = eky3 = ekz3 = ZEROF; - ekx4 = eky4 = ekz4 = ZEROF; - ekx5 = eky5 = ekz5 = ZEROF; - ekx6 = eky6 = ekz6 = ZEROF; - for (n = nlower_6; n <= nupper_6; n++) { - mz = n+nz; - for (m = nlower_6; m <= nupper_6; m++) { - my = m+ny; - for (l = nlower_6; l <= nupper_6; l++) { - mx = l+nx; - x0 = drho1d_6[0][l]*rho1d_6[1][m]*rho1d_6[2][n]; - y0 = rho1d_6[0][l]*drho1d_6[1][m]*rho1d_6[2][n]; - z0 = rho1d_6[0][l]*rho1d_6[1][m]*drho1d_6[2][n]; - - ekx0 += x0*u_brick_a0[mz][my][mx]; - eky0 += y0*u_brick_a0[mz][my][mx]; - ekz0 += z0*u_brick_a0[mz][my][mx]; - - ekx1 += x0*u_brick_a1[mz][my][mx]; - eky1 += y0*u_brick_a1[mz][my][mx]; - ekz1 += z0*u_brick_a1[mz][my][mx]; - - ekx2 += x0*u_brick_a2[mz][my][mx]; - eky2 += y0*u_brick_a2[mz][my][mx]; - ekz2 += z0*u_brick_a2[mz][my][mx]; - - ekx3 += x0*u_brick_a3[mz][my][mx]; - eky3 += y0*u_brick_a3[mz][my][mx]; - ekz3 += z0*u_brick_a3[mz][my][mx]; - - ekx4 += x0*u_brick_a4[mz][my][mx]; - eky4 += y0*u_brick_a4[mz][my][mx]; - ekz4 += z0*u_brick_a4[mz][my][mx]; - - ekx5 += x0*u_brick_a5[mz][my][mx]; - eky5 += y0*u_brick_a5[mz][my][mx]; - ekz5 += z0*u_brick_a5[mz][my][mx]; - - ekx6 += x0*u_brick_a6[mz][my][mx]; - eky6 += y0*u_brick_a6[mz][my][mx]; - ekz6 += z0*u_brick_a6[mz][my][mx]; - } - } - } - - ekx0 *= hx_inv; - eky0 *= hy_inv; - ekz0 *= hz_inv; - - ekx1 *= hx_inv; - eky1 *= hy_inv; - ekz1 *= hz_inv; - - ekx2 *= hx_inv; - eky2 *= 
hy_inv; - ekz2 *= hz_inv; - - ekx3 *= hx_inv; - eky3 *= hy_inv; - ekz3 *= hz_inv; - - ekx4 *= hx_inv; - eky4 *= hy_inv; - ekz4 *= hz_inv; - - ekx5 *= hx_inv; - eky5 *= hy_inv; - ekz5 *= hz_inv; - - ekx6 *= hx_inv; - eky6 *= hy_inv; - ekz6 *= hz_inv; - - // convert D-field to force - type = atom->type[i]; - lj0 = B[7*type+6]; - lj1 = B[7*type+5]; - lj2 = B[7*type+4]; - lj3 = B[7*type+3]; - lj4 = B[7*type+2]; - lj5 = B[7*type+1]; - lj6 = B[7*type]; - - s1 = x[i][0]*hx_inv; - s2 = x[i][1]*hy_inv; - s3 = x[i][2]*hz_inv; - - sf = sf_coeff_6[0]*sin(2*MY_PI*s1); - sf += sf_coeff_6[1]*sin(4*MY_PI*s1); - sf *= 4*lj0*lj6 + 4*lj1*lj5 + 4*lj2*lj4 + 2*lj3*lj3; - f[i][0] += lj0*ekx0 + lj1*ekx1 + lj2*ekx2 + lj3*ekx3 + lj4*ekx4 + lj5*ekx5 + lj6*ekx6 - sf; - - sf = sf_coeff_6[2]*sin(2*MY_PI*s2); - sf += sf_coeff_6[3]*sin(4*MY_PI*s2); - sf *= 4*lj0*lj6 + 4*lj1*lj5 + 4*lj2*lj4 + 2*lj3*lj3; - f[i][1] += lj0*eky0 + lj1*eky1 + lj2*eky2 + lj3*eky3 + lj4*eky4 + lj5*eky5 + lj6*eky6 - sf; - - sf = sf_coeff_6[4]*sin(2*MY_PI*s3); - sf += sf_coeff_6[5]*sin(4*MY_PI*s3); - sf *= 4*lj0*lj6 + 4*lj1*lj5 + 4*lj2*lj4 + 2*lj3*lj3; - if (slabflag != 2) f[i][2] += lj0*ekz0 + lj1*ekz1 + lj2*ekz2 + lj3*ekz3 + lj4*ekz4 + lj5*ekz5 + lj6*ekz6 - sf; - } -} - -/* ---------------------------------------------------------------------- - interpolate from grid to get dispersion field & force on my particles - for arithmetic mixing rule for per atom quantities -------------------------------------------------------------------------- */ - -void PPPMDisp::fieldforce_a_peratom() -{ - int i,l,m,n,nx,ny,nz,mx,my,mz; - FFT_SCALAR dx,dy,dz,x0,y0,z0; - FFT_SCALAR u_pa0,v00,v10,v20,v30,v40,v50; - FFT_SCALAR u_pa1,v01,v11,v21,v31,v41,v51; - FFT_SCALAR u_pa2,v02,v12,v22,v32,v42,v52; - FFT_SCALAR u_pa3,v03,v13,v23,v33,v43,v53; - FFT_SCALAR u_pa4,v04,v14,v24,v34,v44,v54; - FFT_SCALAR u_pa5,v05,v15,v25,v35,v45,v55; - FFT_SCALAR u_pa6,v06,v16,v26,v36,v46,v56; - - // loop over my charges, interpolate electric field from nearby 
grid points - // (nx,ny,nz) = global coords of grid pt to "lower left" of charge - // (dx,dy,dz) = distance to "lower left" grid pt - // (mx,my,mz) = global coords of moving stencil pt - // ek = 3 components of dispersion field on particle - - double **x = atom->x; - int type; - double lj0, lj1, lj2, lj3, lj4, lj5, lj6; - - int nlocal = atom->nlocal; - - for (i = 0; i < nlocal; i++) { - - nx = part2grid_6[i][0]; - ny = part2grid_6[i][1]; - nz = part2grid_6[i][2]; - dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6; - dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6; - dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6; - compute_rho1d(dx,dy,dz, order_6, rho_coeff_6, rho1d_6); - - u_pa0 = v00 = v10 = v20 = v30 = v40 = v50 = ZEROF; - u_pa1 = v01 = v11 = v21 = v31 = v41 = v51 = ZEROF; - u_pa2 = v02 = v12 = v22 = v32 = v42 = v52 = ZEROF; - u_pa3 = v03 = v13 = v23 = v33 = v43 = v53 = ZEROF; - u_pa4 = v04 = v14 = v24 = v34 = v44 = v54 = ZEROF; - u_pa5 = v05 = v15 = v25 = v35 = v45 = v55 = ZEROF; - u_pa6 = v06 = v16 = v26 = v36 = v46 = v56 = ZEROF; - for (n = nlower_6; n <= nupper_6; n++) { - mz = n+nz; - z0 = rho1d_6[2][n]; - for (m = nlower_6; m <= nupper_6; m++) { - my = m+ny; - y0 = z0*rho1d_6[1][m]; - for (l = nlower_6; l <= nupper_6; l++) { - mx = l+nx; - x0 = y0*rho1d_6[0][l]; - if (eflag_atom) { - u_pa0 += x0*u_brick_a0[mz][my][mx]; - u_pa1 += x0*u_brick_a1[mz][my][mx]; - u_pa2 += x0*u_brick_a2[mz][my][mx]; - u_pa3 += x0*u_brick_a3[mz][my][mx]; - u_pa4 += x0*u_brick_a4[mz][my][mx]; - u_pa5 += x0*u_brick_a5[mz][my][mx]; - u_pa6 += x0*u_brick_a6[mz][my][mx]; - } - if (vflag_atom) { - v00 += x0*v0_brick_a0[mz][my][mx]; - v10 += x0*v1_brick_a0[mz][my][mx]; - v20 += x0*v2_brick_a0[mz][my][mx]; - v30 += x0*v3_brick_a0[mz][my][mx]; - v40 += x0*v4_brick_a0[mz][my][mx]; - v50 += x0*v5_brick_a0[mz][my][mx]; - v01 += x0*v0_brick_a1[mz][my][mx]; - v11 += x0*v1_brick_a1[mz][my][mx]; - v21 += x0*v2_brick_a1[mz][my][mx]; - v31 += x0*v3_brick_a1[mz][my][mx]; - v41 += 
x0*v4_brick_a1[mz][my][mx]; - v51 += x0*v5_brick_a1[mz][my][mx]; - v02 += x0*v0_brick_a2[mz][my][mx]; - v12 += x0*v1_brick_a2[mz][my][mx]; - v22 += x0*v2_brick_a2[mz][my][mx]; - v32 += x0*v3_brick_a2[mz][my][mx]; - v42 += x0*v4_brick_a2[mz][my][mx]; - v52 += x0*v5_brick_a2[mz][my][mx]; - v03 += x0*v0_brick_a3[mz][my][mx]; - v13 += x0*v1_brick_a3[mz][my][mx]; - v23 += x0*v2_brick_a3[mz][my][mx]; - v33 += x0*v3_brick_a3[mz][my][mx]; - v43 += x0*v4_brick_a3[mz][my][mx]; - v53 += x0*v5_brick_a3[mz][my][mx]; - v04 += x0*v0_brick_a4[mz][my][mx]; - v14 += x0*v1_brick_a4[mz][my][mx]; - v24 += x0*v2_brick_a4[mz][my][mx]; - v34 += x0*v3_brick_a4[mz][my][mx]; - v44 += x0*v4_brick_a4[mz][my][mx]; - v54 += x0*v5_brick_a4[mz][my][mx]; - v05 += x0*v0_brick_a5[mz][my][mx]; - v15 += x0*v1_brick_a5[mz][my][mx]; - v25 += x0*v2_brick_a5[mz][my][mx]; - v35 += x0*v3_brick_a5[mz][my][mx]; - v45 += x0*v4_brick_a5[mz][my][mx]; - v55 += x0*v5_brick_a5[mz][my][mx]; - v06 += x0*v0_brick_a6[mz][my][mx]; - v16 += x0*v1_brick_a6[mz][my][mx]; - v26 += x0*v2_brick_a6[mz][my][mx]; - v36 += x0*v3_brick_a6[mz][my][mx]; - v46 += x0*v4_brick_a6[mz][my][mx]; - v56 += x0*v5_brick_a6[mz][my][mx]; - } - } - } - } - // convert D-field to force - type = atom->type[i]; - lj0 = B[7*type+6]*0.5; - lj1 = B[7*type+5]*0.5; - lj2 = B[7*type+4]*0.5; - lj3 = B[7*type+3]*0.5; - lj4 = B[7*type+2]*0.5; - lj5 = B[7*type+1]*0.5; - lj6 = B[7*type]*0.5; - - - if (eflag_atom) - eatom[i] += u_pa0*lj0 + u_pa1*lj1 + u_pa2*lj2 + - u_pa3*lj3 + u_pa4*lj4 + u_pa5*lj5 + u_pa6*lj6; - if (vflag_atom) { - vatom[i][0] += v00*lj0 + v01*lj1 + v02*lj2 + v03*lj3 + - v04*lj4 + v05*lj5 + v06*lj6; - vatom[i][1] += v10*lj0 + v11*lj1 + v12*lj2 + v13*lj3 + - v14*lj4 + v15*lj5 + v16*lj6; - vatom[i][2] += v20*lj0 + v21*lj1 + v22*lj2 + v23*lj3 + - v24*lj4 + v25*lj5 + v26*lj6; - vatom[i][3] += v30*lj0 + v31*lj1 + v32*lj2 + v33*lj3 + - v34*lj4 + v35*lj5 + v36*lj6; - vatom[i][4] += v40*lj0 + v41*lj1 + v42*lj2 + v43*lj3 + - v44*lj4 + v45*lj5 + v46*lj6; 
- vatom[i][5] += v50*lj0 + v51*lj1 + v52*lj2 + v53*lj3 + - v54*lj4 + v55*lj5 + v56*lj6; - } - } -} - -/* ---------------------------------------------------------------------- - interpolate from grid to get dispersion field & force on my particles - for arithmetic mixing rule and ik scheme -------------------------------------------------------------------------- */ - -void PPPMDisp::fieldforce_none_ik() -{ - int i,k,l,m,n,nx,ny,nz,mx,my,mz; - FFT_SCALAR dx,dy,dz,x0,y0,z0; - FFT_SCALAR *ekx, *eky, *ekz; - - ekx = new FFT_SCALAR[nsplit]; - eky = new FFT_SCALAR[nsplit]; - ekz = new FFT_SCALAR[nsplit]; - // loop over my charges, interpolate electric field from nearby grid points - // (nx,ny,nz) = global coords of grid pt to "lower left" of charge - // (dx,dy,dz) = distance to "lower left" grid pt - // (mx,my,mz) = global coords of moving stencil pt - // ek = 3 components of dispersion field on particle - - double **x = atom->x; - double **f = atom->f; - int type; - double lj; - - int nlocal = atom->nlocal; - - for (i = 0; i < nlocal; i++) { - - nx = part2grid_6[i][0]; - ny = part2grid_6[i][1]; - nz = part2grid_6[i][2]; - dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6; - dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6; - dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6; - compute_rho1d(dx,dy,dz, order_6, rho_coeff_6, rho1d_6); - for (k = 0; k < nsplit; k++) - ekx[k] = eky[k] = ekz[k] = ZEROF; - for (n = nlower_6; n <= nupper_6; n++) { - mz = n+nz; - z0 = rho1d_6[2][n]; - for (m = nlower_6; m <= nupper_6; m++) { - my = m+ny; - y0 = z0*rho1d_6[1][m]; - for (l = nlower_6; l <= nupper_6; l++) { - mx = l+nx; - x0 = y0*rho1d_6[0][l]; - for (k = 0; k < nsplit; k++) { - ekx[k] -= x0*vdx_brick_none[k][mz][my][mx]; - eky[k] -= x0*vdy_brick_none[k][mz][my][mx]; - ekz[k] -= x0*vdz_brick_none[k][mz][my][mx]; - } - } - } - } - // convert D-field to force - type = atom->type[i]; - for (k = 0; k < nsplit; k++) { - lj = B[nsplit*type + k]; - f[i][0] += lj*ekx[k]; - f[i][1] 
+=lj*eky[k]; - if (slabflag != 2) f[i][2] +=lj*ekz[k]; - } - } - - delete [] ekx; - delete [] eky; - delete [] ekz; -} - -/* ---------------------------------------------------------------------- - interpolate from grid to get dispersion field & force on my particles - for arithmetic mixing rule for the ad scheme -------------------------------------------------------------------------- */ - -void PPPMDisp::fieldforce_none_ad() -{ - int i,k,l,m,n,nx,ny,nz,mx,my,mz; - FFT_SCALAR dx,dy,dz,x0,y0,z0; - FFT_SCALAR *ekx, *eky, *ekz; - - ekx = new FFT_SCALAR[nsplit]; - eky = new FFT_SCALAR[nsplit]; - ekz = new FFT_SCALAR[nsplit]; - - - double s1,s2,s3; - double sf1,sf2,sf3; - double sf = 0.0; - double *prd; - - if (triclinic == 0) prd = domain->prd; - else prd = domain->prd_lamda; - - double xprd = prd[0]; - double yprd = prd[1]; - double zprd = prd[2]; - double zprd_slab = zprd*slab_volfactor; - - double hx_inv = nx_pppm_6/xprd; - double hy_inv = ny_pppm_6/yprd; - double hz_inv = nz_pppm_6/zprd_slab; - - // loop over my charges, interpolate electric field from nearby grid points - // (nx,ny,nz) = global coords of grid pt to "lower left" of charge - // (dx,dy,dz) = distance to "lower left" grid pt - // (mx,my,mz) = global coords of moving stencil pt - // ek = 3 components of dispersion field on particle - - double **x = atom->x; - double **f = atom->f; - int type; - double lj; - - int nlocal = atom->nlocal; - - for (i = 0; i < nlocal; i++) { - - nx = part2grid_6[i][0]; - ny = part2grid_6[i][1]; - nz = part2grid_6[i][2]; - dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6; - dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6; - dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6; - - compute_rho1d(dx,dy,dz, order_6, rho_coeff_6, rho1d_6); - compute_drho1d(dx,dy,dz, order_6, drho_coeff_6, drho1d_6); - - for (k = 0; k < nsplit; k++) - ekx[k] = eky[k] = ekz[k] = ZEROF; - - for (n = nlower_6; n <= nupper_6; n++) { - mz = n+nz; - for (m = nlower_6; m <= nupper_6; m++) { - my = 
m+ny; - for (l = nlower_6; l <= nupper_6; l++) { - mx = l+nx; - x0 = drho1d_6[0][l]*rho1d_6[1][m]*rho1d_6[2][n]; - y0 = rho1d_6[0][l]*drho1d_6[1][m]*rho1d_6[2][n]; - z0 = rho1d_6[0][l]*rho1d_6[1][m]*drho1d_6[2][n]; - - for (k = 0; k < nsplit; k++) { - ekx[k] += x0*u_brick_none[k][mz][my][mx]; - eky[k] += y0*u_brick_none[k][mz][my][mx]; - ekz[k] += z0*u_brick_none[k][mz][my][mx]; - } - } - } - } - - for (k = 0; k < nsplit; k++) { - ekx[k] *= hx_inv; - eky[k] *= hy_inv; - ekz[k] *= hz_inv; - } - - // convert D-field to force - type = atom->type[i]; - - s1 = x[i][0]*hx_inv; - s2 = x[i][1]*hy_inv; - s3 = x[i][2]*hz_inv; - - sf1 = sf_coeff_6[0]*sin(2*MY_PI*s1); - sf1 += sf_coeff_6[1]*sin(4*MY_PI*s1); - - sf2 = sf_coeff_6[2]*sin(2*MY_PI*s2); - sf2 += sf_coeff_6[3]*sin(4*MY_PI*s2); - - sf3 = sf_coeff_6[4]*sin(2*MY_PI*s3); - sf3 += sf_coeff_6[5]*sin(4*MY_PI*s3); - - for (k = 0; k < nsplit; k++) { - lj = B[nsplit*type + k]; - - sf = sf1*B[k]*2*lj*lj; - f[i][0] += lj*ekx[k] - sf; - - - sf = sf2*B[k]*2*lj*lj; - f[i][1] += lj*eky[k] - sf; - - sf = sf3*B[k]*2*lj*lj; - if (slabflag != 2) f[i][2] += lj*ekz[k] - sf; - } - } - - delete [] ekx; - delete [] eky; - delete [] ekz; -} - -/* ---------------------------------------------------------------------- - interpolate from grid to get dispersion field & force on my particles - for arithmetic mixing rule for per atom quantities -------------------------------------------------------------------------- */ - -void PPPMDisp::fieldforce_none_peratom() -{ - int i,k,l,m,n,nx,ny,nz,mx,my,mz; - FFT_SCALAR dx,dy,dz,x0,y0,z0; - FFT_SCALAR *u_pa,*v0,*v1,*v2,*v3,*v4,*v5; - - u_pa = new FFT_SCALAR[nsplit]; - v0 = new FFT_SCALAR[nsplit]; - v1 = new FFT_SCALAR[nsplit]; - v2 = new FFT_SCALAR[nsplit]; - v3 = new FFT_SCALAR[nsplit]; - v4 = new FFT_SCALAR[nsplit]; - v5 = new FFT_SCALAR[nsplit]; - - // loop over my charges, interpolate electric field from nearby grid points - // (nx,ny,nz) = global coords of grid pt to "lower left" of charge - // 
(dx,dy,dz) = distance to "lower left" grid pt - // (mx,my,mz) = global coords of moving stencil pt - // ek = 3 components of dispersion field on particle - - double **x = atom->x; - int type; - double lj; - - int nlocal = atom->nlocal; - - for (i = 0; i < nlocal; i++) { - - nx = part2grid_6[i][0]; - ny = part2grid_6[i][1]; - nz = part2grid_6[i][2]; - dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6; - dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6; - dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6; - compute_rho1d(dx,dy,dz, order_6, rho_coeff_6, rho1d_6); - - for (k = 0; k < nsplit; k++) - u_pa[k] = v0[k] = v1[k] = v2[k] = v3[k] = v4[k] = v5[k] = ZEROF; - - for (n = nlower_6; n <= nupper_6; n++) { - mz = n+nz; - z0 = rho1d_6[2][n]; - for (m = nlower_6; m <= nupper_6; m++) { - my = m+ny; - y0 = z0*rho1d_6[1][m]; - for (l = nlower_6; l <= nupper_6; l++) { - mx = l+nx; - x0 = y0*rho1d_6[0][l]; - if (eflag_atom) { - for (k = 0; k < nsplit; k++) - u_pa[k] += x0*u_brick_none[k][mz][my][mx]; - } - if (vflag_atom) { - for (k = 0; k < nsplit; k++) { - v0[k] += x0*v0_brick_none[k][mz][my][mx]; - v1[k] += x0*v1_brick_none[k][mz][my][mx]; - v2[k] += x0*v2_brick_none[k][mz][my][mx]; - v3[k] += x0*v3_brick_none[k][mz][my][mx]; - v4[k] += x0*v4_brick_none[k][mz][my][mx]; - v5[k] += x0*v5_brick_none[k][mz][my][mx]; - } - } - } - } - } - // convert D-field to force - type = atom->type[i]; - for (k = 0; k < nsplit; k++) { - lj = B[nsplit*type + k]*0.5; - - if (eflag_atom) { - eatom[i] += u_pa[k]*lj; - } - if (vflag_atom) { - vatom[i][0] += v0[k]*lj; - vatom[i][1] += v1[k]*lj; - vatom[i][2] += v2[k]*lj; - vatom[i][3] += v3[k]*lj; - vatom[i][4] += v4[k]*lj; - vatom[i][5] += v5[k]*lj; - } - } - } - - delete [] u_pa; - delete [] v0; - delete [] v1; - delete [] v2; - delete [] v3; - delete [] v4; - delete [] v5; -} - -/* ---------------------------------------------------------------------- - pack values to buf to send to another proc 
-------------------------------------------------------------------------- */ - -void PPPMDisp::pack_forward(int flag, FFT_SCALAR *buf, int nlist, int *list) -{ - int n = 0; - - switch (flag) { - - // Coulomb interactions - - case FORWARD_IK: { - FFT_SCALAR *xsrc = &vdx_brick[nzlo_out][nylo_out][nxlo_out]; - FFT_SCALAR *ysrc = &vdy_brick[nzlo_out][nylo_out][nxlo_out]; - FFT_SCALAR *zsrc = &vdz_brick[nzlo_out][nylo_out][nxlo_out]; - for (int i = 0; i < nlist; i++) { - buf[n++] = xsrc[list[i]]; - buf[n++] = ysrc[list[i]]; - buf[n++] = zsrc[list[i]]; - } - break; - } - - case FORWARD_AD: { - FFT_SCALAR *src = &u_brick[nzlo_out][nylo_out][nxlo_out]; - for (int i = 0; i < nlist; i++) - buf[i] = src[list[i]]; - break; - } - - case FORWARD_IK_PERATOM: { - FFT_SCALAR *esrc = &u_brick[nzlo_out][nylo_out][nxlo_out]; - FFT_SCALAR *v0src = &v0_brick[nzlo_out][nylo_out][nxlo_out]; - FFT_SCALAR *v1src = &v1_brick[nzlo_out][nylo_out][nxlo_out]; - FFT_SCALAR *v2src = &v2_brick[nzlo_out][nylo_out][nxlo_out]; - FFT_SCALAR *v3src = &v3_brick[nzlo_out][nylo_out][nxlo_out]; - FFT_SCALAR *v4src = &v4_brick[nzlo_out][nylo_out][nxlo_out]; - FFT_SCALAR *v5src = &v5_brick[nzlo_out][nylo_out][nxlo_out]; - for (int i = 0; i < nlist; i++) { - if (eflag_atom) buf[n++] = esrc[list[i]]; - if (vflag_atom) { - buf[n++] = v0src[list[i]]; - buf[n++] = v1src[list[i]]; - buf[n++] = v2src[list[i]]; - buf[n++] = v3src[list[i]]; - buf[n++] = v4src[list[i]]; - buf[n++] = v5src[list[i]]; - } - } - break; - } - - case FORWARD_AD_PERATOM: { - FFT_SCALAR *v0src = &v0_brick[nzlo_out][nylo_out][nxlo_out]; - FFT_SCALAR *v1src = &v1_brick[nzlo_out][nylo_out][nxlo_out]; - FFT_SCALAR *v2src = &v2_brick[nzlo_out][nylo_out][nxlo_out]; - FFT_SCALAR *v3src = &v3_brick[nzlo_out][nylo_out][nxlo_out]; - FFT_SCALAR *v4src = &v4_brick[nzlo_out][nylo_out][nxlo_out]; - FFT_SCALAR *v5src = &v5_brick[nzlo_out][nylo_out][nxlo_out]; - for (int i = 0; i < nlist; i++) { - buf[n++] = v0src[list[i]]; - buf[n++] = v1src[list[i]]; - 
buf[n++] = v2src[list[i]]; - buf[n++] = v3src[list[i]]; - buf[n++] = v4src[list[i]]; - buf[n++] = v5src[list[i]]; - } - break; - } - - // Dispersion interactions, geometric mixing - - case FORWARD_IK_G: { - FFT_SCALAR *xsrc = &vdx_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *ysrc = &vdy_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *zsrc = &vdz_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; - for (int i = 0; i < nlist; i++) { - buf[n++] = xsrc[list[i]]; - buf[n++] = ysrc[list[i]]; - buf[n++] = zsrc[list[i]]; - } - break; - } - - case FORWARD_AD_G: { - FFT_SCALAR *src = &u_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; - for (int i = 0; i < nlist; i++) - buf[i] = src[list[i]]; - break; - } - - case FORWARD_IK_PERATOM_G: { - FFT_SCALAR *esrc = &u_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v0src = &v0_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v1src = &v1_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v2src = &v2_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v3src = &v3_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v4src = &v4_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v5src = &v5_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; - for (int i = 0; i < nlist; i++) { - if (eflag_atom) buf[n++] = esrc[list[i]]; - if (vflag_atom) { - buf[n++] = v0src[list[i]]; - buf[n++] = v1src[list[i]]; - buf[n++] = v2src[list[i]]; - buf[n++] = v3src[list[i]]; - buf[n++] = v4src[list[i]]; - buf[n++] = v5src[list[i]]; - } - } - break; - } - - case FORWARD_AD_PERATOM_G: { - FFT_SCALAR *v0src = &v0_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v1src = &v1_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v2src = &v2_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v3src = &v3_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v4src = &v4_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v5src = 
&v5_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; - for (int i = 0; i < nlist; i++) { - buf[n++] = v0src[list[i]]; - buf[n++] = v1src[list[i]]; - buf[n++] = v2src[list[i]]; - buf[n++] = v3src[list[i]]; - buf[n++] = v4src[list[i]]; - buf[n++] = v5src[list[i]]; - } - break; - } - - // Dispersion interactions, arithmetic mixing - - case FORWARD_IK_A: { - FFT_SCALAR *xsrc0 = &vdx_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *ysrc0 = &vdy_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *zsrc0 = &vdz_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; - - FFT_SCALAR *xsrc1 = &vdx_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *ysrc1 = &vdy_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *zsrc1 = &vdz_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; - - FFT_SCALAR *xsrc2 = &vdx_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *ysrc2 = &vdy_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *zsrc2 = &vdz_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; - - FFT_SCALAR *xsrc3 = &vdx_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *ysrc3 = &vdy_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *zsrc3 = &vdz_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; - - FFT_SCALAR *xsrc4 = &vdx_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *ysrc4 = &vdy_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *zsrc4 = &vdz_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; - - FFT_SCALAR *xsrc5 = &vdx_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *ysrc5 = &vdy_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *zsrc5 = &vdz_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; - - FFT_SCALAR *xsrc6 = &vdx_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *ysrc6 = &vdy_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *zsrc6 = &vdz_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; - - for (int i = 0; i < nlist; i++) { - buf[n++] = xsrc0[list[i]]; - buf[n++] = ysrc0[list[i]]; - 
buf[n++] = zsrc0[list[i]]; - - buf[n++] = xsrc1[list[i]]; - buf[n++] = ysrc1[list[i]]; - buf[n++] = zsrc1[list[i]]; - - buf[n++] = xsrc2[list[i]]; - buf[n++] = ysrc2[list[i]]; - buf[n++] = zsrc2[list[i]]; - - buf[n++] = xsrc3[list[i]]; - buf[n++] = ysrc3[list[i]]; - buf[n++] = zsrc3[list[i]]; - - buf[n++] = xsrc4[list[i]]; - buf[n++] = ysrc4[list[i]]; - buf[n++] = zsrc4[list[i]]; - - buf[n++] = xsrc5[list[i]]; - buf[n++] = ysrc5[list[i]]; - buf[n++] = zsrc5[list[i]]; - - buf[n++] = xsrc6[list[i]]; - buf[n++] = ysrc6[list[i]]; - buf[n++] = zsrc6[list[i]]; - } - break; - } - - case FORWARD_AD_A: { - FFT_SCALAR *src0 = &u_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *src1 = &u_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *src2 = &u_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *src3 = &u_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *src4 = &u_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *src5 = &u_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *src6 = &u_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; - - for (int i = 0; i < nlist; i++) { - buf[n++] = src0[list[i]]; - buf[n++] = src1[list[i]]; - buf[n++] = src2[list[i]]; - buf[n++] = src3[list[i]]; - buf[n++] = src4[list[i]]; - buf[n++] = src5[list[i]]; - buf[n++] = src6[list[i]]; - } - break; - } - - case FORWARD_IK_PERATOM_A: { - FFT_SCALAR *esrc0 = &u_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v0src0 = &v0_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v1src0 = &v1_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v2src0 = &v2_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v3src0 = &v3_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v4src0 = &v4_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v5src0 = &v5_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; - - FFT_SCALAR *esrc1 = &u_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v0src1 = 
&v0_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v1src1 = &v1_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v2src1 = &v2_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v3src1 = &v3_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v4src1 = &v4_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v5src1 = &v5_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; - - FFT_SCALAR *esrc2 = &u_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v0src2 = &v0_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v1src2 = &v1_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v2src2 = &v2_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v3src2 = &v3_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v4src2 = &v4_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v5src2 = &v5_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; - - FFT_SCALAR *esrc3 = &u_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v0src3 = &v0_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v1src3 = &v1_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v2src3 = &v2_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v3src3 = &v3_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v4src3 = &v4_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v5src3 = &v5_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; - - FFT_SCALAR *esrc4 = &u_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v0src4 = &v0_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v1src4 = &v1_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v2src4 = &v2_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v3src4 = &v3_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v4src4 = &v4_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v5src4 = &v5_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; - - FFT_SCALAR *esrc5 = 
&u_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v0src5 = &v0_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v1src5 = &v1_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v2src5 = &v2_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v3src5 = &v3_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v4src5 = &v4_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v5src5 = &v5_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; - - FFT_SCALAR *esrc6 = &u_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v0src6 = &v0_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v1src6 = &v1_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v2src6 = &v2_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v3src6 = &v3_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v4src6 = &v4_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v5src6 = &v5_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; - - for (int i = 0; i < nlist; i++) { - if (eflag_atom) { - buf[n++] = esrc0[list[i]]; - buf[n++] = esrc1[list[i]]; - buf[n++] = esrc2[list[i]]; - buf[n++] = esrc3[list[i]]; - buf[n++] = esrc4[list[i]]; - buf[n++] = esrc5[list[i]]; - buf[n++] = esrc6[list[i]]; - } - if (vflag_atom) { - buf[n++] = v0src0[list[i]]; - buf[n++] = v1src0[list[i]]; - buf[n++] = v2src0[list[i]]; - buf[n++] = v3src0[list[i]]; - buf[n++] = v4src0[list[i]]; - buf[n++] = v5src0[list[i]]; - - buf[n++] = v0src1[list[i]]; - buf[n++] = v1src1[list[i]]; - buf[n++] = v2src1[list[i]]; - buf[n++] = v3src1[list[i]]; - buf[n++] = v4src1[list[i]]; - buf[n++] = v5src1[list[i]]; - - buf[n++] = v0src2[list[i]]; - buf[n++] = v1src2[list[i]]; - buf[n++] = v2src2[list[i]]; - buf[n++] = v3src2[list[i]]; - buf[n++] = v4src2[list[i]]; - buf[n++] = v5src2[list[i]]; - - buf[n++] = v0src3[list[i]]; - buf[n++] = v1src3[list[i]]; - buf[n++] = v2src3[list[i]]; - buf[n++] = v3src3[list[i]]; - buf[n++] = v4src3[list[i]]; - buf[n++] = 
v5src3[list[i]]; - - buf[n++] = v0src4[list[i]]; - buf[n++] = v1src4[list[i]]; - buf[n++] = v2src4[list[i]]; - buf[n++] = v3src4[list[i]]; - buf[n++] = v4src4[list[i]]; - buf[n++] = v5src4[list[i]]; - - buf[n++] = v0src5[list[i]]; - buf[n++] = v1src5[list[i]]; - buf[n++] = v2src5[list[i]]; - buf[n++] = v3src5[list[i]]; - buf[n++] = v4src5[list[i]]; - buf[n++] = v5src5[list[i]]; - - buf[n++] = v0src6[list[i]]; - buf[n++] = v1src6[list[i]]; - buf[n++] = v2src6[list[i]]; - buf[n++] = v3src6[list[i]]; - buf[n++] = v4src6[list[i]]; - buf[n++] = v5src6[list[i]]; - } - } - break; - } - - case FORWARD_AD_PERATOM_A: { - FFT_SCALAR *v0src0 = &v0_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v1src0 = &v1_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v2src0 = &v2_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v3src0 = &v3_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v4src0 = &v4_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v5src0 = &v5_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; - - FFT_SCALAR *v0src1 = &v0_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v1src1 = &v1_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v2src1 = &v2_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v3src1 = &v3_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v4src1 = &v4_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v5src1 = &v5_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; - - FFT_SCALAR *v0src2 = &v0_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v1src2 = &v1_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v2src2 = &v2_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v3src2 = &v3_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v4src2 = &v4_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v5src2 = &v5_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; - - FFT_SCALAR *v0src3 = 
&v0_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v1src3 = &v1_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v2src3 = &v2_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v3src3 = &v3_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v4src3 = &v4_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v5src3 = &v5_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; - - FFT_SCALAR *v0src4 = &v0_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v1src4 = &v1_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v2src4 = &v2_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v3src4 = &v3_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v4src4 = &v4_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v5src4 = &v5_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; - - FFT_SCALAR *v0src5 = &v0_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v1src5 = &v1_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v2src5 = &v2_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v3src5 = &v3_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v4src5 = &v4_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v5src5 = &v5_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; - - FFT_SCALAR *v0src6 = &v0_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v1src6 = &v1_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v2src6 = &v2_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v3src6 = &v3_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v4src6 = &v4_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v5src6 = &v5_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; - - for (int i = 0; i < nlist; i++) { - buf[n++] = v0src0[list[i]]; - buf[n++] = v1src0[list[i]]; - buf[n++] = v2src0[list[i]]; - buf[n++] = v3src0[list[i]]; - buf[n++] = v4src0[list[i]]; - buf[n++] = v5src0[list[i]]; - - buf[n++] = v0src1[list[i]]; - buf[n++] = 
v1src1[list[i]]; - buf[n++] = v2src1[list[i]]; - buf[n++] = v3src1[list[i]]; - buf[n++] = v4src1[list[i]]; - buf[n++] = v5src1[list[i]]; - - buf[n++] = v0src2[list[i]]; - buf[n++] = v1src2[list[i]]; - buf[n++] = v2src2[list[i]]; - buf[n++] = v3src2[list[i]]; - buf[n++] = v4src2[list[i]]; - buf[n++] = v5src2[list[i]]; - - buf[n++] = v0src3[list[i]]; - buf[n++] = v1src3[list[i]]; - buf[n++] = v2src3[list[i]]; - buf[n++] = v3src3[list[i]]; - buf[n++] = v4src3[list[i]]; - buf[n++] = v5src3[list[i]]; - - buf[n++] = v0src4[list[i]]; - buf[n++] = v1src4[list[i]]; - buf[n++] = v2src4[list[i]]; - buf[n++] = v3src4[list[i]]; - buf[n++] = v4src4[list[i]]; - buf[n++] = v5src4[list[i]]; - - buf[n++] = v0src5[list[i]]; - buf[n++] = v1src5[list[i]]; - buf[n++] = v2src5[list[i]]; - buf[n++] = v3src5[list[i]]; - buf[n++] = v4src5[list[i]]; - buf[n++] = v5src5[list[i]]; - - buf[n++] = v0src6[list[i]]; - buf[n++] = v1src6[list[i]]; - buf[n++] = v2src6[list[i]]; - buf[n++] = v3src6[list[i]]; - buf[n++] = v4src6[list[i]]; - buf[n++] = v5src6[list[i]]; - } - break; - } - - // Dispersion interactions, no mixing - - case FORWARD_IK_NONE: { - for (int k = 0; k < nsplit_alloc; k++) { - FFT_SCALAR *xsrc = &vdx_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *ysrc = &vdy_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *zsrc = &vdz_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; - for (int i = 0; i < nlist; i++) { - buf[n++] = xsrc[list[i]]; - buf[n++] = ysrc[list[i]]; - buf[n++] = zsrc[list[i]]; - } - } - break; - } - - case FORWARD_AD_NONE: { - for (int k = 0; k < nsplit_alloc; k++) { - FFT_SCALAR *src = &u_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; - for (int i = 0; i < nlist; i++) - buf[n++] = src[list[i]]; - } - break; - } - - case FORWARD_IK_PERATOM_NONE: { - for (int k = 0; k < nsplit_alloc; k++) { - FFT_SCALAR *esrc = &u_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v0src = 
&v0_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v1src = &v1_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v2src = &v2_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v3src = &v3_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v4src = &v4_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v5src = &v5_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; - for (int i = 0; i < nlist; i++) { - if (eflag_atom) buf[n++] = esrc[list[i]]; - if (vflag_atom) { - buf[n++] = v0src[list[i]]; - buf[n++] = v1src[list[i]]; - buf[n++] = v2src[list[i]]; - buf[n++] = v3src[list[i]]; - buf[n++] = v4src[list[i]]; - buf[n++] = v5src[list[i]]; - } - } - } - break; - } - - case FORWARD_AD_PERATOM_NONE: { - for (int k = 0; k < nsplit_alloc; k++) { - FFT_SCALAR *v0src = &v0_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v1src = &v1_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v2src = &v2_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v3src = &v3_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v4src = &v4_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v5src = &v5_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; - for (int i = 0; i < nlist; i++) { - buf[n++] = v0src[list[i]]; - buf[n++] = v1src[list[i]]; - buf[n++] = v2src[list[i]]; - buf[n++] = v3src[list[i]]; - buf[n++] = v4src[list[i]]; - buf[n++] = v5src[list[i]]; - } - } - break; - } - - } -} - -/* ---------------------------------------------------------------------- - unpack another proc's own values from buf and set own ghost values -------------------------------------------------------------------------- */ - -void PPPMDisp::unpack_forward(int flag, FFT_SCALAR *buf, int nlist, int *list) -{ - int n = 0; - - switch (flag) { - - // Coulomb interactions - - case FORWARD_IK: { - FFT_SCALAR *xdest = &vdx_brick[nzlo_out][nylo_out][nxlo_out]; - FFT_SCALAR *ydest = 
&vdy_brick[nzlo_out][nylo_out][nxlo_out]; - FFT_SCALAR *zdest = &vdz_brick[nzlo_out][nylo_out][nxlo_out]; - for (int i = 0; i < nlist; i++) { - xdest[list[i]] = buf[n++]; - ydest[list[i]] = buf[n++]; - zdest[list[i]] = buf[n++]; - } - break; - } - - case FORWARD_AD: { - FFT_SCALAR *dest = &u_brick[nzlo_out][nylo_out][nxlo_out]; - for (int i = 0; i < nlist; i++) - dest[list[i]] = buf[n++]; - break; - } - - case FORWARD_IK_PERATOM: { - FFT_SCALAR *esrc = &u_brick[nzlo_out][nylo_out][nxlo_out]; - FFT_SCALAR *v0src = &v0_brick[nzlo_out][nylo_out][nxlo_out]; - FFT_SCALAR *v1src = &v1_brick[nzlo_out][nylo_out][nxlo_out]; - FFT_SCALAR *v2src = &v2_brick[nzlo_out][nylo_out][nxlo_out]; - FFT_SCALAR *v3src = &v3_brick[nzlo_out][nylo_out][nxlo_out]; - FFT_SCALAR *v4src = &v4_brick[nzlo_out][nylo_out][nxlo_out]; - FFT_SCALAR *v5src = &v5_brick[nzlo_out][nylo_out][nxlo_out]; - for (int i = 0; i < nlist; i++) { - if (eflag_atom) esrc[list[i]] = buf[n++]; - if (vflag_atom) { - v0src[list[i]] = buf[n++]; - v1src[list[i]] = buf[n++]; - v2src[list[i]] = buf[n++]; - v3src[list[i]] = buf[n++]; - v4src[list[i]] = buf[n++]; - v5src[list[i]] = buf[n++]; - } - } - break; - } - - case FORWARD_AD_PERATOM: { - FFT_SCALAR *v0src = &v0_brick[nzlo_out][nylo_out][nxlo_out]; - FFT_SCALAR *v1src = &v1_brick[nzlo_out][nylo_out][nxlo_out]; - FFT_SCALAR *v2src = &v2_brick[nzlo_out][nylo_out][nxlo_out]; - FFT_SCALAR *v3src = &v3_brick[nzlo_out][nylo_out][nxlo_out]; - FFT_SCALAR *v4src = &v4_brick[nzlo_out][nylo_out][nxlo_out]; - FFT_SCALAR *v5src = &v5_brick[nzlo_out][nylo_out][nxlo_out]; - for (int i = 0; i < nlist; i++) { - v0src[list[i]] = buf[n++]; - v1src[list[i]] = buf[n++]; - v2src[list[i]] = buf[n++]; - v3src[list[i]] = buf[n++]; - v4src[list[i]] = buf[n++]; - v5src[list[i]] = buf[n++]; - } - break; - } - - // Disperion interactions, geometric mixing - - case FORWARD_IK_G: { - FFT_SCALAR *xdest = &vdx_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *ydest = 
&vdy_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *zdest = &vdz_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; - for (int i = 0; i < nlist; i++) { - xdest[list[i]] = buf[n++]; - ydest[list[i]] = buf[n++]; - zdest[list[i]] = buf[n++]; - } - break; - } - - case FORWARD_AD_G: { - FFT_SCALAR *dest = &u_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; - for (int i = 0; i < nlist; i++) - dest[list[i]] = buf[n++]; - break; - } - - case FORWARD_IK_PERATOM_G: { - FFT_SCALAR *esrc = &u_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v0src = &v0_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v1src = &v1_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v2src = &v2_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v3src = &v3_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v4src = &v4_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v5src = &v5_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; - for (int i = 0; i < nlist; i++) { - if (eflag_atom) esrc[list[i]] = buf[n++]; - if (vflag_atom) { - v0src[list[i]] = buf[n++]; - v1src[list[i]] = buf[n++]; - v2src[list[i]] = buf[n++]; - v3src[list[i]] = buf[n++]; - v4src[list[i]] = buf[n++]; - v5src[list[i]] = buf[n++]; - } - } - break; - } - - case FORWARD_AD_PERATOM_G: { - FFT_SCALAR *v0src = &v0_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v1src = &v1_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v2src = &v2_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v3src = &v3_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v4src = &v4_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v5src = &v5_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; - for (int i = 0; i < nlist; i++) { - v0src[list[i]] = buf[n++]; - v1src[list[i]] = buf[n++]; - v2src[list[i]] = buf[n++]; - v3src[list[i]] = buf[n++]; - v4src[list[i]] = buf[n++]; - v5src[list[i]] = buf[n++]; - } - break; - } - - // Disperion interactions, arithmetic mixing - - case 
FORWARD_IK_A: { - FFT_SCALAR *xdest0 = &vdx_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *ydest0 = &vdy_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *zdest0 = &vdz_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; - - FFT_SCALAR *xdest1 = &vdx_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *ydest1 = &vdy_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *zdest1 = &vdz_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; - - FFT_SCALAR *xdest2 = &vdx_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *ydest2 = &vdy_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *zdest2 = &vdz_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; - - FFT_SCALAR *xdest3 = &vdx_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *ydest3 = &vdy_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *zdest3 = &vdz_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; - - FFT_SCALAR *xdest4 = &vdx_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *ydest4 = &vdy_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *zdest4 = &vdz_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; - - FFT_SCALAR *xdest5 = &vdx_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *ydest5 = &vdy_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *zdest5 = &vdz_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; - - FFT_SCALAR *xdest6 = &vdx_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *ydest6 = &vdy_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *zdest6 = &vdz_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; - - for (int i = 0; i < nlist; i++) { - xdest0[list[i]] = buf[n++]; - ydest0[list[i]] = buf[n++]; - zdest0[list[i]] = buf[n++]; - - xdest1[list[i]] = buf[n++]; - ydest1[list[i]] = buf[n++]; - zdest1[list[i]] = buf[n++]; - - xdest2[list[i]] = buf[n++]; - ydest2[list[i]] = buf[n++]; - zdest2[list[i]] = buf[n++]; - - xdest3[list[i]] = buf[n++]; - ydest3[list[i]] = buf[n++]; - zdest3[list[i]] = buf[n++]; - - 
xdest4[list[i]] = buf[n++]; - ydest4[list[i]] = buf[n++]; - zdest4[list[i]] = buf[n++]; - - xdest5[list[i]] = buf[n++]; - ydest5[list[i]] = buf[n++]; - zdest5[list[i]] = buf[n++]; - - xdest6[list[i]] = buf[n++]; - ydest6[list[i]] = buf[n++]; - zdest6[list[i]] = buf[n++]; - } - break; - } - - case FORWARD_AD_A: { - FFT_SCALAR *dest0 = &u_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *dest1 = &u_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *dest2 = &u_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *dest3 = &u_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *dest4 = &u_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *dest5 = &u_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *dest6 = &u_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; - - for (int i = 0; i < nlist; i++) { - dest0[list[i]] = buf[n++]; - dest1[list[i]] = buf[n++]; - dest2[list[i]] = buf[n++]; - dest3[list[i]] = buf[n++]; - dest4[list[i]] = buf[n++]; - dest5[list[i]] = buf[n++]; - dest6[list[i]] = buf[n++]; - } - break; - } - - case FORWARD_IK_PERATOM_A: { - FFT_SCALAR *esrc0 = &u_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v0src0 = &v0_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v1src0 = &v1_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v2src0 = &v2_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v3src0 = &v3_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v4src0 = &v4_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v5src0 = &v5_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; - - FFT_SCALAR *esrc1 = &u_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v0src1 = &v0_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v1src1 = &v1_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v2src1 = &v2_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v3src1 = &v3_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v4src1 = 
&v4_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v5src1 = &v5_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; - - FFT_SCALAR *esrc2 = &u_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v0src2 = &v0_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v1src2 = &v1_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v2src2 = &v2_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v3src2 = &v3_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v4src2 = &v4_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v5src2 = &v5_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; - - FFT_SCALAR *esrc3 = &u_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v0src3 = &v0_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v1src3 = &v1_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v2src3 = &v2_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v3src3 = &v3_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v4src3 = &v4_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v5src3 = &v5_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; - - FFT_SCALAR *esrc4 = &u_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v0src4 = &v0_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v1src4 = &v1_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v2src4 = &v2_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v3src4 = &v3_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v4src4 = &v4_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v5src4 = &v5_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; - - FFT_SCALAR *esrc5 = &u_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v0src5 = &v0_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v1src5 = &v1_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v2src5 = &v2_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v3src5 = 
&v3_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v4src5 = &v4_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v5src5 = &v5_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; - - FFT_SCALAR *esrc6 = &u_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v0src6 = &v0_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v1src6 = &v1_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v2src6 = &v2_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v3src6 = &v3_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v4src6 = &v4_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v5src6 = &v5_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; - - for (int i = 0; i < nlist; i++) { - if (eflag_atom) { - esrc0[list[i]] = buf[n++]; - esrc1[list[i]] = buf[n++]; - esrc2[list[i]] = buf[n++]; - esrc3[list[i]] = buf[n++]; - esrc4[list[i]] = buf[n++]; - esrc5[list[i]] = buf[n++]; - esrc6[list[i]] = buf[n++]; - } - if (vflag_atom) { - v0src0[list[i]] = buf[n++]; - v1src0[list[i]] = buf[n++]; - v2src0[list[i]] = buf[n++]; - v3src0[list[i]] = buf[n++]; - v4src0[list[i]] = buf[n++]; - v5src0[list[i]] = buf[n++]; - - v0src1[list[i]] = buf[n++]; - v1src1[list[i]] = buf[n++]; - v2src1[list[i]] = buf[n++]; - v3src1[list[i]] = buf[n++]; - v4src1[list[i]] = buf[n++]; - v5src1[list[i]] = buf[n++]; - - v0src2[list[i]] = buf[n++]; - v1src2[list[i]] = buf[n++]; - v2src2[list[i]] = buf[n++]; - v3src2[list[i]] = buf[n++]; - v4src2[list[i]] = buf[n++]; - v5src2[list[i]] = buf[n++]; - - v0src3[list[i]] = buf[n++]; - v1src3[list[i]] = buf[n++]; - v2src3[list[i]] = buf[n++]; - v3src3[list[i]] = buf[n++]; - v4src3[list[i]] = buf[n++]; - v5src3[list[i]] = buf[n++]; - - v0src4[list[i]] = buf[n++]; - v1src4[list[i]] = buf[n++]; - v2src4[list[i]] = buf[n++]; - v3src4[list[i]] = buf[n++]; - v4src4[list[i]] = buf[n++]; - v5src4[list[i]] = buf[n++]; - - v0src5[list[i]] = buf[n++]; - v1src5[list[i]] = buf[n++]; - v2src5[list[i]] = buf[n++]; - 
v3src5[list[i]] = buf[n++]; - v4src5[list[i]] = buf[n++]; - v5src5[list[i]] = buf[n++]; - - v0src6[list[i]] = buf[n++]; - v1src6[list[i]] = buf[n++]; - v2src6[list[i]] = buf[n++]; - v3src6[list[i]] = buf[n++]; - v4src6[list[i]] = buf[n++]; - v5src6[list[i]] = buf[n++]; - } - } - break; - } - - case FORWARD_AD_PERATOM_A: { - FFT_SCALAR *v0src0 = &v0_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v1src0 = &v1_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v2src0 = &v2_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v3src0 = &v3_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v4src0 = &v4_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v5src0 = &v5_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; - - FFT_SCALAR *v0src1 = &v0_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v1src1 = &v1_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v2src1 = &v2_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v3src1 = &v3_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v4src1 = &v4_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v5src1 = &v5_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; - - FFT_SCALAR *v0src2 = &v0_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v1src2 = &v1_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v2src2 = &v2_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v3src2 = &v3_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v4src2 = &v4_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v5src2 = &v5_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; - - FFT_SCALAR *v0src3 = &v0_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v1src3 = &v1_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v2src3 = &v2_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v3src3 = &v3_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v4src3 = 
&v4_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v5src3 = &v5_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; - - FFT_SCALAR *v0src4 = &v0_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v1src4 = &v1_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v2src4 = &v2_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v3src4 = &v3_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v4src4 = &v4_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v5src4 = &v5_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; - - FFT_SCALAR *v0src5 = &v0_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v1src5 = &v1_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v2src5 = &v2_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v3src5 = &v3_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v4src5 = &v4_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v5src5 = &v5_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; - - FFT_SCALAR *v0src6 = &v0_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v1src6 = &v1_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v2src6 = &v2_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v3src6 = &v3_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v4src6 = &v4_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v5src6 = &v5_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; - - for (int i = 0; i < nlist; i++) { - v0src0[list[i]] = buf[n++]; - v1src0[list[i]] = buf[n++]; - v2src0[list[i]] = buf[n++]; - v3src0[list[i]] = buf[n++]; - v4src0[list[i]] = buf[n++]; - v5src0[list[i]] = buf[n++]; - - v0src1[list[i]] = buf[n++]; - v1src1[list[i]] = buf[n++]; - v2src1[list[i]] = buf[n++]; - v3src1[list[i]] = buf[n++]; - v4src1[list[i]] = buf[n++]; - v5src1[list[i]] = buf[n++]; - - v0src2[list[i]] = buf[n++]; - v1src2[list[i]] = buf[n++]; - v2src2[list[i]] = buf[n++]; - v3src2[list[i]] = buf[n++]; - v4src2[list[i]] = buf[n++]; - 
v5src2[list[i]] = buf[n++]; - - v0src3[list[i]] = buf[n++]; - v1src3[list[i]] = buf[n++]; - v2src3[list[i]] = buf[n++]; - v3src3[list[i]] = buf[n++]; - v4src3[list[i]] = buf[n++]; - v5src3[list[i]] = buf[n++]; - - v0src4[list[i]] = buf[n++]; - v1src4[list[i]] = buf[n++]; - v2src4[list[i]] = buf[n++]; - v3src4[list[i]] = buf[n++]; - v4src4[list[i]] = buf[n++]; - v5src4[list[i]] = buf[n++]; - - v0src5[list[i]] = buf[n++]; - v1src5[list[i]] = buf[n++]; - v2src5[list[i]] = buf[n++]; - v3src5[list[i]] = buf[n++]; - v4src5[list[i]] = buf[n++]; - v5src5[list[i]] = buf[n++]; - - v0src6[list[i]] = buf[n++]; - v1src6[list[i]] = buf[n++]; - v2src6[list[i]] = buf[n++]; - v3src6[list[i]] = buf[n++]; - v4src6[list[i]] = buf[n++]; - v5src6[list[i]] = buf[n++]; - } - break; - } - - // Disperion interactions, geometric mixing - - case FORWARD_IK_NONE: { - for (int k = 0; k < nsplit_alloc; k++) { - FFT_SCALAR *xdest = &vdx_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *ydest = &vdy_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *zdest = &vdz_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; - for (int i = 0; i < nlist; i++) { - xdest[list[i]] = buf[n++]; - ydest[list[i]] = buf[n++]; - zdest[list[i]] = buf[n++]; - } - } - break; - } - - case FORWARD_AD_NONE: { - for (int k = 0; k < nsplit_alloc; k++) { - FFT_SCALAR *dest = &u_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; - for (int i = 0; i < nlist; i++) - dest[list[i]] = buf[n++]; - } - break; - } - - case FORWARD_IK_PERATOM_NONE: { - for (int k = 0; k < nsplit_alloc; k++) { - FFT_SCALAR *esrc = &u_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v0src = &v0_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v1src = &v1_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v2src = &v2_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v3src = &v3_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v4src = 
&v4_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v5src = &v5_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; - for (int i = 0; i < nlist; i++) { - if (eflag_atom) esrc[list[i]] = buf[n++]; - if (vflag_atom) { - v0src[list[i]] = buf[n++]; - v1src[list[i]] = buf[n++]; - v2src[list[i]] = buf[n++]; - v3src[list[i]] = buf[n++]; - v4src[list[i]] = buf[n++]; - v5src[list[i]] = buf[n++]; - } - } - } - break; - } - - case FORWARD_AD_PERATOM_NONE: { - for (int k = 0; k < nsplit_alloc; k++) { - FFT_SCALAR *v0src = &v0_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v1src = &v1_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v2src = &v2_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v3src = &v3_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v4src = &v4_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *v5src = &v5_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; - for (int i = 0; i < nlist; i++) { - v0src[list[i]] = buf[n++]; - v1src[list[i]] = buf[n++]; - v2src[list[i]] = buf[n++]; - v3src[list[i]] = buf[n++]; - v4src[list[i]] = buf[n++]; - v5src[list[i]] = buf[n++]; - } - } - break; - } - - } -} - -/* ---------------------------------------------------------------------- - pack ghost values into buf to send to another proc -------------------------------------------------------------------------- */ - -void PPPMDisp::pack_reverse(int flag, FFT_SCALAR *buf, int nlist, int *list) -{ - int n = 0; - - //Coulomb interactions - - if (flag == REVERSE_RHO) { - FFT_SCALAR *src = &density_brick[nzlo_out][nylo_out][nxlo_out]; - for (int i = 0; i < nlist; i++) - buf[i] = src[list[i]]; - - //Dispersion interactions, geometric mixing - - } else if (flag == REVERSE_RHO_G) { - FFT_SCALAR *src = &density_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; - for (int i = 0; i < nlist; i++) - buf[i] = src[list[i]]; - - //Dispersion interactions, arithmetic mixing - - } else if (flag == 
REVERSE_RHO_A) { - FFT_SCALAR *src0 = &density_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *src1 = &density_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *src2 = &density_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *src3 = &density_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *src4 = &density_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *src5 = &density_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *src6 = &density_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; - for (int i = 0; i < nlist; i++) { - buf[n++] = src0[list[i]]; - buf[n++] = src1[list[i]]; - buf[n++] = src2[list[i]]; - buf[n++] = src3[list[i]]; - buf[n++] = src4[list[i]]; - buf[n++] = src5[list[i]]; - buf[n++] = src6[list[i]]; - } - - //Dispersion interactions, no mixing - - } else if (flag == REVERSE_RHO_NONE) { - for (int k = 0; k < nsplit_alloc; k++) { - FFT_SCALAR *src = &density_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; - for (int i = 0; i < nlist; i++) { - buf[n++] = src[list[i]]; - } - } - } -} - -/* ---------------------------------------------------------------------- - unpack another proc's ghost values from buf and add to own values -------------------------------------------------------------------------- */ - -void PPPMDisp::unpack_reverse(int flag, FFT_SCALAR *buf, int nlist, int *list) -{ - int n = 0; - - //Coulomb interactions - - if (flag == REVERSE_RHO) { - FFT_SCALAR *dest = &density_brick[nzlo_out][nylo_out][nxlo_out]; - for (int i = 0; i < nlist; i++) - dest[list[i]] += buf[i]; - - //Dispersion interactions, geometric mixing - - } else if (flag == REVERSE_RHO_G) { - FFT_SCALAR *dest = &density_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; - for (int i = 0; i < nlist; i++) - dest[list[i]] += buf[i]; - - //Dispersion interactions, arithmetic mixing - - } else if (flag == REVERSE_RHO_A) { - FFT_SCALAR *dest0 = &density_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *dest1 = 
&density_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *dest2 = &density_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *dest3 = &density_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *dest4 = &density_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *dest5 = &density_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; - FFT_SCALAR *dest6 = &density_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; - for (int i = 0; i < nlist; i++) { - dest0[list[i]] += buf[n++]; - dest1[list[i]] += buf[n++]; - dest2[list[i]] += buf[n++]; - dest3[list[i]] += buf[n++]; - dest4[list[i]] += buf[n++]; - dest5[list[i]] += buf[n++]; - dest6[list[i]] += buf[n++]; - } - - //Dispersion interactions, no mixing - - } else if (flag == REVERSE_RHO_NONE) { - for (int k = 0; k < nsplit_alloc; k++) { - FFT_SCALAR *dest = &density_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; - for (int i = 0; i < nlist; i++) - dest[list[i]] += buf[n++]; - } - } -} - -/* ---------------------------------------------------------------------- - map nprocs to NX by NY grid as PX by PY procs - return optimal px,py -------------------------------------------------------------------------- */ - -void PPPMDisp::procs2grid2d(int nprocs, int nx, int ny, int *px, int *py) -{ - // loop thru all possible factorizations of nprocs - // surf = surface area of largest proc sub-domain - // innermost if test minimizes surface area and surface/volume ratio - - int bestsurf = 2 * (nx + ny); - int bestboxx = 0; - int bestboxy = 0; - - int boxx,boxy,surf,ipx,ipy; - - ipx = 1; - while (ipx <= nprocs) { - if (nprocs % ipx == 0) { - ipy = nprocs/ipx; - boxx = nx/ipx; - if (nx % ipx) boxx++; - boxy = ny/ipy; - if (ny % ipy) boxy++; - surf = boxx + boxy; - if (surf < bestsurf || - (surf == bestsurf && boxx*boxy > bestboxx*bestboxy)) { - bestsurf = surf; - bestboxx = boxx; - bestboxy = boxy; - *px = ipx; - *py = ipy; - } - } - ipx++; - } -} - -/* 
---------------------------------------------------------------------- - charge assignment into rho1d - dx,dy,dz = distance of particle from "lower left" grid point -------------------------------------------------------------------------- */ - -void PPPMDisp::compute_rho1d(const FFT_SCALAR &dx, const FFT_SCALAR &dy, - const FFT_SCALAR &dz, int ord, - FFT_SCALAR **rho_c, FFT_SCALAR **r1d) -{ - int k,l; - FFT_SCALAR r1,r2,r3; - - for (k = (1-ord)/2; k <= ord/2; k++) { - r1 = r2 = r3 = ZEROF; - - for (l = ord-1; l >= 0; l--) { - r1 = rho_c[l][k] + r1*dx; - r2 = rho_c[l][k] + r2*dy; - r3 = rho_c[l][k] + r3*dz; - } - r1d[0][k] = r1; - r1d[1][k] = r2; - r1d[2][k] = r3; - } -} - -/* ---------------------------------------------------------------------- - charge assignment into drho1d - dx,dy,dz = distance of particle from "lower left" grid point -------------------------------------------------------------------------- */ - -void PPPMDisp::compute_drho1d(const FFT_SCALAR &dx, const FFT_SCALAR &dy, - const FFT_SCALAR &dz, int ord, - FFT_SCALAR **drho_c, FFT_SCALAR **dr1d) -{ - int k,l; - FFT_SCALAR r1,r2,r3; - - for (k = (1-ord)/2; k <= ord/2; k++) { - r1 = r2 = r3 = ZEROF; - - for (l = ord-2; l >= 0; l--) { - r1 = drho_c[l][k] + r1*dx; - r2 = drho_c[l][k] + r2*dy; - r3 = drho_c[l][k] + r3*dz; - } - dr1d[0][k] = r1; - dr1d[1][k] = r2; - dr1d[2][k] = r3; - } -} - -/* ---------------------------------------------------------------------- - generate coeffients for the weight function of order n - - (n-1) - Wn(x) = Sum wn(k,x) , Sum is over every other integer - k=-(n-1) - For k=-(n-1),-(n-1)+2, ....., (n-1)-2,n-1 - k is odd integers if n is even and even integers if n is odd - --- - | n-1 - | Sum a(l,j)*(x-k/2)**l if abs(x-k/2) < 1/2 - wn(k,x) = < l=0 - | - | 0 otherwise - --- - a coeffients are packed into the array rho_coeff to eliminate zeros - rho_coeff(l,((k+mod(n+1,2))/2) = a(l,k) -------------------------------------------------------------------------- */ - -void 
PPPMDisp::compute_rho_coeff(FFT_SCALAR **coeff , FFT_SCALAR **dcoeff, - int ord) -{ - int j,k,l,m; - FFT_SCALAR s; - - FFT_SCALAR **a; - memory->create2d_offset(a,ord,-ord,ord,"pppm/disp:a"); - - for (k = -ord; k <= ord; k++) - for (l = 0; l < ord; l++) - a[l][k] = 0.0; - - a[0][0] = 1.0; - for (j = 1; j < ord; j++) { - for (k = -j; k <= j; k += 2) { - s = 0.0; - for (l = 0; l < j; l++) { - a[l+1][k] = (a[l][k+1]-a[l][k-1]) / (l+1); -#ifdef FFT_SINGLE - s += powf(0.5,(float) l+1) * - (a[l][k-1] + powf(-1.0,(float) l) * a[l][k+1]) / (l+1); -#else - s += pow(0.5,(double) l+1) * - (a[l][k-1] + pow(-1.0,(double) l) * a[l][k+1]) / (l+1); -#endif - } - a[0][k] = s; - } - } - - m = (1-ord)/2; - for (k = -(ord-1); k < ord; k += 2) { - for (l = 0; l < ord; l++) - coeff[l][m] = a[l][k]; - for (l = 1; l < ord; l++) - dcoeff[l-1][m] = l*a[l][k]; - m++; - } - - memory->destroy2d_offset(a,-ord); -} - -/* ---------------------------------------------------------------------- - Slab-geometry correction term to dampen inter-slab interactions between - periodically repeating slabs. Yields good approximation to 2D Ewald if - adequate empty space is left between repeating slabs (J. Chem. Phys. - 111, 3155). Slabs defined here to be parallel to the xy plane. Also - extended to non-neutral systems (J. Chem. Phys. 131, 094107). 
-------------------------------------------------------------------------- */ - -void PPPMDisp::slabcorr(int eflag) -{ - // compute local contribution to global dipole moment - - double *q = atom->q; - double **x = atom->x; - double zprd = domain->zprd; - int nlocal = atom->nlocal; - - double dipole = 0.0; - for (int i = 0; i < nlocal; i++) dipole += q[i]*x[i][2]; - - // sum local contributions to get global dipole moment - - double dipole_all; - MPI_Allreduce(&dipole,&dipole_all,1,MPI_DOUBLE,MPI_SUM,world); - - // need to make non-neutral systems and/or - // per-atom energy translationally invariant - - double dipole_r2 = 0.0; - if (eflag_atom || fabs(qsum) > SMALL) { - for (int i = 0; i < nlocal; i++) - dipole_r2 += q[i]*x[i][2]*x[i][2]; - - // sum local contributions - - double tmp; - MPI_Allreduce(&dipole_r2,&tmp,1,MPI_DOUBLE,MPI_SUM,world); - dipole_r2 = tmp; - } - - // compute corrections - - const double e_slabcorr = MY_2PI*(dipole_all*dipole_all - - qsum*dipole_r2 - qsum*qsum*zprd*zprd/12.0)/volume; - const double qscale = force->qqrd2e * scale; - - if (eflag_global) energy_1 += qscale * e_slabcorr; - - // per-atom energy - - if (eflag_atom) { - double efact = qscale * MY_2PI/volume; - for (int i = 0; i < nlocal; i++) - eatom[i] += efact * q[i]*(x[i][2]*dipole_all - 0.5*(dipole_r2 + - qsum*x[i][2]*x[i][2]) - qsum*zprd*zprd/12.0); - } - - // add on force corrections - - double ffact = qscale * (-4.0*MY_PI/volume); - double **f = atom->f; - - for (int i = 0; i < nlocal; i++) f[i][2] += ffact * q[i]*(dipole_all - qsum*x[i][2]); -} - -/* ---------------------------------------------------------------------- - perform and time the 1d FFTs required for N timesteps -------------------------------------------------------------------------- */ - -int PPPMDisp::timing_1d(int n, double &time1d) -{ - double time1,time2; - int mixing = 1; - if (function[2]) mixing = 4; - if (function[3]) mixing = nsplit_alloc/2; - - if (function[0]) for (int i = 0; i < 2*nfft_both; i++) 
work1[i] = ZEROF; - if (function[1] + function[2] + function[3]) - for (int i = 0; i < 2*nfft_both_6; i++) work1_6[i] = ZEROF; - - MPI_Barrier(world); - time1 = MPI_Wtime(); - - if (function[0]) { - for (int i = 0; i < n; i++) { - fft1->timing1d(work1,nfft_both,1); - fft2->timing1d(work1,nfft_both,-1); - if (differentiation_flag != 1){ - fft2->timing1d(work1,nfft_both,-1); - fft2->timing1d(work1,nfft_both,-1); - } - } - } - - MPI_Barrier(world); - time2 = MPI_Wtime(); - time1d = time2 - time1; - - MPI_Barrier(world); - time1 = MPI_Wtime(); - - if (function[1] + function[2] + function[3]) { - for (int i = 0; i < n; i++) { - fft1_6->timing1d(work1_6,nfft_both_6,1); - fft2_6->timing1d(work1_6,nfft_both_6,-1); - if (differentiation_flag != 1){ - fft2_6->timing1d(work1_6,nfft_both_6,-1); - fft2_6->timing1d(work1_6,nfft_both_6,-1); - } - } - } - - MPI_Barrier(world); - time2 = MPI_Wtime(); - time1d += (time2 - time1)*mixing; - - if (differentiation_flag) return 2; - return 4; -} - -/* ---------------------------------------------------------------------- - perform and time the 3d FFTs required for N timesteps -------------------------------------------------------------------------- */ - -int PPPMDisp::timing_3d(int n, double &time3d) -{ - double time1,time2; - int mixing = 1; - if (function[2]) mixing = 4; - if (function[3]) mixing = nsplit_alloc/2; - - if (function[0]) for (int i = 0; i < 2*nfft_both; i++) work1[i] = ZEROF; - if (function[1] + function[2] + function[3]) - for (int i = 0; i < 2*nfft_both_6; i++) work1_6[i] = ZEROF; - - - - MPI_Barrier(world); - time1 = MPI_Wtime(); - - if (function[0]) { - for (int i = 0; i < n; i++) { - fft1->compute(work1,work1,1); - fft2->compute(work1,work1,-1); - if (differentiation_flag != 1) { - fft2->compute(work1,work1,-1); - fft2->compute(work1,work1,-1); - } - } - } - - MPI_Barrier(world); - time2 = MPI_Wtime(); - time3d = time2 - time1; - - MPI_Barrier(world); - time1 = MPI_Wtime(); - - if (function[1] + function[2] + 
function[3]) { - for (int i = 0; i < n; i++) { - fft1_6->compute(work1_6,work1_6,1); - fft2_6->compute(work1_6,work1_6,-1); - if (differentiation_flag != 1) { - fft2_6->compute(work1_6,work1_6,-1); - fft2_6->compute(work1_6,work1_6,-1); - } - } - } - - MPI_Barrier(world); - time2 = MPI_Wtime(); - time3d += (time2 - time1) * mixing; - - if (differentiation_flag) return 2; - return 4; -} - -/* ---------------------------------------------------------------------- - memory usage of local arrays -------------------------------------------------------------------------- */ - -double PPPMDisp::memory_usage() -{ - double bytes = nmax*3 * sizeof(double); - int mixing = 1; - int diff = 3; //depends on differentiation - int per = 7; //depends on per atom calculations - if (differentiation_flag) { - diff = 1; - per = 6; - } - if (!evflag_atom) per = 0; - if (function[2]) mixing = 7; - if (function[3]) mixing = nsplit_alloc; - - if (function[0]) { - int nbrick = (nxhi_out-nxlo_out+1) * (nyhi_out-nylo_out+1) * - (nzhi_out-nzlo_out+1); - bytes += (1 + diff + per) * nbrick * sizeof(FFT_SCALAR); //brick memory - bytes += 6 * nfft_both * sizeof(double); // vg - bytes += nfft_both * sizeof(double); // greensfn - bytes += nfft_both * 3 * sizeof(FFT_SCALAR); // density_FFT, work1, work2 - bytes += cg->memory_usage(); - } - - if (function[1] + function[2] + function[3]) { - int nbrick = (nxhi_out_6-nxlo_out_6+1) * (nyhi_out_6-nylo_out_6+1) * - (nzhi_out_6-nzlo_out_6+1); - bytes += (1 + diff + per ) * nbrick * sizeof(FFT_SCALAR) * mixing; // density_brick + vd_brick + per atom bricks - bytes += 6 * nfft_both_6 * sizeof(double); // vg - bytes += nfft_both_6 * sizeof(double); // greensfn - bytes += nfft_both_6 * (mixing + 2) * sizeof(FFT_SCALAR); // density_FFT, work1, work2 - bytes += cg_6->memory_usage(); - } - return bytes; -} +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + 
http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing authors: Rolf Isele-Holder (Aachen University) + Paul Crozier (SNL) +------------------------------------------------------------------------- */ + +#include "lmptype.h" +#include "mpi.h" +#include "string.h" +#include "stdio.h" +#include "stdlib.h" +#include "math.h" +#include "pppm_disp.h" +#include "math_const.h" +#include "atom.h" +#include "comm.h" +#include "commgrid.h" +#include "neighbor.h" +#include "force.h" +#include "pair.h" +#include "bond.h" +#include "angle.h" +#include "domain.h" +#include "fft3d_wrap.h" +#include "remap_wrap.h" +#include "memory.h" +#include "error.h" + +using namespace LAMMPS_NS; +using namespace MathConst; + +#define MAXORDER 7 +#define OFFSET 16384 +#define SMALL 0.00001 +#define LARGE 10000.0 +#define EPS_HOC 1.0e-7 + +enum{GEOMETRIC,ARITHMETIC,SIXTHPOWER}; +enum{REVERSE_RHO, REVERSE_RHO_G, REVERSE_RHO_A, REVERSE_RHO_NONE}; +enum{FORWARD_IK, FORWARD_AD, FORWARD_IK_PERATOM, FORWARD_AD_PERATOM, + FORWARD_IK_G, FORWARD_AD_G, FORWARD_IK_PERATOM_G, FORWARD_AD_PERATOM_G, + FORWARD_IK_A, FORWARD_AD_A, FORWARD_IK_PERATOM_A, FORWARD_AD_PERATOM_A, + FORWARD_IK_NONE, FORWARD_AD_NONE, FORWARD_IK_PERATOM_NONE, FORWARD_AD_PERATOM_NONE}; + + +#ifdef FFT_SINGLE +#define ZEROF 0.0f +#define ONEF 1.0f +#else +#define ZEROF 0.0 +#define ONEF 1.0 +#endif + +/* ---------------------------------------------------------------------- */ + +PPPMDisp::PPPMDisp(LAMMPS *lmp, int narg, char **arg) : 
KSpace(lmp, narg, arg) +{ + if (narg < 1) error->all(FLERR,"Illegal kspace_style pppm/disp command"); + + triclinic_support = 0; + pppmflag = dispersionflag = 1; + accuracy_relative = fabs(force->numeric(FLERR,arg[0])); + + nfactors = 3; + factors = new int[nfactors]; + factors[0] = 2; + factors[1] = 3; + factors[2] = 5; + + MPI_Comm_rank(world,&me); + MPI_Comm_size(world,&nprocs); + + csumflag = 0; + B = NULL; + cii = NULL; + csumi = NULL; + peratom_allocate_flag = 0; + + density_brick = vdx_brick = vdy_brick = vdz_brick = NULL; + density_fft = NULL; + u_brick = v0_brick = v1_brick = v2_brick = v3_brick = + v4_brick = v5_brick = NULL; + + density_brick_g = vdx_brick_g = vdy_brick_g = vdz_brick_g = NULL; + density_fft_g = NULL; + u_brick_g = v0_brick_g = v1_brick_g = v2_brick_g = v3_brick_g = + v4_brick_g = v5_brick_g = NULL; + + density_brick_a0 = vdx_brick_a0 = vdy_brick_a0 = vdz_brick_a0 = NULL; + density_fft_a0 = NULL; + u_brick_a0 = v0_brick_a0 = v1_brick_a0 = v2_brick_a0 = v3_brick_a0 = + v4_brick_a0 = v5_brick_a0 = NULL; + + density_brick_a1 = vdx_brick_a1 = vdy_brick_a1 = vdz_brick_a1 = NULL; + density_fft_a1 = NULL; + u_brick_a1 = v0_brick_a1 = v1_brick_a1 = v2_brick_a1 = v3_brick_a1 = + v4_brick_a1 = v5_brick_a1 = NULL; + + density_brick_a2 = vdx_brick_a2 = vdy_brick_a2 = vdz_brick_a2 = NULL; + density_fft_a2 = NULL; + u_brick_a2 = v0_brick_a2 = v1_brick_a2 = v2_brick_a2 = v3_brick_a2 = + v4_brick_a2 = v5_brick_a2 = NULL; + + density_brick_a3 = vdx_brick_a3 = vdy_brick_a3 = vdz_brick_a3 = NULL; + density_fft_a3 = NULL; + u_brick_a3 = v0_brick_a3 = v1_brick_a3 = v2_brick_a3 = v3_brick_a3 = + v4_brick_a3 = v5_brick_a3 = NULL; + + density_brick_a4 = vdx_brick_a4 = vdy_brick_a4 = vdz_brick_a4 = NULL; + density_fft_a4 = NULL; + u_brick_a4 = v0_brick_a4 = v1_brick_a4 = v2_brick_a4 = v3_brick_a4 = + v4_brick_a4 = v5_brick_a4 = NULL; + + density_brick_a5 = vdx_brick_a5 = vdy_brick_a5 = vdz_brick_a5 = NULL; + density_fft_a5 = NULL; + u_brick_a5 = v0_brick_a5 = 
v1_brick_a5 = v2_brick_a5 = v3_brick_a5 = + v4_brick_a5 = v5_brick_a5 = NULL; + + density_brick_a6 = vdx_brick_a6 = vdy_brick_a6 = vdz_brick_a6 = NULL; + density_fft_a6 = NULL; + u_brick_a6 = v0_brick_a6 = v1_brick_a6 = v2_brick_a6 = v3_brick_a6 = + v4_brick_a6 = v5_brick_a6 = NULL; + + density_brick_none = vdx_brick_none = vdy_brick_none = vdz_brick_none = NULL; + density_fft_none = NULL; + u_brick_none = v0_brick_none = v1_brick_none = v2_brick_none = v3_brick_none = + v4_brick_none = v5_brick_none = NULL; + + greensfn = NULL; + greensfn_6 = NULL; + work1 = work2 = NULL; + work1_6 = work2_6 = NULL; + vg = NULL; + vg2 = NULL; + vg_6 = NULL; + vg2_6 = NULL; + fkx = fky = fkz = NULL; + fkx2 = fky2 = fkz2 = NULL; + fkx_6 = fky_6 = fkz_6 = NULL; + fkx2_6 = fky2_6 = fkz2_6 = NULL; + + sf_precoeff1 = sf_precoeff2 = sf_precoeff3 = sf_precoeff4 = + sf_precoeff5 = sf_precoeff6 = NULL; + sf_precoeff1_6 = sf_precoeff2_6 = sf_precoeff3_6 = sf_precoeff4_6 = + sf_precoeff5_6 = sf_precoeff6_6 = NULL; + + gf_b = NULL; + gf_b_6 = NULL; + rho1d = rho_coeff = NULL; + drho1d = drho_coeff = NULL; + rho1d_6 = rho_coeff_6 = NULL; + drho1d_6 = drho_coeff_6 = NULL; + fft1 = fft2 = NULL; + fft1_6 = fft2_6 = NULL; + remap = NULL; + remap_6 = NULL; + + nmax = 0; + part2grid = NULL; + part2grid_6 = NULL; + + cg = NULL; + cg_peratom = NULL; + cg_6 = NULL; + cg_peratom_6 = NULL; + + memset(function, 0, EWALD_FUNCS*sizeof(int)); +} + +/* ---------------------------------------------------------------------- + free all memory +------------------------------------------------------------------------- */ + +PPPMDisp::~PPPMDisp() +{ + delete [] factors; + delete [] B; + B = NULL; + delete [] cii; + cii = NULL; + delete [] csumi; + csumi = NULL; + deallocate(); + deallocate_peratom(); + memory->destroy(part2grid); + memory->destroy(part2grid_6); + part2grid = part2grid_6 = NULL; +} + +/* ---------------------------------------------------------------------- + called once before run 
+------------------------------------------------------------------------- */ + +void PPPMDisp::init() +{ + if (me == 0) { + if (screen) fprintf(screen,"PPPMDisp initialization ...\n"); + if (logfile) fprintf(logfile,"PPPMDisp initialization ...\n"); + } + + triclinic_check(); + if (domain->dimension == 2) + error->all(FLERR,"Cannot use PPPMDisp with 2d simulation"); + + if (slabflag == 0 && domain->nonperiodic > 0) + error->all(FLERR,"Cannot use nonperiodic boundaries with PPPMDisp"); + if (slabflag == 1) { + if (domain->xperiodic != 1 || domain->yperiodic != 1 || + domain->boundary[2][0] != 1 || domain->boundary[2][1] != 1) + error->all(FLERR,"Incorrect boundaries with slab PPPMDisp"); + } + + if (order > MAXORDER || order_6 > MAXORDER) { + char str[128]; + sprintf(str,"PPPMDisp coulomb order cannot be greater than %d",MAXORDER); + error->all(FLERR,str); + } + + // free all arrays previously allocated + + deallocate(); + deallocate_peratom(); + + // set scale + + scale = 1.0; + + triclinic = domain->triclinic; + + // check whether cutoff and pair style are set + + pair_check(); + + int tmp; + Pair *pair = force->pair; + int *ptr = pair ? (int *) pair->extract("ewald_order",tmp) : NULL; + double *p_cutoff = pair ? (double *) pair->extract("cut_coul",tmp) : NULL; + double *p_cutoff_lj = pair ? (double *) pair->extract("cut_LJ",tmp) : NULL; + if (!(ptr||*p_cutoff||*p_cutoff_lj)) + error->all(FLERR,"KSpace style is incompatible with Pair style"); + cutoff = *p_cutoff; + cutoff_lj = *p_cutoff_lj; + + double tmp2; + MPI_Allreduce(&cutoff, &tmp2,1,MPI_DOUBLE,MPI_SUM,world); + + // check out which types of potentials will have to be calculated + + int ewald_order = ptr ? *((int *) ptr) : 1<<1; + int ewald_mix = ptr ? 
*((int *) pair->extract("ewald_mix",tmp)) : GEOMETRIC; + memset(function, 0, EWALD_FUNCS*sizeof(int)); + for (int i=0; i<=EWALD_MAXORDER; ++i) // transcribe order + if (ewald_order&(1<pair_style); + error->all(FLERR,str); + } + function[k] = 1; + } + + + // warn, if function[0] is not set but charge attribute is set! + if (!function[0] && atom->q_flag && me == 0) { + char str[128]; + sprintf(str, "Charges are set, but coulombic solver is not used"); + error->warning(FLERR, str); + } + + // compute qsum & qsqsum, if function[0] is set, print error if no charges are set or warn if not charge-neutral + + if (function[0]) { + if (!atom->q_flag) + error->all(FLERR,"Kspace style with selected options " + "requires atom attribute q"); + + qsum = qsqsum = 0.0; + for (int i = 0; i < atom->nlocal; i++) { + qsum += atom->q[i]; + qsqsum += atom->q[i]*atom->q[i]; + + } + + double tmp; + MPI_Allreduce(&qsum,&tmp,1,MPI_DOUBLE,MPI_SUM,world); + qsum = tmp; + MPI_Allreduce(&qsqsum,&tmp,1,MPI_DOUBLE,MPI_SUM,world); + qsqsum = tmp; + + if (qsqsum == 0.0) + error->all(FLERR,"Cannot use kspace solver with selected options " + "on system with no charge"); + if (fabs(qsum) > SMALL && me == 0) { + char str[128]; + sprintf(str,"System is not charge neutral, net charge = %g",qsum); + error->warning(FLERR,str); + } + } + + // if kspace is TIP4P, extract TIP4P params from pair style + // bond/angle are not yet init(), so insure equilibrium request is valid + + qdist = 0.0; + + if (tip4pflag) { + int itmp; + double *p_qdist = (double *) force->pair->extract("qdist",itmp); + int *p_typeO = (int *) force->pair->extract("typeO",itmp); + int *p_typeH = (int *) force->pair->extract("typeH",itmp); + int *p_typeA = (int *) force->pair->extract("typeA",itmp); + int *p_typeB = (int *) force->pair->extract("typeB",itmp); + if (!p_qdist || !p_typeO || !p_typeH || !p_typeA || !p_typeB) + error->all(FLERR,"KSpace style is incompatible with Pair style"); + qdist = *p_qdist; + typeO = *p_typeO; + typeH = 
*p_typeH; + int typeA = *p_typeA; + int typeB = *p_typeB; + + if (force->angle == NULL || force->bond == NULL) + error->all(FLERR,"Bond and angle potentials must be defined for TIP4P"); + if (typeA < 1 || typeA > atom->nangletypes || + force->angle->setflag[typeA] == 0) + error->all(FLERR,"Bad TIP4P angle type for PPPMDisp/TIP4P"); + if (typeB < 1 || typeB > atom->nbondtypes || + force->bond->setflag[typeB] == 0) + error->all(FLERR,"Bad TIP4P bond type for PPPMDisp/TIP4P"); + double theta = force->angle->equilibrium_angle(typeA); + double blen = force->bond->equilibrium_distance(typeB); + alpha = qdist / (cos(0.5*theta) * blen); + } + + + // initialize the pair style to get the coefficients + neighrequest_flag = 0; + pair->init(); + neighrequest_flag = 1; + init_coeffs(); + + //if g_ewald and g_ewald_6 have not been specified, set some initial value + // to avoid problems when calculating the energies! + + if (!gewaldflag) g_ewald = 1; + if (!gewaldflag_6) g_ewald_6 = 1; + + // set accuracy (force units) from accuracy_relative or accuracy_absolute + + if (accuracy_absolute >= 0.0) accuracy = accuracy_absolute; + else accuracy = accuracy_relative * two_charge_force; + + int (*procneigh)[2] = comm->procneigh; + + int iteration = 0; + if (function[0]) { + CommGrid *cgtmp = NULL; + while (order >= minorder) { + + if (iteration && me == 0) + error->warning(FLERR,"Reducing PPPMDisp Coulomb order " + "b/c stencil extends beyond neighbor processor"); + iteration++; + + // set grid for dispersion interaction and coulomb interactions + + set_grid(); + + if (nx_pppm >= OFFSET || ny_pppm >= OFFSET || nz_pppm >= OFFSET) + error->all(FLERR,"PPPMDisp Coulomb grid is too large"); + + set_fft_parameters(nx_pppm, ny_pppm, nz_pppm, + nxlo_fft, nylo_fft, nzlo_fft, + nxhi_fft, nyhi_fft, nzhi_fft, + nxlo_in, nylo_in, nzlo_in, + nxhi_in, nyhi_in, nzhi_in, + nxlo_out, nylo_out, nzlo_out, + nxhi_out, nyhi_out, nzhi_out, + nlower, nupper, + ngrid, nfft, nfft_both, + shift, shiftone, order); 
+ + if (overlap_allowed) break; + + cgtmp = new CommGrid(lmp, world,1,1, + nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in, + nxlo_out,nxhi_out,nylo_out,nyhi_out, + nzlo_out,nzhi_out, + procneigh[0][0],procneigh[0][1],procneigh[1][0], + procneigh[1][1],procneigh[2][0],procneigh[2][1]); + cgtmp->ghost_notify(); + if (!cgtmp->ghost_overlap()) break; + delete cgtmp; + + order--; + } + + if (order < minorder) + error->all(FLERR, + "Coulomb PPPMDisp order has been reduced below minorder"); + if (cgtmp) delete cgtmp; + + // adjust g_ewald + + if (!gewaldflag) adjust_gewald(); + + // calculate the final accuracy + + double acc = final_accuracy(); + + // print stats + + int ngrid_max,nfft_both_max,nbuf_max; + MPI_Allreduce(&ngrid,&ngrid_max,1,MPI_INT,MPI_MAX,world); + MPI_Allreduce(&nfft_both,&nfft_both_max,1,MPI_INT,MPI_MAX,world); + + if (me == 0) { + #ifdef FFT_SINGLE + const char fft_prec[] = "single"; + #else + const char fft_prec[] = "double"; + #endif + + if (screen) { + fprintf(screen," Coulomb G vector (1/distance)= %g\n",g_ewald); + fprintf(screen," Coulomb grid = %d %d %d\n",nx_pppm,ny_pppm,nz_pppm); + fprintf(screen," Coulomb stencil order = %d\n",order); + fprintf(screen," Coulomb estimated absolute RMS force accuracy = %g\n", + acc); + fprintf(screen," Coulomb estimated relative force accuracy = %g\n", + acc/two_charge_force); + fprintf(screen," using %s precision FFTs\n",fft_prec); + fprintf(screen," 3d grid and FFT values/proc = %d %d\n", + ngrid_max, nfft_both_max); + } + if (logfile) { + fprintf(logfile," Coulomb G vector (1/distance) = %g\n",g_ewald); + fprintf(logfile," Coulomb grid = %d %d %d\n",nx_pppm,ny_pppm,nz_pppm); + fprintf(logfile," Coulomb stencil order = %d\n",order); + fprintf(logfile, + " Coulomb estimated absolute RMS force accuracy = %g\n", + acc); + fprintf(logfile," Coulomb estimated relative force accuracy = %g\n", + acc/two_charge_force); + fprintf(logfile," using %s precision FFTs\n",fft_prec); + fprintf(logfile," 3d grid and FFT 
values/proc = %d %d\n", + ngrid_max, nfft_both_max); + } + } + } + + iteration = 0; + if (function[1] + function[2] + function[3]) { + CommGrid *cgtmp = NULL; + while (order_6 >= minorder) { + + if (iteration && me == 0) + error->warning(FLERR,"Reducing PPPMDisp dispersion order " + "b/c stencil extends beyond neighbor processor"); + iteration++; + + set_grid_6(); + + if (nx_pppm_6 >= OFFSET || ny_pppm_6 >= OFFSET || nz_pppm_6 >= OFFSET) + error->all(FLERR,"PPPMDisp Dispersion grid is too large"); + + set_fft_parameters(nx_pppm_6, ny_pppm_6, nz_pppm_6, + nxlo_fft_6, nylo_fft_6, nzlo_fft_6, + nxhi_fft_6, nyhi_fft_6, nzhi_fft_6, + nxlo_in_6, nylo_in_6, nzlo_in_6, + nxhi_in_6, nyhi_in_6, nzhi_in_6, + nxlo_out_6, nylo_out_6, nzlo_out_6, + nxhi_out_6, nyhi_out_6, nzhi_out_6, + nlower_6, nupper_6, + ngrid_6, nfft_6, nfft_both_6, + shift_6, shiftone_6, order_6); + + if (overlap_allowed) break; + + cgtmp = new CommGrid(lmp,world,1,1, + nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6, + nzlo_in_6,nzhi_in_6, + nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6, + nzlo_out_6,nzhi_out_6, + procneigh[0][0],procneigh[0][1],procneigh[1][0], + procneigh[1][1],procneigh[2][0],procneigh[2][1]); + cgtmp->ghost_notify(); + if (!cgtmp->ghost_overlap()) break; + delete cgtmp; + order_6--; + } + + if (order_6 < minorder) + error->all(FLERR,"Dispersion PPPMDisp order has been " + "reduced below minorder"); + if (cgtmp) delete cgtmp; + + // adjust g_ewald_6 + + if (!gewaldflag_6 && accuracy_kspace_6 == accuracy_real_6) + adjust_gewald_6(); + + // calculate the final accuracy + + double acc, acc_real, acc_kspace; + final_accuracy_6(acc, acc_real, acc_kspace); + + + // print stats + + int ngrid_max,nfft_both_max,nbuf_max; + MPI_Allreduce(&ngrid_6,&ngrid_max,1,MPI_INT,MPI_MAX,world); + MPI_Allreduce(&nfft_both_6,&nfft_both_max,1,MPI_INT,MPI_MAX,world); + + if (me == 0) { + #ifdef FFT_SINGLE + const char fft_prec[] = "single"; + #else + const char fft_prec[] = "double"; + #endif + + if (screen) { + 
fprintf(screen," Dispersion G vector (1/distance)= %g\n",g_ewald_6); + fprintf(screen," Dispersion grid = %d %d %d\n", + nx_pppm_6,ny_pppm_6,nz_pppm_6); + fprintf(screen," Dispersion stencil order = %d\n",order_6); + fprintf(screen," Dispersion estimated absolute " + "RMS force accuracy = %g\n",acc); + fprintf(screen," Dispersion estimated absolute " + "real space RMS force accuracy = %g\n",acc_real); + fprintf(screen," Dispersion estimated absolute " + "kspace RMS force accuracy = %g\n",acc_kspace); + fprintf(screen," Dispersion estimated relative force accuracy = %g\n", + acc/two_charge_force); + fprintf(screen," using %s precision FFTs\n",fft_prec); + fprintf(screen," 3d grid and FFT values/proc dispersion = %d %d\n", + ngrid_max,nfft_both_max); + } + if (logfile) { + fprintf(logfile," Dispersion G vector (1/distance) = %g\n",g_ewald_6); + fprintf(logfile," Dispersion grid = %d %d %d\n", + nx_pppm_6,ny_pppm_6,nz_pppm_6); + fprintf(logfile," Dispersion stencil order = %d\n",order_6); + fprintf(logfile," Dispersion estimated absolute " + "RMS force accuracy = %g\n",acc); + fprintf(logfile," Dispersion estimated absolute " + "real space RMS force accuracy = %g\n",acc_real); + fprintf(logfile," Dispersion estimated absolute " + "kspace RMS force accuracy = %g\n",acc_kspace); + fprintf(logfile," Disperion estimated relative force accuracy = %g\n", + acc/two_charge_force); + fprintf(logfile," using %s precision FFTs\n",fft_prec); + fprintf(logfile," 3d grid and FFT values/proc dispersion = %d %d\n", + ngrid_max,nfft_both_max); + } + } + } + + // allocate K-space dependent memory + + allocate(); + + // pre-compute Green's function denomiator expansion + // pre-compute 1d charge distribution coefficients + + if (function[0]) { + compute_gf_denom(gf_b, order); + compute_rho_coeff(rho_coeff, drho_coeff, order); + cg->ghost_notify(); + cg->setup(); + if (differentiation_flag == 1) + compute_sf_precoeff(nx_pppm, ny_pppm, nz_pppm, order, + nxlo_fft, nylo_fft, nzlo_fft, + 
nxhi_fft, nyhi_fft, nzhi_fft, + sf_precoeff1, sf_precoeff2, sf_precoeff3, + sf_precoeff4, sf_precoeff5, sf_precoeff6); + } + if (function[1] + function[2] + function[3]) { + compute_gf_denom(gf_b_6, order_6); + compute_rho_coeff(rho_coeff_6, drho_coeff_6, order_6); + cg_6->ghost_notify(); + cg_6->setup(); + if (differentiation_flag == 1) + compute_sf_precoeff(nx_pppm_6, ny_pppm_6, nz_pppm_6, order_6, + nxlo_fft_6, nylo_fft_6, nzlo_fft_6, + nxhi_fft_6, nyhi_fft_6, nzhi_fft_6, + sf_precoeff1_6, sf_precoeff2_6, sf_precoeff3_6, + sf_precoeff4_6, sf_precoeff5_6, sf_precoeff6_6); + } + +} + +/* ---------------------------------------------------------------------- + adjust PPPM coeffs, called initially and whenever volume has changed +------------------------------------------------------------------------- */ + +void PPPMDisp::setup() +{ + double *prd; + + // volume-dependent factors + // adjust z dimension for 2d slab PPPM + // z dimension for 3d PPPM is zprd since slab_volfactor = 1.0 + + if (triclinic == 0) prd = domain->prd; + else prd = domain->prd_lamda; + + double xprd = prd[0]; + double yprd = prd[1]; + double zprd = prd[2]; + double zprd_slab = zprd*slab_volfactor; + volume = xprd * yprd * zprd_slab; + + // compute fkx,fky,fkz for my FFT grid pts + + double unitkx = (2.0*MY_PI/xprd); + double unitky = (2.0*MY_PI/yprd); + double unitkz = (2.0*MY_PI/zprd_slab); + + //compute the virial coefficients and green functions + if (function[0]){ + + delxinv = nx_pppm/xprd; + delyinv = ny_pppm/yprd; + delzinv = nz_pppm/zprd_slab; + + delvolinv = delxinv*delyinv*delzinv; + + double per; + int i, j, k, n; + + for (i = nxlo_fft; i <= nxhi_fft; i++) { + per = i - nx_pppm*(2*i/nx_pppm); + fkx[i] = unitkx*per; + j = (nx_pppm - i) % nx_pppm; + per = j - nx_pppm*(2*j/nx_pppm); + fkx2[i] = unitkx*per; + } + + for (i = nylo_fft; i <= nyhi_fft; i++) { + per = i - ny_pppm*(2*i/ny_pppm); + fky[i] = unitky*per; + j = (ny_pppm - i) % ny_pppm; + per = j - ny_pppm*(2*j/ny_pppm); + fky2[i] 
= unitky*per; + } + + for (i = nzlo_fft; i <= nzhi_fft; i++) { + per = i - nz_pppm*(2*i/nz_pppm); + fkz[i] = unitkz*per; + j = (nz_pppm - i) % nz_pppm; + per = j - nz_pppm*(2*j/nz_pppm); + fkz2[i] = unitkz*per; + } + + double sqk,vterm; + double gew2inv = 1/(g_ewald*g_ewald); + n = 0; + for (k = nzlo_fft; k <= nzhi_fft; k++) { + for (j = nylo_fft; j <= nyhi_fft; j++) { + for (i = nxlo_fft; i <= nxhi_fft; i++) { + sqk = fkx[i]*fkx[i] + fky[j]*fky[j] + fkz[k]*fkz[k]; + if (sqk == 0.0) { + vg[n][0] = 0.0; + vg[n][1] = 0.0; + vg[n][2] = 0.0; + vg[n][3] = 0.0; + vg[n][4] = 0.0; + vg[n][5] = 0.0; + } else { + vterm = -2.0 * (1.0/sqk + 0.25*gew2inv); + vg[n][0] = 1.0 + vterm*fkx[i]*fkx[i]; + vg[n][1] = 1.0 + vterm*fky[j]*fky[j]; + vg[n][2] = 1.0 + vterm*fkz[k]*fkz[k]; + vg[n][3] = vterm*fkx[i]*fky[j]; + vg[n][4] = vterm*fkx[i]*fkz[k]; + vg[n][5] = vterm*fky[j]*fkz[k]; + vg2[n][0] = vterm*0.5*(fkx[i]*fky[j] + fkx2[i]*fky2[j]); + vg2[n][1] = vterm*0.5*(fkx[i]*fkz[k] + fkx2[i]*fkz2[k]); + vg2[n][2] = vterm*0.5*(fky[j]*fkz[k] + fky2[j]*fkz2[k]); + } + n++; + } + } + } + compute_gf(); + if (differentiation_flag == 1) compute_sf_coeff(); + } + + if (function[1] + function[2] + function[3]) { + delxinv_6 = nx_pppm_6/xprd; + delyinv_6 = ny_pppm_6/yprd; + delzinv_6 = nz_pppm_6/zprd_slab; + delvolinv_6 = delxinv_6*delyinv_6*delzinv_6; + + double per; + int i, j, k, n; + for (i = nxlo_fft_6; i <= nxhi_fft_6; i++) { + per = i - nx_pppm_6*(2*i/nx_pppm_6); + fkx_6[i] = unitkx*per; + j = (nx_pppm_6 - i) % nx_pppm_6; + per = j - nx_pppm_6*(2*j/nx_pppm_6); + fkx2_6[i] = unitkx*per; + } + for (i = nylo_fft_6; i <= nyhi_fft_6; i++) { + per = i - ny_pppm_6*(2*i/ny_pppm_6); + fky_6[i] = unitky*per; + j = (ny_pppm_6 - i) % ny_pppm_6; + per = j - ny_pppm_6*(2*j/ny_pppm_6); + fky2_6[i] = unitky*per; + } + for (i = nzlo_fft_6; i <= nzhi_fft_6; i++) { + per = i - nz_pppm_6*(2*i/nz_pppm_6); + fkz_6[i] = unitkz*per; + j = (nz_pppm_6 - i) % nz_pppm_6; + per = j - nz_pppm_6*(2*j/nz_pppm_6); + 
fkz2_6[i] = unitkz*per; + } + double sqk,vterm; + long double erft, expt,nom, denom; + long double b, bs, bt; + double rtpi = sqrt(MY_PI); + double gewinv = 1/g_ewald_6; + n = 0; + for (k = nzlo_fft_6; k <= nzhi_fft_6; k++) { + for (j = nylo_fft_6; j <= nyhi_fft_6; j++) { + for (i = nxlo_fft_6; i <= nxhi_fft_6; i++) { + sqk = fkx_6[i]*fkx_6[i] + fky_6[j]*fky_6[j] + fkz_6[k]*fkz_6[k]; + if (sqk == 0.0) { + vg_6[n][0] = 0.0; + vg_6[n][1] = 0.0; + vg_6[n][2] = 0.0; + vg_6[n][3] = 0.0; + vg_6[n][4] = 0.0; + vg_6[n][5] = 0.0; + } else { + b = 0.5*sqrt(sqk)*gewinv; + bs = b*b; + bt = bs*b; + erft = 2*bt*rtpi*erfc(b); + expt = exp(-bs); + nom = erft - 2*bs*expt; + denom = nom + expt; + if (denom == 0) vterm = 3.0/sqk; + else vterm = 3.0*nom/(sqk*denom); + vg_6[n][0] = 1.0 + vterm*fkx_6[i]*fkx_6[i]; + vg_6[n][1] = 1.0 + vterm*fky_6[j]*fky_6[j]; + vg_6[n][2] = 1.0 + vterm*fkz_6[k]*fkz_6[k]; + vg_6[n][3] = vterm*fkx_6[i]*fky_6[j]; + vg_6[n][4] = vterm*fkx_6[i]*fkz_6[k]; + vg_6[n][5] = vterm*fky_6[j]*fkz_6[k]; + vg2_6[n][0] = vterm*0.5*(fkx_6[i]*fky_6[j] + fkx2_6[i]*fky2_6[j]); + vg2_6[n][1] = vterm*0.5*(fkx_6[i]*fkz_6[k] + fkx2_6[i]*fkz2_6[k]); + vg2_6[n][2] = vterm*0.5*(fky_6[j]*fkz_6[k] + fky2_6[j]*fkz2_6[k]); + } + n++; + } + } + } + compute_gf_6(); + if (differentiation_flag == 1) compute_sf_coeff_6(); + } +} + +/* ---------------------------------------------------------------------- + reset local grid arrays and communication stencils + called by fix balance b/c it changed sizes of processor sub-domains +------------------------------------------------------------------------- */ + +void PPPMDisp::setup_grid() +{ + // free all arrays previously allocated + + deallocate(); + deallocate_peratom(); + + // reset portion of global grid that each proc owns + + if (function[0]) + set_fft_parameters(nx_pppm, ny_pppm, nz_pppm, + nxlo_fft, nylo_fft, nzlo_fft, + nxhi_fft, nyhi_fft, nzhi_fft, + nxlo_in, nylo_in, nzlo_in, + nxhi_in, nyhi_in, nzhi_in, + nxlo_out, nylo_out, nzlo_out, 
+ nxhi_out, nyhi_out, nzhi_out, + nlower, nupper, + ngrid, nfft, nfft_both, + shift, shiftone, order); + + if (function[1] + function[2] + function[3]) + set_fft_parameters(nx_pppm_6, ny_pppm_6, nz_pppm_6, + nxlo_fft_6, nylo_fft_6, nzlo_fft_6, + nxhi_fft_6, nyhi_fft_6, nzhi_fft_6, + nxlo_in_6, nylo_in_6, nzlo_in_6, + nxhi_in_6, nyhi_in_6, nzhi_in_6, + nxlo_out_6, nylo_out_6, nzlo_out_6, + nxhi_out_6, nyhi_out_6, nzhi_out_6, + nlower_6, nupper_6, + ngrid_6, nfft_6, nfft_both_6, + shift_6, shiftone_6, order_6); + + // reallocate K-space dependent memory + // check if grid communication is now overlapping if not allowed + // don't invoke allocate_peratom(), compute() will allocate when needed + + allocate(); + + if (function[0]) { + cg->ghost_notify(); + if (overlap_allowed == 0 && cg->ghost_overlap()) + error->all(FLERR,"PPPM grid stencil extends " + "beyond nearest neighbor processor"); + cg->setup(); + } + if (function[1] + function[2] + function[3]) { + cg_6->ghost_notify(); + if (overlap_allowed == 0 && cg_6->ghost_overlap()) + error->all(FLERR,"PPPM grid stencil extends " + "beyond nearest neighbor processor"); + cg_6->setup(); + } + + // pre-compute Green's function denomiator expansion + // pre-compute 1d charge distribution coefficients + + if (function[0]) { + compute_gf_denom(gf_b, order); + compute_rho_coeff(rho_coeff, drho_coeff, order); + if (differentiation_flag == 1) + compute_sf_precoeff(nx_pppm, ny_pppm, nz_pppm, order, + nxlo_fft, nylo_fft, nzlo_fft, + nxhi_fft, nyhi_fft, nzhi_fft, + sf_precoeff1, sf_precoeff2, sf_precoeff3, + sf_precoeff4, sf_precoeff5, sf_precoeff6); + } + if (function[1] + function[2] + function[3]) { + compute_gf_denom(gf_b_6, order_6); + compute_rho_coeff(rho_coeff_6, drho_coeff_6, order_6); + if (differentiation_flag == 1) + compute_sf_precoeff(nx_pppm_6, ny_pppm_6, nz_pppm_6, order_6, + nxlo_fft_6, nylo_fft_6, nzlo_fft_6, + nxhi_fft_6, nyhi_fft_6, nzhi_fft_6, + sf_precoeff1_6, sf_precoeff2_6, sf_precoeff3_6, + sf_precoeff4_6, 
sf_precoeff5_6, sf_precoeff6_6); + } + + // pre-compute volume-dependent coeffs + + setup(); +} + +/* ---------------------------------------------------------------------- + compute the PPPM long-range force, energy, virial +------------------------------------------------------------------------- */ + +void PPPMDisp::compute(int eflag, int vflag) +{ + + int i; + // convert atoms from box to lamda coords + + if (eflag || vflag) ev_setup(eflag,vflag); + else evflag = evflag_atom = eflag_global = vflag_global = + eflag_atom = vflag_atom = 0; + + if (evflag_atom && !peratom_allocate_flag) { + allocate_peratom(); + if (function[0]) { + cg_peratom->ghost_notify(); + cg_peratom->setup(); + } + if (function[1] + function[2] + function[3]) { + cg_peratom_6->ghost_notify(); + cg_peratom_6->setup(); + } + peratom_allocate_flag = 1; + } + + if (triclinic == 0) boxlo = domain->boxlo; + else { + boxlo = domain->boxlo_lamda; + domain->x2lamda(atom->nlocal); + } + // extend size of per-atom arrays if necessary + + if (atom->nlocal > nmax) { + + if (function[0]) memory->destroy(part2grid); + if (function[1] + function[2] + function[3]) memory->destroy(part2grid_6); + nmax = atom->nmax; + if (function[0]) memory->create(part2grid,nmax,3,"pppm/disp:part2grid"); + if (function[1] + function[2] + function[3]) + memory->create(part2grid_6,nmax,3,"pppm/disp:part2grid_6"); + } + + + energy = 0.0; + energy_1 = 0.0; + energy_6 = 0.0; + if (vflag) for (i = 0; i < 6; i++) virial_6[i] = virial_1[i] = 0.0; + + // find grid points for all my particles + // distribute partcles' charges/dispersion coefficients on the grid + // communication between processors and remapping two fft + // Solution of poissons equation in k-space and backtransformation + // communication between processors + // calculation of forces + + if (function[0]) { + + //perfrom calculations for coulomb interactions only + + particle_map_c(delxinv, delyinv, delzinv, shift, part2grid, nupper, nlower, + nxlo_out, nylo_out, 
nzlo_out, nxhi_out, nyhi_out, nzhi_out); + + make_rho_c(); + + cg->reverse_comm(this,REVERSE_RHO); + + brick2fft(nxlo_in, nylo_in, nzlo_in, nxhi_in, nyhi_in, nzhi_in, + density_brick, density_fft, work1,remap); + + if (differentiation_flag == 1) { + + poisson_ad(work1, work2, density_fft, fft1, fft2, + nx_pppm, ny_pppm, nz_pppm, nfft, + nxlo_fft, nylo_fft, nzlo_fft, nxhi_fft, nyhi_fft, nzhi_fft, + nxlo_in, nylo_in, nzlo_in, nxhi_in, nyhi_in, nzhi_in, + energy_1, greensfn, + virial_1, vg,vg2, + u_brick, v0_brick, v1_brick, v2_brick, v3_brick, v4_brick, v5_brick); + + cg->forward_comm(this,FORWARD_AD); + + fieldforce_c_ad(); + + if (vflag_atom) cg_peratom->forward_comm(this, FORWARD_AD_PERATOM); + + } else { + poisson_ik(work1, work2, density_fft, fft1, fft2, + nx_pppm, ny_pppm, nz_pppm, nfft, + nxlo_fft, nylo_fft, nzlo_fft, nxhi_fft, nyhi_fft, nzhi_fft, + nxlo_in, nylo_in, nzlo_in, nxhi_in, nyhi_in, nzhi_in, + energy_1, greensfn, + fkx, fky, fkz,fkx2, fky2, fkz2, + vdx_brick, vdy_brick, vdz_brick, virial_1, vg,vg2, + u_brick, v0_brick, v1_brick, v2_brick, v3_brick, v4_brick, v5_brick); + + cg->forward_comm(this, FORWARD_IK); + + fieldforce_c_ik(); + + if (evflag_atom) cg_peratom->forward_comm(this, FORWARD_IK_PERATOM); + } + if (evflag_atom) fieldforce_c_peratom(); + } + + if (function[1]) { + //perfrom calculations for geometric mixing + particle_map(delxinv_6, delyinv_6, delzinv_6, shift_6, part2grid_6, nupper_6, nlower_6, + nxlo_out_6, nylo_out_6, nzlo_out_6, nxhi_out_6, nyhi_out_6, nzhi_out_6); + make_rho_g(); + + + cg_6->reverse_comm(this, REVERSE_RHO_G); + + brick2fft(nxlo_in_6, nylo_in_6, nzlo_in_6, nxhi_in_6, nyhi_in_6, nzhi_in_6, + density_brick_g, density_fft_g, work1_6,remap_6); + + if (differentiation_flag == 1) { + + poisson_ad(work1_6, work2_6, density_fft_g, fft1_6, fft2_6, + nx_pppm_6, ny_pppm_6, nz_pppm_6, nfft_6, + nxlo_fft_6, nylo_fft_6, nzlo_fft_6, nxhi_fft_6, nyhi_fft_6, nzhi_fft_6, + nxlo_in_6, nylo_in_6, nzlo_in_6, nxhi_in_6, nyhi_in_6, 
nzhi_in_6, + energy_6, greensfn_6, + virial_6, vg_6, vg2_6, + u_brick_g, v0_brick_g, v1_brick_g, v2_brick_g, v3_brick_g, v4_brick_g, v5_brick_g); + + cg_6->forward_comm(this,FORWARD_AD_G); + + fieldforce_g_ad(); + + if (vflag_atom) cg_peratom_6->forward_comm(this,FORWARD_AD_PERATOM_G); + + } else { + poisson_ik(work1_6, work2_6, density_fft_g, fft1_6, fft2_6, + nx_pppm_6, ny_pppm_6, nz_pppm_6, nfft_6, + nxlo_fft_6, nylo_fft_6, nzlo_fft_6, nxhi_fft_6, nyhi_fft_6, nzhi_fft_6, + nxlo_in_6, nylo_in_6, nzlo_in_6, nxhi_in_6, nyhi_in_6, nzhi_in_6, + energy_6, greensfn_6, + fkx_6, fky_6, fkz_6,fkx2_6, fky2_6, fkz2_6, + vdx_brick_g, vdy_brick_g, vdz_brick_g, virial_6, vg_6, vg2_6, + u_brick_g, v0_brick_g, v1_brick_g, v2_brick_g, v3_brick_g, v4_brick_g, v5_brick_g); + + cg_6->forward_comm(this,FORWARD_IK_G); + + fieldforce_g_ik(); + + + if (evflag_atom) cg_peratom_6->forward_comm(this, FORWARD_IK_PERATOM_G); + } + if (evflag_atom) fieldforce_g_peratom(); + } + + if (function[2]) { + //perform calculations for arithmetic mixing + particle_map(delxinv_6, delyinv_6, delzinv_6, shift_6, part2grid_6, nupper_6, nlower_6, + nxlo_out_6, nylo_out_6, nzlo_out_6, nxhi_out_6, nyhi_out_6, nzhi_out_6); + make_rho_a(); + + cg_6->reverse_comm(this, REVERSE_RHO_A); + + brick2fft_a(); + + if ( differentiation_flag == 1) { + + poisson_ad(work1_6, work2_6, density_fft_a3, fft1_6, fft2_6, + nx_pppm_6, ny_pppm_6, nz_pppm_6, nfft_6, + nxlo_fft_6, nylo_fft_6, nzlo_fft_6, nxhi_fft_6, nyhi_fft_6, nzhi_fft_6, + nxlo_in_6, nylo_in_6, nzlo_in_6, nxhi_in_6, nyhi_in_6, nzhi_in_6, + energy_6, greensfn_6, + virial_6, vg_6, vg2_6, + u_brick_a3, v0_brick_a3, v1_brick_a3, v2_brick_a3, v3_brick_a3, v4_brick_a3, v5_brick_a3); + poisson_2s_ad(density_fft_a0, density_fft_a6, + u_brick_a0, v0_brick_a0, v1_brick_a0, v2_brick_a0, v3_brick_a0, v4_brick_a0, v5_brick_a0, + u_brick_a6, v0_brick_a6, v1_brick_a6, v2_brick_a6, v3_brick_a6, v4_brick_a6, v5_brick_a6); + poisson_2s_ad(density_fft_a1, density_fft_a5, + 
u_brick_a1, v0_brick_a1, v1_brick_a1, v2_brick_a1, v3_brick_a1, v4_brick_a1, v5_brick_a1, + u_brick_a5, v0_brick_a5, v1_brick_a5, v2_brick_a5, v3_brick_a5, v4_brick_a5, v5_brick_a5); + poisson_2s_ad(density_fft_a2, density_fft_a4, + u_brick_a2, v0_brick_a2, v1_brick_a2, v2_brick_a2, v3_brick_a2, v4_brick_a2, v5_brick_a2, + u_brick_a4, v0_brick_a4, v1_brick_a4, v2_brick_a4, v3_brick_a4, v4_brick_a4, v5_brick_a4); + + cg_6->forward_comm(this, FORWARD_AD_A); + + fieldforce_a_ad(); + + if (evflag_atom) cg_peratom_6->forward_comm(this, FORWARD_AD_PERATOM_A); + + } else { + + poisson_ik(work1_6, work2_6, density_fft_a3, fft1_6, fft2_6, + nx_pppm_6, ny_pppm_6, nz_pppm_6, nfft_6, + nxlo_fft_6, nylo_fft_6, nzlo_fft_6, nxhi_fft_6, nyhi_fft_6, nzhi_fft_6, + nxlo_in_6, nylo_in_6, nzlo_in_6, nxhi_in_6, nyhi_in_6, nzhi_in_6, + energy_6, greensfn_6, + fkx_6, fky_6, fkz_6,fkx2_6, fky2_6, fkz2_6, + vdx_brick_a3, vdy_brick_a3, vdz_brick_a3, virial_6, vg_6, vg2_6, + u_brick_a3, v0_brick_a3, v1_brick_a3, v2_brick_a3, v3_brick_a3, v4_brick_a3, v5_brick_a3); + poisson_2s_ik(density_fft_a0, density_fft_a6, + vdx_brick_a0, vdy_brick_a0, vdz_brick_a0, + vdx_brick_a6, vdy_brick_a6, vdz_brick_a6, + u_brick_a0, v0_brick_a0, v1_brick_a0, v2_brick_a0, v3_brick_a0, v4_brick_a0, v5_brick_a0, + u_brick_a6, v0_brick_a6, v1_brick_a6, v2_brick_a6, v3_brick_a6, v4_brick_a6, v5_brick_a6); + poisson_2s_ik(density_fft_a1, density_fft_a5, + vdx_brick_a1, vdy_brick_a1, vdz_brick_a1, + vdx_brick_a5, vdy_brick_a5, vdz_brick_a5, + u_brick_a1, v0_brick_a1, v1_brick_a1, v2_brick_a1, v3_brick_a1, v4_brick_a1, v5_brick_a1, + u_brick_a5, v0_brick_a5, v1_brick_a5, v2_brick_a5, v3_brick_a5, v4_brick_a5, v5_brick_a5); + poisson_2s_ik(density_fft_a2, density_fft_a4, + vdx_brick_a2, vdy_brick_a2, vdz_brick_a2, + vdx_brick_a4, vdy_brick_a4, vdz_brick_a4, + u_brick_a2, v0_brick_a2, v1_brick_a2, v2_brick_a2, v3_brick_a2, v4_brick_a2, v5_brick_a2, + u_brick_a4, v0_brick_a4, v1_brick_a4, v2_brick_a4, v3_brick_a4, 
v4_brick_a4, v5_brick_a4); + + cg_6->forward_comm(this, FORWARD_IK_A); + + fieldforce_a_ik(); + + if (evflag_atom) cg_peratom_6->forward_comm(this, FORWARD_IK_PERATOM_A); + } + if (evflag_atom) fieldforce_a_peratom(); + } + + if (function[3]) { + //perfrom calculations if no mixing rule applies + particle_map(delxinv_6, delyinv_6, delzinv_6, shift_6, part2grid_6, nupper_6, nlower_6, + nxlo_out_6, nylo_out_6, nzlo_out_6, nxhi_out_6, nyhi_out_6, nzhi_out_6); + + make_rho_none(); + + cg_6->reverse_comm(this, REVERSE_RHO_NONE); + + brick2fft_none(); + + if (differentiation_flag == 1) { + + int n = 0; + for (int k = 0; kforward_comm(this,FORWARD_AD_NONE); + + fieldforce_none_ad(); + + if (vflag_atom) cg_peratom_6->forward_comm(this,FORWARD_AD_PERATOM_NONE); + + } else { + int n = 0; + for (int k = 0; kforward_comm(this,FORWARD_IK_NONE); + + fieldforce_none_ik(); + + + if (evflag_atom) cg_peratom_6->forward_comm(this, FORWARD_IK_PERATOM_NONE); + } + if (evflag_atom) fieldforce_none_peratom(); + } + + // sum energy across procs and add in volume-dependent term + + const double qscale = force->qqrd2e * scale; + if (eflag_global) { + double energy_all; + MPI_Allreduce(&energy_1,&energy_all,1,MPI_DOUBLE,MPI_SUM,world); + energy_1 = energy_all; + MPI_Allreduce(&energy_6,&energy_all,1,MPI_DOUBLE,MPI_SUM,world); + energy_6 = energy_all; + + energy_1 *= 0.5*volume; + energy_6 *= 0.5*volume; + + energy_1 -= g_ewald*qsqsum/MY_PIS + + MY_PI2*qsum*qsum / (g_ewald*g_ewald*volume); + energy_6 += - MY_PI*MY_PIS/(6*volume)*pow(g_ewald_6,3)*csumij + + 1.0/12.0*pow(g_ewald_6,6)*csum; + energy_1 *= qscale; + } + + // sum virial across procs + + if (vflag_global) { + double virial_all[6]; + MPI_Allreduce(virial_1,virial_all,6,MPI_DOUBLE,MPI_SUM,world); + for (i = 0; i < 6; i++) virial[i] = 0.5*qscale*volume*virial_all[i]; + MPI_Allreduce(virial_6,virial_all,6,MPI_DOUBLE,MPI_SUM,world); + for (i = 0; i < 6; i++) virial[i] += 0.5*volume*virial_all[i]; + if 
(function[1]+function[2]+function[3]){ + double a = MY_PI*MY_PIS/(6*volume)*pow(g_ewald_6,3)*csumij; + virial[0] -= a; + virial[1] -= a; + virial[2] -= a; + } + } + + if (eflag_atom) { + if (function[0]) { + double *q = atom->q; + for (i = 0; i < atom->nlocal; i++) { + eatom[i] -= qscale*g_ewald*q[i]*q[i]/MY_PIS + qscale*MY_PI2*q[i]*qsum / (g_ewald*g_ewald*volume); //coulomb self energy correction + } + } + if (function[1] + function[2] + function[3]) { + int tmp; + for (i = 0; i < atom->nlocal; i++) { + tmp = atom->type[i]; + eatom[i] += - MY_PI*MY_PIS/(6*volume)*pow(g_ewald_6,3)*csumi[tmp] + + 1.0/12.0*pow(g_ewald_6,6)*cii[tmp]; + } + } + } + + if (vflag_atom) { + if (function[1] + function[2] + function[3]) { + int tmp; + for (i = 0; i < atom->nlocal; i++) { + tmp = atom->type[i]; + for (int n = 0; n < 3; n++) vatom[i][n] -= MY_PI*MY_PIS/(6*volume)*pow(g_ewald_6,3)*csumi[tmp]; //dispersion self virial correction + } + } + } + + + // 2d slab correction + + if (slabflag) slabcorr(eflag); + if (function[0]) energy += energy_1; + if (function[1] + function[2] + function[3]) energy += energy_6; + + // convert atoms back from lamda to box coords + + if (triclinic) domain->lamda2x(atom->nlocal); +} + +/* ---------------------------------------------------------------------- + initialize coefficients needed for the dispersion density on the grids +------------------------------------------------------------------------- */ + +void PPPMDisp::init_coeffs() // local pair coeffs +{ + int tmp; + int n = atom->ntypes; + int converged; + delete [] B; + if (function[3] + function[2]) { // no mixing rule or arithmetic + if (function[2] && me == 0) { + if (screen) fprintf(screen," Optimizing splitting of Dispersion coefficients\n"); + if (logfile) fprintf(logfile," Optimizing splitting of Dispersion coefficients\n"); + } + // get dispersion coefficients + double **b = (double **) force->pair->extract("B",tmp); + // allocate data for eigenvalue decomposition + double **A; + double 
**Q; + memory->create(A,n,n,"pppm/disp:A"); + memory->create(Q,n,n,"pppm/disp:Q"); + // fill coefficients to matrix a + for (int i = 1; i <= n; i++) + for (int j = 1; j <= n; j++) + A[i-1][j-1] = b[i][j]; + // transform q to a unity matrix + for (int i = 0; i < n; i++) + for (int j = 0; j < n; j++) + Q[i][j] = 0.0; + for (int i = 0; i < n; i++) + Q[i][i] = 1.0; + // perfrom eigenvalue decomposition with QR algorithm + converged = qr_alg(A,Q,n); + if (function[3] && !converged) { + error->all(FLERR,"Matrix factorization to split dispersion coefficients failed"); + } + // determine number of used eigenvalues + // based on maximum allowed number or cutoff criterion + // sort eigenvalues according to their size with bubble sort + double t; + for (int i = 0; i < n; i++) { + for (int j = 0; j < n-1-i; j++) { + if (fabs(A[j][j]) < fabs(A[j+1][j+1])) { + t = A[j][j]; + A[j][j] = A[j+1][j+1]; + A[j+1][j+1] = t; + for (int k = 0; k < n; k++) { + t = Q[k][j]; + Q[k][j] = Q[k][j+1]; + Q[k][j+1] = t; + } + } + } + } + + // check which eigenvalue is the first that is smaller + // than a specified tolerance + // check how many are maximum allowed by the user + double amax = fabs(A[0][0]); + double acrit = amax*splittol; + double bmax = 0; + double err = 0; + nsplit = 0; + for (int i = 0; i < n; i++) { + if (fabs(A[i][i]) > acrit) nsplit++; + else { + bmax = fabs(A[i][i]); + break; + } + } + + err = bmax/amax; + if (err > 1.0e-4) { + char str[128]; + sprintf(str,"Error in splitting of dispersion coeffs is estimated %g",err); + error->warning(FLERR, str); + } + // set B + B = new double[nsplit*n+nsplit]; + for (int i = 0; i< nsplit; i++) { + B[i] = A[i][i]; + for (int j = 0; j < n; j++) { + B[nsplit*(j+1) + i] = Q[j][i]; + } + } + + nsplit_alloc = nsplit; + if (nsplit%2 == 1) nsplit_alloc = nsplit + 1; + // check if the function should preferably be [1] or [2] or [3] + if (nsplit == 1) { + delete [] B; + function[3] = 0; + function[2] = 0; + function[1] = 1; + if (me == 0) { + if 
(screen) fprintf(screen," Using geometric mixing for reciprocal space\n"); + if (logfile) fprintf(logfile," Using geometric mixing for reciprocal space\n"); + } + } + if (function[2] && nsplit <= 6) { + if (me == 0) { + if (screen) fprintf(screen," Using %d instead of 7 structure factors\n",nsplit); + if (logfile) fprintf(logfile," Using %d instead of 7 structure factors\n",nsplit); + } + function[3] = 1; + function[2] = 0; + } + if (function[2] && (nsplit > 6)) { + if (me == 0) { + if (screen) fprintf(screen," Using 7 structure factors\n"); + if (logfile) fprintf(logfile," Using 7 structure factors\n"); + } + delete [] B; + } + if (function[3]) { + if (me == 0) { + if (screen) fprintf(screen," Using %d structure factors\n",nsplit); + if (logfile) fprintf(logfile," Using %d structure factors\n",nsplit); + } + if (nsplit > 9) error->warning(FLERR, "Simulations might be very slow because of large number of structure factors"); + } + + memory->destroy(A); + memory->destroy(Q); + } + if (function[1]) { // geometric 1/r^6 + double **b = (double **) force->pair->extract("B",tmp); + B = new double[n+1]; + for (int i=0; i<=n; ++i) B[i] = sqrt(fabs(b[i][i])); + } + if (function[2]) { // arithmetic 1/r^6 + //cannot use epsilon, because this has not been set yet + double **epsilon = (double **) force->pair->extract("epsilon",tmp); + //cannot use sigma, because this has not been set yet + double **sigma = (double **) force->pair->extract("sigma",tmp); + if (!(epsilon&&sigma)) + error->all(FLERR,"Epsilon or sigma reference not set by pair style in PPPMDisp"); + double eps_i, sigma_i, sigma_n, *bi = B = new double[7*n+7]; + double c[7] = { + 1.0, sqrt(6.0), sqrt(15.0), sqrt(20.0), sqrt(15.0), sqrt(6.0), 1.0}; + for (int i=0; i<=n; ++i) { + eps_i = sqrt(epsilon[i][i]); + sigma_i = sigma[i][i]; + sigma_n = 1.0; + for (int j=0; j<7; ++j) { + *(bi++) = sigma_n*eps_i*c[j]*0.25; + sigma_n *= sigma_i; + } + } + } +} + +/* 
---------------------------------------------------------------------- + Eigenvalue decomposition of a real, symmetric matrix with the QR + method (includes transpformation to Tridiagonal Matrix + Wilkinson + shift) +------------------------------------------------------------------------- */ + +int PPPMDisp::qr_alg(double **A, double **Q, int n) +{ + int converged = 0; + double an1, an, bn1, d, mue; + // allocate some memory for the required operations + double **A0,**Qi,**C,**D,**E; + // make a copy of A for convergence check + memory->create(A0,n,n,"pppm/disp:A0"); + for (int i = 0; i < n; i++) + for (int j = 0; j < n; j++) + A0[i][j] = A[i][j]; + + // allocate an auxiliary matrix Qi + memory->create(Qi,n,n,"pppm/disp:Qi"); + + // alllocate an auxillary matrices for the matrix multiplication + memory->create(C,n,n,"pppm/disp:C"); + memory->create(D,n,n,"pppm/disp:D"); + memory->create(E,n,n,"pppm/disp:E"); + + // transform Matrix A to Tridiagonal form + hessenberg(A,Q,n); + + // start loop for the matrix factorization + int count = 0; + int countmax = 100000; + while (1) { + // make a Wilkinson shift + an1 = A[n-2][n-2]; + an = A[n-1][n-1]; + bn1 = A[n-2][n-1]; + d = (an1-an)/2; + mue = an + d - copysign(1.,d)*sqrt(d*d + bn1*bn1); + for (int i = 0; i < n; i++) + A[i][i] -= mue; + + // perform a QR factorization for a tridiagonal matrix A + qr_tri(Qi,A,n); + + // update the matrices + mmult(A,Qi,C,n); + mmult(Q,Qi,C,n); + + // backward Wilkinson shift + for (int i = 0; i < n; i++) + A[i][i] += mue; + + // check the convergence + converged = check_convergence(A,Q,A0,C,D,E,n); + if (converged) break; + count = count + 1; + if (count == countmax) break; + } + + // free allocated memory + memory->destroy(Qi); + memory->destroy(A0); + memory->destroy(C); + memory->destroy(D); + memory->destroy(E); + + return converged; +} + +/* ---------------------------------------------------------------------- + Transform a Matrix to Hessenberg form (for symmetric Matrices, the + 
result will be a tridiagonal matrix) +------------------------------------------------------------------------- */ + +void PPPMDisp::hessenberg(double **A, double **Q, int n) +{ + double r,a,b,c,s,x1,x2; + for (int i = 0; i < n-1; i++) { + for (int j = i+2; j < n; j++) { + // compute coeffs for the rotation matrix + a = A[i+1][i]; + b = A[j][i]; + r = sqrt(a*a + b*b); + c = a/r; + s = b/r; + // update the entries of A with multiplication from the left + for (int k = 0; k < n; k++) { + x1 = A[i+1][k]; + x2 = A[j][k]; + A[i+1][k] = c*x1 + s*x2; + A[j][k] = -s*x1 + c*x2; + } + // update the entries of A and Q with a multiplication from the right + for (int k = 0; k < n; k++) { + x1 = A[k][i+1]; + x2 = A[k][j]; + A[k][i+1] = c*x1 + s*x2; + A[k][j] = -s*x1 + c*x2; + x1 = Q[k][i+1]; + x2 = Q[k][j]; + Q[k][i+1] = c*x1 + s*x2; + Q[k][j] = -s*x1 + c*x2; + } + } + } +} + +/* ---------------------------------------------------------------------- + QR factorization for a tridiagonal matrix; Result of the factorization + is stored in A and Qi +------------------------------------------------------------------------- */ + +void PPPMDisp::qr_tri(double** Qi,double** A,int n) +{ + double r,a,b,c,s,x1,x2; + int j,k,k0,kmax; + // make Qi a unity matrix + for (int i = 0; i < n; i++) + for (int j = 0; j < n; j++) + Qi[i][j] = 0.0; + for (int i = 0; i < n; i++) + Qi[i][i] = 1.0; + // loop over main diagonal and first of diagonal of A + for (int i = 0; i < n-1; i++) { + j = i+1; + // coefficients of the rotation matrix + a = A[i][i]; + b = A[j][i]; + r = sqrt(a*a + b*b); + c = a/r; + s = b/r; + // update the entries of A and Q + k0 = (i-1>0)?i-1:0; //min(i-1,0); + kmax = (i+3A0[i][j])?Bmax:A0[i][j]; //max(Bmax,A0[i][j]); + double epsabs = eps*Bmax; + + // reconstruct the original matrix + // store the diagonal elements in D + for (int i = 0; i < n; i++) + for (int j = 0; j < n; j++) + D[i][j] = 0.0; + for (int i = 0; i < n; i++) + D[i][i] = A[i][i]; + // store matrix Q in E + for (int i 
= 0; i < n; i++) + for (int j = 0; j < n; j++) + E[i][j] = Q[i][j]; + // E = Q*A + mmult(E,D,C,n); + // store transpose of Q in D + for (int i = 0; i < n; i++) + for (int j = 0; j < n; j++) + D[i][j] = Q[j][i]; + // E = Q*A*Q.t + mmult(E,D,C,n); + + //compare the original matrix and the final matrix + for (int i = 0; i < n; i++) { + for (int j = 0; j < n; j++) { + diff = A0[i][j] - E[i][j]; + epsmax = (epsmax>fabs(diff))?epsmax:fabs(diff);//max(epsmax,fabs(diff)); + } + } + if (epsmax > epsabs) converged = 0; + return converged; +} + +/* ---------------------------------------------------------------------- + allocate memory that depends on # of K-vectors and order +------------------------------------------------------------------------- */ + +void PPPMDisp::allocate() +{ + + int (*procneigh)[2] = comm->procneigh; + + if (function[0]) { + memory->create(work1,2*nfft_both,"pppm/disp:work1"); + memory->create(work2,2*nfft_both,"pppm/disp:work2"); + + memory->create1d_offset(fkx,nxlo_fft,nxhi_fft,"pppm/disp:fkx"); + memory->create1d_offset(fky,nylo_fft,nyhi_fft,"pppm/disp:fky"); + memory->create1d_offset(fkz,nzlo_fft,nzhi_fft,"pppm/disp:fkz"); + + memory->create1d_offset(fkx2,nxlo_fft,nxhi_fft,"pppm/disp:fkx2"); + memory->create1d_offset(fky2,nylo_fft,nyhi_fft,"pppm/disp:fky2"); + memory->create1d_offset(fkz2,nzlo_fft,nzhi_fft,"pppm/disp:fkz2"); + + + memory->create(gf_b,order,"pppm/disp:gf_b"); + memory->create2d_offset(rho1d,3,-order/2,order/2,"pppm/disp:rho1d"); + memory->create2d_offset(rho_coeff,order,(1-order)/2,order/2,"pppm/disp:rho_coeff"); + memory->create2d_offset(drho1d,3,-order/2,order/2,"pppm/disp:rho1d"); + memory->create2d_offset(drho_coeff,order,(1-order)/2,order/2,"pppm/disp:drho_coeff"); + + memory->create(greensfn,nfft_both,"pppm/disp:greensfn"); + memory->create(vg,nfft_both,6,"pppm/disp:vg"); + memory->create(vg2,nfft_both,3,"pppm/disp:vg2"); + + memory->create3d_offset(density_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, + 
nxlo_out,nxhi_out,"pppm/disp:density_brick"); + if ( differentiation_flag == 1) { + memory->create3d_offset(u_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, + nxlo_out,nxhi_out,"pppm/disp:u_brick"); + memory->create(sf_precoeff1,nfft_both,"pppm/disp:sf_precoeff1"); + memory->create(sf_precoeff2,nfft_both,"pppm/disp:sf_precoeff2"); + memory->create(sf_precoeff3,nfft_both,"pppm/disp:sf_precoeff3"); + memory->create(sf_precoeff4,nfft_both,"pppm/disp:sf_precoeff4"); + memory->create(sf_precoeff5,nfft_both,"pppm/disp:sf_precoeff5"); + memory->create(sf_precoeff6,nfft_both,"pppm/disp:sf_precoeff6"); + + } else { + memory->create3d_offset(vdx_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, + nxlo_out,nxhi_out,"pppm/disp:vdx_brick"); + memory->create3d_offset(vdy_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, + nxlo_out,nxhi_out,"pppm/disp:vdy_brick"); + memory->create3d_offset(vdz_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, + nxlo_out,nxhi_out,"pppm/disp:vdz_brick"); + } + memory->create(density_fft,nfft_both,"pppm/disp:density_fft"); + + int tmp; + + fft1 = new FFT3d(lmp,world,nx_pppm,ny_pppm,nz_pppm, + nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft, + nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft, + 0,0,&tmp); + + fft2 = new FFT3d(lmp,world,nx_pppm,ny_pppm,nz_pppm, + nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft, + nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in, + 0,0,&tmp); + + remap = new Remap(lmp,world, + nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in, + nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft, + 1,0,0,FFT_PRECISION); + + // create ghost grid object for rho and electric field communication + + if (differentiation_flag == 1) + cg = new CommGrid(lmp,world,1,1, + nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in, + nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out, + procneigh[0][0],procneigh[0][1],procneigh[1][0], + procneigh[1][1],procneigh[2][0],procneigh[2][1]); + else + cg = new CommGrid(lmp,world,3,1, + 
nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in, + nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out, + procneigh[0][0],procneigh[0][1],procneigh[1][0], + procneigh[1][1],procneigh[2][0],procneigh[2][1]); + } + + if (function[1]) { + memory->create(work1_6,2*nfft_both_6,"pppm/disp:work1_6"); + memory->create(work2_6,2*nfft_both_6,"pppm/disp:work2_6"); + + memory->create1d_offset(fkx_6,nxlo_fft_6,nxhi_fft_6,"pppm/disp:fkx_6"); + memory->create1d_offset(fky_6,nylo_fft_6,nyhi_fft_6,"pppm/disp:fky_6"); + memory->create1d_offset(fkz_6,nzlo_fft_6,nzhi_fft_6,"pppm/disp:fkz_6"); + + memory->create1d_offset(fkx2_6,nxlo_fft_6,nxhi_fft_6,"pppm/disp:fkx2_6"); + memory->create1d_offset(fky2_6,nylo_fft_6,nyhi_fft_6,"pppm/disp:fky2_6"); + memory->create1d_offset(fkz2_6,nzlo_fft_6,nzhi_fft_6,"pppm/disp:fkz2_6"); + + memory->create(gf_b_6,order_6,"pppm/disp:gf_b_6"); + memory->create2d_offset(rho1d_6,3,-order_6/2,order_6/2,"pppm/disp:rho1d_6"); + memory->create2d_offset(rho_coeff_6,order_6,(1-order_6)/2,order_6/2,"pppm/disp:rho_coeff_6"); + memory->create2d_offset(drho1d_6,3,-order_6/2,order_6/2,"pppm/disp:drho1d_6"); + memory->create2d_offset(drho_coeff_6,order_6,(1-order_6)/2,order_6/2,"pppm/disp:drho_coeff_6"); + + memory->create(greensfn_6,nfft_both_6,"pppm/disp:greensfn_6"); + memory->create(vg_6,nfft_both_6,6,"pppm/disp:vg_6"); + memory->create(vg2_6,nfft_both_6,3,"pppm/disp:vg2_6"); + + memory->create3d_offset(density_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:density_brick_g"); + if ( differentiation_flag == 1) { + memory->create3d_offset(u_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_g"); + + memory->create(sf_precoeff1_6,nfft_both_6,"pppm/disp:sf_precoeff1_6"); + memory->create(sf_precoeff2_6,nfft_both_6,"pppm/disp:sf_precoeff2_6"); + memory->create(sf_precoeff3_6,nfft_both_6,"pppm/disp:sf_precoeff3_6"); + 
memory->create(sf_precoeff4_6,nfft_both_6,"pppm/disp:sf_precoeff4_6"); + memory->create(sf_precoeff5_6,nfft_both_6,"pppm/disp:sf_precoeff5_6"); + memory->create(sf_precoeff6_6,nfft_both_6,"pppm/disp:sf_precoeff6_6"); + + } else { + memory->create3d_offset(vdx_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:vdx_brick_g"); + memory->create3d_offset(vdy_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:vdy_brick_g"); + memory->create3d_offset(vdz_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:vdz_brick_g"); + } + memory->create(density_fft_g,nfft_both_6,"pppm/disp:density_fft_g"); + + + int tmp; + + fft1_6 = new FFT3d(lmp,world,nx_pppm_6,ny_pppm_6,nz_pppm_6, + nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6, + nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6, + 0,0,&tmp); + + fft2_6 = new FFT3d(lmp,world,nx_pppm_6,ny_pppm_6,nz_pppm_6, + nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6, + nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6, + 0,0,&tmp); + + remap_6 = new Remap(lmp,world, + nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6, + nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6, + 1,0,0,FFT_PRECISION); + + // create ghost grid object for rho and electric field communication + + if (differentiation_flag == 1) + cg_6 = new CommGrid(lmp,world,1,1, + nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6, + nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6, + procneigh[0][0],procneigh[0][1],procneigh[1][0], + procneigh[1][1],procneigh[2][0],procneigh[2][1]); + else + cg_6 = new CommGrid(lmp,world,3,1, + nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6, + nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6, + procneigh[0][0],procneigh[0][1],procneigh[1][0], + procneigh[1][1],procneigh[2][0],procneigh[2][1]); + } + 
+ if (function[2]) { + memory->create(work1_6,2*nfft_both_6,"pppm/disp:work1_6"); + memory->create(work2_6,2*nfft_both_6,"pppm/disp:work2_6"); + + memory->create1d_offset(fkx_6,nxlo_fft_6,nxhi_fft_6,"pppm/disp:fkx_6"); + memory->create1d_offset(fky_6,nylo_fft_6,nyhi_fft_6,"pppm/disp:fky_6"); + memory->create1d_offset(fkz_6,nzlo_fft_6,nzhi_fft_6,"pppm/disp:fkz_6"); + + memory->create1d_offset(fkx2_6,nxlo_fft_6,nxhi_fft_6,"pppm/disp:fkx2_6"); + memory->create1d_offset(fky2_6,nylo_fft_6,nyhi_fft_6,"pppm/disp:fky2_6"); + memory->create1d_offset(fkz2_6,nzlo_fft_6,nzhi_fft_6,"pppm/disp:fkz2_6"); + + memory->create(gf_b_6,order_6,"pppm/disp:gf_b_6"); + memory->create2d_offset(rho1d_6,3,-order_6/2,order_6/2,"pppm/disp:rho1d_6"); + memory->create2d_offset(rho_coeff_6,order_6,(1-order_6)/2,order_6/2,"pppm/disp:rho_coeff_6"); + memory->create2d_offset(drho1d_6,3,-order_6/2,order_6/2,"pppm/disp:drho1d_6"); + memory->create2d_offset(drho_coeff_6,order_6,(1-order_6)/2,order_6/2,"pppm/disp:drho_coeff_6"); + + memory->create(greensfn_6,nfft_both_6,"pppm/disp:greensfn_6"); + memory->create(vg_6,nfft_both_6,6,"pppm/disp:vg_6"); + memory->create(vg2_6,nfft_both_6,3,"pppm/disp:vg2_6"); + + memory->create3d_offset(density_brick_a0,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:density_brick_a0"); + memory->create3d_offset(density_brick_a1,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:density_brick_a1"); + memory->create3d_offset(density_brick_a2,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:density_brick_a2"); + memory->create3d_offset(density_brick_a3,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:density_brick_a3"); + memory->create3d_offset(density_brick_a4,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:density_brick_a4"); + memory->create3d_offset(density_brick_a5,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + 
nxlo_out_6,nxhi_out_6,"pppm/disp:density_brick_a5"); + memory->create3d_offset(density_brick_a6,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:density_brick_a6"); + + memory->create(density_fft_a0,nfft_both_6,"pppm/disp:density_fft_a0"); + memory->create(density_fft_a1,nfft_both_6,"pppm/disp:density_fft_a1"); + memory->create(density_fft_a2,nfft_both_6,"pppm/disp:density_fft_a2"); + memory->create(density_fft_a3,nfft_both_6,"pppm/disp:density_fft_a3"); + memory->create(density_fft_a4,nfft_both_6,"pppm/disp:density_fft_a4"); + memory->create(density_fft_a5,nfft_both_6,"pppm/disp:density_fft_a5"); + memory->create(density_fft_a6,nfft_both_6,"pppm/disp:density_fft_a6"); + + + if ( differentiation_flag == 1 ) { + memory->create3d_offset(u_brick_a0,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a0"); + memory->create3d_offset(u_brick_a1,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a1"); + memory->create3d_offset(u_brick_a2,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a2"); + memory->create3d_offset(u_brick_a3,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a3"); + memory->create3d_offset(u_brick_a4,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a4"); + memory->create3d_offset(u_brick_a5,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a5"); + memory->create3d_offset(u_brick_a6,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a6"); + + memory->create(sf_precoeff1_6,nfft_both_6,"pppm/disp:sf_precoeff1_6"); + memory->create(sf_precoeff2_6,nfft_both_6,"pppm/disp:sf_precoeff2_6"); + memory->create(sf_precoeff3_6,nfft_both_6,"pppm/disp:sf_precoeff3_6"); + memory->create(sf_precoeff4_6,nfft_both_6,"pppm/disp:sf_precoeff4_6"); + 
memory->create(sf_precoeff5_6,nfft_both_6,"pppm/disp:sf_precoeff5_6"); + memory->create(sf_precoeff6_6,nfft_both_6,"pppm/disp:sf_precoeff6_6"); + + } else { + + memory->create3d_offset(vdx_brick_a0,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:vdx_brick_a0"); + memory->create3d_offset(vdy_brick_a0,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:vdy_brick_a0"); + memory->create3d_offset(vdz_brick_a0,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:vdz_brick_a0"); + + memory->create3d_offset(vdx_brick_a1,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:vdx_brick_a1"); + memory->create3d_offset(vdy_brick_a1,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:vdy_brick_a1"); + memory->create3d_offset(vdz_brick_a1,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:vdz_brick_a1"); + + memory->create3d_offset(vdx_brick_a2,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:vdx_brick_a2"); + memory->create3d_offset(vdy_brick_a2,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:vdy_brick_a2"); + memory->create3d_offset(vdz_brick_a2,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:vdz_brick_a2"); + + memory->create3d_offset(vdx_brick_a3,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:vdx_brick_a3"); + memory->create3d_offset(vdy_brick_a3,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:vdy_brick_a3"); + memory->create3d_offset(vdz_brick_a3,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:vdz_brick_a3"); + + memory->create3d_offset(vdx_brick_a4,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:vdx_brick_a4"); + 
memory->create3d_offset(vdy_brick_a4,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:vdy_brick_a4"); + memory->create3d_offset(vdz_brick_a4,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:vdz_brick_a4"); + + memory->create3d_offset(vdx_brick_a5,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:vdx_brick_a5"); + memory->create3d_offset(vdy_brick_a5,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:vdy_brick_a5"); + memory->create3d_offset(vdz_brick_a5,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:vdz_brick_a5"); + + memory->create3d_offset(vdx_brick_a6,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:vdx_brick_a6"); + memory->create3d_offset(vdy_brick_a6,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:vdy_brick_a6"); + memory->create3d_offset(vdz_brick_a6,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:vdz_brick_a6"); + } + + + + int tmp; + + fft1_6 = new FFT3d(lmp,world,nx_pppm_6,ny_pppm_6,nz_pppm_6, + nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6, + nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6, + 0,0,&tmp); + + fft2_6 = new FFT3d(lmp,world,nx_pppm_6,ny_pppm_6,nz_pppm_6, + nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6, + nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6, + 0,0,&tmp); + + remap_6 = new Remap(lmp,world, + nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6, + nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6, + 1,0,0,FFT_PRECISION); + + // create ghost grid object for rho and electric field communication + + + if (differentiation_flag == 1) + cg_6 = new CommGrid(lmp,world,7,7, + nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6, + nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6, + 
procneigh[0][0],procneigh[0][1],procneigh[1][0], + procneigh[1][1],procneigh[2][0],procneigh[2][1]); + else + cg_6 = new CommGrid(lmp,world,21,7, + nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6, + nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6, + procneigh[0][0],procneigh[0][1],procneigh[1][0], + procneigh[1][1],procneigh[2][0],procneigh[2][1]); + } + + if (function[3]) { + memory->create(work1_6,2*nfft_both_6,"pppm/disp:work1_6"); + memory->create(work2_6,2*nfft_both_6,"pppm/disp:work2_6"); + + memory->create1d_offset(fkx_6,nxlo_fft_6,nxhi_fft_6,"pppm/disp:fkx_6"); + memory->create1d_offset(fky_6,nylo_fft_6,nyhi_fft_6,"pppm/disp:fky_6"); + memory->create1d_offset(fkz_6,nzlo_fft_6,nzhi_fft_6,"pppm/disp:fkz_6"); + + memory->create1d_offset(fkx2_6,nxlo_fft_6,nxhi_fft_6,"pppm/disp:fkx2_6"); + memory->create1d_offset(fky2_6,nylo_fft_6,nyhi_fft_6,"pppm/disp:fky2_6"); + memory->create1d_offset(fkz2_6,nzlo_fft_6,nzhi_fft_6,"pppm/disp:fkz2_6"); + + memory->create(gf_b_6,order_6,"pppm/disp:gf_b_6"); + memory->create2d_offset(rho1d_6,3,-order_6/2,order_6/2,"pppm/disp:rho1d_6"); + memory->create2d_offset(rho_coeff_6,order_6,(1-order_6)/2,order_6/2,"pppm/disp:rho_coeff_6"); + memory->create2d_offset(drho1d_6,3,-order_6/2,order_6/2,"pppm/disp:drho1d_6"); + memory->create2d_offset(drho_coeff_6,order_6,(1-order_6)/2,order_6/2,"pppm/disp:drho_coeff_6"); + + memory->create(greensfn_6,nfft_both_6,"pppm/disp:greensfn_6"); + memory->create(vg_6,nfft_both_6,6,"pppm/disp:vg_6"); + memory->create(vg2_6,nfft_both_6,3,"pppm/disp:vg2_6"); + + memory->create4d_offset(density_brick_none,nsplit_alloc,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:density_brick_none"); + if ( differentiation_flag == 1) { + memory->create4d_offset(u_brick_none,nsplit_alloc,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_none"); + + memory->create(sf_precoeff1_6,nfft_both_6,"pppm/disp:sf_precoeff1_6"); + 
memory->create(sf_precoeff2_6,nfft_both_6,"pppm/disp:sf_precoeff2_6"); + memory->create(sf_precoeff3_6,nfft_both_6,"pppm/disp:sf_precoeff3_6"); + memory->create(sf_precoeff4_6,nfft_both_6,"pppm/disp:sf_precoeff4_6"); + memory->create(sf_precoeff5_6,nfft_both_6,"pppm/disp:sf_precoeff5_6"); + memory->create(sf_precoeff6_6,nfft_both_6,"pppm/disp:sf_precoeff6_6"); + + } else { + memory->create4d_offset(vdx_brick_none,nsplit_alloc,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:vdx_brick_none"); + memory->create4d_offset(vdy_brick_none,nsplit_alloc,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:vdy_brick_none"); + memory->create4d_offset(vdz_brick_none,nsplit_alloc,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:vdz_brick_none"); + } + memory->create(density_fft_none,nsplit_alloc,nfft_both_6,"pppm/disp:density_fft_none"); + + + int tmp; + + fft1_6 = new FFT3d(lmp,world,nx_pppm_6,ny_pppm_6,nz_pppm_6, + nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6, + nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6, + 0,0,&tmp); + + fft2_6 = new FFT3d(lmp,world,nx_pppm_6,ny_pppm_6,nz_pppm_6, + nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6, + nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6, + 0,0,&tmp); + + remap_6 = new Remap(lmp,world, + nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6, + nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6, + 1,0,0,FFT_PRECISION); + + // create ghost grid object for rho and electric field communication + + if (differentiation_flag == 1) + cg_6 = new CommGrid(lmp,world,nsplit_alloc,nsplit_alloc, + nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6, + nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6, + procneigh[0][0],procneigh[0][1],procneigh[1][0], + procneigh[1][1],procneigh[2][0],procneigh[2][1]); + else + cg_6 = new 
CommGrid(lmp,world,3*nsplit_alloc,nsplit_alloc, + nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6, + nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6, + procneigh[0][0],procneigh[0][1],procneigh[1][0], + procneigh[1][1],procneigh[2][0],procneigh[2][1]); + } + +} + +/* ---------------------------------------------------------------------- + allocate memory that depends on # of K-vectors and order + for per atom calculations +------------------------------------------------------------------------- */ + +void PPPMDisp::allocate_peratom() +{ + + int (*procneigh)[2] = comm->procneigh; + + if (function[0]) { + + if (differentiation_flag != 1) + memory->create3d_offset(u_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, + nxlo_out,nxhi_out,"pppm/disp:u_brick"); + + memory->create3d_offset(v0_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, + nxlo_out,nxhi_out,"pppm/disp:v0_brick"); + memory->create3d_offset(v1_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, + nxlo_out,nxhi_out,"pppm/disp:v1_brick"); + memory->create3d_offset(v2_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, + nxlo_out,nxhi_out,"pppm/disp:v2_brick"); + memory->create3d_offset(v3_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, + nxlo_out,nxhi_out,"pppm/disp:v3_brick"); + memory->create3d_offset(v4_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, + nxlo_out,nxhi_out,"pppm/disp:v4_brick"); + memory->create3d_offset(v5_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, + nxlo_out,nxhi_out,"pppm/disp:v5_brick"); + + // create ghost grid object for rho and electric field communication + + if (differentiation_flag == 1) + cg_peratom = + new CommGrid(lmp,world,6,1, + nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in, + nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out, + procneigh[0][0],procneigh[0][1],procneigh[1][0], + procneigh[1][1],procneigh[2][0],procneigh[2][1]); + else + cg_peratom = + new CommGrid(lmp,world,7,1, + nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in, + 
nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out, + procneigh[0][0],procneigh[0][1],procneigh[1][0], + procneigh[1][1],procneigh[2][0],procneigh[2][1]); + + } + + + if (function[1]) { + + if ( differentiation_flag != 1 ) + memory->create3d_offset(u_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_g"); + + memory->create3d_offset(v0_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:v0_brick_g"); + memory->create3d_offset(v1_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:v1_brick_g"); + memory->create3d_offset(v2_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:v2_brick_g"); + memory->create3d_offset(v3_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:v3_brick_g"); + memory->create3d_offset(v4_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:v4_brick_g"); + memory->create3d_offset(v5_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:v5_brick_g"); + + // create ghost grid object for rho and electric field communication + + if (differentiation_flag == 1) + cg_peratom_6 = + new CommGrid(lmp,world,6,1, + nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6, + nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6, + procneigh[0][0],procneigh[0][1],procneigh[1][0], + procneigh[1][1],procneigh[2][0],procneigh[2][1]); + else + cg_peratom_6 = + new CommGrid(lmp,world,7,1, + nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6, + nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6, + procneigh[0][0],procneigh[0][1],procneigh[1][0], + procneigh[1][1],procneigh[2][0],procneigh[2][1]); + + } + + if (function[2]) { + + if ( differentiation_flag != 1 ) { + memory->create3d_offset(u_brick_a0,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + 
nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a0"); + memory->create3d_offset(u_brick_a1,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a1"); + memory->create3d_offset(u_brick_a2,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a2"); + memory->create3d_offset(u_brick_a3,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a3"); + memory->create3d_offset(u_brick_a4,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a4"); + memory->create3d_offset(u_brick_a5,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a5"); + memory->create3d_offset(u_brick_a6,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a6"); + } + + memory->create3d_offset(v0_brick_a0,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:v0_brick_a0"); + memory->create3d_offset(v1_brick_a0,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:v1_brick_a0"); + memory->create3d_offset(v2_brick_a0,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:v2_brick_a0"); + memory->create3d_offset(v3_brick_a0,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:v3_brick_a0"); + memory->create3d_offset(v4_brick_a0,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:v4_brick_a0"); + memory->create3d_offset(v5_brick_a0,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:v5_brick_a0"); + + memory->create3d_offset(v0_brick_a1,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:v0_brick_a1"); + memory->create3d_offset(v1_brick_a1,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:v1_brick_a1"); + memory->create3d_offset(v2_brick_a1,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + 
nxlo_out_6,nxhi_out_6,"pppm/disp:v2_brick_a1"); + memory->create3d_offset(v3_brick_a1,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:v3_brick_a1"); + memory->create3d_offset(v4_brick_a1,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:v4_brick_a1"); + memory->create3d_offset(v5_brick_a1,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:v5_brick_a1"); + + memory->create3d_offset(v0_brick_a2,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:v0_brick_a2"); + memory->create3d_offset(v1_brick_a2,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:v1_brick_a2"); + memory->create3d_offset(v2_brick_a2,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:v2_brick_a2"); + memory->create3d_offset(v3_brick_a2,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:v3_brick_a2"); + memory->create3d_offset(v4_brick_a2,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:v4_brick_a2"); + memory->create3d_offset(v5_brick_a2,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:v5_brick_a2"); + + memory->create3d_offset(v0_brick_a3,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:v0_brick_a3"); + memory->create3d_offset(v1_brick_a3,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:v1_brick_a3"); + memory->create3d_offset(v2_brick_a3,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:v2_brick_a3"); + memory->create3d_offset(v3_brick_a3,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:v3_brick_a3"); + memory->create3d_offset(v4_brick_a3,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:v4_brick_a3"); + memory->create3d_offset(v5_brick_a3,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + 
nxlo_out_6,nxhi_out_6,"pppm/disp:v5_brick_a3"); + + memory->create3d_offset(v0_brick_a4,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:v0_brick_a4"); + memory->create3d_offset(v1_brick_a4,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:v1_brick_a4"); + memory->create3d_offset(v2_brick_a4,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:v2_brick_a4"); + memory->create3d_offset(v3_brick_a4,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:v3_brick_a4"); + memory->create3d_offset(v4_brick_a4,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:v4_brick_a4"); + memory->create3d_offset(v5_brick_a4,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:v5_brick_a4"); + + memory->create3d_offset(v0_brick_a5,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:v0_brick_a5"); + memory->create3d_offset(v1_brick_a5,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:v1_brick_a5"); + memory->create3d_offset(v2_brick_a5,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:v2_brick_a5"); + memory->create3d_offset(v3_brick_a5,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:v3_brick_a5"); + memory->create3d_offset(v4_brick_a5,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:v4_brick_a5"); + memory->create3d_offset(v5_brick_a5,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:v5_brick_a5"); + + memory->create3d_offset(v0_brick_a6,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:v0_brick_a6"); + memory->create3d_offset(v1_brick_a6,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:v1_brick_a6"); + memory->create3d_offset(v2_brick_a6,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, 
+ nxlo_out_6,nxhi_out_6,"pppm/disp:v2_brick_a6"); + memory->create3d_offset(v3_brick_a6,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:v3_brick_a6"); + memory->create3d_offset(v4_brick_a6,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:v4_brick_a6"); + memory->create3d_offset(v5_brick_a6,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:v5_brick_a6"); + + // create ghost grid object for rho and electric field communication + + if (differentiation_flag == 1) + cg_peratom_6 = + new CommGrid(lmp,world,42,1, + nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6, + nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6, + procneigh[0][0],procneigh[0][1],procneigh[1][0], + procneigh[1][1],procneigh[2][0],procneigh[2][1]); + else + cg_peratom_6 = + new CommGrid(lmp,world,49,1, + nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6, + nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6, + procneigh[0][0],procneigh[0][1],procneigh[1][0], + procneigh[1][1],procneigh[2][0],procneigh[2][1]); + + } + + if (function[3]) { + + if ( differentiation_flag != 1 ) + memory->create4d_offset(u_brick_none,nsplit_alloc,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_none"); + + memory->create4d_offset(v0_brick_none,nsplit_alloc,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:v0_brick_none"); + memory->create4d_offset(v1_brick_none,nsplit_alloc,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:v1_brick_none"); + memory->create4d_offset(v2_brick_none,nsplit_alloc,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:v2_brick_none"); + memory->create4d_offset(v3_brick_none,nsplit_alloc,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:v3_brick_none"); + 
memory->create4d_offset(v4_brick_none,nsplit_alloc,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:v4_brick_none"); + memory->create4d_offset(v5_brick_none,nsplit_alloc,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6, + nxlo_out_6,nxhi_out_6,"pppm/disp:v5_brick_none"); + + // create ghost grid object for rho and electric field communication + + if (differentiation_flag == 1) + cg_peratom_6 = + new CommGrid(lmp,world,6*nsplit_alloc,1, + nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6, + nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6, + procneigh[0][0],procneigh[0][1],procneigh[1][0], + procneigh[1][1],procneigh[2][0],procneigh[2][1]); + else + cg_peratom_6 = + new CommGrid(lmp,world,7*nsplit_alloc,1, + nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6, + nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6, + procneigh[0][0],procneigh[0][1],procneigh[1][0], + procneigh[1][1],procneigh[2][0],procneigh[2][1]); + + } +} + + +/* ---------------------------------------------------------------------- + deallocate memory that depends on # of K-vectors and order +------------------------------------------------------------------------- */ + +void PPPMDisp::deallocate() +{ + memory->destroy3d_offset(density_brick,nzlo_out,nylo_out,nxlo_out); + memory->destroy3d_offset(vdx_brick,nzlo_out,nylo_out,nxlo_out); + memory->destroy3d_offset(vdy_brick,nzlo_out,nylo_out,nxlo_out); + memory->destroy3d_offset(vdz_brick,nzlo_out,nylo_out,nxlo_out); + memory->destroy(density_fft); + density_brick = vdx_brick = vdy_brick = vdz_brick = NULL; + density_fft = NULL; + + memory->destroy3d_offset(density_brick_g,nzlo_out_6,nylo_out_6,nxlo_out_6); + memory->destroy3d_offset(vdx_brick_g,nzlo_out_6,nylo_out_6,nxlo_out_6); + memory->destroy3d_offset(vdy_brick_g,nzlo_out_6,nylo_out_6,nxlo_out_6); + memory->destroy3d_offset(vdz_brick_g,nzlo_out_6,nylo_out_6,nxlo_out_6); + memory->destroy(density_fft_g); + density_brick_g 
= vdx_brick_g = vdy_brick_g = vdz_brick_g = NULL; + density_fft_g = NULL; + + memory->destroy3d_offset(density_brick_a0,nzlo_out_6,nylo_out_6,nxlo_out_6); + memory->destroy3d_offset(vdx_brick_a0,nzlo_out_6,nylo_out_6,nxlo_out_6); + memory->destroy3d_offset(vdy_brick_a0,nzlo_out_6,nylo_out_6,nxlo_out_6); + memory->destroy3d_offset(vdz_brick_a0,nzlo_out_6,nylo_out_6,nxlo_out_6); + memory->destroy(density_fft_a0); + density_brick_a0 = vdx_brick_a0 = vdy_brick_a0 = vdz_brick_a0 = NULL; + density_fft_a0 = NULL; + + memory->destroy3d_offset(density_brick_a1,nzlo_out_6,nylo_out_6,nxlo_out_6); + memory->destroy3d_offset(vdx_brick_a1,nzlo_out_6,nylo_out_6,nxlo_out_6); + memory->destroy3d_offset(vdy_brick_a1,nzlo_out_6,nylo_out_6,nxlo_out_6); + memory->destroy3d_offset(vdz_brick_a1,nzlo_out_6,nylo_out_6,nxlo_out_6); + memory->destroy(density_fft_a1); + density_brick_a1 = vdx_brick_a1 = vdy_brick_a1 = vdz_brick_a1 = NULL; + density_fft_a1 = NULL; + + memory->destroy3d_offset(density_brick_a2,nzlo_out_6,nylo_out_6,nxlo_out_6); + memory->destroy3d_offset(vdx_brick_a2,nzlo_out_6,nylo_out_6,nxlo_out_6); + memory->destroy3d_offset(vdy_brick_a2,nzlo_out_6,nylo_out_6,nxlo_out_6); + memory->destroy3d_offset(vdz_brick_a2,nzlo_out_6,nylo_out_6,nxlo_out_6); + memory->destroy(density_fft_a2); + density_brick_a2 = vdx_brick_a2 = vdy_brick_a2 = vdz_brick_a2 = NULL; + density_fft_a2 = NULL; + + memory->destroy3d_offset(density_brick_a3,nzlo_out_6,nylo_out_6,nxlo_out_6); + memory->destroy3d_offset(vdx_brick_a3,nzlo_out_6,nylo_out_6,nxlo_out_6); + memory->destroy3d_offset(vdy_brick_a3,nzlo_out_6,nylo_out_6,nxlo_out_6); + memory->destroy3d_offset(vdz_brick_a3,nzlo_out_6,nylo_out_6,nxlo_out_6); + memory->destroy(density_fft_a3); + density_brick_a3 = vdx_brick_a3 = vdy_brick_a3 = vdz_brick_a3 = NULL; + density_fft_a3 = NULL; + + memory->destroy3d_offset(density_brick_a4,nzlo_out_6,nylo_out_6,nxlo_out_6); + memory->destroy3d_offset(vdx_brick_a4,nzlo_out_6,nylo_out_6,nxlo_out_6); + 
memory->destroy3d_offset(vdy_brick_a4,nzlo_out_6,nylo_out_6,nxlo_out_6); + memory->destroy3d_offset(vdz_brick_a4,nzlo_out_6,nylo_out_6,nxlo_out_6); + memory->destroy(density_fft_a4); + density_brick_a4 = vdx_brick_a4 = vdy_brick_a4 = vdz_brick_a4 = NULL; + density_fft_a4 = NULL; + + memory->destroy3d_offset(density_brick_a5,nzlo_out_6,nylo_out_6,nxlo_out_6); + memory->destroy3d_offset(vdx_brick_a5,nzlo_out_6,nylo_out_6,nxlo_out_6); + memory->destroy3d_offset(vdy_brick_a5,nzlo_out_6,nylo_out_6,nxlo_out_6); + memory->destroy3d_offset(vdz_brick_a5,nzlo_out_6,nylo_out_6,nxlo_out_6); + memory->destroy(density_fft_a5); + density_brick_a5 = vdx_brick_a5 = vdy_brick_a5 = vdz_brick_a5 = NULL; + density_fft_a5 = NULL; + + memory->destroy3d_offset(density_brick_a6,nzlo_out_6,nylo_out_6,nxlo_out_6); + memory->destroy3d_offset(vdx_brick_a6,nzlo_out_6,nylo_out_6,nxlo_out_6); + memory->destroy3d_offset(vdy_brick_a6,nzlo_out_6,nylo_out_6,nxlo_out_6); + memory->destroy3d_offset(vdz_brick_a6,nzlo_out_6,nylo_out_6,nxlo_out_6); + memory->destroy(density_fft_a6); + density_brick_a6 = vdx_brick_a6 = vdy_brick_a6 = vdz_brick_a6 = NULL; + density_fft_a6 = NULL; + + memory->destroy4d_offset(density_brick_none,nzlo_out_6,nylo_out_6,nxlo_out_6); + memory->destroy4d_offset(vdx_brick_none,nzlo_out_6,nylo_out_6,nxlo_out_6); + memory->destroy4d_offset(vdy_brick_none,nzlo_out_6,nylo_out_6,nxlo_out_6); + memory->destroy4d_offset(vdz_brick_none,nzlo_out_6,nylo_out_6,nxlo_out_6); + memory->destroy(density_fft_none); + density_brick_none = vdx_brick_none = vdy_brick_none = vdz_brick_none = NULL; + density_fft_none = NULL; + + memory->destroy(sf_precoeff1); + memory->destroy(sf_precoeff2); + memory->destroy(sf_precoeff3); + memory->destroy(sf_precoeff4); + memory->destroy(sf_precoeff5); + memory->destroy(sf_precoeff6); + sf_precoeff1 = sf_precoeff2 = sf_precoeff3 = sf_precoeff4 = sf_precoeff5 = sf_precoeff6 = NULL; + + memory->destroy(sf_precoeff1_6); + memory->destroy(sf_precoeff2_6); + 
memory->destroy(sf_precoeff3_6); + memory->destroy(sf_precoeff4_6); + memory->destroy(sf_precoeff5_6); + memory->destroy(sf_precoeff6_6); + sf_precoeff1_6 = sf_precoeff2_6 = sf_precoeff3_6 = sf_precoeff4_6 = sf_precoeff5_6 = sf_precoeff6_6 = NULL; + + memory->destroy(greensfn); + memory->destroy(greensfn_6); + memory->destroy(work1); + memory->destroy(work2); + memory->destroy(work1_6); + memory->destroy(work2_6); + memory->destroy(vg); + memory->destroy(vg2); + memory->destroy(vg_6); + memory->destroy(vg2_6); + greensfn = greensfn_6 = NULL; + work1 = work2 = work1_6 = work2_6 = NULL; + vg = vg2 = vg_6 = vg2_6 = NULL; + + memory->destroy1d_offset(fkx,nxlo_fft); + memory->destroy1d_offset(fky,nylo_fft); + memory->destroy1d_offset(fkz,nzlo_fft); + fkx = fky = fkz = NULL; + + memory->destroy1d_offset(fkx2,nxlo_fft); + memory->destroy1d_offset(fky2,nylo_fft); + memory->destroy1d_offset(fkz2,nzlo_fft); + fkx2 = fky2 = fkz2 = NULL; + + memory->destroy1d_offset(fkx_6,nxlo_fft_6); + memory->destroy1d_offset(fky_6,nylo_fft_6); + memory->destroy1d_offset(fkz_6,nzlo_fft_6); + fkx_6 = fky_6 = fkz_6 = NULL; + + memory->destroy1d_offset(fkx2_6,nxlo_fft_6); + memory->destroy1d_offset(fky2_6,nylo_fft_6); + memory->destroy1d_offset(fkz2_6,nzlo_fft_6); + fkx2_6 = fky2_6 = fkz2_6 = NULL; + + + memory->destroy(gf_b); + memory->destroy2d_offset(rho1d,-order/2); + memory->destroy2d_offset(rho_coeff,(1-order)/2); + memory->destroy2d_offset(drho1d,-order/2); + memory->destroy2d_offset(drho_coeff, (1-order)/2); + gf_b = NULL; + rho1d = rho_coeff = drho1d = drho_coeff = NULL; + + memory->destroy(gf_b_6); + memory->destroy2d_offset(rho1d_6,-order_6/2); + memory->destroy2d_offset(rho_coeff_6,(1-order_6)/2); + memory->destroy2d_offset(drho1d_6,-order_6/2); + memory->destroy2d_offset(drho_coeff_6,(1-order_6)/2); + gf_b_6 = NULL; + rho1d_6 = rho_coeff_6 = drho1d_6 = drho_coeff_6 = NULL; + + delete fft1; + delete fft2; + delete remap; + delete cg; + fft1 = fft2 = NULL; + remap = NULL; + cg = 
NULL; + + delete fft1_6; + delete fft2_6; + delete remap_6; + delete cg_6; + fft1_6 = fft2_6 = NULL; + remap_6 = NULL; + cg_6 = NULL; +} + + +/* ---------------------------------------------------------------------- + deallocate memory that depends on # of K-vectors and order + for per atom calculations +------------------------------------------------------------------------- */ + +void PPPMDisp::deallocate_peratom() +{ + peratom_allocate_flag = 0; + + memory->destroy3d_offset(u_brick, nzlo_out, nylo_out, nxlo_out); + memory->destroy3d_offset(v0_brick, nzlo_out, nylo_out, nxlo_out); + memory->destroy3d_offset(v1_brick, nzlo_out, nylo_out, nxlo_out); + memory->destroy3d_offset(v2_brick, nzlo_out, nylo_out, nxlo_out); + memory->destroy3d_offset(v3_brick, nzlo_out, nylo_out, nxlo_out); + memory->destroy3d_offset(v4_brick, nzlo_out, nylo_out, nxlo_out); + memory->destroy3d_offset(v5_brick, nzlo_out, nylo_out, nxlo_out); + u_brick = v0_brick = v1_brick = v2_brick = v3_brick = v4_brick = v5_brick = NULL; + + memory->destroy3d_offset(u_brick_g, nzlo_out_6, nylo_out_6, nxlo_out_6); + memory->destroy3d_offset(v0_brick_g, nzlo_out_6, nylo_out_6, nxlo_out_6); + memory->destroy3d_offset(v1_brick_g, nzlo_out_6, nylo_out_6, nxlo_out_6); + memory->destroy3d_offset(v2_brick_g, nzlo_out_6, nylo_out_6, nxlo_out_6); + memory->destroy3d_offset(v3_brick_g, nzlo_out_6, nylo_out_6, nxlo_out_6); + memory->destroy3d_offset(v4_brick_g, nzlo_out_6, nylo_out_6, nxlo_out_6); + memory->destroy3d_offset(v5_brick_g, nzlo_out_6, nylo_out_6, nxlo_out_6); + u_brick_g = v0_brick_g = v1_brick_g = v2_brick_g = v3_brick_g = v4_brick_g = v5_brick_g = NULL; + + memory->destroy3d_offset(u_brick_a0, nzlo_out_6, nylo_out_6, nxlo_out_6); + memory->destroy3d_offset(v0_brick_a0, nzlo_out_6, nylo_out_6, nxlo_out_6); + memory->destroy3d_offset(v1_brick_a0, nzlo_out_6, nylo_out_6, nxlo_out_6); + memory->destroy3d_offset(v2_brick_a0, nzlo_out_6, nylo_out_6, nxlo_out_6); + memory->destroy3d_offset(v3_brick_a0, 
nzlo_out_6, nylo_out_6, nxlo_out_6); + memory->destroy3d_offset(v4_brick_a0, nzlo_out_6, nylo_out_6, nxlo_out_6); + memory->destroy3d_offset(v5_brick_a0, nzlo_out_6, nylo_out_6, nxlo_out_6); + u_brick_a0 = v0_brick_a0 = v1_brick_a0 = v2_brick_a0 = v3_brick_a0 = v4_brick_a0 = v5_brick_a0 = NULL; + + memory->destroy3d_offset(u_brick_a1, nzlo_out_6, nylo_out_6, nxlo_out_6); + memory->destroy3d_offset(v0_brick_a1, nzlo_out_6, nylo_out_6, nxlo_out_6); + memory->destroy3d_offset(v1_brick_a1, nzlo_out_6, nylo_out_6, nxlo_out_6); + memory->destroy3d_offset(v2_brick_a1, nzlo_out_6, nylo_out_6, nxlo_out_6); + memory->destroy3d_offset(v3_brick_a1, nzlo_out_6, nylo_out_6, nxlo_out_6); + memory->destroy3d_offset(v4_brick_a1, nzlo_out_6, nylo_out_6, nxlo_out_6); + memory->destroy3d_offset(v5_brick_a1, nzlo_out_6, nylo_out_6, nxlo_out_6); + u_brick_a1 = v0_brick_a1 = v1_brick_a1 = v2_brick_a1 = v3_brick_a1 = v4_brick_a1 = v5_brick_a1 = NULL; + + memory->destroy3d_offset(u_brick_a2, nzlo_out_6, nylo_out_6, nxlo_out_6); + memory->destroy3d_offset(v0_brick_a2, nzlo_out_6, nylo_out_6, nxlo_out_6); + memory->destroy3d_offset(v1_brick_a2, nzlo_out_6, nylo_out_6, nxlo_out_6); + memory->destroy3d_offset(v2_brick_a2, nzlo_out_6, nylo_out_6, nxlo_out_6); + memory->destroy3d_offset(v3_brick_a2, nzlo_out_6, nylo_out_6, nxlo_out_6); + memory->destroy3d_offset(v4_brick_a2, nzlo_out_6, nylo_out_6, nxlo_out_6); + memory->destroy3d_offset(v5_brick_a2, nzlo_out_6, nylo_out_6, nxlo_out_6); + u_brick_a2 = v0_brick_a2 = v1_brick_a2 = v2_brick_a2 = v3_brick_a2 = v4_brick_a2 = v5_brick_a2 = NULL; + + memory->destroy3d_offset(u_brick_a3, nzlo_out_6, nylo_out_6, nxlo_out_6); + memory->destroy3d_offset(v0_brick_a3, nzlo_out_6, nylo_out_6, nxlo_out_6); + memory->destroy3d_offset(v1_brick_a3, nzlo_out_6, nylo_out_6, nxlo_out_6); + memory->destroy3d_offset(v2_brick_a3, nzlo_out_6, nylo_out_6, nxlo_out_6); + memory->destroy3d_offset(v3_brick_a3, nzlo_out_6, nylo_out_6, nxlo_out_6); + 
memory->destroy3d_offset(v4_brick_a3, nzlo_out_6, nylo_out_6, nxlo_out_6); + memory->destroy3d_offset(v5_brick_a3, nzlo_out_6, nylo_out_6, nxlo_out_6); + u_brick_a3 = v0_brick_a3 = v1_brick_a3 = v2_brick_a3 = v3_brick_a3 = v4_brick_a3 = v5_brick_a3 = NULL; + + memory->destroy3d_offset(u_brick_a4, nzlo_out_6, nylo_out_6, nxlo_out_6); + memory->destroy3d_offset(v0_brick_a4, nzlo_out_6, nylo_out_6, nxlo_out_6); + memory->destroy3d_offset(v1_brick_a4, nzlo_out_6, nylo_out_6, nxlo_out_6); + memory->destroy3d_offset(v2_brick_a4, nzlo_out_6, nylo_out_6, nxlo_out_6); + memory->destroy3d_offset(v3_brick_a4, nzlo_out_6, nylo_out_6, nxlo_out_6); + memory->destroy3d_offset(v4_brick_a4, nzlo_out_6, nylo_out_6, nxlo_out_6); + memory->destroy3d_offset(v5_brick_a4, nzlo_out_6, nylo_out_6, nxlo_out_6); + u_brick_a4 = v0_brick_a4 = v1_brick_a4 = v2_brick_a4 = v3_brick_a4 = v4_brick_a4 = v5_brick_a4 = NULL; + + memory->destroy3d_offset(u_brick_a5, nzlo_out_6, nylo_out_6, nxlo_out_6); + memory->destroy3d_offset(v0_brick_a5, nzlo_out_6, nylo_out_6, nxlo_out_6); + memory->destroy3d_offset(v1_brick_a5, nzlo_out_6, nylo_out_6, nxlo_out_6); + memory->destroy3d_offset(v2_brick_a5, nzlo_out_6, nylo_out_6, nxlo_out_6); + memory->destroy3d_offset(v3_brick_a5, nzlo_out_6, nylo_out_6, nxlo_out_6); + memory->destroy3d_offset(v4_brick_a5, nzlo_out_6, nylo_out_6, nxlo_out_6); + memory->destroy3d_offset(v5_brick_a5, nzlo_out_6, nylo_out_6, nxlo_out_6); + u_brick_a5 = v0_brick_a5 = v1_brick_a5 = v2_brick_a5 = v3_brick_a5 = v4_brick_a5 = v5_brick_a5 = NULL; + + memory->destroy3d_offset(u_brick_a6, nzlo_out_6, nylo_out_6, nxlo_out_6); + memory->destroy3d_offset(v0_brick_a6, nzlo_out_6, nylo_out_6, nxlo_out_6); + memory->destroy3d_offset(v1_brick_a6, nzlo_out_6, nylo_out_6, nxlo_out_6); + memory->destroy3d_offset(v2_brick_a6, nzlo_out_6, nylo_out_6, nxlo_out_6); + memory->destroy3d_offset(v3_brick_a6, nzlo_out_6, nylo_out_6, nxlo_out_6); + memory->destroy3d_offset(v4_brick_a6, nzlo_out_6, nylo_out_6, 
nxlo_out_6); + memory->destroy3d_offset(v5_brick_a6, nzlo_out_6, nylo_out_6, nxlo_out_6); + u_brick_a6 = v0_brick_a6 = v1_brick_a6 = v2_brick_a6 = v3_brick_a6 = v4_brick_a6 = v5_brick_a6 = NULL; + + memory->destroy4d_offset(u_brick_none, nzlo_out_6, nylo_out_6, nxlo_out_6); + memory->destroy4d_offset(v0_brick_none, nzlo_out_6, nylo_out_6, nxlo_out_6); + memory->destroy4d_offset(v1_brick_none, nzlo_out_6, nylo_out_6, nxlo_out_6); + memory->destroy4d_offset(v2_brick_none, nzlo_out_6, nylo_out_6, nxlo_out_6); + memory->destroy4d_offset(v3_brick_none, nzlo_out_6, nylo_out_6, nxlo_out_6); + memory->destroy4d_offset(v4_brick_none, nzlo_out_6, nylo_out_6, nxlo_out_6); + memory->destroy4d_offset(v5_brick_none, nzlo_out_6, nylo_out_6, nxlo_out_6); + u_brick_none = v0_brick_none = v1_brick_none = v2_brick_none = v3_brick_none = v4_brick_none = v5_brick_none = NULL; + + delete cg_peratom; + delete cg_peratom_6; + cg_peratom = cg_peratom_6 = NULL; +} + +/* ---------------------------------------------------------------------- + set size of FFT grid (nx,ny,nz_pppm) and g_ewald + for Coulomb interactions +------------------------------------------------------------------------- */ + +void PPPMDisp::set_grid() +{ + double q2 = qsqsum * force->qqrd2e; + + // use xprd,yprd,zprd even if triclinic so grid size is the same + // adjust z dimension for 2d slab PPPM + // 3d PPPM just uses zprd since slab_volfactor = 1.0 + + double xprd = domain->xprd; + double yprd = domain->yprd; + double zprd = domain->zprd; + double zprd_slab = zprd*slab_volfactor; + + // make initial g_ewald estimate + // based on desired accuracy and real space cutoff + // fluid-occupied volume used to estimate real-space error + // zprd used rather than zprd_slab + + double h, h_x,h_y,h_z; + bigint natoms = atom->natoms; + + if (!gewaldflag) { + g_ewald = accuracy*sqrt(natoms*cutoff*xprd*yprd*zprd) / (2.0*q2); + if (g_ewald >= 1.0) + error->all(FLERR,"KSpace accuracy too large to estimate G vector"); + g_ewald = 
sqrt(-log(g_ewald)) / cutoff; + } + + // set optimal nx_pppm,ny_pppm,nz_pppm based on order and accuracy + // nz_pppm uses extended zprd_slab instead of zprd + // reduce it until accuracy target is met + + if (!gridflag) { + h = h_x = h_y = h_z = 4.0/g_ewald; + int count = 0; + while (1) { + + // set grid dimension + nx_pppm = static_cast (xprd/h_x); + ny_pppm = static_cast (yprd/h_y); + nz_pppm = static_cast (zprd_slab/h_z); + + if (nx_pppm <= 1) nx_pppm = 2; + if (ny_pppm <= 1) ny_pppm = 2; + if (nz_pppm <= 1) nz_pppm = 2; + + //set local grid dimension + int npey_fft,npez_fft; + if (nz_pppm >= nprocs) { + npey_fft = 1; + npez_fft = nprocs; + } else procs2grid2d(nprocs,ny_pppm,nz_pppm,&npey_fft,&npez_fft); + + int me_y = me % npey_fft; + int me_z = me / npey_fft; + + nxlo_fft = 0; + nxhi_fft = nx_pppm - 1; + nylo_fft = me_y*ny_pppm/npey_fft; + nyhi_fft = (me_y+1)*ny_pppm/npey_fft - 1; + nzlo_fft = me_z*nz_pppm/npez_fft; + nzhi_fft = (me_z+1)*nz_pppm/npez_fft - 1; + + double qopt = compute_qopt(); + + double dfkspace = sqrt(qopt/natoms)*q2/(xprd*yprd*zprd_slab); + + count++; + + // break loop if the accuracy has been reached or too many loops have been performed + if (dfkspace <= accuracy) break; + if (count > 500) error->all(FLERR, "Could not compute grid size for Coulomb interaction"); + h *= 0.95; + h_x = h_y = h_z = h; + } + } + + // boost grid size until it is factorable + + while (!factorable(nx_pppm)) nx_pppm++; + while (!factorable(ny_pppm)) ny_pppm++; + while (!factorable(nz_pppm)) nz_pppm++; +} + +/* ---------------------------------------------------------------------- + set the FFT parameters +------------------------------------------------------------------------- */ + +void PPPMDisp::set_fft_parameters(int& nx_p,int& ny_p,int& nz_p, + int& nxlo_f,int& nylo_f,int& nzlo_f, + int& nxhi_f,int& nyhi_f,int& nzhi_f, + int& nxlo_i,int& nylo_i,int& nzlo_i, + int& nxhi_i,int& nyhi_i,int& nzhi_i, + int& nxlo_o,int& nylo_o,int& nzlo_o, + int& nxhi_o,int& 
nyhi_o,int& nzhi_o, + int& nlow, int& nupp, + int& ng, int& nf, int& nfb, + double& sft,double& sftone, int& ord) +{ + // global indices of PPPM grid range from 0 to N-1 + // nlo_in,nhi_in = lower/upper limits of the 3d sub-brick of + // global PPPM grid that I own without ghost cells + // for slab PPPM, assign z grid as if it were not extended + + nxlo_i = static_cast (comm->xsplit[comm->myloc[0]] * nx_p); + nxhi_i = static_cast (comm->xsplit[comm->myloc[0]+1] * nx_p) - 1; + + nylo_i = static_cast (comm->ysplit[comm->myloc[1]] * ny_p); + nyhi_i = static_cast (comm->ysplit[comm->myloc[1]+1] * ny_p) - 1; + + nzlo_i = static_cast + (comm->zsplit[comm->myloc[2]] * nz_p/slab_volfactor); + nzhi_i = static_cast + (comm->zsplit[comm->myloc[2]+1] * nz_p/slab_volfactor) - 1; + + + // nlow,nupp = stencil size for mapping particles to PPPM grid + + nlow = -(ord-1)/2; + nupp = ord/2; + + // sft values for particle <-> grid mapping + // add/subtract OFFSET to avoid int(-0.75) = 0 when want it to be -1 + + if (ord % 2) sft = OFFSET + 0.5; + else sft = OFFSET; + if (ord % 2) sftone = 0.0; + else sftone = 0.5; + + // nlo_out,nhi_out = lower/upper limits of the 3d sub-brick of + // global PPPM grid that my particles can contribute charge to + // effectively nlo_in,nhi_in + ghost cells + // nlo,nhi = global coords of grid pt to "lower left" of smallest/largest + // position a particle in my box can be at + // dist[3] = particle position bound = subbox + skin/2.0 + qdist + // qdist = offset due to TIP4P fictitious charge + // convert to triclinic if necessary + // nlo_out,nhi_out = nlo,nhi + stencil size for particle mapping + // for slab PPPM, assign z grid as if it were not extended + + double *prd,*sublo,*subhi; + + if (triclinic == 0) { + prd = domain->prd; + boxlo = domain->boxlo; + sublo = domain->sublo; + subhi = domain->subhi; + } else { + prd = domain->prd_lamda; + boxlo = domain->boxlo_lamda; + sublo = domain->sublo_lamda; + subhi = domain->subhi_lamda; + } + + double xprd 
= prd[0]; + double yprd = prd[1]; + double zprd = prd[2]; + double zprd_slab = zprd*slab_volfactor; + + double dist[3]; + double cuthalf = 0.5*neighbor->skin + qdist; + if (triclinic == 0) dist[0] = dist[1] = dist[2] = cuthalf; + else { + dist[0] = cuthalf/domain->prd[0]; + dist[1] = cuthalf/domain->prd[1]; + dist[2] = cuthalf/domain->prd[2]; + } + + int nlo,nhi; + + nlo = static_cast ((sublo[0]-dist[0]-boxlo[0]) * + nx_p/xprd + sft) - OFFSET; + nhi = static_cast ((subhi[0]+dist[0]-boxlo[0]) * + nx_p/xprd + sft) - OFFSET; + nxlo_o = nlo + nlow; + nxhi_o = nhi + nupp; + + nlo = static_cast ((sublo[1]-dist[1]-boxlo[1]) * + ny_p/yprd + sft) - OFFSET; + nhi = static_cast ((subhi[1]+dist[1]-boxlo[1]) * + ny_p/yprd + sft) - OFFSET; + nylo_o = nlo + nlow; + nyhi_o = nhi + nupp; + + nlo = static_cast ((sublo[2]-dist[2]-boxlo[2]) * + nz_p/zprd_slab + sft) - OFFSET; + nhi = static_cast ((subhi[2]+dist[2]-boxlo[2]) * + nz_p/zprd_slab + sft) - OFFSET; + nzlo_o = nlo + nlow; + nzhi_o = nhi + nupp; + + // for slab PPPM, change the grid boundary for processors at +z end + // to include the empty volume between periodically repeating slabs + // for slab PPPM, want charge data communicated from -z proc to +z proc, + // but not vice versa, also want field data communicated from +z proc to + // -z proc, but not vice versa + // this is accomplished by nzhi_i = nzhi_o on +z end (no ghost cells) + + if (slabflag && (comm->myloc[2] == comm->procgrid[2]-1)) { + nzhi_i = nz_p - 1; + nzhi_o = nz_p - 1; + } + + // decomposition of FFT mesh + // global indices range from 0 to N-1 + // proc owns entire x-dimension, clump of columns in y,z dimensions + // npey_fft,npez_fft = # of procs in y,z dims + // if nprocs is small enough, proc can own 1 or more entire xy planes, + // else proc owns 2d sub-blocks of yz plane + // me_y,me_z = which proc (0-npe_fft-1) I am in y,z dimensions + // nlo_fft,nhi_fft = lower/upper limit of the section + // of the global FFT mesh that I own + + int 
npey_fft,npez_fft; + if (nz_p >= nprocs) { + npey_fft = 1; + npez_fft = nprocs; + } else procs2grid2d(nprocs,ny_p,nz_p,&npey_fft,&npez_fft); + + int me_y = me % npey_fft; + int me_z = me / npey_fft; + + nxlo_f = 0; + nxhi_f = nx_p - 1; + nylo_f = me_y*ny_p/npey_fft; + nyhi_f = (me_y+1)*ny_p/npey_fft - 1; + nzlo_f = me_z*nz_p/npez_fft; + nzhi_f = (me_z+1)*nz_p/npez_fft - 1; + + // PPPM grid for this proc, including ghosts + + ng = (nxhi_o-nxlo_o+1) * (nyhi_o-nylo_o+1) * + (nzhi_o-nzlo_o+1); + + // FFT arrays on this proc, without ghosts + // nfft = FFT points in FFT decomposition on this proc + // nfft_brick = FFT points in 3d brick-decomposition on this proc + // nfft_both = greater of 2 values + + nf = (nxhi_f-nxlo_f+1) * (nyhi_f-nylo_f+1) * + (nzhi_f-nzlo_f+1); + int nfft_brick = (nxhi_i-nxlo_i+1) * (nyhi_i-nylo_i+1) * + (nzhi_i-nzlo_i+1); + nfb = MAX(nf,nfft_brick); + +} + +/* ---------------------------------------------------------------------- + check if all factors of n are in list of factors + return 1 if yes, 0 if no +------------------------------------------------------------------------- */ + +int PPPMDisp::factorable(int n) +{ + int i; + + while (n > 1) { + for (i = 0; i < nfactors; i++) { + if (n % factors[i] == 0) { + n /= factors[i]; + break; + } + } + if (i == nfactors) return 0; + } + + return 1; +} + +/* ---------------------------------------------------------------------- + pre-compute Green's function denominator expansion coeffs, Gamma(2n) +------------------------------------------------------------------------- */ +void PPPMDisp::adjust_gewald() +{ + + // Use Newton solver to find g_ewald + + double dx; + + // Begin algorithm + + for (int i = 0; i < LARGE; i++) { + dx = f() / derivf(); + g_ewald -= dx; //Update g_ewald + if (fabs(f()) < SMALL) return; + } + + // Failed to converge + + char str[128]; + sprintf(str, "Could not compute g_ewald"); + error->all(FLERR, str); + +} + +/* 
---------------------------------------------------------------------- + Calculate f(x) + ------------------------------------------------------------------------- */ + +double PPPMDisp::f() +{ + double df_rspace, df_kspace; + double q2 = qsqsum * force->qqrd2e; + double xprd = domain->xprd; + double yprd = domain->yprd; + double zprd = domain->zprd; + double zprd_slab = zprd*slab_volfactor; + bigint natoms = atom->natoms; + + df_rspace = 2.0*q2*exp(-g_ewald*g_ewald*cutoff*cutoff) / + sqrt(natoms*cutoff*xprd*yprd*zprd); + + double qopt = compute_qopt(); + df_kspace = sqrt(qopt/natoms)*q2/(xprd*yprd*zprd_slab); + + return df_rspace - df_kspace; +} + +/* ---------------------------------------------------------------------- + Calculate numerical derivative f'(x) using forward difference + [f(x + h) - f(x)] / h + ------------------------------------------------------------------------- */ + +double PPPMDisp::derivf() +{ + double h = 0.000001; //Derivative step-size + double df,f1,f2,g_ewald_old; + + f1 = f(); + g_ewald_old = g_ewald; + g_ewald += h; + f2 = f(); + g_ewald = g_ewald_old; + df = (f2 - f1)/h; + + return df; +} + +/* ---------------------------------------------------------------------- + Calculate the final estimator for the accuracy +------------------------------------------------------------------------- */ + +double PPPMDisp::final_accuracy() +{ + double df_rspace, df_kspace; + double q2 = qsqsum * force->qqrd2e; + double xprd = domain->xprd; + double yprd = domain->yprd; + double zprd = domain->zprd; + double zprd_slab = zprd*slab_volfactor; + bigint natoms = atom->natoms; + df_rspace = 2.0*q2 * exp(-g_ewald*g_ewald*cutoff*cutoff) / + sqrt(natoms*cutoff*xprd*yprd*zprd); + + double qopt = compute_qopt(); + + df_kspace = sqrt(qopt/natoms)*q2/(xprd*yprd*zprd_slab); + + double acc = sqrt(df_rspace*df_rspace + df_kspace*df_kspace); + return acc; +} + +/* ---------------------------------------------------------------------- + Calculate the final estimator 
for the Dispersion accuracy +------------------------------------------------------------------------- */ + +void PPPMDisp::final_accuracy_6(double& acc, double& acc_real, double& acc_kspace) +{ + double df_rspace, df_kspace; + double xprd = domain->xprd; + double yprd = domain->yprd; + double zprd = domain->zprd; + double zprd_slab = zprd*slab_volfactor; + bigint natoms = atom->natoms; + acc_real = lj_rspace_error(); + + double qopt = compute_qopt_6(); + + acc_kspace = sqrt(qopt/natoms)*csum/(xprd*yprd*zprd_slab); + + acc = sqrt(acc_real*acc_real + acc_kspace*acc_kspace); + return; +} + +/* ---------------------------------------------------------------------- + Compute qopt for Coulomb interactions +------------------------------------------------------------------------- */ + +double PPPMDisp::compute_qopt() +{ + double qopt; + if (differentiation_flag == 1) { + qopt = compute_qopt_ad(); + } else { + qopt = compute_qopt_ik(); + } + double qopt_all; + MPI_Allreduce(&qopt,&qopt_all,1,MPI_DOUBLE,MPI_SUM,world); + return qopt_all; +} + +/* ---------------------------------------------------------------------- + Compute qopt for Dispersion interactions +------------------------------------------------------------------------- */ + +double PPPMDisp::compute_qopt_6() +{ + double qopt; + if (differentiation_flag == 1) { + qopt = compute_qopt_6_ad(); + } else { + qopt = compute_qopt_6_ik(); + } + double qopt_all; + MPI_Allreduce(&qopt,&qopt_all,1,MPI_DOUBLE,MPI_SUM,world); + return qopt_all; +} + +/* ---------------------------------------------------------------------- + Compute qopt for the ik differentiation scheme and Coulomb interaction +------------------------------------------------------------------------- */ + +double PPPMDisp::compute_qopt_ik() +{ + double qopt = 0.0; + int k,l,m; + double *prd; + + if (triclinic == 0) prd = domain->prd; + else prd = domain->prd_lamda; + + double xprd = prd[0]; + double yprd = prd[1]; + double zprd = prd[2]; + double zprd_slab 
= zprd*slab_volfactor; + + double unitkx = (2.0*MY_PI/xprd); + double unitky = (2.0*MY_PI/yprd); + double unitkz = (2.0*MY_PI/zprd_slab); + + int nx,ny,nz,kper,lper,mper; + double sqk, u2; + double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz; + double sum1,sum2, sum3,dot1,dot2; + + int nbx = 2; + int nby = 2; + int nbz = 2; + + for (m = nzlo_fft; m <= nzhi_fft; m++) { + mper = m - nz_pppm*(2*m/nz_pppm); + + for (l = nylo_fft; l <= nyhi_fft; l++) { + lper = l - ny_pppm*(2*l/ny_pppm); + + for (k = nxlo_fft; k <= nxhi_fft; k++) { + kper = k - nx_pppm*(2*k/nx_pppm); + + sqk = pow(unitkx*kper,2.0) + pow(unitky*lper,2.0) + + pow(unitkz*mper,2.0); + + if (sqk != 0.0) { + sum1 = 0.0; + sum2 = 0.0; + sum3 = 0.0; + for (nx = -nbx; nx <= nbx; nx++) { + qx = unitkx*(kper+nx_pppm*nx); + sx = exp(-0.25*pow(qx/g_ewald,2.0)); + wx = 1.0; + argx = 0.5*qx*xprd/nx_pppm; + if (argx != 0.0) wx = pow(sin(argx)/argx,order); + for (ny = -nby; ny <= nby; ny++) { + qy = unitky*(lper+ny_pppm*ny); + sy = exp(-0.25*pow(qy/g_ewald,2.0)); + wy = 1.0; + argy = 0.5*qy*yprd/ny_pppm; + if (argy != 0.0) wy = pow(sin(argy)/argy,order); + for (nz = -nbz; nz <= nbz; nz++) { + qz = unitkz*(mper+nz_pppm*nz); + sz = exp(-0.25*pow(qz/g_ewald,2.0)); + wz = 1.0; + argz = 0.5*qz*zprd_slab/nz_pppm; + if (argz != 0.0) wz = pow(sin(argz)/argz,order); + + dot1 = unitkx*kper*qx + unitky*lper*qy + unitkz*mper*qz; + dot2 = qx*qx+qy*qy+qz*qz; + u2 = pow(wx*wy*wz,2.0); + sum1 += sx*sy*sz*sx*sy*sz/dot2*4.0*4.0*MY_PI*MY_PI; + sum2 += u2*sx*sy*sz*4.0*MY_PI/dot2*dot1; + sum3 += u2; + } + } + } + sum2 *= sum2; + sum3 *= sum3*sqk; + qopt += sum1 -sum2/sum3; + } + } + } + } + return qopt; +} + +/* ---------------------------------------------------------------------- + Compute qopt for the ad differentiation scheme and Coulomb interaction +------------------------------------------------------------------------- */ + +double PPPMDisp::compute_qopt_ad() +{ + double qopt = 0.0; + int k,l,m; + double *prd; + + if (triclinic == 0) 
prd = domain->prd; + else prd = domain->prd_lamda; + + double xprd = prd[0]; + double yprd = prd[1]; + double zprd = prd[2]; + double zprd_slab = zprd*slab_volfactor; + + + double unitkx = (2.0*MY_PI/xprd); + double unitky = (2.0*MY_PI/yprd); + double unitkz = (2.0*MY_PI/zprd_slab); + + int nx,ny,nz,kper,lper,mper; + double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz; + double u2, sqk; + double sum1,sum2,sum3,sum4,dot2; + double numerator; + + int nbx = 2; + int nby = 2; + int nbz = 2; + double form = 1.0; + + for (m = nzlo_fft; m <= nzhi_fft; m++) { + mper = m - nz_pppm*(2*m/nz_pppm); + + for (l = nylo_fft; l <= nyhi_fft; l++) { + lper = l - ny_pppm*(2*l/ny_pppm); + + for (k = nxlo_fft; k <= nxhi_fft; k++) { + kper = k - nx_pppm*(2*k/nx_pppm); + + sqk = pow(unitkx*kper,2.0) + pow(unitky*lper,2.0) + + pow(unitkz*mper,2.0); + + if (sqk != 0.0) { + numerator = form*12.5663706; + + sum1 = 0.0; + sum2 = 0.0; + sum3 = 0.0; + sum4 = 0.0; + for (nx = -nbx; nx <= nbx; nx++) { + qx = unitkx*(kper+nx_pppm*nx); + sx = exp(-0.25*pow(qx/g_ewald,2.0)); + wx = 1.0; + argx = 0.5*qx*xprd/nx_pppm; + if (argx != 0.0) wx = pow(sin(argx)/argx,order); + for (ny = -nby; ny <= nby; ny++) { + qy = unitky*(lper+ny_pppm*ny); + sy = exp(-0.25*pow(qy/g_ewald,2.0)); + wy = 1.0; + argy = 0.5*qy*yprd/ny_pppm; + if (argy != 0.0) wy = pow(sin(argy)/argy,order); + for (nz = -nbz; nz <= nbz; nz++) { + qz = unitkz*(mper+nz_pppm*nz); + sz = exp(-0.25*pow(qz/g_ewald,2.0)); + wz = 1.0; + argz = 0.5*qz*zprd_slab/nz_pppm; + if (argz != 0.0) wz = pow(sin(argz)/argz,order); + + dot2 = qx*qx+qy*qy+qz*qz; + u2 = pow(wx*wy*wz,2.0); + sum1 += sx*sy*sz*sx*sy*sz/dot2*4.0*4.0*MY_PI*MY_PI; + sum2 += sx*sy*sz * u2*4.0*MY_PI; + sum3 += u2; + sum4 += dot2*u2; + } + } + } + sum2 *= sum2; + qopt += sum1 - sum2/(sum3*sum4); + } + } + } + } + return qopt; +} + +/* ---------------------------------------------------------------------- + Compute qopt for the ik differentiation scheme and Dispersion interaction 
+------------------------------------------------------------------------- */ + +double PPPMDisp::compute_qopt_6_ik() +{ + double qopt = 0.0; + int k,l,m,n; + double *prd; + + if (triclinic == 0) prd = domain->prd; + else prd = domain->prd_lamda; + + double xprd = prd[0]; + double yprd = prd[1]; + double zprd = prd[2]; + double zprd_slab = zprd*slab_volfactor; + + double unitkx = (2.0*MY_PI/xprd); + double unitky = (2.0*MY_PI/yprd); + double unitkz = (2.0*MY_PI/zprd_slab); + + int nx,ny,nz,kper,lper,mper; + double sqk, u2; + double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz; + double sum1,sum2, sum3; + double dot1,dot2, rtdot2, term; + double inv2ew = 2*g_ewald_6; + inv2ew = 1.0/inv2ew; + double rtpi = sqrt(MY_PI); + + int nbx = 2; + int nby = 2; + int nbz = 2; + + n = 0; + for (m = nzlo_fft_6; m <= nzhi_fft_6; m++) { + mper = m - nz_pppm_6*(2*m/nz_pppm_6); + + for (l = nylo_fft_6; l <= nyhi_fft_6; l++) { + lper = l - ny_pppm_6*(2*l/ny_pppm_6); + + for (k = nxlo_fft_6; k <= nxhi_fft_6; k++) { + kper = k - nx_pppm_6*(2*k/nx_pppm_6); + + sqk = pow(unitkx*kper,2.0) + pow(unitky*lper,2.0) + + pow(unitkz*mper,2.0); + + if (sqk != 0.0) { + sum1 = 0.0; + sum2 = 0.0; + sum3 = 0.0; + for (nx = -nbx; nx <= nbx; nx++) { + qx = unitkx*(kper+nx_pppm_6*nx); + sx = exp(-qx*qx*inv2ew*inv2ew); + wx = 1.0; + argx = 0.5*qx*xprd/nx_pppm_6; + if (argx != 0.0) wx = pow(sin(argx)/argx,order_6); + for (ny = -nby; ny <= nby; ny++) { + qy = unitky*(lper+ny_pppm_6*ny); + sy = exp(-qy*qy*inv2ew*inv2ew); + wy = 1.0; + argy = 0.5*qy*yprd/ny_pppm_6; + if (argy != 0.0) wy = pow(sin(argy)/argy,order_6); + for (nz = -nbz; nz <= nbz; nz++) { + qz = unitkz*(mper+nz_pppm_6*nz); + sz = exp(-qz*qz*inv2ew*inv2ew); + wz = 1.0; + argz = 0.5*qz*zprd_slab/nz_pppm_6; + if (argz != 0.0) wz = pow(sin(argz)/argz,order_6); + + dot1 = unitkx*kper*qx + unitky*lper*qy + unitkz*mper*qz; + dot2 = qx*qx+qy*qy+qz*qz; + rtdot2 = sqrt(dot2); + term = (1-2*dot2*inv2ew*inv2ew)*sx*sy*sz + + 
2*dot2*rtdot2*inv2ew*inv2ew*inv2ew*rtpi*erfc(rtdot2*inv2ew); + term *= g_ewald_6*g_ewald_6*g_ewald_6; + u2 = pow(wx*wy*wz,2.0); + sum1 += term*term*MY_PI*MY_PI*MY_PI/9.0 * dot2; + sum2 += -u2*term*MY_PI*rtpi/3.0*dot1; + sum3 += u2; + } + } + } + sum2 *= sum2; + sum3 *= sum3*sqk; + qopt += sum1 -sum2/sum3; + } + } + } + } + return qopt; +} + +/* ---------------------------------------------------------------------- + Compute qopt for the ad differentiation scheme and Dispersion interaction +------------------------------------------------------------------------- */ + +double PPPMDisp::compute_qopt_6_ad() +{ + double qopt = 0.0; + int k,l,m; + double *prd; + + if (triclinic == 0) prd = domain->prd; + else prd = domain->prd_lamda; + + double xprd = prd[0]; + double yprd = prd[1]; + double zprd = prd[2]; + double zprd_slab = zprd*slab_volfactor; + + double unitkx = (2.0*MY_PI/xprd); + double unitky = (2.0*MY_PI/yprd); + double unitkz = (2.0*MY_PI/zprd_slab); + + int nx,ny,nz,kper,lper,mper; + double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz; + double u2, sqk; + double sum1,sum2,sum3,sum4; + double dot2, rtdot2, term; + double inv2ew = 2*g_ewald_6; + inv2ew = 1/inv2ew; + double rtpi = sqrt(MY_PI); + + int nbx = 2; + int nby = 2; + int nbz = 2; + + for (m = nzlo_fft_6; m <= nzhi_fft_6; m++) { + mper = m - nz_pppm_6*(2*m/nz_pppm_6); + + for (l = nylo_fft_6; l <= nyhi_fft_6; l++) { + lper = l - ny_pppm_6*(2*l/ny_pppm_6); + + for (k = nxlo_fft_6; k <= nxhi_fft_6; k++) { + kper = k - nx_pppm_6*(2*k/nx_pppm_6); + + sqk = pow(unitkx*kper,2.0) + pow(unitky*lper,2.0) + + pow(unitkz*mper,2.0); + + if (sqk != 0.0) { + + sum1 = 0.0; + sum2 = 0.0; + sum3 = 0.0; + sum4 = 0.0; + for (nx = -nbx; nx <= nbx; nx++) { + qx = unitkx*(kper+nx_pppm_6*nx); + sx = exp(-qx*qx*inv2ew*inv2ew); + wx = 1.0; + argx = 0.5*qx*xprd/nx_pppm_6; + if (argx != 0.0) wx = pow(sin(argx)/argx,order_6); + for (ny = -nby; ny <= nby; ny++) { + qy = unitky*(lper+ny_pppm_6*ny); + sy = exp(-qy*qy*inv2ew*inv2ew); + 
wy = 1.0; + argy = 0.5*qy*yprd/ny_pppm_6; + if (argy != 0.0) wy = pow(sin(argy)/argy,order_6); + for (nz = -nbz; nz <= nbz; nz++) { + qz = unitkz*(mper+nz_pppm_6*nz); + sz = exp(-qz*qz*inv2ew*inv2ew); + wz = 1.0; + argz = 0.5*qz*zprd_slab/nz_pppm_6; + if (argz != 0.0) wz = pow(sin(argz)/argz,order_6); + + dot2 = qx*qx+qy*qy+qz*qz; + rtdot2 = sqrt(dot2); + term = (1-2*dot2*inv2ew*inv2ew)*sx*sy*sz + + 2*dot2*rtdot2*inv2ew*inv2ew*inv2ew*rtpi*erfc(rtdot2*inv2ew); + term *= g_ewald_6*g_ewald_6*g_ewald_6; + u2 = pow(wx*wy*wz,2.0); + sum1 += term*term*MY_PI*MY_PI*MY_PI/9.0 * dot2; + sum2 += -term*MY_PI*rtpi/3.0 * u2 * dot2; + sum3 += u2; + sum4 += dot2*u2; + } + } + } + sum2 *= sum2; + qopt += sum1 - sum2/(sum3*sum4); + } + } + } + } + return qopt; +} + +/* ---------------------------------------------------------------------- + set size of FFT grid and g_ewald_6 + for Dispersion interactions +------------------------------------------------------------------------- */ + +void PPPMDisp::set_grid_6() +{ + // Calculate csum + if (!csumflag) calc_csum(); + if (!gewaldflag_6) set_init_g6(); + if (!gridflag_6) set_n_pppm_6(); + while (!factorable(nx_pppm_6)) nx_pppm_6++; + while (!factorable(ny_pppm_6)) ny_pppm_6++; + while (!factorable(nz_pppm_6)) nz_pppm_6++; + +} + +/* ---------------------------------------------------------------------- + Calculate the sum of the squared dispersion coefficients and other + related quantities required for the calculations +------------------------------------------------------------------------- */ + +void PPPMDisp::calc_csum() +{ + csumij = 0.0; + csum = 0.0; + + int ntypes = atom->ntypes; + int i,j,k; + + delete [] cii; + cii = new double[ntypes +1]; + for (i = 0; i<=ntypes; i++) cii[i] = 0.0; + delete [] csumi; + csumi = new double[ntypes +1]; + for (i = 0; i<=ntypes; i++) csumi[i] = 0.0; + int *neach = new int[ntypes+1]; + for (i = 0; i<=ntypes; i++) neach[i] = 0; + + //the following variables are needed to distinguish between 
arithmetic + // and geometric mixing + + double mix1; // scales 20/16 to 4 + int mix2; // shifts the value to the sigma^3 value + int mix3; // shifts the value to the right atom type + if (function[1]) { + for (i = 1; i <= ntypes; i++) + cii[i] = B[i]*B[i]; + int tmp; + for (i = 0; i < atom->nlocal; i++) { + tmp = atom->type[i]; + neach[tmp]++; + csum += B[tmp]*B[tmp]; + } + } + if (function[2]) { + for (i = 1; i <= ntypes; i++) + cii[i] = 64.0/20.0*B[7*i+3]*B[7*i+3]; + int tmp; + for (i = 0; i < atom->nlocal; i++) { + tmp = atom->type[i]; + neach[tmp]++; + csum += 64.0/20.0*B[7*tmp+3]*B[7*tmp+3]; + } + } + if (function[3]) { + for (i = 1; i <= ntypes; i++) + for (j = 0; j < nsplit; j++) + cii[i] += B[j]*B[nsplit*i + j]*B[nsplit*i + j]; + int tmp; + for (i = 0; i < atom->nlocal; i++) { + tmp = atom->type[i]; + neach[tmp]++; + for (j = 0; j < nsplit; j++) + csum += B[j]*B[nsplit*tmp + j]*B[nsplit*tmp + j]; + } + } + + + double tmp2; + MPI_Allreduce(&csum,&tmp2,1,MPI_DOUBLE,MPI_SUM,world); + csum = tmp2; + csumflag = 1; + + int *neach_all = new int[ntypes+1]; + MPI_Allreduce(neach,neach_all,ntypes+1,MPI_INT,MPI_SUM,world); + + // copmute csumij and csumi + double d1, d2; + if (function[1]){ + for (i=1; i<=ntypes; i++) { + for (j=1; j<=ntypes; j++) { + csumi[i] += neach_all[j]*B[i]*B[j]; + d1 = neach_all[i]*B[i]; + d2 = neach_all[j]*B[j]; + csumij += d1*d2; + //csumij += neach_all[i]*neach_all[j]*B[i]*B[j]; + } + } + } + if (function[2]) { + for (i=1; i<=ntypes; i++) { + for (j=1; j<=ntypes; j++) { + for (k=0; k<=6; k++) { + csumi[i] += neach_all[j]*B[7*i + k]*B[7*(j+1)-k-1]; + d1 = neach_all[i]*B[7*i + k]; + d2 = neach_all[j]*B[7*(j+1)-k-1]; + csumij += d1*d2; + //csumij += neach_all[i]*neach_all[j]*B[7*i + k]*B[7*(j+1)-k-1]; + } + } + } + } + if (function[3]) { + for (i=1; i<=ntypes; i++) { + for (j=1; j<=ntypes; j++) { + for (k=0; kall(FLERR, str); + +} + +/* ---------------------------------------------------------------------- + Calculate f(x) for Dispersion 
interaction + ------------------------------------------------------------------------- */ + +double PPPMDisp::f_6() +{ + double df_rspace, df_kspace; + double *prd; + + if (triclinic == 0) prd = domain->prd; + else prd = domain->prd_lamda; + + double xprd = prd[0]; + double yprd = prd[1]; + double zprd = prd[2]; + double zprd_slab = zprd*slab_volfactor; + bigint natoms = atom->natoms; + + df_rspace = lj_rspace_error(); + + double qopt = compute_qopt_6(); + df_kspace = sqrt(qopt/natoms)*csum/(xprd*yprd*zprd_slab); + + return df_rspace - df_kspace; +} + +/* ---------------------------------------------------------------------- + Calculate numerical derivative f'(x) using forward difference + [f(x + h) - f(x)] / h + ------------------------------------------------------------------------- */ + +double PPPMDisp::derivf_6() +{ + double h = 0.000001; //Derivative step-size + double df,f1,f2,g_ewald_old; + + f1 = f_6(); + g_ewald_old = g_ewald_6; + g_ewald_6 += h; + f2 = f_6(); + g_ewald_6 = g_ewald_old; + df = (f2 - f1)/h; + + return df; +} + + +/* ---------------------------------------------------------------------- + calculate an initial value for g_ewald_6 + ---------------------------------------------------------------------- */ + +void PPPMDisp::set_init_g6() +{ + // use xprd,yprd,zprd even if triclinic so grid size is the same + // adjust z dimension for 2d slab PPPM + // 3d PPPM just uses zprd since slab_volfactor = 1.0 + + // make initial g_ewald estimate + // based on desired error and real space cutoff + + // compute initial value for df_real with g_ewald_6 = 1/cutoff_lj + // if df_real > 0, repeat divide g_ewald_6 by 2 until df_real < 0 + // else, repeat multiply g_ewald_6 by 2 until df_real > 0 + // perform bisection for the last two values of + double df_real; + double g_ewald_old; + double gmin, gmax; + + // check if there is a user defined accuracy + double acc_rspace = accuracy; + if (accuracy_real_6 > 0) acc_rspace = accuracy_real_6; + + g_ewald_6 = 
1.0/cutoff_lj; + df_real = lj_rspace_error() - acc_rspace; + int counter = 0; + if (df_real > 0) { + while (df_real > 0 && counter < LARGE) { + counter++; + g_ewald_old = g_ewald_6; + g_ewald_6 *= 2; + df_real = lj_rspace_error() - acc_rspace; + } + } + + if (df_real < 0) { + while (df_real < 0 && counter < LARGE) { + counter++; + g_ewald_old = g_ewald_6; + g_ewald_6 *= 0.5; + df_real = lj_rspace_error() - acc_rspace; + } + } + + if (counter >= LARGE-1) error->all(FLERR,"Cannot compute initial g_ewald_disp"); + + gmin = MIN(g_ewald_6, g_ewald_old); + gmax = MAX(g_ewald_6, g_ewald_old); + g_ewald_6 = gmin + 0.5*(gmax-gmin); + counter = 0; + while (gmax-gmin > SMALL && counter < LARGE) { + counter++; + df_real = lj_rspace_error() -acc_rspace; + if (df_real < 0) gmax = g_ewald_6; + else gmin = g_ewald_6; + g_ewald_6 = gmin + 0.5*(gmax-gmin); + } + if (counter >= LARGE-1) error->all(FLERR,"Cannot compute initial g_ewald_disp"); + +} + +/* ---------------------------------------------------------------------- + calculate nx_pppm, ny_pppm, nz_pppm for dispersion interaction + ---------------------------------------------------------------------- */ + +void PPPMDisp::set_n_pppm_6() +{ + bigint natoms = atom->natoms; + + double *prd; + + if (triclinic == 0) prd = domain->prd; + else prd = domain->prd_lamda; + + double xprd = prd[0]; + double yprd = prd[1]; + double zprd = prd[2]; + double zprd_slab = zprd*slab_volfactor; + double h, h_x,h_y,h_z; + + double acc_kspace = accuracy; + if (accuracy_kspace_6 > 0.0) acc_kspace = accuracy_kspace_6; + + // initial value for the grid spacing + h = h_x = h_y = h_z = 4.0/g_ewald_6; + // decrease grid spacing untill required precision is obtained + int count = 0; + while(1) { + + // set grid dimension + nx_pppm_6 = static_cast (xprd/h_x); + ny_pppm_6 = static_cast (yprd/h_y); + nz_pppm_6 = static_cast (zprd_slab/h_z); + + if (nx_pppm_6 <= 1) nx_pppm_6 = 2; + if (ny_pppm_6 <= 1) ny_pppm_6 = 2; + if (nz_pppm_6 <= 1) nz_pppm_6 = 2; + + 
//set local grid dimension + int npey_fft,npez_fft; + if (nz_pppm_6 >= nprocs) { + npey_fft = 1; + npez_fft = nprocs; + } else procs2grid2d(nprocs,ny_pppm_6,nz_pppm_6,&npey_fft,&npez_fft); + + int me_y = me % npey_fft; + int me_z = me / npey_fft; + + nxlo_fft_6 = 0; + nxhi_fft_6 = nx_pppm_6 - 1; + nylo_fft_6 = me_y*ny_pppm_6/npey_fft; + nyhi_fft_6 = (me_y+1)*ny_pppm_6/npey_fft - 1; + nzlo_fft_6 = me_z*nz_pppm_6/npez_fft; + nzhi_fft_6 = (me_z+1)*nz_pppm_6/npez_fft - 1; + + double qopt = compute_qopt_6(); + + double df_kspace = sqrt(qopt/natoms)*csum/(xprd*yprd*zprd_slab); + + count++; + + // break loop if the accuracy has been reached or too many loops have been performed + if (df_kspace <= acc_kspace) break; + if (count > 500) error->all(FLERR, "Could not compute grid size for Dispersion"); + h *= 0.95; + h_x = h_y = h_z = h; + } +} + +/* ---------------------------------------------------------------------- + calculate the real space error for dispersion interactions + ---------------------------------------------------------------------- */ + +double PPPMDisp::lj_rspace_error() +{ + bigint natoms = atom->natoms; + double xprd = domain->xprd; + double yprd = domain->yprd; + double zprd = domain->zprd; + double zprd_slab = zprd*slab_volfactor; + + double deltaf; + double rgs = (cutoff_lj*g_ewald_6); + rgs *= rgs; + double rgs_inv = 1.0/rgs; + deltaf = csum/sqrt(natoms*xprd*yprd*zprd_slab*cutoff_lj)*sqrt(MY_PI)*pow(g_ewald_6, 5)* + exp(-rgs)*(1+rgs_inv*(3+rgs_inv*(6+rgs_inv*6))); + return deltaf; +} + + +/* ---------------------------------------------------------------------- + Compyute the modified (hockney-eastwood) coulomb green function + ---------------------------------------------------------------------- */ + +void PPPMDisp::compute_gf() +{ + int k,l,m,n; + double *prd; + + if (triclinic == 0) prd = domain->prd; + else prd = domain->prd_lamda; + + double xprd = prd[0]; + double yprd = prd[1]; + double zprd = prd[2]; + double zprd_slab = zprd*slab_volfactor; 
+ volume = xprd * yprd * zprd_slab; + + double unitkx = (2.0*MY_PI/xprd); + double unitky = (2.0*MY_PI/yprd); + double unitkz = (2.0*MY_PI/zprd_slab); + + int kper,lper,mper; + double snx,sny,snz,snx2,sny2,snz2; + double sqk; + double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz; + double numerator,denominator; + + + n = 0; + for (m = nzlo_fft; m <= nzhi_fft; m++) { + mper = m - nz_pppm*(2*m/nz_pppm); + qz = unitkz*mper; + snz = sin(0.5*qz*zprd_slab/nz_pppm); + snz2 = snz*snz; + sz = exp(-0.25*pow(qz/g_ewald,2.0)); + wz = 1.0; + argz = 0.5*qz*zprd_slab/nz_pppm; + if (argz != 0.0) wz = pow(sin(argz)/argz,order); + wz *= wz; + + for (l = nylo_fft; l <= nyhi_fft; l++) { + lper = l - ny_pppm*(2*l/ny_pppm); + qy = unitky*lper; + sny = sin(0.5*qy*yprd/ny_pppm); + sny2 = sny*sny; + sy = exp(-0.25*pow(qy/g_ewald,2.0)); + wy = 1.0; + argy = 0.5*qy*yprd/ny_pppm; + if (argy != 0.0) wy = pow(sin(argy)/argy,order); + wy *= wy; + + for (k = nxlo_fft; k <= nxhi_fft; k++) { + kper = k - nx_pppm*(2*k/nx_pppm); + qx = unitkx*kper; + snx = sin(0.5*qx*xprd/nx_pppm); + snx2 = snx*snx; + sx = exp(-0.25*pow(qx/g_ewald,2.0)); + wx = 1.0; + argx = 0.5*qx*xprd/nx_pppm; + if (argx != 0.0) wx = pow(sin(argx)/argx,order); + wx *= wx; + + sqk = pow(qx,2.0) + pow(qy,2.0) + pow(qz,2.0); + + if (sqk != 0.0) { + numerator = 4.0*MY_PI/sqk; + denominator = gf_denom(snx2,sny2,snz2, gf_b, order); + greensfn[n++] = numerator*sx*sy*sz*wx*wy*wz/denominator; + } else greensfn[n++] = 0.0; + } + } + } +} + +/* ---------------------------------------------------------------------- + compute self force coefficients for ad-differentiation scheme + and Coulomb interaction +------------------------------------------------------------------------- */ + +void PPPMDisp::compute_sf_precoeff(int nxp, int nyp, int nzp, int ord, + int nxlo_ft, int nylo_ft, int nzlo_ft, + int nxhi_ft, int nyhi_ft, int nzhi_ft, + double *sf_pre1, double *sf_pre2, double *sf_pre3, + double *sf_pre4, double *sf_pre5, double *sf_pre6) +{ + + 
int i,k,l,m,n; + double *prd; + + // volume-dependent factors + // adjust z dimension for 2d slab PPPM + // z dimension for 3d PPPM is zprd since slab_volfactor = 1.0 + + if (triclinic == 0) prd = domain->prd; + else prd = domain->prd_lamda; + + double xprd = prd[0]; + double yprd = prd[1]; + double zprd = prd[2]; + double zprd_slab = zprd*slab_volfactor; + + double unitkx = (2.0*MY_PI/xprd); + double unitky = (2.0*MY_PI/yprd); + double unitkz = (2.0*MY_PI/zprd_slab); + + int nx,ny,nz,kper,lper,mper; + double argx,argy,argz; + double wx0[5],wy0[5],wz0[5],wx1[5],wy1[5],wz1[5],wx2[5],wy2[5],wz2[5]; + double qx0,qy0,qz0,qx1,qy1,qz1,qx2,qy2,qz2; + double u0,u1,u2,u3,u4,u5,u6; + double sum1,sum2,sum3,sum4,sum5,sum6; + + int nb = 2; + + n = 0; + for (m = nzlo_ft; m <= nzhi_ft; m++) { + mper = m - nzp*(2*m/nzp); + + for (l = nylo_ft; l <= nyhi_ft; l++) { + lper = l - nyp*(2*l/nyp); + + for (k = nxlo_ft; k <= nxhi_ft; k++) { + kper = k - nxp*(2*k/nxp); + + sum1 = sum2 = sum3 = sum4 = sum5 = sum6 = 0.0; + for (i = -nb; i <= nb; i++) { + + qx0 = unitkx*(kper+nxp*i); + qx1 = unitkx*(kper+nxp*(i+1)); + qx2 = unitkx*(kper+nxp*(i+2)); + wx0[i+2] = 1.0; + wx1[i+2] = 1.0; + wx2[i+2] = 1.0; + argx = 0.5*qx0*xprd/nxp; + if (argx != 0.0) wx0[i+2] = pow(sin(argx)/argx,ord); + argx = 0.5*qx1*xprd/nxp; + if (argx != 0.0) wx1[i+2] = pow(sin(argx)/argx,ord); + argx = 0.5*qx2*xprd/nxp; + if (argx != 0.0) wx2[i+2] = pow(sin(argx)/argx,ord); + + qy0 = unitky*(lper+nyp*i); + qy1 = unitky*(lper+nyp*(i+1)); + qy2 = unitky*(lper+nyp*(i+2)); + wy0[i+2] = 1.0; + wy1[i+2] = 1.0; + wy2[i+2] = 1.0; + argy = 0.5*qy0*yprd/nyp; + if (argy != 0.0) wy0[i+2] = pow(sin(argy)/argy,ord); + argy = 0.5*qy1*yprd/nyp; + if (argy != 0.0) wy1[i+2] = pow(sin(argy)/argy,ord); + argy = 0.5*qy2*yprd/nyp; + if (argy != 0.0) wy2[i+2] = pow(sin(argy)/argy,ord); + + qz0 = unitkz*(mper+nzp*i); + qz1 = unitkz*(mper+nzp*(i+1)); + qz2 = unitkz*(mper+nzp*(i+2)); + wz0[i+2] = 1.0; + wz1[i+2] = 1.0; + wz2[i+2] = 1.0; + argz = 
0.5*qz0*zprd_slab/nzp; + if (argz != 0.0) wz0[i+2] = pow(sin(argz)/argz,ord); + argz = 0.5*qz1*zprd_slab/nzp; + if (argz != 0.0) wz1[i+2] = pow(sin(argz)/argz,ord); + argz = 0.5*qz2*zprd_slab/nzp; + if (argz != 0.0) wz2[i+2] = pow(sin(argz)/argz,ord); + } + + for (nx = 0; nx <= 4; nx++) { + for (ny = 0; ny <= 4; ny++) { + for (nz = 0; nz <= 4; nz++) { + u0 = wx0[nx]*wy0[ny]*wz0[nz]; + u1 = wx1[nx]*wy0[ny]*wz0[nz]; + u2 = wx2[nx]*wy0[ny]*wz0[nz]; + u3 = wx0[nx]*wy1[ny]*wz0[nz]; + u4 = wx0[nx]*wy2[ny]*wz0[nz]; + u5 = wx0[nx]*wy0[ny]*wz1[nz]; + u6 = wx0[nx]*wy0[ny]*wz2[nz]; + + sum1 += u0*u1; + sum2 += u0*u2; + sum3 += u0*u3; + sum4 += u0*u4; + sum5 += u0*u5; + sum6 += u0*u6; + } + } + } + + // store values + + sf_pre1[n] = sum1; + sf_pre2[n] = sum2; + sf_pre3[n] = sum3; + sf_pre4[n] = sum4; + sf_pre5[n] = sum5; + sf_pre6[n++] = sum6; + } + } + } +} + +/* ---------------------------------------------------------------------- + Compute the modified (hockney-eastwood) dispersion green function + ---------------------------------------------------------------------- */ + +void PPPMDisp::compute_gf_6() +{ + double *prd; + int k,l,m,n; + + // volume-dependent factors + // adjust z dimension for 2d slab PPPM + // z dimension for 3d PPPM is zprd since slab_volfactor = 1.0 + + if (triclinic == 0) prd = domain->prd; + else prd = domain->prd_lamda; + + double xprd = prd[0]; + double yprd = prd[1]; + double zprd = prd[2]; + double zprd_slab = zprd*slab_volfactor; + + double unitkx = (2.0*MY_PI/xprd); + double unitky = (2.0*MY_PI/yprd); + double unitkz = (2.0*MY_PI/zprd_slab); + + int kper,lper,mper; + double sqk; + double snx,sny,snz,snx2,sny2,snz2; + double argx,argy,argz,wx,wy,wz,sx,sy,sz; + double qx,qy,qz; + double rtsqk, term; + double numerator,denominator; + double inv2ew = 2*g_ewald_6; + inv2ew = 1/inv2ew; + double rtpi = sqrt(MY_PI); + + numerator = -MY_PI*rtpi*g_ewald_6*g_ewald_6*g_ewald_6/(3.0); + + n = 0; + for (m = nzlo_fft_6; m <= nzhi_fft_6; m++) { + mper = m - 
nz_pppm_6*(2*m/nz_pppm_6); + qz = unitkz*mper; + snz = sin(0.5*unitkz*mper*zprd_slab/nz_pppm_6); + snz2 = snz*snz; + sz = exp(-qz*qz*inv2ew*inv2ew); + wz = 1.0; + argz = 0.5*qz*zprd_slab/nz_pppm_6; + if (argz != 0.0) wz = pow(sin(argz)/argz,order_6); + wz *= wz; + + for (l = nylo_fft_6; l <= nyhi_fft_6; l++) { + lper = l - ny_pppm_6*(2*l/ny_pppm_6); + qy = unitky*lper; + sny = sin(0.5*unitky*lper*yprd/ny_pppm_6); + sny2 = sny*sny; + sy = exp(-qy*qy*inv2ew*inv2ew); + wy = 1.0; + argy = 0.5*qy*yprd/ny_pppm_6; + if (argy != 0.0) wy = pow(sin(argy)/argy,order_6); + wy *= wy; + + for (k = nxlo_fft_6; k <= nxhi_fft_6; k++) { + kper = k - nx_pppm_6*(2*k/nx_pppm_6); + qx = unitkx*kper; + snx = sin(0.5*unitkx*kper*xprd/nx_pppm_6); + snx2 = snx*snx; + sx = exp(-qx*qx*inv2ew*inv2ew); + wx = 1.0; + argx = 0.5*qx*xprd/nx_pppm_6; + if (argx != 0.0) wx = pow(sin(argx)/argx,order_6); + wx *= wx; + + sqk = pow(qx,2.0) + pow(qy,2.0) + pow(qz,2.0); + + if (sqk != 0.0) { + denominator = gf_denom(snx2,sny2,snz2, gf_b_6, order_6); + rtsqk = sqrt(sqk); + term = (1-2*sqk*inv2ew*inv2ew)*sx*sy*sz + + 2*sqk*rtsqk*inv2ew*inv2ew*inv2ew*rtpi*erfc(rtsqk*inv2ew); + greensfn_6[n++] = numerator*term*wx*wy*wz/denominator; + } else greensfn_6[n++] = 0.0; + } + } + } +} + +/* ---------------------------------------------------------------------- + compute self force coefficients for ad-differentiation scheme + and Coulomb interaction +------------------------------------------------------------------------- */ +void PPPMDisp::compute_sf_coeff() +{ + int i,k,l,m,n; + double *prd; + + if (triclinic == 0) prd = domain->prd; + else prd = domain->prd_lamda; + + double xprd = prd[0]; + double yprd = prd[1]; + double zprd = prd[2]; + double zprd_slab = zprd*slab_volfactor; + volume = xprd * yprd * zprd_slab; + + for (i = 0; i <= 5; i++) sf_coeff[i] = 0.0; + + n = 0; + for (m = nzlo_fft; m <= nzhi_fft; m++) { + for (l = nylo_fft; l <= nyhi_fft; l++) { + for (k = nxlo_fft; k <= nxhi_fft; k++) { + sf_coeff[0] 
+= sf_precoeff1[n]*greensfn[n]; + sf_coeff[1] += sf_precoeff2[n]*greensfn[n]; + sf_coeff[2] += sf_precoeff3[n]*greensfn[n]; + sf_coeff[3] += sf_precoeff4[n]*greensfn[n]; + sf_coeff[4] += sf_precoeff5[n]*greensfn[n]; + sf_coeff[5] += sf_precoeff6[n]*greensfn[n]; + ++n; + } + } + } + + // Compute the coefficients for the self-force correction + + double prex, prey, prez; + prex = prey = prez = MY_PI/volume; + prex *= nx_pppm/xprd; + prey *= ny_pppm/yprd; + prez *= nz_pppm/zprd_slab; + sf_coeff[0] *= prex; + sf_coeff[1] *= prex*2; + sf_coeff[2] *= prey; + sf_coeff[3] *= prey*2; + sf_coeff[4] *= prez; + sf_coeff[5] *= prez*2; + + // communicate values with other procs + + double tmp[6]; + MPI_Allreduce(sf_coeff,tmp,6,MPI_DOUBLE,MPI_SUM,world); + for (n = 0; n < 6; n++) sf_coeff[n] = tmp[n]; +} + +/* ---------------------------------------------------------------------- + compute self force coefficients for ad-differentiation scheme + and Dispersion interaction +------------------------------------------------------------------------- */ + +void PPPMDisp::compute_sf_coeff_6() +{ + int i,k,l,m,n; + double *prd; + + if (triclinic == 0) prd = domain->prd; + else prd = domain->prd_lamda; + + double xprd = prd[0]; + double yprd = prd[1]; + double zprd = prd[2]; + double zprd_slab = zprd*slab_volfactor; + volume = xprd * yprd * zprd_slab; + + for (i = 0; i <= 5; i++) sf_coeff_6[i] = 0.0; + + n = 0; + for (m = nzlo_fft_6; m <= nzhi_fft_6; m++) { + for (l = nylo_fft_6; l <= nyhi_fft_6; l++) { + for (k = nxlo_fft_6; k <= nxhi_fft_6; k++) { + sf_coeff_6[0] += sf_precoeff1_6[n]*greensfn_6[n]; + sf_coeff_6[1] += sf_precoeff2_6[n]*greensfn_6[n]; + sf_coeff_6[2] += sf_precoeff3_6[n]*greensfn_6[n]; + sf_coeff_6[3] += sf_precoeff4_6[n]*greensfn_6[n]; + sf_coeff_6[4] += sf_precoeff5_6[n]*greensfn_6[n]; + sf_coeff_6[5] += sf_precoeff6_6[n]*greensfn_6[n]; + ++n; + } + } + } + + + // perform multiplication with prefactors + + double prex, prey, prez; + prex = prey = prez = MY_PI/volume; + 
prex *= nx_pppm_6/xprd; + prey *= ny_pppm_6/yprd; + prez *= nz_pppm_6/zprd_slab; + sf_coeff_6[0] *= prex; + sf_coeff_6[1] *= prex*2; + sf_coeff_6[2] *= prey; + sf_coeff_6[3] *= prey*2; + sf_coeff_6[4] *= prez; + sf_coeff_6[5] *= prez*2; + + // communicate values with other procs + + double tmp[6]; + MPI_Allreduce(sf_coeff_6,tmp,6,MPI_DOUBLE,MPI_SUM,world); + for (n = 0; n < 6; n++) sf_coeff_6[n] = tmp[n]; + +} + +/* ---------------------------------------------------------------------- + denominator for Hockney-Eastwood Green's function + of x,y,z = sin(kx*deltax/2), etc + + inf n-1 + S(n,k) = Sum W(k+pi*j)**2 = Sum b(l)*(z*z)**l + j=-inf l=0 + + = -(z*z)**n /(2n-1)! * (d/dx)**(2n-1) cot(x) at z = sin(x) + gf_b = denominator expansion coeffs +------------------------------------------------------------------------- */ + +double PPPMDisp::gf_denom(double x, double y, double z, double *g_b, int ord) +{ + double sx,sy,sz; + sz = sy = sx = 0.0; + for (int l = ord-1; l >= 0; l--) { + sx = g_b[l] + sx*x; + sy = g_b[l] + sy*y; + sz = g_b[l] + sz*z; + } + double s = sx*sy*sz; + return s*s; +} + +/* ---------------------------------------------------------------------- + pre-compute Green's function denominator expansion coeffs, Gamma(2n) +------------------------------------------------------------------------- */ + +void PPPMDisp::compute_gf_denom(double* gf, int ord) +{ + int k,l,m; + + for (l = 1; l < ord; l++) gf[l] = 0.0; + gf[0] = 1.0; + + for (m = 1; m < ord; m++) { + for (l = m; l > 0; l--) + gf[l] = 4.0 * (gf[l]*(l-m)*(l-m-0.5)-gf[l-1]*(l-m-1)*(l-m-1)); + gf[0] = 4.0 * (gf[0]*(l-m)*(l-m-0.5)); + } + + bigint ifact = 1; + for (k = 1; k < 2*ord; k++) ifact *= k; + double gaminv = 1.0/ifact; + for (l = 0; l < ord; l++) gf[l] *= gaminv; +} + +/* ---------------------------------------------------------------------- + ghost-swap to accumulate full density in brick decomposition + remap density from 3d brick decomposition to FFTdecomposition + for coulomb interaction or 
dispersion interaction with geometric + mixing +------------------------------------------------------------------------- */ + +void PPPMDisp::brick2fft(int nxlo_i, int nylo_i, int nzlo_i, + int nxhi_i, int nyhi_i, int nzhi_i, + FFT_SCALAR*** dbrick, FFT_SCALAR* dfft, FFT_SCALAR* work, + LAMMPS_NS::Remap* rmp) +{ + int n,ix,iy,iz; + + // copy grabs inner portion of density from 3d brick + // remap could be done as pre-stage of FFT, + // but this works optimally on only double values, not complex values + + n = 0; + for (iz = nzlo_i; iz <= nzhi_i; iz++) + for (iy = nylo_i; iy <= nyhi_i; iy++) + for (ix = nxlo_i; ix <= nxhi_i; ix++) + dfft[n++] = dbrick[iz][iy][ix]; + + rmp->perform(dfft,dfft,work); +} + + +/* ---------------------------------------------------------------------- + ghost-swap to accumulate full density in brick decomposition + remap density from 3d brick decomposition to FFTdecomposition + for dispersion with arithmetic mixing rule +------------------------------------------------------------------------- */ + +void PPPMDisp::brick2fft_a() +{ + int n,ix,iy,iz; + + // copy grabs inner portion of density from 3d brick + // remap could be done as pre-stage of FFT, + // but this works optimally on only double values, not complex values + + n = 0; + for (iz = nzlo_in_6; iz <= nzhi_in_6; iz++) + for (iy = nylo_in_6; iy <= nyhi_in_6; iy++) + for (ix = nxlo_in_6; ix <= nxhi_in_6; ix++) { + density_fft_a0[n] = density_brick_a0[iz][iy][ix]; + density_fft_a1[n] = density_brick_a1[iz][iy][ix]; + density_fft_a2[n] = density_brick_a2[iz][iy][ix]; + density_fft_a3[n] = density_brick_a3[iz][iy][ix]; + density_fft_a4[n] = density_brick_a4[iz][iy][ix]; + density_fft_a5[n] = density_brick_a5[iz][iy][ix]; + density_fft_a6[n++] = density_brick_a6[iz][iy][ix]; + } + + remap_6->perform(density_fft_a0,density_fft_a0,work1_6); + remap_6->perform(density_fft_a1,density_fft_a1,work1_6); + remap_6->perform(density_fft_a2,density_fft_a2,work1_6); + 
remap_6->perform(density_fft_a3,density_fft_a3,work1_6); + remap_6->perform(density_fft_a4,density_fft_a4,work1_6); + remap_6->perform(density_fft_a5,density_fft_a5,work1_6); + remap_6->perform(density_fft_a6,density_fft_a6,work1_6); + +} + +/* ---------------------------------------------------------------------- + ghost-swap to accumulate full density in brick decomposition + remap density from 3d brick decomposition to FFTdecomposition + for dispersion with special case +------------------------------------------------------------------------- */ + +void PPPMDisp::brick2fft_none() +{ + int k,n,ix,iy,iz; + + // copy grabs inner portion of density from 3d brick + // remap could be done as pre-stage of FFT, + // but this works optimally on only double values, not complex values + + for (k = 0; kperform(density_fft_none[k],density_fft_none[k],work1_6); +} + +/* ---------------------------------------------------------------------- + find center grid pt for each of my particles + check that full stencil for the particle will fit in my 3d brick + store central grid pt indices in part2grid array +------------------------------------------------------------------------- */ + +void PPPMDisp::particle_map(double delx, double dely, double delz, + double sft, int** p2g, int nup, int nlow, + int nxlo, int nylo, int nzlo, + int nxhi, int nyhi, int nzhi) +{ + int nx,ny,nz; + + double **x = atom->x; + int nlocal = atom->nlocal; + + int flag = 0; + for (int i = 0; i < nlocal; i++) { + + // (nx,ny,nz) = global coords of grid pt to "lower left" of charge + // current particle coord can be outside global and local box + // add/subtract OFFSET to avoid int(-0.75) = 0 when want it to be -1 + + nx = static_cast ((x[i][0]-boxlo[0])*delx+sft) - OFFSET; + ny = static_cast ((x[i][1]-boxlo[1])*dely+sft) - OFFSET; + nz = static_cast ((x[i][2]-boxlo[2])*delz+sft) - OFFSET; + + p2g[i][0] = nx; + p2g[i][1] = ny; + p2g[i][2] = nz; + + // check that entire stencil around nx,ny,nz will fit in my 
3d brick + + if (nx+nlow < nxlo || nx+nup > nxhi || + ny+nlow < nylo || ny+nup > nyhi || + nz+nlow < nzlo || nz+nup > nzhi) + flag = 1; + } + + if (flag) error->one(FLERR,"Out of range atoms - cannot compute PPPMDisp"); +} + + +void PPPMDisp::particle_map_c(double delx, double dely, double delz, + double sft, int** p2g, int nup, int nlow, + int nxlo, int nylo, int nzlo, + int nxhi, int nyhi, int nzhi) +{ + particle_map(delx, dely, delz, sft, p2g, nup, nlow, + nxlo, nylo, nzlo, nxhi, nyhi, nzhi); +} + +/* ---------------------------------------------------------------------- + create discretized "density" on section of global grid due to my particles + density(x,y,z) = charge "density" at grid points of my 3d brick + (nxlo:nxhi,nylo:nyhi,nzlo:nzhi) is extent of my brick (including ghosts) + in global grid +------------------------------------------------------------------------- */ + +void PPPMDisp::make_rho_c() +{ + int l,m,n,nx,ny,nz,mx,my,mz; + FFT_SCALAR dx,dy,dz,x0,y0,z0; + + // clear 3d density array + + memset(&(density_brick[nzlo_out][nylo_out][nxlo_out]),0, + ngrid*sizeof(FFT_SCALAR)); + + // loop over my charges, add their contribution to nearby grid points + // (nx,ny,nz) = global coords of grid pt to "lower left" of charge + // (dx,dy,dz) = distance to "lower left" grid pt + // (mx,my,mz) = global coords of moving stencil pt + + double *q = atom->q; + double **x = atom->x; + int nlocal = atom->nlocal; + + for (int i = 0; i < nlocal; i++) { + + nx = part2grid[i][0]; + ny = part2grid[i][1]; + nz = part2grid[i][2]; + dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv; + dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv; + dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv; + + compute_rho1d(dx,dy,dz, order, rho_coeff, rho1d); + + z0 = delvolinv * q[i]; + for (n = nlower; n <= nupper; n++) { + mz = n+nz; + y0 = z0*rho1d[2][n]; + for (m = nlower; m <= nupper; m++) { + my = m+ny; + x0 = y0*rho1d[1][m]; + for (l = nlower; l <= nupper; l++) { + mx = l+nx; + 
density_brick[mz][my][mx] += x0*rho1d[0][l]; + } + } + } + } +} + +/* ---------------------------------------------------------------------- + create discretized "density" on section of global grid due to my particles + density(x,y,z) = dispersion "density" at grid points of my 3d brick + (nxlo:nxhi,nylo:nyhi,nzlo:nzhi) is extent of my brick (including ghosts) + in global grid --- geometric mixing +------------------------------------------------------------------------- */ + +void PPPMDisp::make_rho_g() +{ + int l,m,n,nx,ny,nz,mx,my,mz; + FFT_SCALAR dx,dy,dz,x0,y0,z0; + + // clear 3d density array + + memset(&(density_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]),0, + ngrid_6*sizeof(FFT_SCALAR)); + + // loop over my charges, add their contribution to nearby grid points + // (nx,ny,nz) = global coords of grid pt to "lower left" of charge + // (dx,dy,dz) = distance to "lower left" grid pt + // (mx,my,mz) = global coords of moving stencil pt + int type; + double **x = atom->x; + int nlocal = atom->nlocal; + + for (int i = 0; i < nlocal; i++) { + + nx = part2grid_6[i][0]; + ny = part2grid_6[i][1]; + nz = part2grid_6[i][2]; + dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6; + dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6; + dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6; + + compute_rho1d(dx,dy,dz, order_6, rho_coeff_6, rho1d_6); + type = atom->type[i]; + z0 = delvolinv_6 * B[type]; + for (n = nlower_6; n <= nupper_6; n++) { + mz = n+nz; + y0 = z0*rho1d_6[2][n]; + for (m = nlower_6; m <= nupper_6; m++) { + my = m+ny; + x0 = y0*rho1d_6[1][m]; + for (l = nlower_6; l <= nupper_6; l++) { + mx = l+nx; + density_brick_g[mz][my][mx] += x0*rho1d_6[0][l]; + } + } + } + } +} + + +/* ---------------------------------------------------------------------- + create discretized "density" on section of global grid due to my particles + density(x,y,z) = dispersion "density" at grid points of my 3d brick + (nxlo:nxhi,nylo:nyhi,nzlo:nzhi) is extent of my brick (including ghosts) 
+ in global grid --- arithmetic mixing +------------------------------------------------------------------------- */ + +void PPPMDisp::make_rho_a() +{ + int l,m,n,nx,ny,nz,mx,my,mz; + FFT_SCALAR dx,dy,dz,x0,y0,z0,w; + + // clear 3d density array + + memset(&(density_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]),0, + ngrid_6*sizeof(FFT_SCALAR)); + memset(&(density_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]),0, + ngrid_6*sizeof(FFT_SCALAR)); + memset(&(density_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]),0, + ngrid_6*sizeof(FFT_SCALAR)); + memset(&(density_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]),0, + ngrid_6*sizeof(FFT_SCALAR)); + memset(&(density_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]),0, + ngrid_6*sizeof(FFT_SCALAR)); + memset(&(density_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]),0, + ngrid_6*sizeof(FFT_SCALAR)); + memset(&(density_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]),0, + ngrid_6*sizeof(FFT_SCALAR)); + + // loop over my particles, add their contribution to nearby grid points + // (nx,ny,nz) = global coords of grid pt to "lower left" of charge + // (dx,dy,dz) = distance to "lower left" grid pt + // (mx,my,mz) = global coords of moving stencil pt + int type; + double **x = atom->x; + int nlocal = atom->nlocal; + + for (int i = 0; i < nlocal; i++) { + + //do the following for all 4 grids + nx = part2grid_6[i][0]; + ny = part2grid_6[i][1]; + nz = part2grid_6[i][2]; + dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6; + dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6; + dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6; + compute_rho1d(dx,dy,dz, order_6, rho_coeff_6, rho1d_6); + type = atom->type[i]; + z0 = delvolinv_6; + for (n = nlower_6; n <= nupper_6; n++) { + mz = n+nz; + y0 = z0*rho1d_6[2][n]; + for (m = nlower_6; m <= nupper_6; m++) { + my = m+ny; + x0 = y0*rho1d_6[1][m]; + for (l = nlower_6; l <= nupper_6; l++) { + mx = l+nx; + w = x0*rho1d_6[0][l]; + density_brick_a0[mz][my][mx] += w*B[7*type]; + density_brick_a1[mz][my][mx] += 
w*B[7*type+1]; + density_brick_a2[mz][my][mx] += w*B[7*type+2]; + density_brick_a3[mz][my][mx] += w*B[7*type+3]; + density_brick_a4[mz][my][mx] += w*B[7*type+4]; + density_brick_a5[mz][my][mx] += w*B[7*type+5]; + density_brick_a6[mz][my][mx] += w*B[7*type+6]; + } + } + } + } +} + +/* ---------------------------------------------------------------------- + create discretized "density" on section of global grid due to my particles + density(x,y,z) = dispersion "density" at grid points of my 3d brick + (nxlo:nxhi,nylo:nyhi,nzlo:nzhi) is extent of my brick (including ghosts) + in global grid --- case when mixing rules don't apply +------------------------------------------------------------------------- */ + +void PPPMDisp::make_rho_none() +{ + int k,l,m,n,nx,ny,nz,mx,my,mz; + FFT_SCALAR dx,dy,dz,x0,y0,z0,w; + + // clear 3d density array + for (k = 0; k < nsplit_alloc; k++) + memset(&(density_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]),0, + ngrid_6*sizeof(FFT_SCALAR)); + + + // loop over my particles, add their contribution to nearby grid points + // (nx,ny,nz) = global coords of grid pt to "lower left" of charge + // (dx,dy,dz) = distance to "lower left" grid pt + // (mx,my,mz) = global coords of moving stencil pt + int type; + double **x = atom->x; + int nlocal = atom->nlocal; + + for (int i = 0; i < nlocal; i++) { + + //do the following for all 4 grids + nx = part2grid_6[i][0]; + ny = part2grid_6[i][1]; + nz = part2grid_6[i][2]; + dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6; + dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6; + dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6; + compute_rho1d(dx,dy,dz, order_6, rho_coeff_6, rho1d_6); + type = atom->type[i]; + z0 = delvolinv_6; + for (n = nlower_6; n <= nupper_6; n++) { + mz = n+nz; + y0 = z0*rho1d_6[2][n]; + for (m = nlower_6; m <= nupper_6; m++) { + my = m+ny; + x0 = y0*rho1d_6[1][m]; + for (l = nlower_6; l <= nupper_6; l++) { + mx = l+nx; + w = x0*rho1d_6[0][l]; + for (k = 0; k < nsplit; k++) + 
density_brick_none[k][mz][my][mx] += w*B[nsplit*type + k]; + } + } + } + } +} + + +/* ---------------------------------------------------------------------- + FFT-based Poisson solver for ik differentiation +------------------------------------------------------------------------- */ + +void PPPMDisp::poisson_ik(FFT_SCALAR* wk1, FFT_SCALAR* wk2, + FFT_SCALAR* dfft, LAMMPS_NS::FFT3d* ft1,LAMMPS_NS::FFT3d* ft2, + int nx_p, int ny_p, int nz_p, int nft, + int nxlo_ft, int nylo_ft, int nzlo_ft, + int nxhi_ft, int nyhi_ft, int nzhi_ft, + int nxlo_i, int nylo_i, int nzlo_i, + int nxhi_i, int nyhi_i, int nzhi_i, + double& egy, double* gfn, + double* kx, double* ky, double* kz, + double* kx2, double* ky2, double* kz2, + FFT_SCALAR*** vx_brick, FFT_SCALAR*** vy_brick, FFT_SCALAR*** vz_brick, + double* vir, double** vcoeff, double** vcoeff2, + FFT_SCALAR*** u_pa, FFT_SCALAR*** v0_pa, FFT_SCALAR*** v1_pa, FFT_SCALAR*** v2_pa, + FFT_SCALAR*** v3_pa, FFT_SCALAR*** v4_pa, FFT_SCALAR*** v5_pa) + + +{ + int i,j,k,n; + double eng; + + // transform charge/dispersion density (r -> k) + n = 0; + for (i = 0; i < nft; i++) { + wk1[n++] = dfft[i]; + wk1[n++] = ZEROF; + } + + ft1->compute(wk1,wk1,1); + + // if requested, compute energy and virial contribution + + double scaleinv = 1.0/(nx_p*ny_p*nz_p); + double s2 = scaleinv*scaleinv; + + if (eflag_global || vflag_global) { + if (vflag_global) { + n = 0; + for (i = 0; i < nft; i++) { + eng = s2 * gfn[i] * (wk1[n]*wk1[n] + wk1[n+1]*wk1[n+1]); + for (j = 0; j < 6; j++) vir[j] += eng*vcoeff[i][j]; + if (eflag_global) egy += eng; + n += 2; + } + } else { + n = 0; + for (i = 0; i < nft; i++) { + egy += + s2 * gfn[i] * (wk1[n]*wk1[n] + wk1[n+1]*wk1[n+1]); + n += 2; + } + } + } + + // scale by 1/total-grid-pts to get rho(k) + // multiply by Green's function to get V(k) + + n = 0; + for (i = 0; i < nft; i++) { + wk1[n++] *= scaleinv * gfn[i]; + wk1[n++] *= scaleinv * gfn[i]; + } + + // compute gradients of V(r) in each of 3 dims by transformimg 
-ik*V(k) + // FFT leaves data in 3d brick decomposition + // copy it into inner portion of vdx,vdy,vdz arrays + + // x & y direction gradient + + n = 0; + for (k = nzlo_ft; k <= nzhi_ft; k++) + for (j = nylo_ft; j <= nyhi_ft; j++) + for (i = nxlo_ft; i <= nxhi_ft; i++) { + wk2[n] = 0.5*(kx[i]-kx2[i])*wk1[n+1] + 0.5*(ky[j]-ky2[j])*wk1[n]; + wk2[n+1] = -0.5*(kx[i]-kx2[i])*wk1[n] + 0.5*(ky[j]-ky2[j])*wk1[n+1]; + n += 2; + } + + ft2->compute(wk2,wk2,-1); + + n = 0; + for (k = nzlo_i; k <= nzhi_i; k++) + for (j = nylo_i; j <= nyhi_i; j++) + for (i = nxlo_i; i <= nxhi_i; i++) { + vx_brick[k][j][i] = wk2[n++]; + vy_brick[k][j][i] = wk2[n++]; + } + + if (!eflag_atom) { + // z direction gradient only + + n = 0; + for (k = nzlo_ft; k <= nzhi_ft; k++) + for (j = nylo_ft; j <= nyhi_ft; j++) + for (i = nxlo_ft; i <= nxhi_ft; i++) { + wk2[n] = kz[k]*wk1[n+1]; + wk2[n+1] = -kz[k]*wk1[n]; + n += 2; + } + + ft2->compute(wk2,wk2,-1); + + + n = 0; + for (k = nzlo_i; k <= nzhi_i; k++) + for (j = nylo_i; j <= nyhi_i; j++) + for (i = nxlo_i; i <= nxhi_i; i++) { + vz_brick[k][j][i] = wk2[n]; + n += 2; + } + + } + + else { + // z direction gradient & per-atom energy + + n = 0; + for (k = nzlo_ft; k <= nzhi_ft; k++) + for (j = nylo_ft; j <= nyhi_ft; j++) + for (i = nxlo_ft; i <= nxhi_ft; i++) { + wk2[n] = 0.5*(kz[k]-kz2[k])*wk1[n+1] - wk1[n+1]; + wk2[n+1] = -0.5*(kz[k]-kz2[k])*wk1[n] + wk1[n]; + n += 2; + } + + ft2->compute(wk2,wk2,-1); + + n = 0; + for (k = nzlo_i; k <= nzhi_i; k++) + for (j = nylo_i; j <= nyhi_i; j++) + for (i = nxlo_i; i <= nxhi_i; i++) { + vz_brick[k][j][i] = wk2[n++]; + u_pa[k][j][i] = wk2[n++];; + } + } + + if (vflag_atom) poisson_peratom(wk1, wk2, ft2, vcoeff, vcoeff2, nft, + nxlo_i, nylo_i, nzlo_i, nxhi_i, nyhi_i, nzhi_i, + v0_pa, v1_pa, v2_pa, v3_pa, v4_pa, v5_pa); +} + +/* ---------------------------------------------------------------------- + FFT-based Poisson solver for ad differentiation 
+------------------------------------------------------------------------- */ + +void PPPMDisp::poisson_ad(FFT_SCALAR* wk1, FFT_SCALAR* wk2, + FFT_SCALAR* dfft, LAMMPS_NS::FFT3d* ft1,LAMMPS_NS::FFT3d* ft2, + int nx_p, int ny_p, int nz_p, int nft, + int nxlo_ft, int nylo_ft, int nzlo_ft, + int nxhi_ft, int nyhi_ft, int nzhi_ft, + int nxlo_i, int nylo_i, int nzlo_i, + int nxhi_i, int nyhi_i, int nzhi_i, + double& egy, double* gfn, + double* vir, double** vcoeff, double** vcoeff2, + FFT_SCALAR*** u_pa, FFT_SCALAR*** v0_pa, FFT_SCALAR*** v1_pa, FFT_SCALAR*** v2_pa, + FFT_SCALAR*** v3_pa, FFT_SCALAR*** v4_pa, FFT_SCALAR*** v5_pa) + + +{ + int i,j,k,n; + double eng; + + // transform charge/dispersion density (r -> k) + n = 0; + for (i = 0; i < nft; i++) { + wk1[n++] = dfft[i]; + wk1[n++] = ZEROF; + } + + ft1->compute(wk1,wk1,1); + + // if requested, compute energy and virial contribution + + double scaleinv = 1.0/(nx_p*ny_p*nz_p); + double s2 = scaleinv*scaleinv; + + if (eflag_global || vflag_global) { + if (vflag_global) { + n = 0; + for (i = 0; i < nft; i++) { + eng = s2 * gfn[i] * (wk1[n]*wk1[n] + wk1[n+1]*wk1[n+1]); + for (j = 0; j < 6; j++) vir[j] += eng*vcoeff[i][j]; + if (eflag_global) egy += eng; + n += 2; + } + } else { + n = 0; + for (i = 0; i < nft; i++) { + egy += + s2 * gfn[i] * (wk1[n]*wk1[n] + wk1[n+1]*wk1[n+1]); + n += 2; + } + } + } + + // scale by 1/total-grid-pts to get rho(k) + // multiply by Green's function to get V(k) + + n = 0; + for (i = 0; i < nft; i++) { + wk1[n++] *= scaleinv * gfn[i]; + wk1[n++] *= scaleinv * gfn[i]; + } + + + n = 0; + for (k = nzlo_ft; k <= nzhi_ft; k++) + for (j = nylo_ft; j <= nyhi_ft; j++) + for (i = nxlo_ft; i <= nxhi_ft; i++) { + wk2[n] = wk1[n]; + wk2[n+1] = wk1[n+1]; + n += 2; + } + + ft2->compute(wk2,wk2,-1); + + + n = 0; + for (k = nzlo_i; k <= nzhi_i; k++) + for (j = nylo_i; j <= nyhi_i; j++) + for (i = nxlo_i; i <= nxhi_i; i++) { + u_pa[k][j][i] = wk2[n++]; + n++; + } + + + if (vflag_atom) poisson_peratom(wk1, 
wk2, ft2, vcoeff, vcoeff2, nft, + nxlo_i, nylo_i, nzlo_i, nxhi_i, nyhi_i, nzhi_i, + v0_pa, v1_pa, v2_pa, v3_pa, v4_pa, v5_pa); + +} + +/* ---------------------------------------------------------------------- + Fourier Transform for per atom virial calculations +------------------------------------------------------------------------- */ + +void PPPMDisp:: poisson_peratom(FFT_SCALAR* wk1, FFT_SCALAR* wk2, LAMMPS_NS::FFT3d* ft2, + double** vcoeff, double** vcoeff2, int nft, + int nxlo_i, int nylo_i, int nzlo_i, + int nxhi_i, int nyhi_i, int nzhi_i, + FFT_SCALAR*** v0_pa, FFT_SCALAR*** v1_pa, FFT_SCALAR*** v2_pa, + FFT_SCALAR*** v3_pa, FFT_SCALAR*** v4_pa, FFT_SCALAR*** v5_pa) +{ + //v0 & v1 term + int n, i, j, k; + n = 0; + for (i = 0; i < nft; i++) { + wk2[n] = wk1[n]*vcoeff[i][0] - wk1[n+1]*vcoeff[i][1]; + wk2[n+1] = wk1[n+1]*vcoeff[i][0] + wk1[n]*vcoeff[i][1]; + n += 2; + } + + ft2->compute(wk2,wk2,-1); + + n = 0; + for (k = nzlo_i; k <= nzhi_i; k++) + for (j = nylo_i; j <= nyhi_i; j++) + for (i = nxlo_i; i <= nxhi_i; i++) { + v0_pa[k][j][i] = wk2[n++]; + v1_pa[k][j][i] = wk2[n++]; + } + + //v2 & v3 term + + n = 0; + for (i = 0; i < nft; i++) { + wk2[n] = wk1[n]*vcoeff[i][2] - wk1[n+1]*vcoeff2[i][0]; + wk2[n+1] = wk1[n+1]*vcoeff[i][2] + wk1[n]*vcoeff2[i][0]; + n += 2; + } + + ft2->compute(wk2,wk2,-1); + + n = 0; + for (k = nzlo_i; k <= nzhi_i; k++) + for (j = nylo_i; j <= nyhi_i; j++) + for (i = nxlo_i; i <= nxhi_i; i++) { + v2_pa[k][j][i] = wk2[n++]; + v3_pa[k][j][i] = wk2[n++]; + } + + //v4 & v5 term + + n = 0; + for (i = 0; i < nft; i++) { + wk2[n] = wk1[n]*vcoeff2[i][1] - wk1[n+1]*vcoeff2[i][2]; + wk2[n+1] = wk1[n+1]*vcoeff2[i][1] + wk1[n]*vcoeff2[i][2]; + n += 2; + } + + ft2->compute(wk2,wk2,-1); + + n = 0; + for (k = nzlo_i; k <= nzhi_i; k++) + for (j = nylo_i; j <= nyhi_i; j++) + for (i = nxlo_i; i <= nxhi_i; i++) { + v4_pa[k][j][i] = wk2[n++]; + v5_pa[k][j][i] = wk2[n++]; + } + +} + +/* 
---------------------------------------------------------------------- + Poisson solver for one mesh with 2 different dispersion densities + for ik scheme +------------------------------------------------------------------------- */ + +void PPPMDisp::poisson_2s_ik(FFT_SCALAR* dfft_1, FFT_SCALAR* dfft_2, + FFT_SCALAR*** vxbrick_1, FFT_SCALAR*** vybrick_1, FFT_SCALAR*** vzbrick_1, + FFT_SCALAR*** vxbrick_2, FFT_SCALAR*** vybrick_2, FFT_SCALAR*** vzbrick_2, + FFT_SCALAR*** u_pa_1, FFT_SCALAR*** v0_pa_1, FFT_SCALAR*** v1_pa_1, FFT_SCALAR*** v2_pa_1, + FFT_SCALAR*** v3_pa_1, FFT_SCALAR*** v4_pa_1, FFT_SCALAR*** v5_pa_1, + FFT_SCALAR*** u_pa_2, FFT_SCALAR*** v0_pa_2, FFT_SCALAR*** v1_pa_2, FFT_SCALAR*** v2_pa_2, + FFT_SCALAR*** v3_pa_2, FFT_SCALAR*** v4_pa_2, FFT_SCALAR*** v5_pa_2) + +{ + int i,j,k,n; + double eng; + + double scaleinv = 1.0/(nx_pppm_6*ny_pppm_6*nz_pppm_6); + + // transform charge/dispersion density (r -> k) + // only one tansform required when energies and pressures do not + // need to be calculated + if (eflag_global + vflag_global == 0) { + n = 0; + for (i = 0; i < nfft_6; i++) { + work1_6[n++] = dfft_1[i]; + work1_6[n++] = dfft_2[i]; + } + + fft1_6->compute(work1_6,work1_6,1); + } + // two transforms are required when energies and pressures are + // calculated + else { + n = 0; + for (i = 0; i < nfft_6; i++) { + work1_6[n] = dfft_1[i]; + work2_6[n++] = ZEROF; + work1_6[n] = ZEROF; + work2_6[n++] = dfft_2[i]; + } + + fft1_6->compute(work1_6,work1_6,1); + fft1_6->compute(work2_6,work2_6,1); + + double s2 = scaleinv*scaleinv; + + if (vflag_global) { + n = 0; + for (i = 0; i < nfft_6; i++) { + eng = 2 * s2 * greensfn_6[i] * (work1_6[n]*work2_6[n+1] - work1_6[n+1]*work2_6[n]); + for (j = 0; j < 6; j++) virial_6[j] += eng*vg_6[i][j]; + if (eflag_global)energy_6 += eng; + n += 2; + } + } else { + n = 0; + for (i = 0; i < nfft_6; i++) { + energy_6 += + 2 * s2 * greensfn_6[i] * (work1_6[n]*work2_6[n+1] - work1_6[n+1]*work2_6[n]); + n += 2; + } + } + // unify 
the two transformed vectors for efficient calculations later + for ( i = 0; i < 2*nfft_6; i++) { + work1_6[i] += work2_6[i]; + } + } + + n = 0; + for (i = 0; i < nfft_6; i++) { + work1_6[n++] *= scaleinv * greensfn_6[i]; + work1_6[n++] *= scaleinv * greensfn_6[i]; + } + + // compute gradients of V(r) in each of 3 dims by transformimg -ik*V(k) + // FFT leaves data in 3d brick decomposition + // copy it into inner portion of vdx,vdy,vdz arrays + + // x direction gradient + + n = 0; + for (k = nzlo_fft_6; k <= nzhi_fft_6; k++) + for (j = nylo_fft_6; j <= nyhi_fft_6; j++) + for (i = nxlo_fft_6; i <= nxhi_fft_6; i++) { + work2_6[n] = 0.5*(fkx_6[i]-fkx2_6[i])*work1_6[n+1]; + work2_6[n+1] = -0.5*(fkx_6[i]-fkx2_6[i])*work1_6[n]; + n += 2; + } + + fft2_6->compute(work2_6,work2_6,-1); + + n = 0; + for (k = nzlo_in_6; k <= nzhi_in_6; k++) + for (j = nylo_in_6; j <= nyhi_in_6; j++) + for (i = nxlo_in_6; i <= nxhi_in_6; i++) { + vxbrick_1[k][j][i] = work2_6[n++]; + vxbrick_2[k][j][i] = work2_6[n++]; + } + + // y direction gradient + + n = 0; + for (k = nzlo_fft_6; k <= nzhi_fft_6; k++) + for (j = nylo_fft_6; j <= nyhi_fft_6; j++) + for (i = nxlo_fft_6; i <= nxhi_fft_6; i++) { + work2_6[n] = 0.5*(fky_6[j]-fky2_6[j])*work1_6[n+1]; + work2_6[n+1] = -0.5*(fky_6[j]-fky2_6[j])*work1_6[n]; + n += 2; + } + + fft2_6->compute(work2_6,work2_6,-1); + + n = 0; + for (k = nzlo_in_6; k <= nzhi_in_6; k++) + for (j = nylo_in_6; j <= nyhi_in_6; j++) + for (i = nxlo_in_6; i <= nxhi_in_6; i++) { + vybrick_1[k][j][i] = work2_6[n++]; + vybrick_2[k][j][i] = work2_6[n++]; + } + + // z direction gradient + + n = 0; + for (k = nzlo_fft_6; k <= nzhi_fft_6; k++) + for (j = nylo_fft_6; j <= nyhi_fft_6; j++) + for (i = nxlo_fft_6; i <= nxhi_fft_6; i++) { + work2_6[n] = 0.5*(fkz_6[k]-fkz2_6[k])*work1_6[n+1]; + work2_6[n+1] = -0.5*(fkz_6[k]-fkz2_6[k])*work1_6[n]; + n += 2; + } + + fft2_6->compute(work2_6,work2_6,-1); + + n = 0; + for (k = nzlo_in_6; k <= nzhi_in_6; k++) + for (j = nylo_in_6; j <= nyhi_in_6; 
j++) + for (i = nxlo_in_6; i <= nxhi_in_6; i++) { + vzbrick_1[k][j][i] = work2_6[n++]; + vzbrick_2[k][j][i] = work2_6[n++]; + } + + //Per-atom energy + + if (eflag_atom) { + n = 0; + for (i = 0; i < nfft_6; i++) { + work2_6[n] = work1_6[n]; + work2_6[n+1] = work1_6[n+1]; + n += 2; + } + + fft2_6->compute(work2_6,work2_6,-1); + + n = 0; + for (k = nzlo_in_6; k <= nzhi_in_6; k++) + for (j = nylo_in_6; j <= nyhi_in_6; j++) + for (i = nxlo_in_6; i <= nxhi_in_6; i++) { + u_pa_1[k][j][i] = work2_6[n++]; + u_pa_2[k][j][i] = work2_6[n++]; + } + } + + if (vflag_atom) poisson_2s_peratom(v0_pa_1, v1_pa_1, v2_pa_1, v3_pa_1, v4_pa_1, v5_pa_1, + v0_pa_2, v1_pa_2, v2_pa_2, v3_pa_2, v4_pa_2, v5_pa_2); +} + + +/* ---------------------------------------------------------------------- + Poisson solver for one mesh with 2 different dispersion densities + for ik scheme +------------------------------------------------------------------------- */ + +void PPPMDisp::poisson_none_ik(int n1, int n2,FFT_SCALAR* dfft_1, FFT_SCALAR* dfft_2, + FFT_SCALAR*** vxbrick_1, FFT_SCALAR*** vybrick_1, FFT_SCALAR*** vzbrick_1, + FFT_SCALAR*** vxbrick_2, FFT_SCALAR*** vybrick_2, FFT_SCALAR*** vzbrick_2, + FFT_SCALAR**** u_pa, FFT_SCALAR**** v0_pa, FFT_SCALAR**** v1_pa, FFT_SCALAR**** v2_pa, + FFT_SCALAR**** v3_pa, FFT_SCALAR**** v4_pa, FFT_SCALAR**** v5_pa) +{ + int i,j,k,n; + double eng; + + double scaleinv = 1.0/(nx_pppm_6*ny_pppm_6*nz_pppm_6); + + // transform charge/dispersion density (r -> k) + // only one tansform required when energies and pressures do not + // need to be calculated + if (eflag_global + vflag_global == 0) { + n = 0; + for (i = 0; i < nfft_6; i++) { + work1_6[n++] = dfft_1[i]; + work1_6[n++] = dfft_2[i]; + } + + fft1_6->compute(work1_6,work1_6,1); + } + + + // two transforms are required when energies and pressures are + // calculated + else { + n = 0; + for (i = 0; i < nfft_6; i++) { + work1_6[n] = dfft_1[i]; + work2_6[n++] = ZEROF; + work1_6[n] = ZEROF; + work2_6[n++] = dfft_2[i]; 
+ } + + + fft1_6->compute(work1_6,work1_6,1); + fft1_6->compute(work2_6,work2_6,1); + + double s2 = scaleinv*scaleinv; + + if (vflag_global) { + n = 0; + for (i = 0; i < nfft_6; i++) { + eng = s2 * greensfn_6[i] * (B[n1]*(work1_6[n]*work1_6[n] + work1_6[n+1]*work1_6[n+1]) + B[n2]*(work2_6[n]*work2_6[n] + work2_6[n+1]*work2_6[n+1])); + for (j = 0; j < 6; j++) virial_6[j] += eng*vg_6[i][j]; + if (eflag_global)energy_6 += eng; + n += 2; + } + } else { + n = 0; + for (i = 0; i < nfft_6; i++) { + energy_6 += + s2 * greensfn_6[i] * (B[n1]*(work1_6[n]*work1_6[n] + work1_6[n+1]*work1_6[n+1]) + B[n2]*(work2_6[n]*work2_6[n] + work2_6[n+1]*work2_6[n+1])); + n += 2; + } + } + // unify the two transformed vectors for efficient calculations later + for ( i = 0; i < 2*nfft_6; i++) { + work1_6[i] += work2_6[i]; + } + } + + n = 0; + for (i = 0; i < nfft_6; i++) { + work1_6[n++] *= scaleinv * greensfn_6[i]; + work1_6[n++] *= scaleinv * greensfn_6[i]; + } + + // compute gradients of V(r) in each of 3 dims by transformimg -ik*V(k) + // FFT leaves data in 3d brick decomposition + // copy it into inner portion of vdx,vdy,vdz arrays + + // x direction gradient + + n = 0; + for (k = nzlo_fft_6; k <= nzhi_fft_6; k++) + for (j = nylo_fft_6; j <= nyhi_fft_6; j++) + for (i = nxlo_fft_6; i <= nxhi_fft_6; i++) { + work2_6[n] = 0.5*(fkx_6[i]-fkx2_6[i])*work1_6[n+1]; + work2_6[n+1] = -0.5*(fkx_6[i]-fkx2_6[i])*work1_6[n]; + n += 2; + } + + fft2_6->compute(work2_6,work2_6,-1); + + n = 0; + for (k = nzlo_in_6; k <= nzhi_in_6; k++) + for (j = nylo_in_6; j <= nyhi_in_6; j++) + for (i = nxlo_in_6; i <= nxhi_in_6; i++) { + vxbrick_1[k][j][i] = B[n1]*work2_6[n++]; + vxbrick_2[k][j][i] = B[n2]*work2_6[n++]; + } + + // y direction gradient + + n = 0; + for (k = nzlo_fft_6; k <= nzhi_fft_6; k++) + for (j = nylo_fft_6; j <= nyhi_fft_6; j++) + for (i = nxlo_fft_6; i <= nxhi_fft_6; i++) { + work2_6[n] = 0.5*(fky_6[j]-fky2_6[j])*work1_6[n+1]; + work2_6[n+1] = -0.5*(fky_6[j]-fky2_6[j])*work1_6[n]; + n += 2; + } 
+ + fft2_6->compute(work2_6,work2_6,-1); + + n = 0; + for (k = nzlo_in_6; k <= nzhi_in_6; k++) + for (j = nylo_in_6; j <= nyhi_in_6; j++) + for (i = nxlo_in_6; i <= nxhi_in_6; i++) { + vybrick_1[k][j][i] = B[n1]*work2_6[n++]; + vybrick_2[k][j][i] = B[n2]*work2_6[n++]; + } + + // z direction gradient + + n = 0; + for (k = nzlo_fft_6; k <= nzhi_fft_6; k++) + for (j = nylo_fft_6; j <= nyhi_fft_6; j++) + for (i = nxlo_fft_6; i <= nxhi_fft_6; i++) { + work2_6[n] = 0.5*(fkz_6[k]-fkz2_6[k])*work1_6[n+1]; + work2_6[n+1] = -0.5*(fkz_6[k]-fkz2_6[k])*work1_6[n]; + n += 2; + } + + fft2_6->compute(work2_6,work2_6,-1); + + n = 0; + for (k = nzlo_in_6; k <= nzhi_in_6; k++) + for (j = nylo_in_6; j <= nyhi_in_6; j++) + for (i = nxlo_in_6; i <= nxhi_in_6; i++) { + vzbrick_1[k][j][i] = B[n1]*work2_6[n++]; + vzbrick_2[k][j][i] = B[n2]*work2_6[n++]; + } + + //Per-atom energy + + if (eflag_atom) { + n = 0; + for (i = 0; i < nfft_6; i++) { + work2_6[n] = work1_6[n]; + work2_6[n+1] = work1_6[n+1]; + n += 2; + } + + fft2_6->compute(work2_6,work2_6,-1); + + n = 0; + for (k = nzlo_in_6; k <= nzhi_in_6; k++) + for (j = nylo_in_6; j <= nyhi_in_6; j++) + for (i = nxlo_in_6; i <= nxhi_in_6; i++) { + u_pa[n1][k][j][i] = B[n1]*work2_6[n++]; + u_pa[n2][k][j][i] = B[n2]*work2_6[n++]; + } + } + + if (vflag_atom) poisson_none_peratom(n1,n2, + v0_pa[n1], v1_pa[n1], v2_pa[n1], v3_pa[n1], v4_pa[n1], v5_pa[n1], + v0_pa[n2], v1_pa[n2], v2_pa[n2], v3_pa[n2], v4_pa[n2], v5_pa[n2]); +} + +/* ---------------------------------------------------------------------- + Poisson solver for one mesh with 2 different dispersion densities + for ad scheme +------------------------------------------------------------------------- */ + +void PPPMDisp::poisson_2s_ad(FFT_SCALAR* dfft_1, FFT_SCALAR* dfft_2, + FFT_SCALAR*** u_pa_1, FFT_SCALAR*** v0_pa_1, FFT_SCALAR*** v1_pa_1, FFT_SCALAR*** v2_pa_1, + FFT_SCALAR*** v3_pa_1, FFT_SCALAR*** v4_pa_1, FFT_SCALAR*** v5_pa_1, + FFT_SCALAR*** u_pa_2, FFT_SCALAR*** v0_pa_2, 
FFT_SCALAR*** v1_pa_2, FFT_SCALAR*** v2_pa_2, + FFT_SCALAR*** v3_pa_2, FFT_SCALAR*** v4_pa_2, FFT_SCALAR*** v5_pa_2) + +{ + int i,j,k,n; + double eng; + + double scaleinv = 1.0/(nx_pppm_6*ny_pppm_6*nz_pppm_6); + + // transform charge/dispersion density (r -> k) + // only one tansform required when energies and pressures do not + // need to be calculated + if (eflag_global + vflag_global == 0) { + n = 0; + for (i = 0; i < nfft_6; i++) { + work1_6[n++] = dfft_1[i]; + work1_6[n++] = dfft_2[i]; + } + + fft1_6->compute(work1_6,work1_6,1); + } + // two transforms are required when energies and pressures are + // calculated + else { + n = 0; + for (i = 0; i < nfft_6; i++) { + work1_6[n] = dfft_1[i]; + work2_6[n++] = ZEROF; + work1_6[n] = ZEROF; + work2_6[n++] = dfft_2[i]; + } + + fft1_6->compute(work1_6,work1_6,1); + fft1_6->compute(work2_6,work2_6,1); + + double s2 = scaleinv*scaleinv; + + if (vflag_global) { + n = 0; + for (i = 0; i < nfft_6; i++) { + eng = 2 * s2 * greensfn_6[i] * (work1_6[n]*work2_6[n+1] - work1_6[n+1]*work2_6[n]); + for (j = 0; j < 6; j++) virial_6[j] += eng*vg_6[i][j]; + if (eflag_global)energy_6 += eng; + n += 2; + } + } else { + n = 0; + for (i = 0; i < nfft_6; i++) { + energy_6 += + 2 * s2 * greensfn_6[i] * (work1_6[n]*work2_6[n+1] - work1_6[n+1]*work2_6[n]); + n += 2; + } + } + // unify the two transformed vectors for efficient calculations later + for ( i = 0; i < 2*nfft_6; i++) { + work1_6[i] += work2_6[i]; + } + } + + + n = 0; + for (i = 0; i < nfft_6; i++) { + work1_6[n++] *= scaleinv * greensfn_6[i]; + work1_6[n++] *= scaleinv * greensfn_6[i]; + } + + + n = 0; + for (i = 0; i < nfft_6; i++) { + work2_6[n] = work1_6[n]; + work2_6[n+1] = work1_6[n+1]; + n += 2; + } + + fft2_6->compute(work2_6,work2_6,-1); + + n = 0; + for (k = nzlo_in_6; k <= nzhi_in_6; k++) + for (j = nylo_in_6; j <= nyhi_in_6; j++) + for (i = nxlo_in_6; i <= nxhi_in_6; i++) { + u_pa_1[k][j][i] = work2_6[n++]; + u_pa_2[k][j][i] = work2_6[n++]; + } + + if (vflag_atom) 
poisson_2s_peratom(v0_pa_1, v1_pa_1, v2_pa_1, v3_pa_1, v4_pa_1, v5_pa_1, + v0_pa_2, v1_pa_2, v2_pa_2, v3_pa_2, v4_pa_2, v5_pa_2); +} + +/* ---------------------------------------------------------------------- + Poisson solver for one mesh with 2 different dispersion densities + for ad scheme +------------------------------------------------------------------------- */ + +void PPPMDisp::poisson_none_ad(int n1, int n2, FFT_SCALAR* dfft_1, FFT_SCALAR* dfft_2, + FFT_SCALAR*** u_pa_1, FFT_SCALAR*** u_pa_2, + FFT_SCALAR**** v0_pa, FFT_SCALAR**** v1_pa, FFT_SCALAR**** v2_pa, + FFT_SCALAR**** v3_pa, FFT_SCALAR**** v4_pa, FFT_SCALAR**** v5_pa) +{ + int i,j,k,n; + double eng; + + double scaleinv = 1.0/(nx_pppm_6*ny_pppm_6*nz_pppm_6); + + // transform charge/dispersion density (r -> k) + // only one tansform required when energies and pressures do not + // need to be calculated + if (eflag_global + vflag_global == 0) { + n = 0; + for (i = 0; i < nfft_6; i++) { + work1_6[n++] = dfft_1[i]; + work1_6[n++] = dfft_2[i]; + } + + fft1_6->compute(work1_6,work1_6,1); + } + // two transforms are required when energies and pressures are + // calculated + else { + n = 0; + for (i = 0; i < nfft_6; i++) { + work1_6[n] = dfft_1[i]; + work2_6[n++] = ZEROF; + work1_6[n] = ZEROF; + work2_6[n++] = dfft_2[i]; + } + + fft1_6->compute(work1_6,work1_6,1); + fft1_6->compute(work2_6,work2_6,1); + + double s2 = scaleinv*scaleinv; + + if (vflag_global) { + n = 0; + for (i = 0; i < nfft_6; i++) { + eng = s2 * greensfn_6[i] * (B[n1]*(work1_6[n]*work1_6[n] + work1_6[n+1]*work1_6[n+1]) + B[n2]*(work2_6[n]*work2_6[n] + work2_6[n+1]*work2_6[n+1])); + for (j = 0; j < 6; j++) virial_6[j] += eng*vg_6[i][j]; + if (eflag_global)energy_6 += eng; + n += 2; + } + } else { + n = 0; + for (i = 0; i < nfft_6; i++) { + energy_6 += + s2 * greensfn_6[i] * (B[n1]*(work1_6[n]*work1_6[n] + work1_6[n+1]*work1_6[n+1]) + B[n2]*(work2_6[n]*work2_6[n] + work2_6[n+1]*work2_6[n+1])); + n += 2; + } + } + // unify the two 
transformed vectors for efficient calculations later + for ( i = 0; i < 2*nfft_6; i++) { + work1_6[i] += work2_6[i]; + } + } + + + n = 0; + for (i = 0; i < nfft_6; i++) { + work1_6[n++] *= scaleinv * greensfn_6[i]; + work1_6[n++] *= scaleinv * greensfn_6[i]; + } + + + n = 0; + for (i = 0; i < nfft_6; i++) { + work2_6[n] = work1_6[n]; + work2_6[n+1] = work1_6[n+1]; + n += 2; + } + + fft2_6->compute(work2_6,work2_6,-1); + + n = 0; + for (k = nzlo_in_6; k <= nzhi_in_6; k++) + for (j = nylo_in_6; j <= nyhi_in_6; j++) + for (i = nxlo_in_6; i <= nxhi_in_6; i++) { + u_pa_1[k][j][i] = B[n1]*work2_6[n++]; + u_pa_2[k][j][i] = B[n2]*work2_6[n++]; + } + + if (vflag_atom) poisson_none_peratom(n1,n2, + v0_pa[n1], v1_pa[n1], v2_pa[n1], v3_pa[n1], v4_pa[n1], v5_pa[n1], + v0_pa[n2], v1_pa[n2], v2_pa[n2], v3_pa[n2], v4_pa[n2], v5_pa[n2]); +} + +/* ---------------------------------------------------------------------- + Fourier Transform for per atom virial calculations +------------------------------------------------------------------------- */ + +void PPPMDisp::poisson_2s_peratom(FFT_SCALAR*** v0_pa_1, FFT_SCALAR*** v1_pa_1, FFT_SCALAR*** v2_pa_1, + FFT_SCALAR*** v3_pa_1, FFT_SCALAR*** v4_pa_1, FFT_SCALAR*** v5_pa_1, + FFT_SCALAR*** v0_pa_2, FFT_SCALAR*** v1_pa_2, FFT_SCALAR*** v2_pa_2, + FFT_SCALAR*** v3_pa_2, FFT_SCALAR*** v4_pa_2, FFT_SCALAR*** v5_pa_2) +{ + //Compute first virial term v0 + int n, i, j, k; + + n = 0; + for (i = 0; i < nfft_6; i++) { + work2_6[n] = work1_6[n]*vg_6[i][0]; + work2_6[n+1] = work1_6[n+1]*vg_6[i][0]; + n += 2; + } + + fft2_6->compute(work2_6,work2_6,-1); + + n = 0; + for (k = nzlo_in_6; k <= nzhi_in_6; k++) + for (j = nylo_in_6; j <= nyhi_in_6; j++) + for (i = nxlo_in_6; i <= nxhi_in_6; i++) { + v0_pa_1[k][j][i] = work2_6[n++]; + v0_pa_2[k][j][i] = work2_6[n++]; + } + + //Compute second virial term v1 + + n = 0; + for (i = 0; i < nfft_6; i++) { + work2_6[n] = work1_6[n]*vg_6[i][1]; + work2_6[n+1] = work1_6[n+1]*vg_6[i][1]; + n += 2; + } + + 
fft2_6->compute(work2_6,work2_6,-1); + + n = 0; + for (k = nzlo_in_6; k <= nzhi_in_6; k++) + for (j = nylo_in_6; j <= nyhi_in_6; j++) + for (i = nxlo_in_6; i <= nxhi_in_6; i++) { + v1_pa_1[k][j][i] = work2_6[n++]; + v1_pa_2[k][j][i] = work2_6[n++]; + } + + //Compute third virial term v2 + + n = 0; + for (i = 0; i < nfft_6; i++) { + work2_6[n] = work1_6[n]*vg_6[i][2]; + work2_6[n+1] = work1_6[n+1]*vg_6[i][2]; + n += 2; + } + + fft2_6->compute(work2_6,work2_6,-1); + + n = 0; + for (k = nzlo_in_6; k <= nzhi_in_6; k++) + for (j = nylo_in_6; j <= nyhi_in_6; j++) + for (i = nxlo_in_6; i <= nxhi_in_6; i++) { + v2_pa_1[k][j][i] = work2_6[n++]; + v2_pa_2[k][j][i] = work2_6[n++]; + } + + //Compute fourth virial term v3 + + n = 0; + for (i = 0; i < nfft_6; i++) { + work2_6[n] = work1_6[n]*vg2_6[i][0]; + work2_6[n+1] = work1_6[n+1]*vg2_6[i][0]; + n += 2; + } + + fft2_6->compute(work2_6,work2_6,-1); + + n = 0; + for (k = nzlo_in_6; k <= nzhi_in_6; k++) + for (j = nylo_in_6; j <= nyhi_in_6; j++) + for (i = nxlo_in_6; i <= nxhi_in_6; i++) { + v3_pa_1[k][j][i] = work2_6[n++]; + v3_pa_2[k][j][i] = work2_6[n++]; + } + + //Compute fifth virial term v4 + + n = 0; + for (i = 0; i < nfft_6; i++) { + work2_6[n] = work1_6[n]*vg2_6[i][1]; + work2_6[n+1] = work1_6[n+1]*vg2_6[i][1]; + n += 2; + } + + fft2_6->compute(work2_6,work2_6,-1); + + n = 0; + for (k = nzlo_in_6; k <= nzhi_in_6; k++) + for (j = nylo_in_6; j <= nyhi_in_6; j++) + for (i = nxlo_in_6; i <= nxhi_in_6; i++) { + v4_pa_1[k][j][i] = work2_6[n++]; + v4_pa_2[k][j][i] = work2_6[n++]; + } + + //Compute last virial term v5 + + n = 0; + for (i = 0; i < nfft_6; i++) { + work2_6[n] = work1_6[n]*vg2_6[i][2]; + work2_6[n+1] = work1_6[n+1]*vg2_6[i][2]; + n += 2; + } + + fft2_6->compute(work2_6,work2_6,-1); + + n = 0; + for (k = nzlo_in_6; k <= nzhi_in_6; k++) + for (j = nylo_in_6; j <= nyhi_in_6; j++) + for (i = nxlo_in_6; i <= nxhi_in_6; i++) { + v5_pa_1[k][j][i] = work2_6[n++]; + v5_pa_2[k][j][i] = work2_6[n++]; + } +} + +/* 
---------------------------------------------------------------------- + Fourier Transform for per atom virial calculations +------------------------------------------------------------------------- */ + +void PPPMDisp::poisson_none_peratom(int n1, int n2, + FFT_SCALAR*** v0_pa_1, FFT_SCALAR*** v1_pa_1, FFT_SCALAR*** v2_pa_1, + FFT_SCALAR*** v3_pa_1, FFT_SCALAR*** v4_pa_1, FFT_SCALAR*** v5_pa_1, + FFT_SCALAR*** v0_pa_2, FFT_SCALAR*** v1_pa_2, FFT_SCALAR*** v2_pa_2, + FFT_SCALAR*** v3_pa_2, FFT_SCALAR*** v4_pa_2, FFT_SCALAR*** v5_pa_2) +{ + //Compute first virial term v0 + int n, i, j, k; + + n = 0; + for (i = 0; i < nfft_6; i++) { + work2_6[n] = work1_6[n]*vg_6[i][0]; + work2_6[n+1] = work1_6[n+1]*vg_6[i][0]; + n += 2; + } + + fft2_6->compute(work2_6,work2_6,-1); + + n = 0; + for (k = nzlo_in_6; k <= nzhi_in_6; k++) + for (j = nylo_in_6; j <= nyhi_in_6; j++) + for (i = nxlo_in_6; i <= nxhi_in_6; i++) { + v0_pa_1[k][j][i] = B[n1]*work2_6[n++]; + v0_pa_2[k][j][i] = B[n2]*work2_6[n++]; + } + + //Compute second virial term v1 + + n = 0; + for (i = 0; i < nfft_6; i++) { + work2_6[n] = work1_6[n]*vg_6[i][1]; + work2_6[n+1] = work1_6[n+1]*vg_6[i][1]; + n += 2; + } + + fft2_6->compute(work2_6,work2_6,-1); + + n = 0; + for (k = nzlo_in_6; k <= nzhi_in_6; k++) + for (j = nylo_in_6; j <= nyhi_in_6; j++) + for (i = nxlo_in_6; i <= nxhi_in_6; i++) { + v1_pa_1[k][j][i] = B[n1]*work2_6[n++]; + v1_pa_2[k][j][i] = B[n2]*work2_6[n++]; + } + + //Compute third virial term v2 + + n = 0; + for (i = 0; i < nfft_6; i++) { + work2_6[n] = work1_6[n]*vg_6[i][2]; + work2_6[n+1] = work1_6[n+1]*vg_6[i][2]; + n += 2; + } + + fft2_6->compute(work2_6,work2_6,-1); + + n = 0; + for (k = nzlo_in_6; k <= nzhi_in_6; k++) + for (j = nylo_in_6; j <= nyhi_in_6; j++) + for (i = nxlo_in_6; i <= nxhi_in_6; i++) { + v2_pa_1[k][j][i] = B[n1]*work2_6[n++]; + v2_pa_2[k][j][i] = B[n2]*work2_6[n++]; + } + + //Compute fourth virial term v3 + + n = 0; + for (i = 0; i < nfft_6; i++) { + work2_6[n] = 
work1_6[n]*vg2_6[i][0]; + work2_6[n+1] = work1_6[n+1]*vg2_6[i][0]; + n += 2; + } + + fft2_6->compute(work2_6,work2_6,-1); + + n = 0; + for (k = nzlo_in_6; k <= nzhi_in_6; k++) + for (j = nylo_in_6; j <= nyhi_in_6; j++) + for (i = nxlo_in_6; i <= nxhi_in_6; i++) { + v3_pa_1[k][j][i] = B[n1]*work2_6[n++]; + v3_pa_2[k][j][i] = B[n2]*work2_6[n++]; + } + + //Compute fifth virial term v4 + + n = 0; + for (i = 0; i < nfft_6; i++) { + work2_6[n] = work1_6[n]*vg2_6[i][1]; + work2_6[n+1] = work1_6[n+1]*vg2_6[i][1]; + n += 2; + } + + fft2_6->compute(work2_6,work2_6,-1); + + n = 0; + for (k = nzlo_in_6; k <= nzhi_in_6; k++) + for (j = nylo_in_6; j <= nyhi_in_6; j++) + for (i = nxlo_in_6; i <= nxhi_in_6; i++) { + v4_pa_1[k][j][i] = B[n1]*work2_6[n++]; + v4_pa_2[k][j][i] = B[n2]*work2_6[n++]; + } + + //Compute last virial term v5 + + n = 0; + for (i = 0; i < nfft_6; i++) { + work2_6[n] = work1_6[n]*vg2_6[i][2]; + work2_6[n+1] = work1_6[n+1]*vg2_6[i][2]; + n += 2; + } + + fft2_6->compute(work2_6,work2_6,-1); + + n = 0; + for (k = nzlo_in_6; k <= nzhi_in_6; k++) + for (j = nylo_in_6; j <= nyhi_in_6; j++) + for (i = nxlo_in_6; i <= nxhi_in_6; i++) { + v5_pa_1[k][j][i] = B[n1]*work2_6[n++]; + v5_pa_2[k][j][i] = B[n2]*work2_6[n++]; + } +} + +/* ---------------------------------------------------------------------- + interpolate from grid to get electric field & force on my particles + for ik scheme +------------------------------------------------------------------------- */ + +void PPPMDisp::fieldforce_c_ik() +{ + int i,l,m,n,nx,ny,nz,mx,my,mz; + FFT_SCALAR dx,dy,dz,x0,y0,z0; + FFT_SCALAR ekx,eky,ekz; + + // loop over my charges, interpolate electric field from nearby grid points + // (nx,ny,nz) = global coords of grid pt to "lower left" of charge + // (dx,dy,dz) = distance to "lower left" grid pt + // (mx,my,mz) = global coords of moving stencil pt + // ek = 3 components of E-field on particle + + double *q = atom->q; + double **x = atom->x; + double **f = atom->f; + + int nlocal = 
atom->nlocal; + + for (i = 0; i < nlocal; i++) { + nx = part2grid[i][0]; + ny = part2grid[i][1]; + nz = part2grid[i][2]; + dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv; + dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv; + dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv; + + compute_rho1d(dx,dy,dz, order, rho_coeff, rho1d); + + ekx = eky = ekz = ZEROF; + for (n = nlower; n <= nupper; n++) { + mz = n+nz; + z0 = rho1d[2][n]; + for (m = nlower; m <= nupper; m++) { + my = m+ny; + y0 = z0*rho1d[1][m]; + for (l = nlower; l <= nupper; l++) { + mx = l+nx; + x0 = y0*rho1d[0][l]; + ekx -= x0*vdx_brick[mz][my][mx]; + eky -= x0*vdy_brick[mz][my][mx]; + ekz -= x0*vdz_brick[mz][my][mx]; + } + } + } + + // convert E-field to force + + const double qfactor = force->qqrd2e * scale * q[i]; + f[i][0] += qfactor*ekx; + f[i][1] += qfactor*eky; + if (slabflag != 2) f[i][2] += qfactor*ekz; + } +} +/* ---------------------------------------------------------------------- + interpolate from grid to get electric field & force on my particles + for ad scheme +------------------------------------------------------------------------- */ + +void PPPMDisp::fieldforce_c_ad() +{ + int i,l,m,n,nx,ny,nz,mx,my,mz; + FFT_SCALAR dx,dy,dz; + FFT_SCALAR ekx,eky,ekz; + double s1,s2,s3; + double sf = 0.0; + + double *prd; + + if (triclinic == 0) prd = domain->prd; + else prd = domain->prd_lamda; + + double xprd = prd[0]; + double yprd = prd[1]; + double zprd = prd[2]; + double zprd_slab = zprd*slab_volfactor; + + double hx_inv = nx_pppm/xprd; + double hy_inv = ny_pppm/yprd; + double hz_inv = nz_pppm/zprd_slab; + + // loop over my charges, interpolate electric field from nearby grid points + // (nx,ny,nz) = global coords of grid pt to "lower left" of charge + // (dx,dy,dz) = distance to "lower left" grid pt + // (mx,my,mz) = global coords of moving stencil pt + // ek = 3 components of E-field on particle + + double *q = atom->q; + double **x = atom->x; + double **f = atom->f; + + int nlocal = atom->nlocal; + + 
for (i = 0; i < nlocal; i++) { + nx = part2grid[i][0]; + ny = part2grid[i][1]; + nz = part2grid[i][2]; + dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv; + dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv; + dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv; + + compute_rho1d(dx,dy,dz, order, rho_coeff, rho1d); + compute_drho1d(dx,dy,dz, order, drho_coeff, drho1d); + + ekx = eky = ekz = ZEROF; + for (n = nlower; n <= nupper; n++) { + mz = n+nz; + for (m = nlower; m <= nupper; m++) { + my = m+ny; + for (l = nlower; l <= nupper; l++) { + mx = l+nx; + ekx += drho1d[0][l]*rho1d[1][m]*rho1d[2][n]*u_brick[mz][my][mx]; + eky += rho1d[0][l]*drho1d[1][m]*rho1d[2][n]*u_brick[mz][my][mx]; + ekz += rho1d[0][l]*rho1d[1][m]*drho1d[2][n]*u_brick[mz][my][mx]; + } + } + } + ekx *= hx_inv; + eky *= hy_inv; + ekz *= hz_inv; + // convert E-field to force and substract self forces + const double qfactor = force->qqrd2e * scale; + + s1 = x[i][0]*hx_inv; + s2 = x[i][1]*hy_inv; + s3 = x[i][2]*hz_inv; + sf = sf_coeff[0]*sin(2*MY_PI*s1); + sf += sf_coeff[1]*sin(4*MY_PI*s1); + sf *= 2*q[i]*q[i]; + f[i][0] += qfactor*(ekx*q[i] - sf); + + sf = sf_coeff[2]*sin(2*MY_PI*s2); + sf += sf_coeff[3]*sin(4*MY_PI*s2); + sf *= 2*q[i]*q[i]; + f[i][1] += qfactor*(eky*q[i] - sf); + + + sf = sf_coeff[4]*sin(2*MY_PI*s3); + sf += sf_coeff[5]*sin(4*MY_PI*s3); + sf *= 2*q[i]*q[i]; + if (slabflag != 2) f[i][2] += qfactor*(ekz*q[i] - sf); + } +} + +/* ---------------------------------------------------------------------- + interpolate from grid to get electric field & force on my particles +------------------------------------------------------------------------- */ + +void PPPMDisp::fieldforce_c_peratom() +{ + int i,l,m,n,nx,ny,nz,mx,my,mz; + FFT_SCALAR dx,dy,dz,x0,y0,z0; + FFT_SCALAR u_pa,v0,v1,v2,v3,v4,v5; + + // loop over my charges, interpolate electric field from nearby grid points + // (nx,ny,nz) = global coords of grid pt to "lower left" of charge + // (dx,dy,dz) = distance to "lower left" grid pt + // (mx,my,mz) = 
global coords of moving stencil pt + // ek = 3 components of E-field on particle + + double *q = atom->q; + double **x = atom->x; + + int nlocal = atom->nlocal; + + for (i = 0; i < nlocal; i++) { + nx = part2grid[i][0]; + ny = part2grid[i][1]; + nz = part2grid[i][2]; + dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv; + dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv; + dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv; + + compute_rho1d(dx,dy,dz, order, rho_coeff, rho1d); + + u_pa = v0 = v1 = v2 = v3 = v4 = v5 = ZEROF; + for (n = nlower; n <= nupper; n++) { + mz = n+nz; + z0 = rho1d[2][n]; + for (m = nlower; m <= nupper; m++) { + my = m+ny; + y0 = z0*rho1d[1][m]; + for (l = nlower; l <= nupper; l++) { + mx = l+nx; + x0 = y0*rho1d[0][l]; + if (eflag_atom) u_pa += x0*u_brick[mz][my][mx]; + if (vflag_atom) { + v0 += x0*v0_brick[mz][my][mx]; + v1 += x0*v1_brick[mz][my][mx]; + v2 += x0*v2_brick[mz][my][mx]; + v3 += x0*v3_brick[mz][my][mx]; + v4 += x0*v4_brick[mz][my][mx]; + v5 += x0*v5_brick[mz][my][mx]; + } + } + } + } + + // convert E-field to force + + const double qfactor = 0.5*force->qqrd2e * scale * q[i]; + + if (eflag_atom) eatom[i] += u_pa*qfactor; + if (vflag_atom) { + vatom[i][0] += v0*qfactor; + vatom[i][1] += v1*qfactor; + vatom[i][2] += v2*qfactor; + vatom[i][3] += v3*qfactor; + vatom[i][4] += v4*qfactor; + vatom[i][5] += v5*qfactor; + } + } +} + +/* ---------------------------------------------------------------------- + interpolate from grid to get dispersion field & force on my particles + for geometric mixing rule +------------------------------------------------------------------------- */ + +void PPPMDisp::fieldforce_g_ik() +{ + int i,l,m,n,nx,ny,nz,mx,my,mz; + FFT_SCALAR dx,dy,dz,x0,y0,z0; + FFT_SCALAR ekx,eky,ekz; + + // loop over my charges, interpolate electric field from nearby grid points + // (nx,ny,nz) = global coords of grid pt to "lower left" of charge + // (dx,dy,dz) = distance to "lower left" grid pt + // (mx,my,mz) = global coords of moving 
stencil pt + // ek = 3 components of dispersion field on particle + + double **x = atom->x; + double **f = atom->f; + int type; + double lj; + + int nlocal = atom->nlocal; + + for (i = 0; i < nlocal; i++) { + nx = part2grid_6[i][0]; + ny = part2grid_6[i][1]; + nz = part2grid_6[i][2]; + dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6; + dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6; + dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6; + + compute_rho1d(dx,dy,dz, order_6, rho_coeff_6, rho1d_6); + + ekx = eky = ekz = ZEROF; + for (n = nlower_6; n <= nupper_6; n++) { + mz = n+nz; + z0 = rho1d_6[2][n]; + for (m = nlower_6; m <= nupper_6; m++) { + my = m+ny; + y0 = z0*rho1d_6[1][m]; + for (l = nlower_6; l <= nupper_6; l++) { + mx = l+nx; + x0 = y0*rho1d_6[0][l]; + ekx -= x0*vdx_brick_g[mz][my][mx]; + eky -= x0*vdy_brick_g[mz][my][mx]; + ekz -= x0*vdz_brick_g[mz][my][mx]; + } + } + } + + // convert E-field to force + type = atom->type[i]; + lj = B[type]; + f[i][0] += lj*ekx; + f[i][1] += lj*eky; + if (slabflag != 2) f[i][2] += lj*ekz; + } +} + +/* ---------------------------------------------------------------------- + interpolate from grid to get dispersion field & force on my particles + for geometric mixing rule for ad scheme +------------------------------------------------------------------------- */ + +void PPPMDisp::fieldforce_g_ad() +{ + int i,l,m,n,nx,ny,nz,mx,my,mz; + FFT_SCALAR dx,dy,dz; + FFT_SCALAR ekx,eky,ekz; + double s1,s2,s3; + double sf = 0.0; + double *prd; + + if (triclinic == 0) prd = domain->prd; + else prd = domain->prd_lamda; + + double xprd = prd[0]; + double yprd = prd[1]; + double zprd = prd[2]; + double zprd_slab = zprd*slab_volfactor; + + double hx_inv = nx_pppm_6/xprd; + double hy_inv = ny_pppm_6/yprd; + double hz_inv = nz_pppm_6/zprd_slab; + + // loop over my charges, interpolate electric field from nearby grid points + // (nx,ny,nz) = global coords of grid pt to "lower left" of charge + // (dx,dy,dz) = distance to "lower left" grid pt 
+ // (mx,my,mz) = global coords of moving stencil pt + // ek = 3 components of dispersion field on particle + + double **x = atom->x; + double **f = atom->f; + int type; + double lj; + + int nlocal = atom->nlocal; + + + for (i = 0; i < nlocal; i++) { + nx = part2grid_6[i][0]; + ny = part2grid_6[i][1]; + nz = part2grid_6[i][2]; + dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6; + dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6; + dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6; + + compute_rho1d(dx,dy,dz, order_6, rho_coeff_6, rho1d_6); + compute_drho1d(dx,dy,dz, order_6, drho_coeff_6, drho1d_6); + + + ekx = eky = ekz = ZEROF; + for (n = nlower_6; n <= nupper_6; n++) { + mz = n+nz; + for (m = nlower_6; m <= nupper_6; m++) { + my = m+ny; + for (l = nlower_6; l <= nupper_6; l++) { + mx = l+nx; + ekx += drho1d_6[0][l]*rho1d_6[1][m]*rho1d_6[2][n]*u_brick_g[mz][my][mx]; + eky += rho1d_6[0][l]*drho1d_6[1][m]*rho1d_6[2][n]*u_brick_g[mz][my][mx]; + ekz += rho1d_6[0][l]*rho1d_6[1][m]*drho1d_6[2][n]*u_brick_g[mz][my][mx]; + } + } + } + ekx *= hx_inv; + eky *= hy_inv; + ekz *= hz_inv; + + // convert E-field to force + type = atom->type[i]; + lj = B[type]; + + s1 = x[i][0]*hx_inv; + s2 = x[i][1]*hy_inv; + s3 = x[i][2]*hz_inv; + + sf = sf_coeff_6[0]*sin(2*MY_PI*s1); + sf += sf_coeff_6[1]*sin(4*MY_PI*s1); + sf *= 2*lj*lj; + f[i][0] += ekx*lj - sf; + + sf = sf_coeff_6[2]*sin(2*MY_PI*s2); + sf += sf_coeff_6[3]*sin(4*MY_PI*s2); + sf *= 2*lj*lj; + f[i][1] += eky*lj - sf; + + + sf = sf_coeff_6[4]*sin(2*MY_PI*s3); + sf += sf_coeff_6[5]*sin(4*MY_PI*s3); + sf *= 2*lj*lj; + if (slabflag != 2) f[i][2] += ekz*lj - sf; + + } +} + +/* ---------------------------------------------------------------------- + interpolate from grid to get dispersion field & force on my particles + for geometric mixing rule for per atom quantities +------------------------------------------------------------------------- */ + +void PPPMDisp::fieldforce_g_peratom() +{ + int i,l,m,n,nx,ny,nz,mx,my,mz; + 
FFT_SCALAR dx,dy,dz,x0,y0,z0; + FFT_SCALAR u_pa,v0,v1,v2,v3,v4,v5; + + // loop over my charges, interpolate electric field from nearby grid points + // (nx,ny,nz) = global coords of grid pt to "lower left" of charge + // (dx,dy,dz) = distance to "lower left" grid pt + // (mx,my,mz) = global coords of moving stencil pt + // ek = 3 components of dispersion field on particle + + double **x = atom->x; + int type; + double lj; + + int nlocal = atom->nlocal; + + for (i = 0; i < nlocal; i++) { + nx = part2grid_6[i][0]; + ny = part2grid_6[i][1]; + nz = part2grid_6[i][2]; + dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6; + dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6; + dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6; + + compute_rho1d(dx,dy,dz, order_6, rho_coeff_6, rho1d_6); + + u_pa = v0 = v1 = v2 = v3 = v4 = v5 = ZEROF; + for (n = nlower_6; n <= nupper_6; n++) { + mz = n+nz; + z0 = rho1d_6[2][n]; + for (m = nlower_6; m <= nupper_6; m++) { + my = m+ny; + y0 = z0*rho1d_6[1][m]; + for (l = nlower_6; l <= nupper_6; l++) { + mx = l+nx; + x0 = y0*rho1d_6[0][l]; + if (eflag_atom) u_pa += x0*u_brick_g[mz][my][mx]; + if (vflag_atom) { + v0 += x0*v0_brick_g[mz][my][mx]; + v1 += x0*v1_brick_g[mz][my][mx]; + v2 += x0*v2_brick_g[mz][my][mx]; + v3 += x0*v3_brick_g[mz][my][mx]; + v4 += x0*v4_brick_g[mz][my][mx]; + v5 += x0*v5_brick_g[mz][my][mx]; + } + } + } + } + + // convert E-field to force + type = atom->type[i]; + lj = B[type]*0.5; + + if (eflag_atom) eatom[i] += u_pa*lj; + if (vflag_atom) { + vatom[i][0] += v0*lj; + vatom[i][1] += v1*lj; + vatom[i][2] += v2*lj; + vatom[i][3] += v3*lj; + vatom[i][4] += v4*lj; + vatom[i][5] += v5*lj; + } + } +} + +/* ---------------------------------------------------------------------- + interpolate from grid to get dispersion field & force on my particles + for arithmetic mixing rule and ik scheme +------------------------------------------------------------------------- */ + +void PPPMDisp::fieldforce_a_ik() +{ + int 
i,l,m,n,nx,ny,nz,mx,my,mz; + FFT_SCALAR dx,dy,dz,x0,y0,z0; + FFT_SCALAR ekx0, eky0, ekz0, ekx1, eky1, ekz1, ekx2, eky2, ekz2; + FFT_SCALAR ekx3, eky3, ekz3, ekx4, eky4, ekz4, ekx5, eky5, ekz5; + FFT_SCALAR ekx6, eky6, ekz6; + + // loop over my charges, interpolate electric field from nearby grid points + // (nx,ny,nz) = global coords of grid pt to "lower left" of charge + // (dx,dy,dz) = distance to "lower left" grid pt + // (mx,my,mz) = global coords of moving stencil pt + // ek = 3 components of dispersion field on particle + + double **x = atom->x; + double **f = atom->f; + int type; + double lj0, lj1, lj2, lj3, lj4, lj5, lj6; + + int nlocal = atom->nlocal; + + for (i = 0; i < nlocal; i++) { + + nx = part2grid_6[i][0]; + ny = part2grid_6[i][1]; + nz = part2grid_6[i][2]; + dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6; + dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6; + dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6; + compute_rho1d(dx,dy,dz, order_6, rho_coeff_6, rho1d_6); + ekx0 = eky0 = ekz0 = ZEROF; + ekx1 = eky1 = ekz1 = ZEROF; + ekx2 = eky2 = ekz2 = ZEROF; + ekx3 = eky3 = ekz3 = ZEROF; + ekx4 = eky4 = ekz4 = ZEROF; + ekx5 = eky5 = ekz5 = ZEROF; + ekx6 = eky6 = ekz6 = ZEROF; + for (n = nlower_6; n <= nupper_6; n++) { + mz = n+nz; + z0 = rho1d_6[2][n]; + for (m = nlower_6; m <= nupper_6; m++) { + my = m+ny; + y0 = z0*rho1d_6[1][m]; + for (l = nlower_6; l <= nupper_6; l++) { + mx = l+nx; + x0 = y0*rho1d_6[0][l]; + ekx0 -= x0*vdx_brick_a0[mz][my][mx]; + eky0 -= x0*vdy_brick_a0[mz][my][mx]; + ekz0 -= x0*vdz_brick_a0[mz][my][mx]; + ekx1 -= x0*vdx_brick_a1[mz][my][mx]; + eky1 -= x0*vdy_brick_a1[mz][my][mx]; + ekz1 -= x0*vdz_brick_a1[mz][my][mx]; + ekx2 -= x0*vdx_brick_a2[mz][my][mx]; + eky2 -= x0*vdy_brick_a2[mz][my][mx]; + ekz2 -= x0*vdz_brick_a2[mz][my][mx]; + ekx3 -= x0*vdx_brick_a3[mz][my][mx]; + eky3 -= x0*vdy_brick_a3[mz][my][mx]; + ekz3 -= x0*vdz_brick_a3[mz][my][mx]; + ekx4 -= x0*vdx_brick_a4[mz][my][mx]; + eky4 -= x0*vdy_brick_a4[mz][my][mx]; + 
ekz4 -= x0*vdz_brick_a4[mz][my][mx]; + ekx5 -= x0*vdx_brick_a5[mz][my][mx]; + eky5 -= x0*vdy_brick_a5[mz][my][mx]; + ekz5 -= x0*vdz_brick_a5[mz][my][mx]; + ekx6 -= x0*vdx_brick_a6[mz][my][mx]; + eky6 -= x0*vdy_brick_a6[mz][my][mx]; + ekz6 -= x0*vdz_brick_a6[mz][my][mx]; + } + } + } + // convert D-field to force + type = atom->type[i]; + lj0 = B[7*type+6]; + lj1 = B[7*type+5]; + lj2 = B[7*type+4]; + lj3 = B[7*type+3]; + lj4 = B[7*type+2]; + lj5 = B[7*type+1]; + lj6 = B[7*type]; + f[i][0] += lj0*ekx0 + lj1*ekx1 + lj2*ekx2 + lj3*ekx3 + lj4*ekx4 + lj5*ekx5 + lj6*ekx6; + f[i][1] += lj0*eky0 + lj1*eky1 + lj2*eky2 + lj3*eky3 + lj4*eky4 + lj5*eky5 + lj6*eky6; + if (slabflag != 2) f[i][2] += lj0*ekz0 + lj1*ekz1 + lj2*ekz2 + lj3*ekz3 + lj4*ekz4 + lj5*ekz5 + lj6*ekz6; + } +} + +/* ---------------------------------------------------------------------- + interpolate from grid to get dispersion field & force on my particles + for arithmetic mixing rule for the ad scheme +------------------------------------------------------------------------- */ + +void PPPMDisp::fieldforce_a_ad() +{ + int i,l,m,n,nx,ny,nz,mx,my,mz; + FFT_SCALAR dx,dy,dz,x0,y0,z0; + FFT_SCALAR ekx0, eky0, ekz0, ekx1, eky1, ekz1, ekx2, eky2, ekz2; + FFT_SCALAR ekx3, eky3, ekz3, ekx4, eky4, ekz4, ekx5, eky5, ekz5; + FFT_SCALAR ekx6, eky6, ekz6; + + double s1,s2,s3; + double sf = 0.0; + double *prd; + + if (triclinic == 0) prd = domain->prd; + else prd = domain->prd_lamda; + + double xprd = prd[0]; + double yprd = prd[1]; + double zprd = prd[2]; + double zprd_slab = zprd*slab_volfactor; + + double hx_inv = nx_pppm_6/xprd; + double hy_inv = ny_pppm_6/yprd; + double hz_inv = nz_pppm_6/zprd_slab; + + // loop over my charges, interpolate electric field from nearby grid points + // (nx,ny,nz) = global coords of grid pt to "lower left" of charge + // (dx,dy,dz) = distance to "lower left" grid pt + // (mx,my,mz) = global coords of moving stencil pt + // ek = 3 components of dispersion field on particle + + double **x = 
atom->x; + double **f = atom->f; + int type; + double lj0, lj1, lj2, lj3, lj4, lj5, lj6; + + int nlocal = atom->nlocal; + + for (i = 0; i < nlocal; i++) { + + nx = part2grid_6[i][0]; + ny = part2grid_6[i][1]; + nz = part2grid_6[i][2]; + dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6; + dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6; + dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6; + + compute_rho1d(dx,dy,dz, order_6, rho_coeff_6, rho1d_6); + compute_drho1d(dx,dy,dz, order_6, drho_coeff_6, drho1d_6); + + ekx0 = eky0 = ekz0 = ZEROF; + ekx1 = eky1 = ekz1 = ZEROF; + ekx2 = eky2 = ekz2 = ZEROF; + ekx3 = eky3 = ekz3 = ZEROF; + ekx4 = eky4 = ekz4 = ZEROF; + ekx5 = eky5 = ekz5 = ZEROF; + ekx6 = eky6 = ekz6 = ZEROF; + for (n = nlower_6; n <= nupper_6; n++) { + mz = n+nz; + for (m = nlower_6; m <= nupper_6; m++) { + my = m+ny; + for (l = nlower_6; l <= nupper_6; l++) { + mx = l+nx; + x0 = drho1d_6[0][l]*rho1d_6[1][m]*rho1d_6[2][n]; + y0 = rho1d_6[0][l]*drho1d_6[1][m]*rho1d_6[2][n]; + z0 = rho1d_6[0][l]*rho1d_6[1][m]*drho1d_6[2][n]; + + ekx0 += x0*u_brick_a0[mz][my][mx]; + eky0 += y0*u_brick_a0[mz][my][mx]; + ekz0 += z0*u_brick_a0[mz][my][mx]; + + ekx1 += x0*u_brick_a1[mz][my][mx]; + eky1 += y0*u_brick_a1[mz][my][mx]; + ekz1 += z0*u_brick_a1[mz][my][mx]; + + ekx2 += x0*u_brick_a2[mz][my][mx]; + eky2 += y0*u_brick_a2[mz][my][mx]; + ekz2 += z0*u_brick_a2[mz][my][mx]; + + ekx3 += x0*u_brick_a3[mz][my][mx]; + eky3 += y0*u_brick_a3[mz][my][mx]; + ekz3 += z0*u_brick_a3[mz][my][mx]; + + ekx4 += x0*u_brick_a4[mz][my][mx]; + eky4 += y0*u_brick_a4[mz][my][mx]; + ekz4 += z0*u_brick_a4[mz][my][mx]; + + ekx5 += x0*u_brick_a5[mz][my][mx]; + eky5 += y0*u_brick_a5[mz][my][mx]; + ekz5 += z0*u_brick_a5[mz][my][mx]; + + ekx6 += x0*u_brick_a6[mz][my][mx]; + eky6 += y0*u_brick_a6[mz][my][mx]; + ekz6 += z0*u_brick_a6[mz][my][mx]; + } + } + } + + ekx0 *= hx_inv; + eky0 *= hy_inv; + ekz0 *= hz_inv; + + ekx1 *= hx_inv; + eky1 *= hy_inv; + ekz1 *= hz_inv; + + ekx2 *= hx_inv; + eky2 *= 
hy_inv; + ekz2 *= hz_inv; + + ekx3 *= hx_inv; + eky3 *= hy_inv; + ekz3 *= hz_inv; + + ekx4 *= hx_inv; + eky4 *= hy_inv; + ekz4 *= hz_inv; + + ekx5 *= hx_inv; + eky5 *= hy_inv; + ekz5 *= hz_inv; + + ekx6 *= hx_inv; + eky6 *= hy_inv; + ekz6 *= hz_inv; + + // convert D-field to force + type = atom->type[i]; + lj0 = B[7*type+6]; + lj1 = B[7*type+5]; + lj2 = B[7*type+4]; + lj3 = B[7*type+3]; + lj4 = B[7*type+2]; + lj5 = B[7*type+1]; + lj6 = B[7*type]; + + s1 = x[i][0]*hx_inv; + s2 = x[i][1]*hy_inv; + s3 = x[i][2]*hz_inv; + + sf = sf_coeff_6[0]*sin(2*MY_PI*s1); + sf += sf_coeff_6[1]*sin(4*MY_PI*s1); + sf *= 4*lj0*lj6 + 4*lj1*lj5 + 4*lj2*lj4 + 2*lj3*lj3; + f[i][0] += lj0*ekx0 + lj1*ekx1 + lj2*ekx2 + lj3*ekx3 + lj4*ekx4 + lj5*ekx5 + lj6*ekx6 - sf; + + sf = sf_coeff_6[2]*sin(2*MY_PI*s2); + sf += sf_coeff_6[3]*sin(4*MY_PI*s2); + sf *= 4*lj0*lj6 + 4*lj1*lj5 + 4*lj2*lj4 + 2*lj3*lj3; + f[i][1] += lj0*eky0 + lj1*eky1 + lj2*eky2 + lj3*eky3 + lj4*eky4 + lj5*eky5 + lj6*eky6 - sf; + + sf = sf_coeff_6[4]*sin(2*MY_PI*s3); + sf += sf_coeff_6[5]*sin(4*MY_PI*s3); + sf *= 4*lj0*lj6 + 4*lj1*lj5 + 4*lj2*lj4 + 2*lj3*lj3; + if (slabflag != 2) f[i][2] += lj0*ekz0 + lj1*ekz1 + lj2*ekz2 + lj3*ekz3 + lj4*ekz4 + lj5*ekz5 + lj6*ekz6 - sf; + } +} + +/* ---------------------------------------------------------------------- + interpolate from grid to get dispersion field & force on my particles + for arithmetic mixing rule for per atom quantities +------------------------------------------------------------------------- */ + +void PPPMDisp::fieldforce_a_peratom() +{ + int i,l,m,n,nx,ny,nz,mx,my,mz; + FFT_SCALAR dx,dy,dz,x0,y0,z0; + FFT_SCALAR u_pa0,v00,v10,v20,v30,v40,v50; + FFT_SCALAR u_pa1,v01,v11,v21,v31,v41,v51; + FFT_SCALAR u_pa2,v02,v12,v22,v32,v42,v52; + FFT_SCALAR u_pa3,v03,v13,v23,v33,v43,v53; + FFT_SCALAR u_pa4,v04,v14,v24,v34,v44,v54; + FFT_SCALAR u_pa5,v05,v15,v25,v35,v45,v55; + FFT_SCALAR u_pa6,v06,v16,v26,v36,v46,v56; + + // loop over my charges, interpolate electric field from nearby 
grid points + // (nx,ny,nz) = global coords of grid pt to "lower left" of charge + // (dx,dy,dz) = distance to "lower left" grid pt + // (mx,my,mz) = global coords of moving stencil pt + // ek = 3 components of dispersion field on particle + + double **x = atom->x; + int type; + double lj0, lj1, lj2, lj3, lj4, lj5, lj6; + + int nlocal = atom->nlocal; + + for (i = 0; i < nlocal; i++) { + + nx = part2grid_6[i][0]; + ny = part2grid_6[i][1]; + nz = part2grid_6[i][2]; + dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6; + dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6; + dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6; + compute_rho1d(dx,dy,dz, order_6, rho_coeff_6, rho1d_6); + + u_pa0 = v00 = v10 = v20 = v30 = v40 = v50 = ZEROF; + u_pa1 = v01 = v11 = v21 = v31 = v41 = v51 = ZEROF; + u_pa2 = v02 = v12 = v22 = v32 = v42 = v52 = ZEROF; + u_pa3 = v03 = v13 = v23 = v33 = v43 = v53 = ZEROF; + u_pa4 = v04 = v14 = v24 = v34 = v44 = v54 = ZEROF; + u_pa5 = v05 = v15 = v25 = v35 = v45 = v55 = ZEROF; + u_pa6 = v06 = v16 = v26 = v36 = v46 = v56 = ZEROF; + for (n = nlower_6; n <= nupper_6; n++) { + mz = n+nz; + z0 = rho1d_6[2][n]; + for (m = nlower_6; m <= nupper_6; m++) { + my = m+ny; + y0 = z0*rho1d_6[1][m]; + for (l = nlower_6; l <= nupper_6; l++) { + mx = l+nx; + x0 = y0*rho1d_6[0][l]; + if (eflag_atom) { + u_pa0 += x0*u_brick_a0[mz][my][mx]; + u_pa1 += x0*u_brick_a1[mz][my][mx]; + u_pa2 += x0*u_brick_a2[mz][my][mx]; + u_pa3 += x0*u_brick_a3[mz][my][mx]; + u_pa4 += x0*u_brick_a4[mz][my][mx]; + u_pa5 += x0*u_brick_a5[mz][my][mx]; + u_pa6 += x0*u_brick_a6[mz][my][mx]; + } + if (vflag_atom) { + v00 += x0*v0_brick_a0[mz][my][mx]; + v10 += x0*v1_brick_a0[mz][my][mx]; + v20 += x0*v2_brick_a0[mz][my][mx]; + v30 += x0*v3_brick_a0[mz][my][mx]; + v40 += x0*v4_brick_a0[mz][my][mx]; + v50 += x0*v5_brick_a0[mz][my][mx]; + v01 += x0*v0_brick_a1[mz][my][mx]; + v11 += x0*v1_brick_a1[mz][my][mx]; + v21 += x0*v2_brick_a1[mz][my][mx]; + v31 += x0*v3_brick_a1[mz][my][mx]; + v41 += 
x0*v4_brick_a1[mz][my][mx]; + v51 += x0*v5_brick_a1[mz][my][mx]; + v02 += x0*v0_brick_a2[mz][my][mx]; + v12 += x0*v1_brick_a2[mz][my][mx]; + v22 += x0*v2_brick_a2[mz][my][mx]; + v32 += x0*v3_brick_a2[mz][my][mx]; + v42 += x0*v4_brick_a2[mz][my][mx]; + v52 += x0*v5_brick_a2[mz][my][mx]; + v03 += x0*v0_brick_a3[mz][my][mx]; + v13 += x0*v1_brick_a3[mz][my][mx]; + v23 += x0*v2_brick_a3[mz][my][mx]; + v33 += x0*v3_brick_a3[mz][my][mx]; + v43 += x0*v4_brick_a3[mz][my][mx]; + v53 += x0*v5_brick_a3[mz][my][mx]; + v04 += x0*v0_brick_a4[mz][my][mx]; + v14 += x0*v1_brick_a4[mz][my][mx]; + v24 += x0*v2_brick_a4[mz][my][mx]; + v34 += x0*v3_brick_a4[mz][my][mx]; + v44 += x0*v4_brick_a4[mz][my][mx]; + v54 += x0*v5_brick_a4[mz][my][mx]; + v05 += x0*v0_brick_a5[mz][my][mx]; + v15 += x0*v1_brick_a5[mz][my][mx]; + v25 += x0*v2_brick_a5[mz][my][mx]; + v35 += x0*v3_brick_a5[mz][my][mx]; + v45 += x0*v4_brick_a5[mz][my][mx]; + v55 += x0*v5_brick_a5[mz][my][mx]; + v06 += x0*v0_brick_a6[mz][my][mx]; + v16 += x0*v1_brick_a6[mz][my][mx]; + v26 += x0*v2_brick_a6[mz][my][mx]; + v36 += x0*v3_brick_a6[mz][my][mx]; + v46 += x0*v4_brick_a6[mz][my][mx]; + v56 += x0*v5_brick_a6[mz][my][mx]; + } + } + } + } + // convert D-field to force + type = atom->type[i]; + lj0 = B[7*type+6]*0.5; + lj1 = B[7*type+5]*0.5; + lj2 = B[7*type+4]*0.5; + lj3 = B[7*type+3]*0.5; + lj4 = B[7*type+2]*0.5; + lj5 = B[7*type+1]*0.5; + lj6 = B[7*type]*0.5; + + + if (eflag_atom) + eatom[i] += u_pa0*lj0 + u_pa1*lj1 + u_pa2*lj2 + + u_pa3*lj3 + u_pa4*lj4 + u_pa5*lj5 + u_pa6*lj6; + if (vflag_atom) { + vatom[i][0] += v00*lj0 + v01*lj1 + v02*lj2 + v03*lj3 + + v04*lj4 + v05*lj5 + v06*lj6; + vatom[i][1] += v10*lj0 + v11*lj1 + v12*lj2 + v13*lj3 + + v14*lj4 + v15*lj5 + v16*lj6; + vatom[i][2] += v20*lj0 + v21*lj1 + v22*lj2 + v23*lj3 + + v24*lj4 + v25*lj5 + v26*lj6; + vatom[i][3] += v30*lj0 + v31*lj1 + v32*lj2 + v33*lj3 + + v34*lj4 + v35*lj5 + v36*lj6; + vatom[i][4] += v40*lj0 + v41*lj1 + v42*lj2 + v43*lj3 + + v44*lj4 + v45*lj5 + v46*lj6; 
+ vatom[i][5] += v50*lj0 + v51*lj1 + v52*lj2 + v53*lj3 + + v54*lj4 + v55*lj5 + v56*lj6; + } + } +} + +/* ---------------------------------------------------------------------- + interpolate from grid to get dispersion field & force on my particles + for arithmetic mixing rule and ik scheme +------------------------------------------------------------------------- */ + +void PPPMDisp::fieldforce_none_ik() +{ + int i,k,l,m,n,nx,ny,nz,mx,my,mz; + FFT_SCALAR dx,dy,dz,x0,y0,z0; + FFT_SCALAR *ekx, *eky, *ekz; + + ekx = new FFT_SCALAR[nsplit]; + eky = new FFT_SCALAR[nsplit]; + ekz = new FFT_SCALAR[nsplit]; + // loop over my charges, interpolate electric field from nearby grid points + // (nx,ny,nz) = global coords of grid pt to "lower left" of charge + // (dx,dy,dz) = distance to "lower left" grid pt + // (mx,my,mz) = global coords of moving stencil pt + // ek = 3 components of dispersion field on particle + + double **x = atom->x; + double **f = atom->f; + int type; + double lj; + + int nlocal = atom->nlocal; + + for (i = 0; i < nlocal; i++) { + + nx = part2grid_6[i][0]; + ny = part2grid_6[i][1]; + nz = part2grid_6[i][2]; + dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6; + dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6; + dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6; + compute_rho1d(dx,dy,dz, order_6, rho_coeff_6, rho1d_6); + for (k = 0; k < nsplit; k++) + ekx[k] = eky[k] = ekz[k] = ZEROF; + for (n = nlower_6; n <= nupper_6; n++) { + mz = n+nz; + z0 = rho1d_6[2][n]; + for (m = nlower_6; m <= nupper_6; m++) { + my = m+ny; + y0 = z0*rho1d_6[1][m]; + for (l = nlower_6; l <= nupper_6; l++) { + mx = l+nx; + x0 = y0*rho1d_6[0][l]; + for (k = 0; k < nsplit; k++) { + ekx[k] -= x0*vdx_brick_none[k][mz][my][mx]; + eky[k] -= x0*vdy_brick_none[k][mz][my][mx]; + ekz[k] -= x0*vdz_brick_none[k][mz][my][mx]; + } + } + } + } + // convert D-field to force + type = atom->type[i]; + for (k = 0; k < nsplit; k++) { + lj = B[nsplit*type + k]; + f[i][0] += lj*ekx[k]; + f[i][1] 
+=lj*eky[k]; + if (slabflag != 2) f[i][2] +=lj*ekz[k]; + } + } + + delete [] ekx; + delete [] eky; + delete [] ekz; +} + +/* ---------------------------------------------------------------------- + interpolate from grid to get dispersion field & force on my particles + for arithmetic mixing rule for the ad scheme +------------------------------------------------------------------------- */ + +void PPPMDisp::fieldforce_none_ad() +{ + int i,k,l,m,n,nx,ny,nz,mx,my,mz; + FFT_SCALAR dx,dy,dz,x0,y0,z0; + FFT_SCALAR *ekx, *eky, *ekz; + + ekx = new FFT_SCALAR[nsplit]; + eky = new FFT_SCALAR[nsplit]; + ekz = new FFT_SCALAR[nsplit]; + + + double s1,s2,s3; + double sf1,sf2,sf3; + double sf = 0.0; + double *prd; + + if (triclinic == 0) prd = domain->prd; + else prd = domain->prd_lamda; + + double xprd = prd[0]; + double yprd = prd[1]; + double zprd = prd[2]; + double zprd_slab = zprd*slab_volfactor; + + double hx_inv = nx_pppm_6/xprd; + double hy_inv = ny_pppm_6/yprd; + double hz_inv = nz_pppm_6/zprd_slab; + + // loop over my charges, interpolate electric field from nearby grid points + // (nx,ny,nz) = global coords of grid pt to "lower left" of charge + // (dx,dy,dz) = distance to "lower left" grid pt + // (mx,my,mz) = global coords of moving stencil pt + // ek = 3 components of dispersion field on particle + + double **x = atom->x; + double **f = atom->f; + int type; + double lj; + + int nlocal = atom->nlocal; + + for (i = 0; i < nlocal; i++) { + + nx = part2grid_6[i][0]; + ny = part2grid_6[i][1]; + nz = part2grid_6[i][2]; + dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6; + dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6; + dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6; + + compute_rho1d(dx,dy,dz, order_6, rho_coeff_6, rho1d_6); + compute_drho1d(dx,dy,dz, order_6, drho_coeff_6, drho1d_6); + + for (k = 0; k < nsplit; k++) + ekx[k] = eky[k] = ekz[k] = ZEROF; + + for (n = nlower_6; n <= nupper_6; n++) { + mz = n+nz; + for (m = nlower_6; m <= nupper_6; m++) { + my = 
m+ny; + for (l = nlower_6; l <= nupper_6; l++) { + mx = l+nx; + x0 = drho1d_6[0][l]*rho1d_6[1][m]*rho1d_6[2][n]; + y0 = rho1d_6[0][l]*drho1d_6[1][m]*rho1d_6[2][n]; + z0 = rho1d_6[0][l]*rho1d_6[1][m]*drho1d_6[2][n]; + + for (k = 0; k < nsplit; k++) { + ekx[k] += x0*u_brick_none[k][mz][my][mx]; + eky[k] += y0*u_brick_none[k][mz][my][mx]; + ekz[k] += z0*u_brick_none[k][mz][my][mx]; + } + } + } + } + + for (k = 0; k < nsplit; k++) { + ekx[k] *= hx_inv; + eky[k] *= hy_inv; + ekz[k] *= hz_inv; + } + + // convert D-field to force + type = atom->type[i]; + + s1 = x[i][0]*hx_inv; + s2 = x[i][1]*hy_inv; + s3 = x[i][2]*hz_inv; + + sf1 = sf_coeff_6[0]*sin(2*MY_PI*s1); + sf1 += sf_coeff_6[1]*sin(4*MY_PI*s1); + + sf2 = sf_coeff_6[2]*sin(2*MY_PI*s2); + sf2 += sf_coeff_6[3]*sin(4*MY_PI*s2); + + sf3 = sf_coeff_6[4]*sin(2*MY_PI*s3); + sf3 += sf_coeff_6[5]*sin(4*MY_PI*s3); + + for (k = 0; k < nsplit; k++) { + lj = B[nsplit*type + k]; + + sf = sf1*B[k]*2*lj*lj; + f[i][0] += lj*ekx[k] - sf; + + + sf = sf2*B[k]*2*lj*lj; + f[i][1] += lj*eky[k] - sf; + + sf = sf3*B[k]*2*lj*lj; + if (slabflag != 2) f[i][2] += lj*ekz[k] - sf; + } + } + + delete [] ekx; + delete [] eky; + delete [] ekz; +} + +/* ---------------------------------------------------------------------- + interpolate from grid to get dispersion field & force on my particles + for arithmetic mixing rule for per atom quantities +------------------------------------------------------------------------- */ + +void PPPMDisp::fieldforce_none_peratom() +{ + int i,k,l,m,n,nx,ny,nz,mx,my,mz; + FFT_SCALAR dx,dy,dz,x0,y0,z0; + FFT_SCALAR *u_pa,*v0,*v1,*v2,*v3,*v4,*v5; + + u_pa = new FFT_SCALAR[nsplit]; + v0 = new FFT_SCALAR[nsplit]; + v1 = new FFT_SCALAR[nsplit]; + v2 = new FFT_SCALAR[nsplit]; + v3 = new FFT_SCALAR[nsplit]; + v4 = new FFT_SCALAR[nsplit]; + v5 = new FFT_SCALAR[nsplit]; + + // loop over my charges, interpolate electric field from nearby grid points + // (nx,ny,nz) = global coords of grid pt to "lower left" of charge + // 
(dx,dy,dz) = distance to "lower left" grid pt + // (mx,my,mz) = global coords of moving stencil pt + // ek = 3 components of dispersion field on particle + + double **x = atom->x; + int type; + double lj; + + int nlocal = atom->nlocal; + + for (i = 0; i < nlocal; i++) { + + nx = part2grid_6[i][0]; + ny = part2grid_6[i][1]; + nz = part2grid_6[i][2]; + dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6; + dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6; + dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6; + compute_rho1d(dx,dy,dz, order_6, rho_coeff_6, rho1d_6); + + for (k = 0; k < nsplit; k++) + u_pa[k] = v0[k] = v1[k] = v2[k] = v3[k] = v4[k] = v5[k] = ZEROF; + + for (n = nlower_6; n <= nupper_6; n++) { + mz = n+nz; + z0 = rho1d_6[2][n]; + for (m = nlower_6; m <= nupper_6; m++) { + my = m+ny; + y0 = z0*rho1d_6[1][m]; + for (l = nlower_6; l <= nupper_6; l++) { + mx = l+nx; + x0 = y0*rho1d_6[0][l]; + if (eflag_atom) { + for (k = 0; k < nsplit; k++) + u_pa[k] += x0*u_brick_none[k][mz][my][mx]; + } + if (vflag_atom) { + for (k = 0; k < nsplit; k++) { + v0[k] += x0*v0_brick_none[k][mz][my][mx]; + v1[k] += x0*v1_brick_none[k][mz][my][mx]; + v2[k] += x0*v2_brick_none[k][mz][my][mx]; + v3[k] += x0*v3_brick_none[k][mz][my][mx]; + v4[k] += x0*v4_brick_none[k][mz][my][mx]; + v5[k] += x0*v5_brick_none[k][mz][my][mx]; + } + } + } + } + } + // convert D-field to force + type = atom->type[i]; + for (k = 0; k < nsplit; k++) { + lj = B[nsplit*type + k]*0.5; + + if (eflag_atom) { + eatom[i] += u_pa[k]*lj; + } + if (vflag_atom) { + vatom[i][0] += v0[k]*lj; + vatom[i][1] += v1[k]*lj; + vatom[i][2] += v2[k]*lj; + vatom[i][3] += v3[k]*lj; + vatom[i][4] += v4[k]*lj; + vatom[i][5] += v5[k]*lj; + } + } + } + + delete [] u_pa; + delete [] v0; + delete [] v1; + delete [] v2; + delete [] v3; + delete [] v4; + delete [] v5; +} + +/* ---------------------------------------------------------------------- + pack values to buf to send to another proc 
+------------------------------------------------------------------------- */ + +void PPPMDisp::pack_forward(int flag, FFT_SCALAR *buf, int nlist, int *list) +{ + int n = 0; + + switch (flag) { + + // Coulomb interactions + + case FORWARD_IK: { + FFT_SCALAR *xsrc = &vdx_brick[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *ysrc = &vdy_brick[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *zsrc = &vdz_brick[nzlo_out][nylo_out][nxlo_out]; + for (int i = 0; i < nlist; i++) { + buf[n++] = xsrc[list[i]]; + buf[n++] = ysrc[list[i]]; + buf[n++] = zsrc[list[i]]; + } + break; + } + + case FORWARD_AD: { + FFT_SCALAR *src = &u_brick[nzlo_out][nylo_out][nxlo_out]; + for (int i = 0; i < nlist; i++) + buf[i] = src[list[i]]; + break; + } + + case FORWARD_IK_PERATOM: { + FFT_SCALAR *esrc = &u_brick[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *v0src = &v0_brick[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *v1src = &v1_brick[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *v2src = &v2_brick[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *v3src = &v3_brick[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *v4src = &v4_brick[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *v5src = &v5_brick[nzlo_out][nylo_out][nxlo_out]; + for (int i = 0; i < nlist; i++) { + if (eflag_atom) buf[n++] = esrc[list[i]]; + if (vflag_atom) { + buf[n++] = v0src[list[i]]; + buf[n++] = v1src[list[i]]; + buf[n++] = v2src[list[i]]; + buf[n++] = v3src[list[i]]; + buf[n++] = v4src[list[i]]; + buf[n++] = v5src[list[i]]; + } + } + break; + } + + case FORWARD_AD_PERATOM: { + FFT_SCALAR *v0src = &v0_brick[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *v1src = &v1_brick[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *v2src = &v2_brick[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *v3src = &v3_brick[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *v4src = &v4_brick[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *v5src = &v5_brick[nzlo_out][nylo_out][nxlo_out]; + for (int i = 0; i < nlist; i++) { + buf[n++] = v0src[list[i]]; + buf[n++] = v1src[list[i]]; + 
buf[n++] = v2src[list[i]]; + buf[n++] = v3src[list[i]]; + buf[n++] = v4src[list[i]]; + buf[n++] = v5src[list[i]]; + } + break; + } + + // Dispersion interactions, geometric mixing + + case FORWARD_IK_G: { + FFT_SCALAR *xsrc = &vdx_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *ysrc = &vdy_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *zsrc = &vdz_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; + for (int i = 0; i < nlist; i++) { + buf[n++] = xsrc[list[i]]; + buf[n++] = ysrc[list[i]]; + buf[n++] = zsrc[list[i]]; + } + break; + } + + case FORWARD_AD_G: { + FFT_SCALAR *src = &u_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; + for (int i = 0; i < nlist; i++) + buf[i] = src[list[i]]; + break; + } + + case FORWARD_IK_PERATOM_G: { + FFT_SCALAR *esrc = &u_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v0src = &v0_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v1src = &v1_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v2src = &v2_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v3src = &v3_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v4src = &v4_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v5src = &v5_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; + for (int i = 0; i < nlist; i++) { + if (eflag_atom) buf[n++] = esrc[list[i]]; + if (vflag_atom) { + buf[n++] = v0src[list[i]]; + buf[n++] = v1src[list[i]]; + buf[n++] = v2src[list[i]]; + buf[n++] = v3src[list[i]]; + buf[n++] = v4src[list[i]]; + buf[n++] = v5src[list[i]]; + } + } + break; + } + + case FORWARD_AD_PERATOM_G: { + FFT_SCALAR *v0src = &v0_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v1src = &v1_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v2src = &v2_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v3src = &v3_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v4src = &v4_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v5src = 
&v5_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; + for (int i = 0; i < nlist; i++) { + buf[n++] = v0src[list[i]]; + buf[n++] = v1src[list[i]]; + buf[n++] = v2src[list[i]]; + buf[n++] = v3src[list[i]]; + buf[n++] = v4src[list[i]]; + buf[n++] = v5src[list[i]]; + } + break; + } + + // Dispersion interactions, arithmetic mixing + + case FORWARD_IK_A: { + FFT_SCALAR *xsrc0 = &vdx_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *ysrc0 = &vdy_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *zsrc0 = &vdz_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; + + FFT_SCALAR *xsrc1 = &vdx_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *ysrc1 = &vdy_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *zsrc1 = &vdz_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; + + FFT_SCALAR *xsrc2 = &vdx_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *ysrc2 = &vdy_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *zsrc2 = &vdz_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; + + FFT_SCALAR *xsrc3 = &vdx_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *ysrc3 = &vdy_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *zsrc3 = &vdz_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; + + FFT_SCALAR *xsrc4 = &vdx_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *ysrc4 = &vdy_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *zsrc4 = &vdz_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; + + FFT_SCALAR *xsrc5 = &vdx_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *ysrc5 = &vdy_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *zsrc5 = &vdz_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; + + FFT_SCALAR *xsrc6 = &vdx_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *ysrc6 = &vdy_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *zsrc6 = &vdz_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; + + for (int i = 0; i < nlist; i++) { + buf[n++] = xsrc0[list[i]]; + buf[n++] = ysrc0[list[i]]; + 
buf[n++] = zsrc0[list[i]]; + + buf[n++] = xsrc1[list[i]]; + buf[n++] = ysrc1[list[i]]; + buf[n++] = zsrc1[list[i]]; + + buf[n++] = xsrc2[list[i]]; + buf[n++] = ysrc2[list[i]]; + buf[n++] = zsrc2[list[i]]; + + buf[n++] = xsrc3[list[i]]; + buf[n++] = ysrc3[list[i]]; + buf[n++] = zsrc3[list[i]]; + + buf[n++] = xsrc4[list[i]]; + buf[n++] = ysrc4[list[i]]; + buf[n++] = zsrc4[list[i]]; + + buf[n++] = xsrc5[list[i]]; + buf[n++] = ysrc5[list[i]]; + buf[n++] = zsrc5[list[i]]; + + buf[n++] = xsrc6[list[i]]; + buf[n++] = ysrc6[list[i]]; + buf[n++] = zsrc6[list[i]]; + } + break; + } + + case FORWARD_AD_A: { + FFT_SCALAR *src0 = &u_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *src1 = &u_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *src2 = &u_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *src3 = &u_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *src4 = &u_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *src5 = &u_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *src6 = &u_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; + + for (int i = 0; i < nlist; i++) { + buf[n++] = src0[list[i]]; + buf[n++] = src1[list[i]]; + buf[n++] = src2[list[i]]; + buf[n++] = src3[list[i]]; + buf[n++] = src4[list[i]]; + buf[n++] = src5[list[i]]; + buf[n++] = src6[list[i]]; + } + break; + } + + case FORWARD_IK_PERATOM_A: { + FFT_SCALAR *esrc0 = &u_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v0src0 = &v0_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v1src0 = &v1_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v2src0 = &v2_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v3src0 = &v3_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v4src0 = &v4_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v5src0 = &v5_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; + + FFT_SCALAR *esrc1 = &u_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v0src1 = 
&v0_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v1src1 = &v1_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v2src1 = &v2_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v3src1 = &v3_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v4src1 = &v4_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v5src1 = &v5_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; + + FFT_SCALAR *esrc2 = &u_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v0src2 = &v0_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v1src2 = &v1_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v2src2 = &v2_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v3src2 = &v3_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v4src2 = &v4_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v5src2 = &v5_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; + + FFT_SCALAR *esrc3 = &u_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v0src3 = &v0_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v1src3 = &v1_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v2src3 = &v2_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v3src3 = &v3_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v4src3 = &v4_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v5src3 = &v5_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; + + FFT_SCALAR *esrc4 = &u_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v0src4 = &v0_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v1src4 = &v1_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v2src4 = &v2_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v3src4 = &v3_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v4src4 = &v4_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v5src4 = &v5_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; + + FFT_SCALAR *esrc5 = 
&u_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v0src5 = &v0_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v1src5 = &v1_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v2src5 = &v2_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v3src5 = &v3_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v4src5 = &v4_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v5src5 = &v5_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; + + FFT_SCALAR *esrc6 = &u_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v0src6 = &v0_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v1src6 = &v1_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v2src6 = &v2_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v3src6 = &v3_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v4src6 = &v4_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v5src6 = &v5_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; + + for (int i = 0; i < nlist; i++) { + if (eflag_atom) { + buf[n++] = esrc0[list[i]]; + buf[n++] = esrc1[list[i]]; + buf[n++] = esrc2[list[i]]; + buf[n++] = esrc3[list[i]]; + buf[n++] = esrc4[list[i]]; + buf[n++] = esrc5[list[i]]; + buf[n++] = esrc6[list[i]]; + } + if (vflag_atom) { + buf[n++] = v0src0[list[i]]; + buf[n++] = v1src0[list[i]]; + buf[n++] = v2src0[list[i]]; + buf[n++] = v3src0[list[i]]; + buf[n++] = v4src0[list[i]]; + buf[n++] = v5src0[list[i]]; + + buf[n++] = v0src1[list[i]]; + buf[n++] = v1src1[list[i]]; + buf[n++] = v2src1[list[i]]; + buf[n++] = v3src1[list[i]]; + buf[n++] = v4src1[list[i]]; + buf[n++] = v5src1[list[i]]; + + buf[n++] = v0src2[list[i]]; + buf[n++] = v1src2[list[i]]; + buf[n++] = v2src2[list[i]]; + buf[n++] = v3src2[list[i]]; + buf[n++] = v4src2[list[i]]; + buf[n++] = v5src2[list[i]]; + + buf[n++] = v0src3[list[i]]; + buf[n++] = v1src3[list[i]]; + buf[n++] = v2src3[list[i]]; + buf[n++] = v3src3[list[i]]; + buf[n++] = v4src3[list[i]]; + buf[n++] = 
v5src3[list[i]]; + + buf[n++] = v0src4[list[i]]; + buf[n++] = v1src4[list[i]]; + buf[n++] = v2src4[list[i]]; + buf[n++] = v3src4[list[i]]; + buf[n++] = v4src4[list[i]]; + buf[n++] = v5src4[list[i]]; + + buf[n++] = v0src5[list[i]]; + buf[n++] = v1src5[list[i]]; + buf[n++] = v2src5[list[i]]; + buf[n++] = v3src5[list[i]]; + buf[n++] = v4src5[list[i]]; + buf[n++] = v5src5[list[i]]; + + buf[n++] = v0src6[list[i]]; + buf[n++] = v1src6[list[i]]; + buf[n++] = v2src6[list[i]]; + buf[n++] = v3src6[list[i]]; + buf[n++] = v4src6[list[i]]; + buf[n++] = v5src6[list[i]]; + } + } + break; + } + + case FORWARD_AD_PERATOM_A: { + FFT_SCALAR *v0src0 = &v0_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v1src0 = &v1_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v2src0 = &v2_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v3src0 = &v3_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v4src0 = &v4_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v5src0 = &v5_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; + + FFT_SCALAR *v0src1 = &v0_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v1src1 = &v1_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v2src1 = &v2_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v3src1 = &v3_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v4src1 = &v4_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v5src1 = &v5_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; + + FFT_SCALAR *v0src2 = &v0_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v1src2 = &v1_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v2src2 = &v2_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v3src2 = &v3_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v4src2 = &v4_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v5src2 = &v5_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; + + FFT_SCALAR *v0src3 = 
&v0_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v1src3 = &v1_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v2src3 = &v2_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v3src3 = &v3_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v4src3 = &v4_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v5src3 = &v5_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; + + FFT_SCALAR *v0src4 = &v0_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v1src4 = &v1_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v2src4 = &v2_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v3src4 = &v3_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v4src4 = &v4_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v5src4 = &v5_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; + + FFT_SCALAR *v0src5 = &v0_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v1src5 = &v1_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v2src5 = &v2_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v3src5 = &v3_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v4src5 = &v4_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v5src5 = &v5_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; + + FFT_SCALAR *v0src6 = &v0_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v1src6 = &v1_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v2src6 = &v2_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v3src6 = &v3_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v4src6 = &v4_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v5src6 = &v5_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; + + for (int i = 0; i < nlist; i++) { + buf[n++] = v0src0[list[i]]; + buf[n++] = v1src0[list[i]]; + buf[n++] = v2src0[list[i]]; + buf[n++] = v3src0[list[i]]; + buf[n++] = v4src0[list[i]]; + buf[n++] = v5src0[list[i]]; + + buf[n++] = v0src1[list[i]]; + buf[n++] = 
v1src1[list[i]]; + buf[n++] = v2src1[list[i]]; + buf[n++] = v3src1[list[i]]; + buf[n++] = v4src1[list[i]]; + buf[n++] = v5src1[list[i]]; + + buf[n++] = v0src2[list[i]]; + buf[n++] = v1src2[list[i]]; + buf[n++] = v2src2[list[i]]; + buf[n++] = v3src2[list[i]]; + buf[n++] = v4src2[list[i]]; + buf[n++] = v5src2[list[i]]; + + buf[n++] = v0src3[list[i]]; + buf[n++] = v1src3[list[i]]; + buf[n++] = v2src3[list[i]]; + buf[n++] = v3src3[list[i]]; + buf[n++] = v4src3[list[i]]; + buf[n++] = v5src3[list[i]]; + + buf[n++] = v0src4[list[i]]; + buf[n++] = v1src4[list[i]]; + buf[n++] = v2src4[list[i]]; + buf[n++] = v3src4[list[i]]; + buf[n++] = v4src4[list[i]]; + buf[n++] = v5src4[list[i]]; + + buf[n++] = v0src5[list[i]]; + buf[n++] = v1src5[list[i]]; + buf[n++] = v2src5[list[i]]; + buf[n++] = v3src5[list[i]]; + buf[n++] = v4src5[list[i]]; + buf[n++] = v5src5[list[i]]; + + buf[n++] = v0src6[list[i]]; + buf[n++] = v1src6[list[i]]; + buf[n++] = v2src6[list[i]]; + buf[n++] = v3src6[list[i]]; + buf[n++] = v4src6[list[i]]; + buf[n++] = v5src6[list[i]]; + } + break; + } + + // Dispersion interactions, no mixing + + case FORWARD_IK_NONE: { + for (int k = 0; k < nsplit_alloc; k++) { + FFT_SCALAR *xsrc = &vdx_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *ysrc = &vdy_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *zsrc = &vdz_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; + for (int i = 0; i < nlist; i++) { + buf[n++] = xsrc[list[i]]; + buf[n++] = ysrc[list[i]]; + buf[n++] = zsrc[list[i]]; + } + } + break; + } + + case FORWARD_AD_NONE: { + for (int k = 0; k < nsplit_alloc; k++) { + FFT_SCALAR *src = &u_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; + for (int i = 0; i < nlist; i++) + buf[n++] = src[list[i]]; + } + break; + } + + case FORWARD_IK_PERATOM_NONE: { + for (int k = 0; k < nsplit_alloc; k++) { + FFT_SCALAR *esrc = &u_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v0src = 
&v0_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v1src = &v1_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v2src = &v2_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v3src = &v3_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v4src = &v4_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v5src = &v5_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; + for (int i = 0; i < nlist; i++) { + if (eflag_atom) buf[n++] = esrc[list[i]]; + if (vflag_atom) { + buf[n++] = v0src[list[i]]; + buf[n++] = v1src[list[i]]; + buf[n++] = v2src[list[i]]; + buf[n++] = v3src[list[i]]; + buf[n++] = v4src[list[i]]; + buf[n++] = v5src[list[i]]; + } + } + } + break; + } + + case FORWARD_AD_PERATOM_NONE: { + for (int k = 0; k < nsplit_alloc; k++) { + FFT_SCALAR *v0src = &v0_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v1src = &v1_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v2src = &v2_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v3src = &v3_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v4src = &v4_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v5src = &v5_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; + for (int i = 0; i < nlist; i++) { + buf[n++] = v0src[list[i]]; + buf[n++] = v1src[list[i]]; + buf[n++] = v2src[list[i]]; + buf[n++] = v3src[list[i]]; + buf[n++] = v4src[list[i]]; + buf[n++] = v5src[list[i]]; + } + } + break; + } + + } +} + +/* ---------------------------------------------------------------------- + unpack another proc's own values from buf and set own ghost values +------------------------------------------------------------------------- */ + +void PPPMDisp::unpack_forward(int flag, FFT_SCALAR *buf, int nlist, int *list) +{ + int n = 0; + + switch (flag) { + + // Coulomb interactions + + case FORWARD_IK: { + FFT_SCALAR *xdest = &vdx_brick[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *ydest = 
&vdy_brick[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *zdest = &vdz_brick[nzlo_out][nylo_out][nxlo_out]; + for (int i = 0; i < nlist; i++) { + xdest[list[i]] = buf[n++]; + ydest[list[i]] = buf[n++]; + zdest[list[i]] = buf[n++]; + } + break; + } + + case FORWARD_AD: { + FFT_SCALAR *dest = &u_brick[nzlo_out][nylo_out][nxlo_out]; + for (int i = 0; i < nlist; i++) + dest[list[i]] = buf[n++]; + break; + } + + case FORWARD_IK_PERATOM: { + FFT_SCALAR *esrc = &u_brick[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *v0src = &v0_brick[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *v1src = &v1_brick[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *v2src = &v2_brick[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *v3src = &v3_brick[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *v4src = &v4_brick[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *v5src = &v5_brick[nzlo_out][nylo_out][nxlo_out]; + for (int i = 0; i < nlist; i++) { + if (eflag_atom) esrc[list[i]] = buf[n++]; + if (vflag_atom) { + v0src[list[i]] = buf[n++]; + v1src[list[i]] = buf[n++]; + v2src[list[i]] = buf[n++]; + v3src[list[i]] = buf[n++]; + v4src[list[i]] = buf[n++]; + v5src[list[i]] = buf[n++]; + } + } + break; + } + + case FORWARD_AD_PERATOM: { + FFT_SCALAR *v0src = &v0_brick[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *v1src = &v1_brick[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *v2src = &v2_brick[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *v3src = &v3_brick[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *v4src = &v4_brick[nzlo_out][nylo_out][nxlo_out]; + FFT_SCALAR *v5src = &v5_brick[nzlo_out][nylo_out][nxlo_out]; + for (int i = 0; i < nlist; i++) { + v0src[list[i]] = buf[n++]; + v1src[list[i]] = buf[n++]; + v2src[list[i]] = buf[n++]; + v3src[list[i]] = buf[n++]; + v4src[list[i]] = buf[n++]; + v5src[list[i]] = buf[n++]; + } + break; + } + + // Dispersion interactions, geometric mixing + + case FORWARD_IK_G: { + FFT_SCALAR *xdest = &vdx_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *ydest = 
&vdy_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *zdest = &vdz_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; + for (int i = 0; i < nlist; i++) { + xdest[list[i]] = buf[n++]; + ydest[list[i]] = buf[n++]; + zdest[list[i]] = buf[n++]; + } + break; + } + + case FORWARD_AD_G: { + FFT_SCALAR *dest = &u_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; + for (int i = 0; i < nlist; i++) + dest[list[i]] = buf[n++]; + break; + } + + case FORWARD_IK_PERATOM_G: { + FFT_SCALAR *esrc = &u_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v0src = &v0_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v1src = &v1_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v2src = &v2_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v3src = &v3_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v4src = &v4_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v5src = &v5_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; + for (int i = 0; i < nlist; i++) { + if (eflag_atom) esrc[list[i]] = buf[n++]; + if (vflag_atom) { + v0src[list[i]] = buf[n++]; + v1src[list[i]] = buf[n++]; + v2src[list[i]] = buf[n++]; + v3src[list[i]] = buf[n++]; + v4src[list[i]] = buf[n++]; + v5src[list[i]] = buf[n++]; + } + } + break; + } + + case FORWARD_AD_PERATOM_G: { + FFT_SCALAR *v0src = &v0_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v1src = &v1_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v2src = &v2_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v3src = &v3_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v4src = &v4_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v5src = &v5_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; + for (int i = 0; i < nlist; i++) { + v0src[list[i]] = buf[n++]; + v1src[list[i]] = buf[n++]; + v2src[list[i]] = buf[n++]; + v3src[list[i]] = buf[n++]; + v4src[list[i]] = buf[n++]; + v5src[list[i]] = buf[n++]; + } + break; + } + + // Dispersion interactions, arithmetic mixing + + case 
FORWARD_IK_A: { + FFT_SCALAR *xdest0 = &vdx_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *ydest0 = &vdy_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *zdest0 = &vdz_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; + + FFT_SCALAR *xdest1 = &vdx_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *ydest1 = &vdy_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *zdest1 = &vdz_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; + + FFT_SCALAR *xdest2 = &vdx_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *ydest2 = &vdy_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *zdest2 = &vdz_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; + + FFT_SCALAR *xdest3 = &vdx_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *ydest3 = &vdy_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *zdest3 = &vdz_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; + + FFT_SCALAR *xdest4 = &vdx_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *ydest4 = &vdy_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *zdest4 = &vdz_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; + + FFT_SCALAR *xdest5 = &vdx_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *ydest5 = &vdy_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *zdest5 = &vdz_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; + + FFT_SCALAR *xdest6 = &vdx_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *ydest6 = &vdy_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *zdest6 = &vdz_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; + + for (int i = 0; i < nlist; i++) { + xdest0[list[i]] = buf[n++]; + ydest0[list[i]] = buf[n++]; + zdest0[list[i]] = buf[n++]; + + xdest1[list[i]] = buf[n++]; + ydest1[list[i]] = buf[n++]; + zdest1[list[i]] = buf[n++]; + + xdest2[list[i]] = buf[n++]; + ydest2[list[i]] = buf[n++]; + zdest2[list[i]] = buf[n++]; + + xdest3[list[i]] = buf[n++]; + ydest3[list[i]] = buf[n++]; + zdest3[list[i]] = buf[n++]; + + 
xdest4[list[i]] = buf[n++]; + ydest4[list[i]] = buf[n++]; + zdest4[list[i]] = buf[n++]; + + xdest5[list[i]] = buf[n++]; + ydest5[list[i]] = buf[n++]; + zdest5[list[i]] = buf[n++]; + + xdest6[list[i]] = buf[n++]; + ydest6[list[i]] = buf[n++]; + zdest6[list[i]] = buf[n++]; + } + break; + } + + case FORWARD_AD_A: { + FFT_SCALAR *dest0 = &u_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *dest1 = &u_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *dest2 = &u_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *dest3 = &u_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *dest4 = &u_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *dest5 = &u_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *dest6 = &u_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; + + for (int i = 0; i < nlist; i++) { + dest0[list[i]] = buf[n++]; + dest1[list[i]] = buf[n++]; + dest2[list[i]] = buf[n++]; + dest3[list[i]] = buf[n++]; + dest4[list[i]] = buf[n++]; + dest5[list[i]] = buf[n++]; + dest6[list[i]] = buf[n++]; + } + break; + } + + case FORWARD_IK_PERATOM_A: { + FFT_SCALAR *esrc0 = &u_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v0src0 = &v0_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v1src0 = &v1_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v2src0 = &v2_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v3src0 = &v3_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v4src0 = &v4_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v5src0 = &v5_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; + + FFT_SCALAR *esrc1 = &u_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v0src1 = &v0_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v1src1 = &v1_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v2src1 = &v2_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v3src1 = &v3_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v4src1 = 
&v4_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v5src1 = &v5_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; + + FFT_SCALAR *esrc2 = &u_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v0src2 = &v0_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v1src2 = &v1_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v2src2 = &v2_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v3src2 = &v3_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v4src2 = &v4_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v5src2 = &v5_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; + + FFT_SCALAR *esrc3 = &u_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v0src3 = &v0_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v1src3 = &v1_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v2src3 = &v2_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v3src3 = &v3_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v4src3 = &v4_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v5src3 = &v5_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; + + FFT_SCALAR *esrc4 = &u_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v0src4 = &v0_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v1src4 = &v1_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v2src4 = &v2_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v3src4 = &v3_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v4src4 = &v4_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v5src4 = &v5_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; + + FFT_SCALAR *esrc5 = &u_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v0src5 = &v0_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v1src5 = &v1_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v2src5 = &v2_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v3src5 = 
&v3_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v4src5 = &v4_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v5src5 = &v5_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; + + FFT_SCALAR *esrc6 = &u_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v0src6 = &v0_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v1src6 = &v1_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v2src6 = &v2_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v3src6 = &v3_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v4src6 = &v4_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v5src6 = &v5_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; + + for (int i = 0; i < nlist; i++) { + if (eflag_atom) { + esrc0[list[i]] = buf[n++]; + esrc1[list[i]] = buf[n++]; + esrc2[list[i]] = buf[n++]; + esrc3[list[i]] = buf[n++]; + esrc4[list[i]] = buf[n++]; + esrc5[list[i]] = buf[n++]; + esrc6[list[i]] = buf[n++]; + } + if (vflag_atom) { + v0src0[list[i]] = buf[n++]; + v1src0[list[i]] = buf[n++]; + v2src0[list[i]] = buf[n++]; + v3src0[list[i]] = buf[n++]; + v4src0[list[i]] = buf[n++]; + v5src0[list[i]] = buf[n++]; + + v0src1[list[i]] = buf[n++]; + v1src1[list[i]] = buf[n++]; + v2src1[list[i]] = buf[n++]; + v3src1[list[i]] = buf[n++]; + v4src1[list[i]] = buf[n++]; + v5src1[list[i]] = buf[n++]; + + v0src2[list[i]] = buf[n++]; + v1src2[list[i]] = buf[n++]; + v2src2[list[i]] = buf[n++]; + v3src2[list[i]] = buf[n++]; + v4src2[list[i]] = buf[n++]; + v5src2[list[i]] = buf[n++]; + + v0src3[list[i]] = buf[n++]; + v1src3[list[i]] = buf[n++]; + v2src3[list[i]] = buf[n++]; + v3src3[list[i]] = buf[n++]; + v4src3[list[i]] = buf[n++]; + v5src3[list[i]] = buf[n++]; + + v0src4[list[i]] = buf[n++]; + v1src4[list[i]] = buf[n++]; + v2src4[list[i]] = buf[n++]; + v3src4[list[i]] = buf[n++]; + v4src4[list[i]] = buf[n++]; + v5src4[list[i]] = buf[n++]; + + v0src5[list[i]] = buf[n++]; + v1src5[list[i]] = buf[n++]; + v2src5[list[i]] = buf[n++]; + 
v3src5[list[i]] = buf[n++]; + v4src5[list[i]] = buf[n++]; + v5src5[list[i]] = buf[n++]; + + v0src6[list[i]] = buf[n++]; + v1src6[list[i]] = buf[n++]; + v2src6[list[i]] = buf[n++]; + v3src6[list[i]] = buf[n++]; + v4src6[list[i]] = buf[n++]; + v5src6[list[i]] = buf[n++]; + } + } + break; + } + + case FORWARD_AD_PERATOM_A: { + FFT_SCALAR *v0src0 = &v0_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v1src0 = &v1_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v2src0 = &v2_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v3src0 = &v3_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v4src0 = &v4_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v5src0 = &v5_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; + + FFT_SCALAR *v0src1 = &v0_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v1src1 = &v1_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v2src1 = &v2_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v3src1 = &v3_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v4src1 = &v4_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v5src1 = &v5_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; + + FFT_SCALAR *v0src2 = &v0_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v1src2 = &v1_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v2src2 = &v2_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v3src2 = &v3_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v4src2 = &v4_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v5src2 = &v5_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; + + FFT_SCALAR *v0src3 = &v0_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v1src3 = &v1_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v2src3 = &v2_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v3src3 = &v3_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v4src3 = 
&v4_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v5src3 = &v5_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; + + FFT_SCALAR *v0src4 = &v0_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v1src4 = &v1_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v2src4 = &v2_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v3src4 = &v3_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v4src4 = &v4_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v5src4 = &v5_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; + + FFT_SCALAR *v0src5 = &v0_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v1src5 = &v1_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v2src5 = &v2_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v3src5 = &v3_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v4src5 = &v4_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v5src5 = &v5_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; + + FFT_SCALAR *v0src6 = &v0_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v1src6 = &v1_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v2src6 = &v2_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v3src6 = &v3_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v4src6 = &v4_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v5src6 = &v5_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; + + for (int i = 0; i < nlist; i++) { + v0src0[list[i]] = buf[n++]; + v1src0[list[i]] = buf[n++]; + v2src0[list[i]] = buf[n++]; + v3src0[list[i]] = buf[n++]; + v4src0[list[i]] = buf[n++]; + v5src0[list[i]] = buf[n++]; + + v0src1[list[i]] = buf[n++]; + v1src1[list[i]] = buf[n++]; + v2src1[list[i]] = buf[n++]; + v3src1[list[i]] = buf[n++]; + v4src1[list[i]] = buf[n++]; + v5src1[list[i]] = buf[n++]; + + v0src2[list[i]] = buf[n++]; + v1src2[list[i]] = buf[n++]; + v2src2[list[i]] = buf[n++]; + v3src2[list[i]] = buf[n++]; + v4src2[list[i]] = buf[n++]; + 
v5src2[list[i]] = buf[n++]; + + v0src3[list[i]] = buf[n++]; + v1src3[list[i]] = buf[n++]; + v2src3[list[i]] = buf[n++]; + v3src3[list[i]] = buf[n++]; + v4src3[list[i]] = buf[n++]; + v5src3[list[i]] = buf[n++]; + + v0src4[list[i]] = buf[n++]; + v1src4[list[i]] = buf[n++]; + v2src4[list[i]] = buf[n++]; + v3src4[list[i]] = buf[n++]; + v4src4[list[i]] = buf[n++]; + v5src4[list[i]] = buf[n++]; + + v0src5[list[i]] = buf[n++]; + v1src5[list[i]] = buf[n++]; + v2src5[list[i]] = buf[n++]; + v3src5[list[i]] = buf[n++]; + v4src5[list[i]] = buf[n++]; + v5src5[list[i]] = buf[n++]; + + v0src6[list[i]] = buf[n++]; + v1src6[list[i]] = buf[n++]; + v2src6[list[i]] = buf[n++]; + v3src6[list[i]] = buf[n++]; + v4src6[list[i]] = buf[n++]; + v5src6[list[i]] = buf[n++]; + } + break; + } + + // Dispersion interactions, no mixing + + case FORWARD_IK_NONE: { + for (int k = 0; k < nsplit_alloc; k++) { + FFT_SCALAR *xdest = &vdx_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *ydest = &vdy_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *zdest = &vdz_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; + for (int i = 0; i < nlist; i++) { + xdest[list[i]] = buf[n++]; + ydest[list[i]] = buf[n++]; + zdest[list[i]] = buf[n++]; + } + } + break; + } + + case FORWARD_AD_NONE: { + for (int k = 0; k < nsplit_alloc; k++) { + FFT_SCALAR *dest = &u_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; + for (int i = 0; i < nlist; i++) + dest[list[i]] = buf[n++]; + } + break; + } + + case FORWARD_IK_PERATOM_NONE: { + for (int k = 0; k < nsplit_alloc; k++) { + FFT_SCALAR *esrc = &u_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v0src = &v0_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v1src = &v1_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v2src = &v2_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v3src = &v3_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v4src = 
&v4_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v5src = &v5_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; + for (int i = 0; i < nlist; i++) { + if (eflag_atom) esrc[list[i]] = buf[n++]; + if (vflag_atom) { + v0src[list[i]] = buf[n++]; + v1src[list[i]] = buf[n++]; + v2src[list[i]] = buf[n++]; + v3src[list[i]] = buf[n++]; + v4src[list[i]] = buf[n++]; + v5src[list[i]] = buf[n++]; + } + } + } + break; + } + + case FORWARD_AD_PERATOM_NONE: { + for (int k = 0; k < nsplit_alloc; k++) { + FFT_SCALAR *v0src = &v0_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v1src = &v1_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v2src = &v2_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v3src = &v3_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v4src = &v4_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *v5src = &v5_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; + for (int i = 0; i < nlist; i++) { + v0src[list[i]] = buf[n++]; + v1src[list[i]] = buf[n++]; + v2src[list[i]] = buf[n++]; + v3src[list[i]] = buf[n++]; + v4src[list[i]] = buf[n++]; + v5src[list[i]] = buf[n++]; + } + } + break; + } + + } +} + +/* ---------------------------------------------------------------------- + pack ghost values into buf to send to another proc + flag selects which density brick(s) are packed + list[i] is an offset from the brick element at [nzlo_out][nylo_out][nxlo_out] + (the _6 bounds are used for the dispersion bricks) +------------------------------------------------------------------------- */ + +void PPPMDisp::pack_reverse(int flag, FFT_SCALAR *buf, int nlist, int *list) +{ + int n = 0; + + // Coulomb interactions, one value per list entry + + if (flag == REVERSE_RHO) { + FFT_SCALAR *src = &density_brick[nzlo_out][nylo_out][nxlo_out]; + for (int i = 0; i < nlist; i++) + buf[i] = src[list[i]]; + + // Dispersion interactions, geometric mixing, one value per list entry + + } else if (flag == REVERSE_RHO_G) { + FFT_SCALAR *src = &density_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; + for (int i = 0; i < nlist; i++) + buf[i] = src[list[i]]; + + // Dispersion interactions, arithmetic mixing, 7 interleaved values per list entry + + } else if (flag == 
REVERSE_RHO_A) { + FFT_SCALAR *src0 = &density_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *src1 = &density_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *src2 = &density_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *src3 = &density_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *src4 = &density_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *src5 = &density_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *src6 = &density_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; + for (int i = 0; i < nlist; i++) { + buf[n++] = src0[list[i]]; + buf[n++] = src1[list[i]]; + buf[n++] = src2[list[i]]; + buf[n++] = src3[list[i]]; + buf[n++] = src4[list[i]]; + buf[n++] = src5[list[i]]; + buf[n++] = src6[list[i]]; + } + + // Dispersion interactions, no mixing, the nsplit_alloc bricks are packed one after another + + } else if (flag == REVERSE_RHO_NONE) { + for (int k = 0; k < nsplit_alloc; k++) { + FFT_SCALAR *src = &density_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; + for (int i = 0; i < nlist; i++) { + buf[n++] = src[list[i]]; + } + } + } +} + +/* ---------------------------------------------------------------------- + unpack another proc's ghost values from buf and add to own values + buf is read in the same order pack_reverse fills it for the same flag + values are accumulated (+=) into the brick, never overwritten +------------------------------------------------------------------------- */ + +void PPPMDisp::unpack_reverse(int flag, FFT_SCALAR *buf, int nlist, int *list) +{ + int n = 0; + + // Coulomb interactions + + if (flag == REVERSE_RHO) { + FFT_SCALAR *dest = &density_brick[nzlo_out][nylo_out][nxlo_out]; + for (int i = 0; i < nlist; i++) + dest[list[i]] += buf[i]; + + // Dispersion interactions, geometric mixing + + } else if (flag == REVERSE_RHO_G) { + FFT_SCALAR *dest = &density_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]; + for (int i = 0; i < nlist; i++) + dest[list[i]] += buf[i]; + + // Dispersion interactions, arithmetic mixing + + } else if (flag == REVERSE_RHO_A) { + FFT_SCALAR *dest0 = &density_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *dest1 = 
&density_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *dest2 = &density_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *dest3 = &density_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *dest4 = &density_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *dest5 = &density_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]; + FFT_SCALAR *dest6 = &density_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]; + for (int i = 0; i < nlist; i++) { + dest0[list[i]] += buf[n++]; + dest1[list[i]] += buf[n++]; + dest2[list[i]] += buf[n++]; + dest3[list[i]] += buf[n++]; + dest4[list[i]] += buf[n++]; + dest5[list[i]] += buf[n++]; + dest6[list[i]] += buf[n++]; + } + + //Dispersion interactions, no mixing + + } else if (flag == REVERSE_RHO_NONE) { + for (int k = 0; k < nsplit_alloc; k++) { + FFT_SCALAR *dest = &density_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]; + for (int i = 0; i < nlist; i++) + dest[list[i]] += buf[n++]; + } + } +} + +/* ---------------------------------------------------------------------- + map nprocs to NX by NY grid as PX by PY procs - return optimal px,py + the result is written through the px and py pointers +------------------------------------------------------------------------- */ + +void PPPMDisp::procs2grid2d(int nprocs, int nx, int ny, int *px, int *py) +{ + // loop through all possible factorizations nprocs = ipx*ipy + // surf = boxx + boxy, half the perimeter of the largest (rounded-up) proc sub-domain + // innermost if test keeps the smallest surf, ties go to the larger boxx*boxy + + int bestsurf = 2 * (nx + ny); + int bestboxx = 0; + int bestboxy = 0; + + int boxx,boxy,surf,ipx,ipy; + + ipx = 1; + while (ipx <= nprocs) { + if (nprocs % ipx == 0) { + ipy = nprocs/ipx; + boxx = nx/ipx; + if (nx % ipx) boxx++; + boxy = ny/ipy; + if (ny % ipy) boxy++; + surf = boxx + boxy; + if (surf < bestsurf || + (surf == bestsurf && boxx*boxy > bestboxx*bestboxy)) { + bestsurf = surf; + bestboxx = boxx; + bestboxy = boxy; + *px = ipx; + *py = ipy; + } + } + ipx++; + } +} + +/* 
---------------------------------------------------------------------- + charge assignment into rho1d (written to the r1d argument) + dx,dy,dz = distance of particle from "lower left" grid point + for each k the polynomial with coefficients rho_c[0..ord-1][k] is evaluated + by Horner's rule at dx, dy and dz and stored in r1d[0][k], r1d[1][k], r1d[2][k] +------------------------------------------------------------------------- */ + +void PPPMDisp::compute_rho1d(const FFT_SCALAR &dx, const FFT_SCALAR &dy, + const FFT_SCALAR &dz, int ord, + FFT_SCALAR **rho_c, FFT_SCALAR **r1d) +{ + int k,l; + FFT_SCALAR r1,r2,r3; + + for (k = (1-ord)/2; k <= ord/2; k++) { + r1 = r2 = r3 = ZEROF; + + for (l = ord-1; l >= 0; l--) { + r1 = rho_c[l][k] + r1*dx; + r2 = rho_c[l][k] + r2*dy; + r3 = rho_c[l][k] + r3*dz; + } + r1d[0][k] = r1; + r1d[1][k] = r2; + r1d[2][k] = r3; + } +} + +/* ---------------------------------------------------------------------- + charge assignment into drho1d (written to the dr1d argument) + dx,dy,dz = distance of particle from "lower left" grid point + same Horner evaluation as compute_rho1d, but over drho_c[0..ord-2][k] +------------------------------------------------------------------------- */ + +void PPPMDisp::compute_drho1d(const FFT_SCALAR &dx, const FFT_SCALAR &dy, + const FFT_SCALAR &dz, int ord, + FFT_SCALAR **drho_c, FFT_SCALAR **dr1d) +{ + int k,l; + FFT_SCALAR r1,r2,r3; + + for (k = (1-ord)/2; k <= ord/2; k++) { + r1 = r2 = r3 = ZEROF; + + for (l = ord-2; l >= 0; l--) { + r1 = drho_c[l][k] + r1*dx; + r2 = drho_c[l][k] + r2*dy; + r3 = drho_c[l][k] + r3*dz; + } + dr1d[0][k] = r1; + dr1d[1][k] = r2; + dr1d[2][k] = r3; + } +} + +/* ---------------------------------------------------------------------- + generate coefficients for the weight function of order n + + (n-1) + Wn(x) = Sum wn(k,x) , Sum is over every other integer + k=-(n-1) + For k=-(n-1),-(n-1)+2, ....., (n-1)-2,n-1 + k is odd integers if n is even and even integers if n is odd + --- + | n-1 + | Sum a(l,j)*(x-k/2)**l if abs(x-k/2) < 1/2 + wn(k,x) = < l=0 + | + | 0 otherwise + --- + a coefficients are packed into the array rho_coeff to eliminate zeros + rho_coeff(l,((k+mod(n+1,2))/2) = a(l,k) +------------------------------------------------------------------------- */ + +void 
PPPMDisp::compute_rho_coeff(FFT_SCALAR **coeff , FFT_SCALAR **dcoeff, + int ord) +{ + int j,k,l,m; + FFT_SCALAR s; + + FFT_SCALAR **a; + memory->create2d_offset(a,ord,-ord,ord,"pppm/disp:a"); + + // clear the scratch table a[l][k] for l in [0,ord) and k in [-ord,ord] + + for (k = -ord; k <= ord; k++) + for (l = 0; l < ord; l++) + a[l][k] = 0.0; + + // each pass over j fills columns k = -j..j (step 2) from the k-1 and k+1 columns + + a[0][0] = 1.0; + for (j = 1; j < ord; j++) { + for (k = -j; k <= j; k += 2) { + s = 0.0; + for (l = 0; l < j; l++) { + a[l+1][k] = (a[l][k+1]-a[l][k-1]) / (l+1); +#ifdef FFT_SINGLE + s += powf(0.5,(float) l+1) * + (a[l][k-1] + powf(-1.0,(float) l) * a[l][k+1]) / (l+1); +#else + s += pow(0.5,(double) l+1) * + (a[l][k-1] + pow(-1.0,(double) l) * a[l][k+1]) / (l+1); +#endif + } + a[0][k] = s; + } + } + + // copy every other k column into coeff, dcoeff receives l*a[l][k], + // the coefficients of the derivative of the same polynomial + + m = (1-ord)/2; + for (k = -(ord-1); k < ord; k += 2) { + for (l = 0; l < ord; l++) + coeff[l][m] = a[l][k]; + for (l = 1; l < ord; l++) + dcoeff[l-1][m] = l*a[l][k]; + m++; + } + + memory->destroy2d_offset(a,-ord); +} + +/* ---------------------------------------------------------------------- + Slab-geometry correction term to dampen inter-slab interactions between + periodically repeating slabs. Yields good approximation to 2D Ewald if + adequate empty space is left between repeating slabs (J. Chem. Phys. + 111, 3155). Slabs defined here to be parallel to the xy plane. Also + extended to non-neutral systems (J. Chem. Phys. 131, 094107). 
+------------------------------------------------------------------------- */ + +void PPPMDisp::slabcorr(int eflag) +{ + // compute local contribution to global dipole moment + + double *q = atom->q; + double **x = atom->x; + double zprd = domain->zprd; + int nlocal = atom->nlocal; + + double dipole = 0.0; + for (int i = 0; i < nlocal; i++) dipole += q[i]*x[i][2]; + + // sum local contributions to get global dipole moment + + double dipole_all; + MPI_Allreduce(&dipole,&dipole_all,1,MPI_DOUBLE,MPI_SUM,world); + + // need to make non-neutral systems and/or + // per-atom energy translationally invariant + + double dipole_r2 = 0.0; + if (eflag_atom || fabs(qsum) > SMALL) { + for (int i = 0; i < nlocal; i++) + dipole_r2 += q[i]*x[i][2]*x[i][2]; + + // sum local contributions + + double tmp; + MPI_Allreduce(&dipole_r2,&tmp,1,MPI_DOUBLE,MPI_SUM,world); + dipole_r2 = tmp; + } + + // compute corrections + + const double e_slabcorr = MY_2PI*(dipole_all*dipole_all - + qsum*dipole_r2 - qsum*qsum*zprd*zprd/12.0)/volume; + const double qscale = force->qqrd2e * scale; + + if (eflag_global) energy_1 += qscale * e_slabcorr; + + // per-atom energy + + if (eflag_atom) { + double efact = qscale * MY_2PI/volume; + for (int i = 0; i < nlocal; i++) + eatom[i] += efact * q[i]*(x[i][2]*dipole_all - 0.5*(dipole_r2 + + qsum*x[i][2]*x[i][2]) - qsum*zprd*zprd/12.0); + } + + // add on force corrections + + double ffact = qscale * (-4.0*MY_PI/volume); + double **f = atom->f; + + for (int i = 0; i < nlocal; i++) f[i][2] += ffact * q[i]*(dipole_all - qsum*x[i][2]); +} + +/* ---------------------------------------------------------------------- + perform and time the 1d FFTs required for N timesteps +------------------------------------------------------------------------- */ + +int PPPMDisp::timing_1d(int n, double &time1d) +{ + double time1,time2; + int mixing = 1; + if (function[2]) mixing = 4; + if (function[3]) mixing = nsplit_alloc/2; + + if (function[0]) for (int i = 0; i < 2*nfft_both; i++) 
work1[i] = ZEROF; + if (function[1] + function[2] + function[3]) + for (int i = 0; i < 2*nfft_both_6; i++) work1_6[i] = ZEROF; + + MPI_Barrier(world); + time1 = MPI_Wtime(); + + if (function[0]) { + for (int i = 0; i < n; i++) { + fft1->timing1d(work1,nfft_both,1); + fft2->timing1d(work1,nfft_both,-1); + if (differentiation_flag != 1){ + fft2->timing1d(work1,nfft_both,-1); + fft2->timing1d(work1,nfft_both,-1); + } + } + } + + MPI_Barrier(world); + time2 = MPI_Wtime(); + time1d = time2 - time1; + + MPI_Barrier(world); + time1 = MPI_Wtime(); + + if (function[1] + function[2] + function[3]) { + for (int i = 0; i < n; i++) { + fft1_6->timing1d(work1_6,nfft_both_6,1); + fft2_6->timing1d(work1_6,nfft_both_6,-1); + if (differentiation_flag != 1){ + fft2_6->timing1d(work1_6,nfft_both_6,-1); + fft2_6->timing1d(work1_6,nfft_both_6,-1); + } + } + } + + MPI_Barrier(world); + time2 = MPI_Wtime(); + time1d += (time2 - time1)*mixing; + + if (differentiation_flag) return 2; + return 4; +} + +/* ---------------------------------------------------------------------- + perform and time the 3d FFTs required for N timesteps +------------------------------------------------------------------------- */ + +int PPPMDisp::timing_3d(int n, double &time3d) +{ + double time1,time2; + int mixing = 1; + if (function[2]) mixing = 4; + if (function[3]) mixing = nsplit_alloc/2; + + if (function[0]) for (int i = 0; i < 2*nfft_both; i++) work1[i] = ZEROF; + if (function[1] + function[2] + function[3]) + for (int i = 0; i < 2*nfft_both_6; i++) work1_6[i] = ZEROF; + + + + MPI_Barrier(world); + time1 = MPI_Wtime(); + + if (function[0]) { + for (int i = 0; i < n; i++) { + fft1->compute(work1,work1,1); + fft2->compute(work1,work1,-1); + if (differentiation_flag != 1) { + fft2->compute(work1,work1,-1); + fft2->compute(work1,work1,-1); + } + } + } + + MPI_Barrier(world); + time2 = MPI_Wtime(); + time3d = time2 - time1; + + MPI_Barrier(world); + time1 = MPI_Wtime(); + + if (function[1] + function[2] + 
function[3]) { + for (int i = 0; i < n; i++) { + fft1_6->compute(work1_6,work1_6,1); + fft2_6->compute(work1_6,work1_6,-1); + if (differentiation_flag != 1) { + fft2_6->compute(work1_6,work1_6,-1); + fft2_6->compute(work1_6,work1_6,-1); + } + } + } + + MPI_Barrier(world); + time2 = MPI_Wtime(); + time3d += (time2 - time1) * mixing; + + if (differentiation_flag) return 2; + return 4; +} + +/* ---------------------------------------------------------------------- + memory usage of local arrays +------------------------------------------------------------------------- */ + +double PPPMDisp::memory_usage() +{ + double bytes = nmax*3 * sizeof(double); + int mixing = 1; + int diff = 3; //depends on differentiation + int per = 7; //depends on per atom calculations + if (differentiation_flag) { + diff = 1; + per = 6; + } + if (!evflag_atom) per = 0; + if (function[2]) mixing = 7; + if (function[3]) mixing = nsplit_alloc; + + if (function[0]) { + int nbrick = (nxhi_out-nxlo_out+1) * (nyhi_out-nylo_out+1) * + (nzhi_out-nzlo_out+1); + bytes += (1 + diff + per) * nbrick * sizeof(FFT_SCALAR); //brick memory + bytes += 6 * nfft_both * sizeof(double); // vg + bytes += nfft_both * sizeof(double); // greensfn + bytes += nfft_both * 3 * sizeof(FFT_SCALAR); // density_FFT, work1, work2 + bytes += cg->memory_usage(); + } + + if (function[1] + function[2] + function[3]) { + int nbrick = (nxhi_out_6-nxlo_out_6+1) * (nyhi_out_6-nylo_out_6+1) * + (nzhi_out_6-nzlo_out_6+1); + bytes += (1 + diff + per ) * nbrick * sizeof(FFT_SCALAR) * mixing; // density_brick + vd_brick + per atom bricks + bytes += 6 * nfft_both_6 * sizeof(double); // vg + bytes += nfft_both_6 * sizeof(double); // greensfn + bytes += nfft_both_6 * (mixing + 2) * sizeof(FFT_SCALAR); // density_FFT, work1, work2 + bytes += cg_6->memory_usage(); + } + return bytes; +} diff --git a/src/KSPACE/pppm_old.cpp b/src/KSPACE/pppm_old.cpp index a368b5d5b0..22c7471b18 100644 --- a/src/KSPACE/pppm_old.cpp +++ b/src/KSPACE/pppm_old.cpp @@ 
-1,2863 +1,2863 @@ -/* ---------------------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - Copyright (2003) Sandia Corporation. Under the terms of Contract - DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains - certain rights in this software. This software is distributed under - the GNU General Public License. - - See the README file in the top-level LAMMPS directory. -------------------------------------------------------------------------- */ - -/* ---------------------------------------------------------------------- - Contributing authors: Roy Pollock (LLNL), Paul Crozier (SNL) - per-atom energy/virial & group/group energy/force added by Stan Moore (BYU) -------------------------------------------------------------------------- */ - -#include "lmptype.h" -#include "mpi.h" -#include "string.h" -#include "stdio.h" -#include "stdlib.h" -#include "math.h" -#include "pppm_old.h" -#include "math_const.h" -#include "atom.h" -#include "comm.h" -#include "neighbor.h" -#include "force.h" -#include "pair.h" -#include "bond.h" -#include "angle.h" -#include "domain.h" -#include "fft3d_wrap.h" -#include "remap_wrap.h" -#include "memory.h" -#include "error.h" - -using namespace LAMMPS_NS; -using namespace MathConst; - -#define MAXORDER 7 -#define OFFSET 16384 -#define SMALL 0.00001 -#define LARGE 10000.0 -#define EPS_HOC 1.0e-7 - -#ifdef FFT_SINGLE -#define ZEROF 0.0f -#define ONEF 1.0f -#else -#define ZEROF 0.0 -#define ONEF 1.0 -#endif - -/* ---------------------------------------------------------------------- */ - -PPPMOld::PPPMOld(LAMMPS *lmp, int narg, char **arg) : KSpace(lmp, narg, arg) -{ - if (narg < 1) error->all(FLERR,"Illegal kspace_style pppm command"); - - triclinic_support = 0; - pppmflag = 1; - group_group_enable = 0; - - accuracy_relative = 
fabs(force->numeric(FLERR,arg[0])); - - nfactors = 3; - factors = new int[nfactors]; - factors[0] = 2; - factors[1] = 3; - factors[2] = 5; - - MPI_Comm_rank(world,&me); - MPI_Comm_size(world,&nprocs); - - density_brick = vdx_brick = vdy_brick = vdz_brick = NULL; - density_fft = NULL; - u_brick = NULL; - v0_brick = v1_brick = v2_brick = v3_brick = v4_brick = v5_brick = NULL; - greensfn = NULL; - work1 = work2 = NULL; - vg = NULL; - fkx = fky = fkz = NULL; - buf1 = buf2 = buf3 = buf4 = NULL; - - density_A_brick = density_B_brick = NULL; - density_A_fft = density_B_fft = NULL; - - gf_b = NULL; - rho1d = rho_coeff = NULL; - - fft1 = fft2 = NULL; - remap = NULL; - - nmax = 0; - part2grid = NULL; -} - -/* ---------------------------------------------------------------------- - free all memory -------------------------------------------------------------------------- */ - -PPPMOld::~PPPMOld() -{ - delete [] factors; - deallocate(); - deallocate_peratom(); - deallocate_groups(); - memory->destroy(part2grid); -} - -/* ---------------------------------------------------------------------- - called once before run -------------------------------------------------------------------------- */ - -void PPPMOld::init() -{ - if (me == 0) { - if (screen) fprintf(screen,"PPPM initialization ...\n"); - if (logfile) fprintf(logfile,"PPPM initialization ...\n"); - } - - // error check - - triclinic_check(); - if (domain->dimension == 2) error->all(FLERR, - "Cannot use PPPM with 2d simulation"); - - if (!atom->q_flag) error->all(FLERR,"Kspace style requires atom attribute q"); - - if (slabflag == 0 && domain->nonperiodic > 0) - error->all(FLERR,"Cannot use nonperiodic boundaries with PPPM"); - if (slabflag) { - if (domain->xperiodic != 1 || domain->yperiodic != 1 || - domain->boundary[2][0] != 1 || domain->boundary[2][1] != 1) - error->all(FLERR,"Incorrect boundaries with slab PPPM"); - } - - if (order < 2 || order > MAXORDER) { - char str[128]; - sprintf(str,"PPPM order cannot be < 2 or 
> than %d",MAXORDER); - error->all(FLERR,str); - } - - // free all arrays previously allocated - - deallocate(); - deallocate_peratom(); - peratom_allocate_flag = 0; - deallocate_groups(); - group_allocate_flag = 0; - - // extract short-range Coulombic cutoff from pair style - - scale = 1.0; - - pair_check(); - - int itmp=0; - double *p_cutoff = (double *) force->pair->extract("cut_coul",itmp); - if (p_cutoff == NULL) - error->all(FLERR,"KSpace style is incompatible with Pair style"); - cutoff = *p_cutoff; - - // if kspace is TIP4P, extract TIP4P params from pair style - // bond/angle are not yet init(), so insure equilibrium request is valid - - qdist = 0.0; - - if (tip4pflag) { - double *p_qdist = (double *) force->pair->extract("qdist",itmp); - int *p_typeO = (int *) force->pair->extract("typeO",itmp); - int *p_typeH = (int *) force->pair->extract("typeH",itmp); - int *p_typeA = (int *) force->pair->extract("typeA",itmp); - int *p_typeB = (int *) force->pair->extract("typeB",itmp); - if (!p_qdist || !p_typeO || !p_typeH || !p_typeA || !p_typeB) - error->all(FLERR,"KSpace style is incompatible with Pair style"); - qdist = *p_qdist; - typeO = *p_typeO; - typeH = *p_typeH; - int typeA = *p_typeA; - int typeB = *p_typeB; - - if (force->angle == NULL || force->bond == NULL) - error->all(FLERR,"Bond and angle potentials must be defined for TIP4P"); - if (typeA < 1 || typeA > atom->nangletypes || - force->angle->setflag[typeA] == 0) - error->all(FLERR,"Bad TIP4P angle type for PPPM/TIP4P"); - if (typeB < 1 || typeB > atom->nbondtypes || - force->bond->setflag[typeB] == 0) - error->all(FLERR,"Bad TIP4P bond type for PPPM/TIP4P"); - double theta = force->angle->equilibrium_angle(typeA); - double blen = force->bond->equilibrium_distance(typeB); - alpha = qdist / (cos(0.5*theta) * blen); - } - - // compute qsum & qsqsum and warn if not charge-neutral - - qsum = qsqsum = 0.0; - for (int i = 0; i < atom->nlocal; i++) { - qsum += atom->q[i]; - qsqsum += atom->q[i]*atom->q[i]; 
- } - - double tmp; - MPI_Allreduce(&qsum,&tmp,1,MPI_DOUBLE,MPI_SUM,world); - qsum = tmp; - MPI_Allreduce(&qsqsum,&tmp,1,MPI_DOUBLE,MPI_SUM,world); - qsqsum = tmp; - - if (qsqsum == 0.0) - error->all(FLERR,"Cannot use kspace solver on system with no charge"); - if (fabs(qsum) > SMALL && me == 0) { - char str[128]; - sprintf(str,"System is not charge neutral, net charge = %g",qsum); - error->warning(FLERR,str); - } - - // set accuracy (force units) from accuracy_relative or accuracy_absolute - - if (accuracy_absolute >= 0.0) accuracy = accuracy_absolute; - else accuracy = accuracy_relative * two_charge_force; - - // setup FFT grid resolution and g_ewald - // normally one iteration thru while loop is all that is required - // if grid stencil extends beyond neighbor proc, reduce order and try again - - int iteration = 0; - - while (order > 1) { - if (iteration && me == 0) - error->warning(FLERR,"Reducing PPPM order b/c stencil extends " - "beyond neighbor processor"); - iteration++; - - set_grid(); - - if (nx_pppm >= OFFSET || ny_pppm >= OFFSET || nz_pppm >= OFFSET) - error->all(FLERR,"PPPM grid is too large"); - - // global indices of PPPM grid range from 0 to N-1 - // nlo_in,nhi_in = lower/upper limits of the 3d sub-brick of - // global PPPM grid that I own without ghost cells - // for slab PPPM, assign z grid as if it were not extended - - nxlo_in = static_cast (comm->xsplit[comm->myloc[0]] * nx_pppm); - nxhi_in = static_cast (comm->xsplit[comm->myloc[0]+1] * nx_pppm) - 1; - - nylo_in = static_cast (comm->ysplit[comm->myloc[1]] * ny_pppm); - nyhi_in = static_cast (comm->ysplit[comm->myloc[1]+1] * ny_pppm) - 1; - - nzlo_in = static_cast - (comm->zsplit[comm->myloc[2]] * nz_pppm/slab_volfactor); - nzhi_in = static_cast - (comm->zsplit[comm->myloc[2]+1] * nz_pppm/slab_volfactor) - 1; - - // nlower,nupper = stencil size for mapping particles to PPPM grid - - nlower = -(order-1)/2; - nupper = order/2; - - // shift values for particle <-> grid mapping - // add/subtract 
OFFSET to avoid int(-0.75) = 0 when want it to be -1 - - if (order % 2) shift = OFFSET + 0.5; - else shift = OFFSET; - if (order % 2) shiftone = 0.0; - else shiftone = 0.5; - - // nlo_out,nhi_out = lower/upper limits of the 3d sub-brick of - // global PPPM grid that my particles can contribute charge to - // effectively nlo_in,nhi_in + ghost cells - // nlo,nhi = global coords of grid pt to "lower left" of smallest/largest - // position a particle in my box can be at - // dist[3] = particle position bound = subbox + skin/2.0 + qdist - // qdist = offset due to TIP4P fictitious charge - // convert to triclinic if necessary - // nlo_out,nhi_out = nlo,nhi + stencil size for particle mapping - // for slab PPPM, assign z grid as if it were not extended - - triclinic = domain->triclinic; - double *prd,*sublo,*subhi; - - if (triclinic == 0) { - prd = domain->prd; - boxlo = domain->boxlo; - sublo = domain->sublo; - subhi = domain->subhi; - } else { - prd = domain->prd_lamda; - boxlo = domain->boxlo_lamda; - sublo = domain->sublo_lamda; - subhi = domain->subhi_lamda; - } - - double xprd = prd[0]; - double yprd = prd[1]; - double zprd = prd[2]; - double zprd_slab = zprd*slab_volfactor; - - double dist[3]; - double cuthalf = 0.5*neighbor->skin + qdist; - if (triclinic == 0) dist[0] = dist[1] = dist[2] = cuthalf; - else { - dist[0] = cuthalf/domain->prd[0]; - dist[1] = cuthalf/domain->prd[1]; - dist[2] = cuthalf/domain->prd[2]; - } - - int nlo,nhi; - - nlo = static_cast ((sublo[0]-dist[0]-boxlo[0]) * - nx_pppm/xprd + shift) - OFFSET; - nhi = static_cast ((subhi[0]+dist[0]-boxlo[0]) * - nx_pppm/xprd + shift) - OFFSET; - nxlo_out = nlo + nlower; - nxhi_out = nhi + nupper; - - nlo = static_cast ((sublo[1]-dist[1]-boxlo[1]) * - ny_pppm/yprd + shift) - OFFSET; - nhi = static_cast ((subhi[1]+dist[1]-boxlo[1]) * - ny_pppm/yprd + shift) - OFFSET; - nylo_out = nlo + nlower; - nyhi_out = nhi + nupper; - - nlo = static_cast ((sublo[2]-dist[2]-boxlo[2]) * - nz_pppm/zprd_slab + shift) - 
OFFSET; - nhi = static_cast ((subhi[2]+dist[2]-boxlo[2]) * - nz_pppm/zprd_slab + shift) - OFFSET; - nzlo_out = nlo + nlower; - nzhi_out = nhi + nupper; - - // for slab PPPM, change the grid boundary for processors at +z end - // to include the empty volume between periodically repeating slabs - // for slab PPPM, want charge data communicated from -z proc to +z proc, - // but not vice versa, also want field data communicated from +z proc to - // -z proc, but not vice versa - // this is accomplished by nzhi_in = nzhi_out on +z end (no ghost cells) - - if (slabflag == 1 && (comm->myloc[2] == comm->procgrid[2]-1)) { - nzhi_in = nz_pppm - 1; - nzhi_out = nz_pppm - 1; - } - - // nlo_ghost,nhi_ghost = # of planes I will recv from 6 directions - // that overlay domain I own - // proc in that direction tells me via sendrecv() - // if no neighbor proc, value is from self since I have ghosts regardless - - int nplanes; - MPI_Status status; - - nplanes = nxlo_in - nxlo_out; - if (comm->procneigh[0][0] != me) - MPI_Sendrecv(&nplanes,1,MPI_INT,comm->procneigh[0][0],0, - &nxhi_ghost,1,MPI_INT,comm->procneigh[0][1],0, - world,&status); - else nxhi_ghost = nplanes; - - nplanes = nxhi_out - nxhi_in; - if (comm->procneigh[0][1] != me) - MPI_Sendrecv(&nplanes,1,MPI_INT,comm->procneigh[0][1],0, - &nxlo_ghost,1,MPI_INT,comm->procneigh[0][0], - 0,world,&status); - else nxlo_ghost = nplanes; - - nplanes = nylo_in - nylo_out; - if (comm->procneigh[1][0] != me) - MPI_Sendrecv(&nplanes,1,MPI_INT,comm->procneigh[1][0],0, - &nyhi_ghost,1,MPI_INT,comm->procneigh[1][1],0, - world,&status); - else nyhi_ghost = nplanes; - - nplanes = nyhi_out - nyhi_in; - if (comm->procneigh[1][1] != me) - MPI_Sendrecv(&nplanes,1,MPI_INT,comm->procneigh[1][1],0, - &nylo_ghost,1,MPI_INT,comm->procneigh[1][0],0, - world,&status); - else nylo_ghost = nplanes; - - nplanes = nzlo_in - nzlo_out; - if (comm->procneigh[2][0] != me) - MPI_Sendrecv(&nplanes,1,MPI_INT,comm->procneigh[2][0],0, - 
&nzhi_ghost,1,MPI_INT,comm->procneigh[2][1],0, - world,&status); - else nzhi_ghost = nplanes; - - nplanes = nzhi_out - nzhi_in; - if (comm->procneigh[2][1] != me) - MPI_Sendrecv(&nplanes,1,MPI_INT,comm->procneigh[2][1],0, - &nzlo_ghost,1,MPI_INT,comm->procneigh[2][0],0, - world,&status); - else nzlo_ghost = nplanes; - - // test that ghost overlap is not bigger than my sub-domain - - int flag = 0; - if (nxlo_ghost > nxhi_in-nxlo_in+1) flag = 1; - if (nxhi_ghost > nxhi_in-nxlo_in+1) flag = 1; - if (nylo_ghost > nyhi_in-nylo_in+1) flag = 1; - if (nyhi_ghost > nyhi_in-nylo_in+1) flag = 1; - if (nzlo_ghost > nzhi_in-nzlo_in+1) flag = 1; - if (nzhi_ghost > nzhi_in-nzlo_in+1) flag = 1; - - int flag_all; - MPI_Allreduce(&flag,&flag_all,1,MPI_INT,MPI_SUM,world); - - if (flag_all == 0) break; - order--; - } - - if (order == 0) error->all(FLERR,"PPPM order has been reduced to 0"); - - // decomposition of FFT mesh - // global indices range from 0 to N-1 - // proc owns entire x-dimension, clump of columns in y,z dimensions - // npey_fft,npez_fft = # of procs in y,z dims - // if nprocs is small enough, proc can own 1 or more entire xy planes, - // else proc owns 2d sub-blocks of yz plane - // me_y,me_z = which proc (0-npe_fft-1) I am in y,z dimensions - // nlo_fft,nhi_fft = lower/upper limit of the section - // of the global FFT mesh that I own - - int npey_fft,npez_fft; - if (nz_pppm >= nprocs) { - npey_fft = 1; - npez_fft = nprocs; - } else procs2grid2d(nprocs,ny_pppm,nz_pppm,&npey_fft,&npez_fft); - - int me_y = me % npey_fft; - int me_z = me / npey_fft; - - nxlo_fft = 0; - nxhi_fft = nx_pppm - 1; - nylo_fft = me_y*ny_pppm/npey_fft; - nyhi_fft = (me_y+1)*ny_pppm/npey_fft - 1; - nzlo_fft = me_z*nz_pppm/npez_fft; - nzhi_fft = (me_z+1)*nz_pppm/npez_fft - 1; - - // PPPM grid for this proc, including ghosts - - ngrid = (nxhi_out-nxlo_out+1) * (nyhi_out-nylo_out+1) * - (nzhi_out-nzlo_out+1); - - // FFT arrays on this proc, without ghosts - // nfft = FFT points in FFT decomposition 
on this proc - // nfft_brick = FFT points in 3d brick-decomposition on this proc - // nfft_both = greater of 2 values - - nfft = (nxhi_fft-nxlo_fft+1) * (nyhi_fft-nylo_fft+1) * - (nzhi_fft-nzlo_fft+1); - int nfft_brick = (nxhi_in-nxlo_in+1) * (nyhi_in-nylo_in+1) * - (nzhi_in-nzlo_in+1); - nfft_both = MAX(nfft,nfft_brick); - - // buffer space for use in brick2fft and fillbrick - // idel = max # of ghost planes to send or recv in +/- dir of each dim - // nx,ny,nz = owned planes (including ghosts) in each dim - // nxx,nyy,nzz = max # of grid cells to send in each dim - // nbuf = max in any dim, augment by 3x for components of vd_xyz in fillbrick - - int idelx,idely,idelz,nx,ny,nz,nxx,nyy,nzz; - - idelx = MAX(nxlo_ghost,nxhi_ghost); - idelx = MAX(idelx,nxhi_out-nxhi_in); - idelx = MAX(idelx,nxlo_in-nxlo_out); - - idely = MAX(nylo_ghost,nyhi_ghost); - idely = MAX(idely,nyhi_out-nyhi_in); - idely = MAX(idely,nylo_in-nylo_out); - - idelz = MAX(nzlo_ghost,nzhi_ghost); - idelz = MAX(idelz,nzhi_out-nzhi_in); - idelz = MAX(idelz,nzlo_in-nzlo_out); - - nx = nxhi_out - nxlo_out + 1; - ny = nyhi_out - nylo_out + 1; - nz = nzhi_out - nzlo_out + 1; - - nxx = idelx * ny * nz; - nyy = idely * nx * nz; - nzz = idelz * nx * ny; - - nbuf = MAX(nxx,nyy); - nbuf = MAX(nbuf,nzz); - - nbuf_peratom = 7*nbuf; - nbuf *= 3; - - // print stats - - int ngrid_max,nfft_both_max,nbuf_max; - MPI_Allreduce(&ngrid,&ngrid_max,1,MPI_INT,MPI_MAX,world); - MPI_Allreduce(&nfft_both,&nfft_both_max,1,MPI_INT,MPI_MAX,world); - MPI_Allreduce(&nbuf,&nbuf_max,1,MPI_INT,MPI_MAX,world); - - if (me == 0) { - if (screen) fprintf(screen," brick FFT buffer size/proc = %d %d %d\n", - ngrid_max,nfft_both_max,nbuf_max); - if (logfile) fprintf(logfile," brick FFT buffer size/proc = %d %d %d\n", - ngrid_max,nfft_both_max,nbuf_max); - } - - // allocate K-space dependent memory - // don't invoke allocate_peratom() here, wait to see if needed - - allocate(); - - // pre-compute Green's function denomiator expansion - // 
pre-compute 1d charge distribution coefficients - - compute_gf_denom(); - compute_rho_coeff(); -} - -/* ---------------------------------------------------------------------- - adjust PPPM coeffs, called initially and whenever volume has changed -------------------------------------------------------------------------- */ - -void PPPMOld::setup() -{ - int i,j,k,l,m,n; - double *prd; - - // volume-dependent factors - // adjust z dimension for 2d slab PPPM - // z dimension for 3d PPPM is zprd since slab_volfactor = 1.0 - - if (triclinic == 0) prd = domain->prd; - else prd = domain->prd_lamda; - - double xprd = prd[0]; - double yprd = prd[1]; - double zprd = prd[2]; - double zprd_slab = zprd*slab_volfactor; - volume = xprd * yprd * zprd_slab; - - delxinv = nx_pppm/xprd; - delyinv = ny_pppm/yprd; - delzinv = nz_pppm/zprd_slab; - - delvolinv = delxinv*delyinv*delzinv; - - double unitkx = (2.0*MY_PI/xprd); - double unitky = (2.0*MY_PI/yprd); - double unitkz = (2.0*MY_PI/zprd_slab); - - // fkx,fky,fkz for my FFT grid pts - - double per; - - for (i = nxlo_fft; i <= nxhi_fft; i++) { - per = i - nx_pppm*(2*i/nx_pppm); - fkx[i] = unitkx*per; - } - - for (i = nylo_fft; i <= nyhi_fft; i++) { - per = i - ny_pppm*(2*i/ny_pppm); - fky[i] = unitky*per; - } - - for (i = nzlo_fft; i <= nzhi_fft; i++) { - per = i - nz_pppm*(2*i/nz_pppm); - fkz[i] = unitkz*per; - } - - // virial coefficients - - double sqk,vterm; - - n = 0; - for (k = nzlo_fft; k <= nzhi_fft; k++) { - for (j = nylo_fft; j <= nyhi_fft; j++) { - for (i = nxlo_fft; i <= nxhi_fft; i++) { - sqk = fkx[i]*fkx[i] + fky[j]*fky[j] + fkz[k]*fkz[k]; - if (sqk == 0.0) { - vg[n][0] = 0.0; - vg[n][1] = 0.0; - vg[n][2] = 0.0; - vg[n][3] = 0.0; - vg[n][4] = 0.0; - vg[n][5] = 0.0; - } else { - vterm = -2.0 * (1.0/sqk + 0.25/(g_ewald*g_ewald)); - vg[n][0] = 1.0 + vterm*fkx[i]*fkx[i]; - vg[n][1] = 1.0 + vterm*fky[j]*fky[j]; - vg[n][2] = 1.0 + vterm*fkz[k]*fkz[k]; - vg[n][3] = vterm*fkx[i]*fky[j]; - vg[n][4] = vterm*fkx[i]*fkz[k]; - 
vg[n][5] = vterm*fky[j]*fkz[k]; - } - n++; - } - } - } - - // modified (Hockney-Eastwood) Coulomb Green's function - - int nx,ny,nz,kper,lper,mper; - double snx,sny,snz,snx2,sny2,snz2; - double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz; - double sum1,dot1,dot2; - double numerator,denominator; - - int nbx = static_cast ((g_ewald*xprd/(MY_PI*nx_pppm)) * - pow(-log(EPS_HOC),0.25)); - int nby = static_cast ((g_ewald*yprd/(MY_PI*ny_pppm)) * - pow(-log(EPS_HOC),0.25)); - int nbz = static_cast ((g_ewald*zprd_slab/(MY_PI*nz_pppm)) * - pow(-log(EPS_HOC),0.25)); - - double form = 1.0; - - n = 0; - for (m = nzlo_fft; m <= nzhi_fft; m++) { - mper = m - nz_pppm*(2*m/nz_pppm); - snz = sin(0.5*unitkz*mper*zprd_slab/nz_pppm); - snz2 = snz*snz; - - for (l = nylo_fft; l <= nyhi_fft; l++) { - lper = l - ny_pppm*(2*l/ny_pppm); - sny = sin(0.5*unitky*lper*yprd/ny_pppm); - sny2 = sny*sny; - - for (k = nxlo_fft; k <= nxhi_fft; k++) { - kper = k - nx_pppm*(2*k/nx_pppm); - snx = sin(0.5*unitkx*kper*xprd/nx_pppm); - snx2 = snx*snx; - - sqk = pow(unitkx*kper,2.0) + pow(unitky*lper,2.0) + - pow(unitkz*mper,2.0); - - if (sqk != 0.0) { - numerator = form*12.5663706/sqk; - denominator = gf_denom(snx2,sny2,snz2); - sum1 = 0.0; - const double dorder = static_cast(order); - for (nx = -nbx; nx <= nbx; nx++) { - qx = unitkx*(kper+nx_pppm*nx); - sx = exp(-0.25*pow(qx/g_ewald,2.0)); - wx = 1.0; - argx = 0.5*qx*xprd/nx_pppm; - if (argx != 0.0) wx = pow(sin(argx)/argx,dorder); - for (ny = -nby; ny <= nby; ny++) { - qy = unitky*(lper+ny_pppm*ny); - sy = exp(-0.25*pow(qy/g_ewald,2.0)); - wy = 1.0; - argy = 0.5*qy*yprd/ny_pppm; - if (argy != 0.0) wy = pow(sin(argy)/argy,dorder); - for (nz = -nbz; nz <= nbz; nz++) { - qz = unitkz*(mper+nz_pppm*nz); - sz = exp(-0.25*pow(qz/g_ewald,2.0)); - wz = 1.0; - argz = 0.5*qz*zprd_slab/nz_pppm; - if (argz != 0.0) wz = pow(sin(argz)/argz,dorder); - - dot1 = unitkx*kper*qx + unitky*lper*qy + unitkz*mper*qz; - dot2 = qx*qx+qy*qy+qz*qz; - sum1 += (dot1/dot2) * sx*sy*sz * 
pow(wx*wy*wz,2.0); - } - } - } - greensfn[n++] = numerator*sum1/denominator; - } else greensfn[n++] = 0.0; - } - } - } -} - -/* ---------------------------------------------------------------------- - compute the PPPM long-range force, energy, virial -------------------------------------------------------------------------- */ - -void PPPMOld::compute(int eflag, int vflag) -{ - int i,j; - - // set energy/virial flags - // invoke allocate_peratom() if needed for first time - - if (eflag || vflag) ev_setup(eflag,vflag); - else evflag = evflag_atom = eflag_global = vflag_global = - eflag_atom = vflag_atom = 0; - - if (evflag_atom && !peratom_allocate_flag) { - allocate_peratom(); - peratom_allocate_flag = 1; - } - - // convert atoms from box to lamda coords - - if (triclinic == 0) boxlo = domain->boxlo; - else { - boxlo = domain->boxlo_lamda; - domain->x2lamda(atom->nlocal); - } - - // extend size of per-atom arrays if necessary - - if (atom->nlocal > nmax) { - memory->destroy(part2grid); - nmax = atom->nmax; - memory->create(part2grid,nmax,3,"pppm:part2grid"); - } - - // find grid points for all my particles - // map my particle charge onto my local 3d density grid - - particle_map(); - make_rho(); - - // all procs communicate density values from their ghost cells - // to fully sum contribution in their 3d bricks - // remap from 3d decomposition to FFT decomposition - - brick2fft(); - - // compute potential gradient on my FFT grid and - // portion of e_long on this proc's FFT grid - // return gradients (electric fields) in 3d brick decomposition - // also performs per-atom calculations via poisson_peratom() - - poisson(); - - // all procs communicate E-field values - // to fill ghost cells surrounding their 3d bricks - - fillbrick(); - - // extra per-atom energy/virial communication - - if (evflag_atom) fillbrick_peratom(); - - // calculate the force on my particles - - fieldforce(); - - // extra per-atom energy/virial communication - - if (evflag_atom) 
fieldforce_peratom(); - - // sum global energy across procs and add in volume-dependent term - - const double qscale = force->qqrd2e * scale; - - if (eflag_global) { - double energy_all; - MPI_Allreduce(&energy,&energy_all,1,MPI_DOUBLE,MPI_SUM,world); - energy = energy_all; - - energy *= 0.5*volume; - energy -= g_ewald*qsqsum/MY_PIS + - MY_PI2*qsum*qsum / (g_ewald*g_ewald*volume); - energy *= qscale; - } - - // sum global virial across procs - - if (vflag_global) { - double virial_all[6]; - MPI_Allreduce(virial,virial_all,6,MPI_DOUBLE,MPI_SUM,world); - for (i = 0; i < 6; i++) virial[i] = 0.5*qscale*volume*virial_all[i]; - } - - // per-atom energy/virial - // energy includes self-energy correction - - if (evflag_atom) { - double *q = atom->q; - int nlocal = atom->nlocal; - - if (eflag_atom) { - for (i = 0; i < nlocal; i++) { - eatom[i] *= 0.5; - eatom[i] -= g_ewald*q[i]*q[i]/MY_PIS + MY_PI2*q[i]*qsum / - (g_ewald*g_ewald*volume); - eatom[i] *= qscale; - } - } - - if (vflag_atom) { - for (i = 0; i < nlocal; i++) - for (j = 0; j < 6; j++) vatom[i][j] *= 0.5*q[i]*qscale; - } - } - - // 2d slab correction - - if (slabflag == 1) slabcorr(); - - // convert atoms back from lamda to box coords - - if (triclinic) domain->lamda2x(atom->nlocal); -} - -/* ---------------------------------------------------------------------- - allocate memory that depends on # of K-vectors and order -------------------------------------------------------------------------- */ - -void PPPMOld::allocate() -{ - memory->create3d_offset(density_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, - nxlo_out,nxhi_out,"pppm:density_brick"); - memory->create3d_offset(vdx_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, - nxlo_out,nxhi_out,"pppm:vdx_brick"); - memory->create3d_offset(vdy_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, - nxlo_out,nxhi_out,"pppm:vdy_brick"); - memory->create3d_offset(vdz_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, - nxlo_out,nxhi_out,"pppm:vdz_brick"); - - 
memory->create(density_fft,nfft_both,"pppm:density_fft"); - memory->create(greensfn,nfft_both,"pppm:greensfn"); - memory->create(work1,2*nfft_both,"pppm:work1"); - memory->create(work2,2*nfft_both,"pppm:work2"); - memory->create(vg,nfft_both,6,"pppm:vg"); - - memory->create1d_offset(fkx,nxlo_fft,nxhi_fft,"pppm:fkx"); - memory->create1d_offset(fky,nylo_fft,nyhi_fft,"pppm:fky"); - memory->create1d_offset(fkz,nzlo_fft,nzhi_fft,"pppm:fkz"); - - memory->create(buf1,nbuf,"pppm:buf1"); - memory->create(buf2,nbuf,"pppm:buf2"); - - // summation coeffs - - memory->create(gf_b,order,"pppm:gf_b"); - memory->create2d_offset(rho1d,3,-order/2,order/2,"pppm:rho1d"); - memory->create2d_offset(rho_coeff,order,(1-order)/2,order/2,"pppm:rho_coeff"); - - // create 2 FFTs and a Remap - // 1st FFT keeps data in FFT decompostion - // 2nd FFT returns data in 3d brick decomposition - // remap takes data from 3d brick to FFT decomposition - - int tmp; - - fft1 = new FFT3d(lmp,world,nx_pppm,ny_pppm,nz_pppm, - nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft, - nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft, - 0,0,&tmp); - - fft2 = new FFT3d(lmp,world,nx_pppm,ny_pppm,nz_pppm, - nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft, - nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in, - 0,0,&tmp); - - remap = new Remap(lmp,world, - nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in, - nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft, - 1,0,0,FFT_PRECISION); -} - -/* ---------------------------------------------------------------------- - allocate per-atom memory that depends on # of K-vectors and order -------------------------------------------------------------------------- */ - -void PPPMOld::allocate_peratom() -{ - memory->create3d_offset(u_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, - nxlo_out,nxhi_out,"pppm:u_brick"); - - memory->create3d_offset(v0_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, - nxlo_out,nxhi_out,"pppm:v0_brick"); - 
memory->create3d_offset(v1_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, - nxlo_out,nxhi_out,"pppm:v1_brick"); - memory->create3d_offset(v2_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, - nxlo_out,nxhi_out,"pppm:v2_brick"); - memory->create3d_offset(v3_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, - nxlo_out,nxhi_out,"pppm:v3_brick"); - memory->create3d_offset(v4_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, - nxlo_out,nxhi_out,"pppm:v4_brick"); - memory->create3d_offset(v5_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, - nxlo_out,nxhi_out,"pppm:v5_brick"); - - memory->create(buf3,nbuf_peratom,"pppm:buf3"); - memory->create(buf4,nbuf_peratom,"pppm:buf4"); -} - -/* ---------------------------------------------------------------------- - deallocate memory that depends on # of K-vectors and order -------------------------------------------------------------------------- */ - -void PPPMOld::deallocate() -{ - memory->destroy3d_offset(density_brick,nzlo_out,nylo_out,nxlo_out); - memory->destroy3d_offset(vdx_brick,nzlo_out,nylo_out,nxlo_out); - memory->destroy3d_offset(vdy_brick,nzlo_out,nylo_out,nxlo_out); - memory->destroy3d_offset(vdz_brick,nzlo_out,nylo_out,nxlo_out); - - memory->destroy(density_fft); - memory->destroy(greensfn); - memory->destroy(work1); - memory->destroy(work2); - memory->destroy(vg); - - memory->destroy1d_offset(fkx,nxlo_fft); - memory->destroy1d_offset(fky,nylo_fft); - memory->destroy1d_offset(fkz,nzlo_fft); - - memory->destroy(buf1); - memory->destroy(buf2); - - memory->destroy(gf_b); - memory->destroy2d_offset(rho1d,-order/2); - memory->destroy2d_offset(rho_coeff,(1-order)/2); - - delete fft1; - delete fft2; - delete remap; -} - -/* ---------------------------------------------------------------------- - deallocate per-atom memory that depends on # of K-vectors and order -------------------------------------------------------------------------- */ - -void PPPMOld::deallocate_peratom() -{ - memory->destroy3d_offset(u_brick,nzlo_out,nylo_out,nxlo_out); - - 
memory->destroy3d_offset(v0_brick,nzlo_out,nylo_out,nxlo_out); - memory->destroy3d_offset(v1_brick,nzlo_out,nylo_out,nxlo_out); - memory->destroy3d_offset(v2_brick,nzlo_out,nylo_out,nxlo_out); - memory->destroy3d_offset(v3_brick,nzlo_out,nylo_out,nxlo_out); - memory->destroy3d_offset(v4_brick,nzlo_out,nylo_out,nxlo_out); - memory->destroy3d_offset(v5_brick,nzlo_out,nylo_out,nxlo_out); - - memory->destroy(buf3); - memory->destroy(buf4); -} - -/* ---------------------------------------------------------------------- - set size of FFT grid (nx,ny,nz_pppm) and g_ewald -------------------------------------------------------------------------- */ - -void PPPMOld::set_grid() -{ - // see JCP 109, pg 7698 for derivation of coefficients - // higher order coefficients may be computed if needed - - double **acons; - memory->create(acons,8,7,"pppm:acons"); - - acons[1][0] = 2.0 / 3.0; - acons[2][0] = 1.0 / 50.0; - acons[2][1] = 5.0 / 294.0; - acons[3][0] = 1.0 / 588.0; - acons[3][1] = 7.0 / 1440.0; - acons[3][2] = 21.0 / 3872.0; - acons[4][0] = 1.0 / 4320.0; - acons[4][1] = 3.0 / 1936.0; - acons[4][2] = 7601.0 / 2271360.0; - acons[4][3] = 143.0 / 28800.0; - acons[5][0] = 1.0 / 23232.0; - acons[5][1] = 7601.0 / 13628160.0; - acons[5][2] = 143.0 / 69120.0; - acons[5][3] = 517231.0 / 106536960.0; - acons[5][4] = 106640677.0 / 11737571328.0; - acons[6][0] = 691.0 / 68140800.0; - acons[6][1] = 13.0 / 57600.0; - acons[6][2] = 47021.0 / 35512320.0; - acons[6][3] = 9694607.0 / 2095994880.0; - acons[6][4] = 733191589.0 / 59609088000.0; - acons[6][5] = 326190917.0 / 11700633600.0; - acons[7][0] = 1.0 / 345600.0; - acons[7][1] = 3617.0 / 35512320.0; - acons[7][2] = 745739.0 / 838397952.0; - acons[7][3] = 56399353.0 / 12773376000.0; - acons[7][4] = 25091609.0 / 1560084480.0; - acons[7][5] = 1755948832039.0 / 36229939200000.0; - acons[7][6] = 4887769399.0 / 37838389248.0; - - double q2 = qsqsum * force->qqrd2e; - - // use xprd,yprd,zprd even if triclinic so grid size is the same - // adjust 
z dimension for 2d slab PPPM - // 3d PPPM just uses zprd since slab_volfactor = 1.0 - - double xprd = domain->xprd; - double yprd = domain->yprd; - double zprd = domain->zprd; - double zprd_slab = zprd*slab_volfactor; - - // make initial g_ewald estimate - // based on desired accuracy and real space cutoff - // fluid-occupied volume used to estimate real-space error - // zprd used rather than zprd_slab - - double h_x,h_y,h_z; - bigint natoms = atom->natoms; - - if (!gewaldflag) { - if (accuracy <= 0.0) - error->all(FLERR,"KSpace accuracy must be > 0"); - g_ewald = accuracy*sqrt(natoms*cutoff*xprd*yprd*zprd) / (2.0*q2); - if (g_ewald >= 1.0) g_ewald = (1.35 - 0.15*log(accuracy))/cutoff; - else g_ewald = sqrt(-log(g_ewald)) / cutoff; - } - - // set optimal nx_pppm,ny_pppm,nz_pppm based on order and accuracy - // nz_pppm uses extended zprd_slab instead of zprd - // h = 1/g_ewald is upper bound on h such that h*g_ewald <= 1 - // reduce it until accuracy target is met - - if (!gridflag) { - double err; - h_x = h_y = h_z = 1.0/g_ewald; - - nx_pppm = static_cast (xprd/h_x) + 1; - ny_pppm = static_cast (yprd/h_y) + 1; - nz_pppm = static_cast (zprd_slab/h_z) + 1; - - err = rms(h_x,xprd,natoms,q2,acons); - while (err > accuracy) { - err = rms(h_x,xprd,natoms,q2,acons); - nx_pppm++; - h_x = xprd/nx_pppm; - } - - err = rms(h_y,yprd,natoms,q2,acons); - while (err > accuracy) { - err = rms(h_y,yprd,natoms,q2,acons); - ny_pppm++; - h_y = yprd/ny_pppm; - } - - err = rms(h_z,zprd_slab,natoms,q2,acons); - while (err > accuracy) { - err = rms(h_z,zprd_slab,natoms,q2,acons); - nz_pppm++; - h_z = zprd_slab/nz_pppm; - } - } - - // boost grid size until it is factorable - - while (!factorable(nx_pppm)) nx_pppm++; - while (!factorable(ny_pppm)) ny_pppm++; - while (!factorable(nz_pppm)) nz_pppm++; - - // adjust g_ewald for new grid size - - h_x = xprd/static_cast(nx_pppm); - h_y = yprd/static_cast(ny_pppm); - h_z = zprd_slab/static_cast(nz_pppm); - - if (!gewaldflag) { - double 
gew1,gew2,dgew,f,fmid,hmin,rtb; - int ncount; - - gew1 = 0.0; - g_ewald = gew1; - f = diffpr(h_x,h_y,h_z,q2,acons); - - hmin = MIN(h_x,MIN(h_y,h_z)); - gew2 = 10.0/hmin; - g_ewald = gew2; - fmid = diffpr(h_x,h_y,h_z,q2,acons); - - if (f*fmid >= 0.0) error->all(FLERR,"Cannot compute PPPM G"); - rtb = f < 0.0 ? (dgew=gew2-gew1,gew1) : (dgew=gew1-gew2,gew2); - ncount = 0; - while (fabs(dgew) > SMALL && fmid != 0.0) { - dgew *= 0.5; - g_ewald = rtb + dgew; - fmid = diffpr(h_x,h_y,h_z,q2,acons); - if (fmid <= 0.0) rtb = g_ewald; - ncount++; - if (ncount > LARGE) error->all(FLERR,"Cannot compute PPPM G"); - } - } - - // final RMS accuracy - - double lprx = rms(h_x,xprd,natoms,q2,acons); - double lpry = rms(h_y,yprd,natoms,q2,acons); - double lprz = rms(h_z,zprd_slab,natoms,q2,acons); - double lpr = sqrt(lprx*lprx + lpry*lpry + lprz*lprz) / sqrt(3.0); - double q2_over_sqrt = q2 / sqrt(natoms*cutoff*xprd*yprd*zprd_slab); - double spr = 2.0 *q2_over_sqrt * exp(-g_ewald*g_ewald*cutoff*cutoff); - double tpr = estimate_table_accuracy(q2_over_sqrt,spr); - double accuracy = sqrt(lpr*lpr + spr*spr + tpr*tpr); - - // free local memory - - memory->destroy(acons); - - // print info - - if (me == 0) { -#ifdef FFT_SINGLE - const char fft_prec[] = "single"; -#else - const char fft_prec[] = "double"; -#endif - if (screen) { - fprintf(screen," G vector (1/distance)= %g\n",g_ewald); - fprintf(screen," grid = %d %d %d\n",nx_pppm,ny_pppm,nz_pppm); - fprintf(screen," stencil order = %d\n",order); - fprintf(screen," estimated absolute RMS force accuracy = %g\n", - accuracy); - fprintf(screen," estimated relative force accuracy = %g\n", - accuracy/two_charge_force); - fprintf(screen," using %s precision FFTs\n",fft_prec); - } - if (logfile) { - fprintf(logfile," G vector (1/distance) = %g\n",g_ewald); - fprintf(logfile," grid = %d %d %d\n",nx_pppm,ny_pppm,nz_pppm); - fprintf(logfile," stencil order = %d\n",order); - fprintf(logfile," estimated absolute RMS force accuracy = %g\n", - accuracy); 
- fprintf(logfile," estimated relative force accuracy = %g\n", - accuracy/two_charge_force); - fprintf(logfile," using %s precision FFTs\n",fft_prec); - } - } -} - -/* ---------------------------------------------------------------------- - check if all factors of n are in list of factors - return 1 if yes, 0 if no -------------------------------------------------------------------------- */ - -int PPPMOld::factorable(int n) -{ - int i; - - while (n > 1) { - for (i = 0; i < nfactors; i++) { - if (n % factors[i] == 0) { - n /= factors[i]; - break; - } - } - if (i == nfactors) return 0; - } - - return 1; -} - -/* ---------------------------------------------------------------------- - compute RMS accuracy for a dimension -------------------------------------------------------------------------- */ - -double PPPMOld::rms(double h, double prd, bigint natoms, - double q2, double **acons) -{ - double sum = 0.0; - for (int m = 0; m < order; m++) - sum += acons[order][m] * pow(h*g_ewald,2.0*m); - double value = q2 * pow(h*g_ewald,(double)order) * - sqrt(g_ewald*prd*sqrt(2.0*MY_PI)*sum/natoms) / (prd*prd); - return value; -} - -/* ---------------------------------------------------------------------- - compute difference in real-space and KSpace RMS accuracy -------------------------------------------------------------------------- */ - -double PPPMOld::diffpr(double h_x, double h_y, double h_z, double q2, - double **acons) -{ - double lprx,lpry,lprz,kspace_prec,real_prec; - double xprd = domain->xprd; - double yprd = domain->yprd; - double zprd = domain->zprd; - bigint natoms = atom->natoms; - - lprx = rms(h_x,xprd,natoms,q2,acons); - lpry = rms(h_y,yprd,natoms,q2,acons); - lprz = rms(h_z,zprd*slab_volfactor,natoms,q2,acons); - kspace_prec = sqrt(lprx*lprx + lpry*lpry + lprz*lprz) / sqrt(3.0); - real_prec = 2.0*q2 * exp(-g_ewald*g_ewald*cutoff*cutoff) / - sqrt(static_cast(natoms)*cutoff*xprd*yprd*zprd); - double value = kspace_prec - real_prec; - return value; -} - -/* 
---------------------------------------------------------------------- - pre-compute Green's function denominator expansion coeffs, Gamma(2n) -------------------------------------------------------------------------- */ - -void PPPMOld::compute_gf_denom() -{ - int k,l,m; - - for (l = 1; l < order; l++) gf_b[l] = 0.0; - gf_b[0] = 1.0; - - for (m = 1; m < order; m++) { - for (l = m; l > 0; l--) - gf_b[l] = 4.0 * (gf_b[l]*(l-m)*(l-m-0.5)-gf_b[l-1]*(l-m-1)*(l-m-1)); - gf_b[0] = 4.0 * (gf_b[0]*(l-m)*(l-m-0.5)); - } - - bigint ifact = 1; - for (k = 1; k < 2*order; k++) ifact *= k; - double gaminv = 1.0/ifact; - for (l = 0; l < order; l++) gf_b[l] *= gaminv; -} - -/* ---------------------------------------------------------------------- - ghost-swap to accumulate full density in brick decomposition - remap density from 3d brick decomposition to FFT decomposition -------------------------------------------------------------------------- */ - -void PPPMOld::brick2fft() -{ - int i,n,ix,iy,iz; - MPI_Request request; - MPI_Status status; - - // pack my ghosts for +x processor - // pass data to self or +x processor - // unpack and sum recv data into my real cells - - n = 0; - for (iz = nzlo_out; iz <= nzhi_out; iz++) - for (iy = nylo_out; iy <= nyhi_out; iy++) - for (ix = nxhi_in+1; ix <= nxhi_out; ix++) - buf1[n++] = density_brick[iz][iy][ix]; - - if (comm->procneigh[0][1] == me) - for (i = 0; i < n; i++) buf2[i] = buf1[i]; - else { - MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[0][0],0,world,&request); - MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[0][1],0,world); - MPI_Wait(&request,&status); - } - - n = 0; - for (iz = nzlo_out; iz <= nzhi_out; iz++) - for (iy = nylo_out; iy <= nyhi_out; iy++) - for (ix = nxlo_in; ix < nxlo_in+nxlo_ghost; ix++) - density_brick[iz][iy][ix] += buf2[n++]; - - // pack my ghosts for -x processor - // pass data to self or -x processor - // unpack and sum recv data into my real cells - - n = 0; - for (iz = nzlo_out; iz <= nzhi_out; iz++) - 
for (iy = nylo_out; iy <= nyhi_out; iy++) - for (ix = nxlo_out; ix < nxlo_in; ix++) - buf1[n++] = density_brick[iz][iy][ix]; - - if (comm->procneigh[0][0] == me) - for (i = 0; i < n; i++) buf2[i] = buf1[i]; - else { - MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[0][1],0,world,&request); - MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[0][0],0,world); - MPI_Wait(&request,&status); - } - - n = 0; - for (iz = nzlo_out; iz <= nzhi_out; iz++) - for (iy = nylo_out; iy <= nyhi_out; iy++) - for (ix = nxhi_in-nxhi_ghost+1; ix <= nxhi_in; ix++) - density_brick[iz][iy][ix] += buf2[n++]; - - // pack my ghosts for +y processor - // pass data to self or +y processor - // unpack and sum recv data into my real cells - - n = 0; - for (iz = nzlo_out; iz <= nzhi_out; iz++) - for (iy = nyhi_in+1; iy <= nyhi_out; iy++) - for (ix = nxlo_in; ix <= nxhi_in; ix++) - buf1[n++] = density_brick[iz][iy][ix]; - - if (comm->procneigh[1][1] == me) - for (i = 0; i < n; i++) buf2[i] = buf1[i]; - else { - MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[1][0],0,world,&request); - MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[1][1],0,world); - MPI_Wait(&request,&status); - } - - n = 0; - for (iz = nzlo_out; iz <= nzhi_out; iz++) - for (iy = nylo_in; iy < nylo_in+nylo_ghost; iy++) - for (ix = nxlo_in; ix <= nxhi_in; ix++) - density_brick[iz][iy][ix] += buf2[n++]; - - // pack my ghosts for -y processor - // pass data to self or -y processor - // unpack and sum recv data into my real cells - - n = 0; - for (iz = nzlo_out; iz <= nzhi_out; iz++) - for (iy = nylo_out; iy < nylo_in; iy++) - for (ix = nxlo_in; ix <= nxhi_in; ix++) - buf1[n++] = density_brick[iz][iy][ix]; - - if (comm->procneigh[1][0] == me) - for (i = 0; i < n; i++) buf2[i] = buf1[i]; - else { - MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[1][1],0,world,&request); - MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[1][0],0,world); - MPI_Wait(&request,&status); - } - - n = 0; - for (iz = nzlo_out; iz <= nzhi_out; iz++) - for (iy = 
nyhi_in-nyhi_ghost+1; iy <= nyhi_in; iy++) - for (ix = nxlo_in; ix <= nxhi_in; ix++) - density_brick[iz][iy][ix] += buf2[n++]; - - // pack my ghosts for +z processor - // pass data to self or +z processor - // unpack and sum recv data into my real cells - - n = 0; - for (iz = nzhi_in+1; iz <= nzhi_out; iz++) - for (iy = nylo_in; iy <= nyhi_in; iy++) - for (ix = nxlo_in; ix <= nxhi_in; ix++) - buf1[n++] = density_brick[iz][iy][ix]; - - if (comm->procneigh[2][1] == me) - for (i = 0; i < n; i++) buf2[i] = buf1[i]; - else { - MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[2][0],0,world,&request); - MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[2][1],0,world); - MPI_Wait(&request,&status); - } - - n = 0; - for (iz = nzlo_in; iz < nzlo_in+nzlo_ghost; iz++) - for (iy = nylo_in; iy <= nyhi_in; iy++) - for (ix = nxlo_in; ix <= nxhi_in; ix++) - density_brick[iz][iy][ix] += buf2[n++]; - - // pack my ghosts for -z processor - // pass data to self or -z processor - // unpack and sum recv data into my real cells - - n = 0; - for (iz = nzlo_out; iz < nzlo_in; iz++) - for (iy = nylo_in; iy <= nyhi_in; iy++) - for (ix = nxlo_in; ix <= nxhi_in; ix++) - buf1[n++] = density_brick[iz][iy][ix]; - - if (comm->procneigh[2][0] == me) - for (i = 0; i < n; i++) buf2[i] = buf1[i]; - else { - MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[2][1],0,world,&request); - MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[2][0],0,world); - MPI_Wait(&request,&status); - } - - n = 0; - for (iz = nzhi_in-nzhi_ghost+1; iz <= nzhi_in; iz++) - for (iy = nylo_in; iy <= nyhi_in; iy++) - for (ix = nxlo_in; ix <= nxhi_in; ix++) - density_brick[iz][iy][ix] += buf2[n++]; - - // remap from 3d brick decomposition to FFT decomposition - // copy grabs inner portion of density from 3d brick - // remap could be done as pre-stage of FFT, - // but this works optimally on only double values, not complex values - - n = 0; - for (iz = nzlo_in; iz <= nzhi_in; iz++) - for (iy = nylo_in; iy <= nyhi_in; iy++) - for (ix = 
nxlo_in; ix <= nxhi_in; ix++) - density_fft[n++] = density_brick[iz][iy][ix]; - - remap->perform(density_fft,density_fft,work1); -} - -/* ---------------------------------------------------------------------- - ghost-swap to fill ghost cells of my brick with field values -------------------------------------------------------------------------- */ - -void PPPMOld::fillbrick() -{ - int i,n,ix,iy,iz; - MPI_Request request; - MPI_Status status; - - // pack my real cells for +z processor - // pass data to self or +z processor - // unpack and sum recv data into my ghost cells - - n = 0; - for (iz = nzhi_in-nzhi_ghost+1; iz <= nzhi_in; iz++) - for (iy = nylo_in; iy <= nyhi_in; iy++) - for (ix = nxlo_in; ix <= nxhi_in; ix++) { - buf1[n++] = vdx_brick[iz][iy][ix]; - buf1[n++] = vdy_brick[iz][iy][ix]; - buf1[n++] = vdz_brick[iz][iy][ix]; - } - - if (comm->procneigh[2][1] == me) - for (i = 0; i < n; i++) buf2[i] = buf1[i]; - else { - MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[2][0],0,world,&request); - MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[2][1],0,world); - MPI_Wait(&request,&status); - } - - n = 0; - for (iz = nzlo_out; iz < nzlo_in; iz++) - for (iy = nylo_in; iy <= nyhi_in; iy++) - for (ix = nxlo_in; ix <= nxhi_in; ix++) { - vdx_brick[iz][iy][ix] = buf2[n++]; - vdy_brick[iz][iy][ix] = buf2[n++]; - vdz_brick[iz][iy][ix] = buf2[n++]; - } - - // pack my real cells for -z processor - // pass data to self or -z processor - // unpack and sum recv data into my ghost cells - - n = 0; - for (iz = nzlo_in; iz < nzlo_in+nzlo_ghost; iz++) - for (iy = nylo_in; iy <= nyhi_in; iy++) - for (ix = nxlo_in; ix <= nxhi_in; ix++) { - buf1[n++] = vdx_brick[iz][iy][ix]; - buf1[n++] = vdy_brick[iz][iy][ix]; - buf1[n++] = vdz_brick[iz][iy][ix]; - } - - if (comm->procneigh[2][0] == me) - for (i = 0; i < n; i++) buf2[i] = buf1[i]; - else { - MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[2][1],0,world,&request); - MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[2][0],0,world); - 
MPI_Wait(&request,&status); - } - - n = 0; - for (iz = nzhi_in+1; iz <= nzhi_out; iz++) - for (iy = nylo_in; iy <= nyhi_in; iy++) - for (ix = nxlo_in; ix <= nxhi_in; ix++) { - vdx_brick[iz][iy][ix] = buf2[n++]; - vdy_brick[iz][iy][ix] = buf2[n++]; - vdz_brick[iz][iy][ix] = buf2[n++]; - } - - // pack my real cells for +y processor - // pass data to self or +y processor - // unpack and sum recv data into my ghost cells - - n = 0; - for (iz = nzlo_out; iz <= nzhi_out; iz++) - for (iy = nyhi_in-nyhi_ghost+1; iy <= nyhi_in; iy++) - for (ix = nxlo_in; ix <= nxhi_in; ix++) { - buf1[n++] = vdx_brick[iz][iy][ix]; - buf1[n++] = vdy_brick[iz][iy][ix]; - buf1[n++] = vdz_brick[iz][iy][ix]; - } - - if (comm->procneigh[1][1] == me) - for (i = 0; i < n; i++) buf2[i] = buf1[i]; - else { - MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[1][0],0,world,&request); - MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[1][1],0,world); - MPI_Wait(&request,&status); - } - - n = 0; - for (iz = nzlo_out; iz <= nzhi_out; iz++) - for (iy = nylo_out; iy < nylo_in; iy++) - for (ix = nxlo_in; ix <= nxhi_in; ix++) { - vdx_brick[iz][iy][ix] = buf2[n++]; - vdy_brick[iz][iy][ix] = buf2[n++]; - vdz_brick[iz][iy][ix] = buf2[n++]; - } - - // pack my real cells for -y processor - // pass data to self or -y processor - // unpack and sum recv data into my ghost cells - - n = 0; - for (iz = nzlo_out; iz <= nzhi_out; iz++) - for (iy = nylo_in; iy < nylo_in+nylo_ghost; iy++) - for (ix = nxlo_in; ix <= nxhi_in; ix++) { - buf1[n++] = vdx_brick[iz][iy][ix]; - buf1[n++] = vdy_brick[iz][iy][ix]; - buf1[n++] = vdz_brick[iz][iy][ix]; - } - - if (comm->procneigh[1][0] == me) - for (i = 0; i < n; i++) buf2[i] = buf1[i]; - else { - MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[1][1],0,world,&request); - MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[1][0],0,world); - MPI_Wait(&request,&status); - } - - n = 0; - for (iz = nzlo_out; iz <= nzhi_out; iz++) - for (iy = nyhi_in+1; iy <= nyhi_out; iy++) - for (ix = 
nxlo_in; ix <= nxhi_in; ix++) { - vdx_brick[iz][iy][ix] = buf2[n++]; - vdy_brick[iz][iy][ix] = buf2[n++]; - vdz_brick[iz][iy][ix] = buf2[n++]; - } - - // pack my real cells for +x processor - // pass data to self or +x processor - // unpack and sum recv data into my ghost cells - - n = 0; - for (iz = nzlo_out; iz <= nzhi_out; iz++) - for (iy = nylo_out; iy <= nyhi_out; iy++) - for (ix = nxhi_in-nxhi_ghost+1; ix <= nxhi_in; ix++) { - buf1[n++] = vdx_brick[iz][iy][ix]; - buf1[n++] = vdy_brick[iz][iy][ix]; - buf1[n++] = vdz_brick[iz][iy][ix]; - } - - if (comm->procneigh[0][1] == me) - for (i = 0; i < n; i++) buf2[i] = buf1[i]; - else { - MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[0][0],0,world,&request); - MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[0][1],0,world); - MPI_Wait(&request,&status); - } - - n = 0; - for (iz = nzlo_out; iz <= nzhi_out; iz++) - for (iy = nylo_out; iy <= nyhi_out; iy++) - for (ix = nxlo_out; ix < nxlo_in; ix++) { - vdx_brick[iz][iy][ix] = buf2[n++]; - vdy_brick[iz][iy][ix] = buf2[n++]; - vdz_brick[iz][iy][ix] = buf2[n++]; - } - - // pack my real cells for -x processor - // pass data to self or -x processor - // unpack and sum recv data into my ghost cells - - n = 0; - for (iz = nzlo_out; iz <= nzhi_out; iz++) - for (iy = nylo_out; iy <= nyhi_out; iy++) - for (ix = nxlo_in; ix < nxlo_in+nxlo_ghost; ix++) { - buf1[n++] = vdx_brick[iz][iy][ix]; - buf1[n++] = vdy_brick[iz][iy][ix]; - buf1[n++] = vdz_brick[iz][iy][ix]; - } - - if (comm->procneigh[0][0] == me) - for (i = 0; i < n; i++) buf2[i] = buf1[i]; - else { - MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[0][1],0,world,&request); - MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[0][0],0,world); - MPI_Wait(&request,&status); - } - - n = 0; - for (iz = nzlo_out; iz <= nzhi_out; iz++) - for (iy = nylo_out; iy <= nyhi_out; iy++) - for (ix = nxhi_in+1; ix <= nxhi_out; ix++) { - vdx_brick[iz][iy][ix] = buf2[n++]; - vdy_brick[iz][iy][ix] = buf2[n++]; - vdz_brick[iz][iy][ix] = 
buf2[n++]; - } -} - -/* ---------------------------------------------------------------------- - ghost-swap to fill ghost cells of my brick with per-atom field values -------------------------------------------------------------------------- */ - -void PPPMOld::fillbrick_peratom() -{ - int i,n,ix,iy,iz; - MPI_Request request; - MPI_Status status; - - // pack my real cells for +z processor - // pass data to self or +z processor - // unpack and sum recv data into my ghost cells - - n = 0; - for (iz = nzhi_in-nzhi_ghost+1; iz <= nzhi_in; iz++) - for (iy = nylo_in; iy <= nyhi_in; iy++) - for (ix = nxlo_in; ix <= nxhi_in; ix++) { - if (eflag_atom) buf3[n++] = u_brick[iz][iy][ix]; - if (vflag_atom) { - buf3[n++] = v0_brick[iz][iy][ix]; - buf3[n++] = v1_brick[iz][iy][ix]; - buf3[n++] = v2_brick[iz][iy][ix]; - buf3[n++] = v3_brick[iz][iy][ix]; - buf3[n++] = v4_brick[iz][iy][ix]; - buf3[n++] = v5_brick[iz][iy][ix]; - } - } - - if (comm->procneigh[2][1] == me) - for (i = 0; i < n; i++) buf4[i] = buf3[i]; - else { - MPI_Irecv(buf4,nbuf_peratom,MPI_FFT_SCALAR, - comm->procneigh[2][0],0,world,&request); - MPI_Send(buf3,n,MPI_FFT_SCALAR,comm->procneigh[2][1],0,world); - MPI_Wait(&request,&status); - } - - n = 0; - for (iz = nzlo_out; iz < nzlo_in; iz++) - for (iy = nylo_in; iy <= nyhi_in; iy++) - for (ix = nxlo_in; ix <= nxhi_in; ix++) { - if (eflag_atom) u_brick[iz][iy][ix] = buf4[n++]; - if (vflag_atom) { - v0_brick[iz][iy][ix] = buf4[n++]; - v1_brick[iz][iy][ix] = buf4[n++]; - v2_brick[iz][iy][ix] = buf4[n++]; - v3_brick[iz][iy][ix] = buf4[n++]; - v4_brick[iz][iy][ix] = buf4[n++]; - v5_brick[iz][iy][ix] = buf4[n++]; - } - } - - // pack my real cells for -z processor - // pass data to self or -z processor - // unpack and sum recv data into my ghost cells - - n = 0; - for (iz = nzlo_in; iz < nzlo_in+nzlo_ghost; iz++) - for (iy = nylo_in; iy <= nyhi_in; iy++) - for (ix = nxlo_in; ix <= nxhi_in; ix++) { - if (eflag_atom) buf3[n++] = u_brick[iz][iy][ix]; - if (vflag_atom) { - 
buf3[n++] = v0_brick[iz][iy][ix]; - buf3[n++] = v1_brick[iz][iy][ix]; - buf3[n++] = v2_brick[iz][iy][ix]; - buf3[n++] = v3_brick[iz][iy][ix]; - buf3[n++] = v4_brick[iz][iy][ix]; - buf3[n++] = v5_brick[iz][iy][ix]; - } - } - - if (comm->procneigh[2][0] == me) - for (i = 0; i < n; i++) buf4[i] = buf3[i]; - else { - MPI_Irecv(buf4,nbuf_peratom,MPI_FFT_SCALAR, - comm->procneigh[2][1],0,world,&request); - MPI_Send(buf3,n,MPI_FFT_SCALAR,comm->procneigh[2][0],0,world); - MPI_Wait(&request,&status); - } - - n = 0; - for (iz = nzhi_in+1; iz <= nzhi_out; iz++) - for (iy = nylo_in; iy <= nyhi_in; iy++) - for (ix = nxlo_in; ix <= nxhi_in; ix++) { - if (eflag_atom) u_brick[iz][iy][ix] = buf4[n++]; - if (vflag_atom) { - v0_brick[iz][iy][ix] = buf4[n++]; - v1_brick[iz][iy][ix] = buf4[n++]; - v2_brick[iz][iy][ix] = buf4[n++]; - v3_brick[iz][iy][ix] = buf4[n++]; - v4_brick[iz][iy][ix] = buf4[n++]; - v5_brick[iz][iy][ix] = buf4[n++]; - } - } - - // pack my real cells for +y processor - // pass data to self or +y processor - // unpack and sum recv data into my ghost cells - - n = 0; - for (iz = nzlo_out; iz <= nzhi_out; iz++) - for (iy = nyhi_in-nyhi_ghost+1; iy <= nyhi_in; iy++) - for (ix = nxlo_in; ix <= nxhi_in; ix++) { - if (eflag_atom) buf3[n++] = u_brick[iz][iy][ix]; - if (vflag_atom) { - buf3[n++] = v0_brick[iz][iy][ix]; - buf3[n++] = v1_brick[iz][iy][ix]; - buf3[n++] = v2_brick[iz][iy][ix]; - buf3[n++] = v3_brick[iz][iy][ix]; - buf3[n++] = v4_brick[iz][iy][ix]; - buf3[n++] = v5_brick[iz][iy][ix]; - } - } - - if (comm->procneigh[1][1] == me) - for (i = 0; i < n; i++) buf4[i] = buf3[i]; - else { - MPI_Irecv(buf4,nbuf_peratom,MPI_FFT_SCALAR, - comm->procneigh[1][0],0,world,&request); - MPI_Send(buf3,n,MPI_FFT_SCALAR,comm->procneigh[1][1],0,world); - MPI_Wait(&request,&status); - } - - n = 0; - for (iz = nzlo_out; iz <= nzhi_out; iz++) - for (iy = nylo_out; iy < nylo_in; iy++) - for (ix = nxlo_in; ix <= nxhi_in; ix++) { - if (eflag_atom) u_brick[iz][iy][ix] = buf4[n++]; - if 
(vflag_atom) { - v0_brick[iz][iy][ix] = buf4[n++]; - v1_brick[iz][iy][ix] = buf4[n++]; - v2_brick[iz][iy][ix] = buf4[n++]; - v3_brick[iz][iy][ix] = buf4[n++]; - v4_brick[iz][iy][ix] = buf4[n++]; - v5_brick[iz][iy][ix] = buf4[n++]; - } - } - - // pack my real cells for -y processor - // pass data to self or -y processor - // unpack and sum recv data into my ghost cells - - n = 0; - for (iz = nzlo_out; iz <= nzhi_out; iz++) - for (iy = nylo_in; iy < nylo_in+nylo_ghost; iy++) - for (ix = nxlo_in; ix <= nxhi_in; ix++) { - if (eflag_atom) buf3[n++] = u_brick[iz][iy][ix]; - if (vflag_atom) { - buf3[n++] = v0_brick[iz][iy][ix]; - buf3[n++] = v1_brick[iz][iy][ix]; - buf3[n++] = v2_brick[iz][iy][ix]; - buf3[n++] = v3_brick[iz][iy][ix]; - buf3[n++] = v4_brick[iz][iy][ix]; - buf3[n++] = v5_brick[iz][iy][ix]; - } - } - - if (comm->procneigh[1][0] == me) - for (i = 0; i < n; i++) buf4[i] = buf3[i]; - else { - MPI_Irecv(buf4,nbuf_peratom,MPI_FFT_SCALAR, - comm->procneigh[1][1],0,world,&request); - MPI_Send(buf3,n,MPI_FFT_SCALAR,comm->procneigh[1][0],0,world); - MPI_Wait(&request,&status); - } - - n = 0; - for (iz = nzlo_out; iz <= nzhi_out; iz++) - for (iy = nyhi_in+1; iy <= nyhi_out; iy++) - for (ix = nxlo_in; ix <= nxhi_in; ix++) { - if (eflag_atom) u_brick[iz][iy][ix] = buf4[n++]; - if (vflag_atom) { - v0_brick[iz][iy][ix] = buf4[n++]; - v1_brick[iz][iy][ix] = buf4[n++]; - v2_brick[iz][iy][ix] = buf4[n++]; - v3_brick[iz][iy][ix] = buf4[n++]; - v4_brick[iz][iy][ix] = buf4[n++]; - v5_brick[iz][iy][ix] = buf4[n++]; - } - } - - // pack my real cells for +x processor - // pass data to self or +x processor - // unpack and sum recv data into my ghost cells - - n = 0; - for (iz = nzlo_out; iz <= nzhi_out; iz++) - for (iy = nylo_out; iy <= nyhi_out; iy++) - for (ix = nxhi_in-nxhi_ghost+1; ix <= nxhi_in; ix++) { - if (eflag_atom) buf3[n++] = u_brick[iz][iy][ix]; - if (vflag_atom) { - buf3[n++] = v0_brick[iz][iy][ix]; - buf3[n++] = v1_brick[iz][iy][ix]; - buf3[n++] = 
v2_brick[iz][iy][ix]; - buf3[n++] = v3_brick[iz][iy][ix]; - buf3[n++] = v4_brick[iz][iy][ix]; - buf3[n++] = v5_brick[iz][iy][ix]; - } - } - - if (comm->procneigh[0][1] == me) - for (i = 0; i < n; i++) buf4[i] = buf3[i]; - else { - MPI_Irecv(buf4,nbuf_peratom,MPI_FFT_SCALAR, - comm->procneigh[0][0],0,world,&request); - MPI_Send(buf3,n,MPI_FFT_SCALAR,comm->procneigh[0][1],0,world); - MPI_Wait(&request,&status); - } - - n = 0; - for (iz = nzlo_out; iz <= nzhi_out; iz++) - for (iy = nylo_out; iy <= nyhi_out; iy++) - for (ix = nxlo_out; ix < nxlo_in; ix++) { - if (eflag_atom) u_brick[iz][iy][ix] = buf4[n++]; - if (vflag_atom) { - v0_brick[iz][iy][ix] = buf4[n++]; - v1_brick[iz][iy][ix] = buf4[n++]; - v2_brick[iz][iy][ix] = buf4[n++]; - v3_brick[iz][iy][ix] = buf4[n++]; - v4_brick[iz][iy][ix] = buf4[n++]; - v5_brick[iz][iy][ix] = buf4[n++]; - } - } - - // pack my real cells for -x processor - // pass data to self or -x processor - // unpack and sum recv data into my ghost cells - - n = 0; - for (iz = nzlo_out; iz <= nzhi_out; iz++) - for (iy = nylo_out; iy <= nyhi_out; iy++) - for (ix = nxlo_in; ix < nxlo_in+nxlo_ghost; ix++) { - if (eflag_atom) buf3[n++] = u_brick[iz][iy][ix]; - if (vflag_atom) { - buf3[n++] = v0_brick[iz][iy][ix]; - buf3[n++] = v1_brick[iz][iy][ix]; - buf3[n++] = v2_brick[iz][iy][ix]; - buf3[n++] = v3_brick[iz][iy][ix]; - buf3[n++] = v4_brick[iz][iy][ix]; - buf3[n++] = v5_brick[iz][iy][ix]; - } - } - - if (comm->procneigh[0][0] == me) - for (i = 0; i < n; i++) buf4[i] = buf3[i]; - else { - MPI_Irecv(buf4,nbuf_peratom,MPI_FFT_SCALAR, - comm->procneigh[0][1],0,world,&request); - MPI_Send(buf3,n,MPI_FFT_SCALAR,comm->procneigh[0][0],0,world); - MPI_Wait(&request,&status); - } - - n = 0; - for (iz = nzlo_out; iz <= nzhi_out; iz++) - for (iy = nylo_out; iy <= nyhi_out; iy++) - for (ix = nxhi_in+1; ix <= nxhi_out; ix++) { - if (eflag_atom) u_brick[iz][iy][ix] = buf4[n++]; - if (vflag_atom) { - v0_brick[iz][iy][ix] = buf4[n++]; - v1_brick[iz][iy][ix] = 
buf4[n++]; - v2_brick[iz][iy][ix] = buf4[n++]; - v3_brick[iz][iy][ix] = buf4[n++]; - v4_brick[iz][iy][ix] = buf4[n++]; - v5_brick[iz][iy][ix] = buf4[n++]; - } - } -} - -/* ---------------------------------------------------------------------- - find center grid pt for each of my particles - check that full stencil for the particle will fit in my 3d brick - store central grid pt indices in part2grid array -------------------------------------------------------------------------- */ - -void PPPMOld::particle_map() -{ - int nx,ny,nz; - - double **x = atom->x; - int nlocal = atom->nlocal; - - int flag = 0; - for (int i = 0; i < nlocal; i++) { - - // (nx,ny,nz) = global coords of grid pt to "lower left" of charge - // current particle coord can be outside global and local box - // add/subtract OFFSET to avoid int(-0.75) = 0 when want it to be -1 - - nx = static_cast ((x[i][0]-boxlo[0])*delxinv+shift) - OFFSET; - ny = static_cast ((x[i][1]-boxlo[1])*delyinv+shift) - OFFSET; - nz = static_cast ((x[i][2]-boxlo[2])*delzinv+shift) - OFFSET; - - part2grid[i][0] = nx; - part2grid[i][1] = ny; - part2grid[i][2] = nz; - - // check that entire stencil around nx,ny,nz will fit in my 3d brick - - if (nx+nlower < nxlo_out || nx+nupper > nxhi_out || - ny+nlower < nylo_out || ny+nupper > nyhi_out || - nz+nlower < nzlo_out || nz+nupper > nzhi_out) - flag = 1; - } - - if (flag) error->one(FLERR,"Out of range atoms - cannot compute PPPM"); -} - -/* ---------------------------------------------------------------------- - create discretized "density" on section of global grid due to my particles - density(x,y,z) = charge "density" at grid points of my 3d brick - (nxlo:nxhi,nylo:nyhi,nzlo:nzhi) is extent of my brick (including ghosts) - in global grid -------------------------------------------------------------------------- */ - -void PPPMOld::make_rho() -{ - int l,m,n,nx,ny,nz,mx,my,mz; - FFT_SCALAR dx,dy,dz,x0,y0,z0; - - // clear 3d density array - - 
memset(&(density_brick[nzlo_out][nylo_out][nxlo_out]),0, - ngrid*sizeof(FFT_SCALAR)); - - // loop over my charges, add their contribution to nearby grid points - // (nx,ny,nz) = global coords of grid pt to "lower left" of charge - // (dx,dy,dz) = distance to "lower left" grid pt - // (mx,my,mz) = global coords of moving stencil pt - - double *q = atom->q; - double **x = atom->x; - int nlocal = atom->nlocal; - - for (int i = 0; i < nlocal; i++) { - - nx = part2grid[i][0]; - ny = part2grid[i][1]; - nz = part2grid[i][2]; - dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv; - dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv; - dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv; - - compute_rho1d(dx,dy,dz); - - z0 = delvolinv * q[i]; - for (n = nlower; n <= nupper; n++) { - mz = n+nz; - y0 = z0*rho1d[2][n]; - for (m = nlower; m <= nupper; m++) { - my = m+ny; - x0 = y0*rho1d[1][m]; - for (l = nlower; l <= nupper; l++) { - mx = l+nx; - density_brick[mz][my][mx] += x0*rho1d[0][l]; - } - } - } - } -} - -/* ---------------------------------------------------------------------- - FFT-based Poisson solver -------------------------------------------------------------------------- */ - -void PPPMOld::poisson() -{ - int i,j,k,n; - double eng; - - // transform charge density (r -> k) - - n = 0; - for (i = 0; i < nfft; i++) { - work1[n++] = density_fft[i]; - work1[n++] = ZEROF; - } - - fft1->compute(work1,work1,1); - - // global energy and virial contribution - - double scaleinv = 1.0/(nx_pppm*ny_pppm*nz_pppm); - double s2 = scaleinv*scaleinv; - - if (eflag_global || vflag_global) { - if (vflag_global) { - n = 0; - for (i = 0; i < nfft; i++) { - eng = s2 * greensfn[i] * (work1[n]*work1[n] + work1[n+1]*work1[n+1]); - for (j = 0; j < 6; j++) virial[j] += eng*vg[i][j]; - if (eflag_global) energy += eng; - n += 2; - } - } else { - n = 0; - for (i = 0; i < nfft; i++) { - energy += - s2 * greensfn[i] * (work1[n]*work1[n] + work1[n+1]*work1[n+1]); - n += 2; - } - } - } - - // scale by 
1/total-grid-pts to get rho(k) - // multiply by Green's function to get V(k) - - n = 0; - for (i = 0; i < nfft; i++) { - work1[n++] *= scaleinv * greensfn[i]; - work1[n++] *= scaleinv * greensfn[i]; - } - - // extra FFTs for per-atom energy/virial - - if (evflag_atom) poisson_peratom(); - - // compute gradients of V(r) in each of 3 dims by transformimg -ik*V(k) - // FFT leaves data in 3d brick decomposition - // copy it into inner portion of vdx,vdy,vdz arrays - - // x direction gradient - - n = 0; - for (k = nzlo_fft; k <= nzhi_fft; k++) - for (j = nylo_fft; j <= nyhi_fft; j++) - for (i = nxlo_fft; i <= nxhi_fft; i++) { - work2[n] = fkx[i]*work1[n+1]; - work2[n+1] = -fkx[i]*work1[n]; - n += 2; - } - - fft2->compute(work2,work2,-1); - - n = 0; - for (k = nzlo_in; k <= nzhi_in; k++) - for (j = nylo_in; j <= nyhi_in; j++) - for (i = nxlo_in; i <= nxhi_in; i++) { - vdx_brick[k][j][i] = work2[n]; - n += 2; - } - - // y direction gradient - - n = 0; - for (k = nzlo_fft; k <= nzhi_fft; k++) - for (j = nylo_fft; j <= nyhi_fft; j++) - for (i = nxlo_fft; i <= nxhi_fft; i++) { - work2[n] = fky[j]*work1[n+1]; - work2[n+1] = -fky[j]*work1[n]; - n += 2; - } - - fft2->compute(work2,work2,-1); - - n = 0; - for (k = nzlo_in; k <= nzhi_in; k++) - for (j = nylo_in; j <= nyhi_in; j++) - for (i = nxlo_in; i <= nxhi_in; i++) { - vdy_brick[k][j][i] = work2[n]; - n += 2; - } - - // z direction gradient - - n = 0; - for (k = nzlo_fft; k <= nzhi_fft; k++) - for (j = nylo_fft; j <= nyhi_fft; j++) - for (i = nxlo_fft; i <= nxhi_fft; i++) { - work2[n] = fkz[k]*work1[n+1]; - work2[n+1] = -fkz[k]*work1[n]; - n += 2; - } - - fft2->compute(work2,work2,-1); - - n = 0; - for (k = nzlo_in; k <= nzhi_in; k++) - for (j = nylo_in; j <= nyhi_in; j++) - for (i = nxlo_in; i <= nxhi_in; i++) { - vdz_brick[k][j][i] = work2[n]; - n += 2; - } -} - -/* ---------------------------------------------------------------------- - FFT-based Poisson solver for per-atom energy/virial 
-------------------------------------------------------------------------- */ - -void PPPMOld::poisson_peratom() -{ - int i,j,k,n; - - // energy - - if (eflag_atom) { - n = 0; - for (i = 0; i < nfft; i++) { - work2[n] = work1[n]; - work2[n+1] = work1[n+1]; - n += 2; - } - - fft2->compute(work2,work2,-1); - - n = 0; - for (k = nzlo_in; k <= nzhi_in; k++) - for (j = nylo_in; j <= nyhi_in; j++) - for (i = nxlo_in; i <= nxhi_in; i++) { - u_brick[k][j][i] = work2[n]; - n += 2; - } - } - - // 6 components of virial in v0 thru v5 - - if (!vflag_atom) return; - - n = 0; - for (i = 0; i < nfft; i++) { - work2[n] = work1[n]*vg[i][0]; - work2[n+1] = work1[n+1]*vg[i][0]; - n += 2; - } - - fft2->compute(work2,work2,-1); - - n = 0; - for (k = nzlo_in; k <= nzhi_in; k++) - for (j = nylo_in; j <= nyhi_in; j++) - for (i = nxlo_in; i <= nxhi_in; i++) { - v0_brick[k][j][i] = work2[n]; - n += 2; - } - - n = 0; - for (i = 0; i < nfft; i++) { - work2[n] = work1[n]*vg[i][1]; - work2[n+1] = work1[n+1]*vg[i][1]; - n += 2; - } - - fft2->compute(work2,work2,-1); - - n = 0; - for (k = nzlo_in; k <= nzhi_in; k++) - for (j = nylo_in; j <= nyhi_in; j++) - for (i = nxlo_in; i <= nxhi_in; i++) { - v1_brick[k][j][i] = work2[n]; - n += 2; - } - - n = 0; - for (i = 0; i < nfft; i++) { - work2[n] = work1[n]*vg[i][2]; - work2[n+1] = work1[n+1]*vg[i][2]; - n += 2; - } - - fft2->compute(work2,work2,-1); - - n = 0; - for (k = nzlo_in; k <= nzhi_in; k++) - for (j = nylo_in; j <= nyhi_in; j++) - for (i = nxlo_in; i <= nxhi_in; i++) { - v2_brick[k][j][i] = work2[n]; - n += 2; - } - - n = 0; - for (i = 0; i < nfft; i++) { - work2[n] = work1[n]*vg[i][3]; - work2[n+1] = work1[n+1]*vg[i][3]; - n += 2; - } - - fft2->compute(work2,work2,-1); - - n = 0; - for (k = nzlo_in; k <= nzhi_in; k++) - for (j = nylo_in; j <= nyhi_in; j++) - for (i = nxlo_in; i <= nxhi_in; i++) { - v3_brick[k][j][i] = work2[n]; - n += 2; - } - - n = 0; - for (i = 0; i < nfft; i++) { - work2[n] = work1[n]*vg[i][4]; - work2[n+1] = 
work1[n+1]*vg[i][4]; - n += 2; - } - - fft2->compute(work2,work2,-1); - - n = 0; - for (k = nzlo_in; k <= nzhi_in; k++) - for (j = nylo_in; j <= nyhi_in; j++) - for (i = nxlo_in; i <= nxhi_in; i++) { - v4_brick[k][j][i] = work2[n]; - n += 2; - } - - n = 0; - for (i = 0; i < nfft; i++) { - work2[n] = work1[n]*vg[i][5]; - work2[n+1] = work1[n+1]*vg[i][5]; - n += 2; - } - - fft2->compute(work2,work2,-1); - - n = 0; - for (k = nzlo_in; k <= nzhi_in; k++) - for (j = nylo_in; j <= nyhi_in; j++) - for (i = nxlo_in; i <= nxhi_in; i++) { - v5_brick[k][j][i] = work2[n]; - n += 2; - } -} - -/* ---------------------------------------------------------------------- - interpolate from grid to get electric field & force on my particles -------------------------------------------------------------------------- */ - -void PPPMOld::fieldforce() -{ - int i,l,m,n,nx,ny,nz,mx,my,mz; - FFT_SCALAR dx,dy,dz,x0,y0,z0; - FFT_SCALAR ekx,eky,ekz; - - // loop over my charges, interpolate electric field from nearby grid points - // (nx,ny,nz) = global coords of grid pt to "lower left" of charge - // (dx,dy,dz) = distance to "lower left" grid pt - // (mx,my,mz) = global coords of moving stencil pt - // ek = 3 components of E-field on particle - - double *q = atom->q; - double **x = atom->x; - double **f = atom->f; - - int nlocal = atom->nlocal; - - for (i = 0; i < nlocal; i++) { - nx = part2grid[i][0]; - ny = part2grid[i][1]; - nz = part2grid[i][2]; - dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv; - dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv; - dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv; - - compute_rho1d(dx,dy,dz); - - ekx = eky = ekz = ZEROF; - for (n = nlower; n <= nupper; n++) { - mz = n+nz; - z0 = rho1d[2][n]; - for (m = nlower; m <= nupper; m++) { - my = m+ny; - y0 = z0*rho1d[1][m]; - for (l = nlower; l <= nupper; l++) { - mx = l+nx; - x0 = y0*rho1d[0][l]; - ekx -= x0*vdx_brick[mz][my][mx]; - eky -= x0*vdy_brick[mz][my][mx]; - ekz -= x0*vdz_brick[mz][my][mx]; - } - } - } - - // 
convert E-field to force - - const double qfactor = force->qqrd2e * scale * q[i]; - f[i][0] += qfactor*ekx; - f[i][1] += qfactor*eky; - if (slabflag != 2) f[i][2] += qfactor*ekz; - } -} - -/* ---------------------------------------------------------------------- - interpolate from grid to get per-atom energy/virial -------------------------------------------------------------------------- */ - -void PPPMOld::fieldforce_peratom() -{ - int i,l,m,n,nx,ny,nz,mx,my,mz; - FFT_SCALAR dx,dy,dz,x0,y0,z0; - FFT_SCALAR u,v0,v1,v2,v3,v4,v5; - - // loop over my charges, interpolate from nearby grid points - // (nx,ny,nz) = global coords of grid pt to "lower left" of charge - // (dx,dy,dz) = distance to "lower left" grid pt - // (mx,my,mz) = global coords of moving stencil pt - - double *q = atom->q; - double **x = atom->x; - double **f = atom->f; - - int nlocal = atom->nlocal; - - for (i = 0; i < nlocal; i++) { - nx = part2grid[i][0]; - ny = part2grid[i][1]; - nz = part2grid[i][2]; - dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv; - dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv; - dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv; - - compute_rho1d(dx,dy,dz); - - u = v0 = v1 = v2 = v3 = v4 = v5 = ZEROF; - for (n = nlower; n <= nupper; n++) { - mz = n+nz; - z0 = rho1d[2][n]; - for (m = nlower; m <= nupper; m++) { - my = m+ny; - y0 = z0*rho1d[1][m]; - for (l = nlower; l <= nupper; l++) { - mx = l+nx; - x0 = y0*rho1d[0][l]; - if (eflag_atom) u += x0*u_brick[mz][my][mx]; - if (vflag_atom) { - v0 += x0*v0_brick[mz][my][mx]; - v1 += x0*v1_brick[mz][my][mx]; - v2 += x0*v2_brick[mz][my][mx]; - v3 += x0*v3_brick[mz][my][mx]; - v4 += x0*v4_brick[mz][my][mx]; - v5 += x0*v5_brick[mz][my][mx]; - } - } - } - } - - if (eflag_atom) eatom[i] += q[i]*u; - if (vflag_atom) { - vatom[i][0] += v0; - vatom[i][1] += v1; - vatom[i][2] += v2; - vatom[i][3] += v3; - vatom[i][4] += v4; - vatom[i][5] += v5; - } - } -} - -/* ---------------------------------------------------------------------- - map nprocs to 
NX by NY grid as PX by PY procs - return optimal px,py -------------------------------------------------------------------------- */ - -void PPPMOld::procs2grid2d(int nprocs, int nx, int ny, int *px, int *py) -{ - // loop thru all possible factorizations of nprocs - // surf = surface area of largest proc sub-domain - // innermost if test minimizes surface area and surface/volume ratio - - int bestsurf = 2 * (nx + ny); - int bestboxx = 0; - int bestboxy = 0; - - int boxx,boxy,surf,ipx,ipy; - - ipx = 1; - while (ipx <= nprocs) { - if (nprocs % ipx == 0) { - ipy = nprocs/ipx; - boxx = nx/ipx; - if (nx % ipx) boxx++; - boxy = ny/ipy; - if (ny % ipy) boxy++; - surf = boxx + boxy; - if (surf < bestsurf || - (surf == bestsurf && boxx*boxy > bestboxx*bestboxy)) { - bestsurf = surf; - bestboxx = boxx; - bestboxy = boxy; - *px = ipx; - *py = ipy; - } - } - ipx++; - } -} - -/* ---------------------------------------------------------------------- - charge assignment into rho1d - dx,dy,dz = distance of particle from "lower left" grid point -------------------------------------------------------------------------- */ - -void PPPMOld::compute_rho1d(const FFT_SCALAR &dx, const FFT_SCALAR &dy, - const FFT_SCALAR &dz) -{ - int k,l; - FFT_SCALAR r1,r2,r3; - - for (k = (1-order)/2; k <= order/2; k++) { - r1 = r2 = r3 = ZEROF; - - for (l = order-1; l >= 0; l--) { - r1 = rho_coeff[l][k] + r1*dx; - r2 = rho_coeff[l][k] + r2*dy; - r3 = rho_coeff[l][k] + r3*dz; - } - rho1d[0][k] = r1; - rho1d[1][k] = r2; - rho1d[2][k] = r3; - } -} - -/* ---------------------------------------------------------------------- - generate coeffients for the weight function of order n - - (n-1) - Wn(x) = Sum wn(k,x) , Sum is over every other integer - k=-(n-1) - For k=-(n-1),-(n-1)+2, ....., (n-1)-2,n-1 - k is odd integers if n is even and even integers if n is odd - --- - | n-1 - | Sum a(l,j)*(x-k/2)**l if abs(x-k/2) < 1/2 - wn(k,x) = < l=0 - | - | 0 otherwise - --- - a coeffients are packed into the array 
rho_coeff to eliminate zeros - rho_coeff(l,((k+mod(n+1,2))/2) = a(l,k) -------------------------------------------------------------------------- */ - -void PPPMOld::compute_rho_coeff() -{ - int j,k,l,m; - FFT_SCALAR s; - - FFT_SCALAR **a; - memory->create2d_offset(a,order,-order,order,"pppm:a"); - - for (k = -order; k <= order; k++) - for (l = 0; l < order; l++) - a[l][k] = 0.0; - - a[0][0] = 1.0; - for (j = 1; j < order; j++) { - for (k = -j; k <= j; k += 2) { - s = 0.0; - for (l = 0; l < j; l++) { - a[l+1][k] = (a[l][k+1]-a[l][k-1]) / (l+1); -#ifdef FFT_SINGLE - s += powf(0.5,(float) l+1) * - (a[l][k-1] + powf(-1.0,(float) l) * a[l][k+1]) / (l+1); -#else - s += pow(0.5,(double) l+1) * - (a[l][k-1] + pow(-1.0,(double) l) * a[l][k+1]) / (l+1); -#endif - } - a[0][k] = s; - } - } - - m = (1-order)/2; - for (k = -(order-1); k < order; k += 2) { - for (l = 0; l < order; l++) - rho_coeff[l][m] = a[l][k]; - m++; - } - - memory->destroy2d_offset(a,-order); -} - -/* ---------------------------------------------------------------------- - Slab-geometry correction term to dampen inter-slab interactions between - periodically repeating slabs. Yields good approximation to 2D Ewald if - adequate empty space is left between repeating slabs (J. Chem. Phys. - 111, 3155). Slabs defined here to be parallel to the xy plane. Also - extended to non-neutral systems (J. Chem. Phys. 131, 094107). 
-------------------------------------------------------------------------- */ - -void PPPMOld::slabcorr() -{ - // compute local contribution to global dipole moment - - double *q = atom->q; - double **x = atom->x; - double zprd = domain->zprd; - int nlocal = atom->nlocal; - - double dipole = 0.0; - for (int i = 0; i < nlocal; i++) dipole += q[i]*x[i][2]; - - // sum local contributions to get global dipole moment - - double dipole_all; - MPI_Allreduce(&dipole,&dipole_all,1,MPI_DOUBLE,MPI_SUM,world); - - // need to make non-neutral systems and/or - // per-atom energy translationally invariant - - double dipole_r2 = 0.0; - if (eflag_atom || fabs(qsum) > SMALL) { - for (int i = 0; i < nlocal; i++) - dipole_r2 += q[i]*x[i][2]*x[i][2]; - - // sum local contributions - - double tmp; - MPI_Allreduce(&dipole_r2,&tmp,1,MPI_DOUBLE,MPI_SUM,world); - dipole_r2 = tmp; - } - - // compute corrections - - const double e_slabcorr = MY_2PI*(dipole_all*dipole_all - - qsum*dipole_r2 - qsum*qsum*zprd*zprd/12.0)/volume; - const double qscale = force->qqrd2e * scale; - - if (eflag_global) energy += qscale * e_slabcorr; - - // per-atom energy - - if (eflag_atom) { - double efact = qscale * MY_2PI/volume; - for (int i = 0; i < nlocal; i++) - eatom[i] += efact * q[i]*(x[i][2]*dipole_all - 0.5*(dipole_r2 + - qsum*x[i][2]*x[i][2]) - qsum*zprd*zprd/12.0); - } - - // add on force corrections - - double ffact = qscale * (-4.0*MY_PI/volume); - double **f = atom->f; - - for (int i = 0; i < nlocal; i++) f[i][2] += ffact * q[i]*(dipole_all - qsum*x[i][2]); -} - - -/* ---------------------------------------------------------------------- - perform and time the 1d FFTs required for N timesteps -------------------------------------------------------------------------- */ - -int PPPMOld::timing_1d(int n, double &time1d) -{ - double time1,time2; - - for (int i = 0; i < 2*nfft_both; i++) work1[i] = ZEROF; - - MPI_Barrier(world); - time1 = MPI_Wtime(); - - for (int i = 0; i < n; i++) { - 
fft1->timing1d(work1,nfft_both,1); - fft2->timing1d(work1,nfft_both,-1); - fft2->timing1d(work1,nfft_both,-1); - fft2->timing1d(work1,nfft_both,-1); - } - - MPI_Barrier(world); - time2 = MPI_Wtime(); - time1d = time2 - time1; - - return 4; -} - -/* ---------------------------------------------------------------------- - perform and time the 3d FFTs required for N timesteps -------------------------------------------------------------------------- */ - -int PPPMOld::timing_3d(int n, double &time3d) -{ - double time1,time2; - - for (int i = 0; i < 2*nfft_both; i++) work1[i] = ZEROF; - - MPI_Barrier(world); - time1 = MPI_Wtime(); - - for (int i = 0; i < n; i++) { - fft1->compute(work1,work1,1); - fft2->compute(work1,work1,-1); - fft2->compute(work1,work1,-1); - fft2->compute(work1,work1,-1); - } - - MPI_Barrier(world); - time2 = MPI_Wtime(); - time3d = time2 - time1; - - return 4; -} - -/* ---------------------------------------------------------------------- - memory usage of local arrays -------------------------------------------------------------------------- */ - -double PPPMOld::memory_usage() -{ - double bytes = nmax*3 * sizeof(double); - int nbrick = (nxhi_out-nxlo_out+1) * (nyhi_out-nylo_out+1) * - (nzhi_out-nzlo_out+1); - bytes += 4 * nbrick * sizeof(FFT_SCALAR); - bytes += 6 * nfft_both * sizeof(double); - bytes += nfft_both * sizeof(double); - bytes += nfft_both*5 * sizeof(FFT_SCALAR); - bytes += 2 * nbuf * sizeof(FFT_SCALAR); - - if (peratom_allocate_flag) { - bytes += 7 * nbrick * sizeof(FFT_SCALAR); - bytes += 2 * nbuf_peratom * sizeof(FFT_SCALAR); - } - - if (group_allocate_flag) { - bytes += 2 * nbrick * sizeof(FFT_SCALAR); - bytes += 2 * nfft_both * sizeof(FFT_SCALAR);; - } - - return bytes; -} - -/* ---------------------------------------------------------------------- - group-group interactions - ------------------------------------------------------------------------- */ - -/* ---------------------------------------------------------------------- 
- compute the PPPM total long-range force and energy for groups A and B - ------------------------------------------------------------------------- */ - -void PPPMOld::compute_group_group(int groupbit_A, int groupbit_B, int BA_flag) -{ - if (slabflag) - error->all(FLERR,"Cannot (yet) use K-space slab " - "correction with compute group/group"); - - int i,j; - - if (!group_allocate_flag) { - allocate_groups(); - group_allocate_flag = 1; - } - - e2group = 0; //energy - f2group[0] = 0; //force in x-direction - f2group[1] = 0; //force in y-direction - f2group[2] = 0; //force in z-direction - - double *q = atom->q; - int nlocal = atom->nlocal; - int *mask = atom->mask; - - - // map my particle charge onto my local 3d density grid - - make_rho_groups(groupbit_A,groupbit_B,BA_flag); - - // all procs communicate density values from their ghost cells - // to fully sum contribution in their 3d bricks - // remap from 3d decomposition to FFT decomposition - - // temporarily store and switch pointers so we can - // use brick2fft() for groups A and B (without - // writing an additional function) - - FFT_SCALAR ***density_brick_real = density_brick; - FFT_SCALAR *density_fft_real = density_fft; - - // group A - - density_brick = density_A_brick; - density_fft = density_A_fft; - - brick2fft(); - - // group B - - density_brick = density_B_brick; - density_fft = density_B_fft; - - brick2fft(); - - // switch back pointers - - density_brick = density_brick_real; - density_fft = density_fft_real; - - // compute potential gradient on my FFT grid and - // portion of group-group energy/force on this proc's FFT grid - - poisson_groups(BA_flag); - - const double qscale = force->qqrd2e * scale; - - // total group A <--> group B energy - // self and boundary correction terms are in compute_group_group.cpp - - double e2group_all; - MPI_Allreduce(&e2group,&e2group_all,1,MPI_DOUBLE,MPI_SUM,world); - e2group = e2group_all; - - e2group *= qscale*0.5*volume; - - // total group A <--> group B force - 
- double f2group_all[3]; - MPI_Allreduce(f2group,f2group_all,3,MPI_DOUBLE,MPI_SUM,world); - - for (i = 0; i < 3; i++) f2group[i] = qscale*volume*f2group_all[i]; -} - -/* ---------------------------------------------------------------------- - allocate group-group memory that depends on # of K-vectors and order - ------------------------------------------------------------------------- */ - -void PPPMOld::allocate_groups() -{ - memory->create3d_offset(density_A_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, - nxlo_out,nxhi_out,"pppm:density_A_brick"); - memory->create3d_offset(density_B_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, - nxlo_out,nxhi_out,"pppm:density_B_brick"); - memory->create(density_A_fft,nfft_both,"pppm:density_A_fft"); - memory->create(density_B_fft,nfft_both,"pppm:density_B_fft"); -} - -/* ---------------------------------------------------------------------- - deallocate group-group memory that depends on # of K-vectors and order - ------------------------------------------------------------------------- */ - -void PPPMOld::deallocate_groups() -{ - memory->destroy3d_offset(density_A_brick,nzlo_out,nylo_out,nxlo_out); - memory->destroy3d_offset(density_B_brick,nzlo_out,nylo_out,nxlo_out); - memory->destroy(density_A_fft); - memory->destroy(density_B_fft); -} - -/* ---------------------------------------------------------------------- - create discretized "density" on section of global grid due to my particles - density(x,y,z) = charge "density" at grid points of my 3d brick - (nxlo:nxhi,nylo:nyhi,nzlo:nzhi) is extent of my brick (including ghosts) - in global grid for group-group interactions - ------------------------------------------------------------------------- */ - -void PPPMOld::make_rho_groups(int groupbit_A, int groupbit_B, int BA_flag) -{ - int l,m,n,nx,ny,nz,mx,my,mz; - FFT_SCALAR dx,dy,dz,x0,y0,z0; - - // clear 3d density arrays - - memset(&(density_A_brick[nzlo_out][nylo_out][nxlo_out]),0, - ngrid*sizeof(FFT_SCALAR)); - - 
memset(&(density_B_brick[nzlo_out][nylo_out][nxlo_out]),0, - ngrid*sizeof(FFT_SCALAR)); - - // loop over my charges, add their contribution to nearby grid points - // (nx,ny,nz) = global coords of grid pt to "lower left" of charge - // (dx,dy,dz) = distance to "lower left" grid pt - // (mx,my,mz) = global coords of moving stencil pt - - double *q = atom->q; - double **x = atom->x; - int nlocal = atom->nlocal; - int *mask = atom->mask; - - for (int i = 0; i < nlocal; i++) { - - if ((mask[i] & groupbit_A) && (mask[i] & groupbit_B)) - if (BA_flag) continue; - - if ((mask[i] & groupbit_A) || (mask[i] & groupbit_B)) { - - nx = part2grid[i][0]; - ny = part2grid[i][1]; - nz = part2grid[i][2]; - dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv; - dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv; - dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv; - - compute_rho1d(dx,dy,dz); - - z0 = delvolinv * q[i]; - for (n = nlower; n <= nupper; n++) { - mz = n+nz; - y0 = z0*rho1d[2][n]; - for (m = nlower; m <= nupper; m++) { - my = m+ny; - x0 = y0*rho1d[1][m]; - for (l = nlower; l <= nupper; l++) { - mx = l+nx; - - // group A - - if (mask[i] & groupbit_A) - density_A_brick[mz][my][mx] += x0*rho1d[0][l]; - - // group B - - if (mask[i] & groupbit_B) - density_B_brick[mz][my][mx] += x0*rho1d[0][l]; - } - } - } - } - } -} - -/* ---------------------------------------------------------------------- - FFT-based Poisson solver for group-group interactions - ------------------------------------------------------------------------- */ - -void PPPMOld::poisson_groups(int BA_flag) -{ - int i,j,k,n; - double eng; - - // reuse memory (already declared) - - FFT_SCALAR *work_A = work1; - FFT_SCALAR *work_B = work2; - - // transform charge density (r -> k) - - // group A - - n = 0; - for (i = 0; i < nfft; i++) { - work_A[n++] = density_A_fft[i]; - work_A[n++] = ZEROF; - } - - fft1->compute(work_A,work_A,1); - - // group B - - n = 0; - for (i = 0; i < nfft; i++) { - work_B[n++] = density_B_fft[i]; - 
work_B[n++] = ZEROF; - } - - fft1->compute(work_B,work_B,1); - - // group-group energy and force contribution, - // keep everything in reciprocal space so - // no inverse FFTs needed - - double scaleinv = 1.0/(nx_pppm*ny_pppm*nz_pppm); - double s2 = scaleinv*scaleinv; - - // energy - - n = 0; - for (i = 0; i < nfft; i++) { - e2group += s2 * greensfn[i] * - (work_A[n]*work_B[n] + work_A[n+1]*work_B[n+1]); - n += 2; - } - - if (BA_flag) return; - - - // multiply by Green's function and s2 - // (only for work_A so it is not squared below) - - n = 0; - for (i = 0; i < nfft; i++) { - work_A[n++] *= s2 * greensfn[i]; - work_A[n++] *= s2 * greensfn[i]; - } - - double partial_group; - - // force, x direction - - n = 0; - for (k = nzlo_fft; k <= nzhi_fft; k++) - for (j = nylo_fft; j <= nyhi_fft; j++) - for (i = nxlo_fft; i <= nxhi_fft; i++) { - partial_group = work_A[n+1]*work_B[n] - work_A[n]*work_B[n+1]; - f2group[0] += fkx[i] * partial_group; - n += 2; - } - - // force, y direction - - n = 0; - for (k = nzlo_fft; k <= nzhi_fft; k++) - for (j = nylo_fft; j <= nyhi_fft; j++) - for (i = nxlo_fft; i <= nxhi_fft; i++) { - partial_group = work_A[n+1]*work_B[n] - work_A[n]*work_B[n+1]; - f2group[1] += fky[j] * partial_group; - n += 2; - } - - // force, z direction - - n = 0; - for (k = nzlo_fft; k <= nzhi_fft; k++) - for (j = nylo_fft; j <= nyhi_fft; j++) - for (i = nxlo_fft; i <= nxhi_fft; i++) { - partial_group = work_A[n+1]*work_B[n] - work_A[n]*work_B[n+1]; - f2group[2] += fkz[k] * partial_group; - n += 2; - } -} +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. 
This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing authors: Roy Pollock (LLNL), Paul Crozier (SNL) + per-atom energy/virial & group/group energy/force added by Stan Moore (BYU) +------------------------------------------------------------------------- */ + +#include "lmptype.h" +#include "mpi.h" +#include "string.h" +#include "stdio.h" +#include "stdlib.h" +#include "math.h" +#include "pppm_old.h" +#include "math_const.h" +#include "atom.h" +#include "comm.h" +#include "neighbor.h" +#include "force.h" +#include "pair.h" +#include "bond.h" +#include "angle.h" +#include "domain.h" +#include "fft3d_wrap.h" +#include "remap_wrap.h" +#include "memory.h" +#include "error.h" + +using namespace LAMMPS_NS; +using namespace MathConst; + +#define MAXORDER 7 +#define OFFSET 16384 +#define SMALL 0.00001 +#define LARGE 10000.0 +#define EPS_HOC 1.0e-7 + +#ifdef FFT_SINGLE +#define ZEROF 0.0f +#define ONEF 1.0f +#else +#define ZEROF 0.0 +#define ONEF 1.0 +#endif + +/* ---------------------------------------------------------------------- */ + +PPPMOld::PPPMOld(LAMMPS *lmp, int narg, char **arg) : KSpace(lmp, narg, arg) +{ + if (narg < 1) error->all(FLERR,"Illegal kspace_style pppm command"); + + triclinic_support = 0; + pppmflag = 1; + group_group_enable = 0; + + accuracy_relative = fabs(force->numeric(FLERR,arg[0])); + + nfactors = 3; + factors = new int[nfactors]; + factors[0] = 2; + factors[1] = 3; + factors[2] = 5; + + MPI_Comm_rank(world,&me); + MPI_Comm_size(world,&nprocs); + + density_brick = vdx_brick = vdy_brick = vdz_brick = NULL; + density_fft = NULL; + u_brick = NULL; + v0_brick = v1_brick = v2_brick = v3_brick = v4_brick = v5_brick = NULL; + greensfn = NULL; + work1 = work2 = NULL; + vg = NULL; + fkx = fky = fkz = NULL; + 
buf1 = buf2 = buf3 = buf4 = NULL; + + density_A_brick = density_B_brick = NULL; + density_A_fft = density_B_fft = NULL; + + gf_b = NULL; + rho1d = rho_coeff = NULL; + + fft1 = fft2 = NULL; + remap = NULL; + + nmax = 0; + part2grid = NULL; +} + +/* ---------------------------------------------------------------------- + free all memory +------------------------------------------------------------------------- */ + +PPPMOld::~PPPMOld() +{ + delete [] factors; + deallocate(); + deallocate_peratom(); + deallocate_groups(); + memory->destroy(part2grid); +} + +/* ---------------------------------------------------------------------- + called once before run +------------------------------------------------------------------------- */ + +void PPPMOld::init() +{ + if (me == 0) { + if (screen) fprintf(screen,"PPPM initialization ...\n"); + if (logfile) fprintf(logfile,"PPPM initialization ...\n"); + } + + // error check + + triclinic_check(); + if (domain->dimension == 2) error->all(FLERR, + "Cannot use PPPM with 2d simulation"); + + if (!atom->q_flag) error->all(FLERR,"Kspace style requires atom attribute q"); + + if (slabflag == 0 && domain->nonperiodic > 0) + error->all(FLERR,"Cannot use nonperiodic boundaries with PPPM"); + if (slabflag) { + if (domain->xperiodic != 1 || domain->yperiodic != 1 || + domain->boundary[2][0] != 1 || domain->boundary[2][1] != 1) + error->all(FLERR,"Incorrect boundaries with slab PPPM"); + } + + if (order < 2 || order > MAXORDER) { + char str[128]; + sprintf(str,"PPPM order cannot be < 2 or > than %d",MAXORDER); + error->all(FLERR,str); + } + + // free all arrays previously allocated + + deallocate(); + deallocate_peratom(); + peratom_allocate_flag = 0; + deallocate_groups(); + group_allocate_flag = 0; + + // extract short-range Coulombic cutoff from pair style + + scale = 1.0; + + pair_check(); + + int itmp=0; + double *p_cutoff = (double *) force->pair->extract("cut_coul",itmp); + if (p_cutoff == NULL) + error->all(FLERR,"KSpace style is 
incompatible with Pair style"); + cutoff = *p_cutoff; + + // if kspace is TIP4P, extract TIP4P params from pair style + // bond/angle are not yet init(), so insure equilibrium request is valid + + qdist = 0.0; + + if (tip4pflag) { + double *p_qdist = (double *) force->pair->extract("qdist",itmp); + int *p_typeO = (int *) force->pair->extract("typeO",itmp); + int *p_typeH = (int *) force->pair->extract("typeH",itmp); + int *p_typeA = (int *) force->pair->extract("typeA",itmp); + int *p_typeB = (int *) force->pair->extract("typeB",itmp); + if (!p_qdist || !p_typeO || !p_typeH || !p_typeA || !p_typeB) + error->all(FLERR,"KSpace style is incompatible with Pair style"); + qdist = *p_qdist; + typeO = *p_typeO; + typeH = *p_typeH; + int typeA = *p_typeA; + int typeB = *p_typeB; + + if (force->angle == NULL || force->bond == NULL) + error->all(FLERR,"Bond and angle potentials must be defined for TIP4P"); + if (typeA < 1 || typeA > atom->nangletypes || + force->angle->setflag[typeA] == 0) + error->all(FLERR,"Bad TIP4P angle type for PPPM/TIP4P"); + if (typeB < 1 || typeB > atom->nbondtypes || + force->bond->setflag[typeB] == 0) + error->all(FLERR,"Bad TIP4P bond type for PPPM/TIP4P"); + double theta = force->angle->equilibrium_angle(typeA); + double blen = force->bond->equilibrium_distance(typeB); + alpha = qdist / (cos(0.5*theta) * blen); + } + + // compute qsum & qsqsum and warn if not charge-neutral + + qsum = qsqsum = 0.0; + for (int i = 0; i < atom->nlocal; i++) { + qsum += atom->q[i]; + qsqsum += atom->q[i]*atom->q[i]; + } + + double tmp; + MPI_Allreduce(&qsum,&tmp,1,MPI_DOUBLE,MPI_SUM,world); + qsum = tmp; + MPI_Allreduce(&qsqsum,&tmp,1,MPI_DOUBLE,MPI_SUM,world); + qsqsum = tmp; + + if (qsqsum == 0.0) + error->all(FLERR,"Cannot use kspace solver on system with no charge"); + if (fabs(qsum) > SMALL && me == 0) { + char str[128]; + sprintf(str,"System is not charge neutral, net charge = %g",qsum); + error->warning(FLERR,str); + } + + // set accuracy (force units) from 
accuracy_relative or accuracy_absolute + + if (accuracy_absolute >= 0.0) accuracy = accuracy_absolute; + else accuracy = accuracy_relative * two_charge_force; + + // setup FFT grid resolution and g_ewald + // normally one iteration thru while loop is all that is required + // if grid stencil extends beyond neighbor proc, reduce order and try again + + int iteration = 0; + + while (order > 1) { + if (iteration && me == 0) + error->warning(FLERR,"Reducing PPPM order b/c stencil extends " + "beyond neighbor processor"); + iteration++; + + set_grid(); + + if (nx_pppm >= OFFSET || ny_pppm >= OFFSET || nz_pppm >= OFFSET) + error->all(FLERR,"PPPM grid is too large"); + + // global indices of PPPM grid range from 0 to N-1 + // nlo_in,nhi_in = lower/upper limits of the 3d sub-brick of + // global PPPM grid that I own without ghost cells + // for slab PPPM, assign z grid as if it were not extended + + nxlo_in = static_cast (comm->xsplit[comm->myloc[0]] * nx_pppm); + nxhi_in = static_cast (comm->xsplit[comm->myloc[0]+1] * nx_pppm) - 1; + + nylo_in = static_cast (comm->ysplit[comm->myloc[1]] * ny_pppm); + nyhi_in = static_cast (comm->ysplit[comm->myloc[1]+1] * ny_pppm) - 1; + + nzlo_in = static_cast + (comm->zsplit[comm->myloc[2]] * nz_pppm/slab_volfactor); + nzhi_in = static_cast + (comm->zsplit[comm->myloc[2]+1] * nz_pppm/slab_volfactor) - 1; + + // nlower,nupper = stencil size for mapping particles to PPPM grid + + nlower = -(order-1)/2; + nupper = order/2; + + // shift values for particle <-> grid mapping + // add/subtract OFFSET to avoid int(-0.75) = 0 when want it to be -1 + + if (order % 2) shift = OFFSET + 0.5; + else shift = OFFSET; + if (order % 2) shiftone = 0.0; + else shiftone = 0.5; + + // nlo_out,nhi_out = lower/upper limits of the 3d sub-brick of + // global PPPM grid that my particles can contribute charge to + // effectively nlo_in,nhi_in + ghost cells + // nlo,nhi = global coords of grid pt to "lower left" of smallest/largest + // position a particle in my 
box can be at + // dist[3] = particle position bound = subbox + skin/2.0 + qdist + // qdist = offset due to TIP4P fictitious charge + // convert to triclinic if necessary + // nlo_out,nhi_out = nlo,nhi + stencil size for particle mapping + // for slab PPPM, assign z grid as if it were not extended + + triclinic = domain->triclinic; + double *prd,*sublo,*subhi; + + if (triclinic == 0) { + prd = domain->prd; + boxlo = domain->boxlo; + sublo = domain->sublo; + subhi = domain->subhi; + } else { + prd = domain->prd_lamda; + boxlo = domain->boxlo_lamda; + sublo = domain->sublo_lamda; + subhi = domain->subhi_lamda; + } + + double xprd = prd[0]; + double yprd = prd[1]; + double zprd = prd[2]; + double zprd_slab = zprd*slab_volfactor; + + double dist[3]; + double cuthalf = 0.5*neighbor->skin + qdist; + if (triclinic == 0) dist[0] = dist[1] = dist[2] = cuthalf; + else { + dist[0] = cuthalf/domain->prd[0]; + dist[1] = cuthalf/domain->prd[1]; + dist[2] = cuthalf/domain->prd[2]; + } + + int nlo,nhi; + + nlo = static_cast ((sublo[0]-dist[0]-boxlo[0]) * + nx_pppm/xprd + shift) - OFFSET; + nhi = static_cast ((subhi[0]+dist[0]-boxlo[0]) * + nx_pppm/xprd + shift) - OFFSET; + nxlo_out = nlo + nlower; + nxhi_out = nhi + nupper; + + nlo = static_cast ((sublo[1]-dist[1]-boxlo[1]) * + ny_pppm/yprd + shift) - OFFSET; + nhi = static_cast ((subhi[1]+dist[1]-boxlo[1]) * + ny_pppm/yprd + shift) - OFFSET; + nylo_out = nlo + nlower; + nyhi_out = nhi + nupper; + + nlo = static_cast ((sublo[2]-dist[2]-boxlo[2]) * + nz_pppm/zprd_slab + shift) - OFFSET; + nhi = static_cast ((subhi[2]+dist[2]-boxlo[2]) * + nz_pppm/zprd_slab + shift) - OFFSET; + nzlo_out = nlo + nlower; + nzhi_out = nhi + nupper; + + // for slab PPPM, change the grid boundary for processors at +z end + // to include the empty volume between periodically repeating slabs + // for slab PPPM, want charge data communicated from -z proc to +z proc, + // but not vice versa, also want field data communicated from +z proc to + // -z proc, but 
not vice versa + // this is accomplished by nzhi_in = nzhi_out on +z end (no ghost cells) + + if (slabflag == 1 && (comm->myloc[2] == comm->procgrid[2]-1)) { + nzhi_in = nz_pppm - 1; + nzhi_out = nz_pppm - 1; + } + + // nlo_ghost,nhi_ghost = # of planes I will recv from 6 directions + // that overlay domain I own + // proc in that direction tells me via sendrecv() + // if no neighbor proc, value is from self since I have ghosts regardless + + int nplanes; + MPI_Status status; + + nplanes = nxlo_in - nxlo_out; + if (comm->procneigh[0][0] != me) + MPI_Sendrecv(&nplanes,1,MPI_INT,comm->procneigh[0][0],0, + &nxhi_ghost,1,MPI_INT,comm->procneigh[0][1],0, + world,&status); + else nxhi_ghost = nplanes; + + nplanes = nxhi_out - nxhi_in; + if (comm->procneigh[0][1] != me) + MPI_Sendrecv(&nplanes,1,MPI_INT,comm->procneigh[0][1],0, + &nxlo_ghost,1,MPI_INT,comm->procneigh[0][0], + 0,world,&status); + else nxlo_ghost = nplanes; + + nplanes = nylo_in - nylo_out; + if (comm->procneigh[1][0] != me) + MPI_Sendrecv(&nplanes,1,MPI_INT,comm->procneigh[1][0],0, + &nyhi_ghost,1,MPI_INT,comm->procneigh[1][1],0, + world,&status); + else nyhi_ghost = nplanes; + + nplanes = nyhi_out - nyhi_in; + if (comm->procneigh[1][1] != me) + MPI_Sendrecv(&nplanes,1,MPI_INT,comm->procneigh[1][1],0, + &nylo_ghost,1,MPI_INT,comm->procneigh[1][0],0, + world,&status); + else nylo_ghost = nplanes; + + nplanes = nzlo_in - nzlo_out; + if (comm->procneigh[2][0] != me) + MPI_Sendrecv(&nplanes,1,MPI_INT,comm->procneigh[2][0],0, + &nzhi_ghost,1,MPI_INT,comm->procneigh[2][1],0, + world,&status); + else nzhi_ghost = nplanes; + + nplanes = nzhi_out - nzhi_in; + if (comm->procneigh[2][1] != me) + MPI_Sendrecv(&nplanes,1,MPI_INT,comm->procneigh[2][1],0, + &nzlo_ghost,1,MPI_INT,comm->procneigh[2][0],0, + world,&status); + else nzlo_ghost = nplanes; + + // test that ghost overlap is not bigger than my sub-domain + + int flag = 0; + if (nxlo_ghost > nxhi_in-nxlo_in+1) flag = 1; + if (nxhi_ghost > nxhi_in-nxlo_in+1) flag = 
1; + if (nylo_ghost > nyhi_in-nylo_in+1) flag = 1; + if (nyhi_ghost > nyhi_in-nylo_in+1) flag = 1; + if (nzlo_ghost > nzhi_in-nzlo_in+1) flag = 1; + if (nzhi_ghost > nzhi_in-nzlo_in+1) flag = 1; + + int flag_all; + MPI_Allreduce(&flag,&flag_all,1,MPI_INT,MPI_SUM,world); + + if (flag_all == 0) break; + order--; + } + + if (order == 0) error->all(FLERR,"PPPM order has been reduced to 0"); + + // decomposition of FFT mesh + // global indices range from 0 to N-1 + // proc owns entire x-dimension, clump of columns in y,z dimensions + // npey_fft,npez_fft = # of procs in y,z dims + // if nprocs is small enough, proc can own 1 or more entire xy planes, + // else proc owns 2d sub-blocks of yz plane + // me_y,me_z = which proc (0-npe_fft-1) I am in y,z dimensions + // nlo_fft,nhi_fft = lower/upper limit of the section + // of the global FFT mesh that I own + + int npey_fft,npez_fft; + if (nz_pppm >= nprocs) { + npey_fft = 1; + npez_fft = nprocs; + } else procs2grid2d(nprocs,ny_pppm,nz_pppm,&npey_fft,&npez_fft); + + int me_y = me % npey_fft; + int me_z = me / npey_fft; + + nxlo_fft = 0; + nxhi_fft = nx_pppm - 1; + nylo_fft = me_y*ny_pppm/npey_fft; + nyhi_fft = (me_y+1)*ny_pppm/npey_fft - 1; + nzlo_fft = me_z*nz_pppm/npez_fft; + nzhi_fft = (me_z+1)*nz_pppm/npez_fft - 1; + + // PPPM grid for this proc, including ghosts + + ngrid = (nxhi_out-nxlo_out+1) * (nyhi_out-nylo_out+1) * + (nzhi_out-nzlo_out+1); + + // FFT arrays on this proc, without ghosts + // nfft = FFT points in FFT decomposition on this proc + // nfft_brick = FFT points in 3d brick-decomposition on this proc + // nfft_both = greater of 2 values + + nfft = (nxhi_fft-nxlo_fft+1) * (nyhi_fft-nylo_fft+1) * + (nzhi_fft-nzlo_fft+1); + int nfft_brick = (nxhi_in-nxlo_in+1) * (nyhi_in-nylo_in+1) * + (nzhi_in-nzlo_in+1); + nfft_both = MAX(nfft,nfft_brick); + + // buffer space for use in brick2fft and fillbrick + // idel = max # of ghost planes to send or recv in +/- dir of each dim + // nx,ny,nz = owned planes (including 
ghosts) in each dim + // nxx,nyy,nzz = max # of grid cells to send in each dim + // nbuf = max in any dim, augment by 3x for components of vd_xyz in fillbrick + + int idelx,idely,idelz,nx,ny,nz,nxx,nyy,nzz; + + idelx = MAX(nxlo_ghost,nxhi_ghost); + idelx = MAX(idelx,nxhi_out-nxhi_in); + idelx = MAX(idelx,nxlo_in-nxlo_out); + + idely = MAX(nylo_ghost,nyhi_ghost); + idely = MAX(idely,nyhi_out-nyhi_in); + idely = MAX(idely,nylo_in-nylo_out); + + idelz = MAX(nzlo_ghost,nzhi_ghost); + idelz = MAX(idelz,nzhi_out-nzhi_in); + idelz = MAX(idelz,nzlo_in-nzlo_out); + + nx = nxhi_out - nxlo_out + 1; + ny = nyhi_out - nylo_out + 1; + nz = nzhi_out - nzlo_out + 1; + + nxx = idelx * ny * nz; + nyy = idely * nx * nz; + nzz = idelz * nx * ny; + + nbuf = MAX(nxx,nyy); + nbuf = MAX(nbuf,nzz); + + nbuf_peratom = 7*nbuf; + nbuf *= 3; + + // print stats + + int ngrid_max,nfft_both_max,nbuf_max; + MPI_Allreduce(&ngrid,&ngrid_max,1,MPI_INT,MPI_MAX,world); + MPI_Allreduce(&nfft_both,&nfft_both_max,1,MPI_INT,MPI_MAX,world); + MPI_Allreduce(&nbuf,&nbuf_max,1,MPI_INT,MPI_MAX,world); + + if (me == 0) { + if (screen) fprintf(screen," brick FFT buffer size/proc = %d %d %d\n", + ngrid_max,nfft_both_max,nbuf_max); + if (logfile) fprintf(logfile," brick FFT buffer size/proc = %d %d %d\n", + ngrid_max,nfft_both_max,nbuf_max); + } + + // allocate K-space dependent memory + // don't invoke allocate_peratom() here, wait to see if needed + + allocate(); + + // pre-compute Green's function denomiator expansion + // pre-compute 1d charge distribution coefficients + + compute_gf_denom(); + compute_rho_coeff(); +} + +/* ---------------------------------------------------------------------- + adjust PPPM coeffs, called initially and whenever volume has changed +------------------------------------------------------------------------- */ + +void PPPMOld::setup() +{ + int i,j,k,l,m,n; + double *prd; + + // volume-dependent factors + // adjust z dimension for 2d slab PPPM + // z dimension for 3d PPPM is zprd 
since slab_volfactor = 1.0 + + if (triclinic == 0) prd = domain->prd; + else prd = domain->prd_lamda; + + double xprd = prd[0]; + double yprd = prd[1]; + double zprd = prd[2]; + double zprd_slab = zprd*slab_volfactor; + volume = xprd * yprd * zprd_slab; + + delxinv = nx_pppm/xprd; + delyinv = ny_pppm/yprd; + delzinv = nz_pppm/zprd_slab; + + delvolinv = delxinv*delyinv*delzinv; + + double unitkx = (2.0*MY_PI/xprd); + double unitky = (2.0*MY_PI/yprd); + double unitkz = (2.0*MY_PI/zprd_slab); + + // fkx,fky,fkz for my FFT grid pts + + double per; + + for (i = nxlo_fft; i <= nxhi_fft; i++) { + per = i - nx_pppm*(2*i/nx_pppm); + fkx[i] = unitkx*per; + } + + for (i = nylo_fft; i <= nyhi_fft; i++) { + per = i - ny_pppm*(2*i/ny_pppm); + fky[i] = unitky*per; + } + + for (i = nzlo_fft; i <= nzhi_fft; i++) { + per = i - nz_pppm*(2*i/nz_pppm); + fkz[i] = unitkz*per; + } + + // virial coefficients + + double sqk,vterm; + + n = 0; + for (k = nzlo_fft; k <= nzhi_fft; k++) { + for (j = nylo_fft; j <= nyhi_fft; j++) { + for (i = nxlo_fft; i <= nxhi_fft; i++) { + sqk = fkx[i]*fkx[i] + fky[j]*fky[j] + fkz[k]*fkz[k]; + if (sqk == 0.0) { + vg[n][0] = 0.0; + vg[n][1] = 0.0; + vg[n][2] = 0.0; + vg[n][3] = 0.0; + vg[n][4] = 0.0; + vg[n][5] = 0.0; + } else { + vterm = -2.0 * (1.0/sqk + 0.25/(g_ewald*g_ewald)); + vg[n][0] = 1.0 + vterm*fkx[i]*fkx[i]; + vg[n][1] = 1.0 + vterm*fky[j]*fky[j]; + vg[n][2] = 1.0 + vterm*fkz[k]*fkz[k]; + vg[n][3] = vterm*fkx[i]*fky[j]; + vg[n][4] = vterm*fkx[i]*fkz[k]; + vg[n][5] = vterm*fky[j]*fkz[k]; + } + n++; + } + } + } + + // modified (Hockney-Eastwood) Coulomb Green's function + + int nx,ny,nz,kper,lper,mper; + double snx,sny,snz,snx2,sny2,snz2; + double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz; + double sum1,dot1,dot2; + double numerator,denominator; + + int nbx = static_cast ((g_ewald*xprd/(MY_PI*nx_pppm)) * + pow(-log(EPS_HOC),0.25)); + int nby = static_cast ((g_ewald*yprd/(MY_PI*ny_pppm)) * + pow(-log(EPS_HOC),0.25)); + int nbz = static_cast 
((g_ewald*zprd_slab/(MY_PI*nz_pppm)) * + pow(-log(EPS_HOC),0.25)); + + double form = 1.0; + + n = 0; + for (m = nzlo_fft; m <= nzhi_fft; m++) { + mper = m - nz_pppm*(2*m/nz_pppm); + snz = sin(0.5*unitkz*mper*zprd_slab/nz_pppm); + snz2 = snz*snz; + + for (l = nylo_fft; l <= nyhi_fft; l++) { + lper = l - ny_pppm*(2*l/ny_pppm); + sny = sin(0.5*unitky*lper*yprd/ny_pppm); + sny2 = sny*sny; + + for (k = nxlo_fft; k <= nxhi_fft; k++) { + kper = k - nx_pppm*(2*k/nx_pppm); + snx = sin(0.5*unitkx*kper*xprd/nx_pppm); + snx2 = snx*snx; + + sqk = pow(unitkx*kper,2.0) + pow(unitky*lper,2.0) + + pow(unitkz*mper,2.0); + + if (sqk != 0.0) { + numerator = form*12.5663706/sqk; + denominator = gf_denom(snx2,sny2,snz2); + sum1 = 0.0; + const double dorder = static_cast(order); + for (nx = -nbx; nx <= nbx; nx++) { + qx = unitkx*(kper+nx_pppm*nx); + sx = exp(-0.25*pow(qx/g_ewald,2.0)); + wx = 1.0; + argx = 0.5*qx*xprd/nx_pppm; + if (argx != 0.0) wx = pow(sin(argx)/argx,dorder); + for (ny = -nby; ny <= nby; ny++) { + qy = unitky*(lper+ny_pppm*ny); + sy = exp(-0.25*pow(qy/g_ewald,2.0)); + wy = 1.0; + argy = 0.5*qy*yprd/ny_pppm; + if (argy != 0.0) wy = pow(sin(argy)/argy,dorder); + for (nz = -nbz; nz <= nbz; nz++) { + qz = unitkz*(mper+nz_pppm*nz); + sz = exp(-0.25*pow(qz/g_ewald,2.0)); + wz = 1.0; + argz = 0.5*qz*zprd_slab/nz_pppm; + if (argz != 0.0) wz = pow(sin(argz)/argz,dorder); + + dot1 = unitkx*kper*qx + unitky*lper*qy + unitkz*mper*qz; + dot2 = qx*qx+qy*qy+qz*qz; + sum1 += (dot1/dot2) * sx*sy*sz * pow(wx*wy*wz,2.0); + } + } + } + greensfn[n++] = numerator*sum1/denominator; + } else greensfn[n++] = 0.0; + } + } + } +} + +/* ---------------------------------------------------------------------- + compute the PPPM long-range force, energy, virial +------------------------------------------------------------------------- */ + +void PPPMOld::compute(int eflag, int vflag) +{ + int i,j; + + // set energy/virial flags + // invoke allocate_peratom() if needed for first time + + if (eflag || 
vflag) ev_setup(eflag,vflag); + else evflag = evflag_atom = eflag_global = vflag_global = + eflag_atom = vflag_atom = 0; + + if (evflag_atom && !peratom_allocate_flag) { + allocate_peratom(); + peratom_allocate_flag = 1; + } + + // convert atoms from box to lamda coords + + if (triclinic == 0) boxlo = domain->boxlo; + else { + boxlo = domain->boxlo_lamda; + domain->x2lamda(atom->nlocal); + } + + // extend size of per-atom arrays if necessary + + if (atom->nlocal > nmax) { + memory->destroy(part2grid); + nmax = atom->nmax; + memory->create(part2grid,nmax,3,"pppm:part2grid"); + } + + // find grid points for all my particles + // map my particle charge onto my local 3d density grid + + particle_map(); + make_rho(); + + // all procs communicate density values from their ghost cells + // to fully sum contribution in their 3d bricks + // remap from 3d decomposition to FFT decomposition + + brick2fft(); + + // compute potential gradient on my FFT grid and + // portion of e_long on this proc's FFT grid + // return gradients (electric fields) in 3d brick decomposition + // also performs per-atom calculations via poisson_peratom() + + poisson(); + + // all procs communicate E-field values + // to fill ghost cells surrounding their 3d bricks + + fillbrick(); + + // extra per-atom energy/virial communication + + if (evflag_atom) fillbrick_peratom(); + + // calculate the force on my particles + + fieldforce(); + + // extra per-atom energy/virial communication + + if (evflag_atom) fieldforce_peratom(); + + // sum global energy across procs and add in volume-dependent term + + const double qscale = force->qqrd2e * scale; + + if (eflag_global) { + double energy_all; + MPI_Allreduce(&energy,&energy_all,1,MPI_DOUBLE,MPI_SUM,world); + energy = energy_all; + + energy *= 0.5*volume; + energy -= g_ewald*qsqsum/MY_PIS + + MY_PI2*qsum*qsum / (g_ewald*g_ewald*volume); + energy *= qscale; + } + + // sum global virial across procs + + if (vflag_global) { + double virial_all[6]; + 
MPI_Allreduce(virial,virial_all,6,MPI_DOUBLE,MPI_SUM,world); + for (i = 0; i < 6; i++) virial[i] = 0.5*qscale*volume*virial_all[i]; + } + + // per-atom energy/virial + // energy includes self-energy correction + + if (evflag_atom) { + double *q = atom->q; + int nlocal = atom->nlocal; + + if (eflag_atom) { + for (i = 0; i < nlocal; i++) { + eatom[i] *= 0.5; + eatom[i] -= g_ewald*q[i]*q[i]/MY_PIS + MY_PI2*q[i]*qsum / + (g_ewald*g_ewald*volume); + eatom[i] *= qscale; + } + } + + if (vflag_atom) { + for (i = 0; i < nlocal; i++) + for (j = 0; j < 6; j++) vatom[i][j] *= 0.5*q[i]*qscale; + } + } + + // 2d slab correction + + if (slabflag == 1) slabcorr(); + + // convert atoms back from lamda to box coords + + if (triclinic) domain->lamda2x(atom->nlocal); +} + +/* ---------------------------------------------------------------------- + allocate memory that depends on # of K-vectors and order +------------------------------------------------------------------------- */ + +void PPPMOld::allocate() +{ + memory->create3d_offset(density_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, + nxlo_out,nxhi_out,"pppm:density_brick"); + memory->create3d_offset(vdx_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, + nxlo_out,nxhi_out,"pppm:vdx_brick"); + memory->create3d_offset(vdy_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, + nxlo_out,nxhi_out,"pppm:vdy_brick"); + memory->create3d_offset(vdz_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, + nxlo_out,nxhi_out,"pppm:vdz_brick"); + + memory->create(density_fft,nfft_both,"pppm:density_fft"); + memory->create(greensfn,nfft_both,"pppm:greensfn"); + memory->create(work1,2*nfft_both,"pppm:work1"); + memory->create(work2,2*nfft_both,"pppm:work2"); + memory->create(vg,nfft_both,6,"pppm:vg"); + + memory->create1d_offset(fkx,nxlo_fft,nxhi_fft,"pppm:fkx"); + memory->create1d_offset(fky,nylo_fft,nyhi_fft,"pppm:fky"); + memory->create1d_offset(fkz,nzlo_fft,nzhi_fft,"pppm:fkz"); + + memory->create(buf1,nbuf,"pppm:buf1"); + memory->create(buf2,nbuf,"pppm:buf2"); + + // 
summation coeffs + + memory->create(gf_b,order,"pppm:gf_b"); + memory->create2d_offset(rho1d,3,-order/2,order/2,"pppm:rho1d"); + memory->create2d_offset(rho_coeff,order,(1-order)/2,order/2,"pppm:rho_coeff"); + + // create 2 FFTs and a Remap + // 1st FFT keeps data in FFT decompostion + // 2nd FFT returns data in 3d brick decomposition + // remap takes data from 3d brick to FFT decomposition + + int tmp; + + fft1 = new FFT3d(lmp,world,nx_pppm,ny_pppm,nz_pppm, + nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft, + nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft, + 0,0,&tmp); + + fft2 = new FFT3d(lmp,world,nx_pppm,ny_pppm,nz_pppm, + nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft, + nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in, + 0,0,&tmp); + + remap = new Remap(lmp,world, + nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in, + nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft, + 1,0,0,FFT_PRECISION); +} + +/* ---------------------------------------------------------------------- + allocate per-atom memory that depends on # of K-vectors and order +------------------------------------------------------------------------- */ + +void PPPMOld::allocate_peratom() +{ + memory->create3d_offset(u_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, + nxlo_out,nxhi_out,"pppm:u_brick"); + + memory->create3d_offset(v0_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, + nxlo_out,nxhi_out,"pppm:v0_brick"); + memory->create3d_offset(v1_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, + nxlo_out,nxhi_out,"pppm:v1_brick"); + memory->create3d_offset(v2_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, + nxlo_out,nxhi_out,"pppm:v2_brick"); + memory->create3d_offset(v3_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, + nxlo_out,nxhi_out,"pppm:v3_brick"); + memory->create3d_offset(v4_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, + nxlo_out,nxhi_out,"pppm:v4_brick"); + memory->create3d_offset(v5_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, + nxlo_out,nxhi_out,"pppm:v5_brick"); + + 
memory->create(buf3,nbuf_peratom,"pppm:buf3"); + memory->create(buf4,nbuf_peratom,"pppm:buf4"); +} + +/* ---------------------------------------------------------------------- + deallocate memory that depends on # of K-vectors and order +------------------------------------------------------------------------- */ + +void PPPMOld::deallocate() +{ + memory->destroy3d_offset(density_brick,nzlo_out,nylo_out,nxlo_out); + memory->destroy3d_offset(vdx_brick,nzlo_out,nylo_out,nxlo_out); + memory->destroy3d_offset(vdy_brick,nzlo_out,nylo_out,nxlo_out); + memory->destroy3d_offset(vdz_brick,nzlo_out,nylo_out,nxlo_out); + + memory->destroy(density_fft); + memory->destroy(greensfn); + memory->destroy(work1); + memory->destroy(work2); + memory->destroy(vg); + + memory->destroy1d_offset(fkx,nxlo_fft); + memory->destroy1d_offset(fky,nylo_fft); + memory->destroy1d_offset(fkz,nzlo_fft); + + memory->destroy(buf1); + memory->destroy(buf2); + + memory->destroy(gf_b); + memory->destroy2d_offset(rho1d,-order/2); + memory->destroy2d_offset(rho_coeff,(1-order)/2); + + delete fft1; + delete fft2; + delete remap; +} + +/* ---------------------------------------------------------------------- + deallocate per-atom memory that depends on # of K-vectors and order +------------------------------------------------------------------------- */ + +void PPPMOld::deallocate_peratom() +{ + memory->destroy3d_offset(u_brick,nzlo_out,nylo_out,nxlo_out); + + memory->destroy3d_offset(v0_brick,nzlo_out,nylo_out,nxlo_out); + memory->destroy3d_offset(v1_brick,nzlo_out,nylo_out,nxlo_out); + memory->destroy3d_offset(v2_brick,nzlo_out,nylo_out,nxlo_out); + memory->destroy3d_offset(v3_brick,nzlo_out,nylo_out,nxlo_out); + memory->destroy3d_offset(v4_brick,nzlo_out,nylo_out,nxlo_out); + memory->destroy3d_offset(v5_brick,nzlo_out,nylo_out,nxlo_out); + + memory->destroy(buf3); + memory->destroy(buf4); +} + +/* ---------------------------------------------------------------------- + set size of FFT grid (nx,ny,nz_pppm) 
and g_ewald +------------------------------------------------------------------------- */ + +void PPPMOld::set_grid() +{ + // see JCP 109, pg 7698 for derivation of coefficients + // higher order coefficients may be computed if needed + + double **acons; + memory->create(acons,8,7,"pppm:acons"); + + acons[1][0] = 2.0 / 3.0; + acons[2][0] = 1.0 / 50.0; + acons[2][1] = 5.0 / 294.0; + acons[3][0] = 1.0 / 588.0; + acons[3][1] = 7.0 / 1440.0; + acons[3][2] = 21.0 / 3872.0; + acons[4][0] = 1.0 / 4320.0; + acons[4][1] = 3.0 / 1936.0; + acons[4][2] = 7601.0 / 2271360.0; + acons[4][3] = 143.0 / 28800.0; + acons[5][0] = 1.0 / 23232.0; + acons[5][1] = 7601.0 / 13628160.0; + acons[5][2] = 143.0 / 69120.0; + acons[5][3] = 517231.0 / 106536960.0; + acons[5][4] = 106640677.0 / 11737571328.0; + acons[6][0] = 691.0 / 68140800.0; + acons[6][1] = 13.0 / 57600.0; + acons[6][2] = 47021.0 / 35512320.0; + acons[6][3] = 9694607.0 / 2095994880.0; + acons[6][4] = 733191589.0 / 59609088000.0; + acons[6][5] = 326190917.0 / 11700633600.0; + acons[7][0] = 1.0 / 345600.0; + acons[7][1] = 3617.0 / 35512320.0; + acons[7][2] = 745739.0 / 838397952.0; + acons[7][3] = 56399353.0 / 12773376000.0; + acons[7][4] = 25091609.0 / 1560084480.0; + acons[7][5] = 1755948832039.0 / 36229939200000.0; + acons[7][6] = 4887769399.0 / 37838389248.0; + + double q2 = qsqsum * force->qqrd2e; + + // use xprd,yprd,zprd even if triclinic so grid size is the same + // adjust z dimension for 2d slab PPPM + // 3d PPPM just uses zprd since slab_volfactor = 1.0 + + double xprd = domain->xprd; + double yprd = domain->yprd; + double zprd = domain->zprd; + double zprd_slab = zprd*slab_volfactor; + + // make initial g_ewald estimate + // based on desired accuracy and real space cutoff + // fluid-occupied volume used to estimate real-space error + // zprd used rather than zprd_slab + + double h_x,h_y,h_z; + bigint natoms = atom->natoms; + + if (!gewaldflag) { + if (accuracy <= 0.0) + error->all(FLERR,"KSpace accuracy must be > 0"); 
+ g_ewald = accuracy*sqrt(natoms*cutoff*xprd*yprd*zprd) / (2.0*q2); + if (g_ewald >= 1.0) g_ewald = (1.35 - 0.15*log(accuracy))/cutoff; + else g_ewald = sqrt(-log(g_ewald)) / cutoff; + } + + // set optimal nx_pppm,ny_pppm,nz_pppm based on order and accuracy + // nz_pppm uses extended zprd_slab instead of zprd + // h = 1/g_ewald is upper bound on h such that h*g_ewald <= 1 + // reduce it until accuracy target is met + + if (!gridflag) { + double err; + h_x = h_y = h_z = 1.0/g_ewald; + + nx_pppm = static_cast (xprd/h_x) + 1; + ny_pppm = static_cast (yprd/h_y) + 1; + nz_pppm = static_cast (zprd_slab/h_z) + 1; + + err = rms(h_x,xprd,natoms,q2,acons); + while (err > accuracy) { + err = rms(h_x,xprd,natoms,q2,acons); + nx_pppm++; + h_x = xprd/nx_pppm; + } + + err = rms(h_y,yprd,natoms,q2,acons); + while (err > accuracy) { + err = rms(h_y,yprd,natoms,q2,acons); + ny_pppm++; + h_y = yprd/ny_pppm; + } + + err = rms(h_z,zprd_slab,natoms,q2,acons); + while (err > accuracy) { + err = rms(h_z,zprd_slab,natoms,q2,acons); + nz_pppm++; + h_z = zprd_slab/nz_pppm; + } + } + + // boost grid size until it is factorable + + while (!factorable(nx_pppm)) nx_pppm++; + while (!factorable(ny_pppm)) ny_pppm++; + while (!factorable(nz_pppm)) nz_pppm++; + + // adjust g_ewald for new grid size + + h_x = xprd/static_cast(nx_pppm); + h_y = yprd/static_cast(ny_pppm); + h_z = zprd_slab/static_cast(nz_pppm); + + if (!gewaldflag) { + double gew1,gew2,dgew,f,fmid,hmin,rtb; + int ncount; + + gew1 = 0.0; + g_ewald = gew1; + f = diffpr(h_x,h_y,h_z,q2,acons); + + hmin = MIN(h_x,MIN(h_y,h_z)); + gew2 = 10.0/hmin; + g_ewald = gew2; + fmid = diffpr(h_x,h_y,h_z,q2,acons); + + if (f*fmid >= 0.0) error->all(FLERR,"Cannot compute PPPM G"); + rtb = f < 0.0 ? 
(dgew=gew2-gew1,gew1) : (dgew=gew1-gew2,gew2); + ncount = 0; + while (fabs(dgew) > SMALL && fmid != 0.0) { + dgew *= 0.5; + g_ewald = rtb + dgew; + fmid = diffpr(h_x,h_y,h_z,q2,acons); + if (fmid <= 0.0) rtb = g_ewald; + ncount++; + if (ncount > LARGE) error->all(FLERR,"Cannot compute PPPM G"); + } + } + + // final RMS accuracy + + double lprx = rms(h_x,xprd,natoms,q2,acons); + double lpry = rms(h_y,yprd,natoms,q2,acons); + double lprz = rms(h_z,zprd_slab,natoms,q2,acons); + double lpr = sqrt(lprx*lprx + lpry*lpry + lprz*lprz) / sqrt(3.0); + double q2_over_sqrt = q2 / sqrt(natoms*cutoff*xprd*yprd*zprd_slab); + double spr = 2.0 *q2_over_sqrt * exp(-g_ewald*g_ewald*cutoff*cutoff); + double tpr = estimate_table_accuracy(q2_over_sqrt,spr); + double accuracy = sqrt(lpr*lpr + spr*spr + tpr*tpr); + + // free local memory + + memory->destroy(acons); + + // print info + + if (me == 0) { +#ifdef FFT_SINGLE + const char fft_prec[] = "single"; +#else + const char fft_prec[] = "double"; +#endif + if (screen) { + fprintf(screen," G vector (1/distance)= %g\n",g_ewald); + fprintf(screen," grid = %d %d %d\n",nx_pppm,ny_pppm,nz_pppm); + fprintf(screen," stencil order = %d\n",order); + fprintf(screen," estimated absolute RMS force accuracy = %g\n", + accuracy); + fprintf(screen," estimated relative force accuracy = %g\n", + accuracy/two_charge_force); + fprintf(screen," using %s precision FFTs\n",fft_prec); + } + if (logfile) { + fprintf(logfile," G vector (1/distance) = %g\n",g_ewald); + fprintf(logfile," grid = %d %d %d\n",nx_pppm,ny_pppm,nz_pppm); + fprintf(logfile," stencil order = %d\n",order); + fprintf(logfile," estimated absolute RMS force accuracy = %g\n", + accuracy); + fprintf(logfile," estimated relative force accuracy = %g\n", + accuracy/two_charge_force); + fprintf(logfile," using %s precision FFTs\n",fft_prec); + } + } +} + +/* ---------------------------------------------------------------------- + check if all factors of n are in list of factors + return 1 if yes, 0 
if no +------------------------------------------------------------------------- */ + +int PPPMOld::factorable(int n) +{ + int i; + + while (n > 1) { + for (i = 0; i < nfactors; i++) { + if (n % factors[i] == 0) { + n /= factors[i]; + break; + } + } + if (i == nfactors) return 0; + } + + return 1; +} + +/* ---------------------------------------------------------------------- + compute RMS accuracy for a dimension +------------------------------------------------------------------------- */ + +double PPPMOld::rms(double h, double prd, bigint natoms, + double q2, double **acons) +{ + double sum = 0.0; + for (int m = 0; m < order; m++) + sum += acons[order][m] * pow(h*g_ewald,2.0*m); + double value = q2 * pow(h*g_ewald,(double)order) * + sqrt(g_ewald*prd*sqrt(2.0*MY_PI)*sum/natoms) / (prd*prd); + return value; +} + +/* ---------------------------------------------------------------------- + compute difference in real-space and KSpace RMS accuracy +------------------------------------------------------------------------- */ + +double PPPMOld::diffpr(double h_x, double h_y, double h_z, double q2, + double **acons) +{ + double lprx,lpry,lprz,kspace_prec,real_prec; + double xprd = domain->xprd; + double yprd = domain->yprd; + double zprd = domain->zprd; + bigint natoms = atom->natoms; + + lprx = rms(h_x,xprd,natoms,q2,acons); + lpry = rms(h_y,yprd,natoms,q2,acons); + lprz = rms(h_z,zprd*slab_volfactor,natoms,q2,acons); + kspace_prec = sqrt(lprx*lprx + lpry*lpry + lprz*lprz) / sqrt(3.0); + real_prec = 2.0*q2 * exp(-g_ewald*g_ewald*cutoff*cutoff) / + sqrt(static_cast(natoms)*cutoff*xprd*yprd*zprd); + double value = kspace_prec - real_prec; + return value; +} + +/* ---------------------------------------------------------------------- + pre-compute Green's function denominator expansion coeffs, Gamma(2n) +------------------------------------------------------------------------- */ + +void PPPMOld::compute_gf_denom() +{ + int k,l,m; + + for (l = 1; l < order; l++) gf_b[l] = 
0.0; + gf_b[0] = 1.0; + + for (m = 1; m < order; m++) { + for (l = m; l > 0; l--) + gf_b[l] = 4.0 * (gf_b[l]*(l-m)*(l-m-0.5)-gf_b[l-1]*(l-m-1)*(l-m-1)); + gf_b[0] = 4.0 * (gf_b[0]*(l-m)*(l-m-0.5)); + } + + bigint ifact = 1; + for (k = 1; k < 2*order; k++) ifact *= k; + double gaminv = 1.0/ifact; + for (l = 0; l < order; l++) gf_b[l] *= gaminv; +} + +/* ---------------------------------------------------------------------- + ghost-swap to accumulate full density in brick decomposition + remap density from 3d brick decomposition to FFT decomposition +------------------------------------------------------------------------- */ + +void PPPMOld::brick2fft() +{ + int i,n,ix,iy,iz; + MPI_Request request; + MPI_Status status; + + // pack my ghosts for +x processor + // pass data to self or +x processor + // unpack and sum recv data into my real cells + + n = 0; + for (iz = nzlo_out; iz <= nzhi_out; iz++) + for (iy = nylo_out; iy <= nyhi_out; iy++) + for (ix = nxhi_in+1; ix <= nxhi_out; ix++) + buf1[n++] = density_brick[iz][iy][ix]; + + if (comm->procneigh[0][1] == me) + for (i = 0; i < n; i++) buf2[i] = buf1[i]; + else { + MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[0][0],0,world,&request); + MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[0][1],0,world); + MPI_Wait(&request,&status); + } + + n = 0; + for (iz = nzlo_out; iz <= nzhi_out; iz++) + for (iy = nylo_out; iy <= nyhi_out; iy++) + for (ix = nxlo_in; ix < nxlo_in+nxlo_ghost; ix++) + density_brick[iz][iy][ix] += buf2[n++]; + + // pack my ghosts for -x processor + // pass data to self or -x processor + // unpack and sum recv data into my real cells + + n = 0; + for (iz = nzlo_out; iz <= nzhi_out; iz++) + for (iy = nylo_out; iy <= nyhi_out; iy++) + for (ix = nxlo_out; ix < nxlo_in; ix++) + buf1[n++] = density_brick[iz][iy][ix]; + + if (comm->procneigh[0][0] == me) + for (i = 0; i < n; i++) buf2[i] = buf1[i]; + else { + MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[0][1],0,world,&request); + 
MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[0][0],0,world); + MPI_Wait(&request,&status); + } + + n = 0; + for (iz = nzlo_out; iz <= nzhi_out; iz++) + for (iy = nylo_out; iy <= nyhi_out; iy++) + for (ix = nxhi_in-nxhi_ghost+1; ix <= nxhi_in; ix++) + density_brick[iz][iy][ix] += buf2[n++]; + + // pack my ghosts for +y processor + // pass data to self or +y processor + // unpack and sum recv data into my real cells + + n = 0; + for (iz = nzlo_out; iz <= nzhi_out; iz++) + for (iy = nyhi_in+1; iy <= nyhi_out; iy++) + for (ix = nxlo_in; ix <= nxhi_in; ix++) + buf1[n++] = density_brick[iz][iy][ix]; + + if (comm->procneigh[1][1] == me) + for (i = 0; i < n; i++) buf2[i] = buf1[i]; + else { + MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[1][0],0,world,&request); + MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[1][1],0,world); + MPI_Wait(&request,&status); + } + + n = 0; + for (iz = nzlo_out; iz <= nzhi_out; iz++) + for (iy = nylo_in; iy < nylo_in+nylo_ghost; iy++) + for (ix = nxlo_in; ix <= nxhi_in; ix++) + density_brick[iz][iy][ix] += buf2[n++]; + + // pack my ghosts for -y processor + // pass data to self or -y processor + // unpack and sum recv data into my real cells + + n = 0; + for (iz = nzlo_out; iz <= nzhi_out; iz++) + for (iy = nylo_out; iy < nylo_in; iy++) + for (ix = nxlo_in; ix <= nxhi_in; ix++) + buf1[n++] = density_brick[iz][iy][ix]; + + if (comm->procneigh[1][0] == me) + for (i = 0; i < n; i++) buf2[i] = buf1[i]; + else { + MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[1][1],0,world,&request); + MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[1][0],0,world); + MPI_Wait(&request,&status); + } + + n = 0; + for (iz = nzlo_out; iz <= nzhi_out; iz++) + for (iy = nyhi_in-nyhi_ghost+1; iy <= nyhi_in; iy++) + for (ix = nxlo_in; ix <= nxhi_in; ix++) + density_brick[iz][iy][ix] += buf2[n++]; + + // pack my ghosts for +z processor + // pass data to self or +z processor + // unpack and sum recv data into my real cells + + n = 0; + for (iz = nzhi_in+1; iz <= 
nzhi_out; iz++) + for (iy = nylo_in; iy <= nyhi_in; iy++) + for (ix = nxlo_in; ix <= nxhi_in; ix++) + buf1[n++] = density_brick[iz][iy][ix]; + + if (comm->procneigh[2][1] == me) + for (i = 0; i < n; i++) buf2[i] = buf1[i]; + else { + MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[2][0],0,world,&request); + MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[2][1],0,world); + MPI_Wait(&request,&status); + } + + n = 0; + for (iz = nzlo_in; iz < nzlo_in+nzlo_ghost; iz++) + for (iy = nylo_in; iy <= nyhi_in; iy++) + for (ix = nxlo_in; ix <= nxhi_in; ix++) + density_brick[iz][iy][ix] += buf2[n++]; + + // pack my ghosts for -z processor + // pass data to self or -z processor + // unpack and sum recv data into my real cells + + n = 0; + for (iz = nzlo_out; iz < nzlo_in; iz++) + for (iy = nylo_in; iy <= nyhi_in; iy++) + for (ix = nxlo_in; ix <= nxhi_in; ix++) + buf1[n++] = density_brick[iz][iy][ix]; + + if (comm->procneigh[2][0] == me) + for (i = 0; i < n; i++) buf2[i] = buf1[i]; + else { + MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[2][1],0,world,&request); + MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[2][0],0,world); + MPI_Wait(&request,&status); + } + + n = 0; + for (iz = nzhi_in-nzhi_ghost+1; iz <= nzhi_in; iz++) + for (iy = nylo_in; iy <= nyhi_in; iy++) + for (ix = nxlo_in; ix <= nxhi_in; ix++) + density_brick[iz][iy][ix] += buf2[n++]; + + // remap from 3d brick decomposition to FFT decomposition + // copy grabs inner portion of density from 3d brick + // remap could be done as pre-stage of FFT, + // but this works optimally on only double values, not complex values + + n = 0; + for (iz = nzlo_in; iz <= nzhi_in; iz++) + for (iy = nylo_in; iy <= nyhi_in; iy++) + for (ix = nxlo_in; ix <= nxhi_in; ix++) + density_fft[n++] = density_brick[iz][iy][ix]; + + remap->perform(density_fft,density_fft,work1); +} + +/* ---------------------------------------------------------------------- + ghost-swap to fill ghost cells of my brick with field values 
+------------------------------------------------------------------------- */ + +void PPPMOld::fillbrick() +{ + int i,n,ix,iy,iz; + MPI_Request request; + MPI_Status status; + + // pack my real cells for +z processor + // pass data to self or +z processor + // unpack and sum recv data into my ghost cells + + n = 0; + for (iz = nzhi_in-nzhi_ghost+1; iz <= nzhi_in; iz++) + for (iy = nylo_in; iy <= nyhi_in; iy++) + for (ix = nxlo_in; ix <= nxhi_in; ix++) { + buf1[n++] = vdx_brick[iz][iy][ix]; + buf1[n++] = vdy_brick[iz][iy][ix]; + buf1[n++] = vdz_brick[iz][iy][ix]; + } + + if (comm->procneigh[2][1] == me) + for (i = 0; i < n; i++) buf2[i] = buf1[i]; + else { + MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[2][0],0,world,&request); + MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[2][1],0,world); + MPI_Wait(&request,&status); + } + + n = 0; + for (iz = nzlo_out; iz < nzlo_in; iz++) + for (iy = nylo_in; iy <= nyhi_in; iy++) + for (ix = nxlo_in; ix <= nxhi_in; ix++) { + vdx_brick[iz][iy][ix] = buf2[n++]; + vdy_brick[iz][iy][ix] = buf2[n++]; + vdz_brick[iz][iy][ix] = buf2[n++]; + } + + // pack my real cells for -z processor + // pass data to self or -z processor + // unpack and sum recv data into my ghost cells + + n = 0; + for (iz = nzlo_in; iz < nzlo_in+nzlo_ghost; iz++) + for (iy = nylo_in; iy <= nyhi_in; iy++) + for (ix = nxlo_in; ix <= nxhi_in; ix++) { + buf1[n++] = vdx_brick[iz][iy][ix]; + buf1[n++] = vdy_brick[iz][iy][ix]; + buf1[n++] = vdz_brick[iz][iy][ix]; + } + + if (comm->procneigh[2][0] == me) + for (i = 0; i < n; i++) buf2[i] = buf1[i]; + else { + MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[2][1],0,world,&request); + MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[2][0],0,world); + MPI_Wait(&request,&status); + } + + n = 0; + for (iz = nzhi_in+1; iz <= nzhi_out; iz++) + for (iy = nylo_in; iy <= nyhi_in; iy++) + for (ix = nxlo_in; ix <= nxhi_in; ix++) { + vdx_brick[iz][iy][ix] = buf2[n++]; + vdy_brick[iz][iy][ix] = buf2[n++]; + vdz_brick[iz][iy][ix] 
= buf2[n++]; + } + + // pack my real cells for +y processor + // pass data to self or +y processor + // unpack and sum recv data into my ghost cells + + n = 0; + for (iz = nzlo_out; iz <= nzhi_out; iz++) + for (iy = nyhi_in-nyhi_ghost+1; iy <= nyhi_in; iy++) + for (ix = nxlo_in; ix <= nxhi_in; ix++) { + buf1[n++] = vdx_brick[iz][iy][ix]; + buf1[n++] = vdy_brick[iz][iy][ix]; + buf1[n++] = vdz_brick[iz][iy][ix]; + } + + if (comm->procneigh[1][1] == me) + for (i = 0; i < n; i++) buf2[i] = buf1[i]; + else { + MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[1][0],0,world,&request); + MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[1][1],0,world); + MPI_Wait(&request,&status); + } + + n = 0; + for (iz = nzlo_out; iz <= nzhi_out; iz++) + for (iy = nylo_out; iy < nylo_in; iy++) + for (ix = nxlo_in; ix <= nxhi_in; ix++) { + vdx_brick[iz][iy][ix] = buf2[n++]; + vdy_brick[iz][iy][ix] = buf2[n++]; + vdz_brick[iz][iy][ix] = buf2[n++]; + } + + // pack my real cells for -y processor + // pass data to self or -y processor + // unpack and sum recv data into my ghost cells + + n = 0; + for (iz = nzlo_out; iz <= nzhi_out; iz++) + for (iy = nylo_in; iy < nylo_in+nylo_ghost; iy++) + for (ix = nxlo_in; ix <= nxhi_in; ix++) { + buf1[n++] = vdx_brick[iz][iy][ix]; + buf1[n++] = vdy_brick[iz][iy][ix]; + buf1[n++] = vdz_brick[iz][iy][ix]; + } + + if (comm->procneigh[1][0] == me) + for (i = 0; i < n; i++) buf2[i] = buf1[i]; + else { + MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[1][1],0,world,&request); + MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[1][0],0,world); + MPI_Wait(&request,&status); + } + + n = 0; + for (iz = nzlo_out; iz <= nzhi_out; iz++) + for (iy = nyhi_in+1; iy <= nyhi_out; iy++) + for (ix = nxlo_in; ix <= nxhi_in; ix++) { + vdx_brick[iz][iy][ix] = buf2[n++]; + vdy_brick[iz][iy][ix] = buf2[n++]; + vdz_brick[iz][iy][ix] = buf2[n++]; + } + + // pack my real cells for +x processor + // pass data to self or +x processor + // unpack and sum recv data into my ghost cells 
  n = 0;
  for (iz = nzlo_out; iz <= nzhi_out; iz++)
    for (iy = nylo_out; iy <= nyhi_out; iy++)
      for (ix = nxhi_in-nxhi_ghost+1; ix <= nxhi_in; ix++) {
        buf1[n++] = vdx_brick[iz][iy][ix];
        buf1[n++] = vdy_brick[iz][iy][ix];
        buf1[n++] = vdz_brick[iz][iy][ix];
      }

  if (comm->procneigh[0][1] == me)
    for (i = 0; i < n; i++) buf2[i] = buf1[i];
  else {
    MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[0][0],0,world,&request);
    MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[0][1],0,world);
    MPI_Wait(&request,&status);
  }

  n = 0;
  for (iz = nzlo_out; iz <= nzhi_out; iz++)
    for (iy = nylo_out; iy <= nyhi_out; iy++)
      for (ix = nxlo_out; ix < nxlo_in; ix++) {
        vdx_brick[iz][iy][ix] = buf2[n++];
        vdy_brick[iz][iy][ix] = buf2[n++];
        vdz_brick[iz][iy][ix] = buf2[n++];
      }

  // pack my real cells for -x processor
  // pass data to self or -x processor
  // unpack and sum recv data into my ghost cells

  n = 0;
  for (iz = nzlo_out; iz <= nzhi_out; iz++)
    for (iy = nylo_out; iy <= nyhi_out; iy++)
      for (ix = nxlo_in; ix < nxlo_in+nxlo_ghost; ix++) {
        buf1[n++] = vdx_brick[iz][iy][ix];
        buf1[n++] = vdy_brick[iz][iy][ix];
        buf1[n++] = vdz_brick[iz][iy][ix];
      }

  if (comm->procneigh[0][0] == me)
    for (i = 0; i < n; i++) buf2[i] = buf1[i];
  else {
    MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[0][1],0,world,&request);
    MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[0][0],0,world);
    MPI_Wait(&request,&status);
  }

  n = 0;
  for (iz = nzlo_out; iz <= nzhi_out; iz++)
    for (iy = nylo_out; iy <= nyhi_out; iy++)
      for (ix = nxhi_in+1; ix <= nxhi_out; ix++) {
        vdx_brick[iz][iy][ix] = buf2[n++];
        vdy_brick[iz][iy][ix] = buf2[n++];
        vdz_brick[iz][iy][ix] = buf2[n++];
      }
}

/* ----------------------------------------------------------------------
   ghost-swap to fill ghost cells of my brick with per-atom field values
   exchanges u_brick (if eflag_atom) and v0_brick..v5_brick (if vflag_atom)
   six swaps in the order +z,-z,+y,-y,+x,-x
   z swaps cover owned y,x cells only, y swaps cover the full z extent,
     x swaps cover the full z and y extents, so values received in an
     earlier swap are forwarded by the later ones
   each swap packs into buf3 and unpacks from buf4; when the destination
     proc is myself the data is copied locally instead of sent
   received values overwrite the ghost cells, nothing is summed
------------------------------------------------------------------------- */

void PPPMOld::fillbrick_peratom()
{
  int i,n,ix,iy,iz;
  MPI_Request request;
  MPI_Status status;

  // pack my real cells for +z processor
  // pass data to self or +z processor
  // unpack recv data into my ghost cells (assignment, no summation)

  n = 0;
  for (iz = nzhi_in-nzhi_ghost+1; iz <= nzhi_in; iz++)
    for (iy = nylo_in; iy <= nyhi_in; iy++)
      for (ix = nxlo_in; ix <= nxhi_in; ix++) {
        if (eflag_atom) buf3[n++] = u_brick[iz][iy][ix];
        if (vflag_atom) {
          buf3[n++] = v0_brick[iz][iy][ix];
          buf3[n++] = v1_brick[iz][iy][ix];
          buf3[n++] = v2_brick[iz][iy][ix];
          buf3[n++] = v3_brick[iz][iy][ix];
          buf3[n++] = v4_brick[iz][iy][ix];
          buf3[n++] = v5_brick[iz][iy][ix];
        }
      }

  // receive is posted before the blocking send; data arrives from the
  // -z neighbor while mine goes to the +z neighbor

  if (comm->procneigh[2][1] == me)
    for (i = 0; i < n; i++) buf4[i] = buf3[i];
  else {
    MPI_Irecv(buf4,nbuf_peratom,MPI_FFT_SCALAR,
              comm->procneigh[2][0],0,world,&request);
    MPI_Send(buf3,n,MPI_FFT_SCALAR,comm->procneigh[2][1],0,world);
    MPI_Wait(&request,&status);
  }

  // unpack order must match the pack order used by the sender

  n = 0;
  for (iz = nzlo_out; iz < nzlo_in; iz++)
    for (iy = nylo_in; iy <= nyhi_in; iy++)
      for (ix = nxlo_in; ix <= nxhi_in; ix++) {
        if (eflag_atom) u_brick[iz][iy][ix] = buf4[n++];
        if (vflag_atom) {
          v0_brick[iz][iy][ix] = buf4[n++];
          v1_brick[iz][iy][ix] = buf4[n++];
          v2_brick[iz][iy][ix] = buf4[n++];
          v3_brick[iz][iy][ix] = buf4[n++];
          v4_brick[iz][iy][ix] = buf4[n++];
          v5_brick[iz][iy][ix] = buf4[n++];
        }
      }

  // pack my real cells for -z processor
  // pass data to self or -z processor
  // unpack recv data into my ghost cells (assignment, no summation)

  n = 0;
  for (iz = nzlo_in; iz < nzlo_in+nzlo_ghost; iz++)
    for (iy = nylo_in; iy <= nyhi_in; iy++)
      for (ix = nxlo_in; ix <= nxhi_in; ix++) {
        if (eflag_atom) buf3[n++] = u_brick[iz][iy][ix];
        if (vflag_atom) {
          buf3[n++] = v0_brick[iz][iy][ix];
          buf3[n++] = v1_brick[iz][iy][ix];
          buf3[n++] = v2_brick[iz][iy][ix];
          buf3[n++] = v3_brick[iz][iy][ix];
          buf3[n++] = v4_brick[iz][iy][ix];
          buf3[n++] = v5_brick[iz][iy][ix];
        }
      }

  if (comm->procneigh[2][0] == me)
    for (i = 0; i < n; i++) buf4[i] = buf3[i];
  else {
    MPI_Irecv(buf4,nbuf_peratom,MPI_FFT_SCALAR,
              comm->procneigh[2][1],0,world,&request);
    MPI_Send(buf3,n,MPI_FFT_SCALAR,comm->procneigh[2][0],0,world);
    MPI_Wait(&request,&status);
  }

  n = 0;
  for (iz = nzhi_in+1; iz <= nzhi_out; iz++)
    for (iy = nylo_in; iy <= nyhi_in; iy++)
      for (ix = nxlo_in; ix <= nxhi_in; ix++) {
        if (eflag_atom) u_brick[iz][iy][ix] = buf4[n++];
        if (vflag_atom) {
          v0_brick[iz][iy][ix] = buf4[n++];
          v1_brick[iz][iy][ix] = buf4[n++];
          v2_brick[iz][iy][ix] = buf4[n++];
          v3_brick[iz][iy][ix] = buf4[n++];
          v4_brick[iz][iy][ix] = buf4[n++];
          v5_brick[iz][iy][ix] = buf4[n++];
        }
      }

  // pack my real cells for +y processor
  // pass data to self or +y processor
  // unpack recv data into my ghost cells (assignment, no summation)
  // z range now includes the ghost planes filled by the z swaps

  n = 0;
  for (iz = nzlo_out; iz <= nzhi_out; iz++)
    for (iy = nyhi_in-nyhi_ghost+1; iy <= nyhi_in; iy++)
      for (ix = nxlo_in; ix <= nxhi_in; ix++) {
        if (eflag_atom) buf3[n++] = u_brick[iz][iy][ix];
        if (vflag_atom) {
          buf3[n++] = v0_brick[iz][iy][ix];
          buf3[n++] = v1_brick[iz][iy][ix];
          buf3[n++] = v2_brick[iz][iy][ix];
          buf3[n++] = v3_brick[iz][iy][ix];
          buf3[n++] = v4_brick[iz][iy][ix];
          buf3[n++] = v5_brick[iz][iy][ix];
        }
      }

  if (comm->procneigh[1][1] == me)
    for (i = 0; i < n; i++) buf4[i] = buf3[i];
  else {
    MPI_Irecv(buf4,nbuf_peratom,MPI_FFT_SCALAR,
              comm->procneigh[1][0],0,world,&request);
    MPI_Send(buf3,n,MPI_FFT_SCALAR,comm->procneigh[1][1],0,world);
    MPI_Wait(&request,&status);
  }

  n = 0;
  for (iz = nzlo_out; iz <= nzhi_out; iz++)
    for (iy = nylo_out; iy < nylo_in; iy++)
      for (ix = nxlo_in; ix <= nxhi_in; ix++) {
        if (eflag_atom) u_brick[iz][iy][ix] = buf4[n++];
        if (vflag_atom) {
          v0_brick[iz][iy][ix] = buf4[n++];
          v1_brick[iz][iy][ix] = buf4[n++];
          v2_brick[iz][iy][ix] = buf4[n++];
          v3_brick[iz][iy][ix] = buf4[n++];
          v4_brick[iz][iy][ix] = buf4[n++];
          v5_brick[iz][iy][ix] = buf4[n++];
        }
      }

  // pack my real cells for -y processor
  // pass data to self or -y processor
  // unpack recv data into my ghost cells (assignment, no summation)

  n = 0;
  for (iz = nzlo_out; iz <= nzhi_out; iz++)
    for (iy = nylo_in; iy < nylo_in+nylo_ghost; iy++)
      for (ix = nxlo_in; ix <= nxhi_in; ix++) {
        if (eflag_atom) buf3[n++] = u_brick[iz][iy][ix];
        if (vflag_atom) {
          buf3[n++] = v0_brick[iz][iy][ix];
          buf3[n++] = v1_brick[iz][iy][ix];
          buf3[n++] = v2_brick[iz][iy][ix];
          buf3[n++] = v3_brick[iz][iy][ix];
          buf3[n++] = v4_brick[iz][iy][ix];
          buf3[n++] = v5_brick[iz][iy][ix];
        }
      }

  if (comm->procneigh[1][0] == me)
    for (i = 0; i < n; i++) buf4[i] = buf3[i];
  else {
    MPI_Irecv(buf4,nbuf_peratom,MPI_FFT_SCALAR,
              comm->procneigh[1][1],0,world,&request);
    MPI_Send(buf3,n,MPI_FFT_SCALAR,comm->procneigh[1][0],0,world);
    MPI_Wait(&request,&status);
  }

  n = 0;
  for (iz = nzlo_out; iz <= nzhi_out; iz++)
    for (iy = nyhi_in+1; iy <= nyhi_out; iy++)
      for (ix = nxlo_in; ix <= nxhi_in; ix++) {
        if (eflag_atom) u_brick[iz][iy][ix] = buf4[n++];
        if (vflag_atom) {
          v0_brick[iz][iy][ix] = buf4[n++];
          v1_brick[iz][iy][ix] = buf4[n++];
          v2_brick[iz][iy][ix] = buf4[n++];
          v3_brick[iz][iy][ix] = buf4[n++];
          v4_brick[iz][iy][ix] = buf4[n++];
          v5_brick[iz][iy][ix] = buf4[n++];
        }
      }

  // pack my real cells for +x processor
  // pass data to self or +x processor
  // unpack recv data into my ghost cells (assignment, no summation)
  // z and y ranges now include the ghost cells filled by earlier swaps

  n = 0;
  for (iz = nzlo_out; iz <= nzhi_out; iz++)
    for (iy = nylo_out; iy <= nyhi_out; iy++)
      for (ix = nxhi_in-nxhi_ghost+1; ix <= nxhi_in; ix++) {
        if (eflag_atom) buf3[n++] = u_brick[iz][iy][ix];
        if (vflag_atom) {
          buf3[n++] = v0_brick[iz][iy][ix];
          buf3[n++] = v1_brick[iz][iy][ix];
          buf3[n++] = v2_brick[iz][iy][ix];
          buf3[n++] = v3_brick[iz][iy][ix];
          buf3[n++] = v4_brick[iz][iy][ix];
          buf3[n++] = v5_brick[iz][iy][ix];
        }
      }

  if (comm->procneigh[0][1] == me)
    for (i = 0; i < n; i++) buf4[i] = buf3[i];
  else {
    MPI_Irecv(buf4,nbuf_peratom,MPI_FFT_SCALAR,
              comm->procneigh[0][0],0,world,&request);
    MPI_Send(buf3,n,MPI_FFT_SCALAR,comm->procneigh[0][1],0,world);
    MPI_Wait(&request,&status);
  }

  n = 0;
  for (iz = nzlo_out; iz <= nzhi_out; iz++)
    for (iy = nylo_out; iy <= nyhi_out; iy++)
      for (ix = nxlo_out; ix < nxlo_in; ix++) {
        if (eflag_atom) u_brick[iz][iy][ix] = buf4[n++];
        if (vflag_atom) {
          v0_brick[iz][iy][ix] = buf4[n++];
          v1_brick[iz][iy][ix] = buf4[n++];
          v2_brick[iz][iy][ix] = buf4[n++];
          v3_brick[iz][iy][ix] = buf4[n++];
          v4_brick[iz][iy][ix] = buf4[n++];
          v5_brick[iz][iy][ix] = buf4[n++];
        }
      }

  // pack my real cells for -x processor
  // pass data to self or -x processor
  // unpack recv data into my ghost cells (assignment, no summation)

  n = 0;
  for (iz = nzlo_out; iz <= nzhi_out; iz++)
    for (iy = nylo_out; iy <= nyhi_out; iy++)
      for (ix = nxlo_in; ix < nxlo_in+nxlo_ghost; ix++) {
        if (eflag_atom) buf3[n++] = u_brick[iz][iy][ix];
        if (vflag_atom) {
          buf3[n++] = v0_brick[iz][iy][ix];
          buf3[n++] = v1_brick[iz][iy][ix];
          buf3[n++] = v2_brick[iz][iy][ix];
          buf3[n++] = v3_brick[iz][iy][ix];
          buf3[n++] = v4_brick[iz][iy][ix];
          buf3[n++] = v5_brick[iz][iy][ix];
        }
      }

  if (comm->procneigh[0][0] == me)
    for (i = 0; i < n; i++) buf4[i] = buf3[i];
  else {
    MPI_Irecv(buf4,nbuf_peratom,MPI_FFT_SCALAR,
              comm->procneigh[0][1],0,world,&request);
    MPI_Send(buf3,n,MPI_FFT_SCALAR,comm->procneigh[0][0],0,world);
    MPI_Wait(&request,&status);
  }

  n = 0;
  for (iz = nzlo_out; iz <= nzhi_out; iz++)
    for (iy = nylo_out; iy <= nyhi_out; iy++)
      for (ix = nxhi_in+1; ix <= nxhi_out; ix++) {
        if (eflag_atom) u_brick[iz][iy][ix] = buf4[n++];
        if (vflag_atom) {
          v0_brick[iz][iy][ix] = buf4[n++];
          v1_brick[iz][iy][ix] = buf4[n++];
          v2_brick[iz][iy][ix] = buf4[n++];
          v3_brick[iz][iy][ix] = buf4[n++];
          v4_brick[iz][iy][ix] = buf4[n++];
          v5_brick[iz][iy][ix] = buf4[n++];
        }
      }
}

/* ----------------------------------------------------------------------
   find center grid pt for each of my
particles + check that full stencil for the particle will fit in my 3d brick + store central grid pt indices in part2grid array +------------------------------------------------------------------------- */ + +void PPPMOld::particle_map() +{ + int nx,ny,nz; + + double **x = atom->x; + int nlocal = atom->nlocal; + + int flag = 0; + for (int i = 0; i < nlocal; i++) { + + // (nx,ny,nz) = global coords of grid pt to "lower left" of charge + // current particle coord can be outside global and local box + // add/subtract OFFSET to avoid int(-0.75) = 0 when want it to be -1 + + nx = static_cast ((x[i][0]-boxlo[0])*delxinv+shift) - OFFSET; + ny = static_cast ((x[i][1]-boxlo[1])*delyinv+shift) - OFFSET; + nz = static_cast ((x[i][2]-boxlo[2])*delzinv+shift) - OFFSET; + + part2grid[i][0] = nx; + part2grid[i][1] = ny; + part2grid[i][2] = nz; + + // check that entire stencil around nx,ny,nz will fit in my 3d brick + + if (nx+nlower < nxlo_out || nx+nupper > nxhi_out || + ny+nlower < nylo_out || ny+nupper > nyhi_out || + nz+nlower < nzlo_out || nz+nupper > nzhi_out) + flag = 1; + } + + if (flag) error->one(FLERR,"Out of range atoms - cannot compute PPPM"); +} + +/* ---------------------------------------------------------------------- + create discretized "density" on section of global grid due to my particles + density(x,y,z) = charge "density" at grid points of my 3d brick + (nxlo:nxhi,nylo:nyhi,nzlo:nzhi) is extent of my brick (including ghosts) + in global grid +------------------------------------------------------------------------- */ + +void PPPMOld::make_rho() +{ + int l,m,n,nx,ny,nz,mx,my,mz; + FFT_SCALAR dx,dy,dz,x0,y0,z0; + + // clear 3d density array + + memset(&(density_brick[nzlo_out][nylo_out][nxlo_out]),0, + ngrid*sizeof(FFT_SCALAR)); + + // loop over my charges, add their contribution to nearby grid points + // (nx,ny,nz) = global coords of grid pt to "lower left" of charge + // (dx,dy,dz) = distance to "lower left" grid pt + // (mx,my,mz) = global coords of 
moving stencil pt + + double *q = atom->q; + double **x = atom->x; + int nlocal = atom->nlocal; + + for (int i = 0; i < nlocal; i++) { + + nx = part2grid[i][0]; + ny = part2grid[i][1]; + nz = part2grid[i][2]; + dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv; + dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv; + dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv; + + compute_rho1d(dx,dy,dz); + + z0 = delvolinv * q[i]; + for (n = nlower; n <= nupper; n++) { + mz = n+nz; + y0 = z0*rho1d[2][n]; + for (m = nlower; m <= nupper; m++) { + my = m+ny; + x0 = y0*rho1d[1][m]; + for (l = nlower; l <= nupper; l++) { + mx = l+nx; + density_brick[mz][my][mx] += x0*rho1d[0][l]; + } + } + } + } +} + +/* ---------------------------------------------------------------------- + FFT-based Poisson solver +------------------------------------------------------------------------- */ + +void PPPMOld::poisson() +{ + int i,j,k,n; + double eng; + + // transform charge density (r -> k) + + n = 0; + for (i = 0; i < nfft; i++) { + work1[n++] = density_fft[i]; + work1[n++] = ZEROF; + } + + fft1->compute(work1,work1,1); + + // global energy and virial contribution + + double scaleinv = 1.0/(nx_pppm*ny_pppm*nz_pppm); + double s2 = scaleinv*scaleinv; + + if (eflag_global || vflag_global) { + if (vflag_global) { + n = 0; + for (i = 0; i < nfft; i++) { + eng = s2 * greensfn[i] * (work1[n]*work1[n] + work1[n+1]*work1[n+1]); + for (j = 0; j < 6; j++) virial[j] += eng*vg[i][j]; + if (eflag_global) energy += eng; + n += 2; + } + } else { + n = 0; + for (i = 0; i < nfft; i++) { + energy += + s2 * greensfn[i] * (work1[n]*work1[n] + work1[n+1]*work1[n+1]); + n += 2; + } + } + } + + // scale by 1/total-grid-pts to get rho(k) + // multiply by Green's function to get V(k) + + n = 0; + for (i = 0; i < nfft; i++) { + work1[n++] *= scaleinv * greensfn[i]; + work1[n++] *= scaleinv * greensfn[i]; + } + + // extra FFTs for per-atom energy/virial + + if (evflag_atom) poisson_peratom(); + + // compute gradients of V(r) in each 
of 3 dims by transformimg -ik*V(k) + // FFT leaves data in 3d brick decomposition + // copy it into inner portion of vdx,vdy,vdz arrays + + // x direction gradient + + n = 0; + for (k = nzlo_fft; k <= nzhi_fft; k++) + for (j = nylo_fft; j <= nyhi_fft; j++) + for (i = nxlo_fft; i <= nxhi_fft; i++) { + work2[n] = fkx[i]*work1[n+1]; + work2[n+1] = -fkx[i]*work1[n]; + n += 2; + } + + fft2->compute(work2,work2,-1); + + n = 0; + for (k = nzlo_in; k <= nzhi_in; k++) + for (j = nylo_in; j <= nyhi_in; j++) + for (i = nxlo_in; i <= nxhi_in; i++) { + vdx_brick[k][j][i] = work2[n]; + n += 2; + } + + // y direction gradient + + n = 0; + for (k = nzlo_fft; k <= nzhi_fft; k++) + for (j = nylo_fft; j <= nyhi_fft; j++) + for (i = nxlo_fft; i <= nxhi_fft; i++) { + work2[n] = fky[j]*work1[n+1]; + work2[n+1] = -fky[j]*work1[n]; + n += 2; + } + + fft2->compute(work2,work2,-1); + + n = 0; + for (k = nzlo_in; k <= nzhi_in; k++) + for (j = nylo_in; j <= nyhi_in; j++) + for (i = nxlo_in; i <= nxhi_in; i++) { + vdy_brick[k][j][i] = work2[n]; + n += 2; + } + + // z direction gradient + + n = 0; + for (k = nzlo_fft; k <= nzhi_fft; k++) + for (j = nylo_fft; j <= nyhi_fft; j++) + for (i = nxlo_fft; i <= nxhi_fft; i++) { + work2[n] = fkz[k]*work1[n+1]; + work2[n+1] = -fkz[k]*work1[n]; + n += 2; + } + + fft2->compute(work2,work2,-1); + + n = 0; + for (k = nzlo_in; k <= nzhi_in; k++) + for (j = nylo_in; j <= nyhi_in; j++) + for (i = nxlo_in; i <= nxhi_in; i++) { + vdz_brick[k][j][i] = work2[n]; + n += 2; + } +} + +/* ---------------------------------------------------------------------- + FFT-based Poisson solver for per-atom energy/virial +------------------------------------------------------------------------- */ + +void PPPMOld::poisson_peratom() +{ + int i,j,k,n; + + // energy + + if (eflag_atom) { + n = 0; + for (i = 0; i < nfft; i++) { + work2[n] = work1[n]; + work2[n+1] = work1[n+1]; + n += 2; + } + + fft2->compute(work2,work2,-1); + + n = 0; + for (k = nzlo_in; k <= nzhi_in; k++) + for (j 
= nylo_in; j <= nyhi_in; j++) + for (i = nxlo_in; i <= nxhi_in; i++) { + u_brick[k][j][i] = work2[n]; + n += 2; + } + } + + // 6 components of virial in v0 thru v5 + + if (!vflag_atom) return; + + n = 0; + for (i = 0; i < nfft; i++) { + work2[n] = work1[n]*vg[i][0]; + work2[n+1] = work1[n+1]*vg[i][0]; + n += 2; + } + + fft2->compute(work2,work2,-1); + + n = 0; + for (k = nzlo_in; k <= nzhi_in; k++) + for (j = nylo_in; j <= nyhi_in; j++) + for (i = nxlo_in; i <= nxhi_in; i++) { + v0_brick[k][j][i] = work2[n]; + n += 2; + } + + n = 0; + for (i = 0; i < nfft; i++) { + work2[n] = work1[n]*vg[i][1]; + work2[n+1] = work1[n+1]*vg[i][1]; + n += 2; + } + + fft2->compute(work2,work2,-1); + + n = 0; + for (k = nzlo_in; k <= nzhi_in; k++) + for (j = nylo_in; j <= nyhi_in; j++) + for (i = nxlo_in; i <= nxhi_in; i++) { + v1_brick[k][j][i] = work2[n]; + n += 2; + } + + n = 0; + for (i = 0; i < nfft; i++) { + work2[n] = work1[n]*vg[i][2]; + work2[n+1] = work1[n+1]*vg[i][2]; + n += 2; + } + + fft2->compute(work2,work2,-1); + + n = 0; + for (k = nzlo_in; k <= nzhi_in; k++) + for (j = nylo_in; j <= nyhi_in; j++) + for (i = nxlo_in; i <= nxhi_in; i++) { + v2_brick[k][j][i] = work2[n]; + n += 2; + } + + n = 0; + for (i = 0; i < nfft; i++) { + work2[n] = work1[n]*vg[i][3]; + work2[n+1] = work1[n+1]*vg[i][3]; + n += 2; + } + + fft2->compute(work2,work2,-1); + + n = 0; + for (k = nzlo_in; k <= nzhi_in; k++) + for (j = nylo_in; j <= nyhi_in; j++) + for (i = nxlo_in; i <= nxhi_in; i++) { + v3_brick[k][j][i] = work2[n]; + n += 2; + } + + n = 0; + for (i = 0; i < nfft; i++) { + work2[n] = work1[n]*vg[i][4]; + work2[n+1] = work1[n+1]*vg[i][4]; + n += 2; + } + + fft2->compute(work2,work2,-1); + + n = 0; + for (k = nzlo_in; k <= nzhi_in; k++) + for (j = nylo_in; j <= nyhi_in; j++) + for (i = nxlo_in; i <= nxhi_in; i++) { + v4_brick[k][j][i] = work2[n]; + n += 2; + } + + n = 0; + for (i = 0; i < nfft; i++) { + work2[n] = work1[n]*vg[i][5]; + work2[n+1] = work1[n+1]*vg[i][5]; + n += 2; + } + + 
fft2->compute(work2,work2,-1); + + n = 0; + for (k = nzlo_in; k <= nzhi_in; k++) + for (j = nylo_in; j <= nyhi_in; j++) + for (i = nxlo_in; i <= nxhi_in; i++) { + v5_brick[k][j][i] = work2[n]; + n += 2; + } +} + +/* ---------------------------------------------------------------------- + interpolate from grid to get electric field & force on my particles +------------------------------------------------------------------------- */ + +void PPPMOld::fieldforce() +{ + int i,l,m,n,nx,ny,nz,mx,my,mz; + FFT_SCALAR dx,dy,dz,x0,y0,z0; + FFT_SCALAR ekx,eky,ekz; + + // loop over my charges, interpolate electric field from nearby grid points + // (nx,ny,nz) = global coords of grid pt to "lower left" of charge + // (dx,dy,dz) = distance to "lower left" grid pt + // (mx,my,mz) = global coords of moving stencil pt + // ek = 3 components of E-field on particle + + double *q = atom->q; + double **x = atom->x; + double **f = atom->f; + + int nlocal = atom->nlocal; + + for (i = 0; i < nlocal; i++) { + nx = part2grid[i][0]; + ny = part2grid[i][1]; + nz = part2grid[i][2]; + dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv; + dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv; + dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv; + + compute_rho1d(dx,dy,dz); + + ekx = eky = ekz = ZEROF; + for (n = nlower; n <= nupper; n++) { + mz = n+nz; + z0 = rho1d[2][n]; + for (m = nlower; m <= nupper; m++) { + my = m+ny; + y0 = z0*rho1d[1][m]; + for (l = nlower; l <= nupper; l++) { + mx = l+nx; + x0 = y0*rho1d[0][l]; + ekx -= x0*vdx_brick[mz][my][mx]; + eky -= x0*vdy_brick[mz][my][mx]; + ekz -= x0*vdz_brick[mz][my][mx]; + } + } + } + + // convert E-field to force + + const double qfactor = force->qqrd2e * scale * q[i]; + f[i][0] += qfactor*ekx; + f[i][1] += qfactor*eky; + if (slabflag != 2) f[i][2] += qfactor*ekz; + } +} + +/* ---------------------------------------------------------------------- + interpolate from grid to get per-atom energy/virial 
+------------------------------------------------------------------------- */ + +void PPPMOld::fieldforce_peratom() +{ + int i,l,m,n,nx,ny,nz,mx,my,mz; + FFT_SCALAR dx,dy,dz,x0,y0,z0; + FFT_SCALAR u,v0,v1,v2,v3,v4,v5; + + // loop over my charges, interpolate from nearby grid points + // (nx,ny,nz) = global coords of grid pt to "lower left" of charge + // (dx,dy,dz) = distance to "lower left" grid pt + // (mx,my,mz) = global coords of moving stencil pt + + double *q = atom->q; + double **x = atom->x; + double **f = atom->f; + + int nlocal = atom->nlocal; + + for (i = 0; i < nlocal; i++) { + nx = part2grid[i][0]; + ny = part2grid[i][1]; + nz = part2grid[i][2]; + dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv; + dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv; + dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv; + + compute_rho1d(dx,dy,dz); + + u = v0 = v1 = v2 = v3 = v4 = v5 = ZEROF; + for (n = nlower; n <= nupper; n++) { + mz = n+nz; + z0 = rho1d[2][n]; + for (m = nlower; m <= nupper; m++) { + my = m+ny; + y0 = z0*rho1d[1][m]; + for (l = nlower; l <= nupper; l++) { + mx = l+nx; + x0 = y0*rho1d[0][l]; + if (eflag_atom) u += x0*u_brick[mz][my][mx]; + if (vflag_atom) { + v0 += x0*v0_brick[mz][my][mx]; + v1 += x0*v1_brick[mz][my][mx]; + v2 += x0*v2_brick[mz][my][mx]; + v3 += x0*v3_brick[mz][my][mx]; + v4 += x0*v4_brick[mz][my][mx]; + v5 += x0*v5_brick[mz][my][mx]; + } + } + } + } + + if (eflag_atom) eatom[i] += q[i]*u; + if (vflag_atom) { + vatom[i][0] += v0; + vatom[i][1] += v1; + vatom[i][2] += v2; + vatom[i][3] += v3; + vatom[i][4] += v4; + vatom[i][5] += v5; + } + } +} + +/* ---------------------------------------------------------------------- + map nprocs to NX by NY grid as PX by PY procs - return optimal px,py +------------------------------------------------------------------------- */ + +void PPPMOld::procs2grid2d(int nprocs, int nx, int ny, int *px, int *py) +{ + // loop thru all possible factorizations of nprocs + // surf = surface area of largest proc sub-domain + 
// innermost if test minimizes surface area and surface/volume ratio + + int bestsurf = 2 * (nx + ny); + int bestboxx = 0; + int bestboxy = 0; + + int boxx,boxy,surf,ipx,ipy; + + ipx = 1; + while (ipx <= nprocs) { + if (nprocs % ipx == 0) { + ipy = nprocs/ipx; + boxx = nx/ipx; + if (nx % ipx) boxx++; + boxy = ny/ipy; + if (ny % ipy) boxy++; + surf = boxx + boxy; + if (surf < bestsurf || + (surf == bestsurf && boxx*boxy > bestboxx*bestboxy)) { + bestsurf = surf; + bestboxx = boxx; + bestboxy = boxy; + *px = ipx; + *py = ipy; + } + } + ipx++; + } +} + +/* ---------------------------------------------------------------------- + charge assignment into rho1d + dx,dy,dz = distance of particle from "lower left" grid point +------------------------------------------------------------------------- */ + +void PPPMOld::compute_rho1d(const FFT_SCALAR &dx, const FFT_SCALAR &dy, + const FFT_SCALAR &dz) +{ + int k,l; + FFT_SCALAR r1,r2,r3; + + for (k = (1-order)/2; k <= order/2; k++) { + r1 = r2 = r3 = ZEROF; + + for (l = order-1; l >= 0; l--) { + r1 = rho_coeff[l][k] + r1*dx; + r2 = rho_coeff[l][k] + r2*dy; + r3 = rho_coeff[l][k] + r3*dz; + } + rho1d[0][k] = r1; + rho1d[1][k] = r2; + rho1d[2][k] = r3; + } +} + +/* ---------------------------------------------------------------------- + generate coeffients for the weight function of order n + + (n-1) + Wn(x) = Sum wn(k,x) , Sum is over every other integer + k=-(n-1) + For k=-(n-1),-(n-1)+2, ....., (n-1)-2,n-1 + k is odd integers if n is even and even integers if n is odd + --- + | n-1 + | Sum a(l,j)*(x-k/2)**l if abs(x-k/2) < 1/2 + wn(k,x) = < l=0 + | + | 0 otherwise + --- + a coeffients are packed into the array rho_coeff to eliminate zeros + rho_coeff(l,((k+mod(n+1,2))/2) = a(l,k) +------------------------------------------------------------------------- */ + +void PPPMOld::compute_rho_coeff() +{ + int j,k,l,m; + FFT_SCALAR s; + + FFT_SCALAR **a; + memory->create2d_offset(a,order,-order,order,"pppm:a"); + + for (k = -order; k 
<= order; k++) + for (l = 0; l < order; l++) + a[l][k] = 0.0; + + a[0][0] = 1.0; + for (j = 1; j < order; j++) { + for (k = -j; k <= j; k += 2) { + s = 0.0; + for (l = 0; l < j; l++) { + a[l+1][k] = (a[l][k+1]-a[l][k-1]) / (l+1); +#ifdef FFT_SINGLE + s += powf(0.5,(float) l+1) * + (a[l][k-1] + powf(-1.0,(float) l) * a[l][k+1]) / (l+1); +#else + s += pow(0.5,(double) l+1) * + (a[l][k-1] + pow(-1.0,(double) l) * a[l][k+1]) / (l+1); +#endif + } + a[0][k] = s; + } + } + + m = (1-order)/2; + for (k = -(order-1); k < order; k += 2) { + for (l = 0; l < order; l++) + rho_coeff[l][m] = a[l][k]; + m++; + } + + memory->destroy2d_offset(a,-order); +} + +/* ---------------------------------------------------------------------- + Slab-geometry correction term to dampen inter-slab interactions between + periodically repeating slabs. Yields good approximation to 2D Ewald if + adequate empty space is left between repeating slabs (J. Chem. Phys. + 111, 3155). Slabs defined here to be parallel to the xy plane. Also + extended to non-neutral systems (J. Chem. Phys. 131, 094107). 
+------------------------------------------------------------------------- */ + +void PPPMOld::slabcorr() +{ + // compute local contribution to global dipole moment + + double *q = atom->q; + double **x = atom->x; + double zprd = domain->zprd; + int nlocal = atom->nlocal; + + double dipole = 0.0; + for (int i = 0; i < nlocal; i++) dipole += q[i]*x[i][2]; + + // sum local contributions to get global dipole moment + + double dipole_all; + MPI_Allreduce(&dipole,&dipole_all,1,MPI_DOUBLE,MPI_SUM,world); + + // need to make non-neutral systems and/or + // per-atom energy translationally invariant + + double dipole_r2 = 0.0; + if (eflag_atom || fabs(qsum) > SMALL) { + for (int i = 0; i < nlocal; i++) + dipole_r2 += q[i]*x[i][2]*x[i][2]; + + // sum local contributions + + double tmp; + MPI_Allreduce(&dipole_r2,&tmp,1,MPI_DOUBLE,MPI_SUM,world); + dipole_r2 = tmp; + } + + // compute corrections + + const double e_slabcorr = MY_2PI*(dipole_all*dipole_all - + qsum*dipole_r2 - qsum*qsum*zprd*zprd/12.0)/volume; + const double qscale = force->qqrd2e * scale; + + if (eflag_global) energy += qscale * e_slabcorr; + + // per-atom energy + + if (eflag_atom) { + double efact = qscale * MY_2PI/volume; + for (int i = 0; i < nlocal; i++) + eatom[i] += efact * q[i]*(x[i][2]*dipole_all - 0.5*(dipole_r2 + + qsum*x[i][2]*x[i][2]) - qsum*zprd*zprd/12.0); + } + + // add on force corrections + + double ffact = qscale * (-4.0*MY_PI/volume); + double **f = atom->f; + + for (int i = 0; i < nlocal; i++) f[i][2] += ffact * q[i]*(dipole_all - qsum*x[i][2]); +} + + +/* ---------------------------------------------------------------------- + perform and time the 1d FFTs required for N timesteps +------------------------------------------------------------------------- */ + +int PPPMOld::timing_1d(int n, double &time1d) +{ + double time1,time2; + + for (int i = 0; i < 2*nfft_both; i++) work1[i] = ZEROF; + + MPI_Barrier(world); + time1 = MPI_Wtime(); + + for (int i = 0; i < n; i++) { + 
fft1->timing1d(work1,nfft_both,1); + fft2->timing1d(work1,nfft_both,-1); + fft2->timing1d(work1,nfft_both,-1); + fft2->timing1d(work1,nfft_both,-1); + } + + MPI_Barrier(world); + time2 = MPI_Wtime(); + time1d = time2 - time1; + + return 4; +} + +/* ---------------------------------------------------------------------- + perform and time the 3d FFTs required for N timesteps +------------------------------------------------------------------------- */ + +int PPPMOld::timing_3d(int n, double &time3d) +{ + double time1,time2; + + for (int i = 0; i < 2*nfft_both; i++) work1[i] = ZEROF; + + MPI_Barrier(world); + time1 = MPI_Wtime(); + + for (int i = 0; i < n; i++) { + fft1->compute(work1,work1,1); + fft2->compute(work1,work1,-1); + fft2->compute(work1,work1,-1); + fft2->compute(work1,work1,-1); + } + + MPI_Barrier(world); + time2 = MPI_Wtime(); + time3d = time2 - time1; + + return 4; +} + +/* ---------------------------------------------------------------------- + memory usage of local arrays +------------------------------------------------------------------------- */ + +double PPPMOld::memory_usage() +{ + double bytes = nmax*3 * sizeof(double); + int nbrick = (nxhi_out-nxlo_out+1) * (nyhi_out-nylo_out+1) * + (nzhi_out-nzlo_out+1); + bytes += 4 * nbrick * sizeof(FFT_SCALAR); + bytes += 6 * nfft_both * sizeof(double); + bytes += nfft_both * sizeof(double); + bytes += nfft_both*5 * sizeof(FFT_SCALAR); + bytes += 2 * nbuf * sizeof(FFT_SCALAR); + + if (peratom_allocate_flag) { + bytes += 7 * nbrick * sizeof(FFT_SCALAR); + bytes += 2 * nbuf_peratom * sizeof(FFT_SCALAR); + } + + if (group_allocate_flag) { + bytes += 2 * nbrick * sizeof(FFT_SCALAR); + bytes += 2 * nfft_both * sizeof(FFT_SCALAR);; + } + + return bytes; +} + +/* ---------------------------------------------------------------------- + group-group interactions + ------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- 
+ compute the PPPM total long-range force and energy for groups A and B + ------------------------------------------------------------------------- */ + +void PPPMOld::compute_group_group(int groupbit_A, int groupbit_B, int BA_flag) +{ + if (slabflag) + error->all(FLERR,"Cannot (yet) use K-space slab " + "correction with compute group/group"); + + int i,j; + + if (!group_allocate_flag) { + allocate_groups(); + group_allocate_flag = 1; + } + + e2group = 0; //energy + f2group[0] = 0; //force in x-direction + f2group[1] = 0; //force in y-direction + f2group[2] = 0; //force in z-direction + + double *q = atom->q; + int nlocal = atom->nlocal; + int *mask = atom->mask; + + + // map my particle charge onto my local 3d density grid + + make_rho_groups(groupbit_A,groupbit_B,BA_flag); + + // all procs communicate density values from their ghost cells + // to fully sum contribution in their 3d bricks + // remap from 3d decomposition to FFT decomposition + + // temporarily store and switch pointers so we can + // use brick2fft() for groups A and B (without + // writing an additional function) + + FFT_SCALAR ***density_brick_real = density_brick; + FFT_SCALAR *density_fft_real = density_fft; + + // group A + + density_brick = density_A_brick; + density_fft = density_A_fft; + + brick2fft(); + + // group B + + density_brick = density_B_brick; + density_fft = density_B_fft; + + brick2fft(); + + // switch back pointers + + density_brick = density_brick_real; + density_fft = density_fft_real; + + // compute potential gradient on my FFT grid and + // portion of group-group energy/force on this proc's FFT grid + + poisson_groups(BA_flag); + + const double qscale = force->qqrd2e * scale; + + // total group A <--> group B energy + // self and boundary correction terms are in compute_group_group.cpp + + double e2group_all; + MPI_Allreduce(&e2group,&e2group_all,1,MPI_DOUBLE,MPI_SUM,world); + e2group = e2group_all; + + e2group *= qscale*0.5*volume; + + // total group A <--> group B force + 
+ double f2group_all[3]; + MPI_Allreduce(f2group,f2group_all,3,MPI_DOUBLE,MPI_SUM,world); + + for (i = 0; i < 3; i++) f2group[i] = qscale*volume*f2group_all[i]; +} + +/* ---------------------------------------------------------------------- + allocate group-group memory that depends on # of K-vectors and order + ------------------------------------------------------------------------- */ + +void PPPMOld::allocate_groups() +{ + memory->create3d_offset(density_A_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, + nxlo_out,nxhi_out,"pppm:density_A_brick"); + memory->create3d_offset(density_B_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out, + nxlo_out,nxhi_out,"pppm:density_B_brick"); + memory->create(density_A_fft,nfft_both,"pppm:density_A_fft"); + memory->create(density_B_fft,nfft_both,"pppm:density_B_fft"); +} + +/* ---------------------------------------------------------------------- + deallocate group-group memory that depends on # of K-vectors and order + ------------------------------------------------------------------------- */ + +void PPPMOld::deallocate_groups() +{ + memory->destroy3d_offset(density_A_brick,nzlo_out,nylo_out,nxlo_out); + memory->destroy3d_offset(density_B_brick,nzlo_out,nylo_out,nxlo_out); + memory->destroy(density_A_fft); + memory->destroy(density_B_fft); +} + +/* ---------------------------------------------------------------------- + create discretized "density" on section of global grid due to my particles + density(x,y,z) = charge "density" at grid points of my 3d brick + (nxlo:nxhi,nylo:nyhi,nzlo:nzhi) is extent of my brick (including ghosts) + in global grid for group-group interactions + ------------------------------------------------------------------------- */ + +void PPPMOld::make_rho_groups(int groupbit_A, int groupbit_B, int BA_flag) +{ + int l,m,n,nx,ny,nz,mx,my,mz; + FFT_SCALAR dx,dy,dz,x0,y0,z0; + + // clear 3d density arrays + + memset(&(density_A_brick[nzlo_out][nylo_out][nxlo_out]),0, + ngrid*sizeof(FFT_SCALAR)); + + 
memset(&(density_B_brick[nzlo_out][nylo_out][nxlo_out]),0, + ngrid*sizeof(FFT_SCALAR)); + + // loop over my charges, add their contribution to nearby grid points + // (nx,ny,nz) = global coords of grid pt to "lower left" of charge + // (dx,dy,dz) = distance to "lower left" grid pt + // (mx,my,mz) = global coords of moving stencil pt + + double *q = atom->q; + double **x = atom->x; + int nlocal = atom->nlocal; + int *mask = atom->mask; + + for (int i = 0; i < nlocal; i++) { + + if ((mask[i] & groupbit_A) && (mask[i] & groupbit_B)) + if (BA_flag) continue; + + if ((mask[i] & groupbit_A) || (mask[i] & groupbit_B)) { + + nx = part2grid[i][0]; + ny = part2grid[i][1]; + nz = part2grid[i][2]; + dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv; + dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv; + dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv; + + compute_rho1d(dx,dy,dz); + + z0 = delvolinv * q[i]; + for (n = nlower; n <= nupper; n++) { + mz = n+nz; + y0 = z0*rho1d[2][n]; + for (m = nlower; m <= nupper; m++) { + my = m+ny; + x0 = y0*rho1d[1][m]; + for (l = nlower; l <= nupper; l++) { + mx = l+nx; + + // group A + + if (mask[i] & groupbit_A) + density_A_brick[mz][my][mx] += x0*rho1d[0][l]; + + // group B + + if (mask[i] & groupbit_B) + density_B_brick[mz][my][mx] += x0*rho1d[0][l]; + } + } + } + } + } +} + +/* ---------------------------------------------------------------------- + FFT-based Poisson solver for group-group interactions + ------------------------------------------------------------------------- */ + +void PPPMOld::poisson_groups(int BA_flag) +{ + int i,j,k,n; + double eng; + + // reuse memory (already declared) + + FFT_SCALAR *work_A = work1; + FFT_SCALAR *work_B = work2; + + // transform charge density (r -> k) + + // group A + + n = 0; + for (i = 0; i < nfft; i++) { + work_A[n++] = density_A_fft[i]; + work_A[n++] = ZEROF; + } + + fft1->compute(work_A,work_A,1); + + // group B + + n = 0; + for (i = 0; i < nfft; i++) { + work_B[n++] = density_B_fft[i]; + 
work_B[n++] = ZEROF; + } + + fft1->compute(work_B,work_B,1); + + // group-group energy and force contribution, + // keep everything in reciprocal space so + // no inverse FFTs needed + + double scaleinv = 1.0/(nx_pppm*ny_pppm*nz_pppm); + double s2 = scaleinv*scaleinv; + + // energy + + n = 0; + for (i = 0; i < nfft; i++) { + e2group += s2 * greensfn[i] * + (work_A[n]*work_B[n] + work_A[n+1]*work_B[n+1]); + n += 2; + } + + if (BA_flag) return; + + + // multiply by Green's function and s2 + // (only for work_A so it is not squared below) + + n = 0; + for (i = 0; i < nfft; i++) { + work_A[n++] *= s2 * greensfn[i]; + work_A[n++] *= s2 * greensfn[i]; + } + + double partial_group; + + // force, x direction + + n = 0; + for (k = nzlo_fft; k <= nzhi_fft; k++) + for (j = nylo_fft; j <= nyhi_fft; j++) + for (i = nxlo_fft; i <= nxhi_fft; i++) { + partial_group = work_A[n+1]*work_B[n] - work_A[n]*work_B[n+1]; + f2group[0] += fkx[i] * partial_group; + n += 2; + } + + // force, y direction + + n = 0; + for (k = nzlo_fft; k <= nzhi_fft; k++) + for (j = nylo_fft; j <= nyhi_fft; j++) + for (i = nxlo_fft; i <= nxhi_fft; i++) { + partial_group = work_A[n+1]*work_B[n] - work_A[n]*work_B[n+1]; + f2group[1] += fky[j] * partial_group; + n += 2; + } + + // force, z direction + + n = 0; + for (k = nzlo_fft; k <= nzhi_fft; k++) + for (j = nylo_fft; j <= nyhi_fft; j++) + for (i = nxlo_fft; i <= nxhi_fft; i++) { + partial_group = work_A[n+1]*work_B[n] - work_A[n]*work_B[n+1]; + f2group[2] += fkz[k] * partial_group; + n += 2; + } +}