From 2bcf10827cc2239544e149a4f2203c2e88e8f109 Mon Sep 17 00:00:00 2001
From: sjplimp <sjplimp@f3b2605a-c512-4ea7-a41b-209d697bcdaa>
Date: Thu, 6 Mar 2014 15:28:27 +0000
Subject: [PATCH] git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@11588
 f3b2605a-c512-4ea7-a41b-209d697bcdaa

---
 src/KSPACE/ewald.cpp      |  2984 +++----
 src/KSPACE/ewald_disp.cpp |  2950 +++----
 src/KSPACE/pppm.cpp       |  7002 +++++++--------
 src/KSPACE/pppm_disp.cpp  | 16418 ++++++++++++++++++------------------
 src/KSPACE/pppm_old.cpp   |  5726 ++++++-------
 5 files changed, 17540 insertions(+), 17540 deletions(-)

diff --git a/src/KSPACE/ewald.cpp b/src/KSPACE/ewald.cpp
index f750c2cbf3..a684ce80a5 100644
--- a/src/KSPACE/ewald.cpp
+++ b/src/KSPACE/ewald.cpp
@@ -1,1492 +1,1492 @@
-/* ----------------------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   http://lammps.sandia.gov, Sandia National Laboratories
-   Steve Plimpton, sjplimp@sandia.gov
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-/* ----------------------------------------------------------------------
-   Contributing authors: Roy Pollock (LLNL), Paul Crozier (SNL)
-     per-atom energy/virial added by German Samolyuk (ORNL), Stan Moore (BYU)
-     group/group energy/force added by Stan Moore (BYU)
-     triclinic added by Stan Moore (SNL)
-------------------------------------------------------------------------- */
-
-#include "mpi.h"
-#include "stdlib.h"
-#include "stdio.h"
-#include "string.h"
-#include "math.h"
-#include "ewald.h"
-#include "atom.h"
-#include "comm.h"
-#include "force.h"
-#include "pair.h"
-#include "domain.h"
-#include "math_const.h"
-#include "memory.h"
-#include "error.h"
-
-using namespace LAMMPS_NS;
-using namespace MathConst;
-
-#define SMALL 0.00001
-
-/* ---------------------------------------------------------------------- */
-
-Ewald::Ewald(LAMMPS *lmp, int narg, char **arg) : KSpace(lmp, narg, arg)
-{
-  if (narg != 1) error->all(FLERR,"Illegal kspace_style ewald command");
-
-  ewaldflag = 1;
-  group_group_enable = 1;
-  group_allocate_flag = 0;
-
-  accuracy_relative = fabs(force->numeric(FLERR,arg[0]));
-
-  kmax = 0;
-  kxvecs = kyvecs = kzvecs = NULL;
-  ug = NULL;
-  eg = vg = NULL;
-  sfacrl = sfacim = sfacrl_all = sfacim_all = NULL;
-
-  nmax = 0;
-  ek = NULL;
-  cs = sn = NULL;
-
-  kcount = 0;
-}
-
-/* ----------------------------------------------------------------------
-   free all memory
-------------------------------------------------------------------------- */
-
-Ewald::~Ewald()
-{
-  deallocate();
-  if (group_allocate_flag) deallocate_groups();
-  memory->destroy(ek);
-  memory->destroy3d_offset(cs,-kmax_created);
-  memory->destroy3d_offset(sn,-kmax_created);
-}
-
-/* ---------------------------------------------------------------------- */
-
-void Ewald::init()
-{
-  if (comm->me == 0) {
-    if (screen) fprintf(screen,"Ewald initialization ...\n");
-    if (logfile) fprintf(logfile,"Ewald initialization ...\n");
-  }
-
-  // error check
-  
-  triclinic_check();
-  if (domain->dimension == 2)
-    error->all(FLERR,"Cannot use Ewald with 2d simulation");
-
-  if (!atom->q_flag) error->all(FLERR,"Kspace style requires atom attribute q");
-
-  if (slabflag == 0 && domain->nonperiodic > 0)
-    error->all(FLERR,"Cannot use nonperiodic boundaries with Ewald");
-  if (slabflag) {
-    if (domain->xperiodic != 1 || domain->yperiodic != 1 ||
-        domain->boundary[2][0] != 1 || domain->boundary[2][1] != 1)
-      error->all(FLERR,"Incorrect boundaries with slab Ewald");
-    if (domain->triclinic)
-      error->all(FLERR,"Cannot (yet) use Ewald with triclinic box "
-                 "and slab correction");
-  }
-
-  // extract short-range Coulombic cutoff from pair style
-
-  scale = 1.0;
-
-  pair_check();
-
-  int itmp;
-  double *p_cutoff = (double *) force->pair->extract("cut_coul",itmp);
-  if (p_cutoff == NULL)
-    error->all(FLERR,"KSpace style is incompatible with Pair style");
-  double cutoff = *p_cutoff;
-
-  qsum = qsqsum = 0.0;
-  for (int i = 0; i < atom->nlocal; i++) {
-    qsum += atom->q[i];
-    qsqsum += atom->q[i]*atom->q[i];
-  }
-
-  double tmp;
-  MPI_Allreduce(&qsum,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
-  qsum = tmp;
-  MPI_Allreduce(&qsqsum,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
-  qsqsum = tmp;
-
-  if (qsqsum == 0.0)
-    error->all(FLERR,"Cannot use kspace solver on system with no charge");
-  if (fabs(qsum) > SMALL && comm->me == 0) {
-    char str[128];
-    sprintf(str,"System is not charge neutral, net charge = %g",qsum);
-    error->warning(FLERR,str);
-  }
-
-  // set accuracy (force units) from accuracy_relative or accuracy_absolute
-
-  if (accuracy_absolute >= 0.0) accuracy = accuracy_absolute;
-  else accuracy = accuracy_relative * two_charge_force;
-
-  // setup K-space resolution
-
-  q2 = qsqsum * force->qqrd2e;
-  bigint natoms = atom->natoms;
-
-  triclinic = domain->triclinic;
-
-  // use xprd,yprd,zprd even if triclinic so grid size is the same
-  // adjust z dimension for 2d slab Ewald
-  // 3d Ewald just uses zprd since slab_volfactor = 1.0
-
-  double xprd = domain->xprd;
-  double yprd = domain->yprd;
-  double zprd = domain->zprd;
-  double zprd_slab = zprd*slab_volfactor;
-
-  // make initial g_ewald estimate
-  // based on desired accuracy and real space cutoff
-  // fluid-occupied volume used to estimate real-space error
-  // zprd used rather than zprd_slab
-
-  if (!gewaldflag) {
-    if (accuracy <= 0.0)
-      error->all(FLERR,"KSpace accuracy must be > 0");
-    g_ewald = accuracy*sqrt(natoms*cutoff*xprd*yprd*zprd) / (2.0*q2);
-    if (g_ewald >= 1.0) g_ewald = (1.35 - 0.15*log(accuracy))/cutoff;
-    else g_ewald = sqrt(-log(g_ewald)) / cutoff;
-  }
-
-  // setup Ewald coefficients so can print stats
-
-  setup();
-
-  // final RMS accuracy
-
-  double lprx = rms(kxmax_orig,xprd,natoms,q2);
-  double lpry = rms(kymax_orig,yprd,natoms,q2);
-  double lprz = rms(kzmax_orig,zprd_slab,natoms,q2);
-  double lpr = sqrt(lprx*lprx + lpry*lpry + lprz*lprz) / sqrt(3.0);
-  double q2_over_sqrt = q2 / sqrt(natoms*cutoff*xprd*yprd*zprd_slab);
-  double spr = 2.0 *q2_over_sqrt * exp(-g_ewald*g_ewald*cutoff*cutoff);
-  double tpr = estimate_table_accuracy(q2_over_sqrt,spr);
-  double estimated_accuracy = sqrt(lpr*lpr + spr*spr + tpr*tpr);
-
-  // stats
-
-  if (comm->me == 0) {
-    if (screen) {
-      fprintf(screen,"  G vector (1/distance) = %g\n",g_ewald);
-      fprintf(screen,"  estimated absolute RMS force accuracy = %g\n",
-              estimated_accuracy);
-      fprintf(screen,"  estimated relative force accuracy = %g\n",
-              estimated_accuracy/two_charge_force);
-      fprintf(screen,"  KSpace vectors: actual max1d max3d = %d %d %d\n",
-              kcount,kmax,kmax3d);
-      fprintf(screen,"                  kxmax kymax kzmax  = %d %d %d\n",
-              kxmax,kymax,kzmax);
-    }
-    if (logfile) {
-      fprintf(logfile,"  G vector (1/distance) = %g\n",g_ewald);
-      fprintf(logfile,"  estimated absolute RMS force accuracy = %g\n",
-              estimated_accuracy);
-      fprintf(logfile,"  estimated relative force accuracy = %g\n",
-              estimated_accuracy/two_charge_force);
-      fprintf(logfile,"  KSpace vectors: actual max1d max3d = %d %d %d\n",
-              kcount,kmax,kmax3d);
-      fprintf(logfile,"                  kxmax kymax kzmax  = %d %d %d\n",
-              kxmax,kymax,kzmax);
-    }
-  }
-}
-
-/* ----------------------------------------------------------------------
-   adjust Ewald coeffs, called initially and whenever volume has changed
-------------------------------------------------------------------------- */
-
-void Ewald::setup()
-{
-  // volume-dependent factors
-
-  double xprd = domain->xprd;
-  double yprd = domain->yprd;
-  double zprd = domain->zprd;
-
-  // adjustment of z dimension for 2d slab Ewald
-  // 3d Ewald just uses zprd since slab_volfactor = 1.0
-
-  double zprd_slab = zprd*slab_volfactor;
-  volume = xprd * yprd * zprd_slab;
-
-  unitk[0] = 2.0*MY_PI/xprd;
-  unitk[1] = 2.0*MY_PI/yprd;
-  unitk[2] = 2.0*MY_PI/zprd_slab;
-
-  int kmax_old = kmax;
-
-  if (kewaldflag == 0) {
-
-    // determine kmax
-    // function of current box size, accuracy, G_ewald (short-range cutoff)
-
-    bigint natoms = atom->natoms;
-    double err;
-    kxmax = 1;
-    kymax = 1;
-    kzmax = 1;
-
-    err = rms(kxmax,xprd,natoms,q2);
-    while (err > accuracy) {
-      kxmax++;
-      err = rms(kxmax,xprd,natoms,q2);
-    }
-
-    err = rms(kymax,yprd,natoms,q2);
-    while (err > accuracy) {
-      kymax++;
-      err = rms(kymax,yprd,natoms,q2);
-    }
-
-    err = rms(kzmax,zprd_slab,natoms,q2);
-    while (err > accuracy) {
-      kzmax++;
-      err = rms(kzmax,zprd_slab,natoms,q2);
-    }
-
-    kmax = MAX(kxmax,kymax);
-    kmax = MAX(kmax,kzmax);
-    kmax3d = 4*kmax*kmax*kmax + 6*kmax*kmax + 3*kmax;
-    
-    double gsqxmx = unitk[0]*unitk[0]*kxmax*kxmax;
-    double gsqymx = unitk[1]*unitk[1]*kymax*kymax;
-    double gsqzmx = unitk[2]*unitk[2]*kzmax*kzmax;
-    gsqmx = MAX(gsqxmx,gsqymx);
-    gsqmx = MAX(gsqmx,gsqzmx);
-    
-    kxmax_orig = kxmax;
-    kymax_orig = kymax;
-    kzmax_orig = kzmax;
-
-    // scale lattice vectors for triclinic skew
-    
-    if (triclinic) {
-      double tmp[3];
-      tmp[0] = kxmax/xprd;
-      tmp[1] = kymax/yprd;
-      tmp[2] = kzmax/zprd;
-      lamda2xT(&tmp[0],&tmp[0]);
-      kxmax = MAX(1,static_cast<int>(tmp[0]));
-      kymax = MAX(1,static_cast<int>(tmp[1]));
-      kzmax = MAX(1,static_cast<int>(tmp[2]));
-      
-      kmax = MAX(kxmax,kymax);
-      kmax = MAX(kmax,kzmax);
-      kmax3d = 4*kmax*kmax*kmax + 6*kmax*kmax + 3*kmax;
-    }
-
-  } else {
-
-    kxmax = kx_ewald;
-    kymax = ky_ewald;
-    kzmax = kz_ewald;
-    
-    kxmax_orig = kxmax;
-    kymax_orig = kymax;
-    kzmax_orig = kzmax;
-
-    kmax = MAX(kxmax,kymax);
-    kmax = MAX(kmax,kzmax);
-    kmax3d = 4*kmax*kmax*kmax + 6*kmax*kmax + 3*kmax;
-
-    double gsqxmx = unitk[0]*unitk[0]*kxmax*kxmax;
-    double gsqymx = unitk[1]*unitk[1]*kymax*kymax;
-    double gsqzmx = unitk[2]*unitk[2]*kzmax*kzmax;
-    gsqmx = MAX(gsqxmx,gsqymx);
-    gsqmx = MAX(gsqmx,gsqzmx);
-  }
-
-  gsqmx *= 1.00001;
-
-  // if size has grown, reallocate k-dependent and nlocal-dependent arrays
-
-  if (kmax > kmax_old) {
-    deallocate();
-    allocate();
-    group_allocate_flag = 0;
-
-    memory->destroy(ek);
-    memory->destroy3d_offset(cs,-kmax_created);
-    memory->destroy3d_offset(sn,-kmax_created);
-    nmax = atom->nmax;
-    memory->create(ek,nmax,3,"ewald:ek");
-    memory->create3d_offset(cs,-kmax,kmax,3,nmax,"ewald:cs");
-    memory->create3d_offset(sn,-kmax,kmax,3,nmax,"ewald:sn");
-    kmax_created = kmax;
-  }
-
-  // pre-compute Ewald coefficients
-
-  if (triclinic == 0)
-    coeffs();
-  else
-    coeffs_triclinic();
-}
-
-/* ----------------------------------------------------------------------
-   compute RMS accuracy for a dimension
-------------------------------------------------------------------------- */
-
-double Ewald::rms(int km, double prd, bigint natoms, double q2)
-{
-  double value = 2.0*q2*g_ewald/prd *
-    sqrt(1.0/(MY_PI*km*natoms)) *
-    exp(-MY_PI*MY_PI*km*km/(g_ewald*g_ewald*prd*prd));
-
-  return value;
-}
-
-/* ----------------------------------------------------------------------
-   compute the Ewald long-range force, energy, virial
-------------------------------------------------------------------------- */
-
-void Ewald::compute(int eflag, int vflag)
-{
-  int i,j,k;
-
-  // set energy/virial flags
-
-  if (eflag || vflag) ev_setup(eflag,vflag);
-  else evflag = evflag_atom = eflag_global = vflag_global =
-         eflag_atom = vflag_atom = 0;
-
-  // extend size of per-atom arrays if necessary
-
-  if (atom->nlocal > nmax) {
-    memory->destroy(ek);
-    memory->destroy3d_offset(cs,-kmax_created);
-    memory->destroy3d_offset(sn,-kmax_created);
-    nmax = atom->nmax;
-    memory->create(ek,nmax,3,"ewald:ek");
-    memory->create3d_offset(cs,-kmax,kmax,3,nmax,"ewald:cs");
-    memory->create3d_offset(sn,-kmax,kmax,3,nmax,"ewald:sn");
-    kmax_created = kmax;
-  }
-
-  // partial structure factors on each processor
-  // total structure factor by summing over procs
-
-  if (triclinic == 0)
-    eik_dot_r();
-  else
-    eik_dot_r_triclinic();
-
-  MPI_Allreduce(sfacrl,sfacrl_all,kcount,MPI_DOUBLE,MPI_SUM,world);
-  MPI_Allreduce(sfacim,sfacim_all,kcount,MPI_DOUBLE,MPI_SUM,world);
-
-  // K-space portion of electric field
-  // double loop over K-vectors and local atoms
-  // perform per-atom calculations if needed
-
-  double **f = atom->f;
-  double *q = atom->q;
-  int nlocal = atom->nlocal;
-
-  int kx,ky,kz;
-  double cypz,sypz,exprl,expim,partial,partial_peratom;
-
-  for (i = 0; i < nlocal; i++) {
-    ek[i][0] = 0.0;
-    ek[i][1] = 0.0;
-    ek[i][2] = 0.0;
-  }
-
-  for (k = 0; k < kcount; k++) {
-    kx = kxvecs[k];
-    ky = kyvecs[k];
-    kz = kzvecs[k];
-
-    for (i = 0; i < nlocal; i++) {
-      cypz = cs[ky][1][i]*cs[kz][2][i] - sn[ky][1][i]*sn[kz][2][i];
-      sypz = sn[ky][1][i]*cs[kz][2][i] + cs[ky][1][i]*sn[kz][2][i];
-      exprl = cs[kx][0][i]*cypz - sn[kx][0][i]*sypz;
-      expim = sn[kx][0][i]*cypz + cs[kx][0][i]*sypz;
-      partial = expim*sfacrl_all[k] - exprl*sfacim_all[k];
-      ek[i][0] += partial*eg[k][0];
-      ek[i][1] += partial*eg[k][1];
-      ek[i][2] += partial*eg[k][2];
-
-      if (evflag_atom) {
-        partial_peratom = exprl*sfacrl_all[k] + expim*sfacim_all[k];
-        if (eflag_atom) eatom[i] += q[i]*ug[k]*partial_peratom;
-        if (vflag_atom)
-          for (j = 0; j < 6; j++)
-            vatom[i][j] += ug[k]*vg[k][j]*partial_peratom;
-      }
-    }
-  }
-
-  // convert E-field to force
-
-  const double qscale = force->qqrd2e * scale;
-
-  for (i = 0; i < nlocal; i++) {
-    f[i][0] += qscale * q[i]*ek[i][0];
-    f[i][1] += qscale * q[i]*ek[i][1];
-    if (slabflag != 2) f[i][2] += qscale * q[i]*ek[i][2];
-  }
-
-  // global energy
-
-  if (eflag_global) {
-    for (k = 0; k < kcount; k++)
-      energy += ug[k] * (sfacrl_all[k]*sfacrl_all[k] +
-                         sfacim_all[k]*sfacim_all[k]);
-    energy -= g_ewald*qsqsum/MY_PIS +
-      MY_PI2*qsum*qsum / (g_ewald*g_ewald*volume);
-    energy *= qscale;
-  }
-
-  // global virial
-
-  if (vflag_global) {
-    double uk;
-    for (k = 0; k < kcount; k++) {
-      uk = ug[k] * (sfacrl_all[k]*sfacrl_all[k] + sfacim_all[k]*sfacim_all[k]);
-      for (j = 0; j < 6; j++) virial[j] += uk*vg[k][j];
-    }
-    for (j = 0; j < 6; j++) virial[j] *= qscale;
-  }
-
-  // per-atom energy/virial
-  // energy includes self-energy correction
-
-  if (evflag_atom) {
-    if (eflag_atom) {
-      for (i = 0; i < nlocal; i++) {
-        eatom[i] -= g_ewald*q[i]*q[i]/MY_PIS + MY_PI2*q[i]*qsum /
-          (g_ewald*g_ewald*volume);
-        eatom[i] *= qscale;
-      }
-    }
-
-    if (vflag_atom)
-      for (i = 0; i < nlocal; i++)
-        for (j = 0; j < 6; j++) vatom[i][j] *= q[i]*qscale;
-  }
-
-  // 2d slab correction
-
-  if (slabflag == 1) slabcorr();
-}
-
-/* ---------------------------------------------------------------------- */
-
-void Ewald::eik_dot_r()
-{
-  int i,k,l,m,n,ic;
-  double cstr1,sstr1,cstr2,sstr2,cstr3,sstr3,cstr4,sstr4;
-  double sqk,clpm,slpm;
-
-  double **x = atom->x;
-  double *q = atom->q;
-  int nlocal = atom->nlocal;
-
-  n = 0;
-
-  // (k,0,0), (0,l,0), (0,0,m)
-
-  for (ic = 0; ic < 3; ic++) {
-    sqk = unitk[ic]*unitk[ic];
-    if (sqk <= gsqmx) {
-      cstr1 = 0.0;
-      sstr1 = 0.0;
-      for (i = 0; i < nlocal; i++) {
-        cs[0][ic][i] = 1.0;
-        sn[0][ic][i] = 0.0;
-        cs[1][ic][i] = cos(unitk[ic]*x[i][ic]);
-        sn[1][ic][i] = sin(unitk[ic]*x[i][ic]);
-        cs[-1][ic][i] = cs[1][ic][i];
-        sn[-1][ic][i] = -sn[1][ic][i];
-        cstr1 += q[i]*cs[1][ic][i];
-        sstr1 += q[i]*sn[1][ic][i];
-      }
-      sfacrl[n] = cstr1;
-      sfacim[n++] = sstr1;
-    }
-  }
-
-  for (m = 2; m <= kmax; m++) {
-    for (ic = 0; ic < 3; ic++) {
-      sqk = m*unitk[ic] * m*unitk[ic];
-      if (sqk <= gsqmx) {
-        cstr1 = 0.0;
-        sstr1 = 0.0;
-        for (i = 0; i < nlocal; i++) {
-          cs[m][ic][i] = cs[m-1][ic][i]*cs[1][ic][i] -
-            sn[m-1][ic][i]*sn[1][ic][i];
-          sn[m][ic][i] = sn[m-1][ic][i]*cs[1][ic][i] +
-            cs[m-1][ic][i]*sn[1][ic][i];
-          cs[-m][ic][i] = cs[m][ic][i];
-          sn[-m][ic][i] = -sn[m][ic][i];
-          cstr1 += q[i]*cs[m][ic][i];
-          sstr1 += q[i]*sn[m][ic][i];
-        }
-        sfacrl[n] = cstr1;
-        sfacim[n++] = sstr1;
-      }
-    }
-  }
-
-  // 1 = (k,l,0), 2 = (k,-l,0)
-
-  for (k = 1; k <= kxmax; k++) {
-    for (l = 1; l <= kymax; l++) {
-      sqk = (k*unitk[0] * k*unitk[0]) + (l*unitk[1] * l*unitk[1]);
-      if (sqk <= gsqmx) {
-        cstr1 = 0.0;
-        sstr1 = 0.0;
-        cstr2 = 0.0;
-        sstr2 = 0.0;
-        for (i = 0; i < nlocal; i++) {
-          cstr1 += q[i]*(cs[k][0][i]*cs[l][1][i] - sn[k][0][i]*sn[l][1][i]);
-          sstr1 += q[i]*(sn[k][0][i]*cs[l][1][i] + cs[k][0][i]*sn[l][1][i]);
-          cstr2 += q[i]*(cs[k][0][i]*cs[l][1][i] + sn[k][0][i]*sn[l][1][i]);
-          sstr2 += q[i]*(sn[k][0][i]*cs[l][1][i] - cs[k][0][i]*sn[l][1][i]);
-        }
-        sfacrl[n] = cstr1;
-        sfacim[n++] = sstr1;
-        sfacrl[n] = cstr2;
-        sfacim[n++] = sstr2;
-      }
-    }
-  }
-
-  // 1 = (0,l,m), 2 = (0,l,-m)
-
-  for (l = 1; l <= kymax; l++) {
-    for (m = 1; m <= kzmax; m++) {
-      sqk = (l*unitk[1] * l*unitk[1]) + (m*unitk[2] * m*unitk[2]);
-      if (sqk <= gsqmx) {
-        cstr1 = 0.0;
-        sstr1 = 0.0;
-        cstr2 = 0.0;
-        sstr2 = 0.0;
-        for (i = 0; i < nlocal; i++) {
-          cstr1 += q[i]*(cs[l][1][i]*cs[m][2][i] - sn[l][1][i]*sn[m][2][i]);
-          sstr1 += q[i]*(sn[l][1][i]*cs[m][2][i] + cs[l][1][i]*sn[m][2][i]);
-          cstr2 += q[i]*(cs[l][1][i]*cs[m][2][i] + sn[l][1][i]*sn[m][2][i]);
-          sstr2 += q[i]*(sn[l][1][i]*cs[m][2][i] - cs[l][1][i]*sn[m][2][i]);
-        }
-        sfacrl[n] = cstr1;
-        sfacim[n++] = sstr1;
-        sfacrl[n] = cstr2;
-        sfacim[n++] = sstr2;
-      }
-    }
-  }
-
-  // 1 = (k,0,m), 2 = (k,0,-m)
-
-  for (k = 1; k <= kxmax; k++) {
-    for (m = 1; m <= kzmax; m++) {
-      sqk = (k*unitk[0] * k*unitk[0]) + (m*unitk[2] * m*unitk[2]);
-      if (sqk <= gsqmx) {
-        cstr1 = 0.0;
-        sstr1 = 0.0;
-        cstr2 = 0.0;
-        sstr2 = 0.0;
-        for (i = 0; i < nlocal; i++) {
-          cstr1 += q[i]*(cs[k][0][i]*cs[m][2][i] - sn[k][0][i]*sn[m][2][i]);
-          sstr1 += q[i]*(sn[k][0][i]*cs[m][2][i] + cs[k][0][i]*sn[m][2][i]);
-          cstr2 += q[i]*(cs[k][0][i]*cs[m][2][i] + sn[k][0][i]*sn[m][2][i]);
-          sstr2 += q[i]*(sn[k][0][i]*cs[m][2][i] - cs[k][0][i]*sn[m][2][i]);
-        }
-        sfacrl[n] = cstr1;
-        sfacim[n++] = sstr1;
-        sfacrl[n] = cstr2;
-        sfacim[n++] = sstr2;
-      }
-    }
-  }
-
-  // 1 = (k,l,m), 2 = (k,-l,m), 3 = (k,l,-m), 4 = (k,-l,-m)
-
-  for (k = 1; k <= kxmax; k++) {
-    for (l = 1; l <= kymax; l++) {
-      for (m = 1; m <= kzmax; m++) {
-        sqk = (k*unitk[0] * k*unitk[0]) + (l*unitk[1] * l*unitk[1]) +
-          (m*unitk[2] * m*unitk[2]);
-        if (sqk <= gsqmx) {
-          cstr1 = 0.0;
-          sstr1 = 0.0;
-          cstr2 = 0.0;
-          sstr2 = 0.0;
-          cstr3 = 0.0;
-          sstr3 = 0.0;
-          cstr4 = 0.0;
-          sstr4 = 0.0;
-          for (i = 0; i < nlocal; i++) {
-            clpm = cs[l][1][i]*cs[m][2][i] - sn[l][1][i]*sn[m][2][i];
-            slpm = sn[l][1][i]*cs[m][2][i] + cs[l][1][i]*sn[m][2][i];
-            cstr1 += q[i]*(cs[k][0][i]*clpm - sn[k][0][i]*slpm);
-            sstr1 += q[i]*(sn[k][0][i]*clpm + cs[k][0][i]*slpm);
-
-            clpm = cs[l][1][i]*cs[m][2][i] + sn[l][1][i]*sn[m][2][i];
-            slpm = -sn[l][1][i]*cs[m][2][i] + cs[l][1][i]*sn[m][2][i];
-            cstr2 += q[i]*(cs[k][0][i]*clpm - sn[k][0][i]*slpm);
-            sstr2 += q[i]*(sn[k][0][i]*clpm + cs[k][0][i]*slpm);
-
-            clpm = cs[l][1][i]*cs[m][2][i] + sn[l][1][i]*sn[m][2][i];
-            slpm = sn[l][1][i]*cs[m][2][i] - cs[l][1][i]*sn[m][2][i];
-            cstr3 += q[i]*(cs[k][0][i]*clpm - sn[k][0][i]*slpm);
-            sstr3 += q[i]*(sn[k][0][i]*clpm + cs[k][0][i]*slpm);
-
-            clpm = cs[l][1][i]*cs[m][2][i] - sn[l][1][i]*sn[m][2][i];
-            slpm = -sn[l][1][i]*cs[m][2][i] - cs[l][1][i]*sn[m][2][i];
-            cstr4 += q[i]*(cs[k][0][i]*clpm - sn[k][0][i]*slpm);
-            sstr4 += q[i]*(sn[k][0][i]*clpm + cs[k][0][i]*slpm);
-          }
-          sfacrl[n] = cstr1;
-          sfacim[n++] = sstr1;
-          sfacrl[n] = cstr2;
-          sfacim[n++] = sstr2;
-          sfacrl[n] = cstr3;
-          sfacim[n++] = sstr3;
-          sfacrl[n] = cstr4;
-          sfacim[n++] = sstr4;
-        }
-      }
-    }
-  }
-}
-
-/* ---------------------------------------------------------------------- */
-
-void Ewald::eik_dot_r_triclinic()
-{
-  int i,k,l,m,n,ic;
-  double cstr1,sstr1;
-  double sqk,clpm,slpm;
-
-  double **x = atom->x;
-  double *q = atom->q;
-  int nlocal = atom->nlocal;
-
-  double unitk_lamda[3];
-
-  double max_kvecs[3];
-  max_kvecs[0] = kxmax;
-  max_kvecs[1] = kymax;
-  max_kvecs[2] = kzmax;
-
-  // (k,0,0), (0,l,0), (0,0,m)
-
-  for (ic = 0; ic < 3; ic++) {
-    unitk_lamda[0] = 0.0;
-    unitk_lamda[1] = 0.0;
-    unitk_lamda[2] = 0.0;
-    unitk_lamda[ic] = 2.0*MY_PI;
-    x2lamdaT(&unitk_lamda[0],&unitk_lamda[0]);
-    sqk = unitk_lamda[ic]*unitk_lamda[ic];
-    if (sqk <= gsqmx) {
-      for (i = 0; i < nlocal; i++) {
-        cs[0][ic][i] = 1.0;
-        sn[0][ic][i] = 0.0;
-        cs[1][ic][i] = cos(unitk_lamda[0]*x[i][0] + unitk_lamda[1]*x[i][1] + unitk_lamda[2]*x[i][2]);
-        sn[1][ic][i] = sin(unitk_lamda[0]*x[i][0] + unitk_lamda[1]*x[i][1] + unitk_lamda[2]*x[i][2]);
-        cs[-1][ic][i] = cs[1][ic][i];
-        sn[-1][ic][i] = -sn[1][ic][i];
-      }
-    }
-  }
-
-  for (ic = 0; ic < 3; ic++) {
-    for (m = 2; m <= max_kvecs[ic]; m++) {
-      unitk_lamda[0] = 0.0;
-      unitk_lamda[1] = 0.0;
-      unitk_lamda[2] = 0.0;
-      unitk_lamda[ic] = 2.0*MY_PI*m;
-      x2lamdaT(&unitk_lamda[0],&unitk_lamda[0]);
-      sqk = unitk_lamda[ic]*unitk_lamda[ic];
-      for (i = 0; i < nlocal; i++) {
-        cs[m][ic][i] = cs[m-1][ic][i]*cs[1][ic][i] -
-          sn[m-1][ic][i]*sn[1][ic][i];
-        sn[m][ic][i] = sn[m-1][ic][i]*cs[1][ic][i] +
-          cs[m-1][ic][i]*sn[1][ic][i];
-        cs[-m][ic][i] = cs[m][ic][i];
-        sn[-m][ic][i] = -sn[m][ic][i];
-      }
-    }
-  }
-
-  for (n = 0; n < kcount; n++) {
-    k = kxvecs[n];
-    l = kyvecs[n];
-    m = kzvecs[n];
-    cstr1 = 0.0;
-    sstr1 = 0.0;
-    for (i = 0; i < nlocal; i++) {
-      clpm = cs[l][1][i]*cs[m][2][i] - sn[l][1][i]*sn[m][2][i];
-      slpm = sn[l][1][i]*cs[m][2][i] + cs[l][1][i]*sn[m][2][i];
-      cstr1 += q[i]*(cs[k][0][i]*clpm - sn[k][0][i]*slpm);
-      sstr1 += q[i]*(sn[k][0][i]*clpm + cs[k][0][i]*slpm);
-    }
-    sfacrl[n] = cstr1;
-    sfacim[n] = sstr1;
-  }
-}
-
-/* ----------------------------------------------------------------------
-   pre-compute coefficients for each Ewald K-vector
-------------------------------------------------------------------------- */
-
-void Ewald::coeffs()
-{
-  int k,l,m;
-  double sqk,vterm;
-
-  double g_ewald_sq_inv = 1.0 / (g_ewald*g_ewald);
-  double preu = 4.0*MY_PI/volume;
-
-  kcount = 0;
-
-  // (k,0,0), (0,l,0), (0,0,m)
-
-  for (m = 1; m <= kmax; m++) {
-    sqk = (m*unitk[0]) * (m*unitk[0]);
-    if (sqk <= gsqmx) {
-      kxvecs[kcount] = m;
-      kyvecs[kcount] = 0;
-      kzvecs[kcount] = 0;
-      ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk;
-      eg[kcount][0] = 2.0*unitk[0]*m*ug[kcount];
-      eg[kcount][1] = 0.0;
-      eg[kcount][2] = 0.0;
-      vterm = -2.0*(1.0/sqk + 0.25*g_ewald_sq_inv);
-      vg[kcount][0] = 1.0 + vterm*(unitk[0]*m)*(unitk[0]*m);
-      vg[kcount][1] = 1.0;
-      vg[kcount][2] = 1.0;
-      vg[kcount][3] = 0.0;
-      vg[kcount][4] = 0.0;
-      vg[kcount][5] = 0.0;
-      kcount++;
-    }
-    sqk = (m*unitk[1]) * (m*unitk[1]);
-    if (sqk <= gsqmx) {
-      kxvecs[kcount] = 0;
-      kyvecs[kcount] = m;
-      kzvecs[kcount] = 0;
-      ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk;
-      eg[kcount][0] = 0.0;
-      eg[kcount][1] = 2.0*unitk[1]*m*ug[kcount];
-      eg[kcount][2] = 0.0;
-      vterm = -2.0*(1.0/sqk + 0.25*g_ewald_sq_inv);
-      vg[kcount][0] = 1.0;
-      vg[kcount][1] = 1.0 + vterm*(unitk[1]*m)*(unitk[1]*m);
-      vg[kcount][2] = 1.0;
-      vg[kcount][3] = 0.0;
-      vg[kcount][4] = 0.0;
-      vg[kcount][5] = 0.0;
-      kcount++;
-    }
-    sqk = (m*unitk[2]) * (m*unitk[2]);
-    if (sqk <= gsqmx) {
-      kxvecs[kcount] = 0;
-      kyvecs[kcount] = 0;
-      kzvecs[kcount] = m;
-      ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk;
-      eg[kcount][0] = 0.0;
-      eg[kcount][1] = 0.0;
-      eg[kcount][2] = 2.0*unitk[2]*m*ug[kcount];
-      vterm = -2.0*(1.0/sqk + 0.25*g_ewald_sq_inv);
-      vg[kcount][0] = 1.0;
-      vg[kcount][1] = 1.0;
-      vg[kcount][2] = 1.0 + vterm*(unitk[2]*m)*(unitk[2]*m);
-      vg[kcount][3] = 0.0;
-      vg[kcount][4] = 0.0;
-      vg[kcount][5] = 0.0;
-      kcount++;
-    }
-  }
-
-  // 1 = (k,l,0), 2 = (k,-l,0)
-
-  for (k = 1; k <= kxmax; k++) {
-    for (l = 1; l <= kymax; l++) {
-      sqk = (unitk[0]*k) * (unitk[0]*k) + (unitk[1]*l) * (unitk[1]*l);
-      if (sqk <= gsqmx) {
-        kxvecs[kcount] = k;
-        kyvecs[kcount] = l;
-        kzvecs[kcount] = 0;
-        ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk;
-        eg[kcount][0] = 2.0*unitk[0]*k*ug[kcount];
-        eg[kcount][1] = 2.0*unitk[1]*l*ug[kcount];
-        eg[kcount][2] = 0.0;
-        vterm = -2.0*(1.0/sqk + 0.25*g_ewald_sq_inv);
-        vg[kcount][0] = 1.0 + vterm*(unitk[0]*k)*(unitk[0]*k);
-        vg[kcount][1] = 1.0 + vterm*(unitk[1]*l)*(unitk[1]*l);
-        vg[kcount][2] = 1.0;
-        vg[kcount][3] = vterm*unitk[0]*k*unitk[1]*l;
-        vg[kcount][4] = 0.0;
-        vg[kcount][5] = 0.0;
-        kcount++;
-
-        kxvecs[kcount] = k;
-        kyvecs[kcount] = -l;
-        kzvecs[kcount] = 0;
-        ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk;
-        eg[kcount][0] = 2.0*unitk[0]*k*ug[kcount];
-        eg[kcount][1] = -2.0*unitk[1]*l*ug[kcount];
-        eg[kcount][2] = 0.0;
-        vg[kcount][0] = 1.0 + vterm*(unitk[0]*k)*(unitk[0]*k);
-        vg[kcount][1] = 1.0 + vterm*(unitk[1]*l)*(unitk[1]*l);
-        vg[kcount][2] = 1.0;
-        vg[kcount][3] = -vterm*unitk[0]*k*unitk[1]*l;
-        vg[kcount][4] = 0.0;
-        vg[kcount][5] = 0.0;
-        kcount++;;
-      }
-    }
-  }
-
-  // 1 = (0,l,m), 2 = (0,l,-m)
-
-  for (l = 1; l <= kymax; l++) {
-    for (m = 1; m <= kzmax; m++) {
-      sqk = (unitk[1]*l) * (unitk[1]*l) + (unitk[2]*m) * (unitk[2]*m);
-      if (sqk <= gsqmx) {
-        kxvecs[kcount] = 0;
-        kyvecs[kcount] = l;
-        kzvecs[kcount] = m;
-        ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk;
-        eg[kcount][0] =  0.0;
-        eg[kcount][1] =  2.0*unitk[1]*l*ug[kcount];
-        eg[kcount][2] =  2.0*unitk[2]*m*ug[kcount];
-        vterm = -2.0*(1.0/sqk + 0.25*g_ewald_sq_inv);
-        vg[kcount][0] = 1.0;
-        vg[kcount][1] = 1.0 + vterm*(unitk[1]*l)*(unitk[1]*l);
-        vg[kcount][2] = 1.0 + vterm*(unitk[2]*m)*(unitk[2]*m);
-        vg[kcount][3] = 0.0;
-        vg[kcount][4] = 0.0;
-        vg[kcount][5] = vterm*unitk[1]*l*unitk[2]*m;
-        kcount++;
-
-        kxvecs[kcount] = 0;
-        kyvecs[kcount] = l;
-        kzvecs[kcount] = -m;
-        ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk;
-        eg[kcount][0] =  0.0;
-        eg[kcount][1] =  2.0*unitk[1]*l*ug[kcount];
-        eg[kcount][2] = -2.0*unitk[2]*m*ug[kcount];
-        vg[kcount][0] = 1.0;
-        vg[kcount][1] = 1.0 + vterm*(unitk[1]*l)*(unitk[1]*l);
-        vg[kcount][2] = 1.0 + vterm*(unitk[2]*m)*(unitk[2]*m);
-        vg[kcount][3] = 0.0;
-        vg[kcount][4] = 0.0;
-        vg[kcount][5] = -vterm*unitk[1]*l*unitk[2]*m;
-        kcount++;
-      }
-    }
-  }
-
-  // 1 = (k,0,m), 2 = (k,0,-m)
-
-  for (k = 1; k <= kxmax; k++) {
-    for (m = 1; m <= kzmax; m++) {
-      sqk = (unitk[0]*k) * (unitk[0]*k) + (unitk[2]*m) * (unitk[2]*m);
-      if (sqk <= gsqmx) {
-        kxvecs[kcount] = k;
-        kyvecs[kcount] = 0;
-        kzvecs[kcount] = m;
-        ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk;
-        eg[kcount][0] =  2.0*unitk[0]*k*ug[kcount];
-        eg[kcount][1] =  0.0;
-        eg[kcount][2] =  2.0*unitk[2]*m*ug[kcount];
-        vterm = -2.0*(1.0/sqk + 0.25*g_ewald_sq_inv);
-        vg[kcount][0] = 1.0 + vterm*(unitk[0]*k)*(unitk[0]*k);
-        vg[kcount][1] = 1.0;
-        vg[kcount][2] = 1.0 + vterm*(unitk[2]*m)*(unitk[2]*m);
-        vg[kcount][3] = 0.0;
-        vg[kcount][4] = vterm*unitk[0]*k*unitk[2]*m;
-        vg[kcount][5] = 0.0;
-        kcount++;
-
-        kxvecs[kcount] = k;
-        kyvecs[kcount] = 0;
-        kzvecs[kcount] = -m;
-        ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk;
-        eg[kcount][0] =  2.0*unitk[0]*k*ug[kcount];
-        eg[kcount][1] =  0.0;
-        eg[kcount][2] = -2.0*unitk[2]*m*ug[kcount];
-        vg[kcount][0] = 1.0 + vterm*(unitk[0]*k)*(unitk[0]*k);
-        vg[kcount][1] = 1.0;
-        vg[kcount][2] = 1.0 + vterm*(unitk[2]*m)*(unitk[2]*m);
-        vg[kcount][3] = 0.0;
-        vg[kcount][4] = -vterm*unitk[0]*k*unitk[2]*m;
-        vg[kcount][5] = 0.0;
-        kcount++;
-      }
-    }
-  }
-
-  // 1 = (k,l,m), 2 = (k,-l,m), 3 = (k,l,-m), 4 = (k,-l,-m)
-
-  for (k = 1; k <= kxmax; k++) {
-    for (l = 1; l <= kymax; l++) {
-      for (m = 1; m <= kzmax; m++) {
-        sqk = (unitk[0]*k) * (unitk[0]*k) + (unitk[1]*l) * (unitk[1]*l) +
-          (unitk[2]*m) * (unitk[2]*m);
-        if (sqk <= gsqmx) {
-          kxvecs[kcount] = k;
-          kyvecs[kcount] = l;
-          kzvecs[kcount] = m;
-          ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk;
-          eg[kcount][0] = 2.0*unitk[0]*k*ug[kcount];
-          eg[kcount][1] = 2.0*unitk[1]*l*ug[kcount];
-          eg[kcount][2] = 2.0*unitk[2]*m*ug[kcount];
-          vterm = -2.0*(1.0/sqk + 0.25*g_ewald_sq_inv);
-          vg[kcount][0] = 1.0 + vterm*(unitk[0]*k)*(unitk[0]*k);
-          vg[kcount][1] = 1.0 + vterm*(unitk[1]*l)*(unitk[1]*l);
-          vg[kcount][2] = 1.0 + vterm*(unitk[2]*m)*(unitk[2]*m);
-          vg[kcount][3] = vterm*unitk[0]*k*unitk[1]*l;
-          vg[kcount][4] = vterm*unitk[0]*k*unitk[2]*m;
-          vg[kcount][5] = vterm*unitk[1]*l*unitk[2]*m;
-          kcount++;
-
-          kxvecs[kcount] = k;
-          kyvecs[kcount] = -l;
-          kzvecs[kcount] = m;
-          ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk;
-          eg[kcount][0] = 2.0*unitk[0]*k*ug[kcount];
-          eg[kcount][1] = -2.0*unitk[1]*l*ug[kcount];
-          eg[kcount][2] = 2.0*unitk[2]*m*ug[kcount];
-          vg[kcount][0] = 1.0 + vterm*(unitk[0]*k)*(unitk[0]*k);
-          vg[kcount][1] = 1.0 + vterm*(unitk[1]*l)*(unitk[1]*l);
-          vg[kcount][2] = 1.0 + vterm*(unitk[2]*m)*(unitk[2]*m);
-          vg[kcount][3] = -vterm*unitk[0]*k*unitk[1]*l;
-          vg[kcount][4] = vterm*unitk[0]*k*unitk[2]*m;
-          vg[kcount][5] = -vterm*unitk[1]*l*unitk[2]*m;
-          kcount++;
-
-          kxvecs[kcount] = k;
-          kyvecs[kcount] = l;
-          kzvecs[kcount] = -m;
-          ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk;
-          eg[kcount][0] = 2.0*unitk[0]*k*ug[kcount];
-          eg[kcount][1] = 2.0*unitk[1]*l*ug[kcount];
-          eg[kcount][2] = -2.0*unitk[2]*m*ug[kcount];
-          vg[kcount][0] = 1.0 + vterm*(unitk[0]*k)*(unitk[0]*k);
-          vg[kcount][1] = 1.0 + vterm*(unitk[1]*l)*(unitk[1]*l);
-          vg[kcount][2] = 1.0 + vterm*(unitk[2]*m)*(unitk[2]*m);
-          vg[kcount][3] = vterm*unitk[0]*k*unitk[1]*l;
-          vg[kcount][4] = -vterm*unitk[0]*k*unitk[2]*m;
-          vg[kcount][5] = -vterm*unitk[1]*l*unitk[2]*m;
-          kcount++;
-
-          kxvecs[kcount] = k;
-          kyvecs[kcount] = -l;
-          kzvecs[kcount] = -m;
-          ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk;
-          eg[kcount][0] = 2.0*unitk[0]*k*ug[kcount];
-          eg[kcount][1] = -2.0*unitk[1]*l*ug[kcount];
-          eg[kcount][2] = -2.0*unitk[2]*m*ug[kcount];
-          vg[kcount][0] = 1.0 + vterm*(unitk[0]*k)*(unitk[0]*k);
-          vg[kcount][1] = 1.0 + vterm*(unitk[1]*l)*(unitk[1]*l);
-          vg[kcount][2] = 1.0 + vterm*(unitk[2]*m)*(unitk[2]*m);
-          vg[kcount][3] = -vterm*unitk[0]*k*unitk[1]*l;
-          vg[kcount][4] = -vterm*unitk[0]*k*unitk[2]*m;
-          vg[kcount][5] = vterm*unitk[1]*l*unitk[2]*m;
-          kcount++;
-        }
-      }
-    }
-  }
-}
-
-/* ----------------------------------------------------------------------
-   pre-compute coefficients for each Ewald K-vector for a triclinic 
-   system
-------------------------------------------------------------------------- */
-
-void Ewald::coeffs_triclinic()
-{
-  int k,l,m;
-  double sqk,vterm;
-
-  double g_ewald_sq_inv = 1.0 / (g_ewald*g_ewald);
-  double preu = 4.0*MY_PI/volume;
-
-  double unitk_lamda[3];
-
-  kcount = 0;
-
-  // 1 = (k,l,m), 2 = (k,-l,m), 3 = (k,l,-m), 4 = (k,-l,-m)
-
-  for (k = 1; k <= kxmax; k++) {
-    for (l = -kymax; l <= kymax; l++) {
-      for (m = -kzmax; m <= kzmax; m++) {
-        unitk_lamda[0] = 2.0*MY_PI*k;
-        unitk_lamda[1] = 2.0*MY_PI*l;
-        unitk_lamda[2] = 2.0*MY_PI*m;
-        x2lamdaT(&unitk_lamda[0],&unitk_lamda[0]);
-        sqk = unitk_lamda[0]*unitk_lamda[0] + unitk_lamda[1]*unitk_lamda[1] +
-          unitk_lamda[2]*unitk_lamda[2];
-        if (sqk <= gsqmx) {
-          kxvecs[kcount] = k;
-          kyvecs[kcount] = l;
-          kzvecs[kcount] = m;
-          ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk;
-          eg[kcount][0] = 2.0*unitk_lamda[0]*ug[kcount];
-          eg[kcount][1] = 2.0*unitk_lamda[1]*ug[kcount];
-          eg[kcount][2] = 2.0*unitk_lamda[2]*ug[kcount];
-          vterm = -2.0*(1.0/sqk + 0.25*g_ewald_sq_inv);
-          vg[kcount][0] = 1.0 + vterm*unitk_lamda[0]*unitk_lamda[0];
-          vg[kcount][1] = 1.0 + vterm*unitk_lamda[1]*unitk_lamda[1];
-          vg[kcount][2] = 1.0 + vterm*unitk_lamda[2]*unitk_lamda[2];
-          vg[kcount][3] = vterm*unitk_lamda[0]*unitk_lamda[1];
-          vg[kcount][4] = vterm*unitk_lamda[0]*unitk_lamda[2];
-          vg[kcount][5] = vterm*unitk_lamda[1]*unitk_lamda[2];
-          kcount++;
-        }
-      }
-    }
-  }
-
-  // 1 = (0,l,m), 2 = (0,l,-m)
-
-  for (l = 1; l <= kymax; l++) {
-    for (m = -kzmax; m <= kzmax; m++) {
-      unitk_lamda[0] = 0.0;
-      unitk_lamda[1] = 2.0*MY_PI*l;
-      unitk_lamda[2] = 2.0*MY_PI*m;
-      x2lamdaT(&unitk_lamda[0],&unitk_lamda[0]);
-      sqk = unitk_lamda[1]*unitk_lamda[1] + unitk_lamda[2]*unitk_lamda[2];
-      if (sqk <= gsqmx) {
-        kxvecs[kcount] = 0;
-        kyvecs[kcount] = l;
-        kzvecs[kcount] = m;
-        ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk;
-        eg[kcount][0] =  0.0;
-        eg[kcount][1] =  2.0*unitk_lamda[1]*ug[kcount];
-        eg[kcount][2] =  2.0*unitk_lamda[2]*ug[kcount];
-        vterm = -2.0*(1.0/sqk + 0.25*g_ewald_sq_inv);
-        vg[kcount][0] = 1.0;
-        vg[kcount][1] = 1.0 + vterm*unitk_lamda[1]*unitk_lamda[1];
-        vg[kcount][2] = 1.0 + vterm*unitk_lamda[2]*unitk_lamda[2];
-        vg[kcount][3] = 0.0;
-        vg[kcount][4] = 0.0;
-        vg[kcount][5] = vterm*unitk_lamda[1]*unitk_lamda[2];
-        kcount++;
-      }
-    }
-  }
-
-  // (0,0,m)
-
-  for (m = 1; m <= kmax; m++) {
-    unitk_lamda[0] = 0.0;
-    unitk_lamda[1] = 0.0;
-    unitk_lamda[2] = 2.0*MY_PI*m;
-    x2lamdaT(&unitk_lamda[0],&unitk_lamda[0]);
-    sqk = unitk_lamda[2]*unitk_lamda[2];
-    if (sqk <= gsqmx) {
-      kxvecs[kcount] = 0;
-      kyvecs[kcount] = 0;
-      kzvecs[kcount] = m;
-      ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk;
-      eg[kcount][0] = 0.0;
-      eg[kcount][1] = 0.0;
-      eg[kcount][2] = 2.0*unitk_lamda[2]*ug[kcount];
-      vterm = -2.0*(1.0/sqk + 0.25*g_ewald_sq_inv);
-      vg[kcount][0] = 1.0;
-      vg[kcount][1] = 1.0;
-      vg[kcount][2] = 1.0 + vterm*unitk_lamda[2]*unitk_lamda[2];
-      vg[kcount][3] = 0.0;
-      vg[kcount][4] = 0.0;
-      vg[kcount][5] = 0.0;
-      kcount++;
-    }
-  }
-}
-
-/* ----------------------------------------------------------------------
-   allocate memory that depends on # of K-vectors
-------------------------------------------------------------------------- */
-
-void Ewald::allocate()
-{
-  kxvecs = new int[kmax3d];
-  kyvecs = new int[kmax3d];
-  kzvecs = new int[kmax3d];
-
-  ug = new double[kmax3d];
-  memory->create(eg,kmax3d,3,"ewald:eg");
-  memory->create(vg,kmax3d,6,"ewald:vg");
-
-  sfacrl = new double[kmax3d];
-  sfacim = new double[kmax3d];
-  sfacrl_all = new double[kmax3d];
-  sfacim_all = new double[kmax3d];
-}
-
-/* ----------------------------------------------------------------------
-   deallocate memory that depends on # of K-vectors
-------------------------------------------------------------------------- */
-
-void Ewald::deallocate()
-{
-  delete [] kxvecs;
-  delete [] kyvecs;
-  delete [] kzvecs;
-
-  delete [] ug;
-  memory->destroy(eg);
-  memory->destroy(vg);
-
-  delete [] sfacrl;
-  delete [] sfacim;
-  delete [] sfacrl_all;
-  delete [] sfacim_all;
-}
-
-/* ----------------------------------------------------------------------
-   Slab-geometry correction term to dampen inter-slab interactions between
-   periodically repeating slabs.  Yields good approximation to 2D Ewald if
-   adequate empty space is left between repeating slabs (J. Chem. Phys.
-   111, 3155).  Slabs defined here to be parallel to the xy plane. Also
-   extended to non-neutral systems (J. Chem. Phys. 131, 094107).
-------------------------------------------------------------------------- */
-
-void Ewald::slabcorr()
-{
-  // compute local contribution to global dipole moment
-
-  double *q = atom->q;
-  double **x = atom->x;
-  double zprd = domain->zprd;
-  int nlocal = atom->nlocal;
-
-  double dipole = 0.0;
-  for (int i = 0; i < nlocal; i++) dipole += q[i]*x[i][2];
-
-  // sum local contributions to get global dipole moment
-
-  double dipole_all;
-  MPI_Allreduce(&dipole,&dipole_all,1,MPI_DOUBLE,MPI_SUM,world);
-
-  // need to make non-neutral systems and/or
-  //  per-atom energy translationally invariant
-
-  double dipole_r2 = 0.0;
-  if (eflag_atom || fabs(qsum) > SMALL) {
-    for (int i = 0; i < nlocal; i++)
-      dipole_r2 += q[i]*x[i][2]*x[i][2];
-
-    // sum local contributions
-
-    double tmp;
-    MPI_Allreduce(&dipole_r2,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
-    dipole_r2 = tmp;
-  }
-
-  // compute corrections
-
-  const double e_slabcorr = MY_2PI*(dipole_all*dipole_all -
-    qsum*dipole_r2 - qsum*qsum*zprd*zprd/12.0)/volume;
-  const double qscale = force->qqrd2e * scale;
-
-  if (eflag_global) energy += qscale * e_slabcorr;
-
-  // per-atom energy
-
-  if (eflag_atom) {
-    double efact = qscale * MY_2PI/volume;
-    for (int i = 0; i < nlocal; i++)
-      eatom[i] += efact * q[i]*(x[i][2]*dipole_all - 0.5*(dipole_r2 +
-        qsum*x[i][2]*x[i][2]) - qsum*zprd*zprd/12.0);
-  }
-
-  // add on force corrections
-
-  double ffact = qscale * (-4.0*MY_PI/volume);
-  double **f = atom->f;
-
-  for (int i = 0; i < nlocal; i++) f[i][2] += ffact * q[i]*(dipole_all - qsum*x[i][2]);
-}
-
-/* ----------------------------------------------------------------------
-   memory usage of local arrays
-------------------------------------------------------------------------- */
-
-double Ewald::memory_usage()
-{
-  double bytes = 3 * kmax3d * sizeof(int);
-  bytes += (1 + 3 + 6) * kmax3d * sizeof(double);
-  bytes += 4 * kmax3d * sizeof(double);
-  bytes += nmax*3 * sizeof(double);
-  bytes += 2 * (2*kmax+1)*3*nmax * sizeof(double);
-  return bytes;
-}
-
-/* ----------------------------------------------------------------------
-   group-group interactions
- ------------------------------------------------------------------------- */
-
-/* ----------------------------------------------------------------------
-   compute the Ewald total long-range force and energy for groups A and B
- ------------------------------------------------------------------------- */
-
-void Ewald::compute_group_group(int groupbit_A, int groupbit_B, int AA_flag)
-{
-  if (slabflag && triclinic)
-    error->all(FLERR,"Cannot (yet) use K-space slab "
-               "correction with compute group/group for triclinic systems");
-
-  int i,k;
-
-  if (!group_allocate_flag) {
-    allocate_groups();
-    group_allocate_flag = 1;
-  }
-
-  e2group = 0.0; //energy
-  f2group[0] = 0.0; //force in x-direction
-  f2group[1] = 0.0; //force in y-direction
-  f2group[2] = 0.0; //force in z-direction
-
-  // partial and total structure factors for groups A and B
-
-  for (k = 0; k < kcount; k++) {
-
-    // group A
-
-    sfacrl_A[k] = 0.0;
-    sfacim_A[k] = 0.0;
-    sfacrl_A_all[k] = 0.0;
-    sfacim_A_all[k] = 0;
-
-    // group B
-
-    sfacrl_B[k] = 0.0;
-    sfacim_B[k] = 0.0;
-    sfacrl_B_all[k] = 0.0;
-    sfacim_B_all[k] = 0.0;
-  }
-
-  double *q = atom->q;
-  int nlocal = atom->nlocal;
-  int *mask = atom->mask;
-
-  int kx,ky,kz;
-  double cypz,sypz,exprl,expim;
-
-  // partial structure factors for groups A and B on each processor
-
-  for (k = 0; k < kcount; k++) {
-    kx = kxvecs[k];
-    ky = kyvecs[k];
-    kz = kzvecs[k];
-
-    for (i = 0; i < nlocal; i++) {
-
-      if (!((mask[i] & groupbit_A) && (mask[i] & groupbit_B)))
-        if (AA_flag) continue;
-
-      if ((mask[i] & groupbit_A) || (mask[i] & groupbit_B)) {
-
-        cypz = cs[ky][1][i]*cs[kz][2][i] - sn[ky][1][i]*sn[kz][2][i];
-        sypz = sn[ky][1][i]*cs[kz][2][i] + cs[ky][1][i]*sn[kz][2][i];
-        exprl = cs[kx][0][i]*cypz - sn[kx][0][i]*sypz;
-        expim = sn[kx][0][i]*cypz + cs[kx][0][i]*sypz;
-
-        // group A
-
-        if (mask[i] & groupbit_A) {
-          sfacrl_A[k] += q[i]*exprl;
-          sfacim_A[k] += q[i]*expim;
-        }
-
-        // group B
-
-        if (mask[i] & groupbit_B) {
-          sfacrl_B[k] += q[i]*exprl;
-          sfacim_B[k] += q[i]*expim;
-        }
-      }
-    }
-  }
-
-  // total structure factor by summing over procs
-
-  MPI_Allreduce(sfacrl_A,sfacrl_A_all,kcount,MPI_DOUBLE,MPI_SUM,world);
-  MPI_Allreduce(sfacim_A,sfacim_A_all,kcount,MPI_DOUBLE,MPI_SUM,world);
-
-  MPI_Allreduce(sfacrl_B,sfacrl_B_all,kcount,MPI_DOUBLE,MPI_SUM,world);
-  MPI_Allreduce(sfacim_B,sfacim_B_all,kcount,MPI_DOUBLE,MPI_SUM,world);
-
-  const double qscale = force->qqrd2e * scale;
-  double partial_group;
-
-  // total group A <--> group B energy
-  // self and boundary correction terms are in compute_group_group.cpp
-
-  for (k = 0; k < kcount; k++) {
-    partial_group = sfacrl_A_all[k]*sfacrl_B_all[k] +
-      sfacim_A_all[k]*sfacim_B_all[k];
-    e2group += ug[k]*partial_group;
-  }
-
-  e2group *= qscale;
-
-  // total group A <--> group B force
-
-  for (k = 0; k < kcount; k++) {
-    partial_group = sfacim_A_all[k]*sfacrl_B_all[k] -
-      sfacrl_A_all[k]*sfacim_B_all[k];
-    f2group[0] += eg[k][0]*partial_group;
-    f2group[1] += eg[k][1]*partial_group;
-    if (slabflag != 2) f2group[2] += eg[k][2]*partial_group;
-  }
-
-  f2group[0] *= qscale;
-  f2group[1] *= qscale;
-  f2group[2] *= qscale;
-
-  // 2d slab correction
-
-  if (slabflag == 1)
-    slabcorr_groups(groupbit_A, groupbit_B, AA_flag);
-}
-
-/* ----------------------------------------------------------------------
-   Slab-geometry correction term to dampen inter-slab interactions between
-   periodically repeating slabs.  Yields good approximation to 2D Ewald if
-   adequate empty space is left between repeating slabs (J. Chem. Phys.
-   111, 3155).  Slabs defined here to be parallel to the xy plane. Also
-   extended to non-neutral systems (J. Chem. Phys. 131, 094107).
-------------------------------------------------------------------------- */
-
-void Ewald::slabcorr_groups(int groupbit_A, int groupbit_B, int AA_flag)
-{
-  // compute local contribution to global dipole moment
-
-  double *q = atom->q;
-  double **x = atom->x;
-  double zprd = domain->zprd;
-  int *mask = atom->mask;
-  int nlocal = atom->nlocal;
-
-  double qsum_A = 0.0;
-  double qsum_B = 0.0;
-  double dipole_A = 0.0;
-  double dipole_B = 0.0;
-  double dipole_r2_A = 0.0;
-  double dipole_r2_B = 0.0;
-
-  for (int i = 0; i < nlocal; i++) {
-    if (!((mask[i] & groupbit_A) && (mask[i] & groupbit_B)))
-      if (AA_flag) continue;
-
-    if (mask[i] & groupbit_A) { 
-      qsum_A += q[i];
-      dipole_A += q[i]*x[i][2];
-      dipole_r2_A += q[i]*x[i][2]*x[i][2];
-    }
-
-    if (mask[i] & groupbit_B) {
-      qsum_B += q[i];
-      dipole_B += q[i]*x[i][2];
-      dipole_r2_B += q[i]*x[i][2]*x[i][2];
-    }
-  }
-
-  // sum local contributions to get total charge and global dipole moment
-  //  for each group
-
-  double tmp;
-  MPI_Allreduce(&qsum_A,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
-  qsum_A = tmp;
-
-  MPI_Allreduce(&qsum_B,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
-  qsum_B = tmp;
-
-  MPI_Allreduce(&dipole_A,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
-  dipole_A = tmp;
-
-  MPI_Allreduce(&dipole_B,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
-  dipole_B = tmp;
-
-  MPI_Allreduce(&dipole_r2_A,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
-  dipole_r2_A = tmp;
-
-  MPI_Allreduce(&dipole_r2_B,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
-  dipole_r2_B = tmp;
-
-  // compute corrections
-
-  const double qscale = force->qqrd2e * scale;
-  const double efact = qscale * MY_2PI/volume;
-
-  e2group += efact * (dipole_A*dipole_B - 0.5*(qsum_A*dipole_r2_B +
-    qsum_B*dipole_r2_A) - qsum_A*qsum_B*zprd*zprd/12.0);
-
-  // add on force corrections
-
-  const double ffact = qscale * (-4.0*MY_PI/volume);
-  f2group[2] += ffact * (qsum_A*dipole_B - qsum_B*dipole_A);
-}
-
-/* ----------------------------------------------------------------------
-   allocate group-group memory that depends on # of K-vectors
-------------------------------------------------------------------------- */
-
-void Ewald::allocate_groups()
-{
-  // group A
-
-  sfacrl_A = new double[kmax3d];
-  sfacim_A = new double[kmax3d];
-  sfacrl_A_all = new double[kmax3d];
-  sfacim_A_all = new double[kmax3d];
-
-  // group B
-
-  sfacrl_B = new double[kmax3d];
-  sfacim_B = new double[kmax3d];
-  sfacrl_B_all = new double[kmax3d];
-  sfacim_B_all = new double[kmax3d];
-}
-
-/* ----------------------------------------------------------------------
-   deallocate group-group memory that depends on # of K-vectors
-------------------------------------------------------------------------- */
-
-void Ewald::deallocate_groups()
-{
-  // group A
-
-  delete [] sfacrl_A;
-  delete [] sfacim_A;
-  delete [] sfacrl_A_all;
-  delete [] sfacim_A_all;
-
-  // group B
-
-  delete [] sfacrl_B;
-  delete [] sfacim_B;
-  delete [] sfacrl_B_all;
-  delete [] sfacim_B_all;
-}
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing authors: Roy Pollock (LLNL), Paul Crozier (SNL)
+     per-atom energy/virial added by German Samolyuk (ORNL), Stan Moore (BYU)
+     group/group energy/force added by Stan Moore (BYU)
+     triclinic added by Stan Moore (SNL)
+------------------------------------------------------------------------- */
+
+#include "mpi.h"
+#include "stdlib.h"
+#include "stdio.h"
+#include "string.h"
+#include "math.h"
+#include "ewald.h"
+#include "atom.h"
+#include "comm.h"
+#include "force.h"
+#include "pair.h"
+#include "domain.h"
+#include "math_const.h"
+#include "memory.h"
+#include "error.h"
+
+using namespace LAMMPS_NS;
+using namespace MathConst;
+
+#define SMALL 0.00001
+
+/* ---------------------------------------------------------------------- */
+
+Ewald::Ewald(LAMMPS *lmp, int narg, char **arg) : KSpace(lmp, narg, arg)
+{
+  if (narg != 1) error->all(FLERR,"Illegal kspace_style ewald command");
+
+  ewaldflag = 1;
+  group_group_enable = 1;
+  group_allocate_flag = 0;   // group/group arrays are only created on demand
+
+  accuracy_relative = fabs(force->numeric(FLERR,arg[0]));   // sign is dropped
+
+  // zero kmax_created too: setup() and ~Ewald() read it before any allocation
+  kmax = kmax_created = 0;
+  kxvecs = kyvecs = kzvecs = NULL;
+  ug = NULL;
+  eg = vg = NULL;
+  sfacrl = sfacim = sfacrl_all = sfacim_all = NULL;
+  nmax = 0;
+  ek = NULL;
+  cs = sn = NULL;
+
+  kcount = 0;
+}
+
+/* ----------------------------------------------------------------------
+   destructor: release K-vector, group/group and per-atom work arrays
+------------------------------------------------------------------------- */
+
+Ewald::~Ewald()
+{
+  if (group_allocate_flag) deallocate_groups();   // only if ever allocated
+  deallocate();                                   // K-vector sized arrays
+  memory->destroy3d_offset(sn,-kmax_created);
+  memory->destroy3d_offset(cs,-kmax_created);
+  memory->destroy(ek);
+}
+
+/* ---------------------------------------------------------------------- */
+// check settings, total the charges, pick g_ewald, run setup(), report accuracy
+void Ewald::init()
+{
+  if (comm->me == 0) {   // only rank 0 prints
+    if (screen) fprintf(screen,"Ewald initialization ...\n");
+    if (logfile) fprintf(logfile,"Ewald initialization ...\n");
+  }
+
+  // error checks: box shape/dimension, charge attribute, boundary conditions
+  
+  triclinic_check();
+  if (domain->dimension == 2)
+    error->all(FLERR,"Cannot use Ewald with 2d simulation");
+
+  if (!atom->q_flag) error->all(FLERR,"Kspace style requires atom attribute q");
+
+  if (slabflag == 0 && domain->nonperiodic > 0)
+    error->all(FLERR,"Cannot use nonperiodic boundaries with Ewald");
+  if (slabflag) {   // slab mode: x,y must be periodic, z boundary flags must be 1
+    if (domain->xperiodic != 1 || domain->yperiodic != 1 ||
+        domain->boundary[2][0] != 1 || domain->boundary[2][1] != 1)
+      error->all(FLERR,"Incorrect boundaries with slab Ewald");
+    if (domain->triclinic)
+      error->all(FLERR,"Cannot (yet) use Ewald with triclinic box "
+                 "and slab correction");
+  }
+
+  // extract short-range Coulombic cutoff from pair style
+
+  scale = 1.0;
+
+  pair_check();
+
+  int itmp;
+  double *p_cutoff = (double *) force->pair->extract("cut_coul",itmp);
+  if (p_cutoff == NULL)   // pair style does not expose "cut_coul"
+    error->all(FLERR,"KSpace style is incompatible with Pair style");
+  double cutoff = *p_cutoff;
+
+  qsum = qsqsum = 0.0;   // local sums of q and q^2, made global below
+  for (int i = 0; i < atom->nlocal; i++) {
+    qsum += atom->q[i];
+    qsqsum += atom->q[i]*atom->q[i];
+  }
+
+  double tmp;
+  MPI_Allreduce(&qsum,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
+  qsum = tmp;
+  MPI_Allreduce(&qsqsum,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
+  qsqsum = tmp;
+
+  if (qsqsum == 0.0)
+    error->all(FLERR,"Cannot use kspace solver on system with no charge");
+  if (fabs(qsum) > SMALL && comm->me == 0) {   // warn once, from rank 0
+    char str[128];
+    sprintf(str,"System is not charge neutral, net charge = %g",qsum);
+    error->warning(FLERR,str);
+  }
+
+  // set accuracy (force units) from accuracy_relative or accuracy_absolute
+
+  if (accuracy_absolute >= 0.0) accuracy = accuracy_absolute;   // takes priority
+  else accuracy = accuracy_relative * two_charge_force;
+
+  // setup K-space resolution
+
+  q2 = qsqsum * force->qqrd2e;
+  bigint natoms = atom->natoms;
+
+  triclinic = domain->triclinic;
+
+  // use xprd,yprd,zprd even if triclinic so grid size is the same
+  // adjust z dimension for 2d slab Ewald
+  // 3d Ewald just uses zprd since slab_volfactor = 1.0
+
+  double xprd = domain->xprd;
+  double yprd = domain->yprd;
+  double zprd = domain->zprd;
+  double zprd_slab = zprd*slab_volfactor;
+
+  // make initial g_ewald estimate
+  // based on desired accuracy and real space cutoff
+  // fluid-occupied volume used to estimate real-space error
+  // zprd used rather than zprd_slab
+
+  if (!gewaldflag) {   // skipped when gewaldflag is set; g_ewald is then used as is
+    if (accuracy <= 0.0)
+      error->all(FLERR,"KSpace accuracy must be > 0");
+    g_ewald = accuracy*sqrt(natoms*cutoff*xprd*yprd*zprd) / (2.0*q2);
+    if (g_ewald >= 1.0) g_ewald = (1.35 - 0.15*log(accuracy))/cutoff;   // -log() <= 0 here
+    else g_ewald = sqrt(-log(g_ewald)) / cutoff;
+  }
+
+  // setup Ewald coefficients so can print stats
+
+  setup();
+
+  // final RMS accuracy: K-space (lpr), real-space (spr) and table (tpr) terms
+
+  double lprx = rms(kxmax_orig,xprd,natoms,q2);
+  double lpry = rms(kymax_orig,yprd,natoms,q2);
+  double lprz = rms(kzmax_orig,zprd_slab,natoms,q2);
+  double lpr = sqrt(lprx*lprx + lpry*lpry + lprz*lprz) / sqrt(3.0);
+  double q2_over_sqrt = q2 / sqrt(natoms*cutoff*xprd*yprd*zprd_slab);
+  double spr = 2.0 *q2_over_sqrt * exp(-g_ewald*g_ewald*cutoff*cutoff);
+  double tpr = estimate_table_accuracy(q2_over_sqrt,spr);
+  double estimated_accuracy = sqrt(lpr*lpr + spr*spr + tpr*tpr);
+
+  // stats: same five lines go to screen and to logfile, rank 0 only
+
+  if (comm->me == 0) {
+    if (screen) {
+      fprintf(screen,"  G vector (1/distance) = %g\n",g_ewald);
+      fprintf(screen,"  estimated absolute RMS force accuracy = %g\n",
+              estimated_accuracy);
+      fprintf(screen,"  estimated relative force accuracy = %g\n",
+              estimated_accuracy/two_charge_force);
+      fprintf(screen,"  KSpace vectors: actual max1d max3d = %d %d %d\n",
+              kcount,kmax,kmax3d);
+      fprintf(screen,"                  kxmax kymax kzmax  = %d %d %d\n",
+              kxmax,kymax,kzmax);
+    }
+    if (logfile) {
+      fprintf(logfile,"  G vector (1/distance) = %g\n",g_ewald);
+      fprintf(logfile,"  estimated absolute RMS force accuracy = %g\n",
+              estimated_accuracy);
+      fprintf(logfile,"  estimated relative force accuracy = %g\n",
+              estimated_accuracy/two_charge_force);
+      fprintf(logfile,"  KSpace vectors: actual max1d max3d = %d %d %d\n",
+              kcount,kmax,kmax3d);
+      fprintf(logfile,"                  kxmax kymax kzmax  = %d %d %d\n",
+              kxmax,kymax,kzmax);
+    }
+  }
+}
+
+/* ----------------------------------------------------------------------
+   (re)size and refill Ewald coeffs, called initially and on volume change
+------------------------------------------------------------------------- */
+
+void Ewald::setup()
+{
+  // volume-dependent factors
+
+  double xprd = domain->xprd;
+  double yprd = domain->yprd;
+  double zprd = domain->zprd;
+
+  // adjustment of z dimension for 2d slab Ewald
+  // 3d Ewald just uses zprd since slab_volfactor = 1.0
+
+  double zprd_slab = zprd*slab_volfactor;
+  volume = xprd * yprd * zprd_slab;
+
+  unitk[0] = 2.0*MY_PI/xprd;
+  unitk[1] = 2.0*MY_PI/yprd;
+  unitk[2] = 2.0*MY_PI/zprd_slab;
+
+  int kmax_old = kmax;   // remembered to detect growth further down
+
+  if (kewaldflag == 0) {
+
+    // determine kmax
+    // function of current box size, accuracy, G_ewald (short-range cutoff)
+
+    bigint natoms = atom->natoms;
+    double err;
+    kxmax = 1;
+    kymax = 1;
+    kzmax = 1;
+
+    err = rms(kxmax,xprd,natoms,q2);   // grow each limit until rms() <= accuracy
+    while (err > accuracy) {
+      kxmax++;
+      err = rms(kxmax,xprd,natoms,q2);
+    }
+
+    err = rms(kymax,yprd,natoms,q2);
+    while (err > accuracy) {
+      kymax++;
+      err = rms(kymax,yprd,natoms,q2);
+    }
+
+    err = rms(kzmax,zprd_slab,natoms,q2);
+    while (err > accuracy) {
+      kzmax++;
+      err = rms(kzmax,zprd_slab,natoms,q2);
+    }
+
+    kmax = MAX(kxmax,kymax);
+    kmax = MAX(kmax,kzmax);
+    kmax3d = 4*kmax*kmax*kmax + 6*kmax*kmax + 3*kmax;   // array length used by allocate()
+    
+    double gsqxmx = unitk[0]*unitk[0]*kxmax*kxmax;
+    double gsqymx = unitk[1]*unitk[1]*kymax*kymax;
+    double gsqzmx = unitk[2]*unitk[2]*kzmax*kzmax;
+    gsqmx = MAX(gsqxmx,gsqymx);
+    gsqmx = MAX(gsqmx,gsqzmx);
+    
+    kxmax_orig = kxmax;   // pre-skew limits, used by init() for the rms() report
+    kymax_orig = kymax;
+    kzmax_orig = kzmax;
+
+    // scale lattice vectors for triclinic skew
+    
+    if (triclinic) {
+      double tmp[3];
+      tmp[0] = kxmax/xprd;
+      tmp[1] = kymax/yprd;
+      tmp[2] = kzmax/zprd;
+      lamda2xT(&tmp[0],&tmp[0]);
+      kxmax = MAX(1,static_cast<int>(tmp[0]));
+      kymax = MAX(1,static_cast<int>(tmp[1]));
+      kzmax = MAX(1,static_cast<int>(tmp[2]));
+      
+      kmax = MAX(kxmax,kymax);
+      kmax = MAX(kmax,kzmax);
+      kmax3d = 4*kmax*kmax*kmax + 6*kmax*kmax + 3*kmax;
+    }
+
+  } else {   // kewaldflag set: take kx_ewald/ky_ewald/kz_ewald as the limits
+
+    kxmax = kx_ewald;
+    kymax = ky_ewald;
+    kzmax = kz_ewald;
+    
+    kxmax_orig = kxmax;
+    kymax_orig = kymax;
+    kzmax_orig = kzmax;
+
+    kmax = MAX(kxmax,kymax);
+    kmax = MAX(kmax,kzmax);
+    kmax3d = 4*kmax*kmax*kmax + 6*kmax*kmax + 3*kmax;
+
+    double gsqxmx = unitk[0]*unitk[0]*kxmax*kxmax;
+    double gsqymx = unitk[1]*unitk[1]*kymax*kymax;
+    double gsqzmx = unitk[2]*unitk[2]*kzmax*kzmax;
+    gsqmx = MAX(gsqxmx,gsqymx);
+    gsqmx = MAX(gsqmx,gsqzmx);
+  }
+
+  gsqmx *= 1.00001;   // small slack on the |k|^2 cutoff used in "sqk <= gsqmx"
+
+  // if size has grown, reallocate k-dependent and nlocal-dependent arrays
+
+  if (kmax > kmax_old) {
+    deallocate();
+    allocate();
+    if (group_allocate_flag) deallocate_groups();   // else old group arrays leak
+    group_allocate_flag = 0;   // compute_group_group() re-creates them on demand
+    memory->destroy(ek);
+    memory->destroy3d_offset(cs,-kmax_created);
+    memory->destroy3d_offset(sn,-kmax_created);
+    nmax = atom->nmax;
+    memory->create(ek,nmax,3,"ewald:ek");
+    memory->create3d_offset(cs,-kmax,kmax,3,nmax,"ewald:cs");
+    memory->create3d_offset(sn,-kmax,kmax,3,nmax,"ewald:sn");
+    kmax_created = kmax;   // offset that the matching destroy3d_offset() needs
+  }
+
+  // pre-compute Ewald coefficients
+
+  if (triclinic == 0)
+    coeffs();
+  else
+    coeffs_triclinic();
+}
+
+/* ----------------------------------------------------------------------
+   RMS accuracy estimate for one dimension: K limit km, box length prd
+------------------------------------------------------------------------- */
+
+double Ewald::rms(int km, double prd, bigint natoms, double q2)
+{
+  const double prefactor = 2.0*q2*g_ewald/prd;
+  const double inv_root = sqrt(1.0/(MY_PI*km*natoms));
+  const double gauss = exp(-MY_PI*MY_PI*km*km/(g_ewald*g_ewald*prd*prd));
+
+  return prefactor * inv_root * gauss;
+}
+
+/* ----------------------------------------------------------------------
+   compute the Ewald long-range (K-space) force, energy, virial for local atoms
+------------------------------------------------------------------------- */
+
+void Ewald::compute(int eflag, int vflag)
+{
+  int i,j,k;
+
+  // set energy/virial flags
+
+  if (eflag || vflag) ev_setup(eflag,vflag);
+  else evflag = evflag_atom = eflag_global = vflag_global =
+         eflag_atom = vflag_atom = 0;
+
+  // extend size of per-atom arrays if necessary
+
+  if (atom->nlocal > nmax) {   // ek/cs/sn are rebuilt with nmax = atom->nmax
+    memory->destroy(ek);
+    memory->destroy3d_offset(cs,-kmax_created);
+    memory->destroy3d_offset(sn,-kmax_created);
+    nmax = atom->nmax;
+    memory->create(ek,nmax,3,"ewald:ek");
+    memory->create3d_offset(cs,-kmax,kmax,3,nmax,"ewald:cs");
+    memory->create3d_offset(sn,-kmax,kmax,3,nmax,"ewald:sn");
+    kmax_created = kmax;
+  }
+
+  // partial structure factors on each processor
+  // total structure factor by summing over procs
+
+  if (triclinic == 0)
+    eik_dot_r();
+  else
+    eik_dot_r_triclinic();
+
+  MPI_Allreduce(sfacrl,sfacrl_all,kcount,MPI_DOUBLE,MPI_SUM,world);
+  MPI_Allreduce(sfacim,sfacim_all,kcount,MPI_DOUBLE,MPI_SUM,world);
+
+  // K-space portion of electric field
+  // double loop over K-vectors and local atoms
+  // perform per-atom calculations if needed
+
+  double **f = atom->f;
+  double *q = atom->q;
+  int nlocal = atom->nlocal;
+
+  int kx,ky,kz;
+  double cypz,sypz,exprl,expim,partial,partial_peratom;
+
+  for (i = 0; i < nlocal; i++) {   // zero the field accumulator
+    ek[i][0] = 0.0;
+    ek[i][1] = 0.0;
+    ek[i][2] = 0.0;
+  }
+
+  for (k = 0; k < kcount; k++) {
+    kx = kxvecs[k];
+    ky = kyvecs[k];
+    kz = kzvecs[k];
+
+    for (i = 0; i < nlocal; i++) {
+      cypz = cs[ky][1][i]*cs[kz][2][i] - sn[ky][1][i]*sn[kz][2][i];   // cos(y+z) form
+      sypz = sn[ky][1][i]*cs[kz][2][i] + cs[ky][1][i]*sn[kz][2][i];   // sin(y+z) form
+      exprl = cs[kx][0][i]*cypz - sn[kx][0][i]*sypz;   // cos(x+y+z) form
+      expim = sn[kx][0][i]*cypz + cs[kx][0][i]*sypz;   // sin(x+y+z) form
+      partial = expim*sfacrl_all[k] - exprl*sfacim_all[k];   // Im[(exprl+i expim)(sfacrl-i sfacim)]
+      ek[i][0] += partial*eg[k][0];
+      ek[i][1] += partial*eg[k][1];
+      ek[i][2] += partial*eg[k][2];
+
+      if (evflag_atom) {
+        partial_peratom = exprl*sfacrl_all[k] + expim*sfacim_all[k];   // real part of same product
+        if (eflag_atom) eatom[i] += q[i]*ug[k]*partial_peratom;
+        if (vflag_atom)
+          for (j = 0; j < 6; j++)
+            vatom[i][j] += ug[k]*vg[k][j]*partial_peratom;   // q[i] factor applied later
+      }
+    }
+  }
+
+  // convert E-field to force
+
+  const double qscale = force->qqrd2e * scale;
+
+  for (i = 0; i < nlocal; i++) {
+    f[i][0] += qscale * q[i]*ek[i][0];
+    f[i][1] += qscale * q[i]*ek[i][1];
+    if (slabflag != 2) f[i][2] += qscale * q[i]*ek[i][2];   // no z force when slabflag == 2
+  }
+
+  // global energy: sum over K-vectors minus the g_ewald and net-charge terms
+
+  if (eflag_global) {
+    for (k = 0; k < kcount; k++)
+      energy += ug[k] * (sfacrl_all[k]*sfacrl_all[k] +
+                         sfacim_all[k]*sfacim_all[k]);
+    energy -= g_ewald*qsqsum/MY_PIS +
+      MY_PI2*qsum*qsum / (g_ewald*g_ewald*volume);
+    energy *= qscale;
+  }
+
+  // global virial
+
+  if (vflag_global) {
+    double uk;
+    for (k = 0; k < kcount; k++) {
+      uk = ug[k] * (sfacrl_all[k]*sfacrl_all[k] + sfacim_all[k]*sfacim_all[k]);
+      for (j = 0; j < 6; j++) virial[j] += uk*vg[k][j];
+    }
+    for (j = 0; j < 6; j++) virial[j] *= qscale;
+  }
+
+  // per-atom energy/virial
+  // energy includes self-energy correction
+
+  if (evflag_atom) {
+    if (eflag_atom) {
+      for (i = 0; i < nlocal; i++) {
+        eatom[i] -= g_ewald*q[i]*q[i]/MY_PIS + MY_PI2*q[i]*qsum /
+          (g_ewald*g_ewald*volume);
+        eatom[i] *= qscale;
+      }
+    }
+
+    if (vflag_atom)
+      for (i = 0; i < nlocal; i++)
+        for (j = 0; j < 6; j++) vatom[i][j] *= q[i]*qscale;   // deferred q[i] and unit scaling
+  }
+
+  // 2d slab correction
+
+  if (slabflag == 1) slabcorr();
+}
+
+/* -- per-atom cos/sin tables and local structure factors, orthogonal box -- */
+
+void Ewald::eik_dot_r()
+{
+  int i,k,l,m,n,ic;
+  double cstr1,sstr1,cstr2,sstr2,cstr3,sstr3,cstr4,sstr4;  // q-weighted cos/sin sums
+  double sqk,clpm,slpm;
+
+  double **x = atom->x;
+  double *q = atom->q;
+  int nlocal = atom->nlocal;
+
+  n = 0;  // next slot in sfacrl/sfacim; loops below follow the order used in coeffs()
+
+  // (k,0,0), (0,l,0), (0,0,m) with index 1: seed the cs/sn tables at 0 and +-1
+
+  for (ic = 0; ic < 3; ic++) {
+    sqk = unitk[ic]*unitk[ic];
+    if (sqk <= gsqmx) {  // axis is skipped when unitk^2 already exceeds gsqmx
+      cstr1 = 0.0;
+      sstr1 = 0.0;
+      for (i = 0; i < nlocal; i++) {
+        cs[0][ic][i] = 1.0;
+        sn[0][ic][i] = 0.0;
+        cs[1][ic][i] = cos(unitk[ic]*x[i][ic]);
+        sn[1][ic][i] = sin(unitk[ic]*x[i][ic]);
+        cs[-1][ic][i] = cs[1][ic][i];  // cos is even
+        sn[-1][ic][i] = -sn[1][ic][i];  // sin is odd
+        cstr1 += q[i]*cs[1][ic][i];
+        sstr1 += q[i]*sn[1][ic][i];
+      }
+      sfacrl[n] = cstr1;
+      sfacim[n++] = sstr1;
+    }
+  }
+
+  for (m = 2; m <= kmax; m++) {  // higher multiples along each axis
+    for (ic = 0; ic < 3; ic++) {
+      sqk = m*unitk[ic] * m*unitk[ic];
+      if (sqk <= gsqmx) {
+        cstr1 = 0.0;
+        sstr1 = 0.0;
+        for (i = 0; i < nlocal; i++) {
+          cs[m][ic][i] = cs[m-1][ic][i]*cs[1][ic][i] -  // cos(a+b) = cos a cos b - sin a sin b
+            sn[m-1][ic][i]*sn[1][ic][i];
+          sn[m][ic][i] = sn[m-1][ic][i]*cs[1][ic][i] +  // sin(a+b) = sin a cos b + cos a sin b
+            cs[m-1][ic][i]*sn[1][ic][i];
+          cs[-m][ic][i] = cs[m][ic][i];
+          sn[-m][ic][i] = -sn[m][ic][i];
+          cstr1 += q[i]*cs[m][ic][i];
+          sstr1 += q[i]*sn[m][ic][i];
+        }
+        sfacrl[n] = cstr1;
+        sfacim[n++] = sstr1;
+      }
+    }
+  }
+
+  // 1 = (k,l,0), 2 = (k,-l,0): both sign choices share one gsqmx test, two slots
+
+  for (k = 1; k <= kxmax; k++) {
+    for (l = 1; l <= kymax; l++) {
+      sqk = (k*unitk[0] * k*unitk[0]) + (l*unitk[1] * l*unitk[1]);
+      if (sqk <= gsqmx) {
+        cstr1 = 0.0;
+        sstr1 = 0.0;
+        cstr2 = 0.0;
+        sstr2 = 0.0;
+        for (i = 0; i < nlocal; i++) {
+          cstr1 += q[i]*(cs[k][0][i]*cs[l][1][i] - sn[k][0][i]*sn[l][1][i]);
+          sstr1 += q[i]*(sn[k][0][i]*cs[l][1][i] + cs[k][0][i]*sn[l][1][i]);
+          cstr2 += q[i]*(cs[k][0][i]*cs[l][1][i] + sn[k][0][i]*sn[l][1][i]);
+          sstr2 += q[i]*(sn[k][0][i]*cs[l][1][i] - cs[k][0][i]*sn[l][1][i]);
+        }
+        sfacrl[n] = cstr1;
+        sfacim[n++] = sstr1;
+        sfacrl[n] = cstr2;
+        sfacim[n++] = sstr2;
+      }
+    }
+  }
+
+  // 1 = (0,l,m), 2 = (0,l,-m): same pattern in the y-z plane
+
+  for (l = 1; l <= kymax; l++) {
+    for (m = 1; m <= kzmax; m++) {
+      sqk = (l*unitk[1] * l*unitk[1]) + (m*unitk[2] * m*unitk[2]);
+      if (sqk <= gsqmx) {
+        cstr1 = 0.0;
+        sstr1 = 0.0;
+        cstr2 = 0.0;
+        sstr2 = 0.0;
+        for (i = 0; i < nlocal; i++) {
+          cstr1 += q[i]*(cs[l][1][i]*cs[m][2][i] - sn[l][1][i]*sn[m][2][i]);
+          sstr1 += q[i]*(sn[l][1][i]*cs[m][2][i] + cs[l][1][i]*sn[m][2][i]);
+          cstr2 += q[i]*(cs[l][1][i]*cs[m][2][i] + sn[l][1][i]*sn[m][2][i]);
+          sstr2 += q[i]*(sn[l][1][i]*cs[m][2][i] - cs[l][1][i]*sn[m][2][i]);
+        }
+        sfacrl[n] = cstr1;
+        sfacim[n++] = sstr1;
+        sfacrl[n] = cstr2;
+        sfacim[n++] = sstr2;
+      }
+    }
+  }
+
+  // 1 = (k,0,m), 2 = (k,0,-m): same pattern in the x-z plane
+
+  for (k = 1; k <= kxmax; k++) {
+    for (m = 1; m <= kzmax; m++) {
+      sqk = (k*unitk[0] * k*unitk[0]) + (m*unitk[2] * m*unitk[2]);
+      if (sqk <= gsqmx) {
+        cstr1 = 0.0;
+        sstr1 = 0.0;
+        cstr2 = 0.0;
+        sstr2 = 0.0;
+        for (i = 0; i < nlocal; i++) {
+          cstr1 += q[i]*(cs[k][0][i]*cs[m][2][i] - sn[k][0][i]*sn[m][2][i]);
+          sstr1 += q[i]*(sn[k][0][i]*cs[m][2][i] + cs[k][0][i]*sn[m][2][i]);
+          cstr2 += q[i]*(cs[k][0][i]*cs[m][2][i] + sn[k][0][i]*sn[m][2][i]);
+          sstr2 += q[i]*(sn[k][0][i]*cs[m][2][i] - cs[k][0][i]*sn[m][2][i]);
+        }
+        sfacrl[n] = cstr1;
+        sfacim[n++] = sstr1;
+        sfacrl[n] = cstr2;
+        sfacim[n++] = sstr2;
+      }
+    }
+  }
+
+  // 1 = (k,l,m), 2 = (k,-l,m), 3 = (k,l,-m), 4 = (k,-l,-m): four slots per test
+
+  for (k = 1; k <= kxmax; k++) {
+    for (l = 1; l <= kymax; l++) {
+      for (m = 1; m <= kzmax; m++) {
+        sqk = (k*unitk[0] * k*unitk[0]) + (l*unitk[1] * l*unitk[1]) +
+          (m*unitk[2] * m*unitk[2]);
+        if (sqk <= gsqmx) {
+          cstr1 = 0.0;
+          sstr1 = 0.0;
+          cstr2 = 0.0;
+          sstr2 = 0.0;
+          cstr3 = 0.0;
+          sstr3 = 0.0;
+          cstr4 = 0.0;
+          sstr4 = 0.0;
+          for (i = 0; i < nlocal; i++) {
+            clpm = cs[l][1][i]*cs[m][2][i] - sn[l][1][i]*sn[m][2][i];  // y,z part for (l,m)
+            slpm = sn[l][1][i]*cs[m][2][i] + cs[l][1][i]*sn[m][2][i];
+            cstr1 += q[i]*(cs[k][0][i]*clpm - sn[k][0][i]*slpm);
+            sstr1 += q[i]*(sn[k][0][i]*clpm + cs[k][0][i]*slpm);
+
+            clpm = cs[l][1][i]*cs[m][2][i] + sn[l][1][i]*sn[m][2][i];  // y,z part for (-l,m)
+            slpm = -sn[l][1][i]*cs[m][2][i] + cs[l][1][i]*sn[m][2][i];
+            cstr2 += q[i]*(cs[k][0][i]*clpm - sn[k][0][i]*slpm);
+            sstr2 += q[i]*(sn[k][0][i]*clpm + cs[k][0][i]*slpm);
+
+            clpm = cs[l][1][i]*cs[m][2][i] + sn[l][1][i]*sn[m][2][i];  // y,z part for (l,-m)
+            slpm = sn[l][1][i]*cs[m][2][i] - cs[l][1][i]*sn[m][2][i];
+            cstr3 += q[i]*(cs[k][0][i]*clpm - sn[k][0][i]*slpm);
+            sstr3 += q[i]*(sn[k][0][i]*clpm + cs[k][0][i]*slpm);
+
+            clpm = cs[l][1][i]*cs[m][2][i] - sn[l][1][i]*sn[m][2][i];  // y,z part for (-l,-m)
+            slpm = -sn[l][1][i]*cs[m][2][i] - cs[l][1][i]*sn[m][2][i];
+            cstr4 += q[i]*(cs[k][0][i]*clpm - sn[k][0][i]*slpm);
+            sstr4 += q[i]*(sn[k][0][i]*clpm + cs[k][0][i]*slpm);
+          }
+          sfacrl[n] = cstr1;
+          sfacim[n++] = sstr1;
+          sfacrl[n] = cstr2;
+          sfacim[n++] = sstr2;
+          sfacrl[n] = cstr3;
+          sfacim[n++] = sstr3;
+          sfacrl[n] = cstr4;
+          sfacim[n++] = sstr4;
+        }
+      }
+    }
+  }
+}
+
+/* -- per-atom cos/sin tables and local structure factors, triclinic box -- */
+
+void Ewald::eik_dot_r_triclinic()
+{
+  int i,k,l,m,n,ic;
+  double cstr1,sstr1;
+  double sqk,clpm,slpm;
+
+  double **x = atom->x;
+  double *q = atom->q;
+  int nlocal = atom->nlocal;
+
+  double unitk_lamda[3];
+
+  double max_kvecs[3];  // per-axis table depth; integer limits held as doubles
+  max_kvecs[0] = kxmax;
+  max_kvecs[1] = kymax;
+  max_kvecs[2] = kzmax;
+
+  // (k,0,0), (0,l,0), (0,0,m) with index 1: seed the cs/sn tables at 0 and +-1
+
+  for (ic = 0; ic < 3; ic++) {
+    unitk_lamda[0] = 0.0;
+    unitk_lamda[1] = 0.0;
+    unitk_lamda[2] = 0.0;
+    unitk_lamda[ic] = 2.0*MY_PI;
+    x2lamdaT(&unitk_lamda[0],&unitk_lamda[0]);  // same array is input and output
+    sqk = unitk_lamda[ic]*unitk_lamda[ic];  // only component ic enters the gsqmx test
+    if (sqk <= gsqmx) {
+      for (i = 0; i < nlocal; i++) {
+        cs[0][ic][i] = 1.0;
+        sn[0][ic][i] = 0.0;
+        cs[1][ic][i] = cos(unitk_lamda[0]*x[i][0] + unitk_lamda[1]*x[i][1] + unitk_lamda[2]*x[i][2]);
+        sn[1][ic][i] = sin(unitk_lamda[0]*x[i][0] + unitk_lamda[1]*x[i][1] + unitk_lamda[2]*x[i][2]);
+        cs[-1][ic][i] = cs[1][ic][i];  // cos is even
+        sn[-1][ic][i] = -sn[1][ic][i];  // sin is odd
+      }
+    }
+  }
+
+  for (ic = 0; ic < 3; ic++) {
+    for (m = 2; m <= max_kvecs[ic]; m++) {  // higher multiples, up to this axis' own limit
+      unitk_lamda[0] = 0.0;
+      unitk_lamda[1] = 0.0;
+      unitk_lamda[2] = 0.0;
+      unitk_lamda[ic] = 2.0*MY_PI*m;
+      x2lamdaT(&unitk_lamda[0],&unitk_lamda[0]);
+      sqk = unitk_lamda[ic]*unitk_lamda[ic];  // NOTE(review): not read again in this loop
+      for (i = 0; i < nlocal; i++) {
+        cs[m][ic][i] = cs[m-1][ic][i]*cs[1][ic][i] -  // angle-addition recurrence
+          sn[m-1][ic][i]*sn[1][ic][i];
+        sn[m][ic][i] = sn[m-1][ic][i]*cs[1][ic][i] +
+          cs[m-1][ic][i]*sn[1][ic][i];
+        cs[-m][ic][i] = cs[m][ic][i];
+        sn[-m][ic][i] = -sn[m][ic][i];
+      }
+    }
+  }
+
+  for (n = 0; n < kcount; n++) {  // one local structure factor per stored K-vector
+    k = kxvecs[n];  // signed indices; negative values select the mirrored table rows
+    l = kyvecs[n];
+    m = kzvecs[n];
+    cstr1 = 0.0;
+    sstr1 = 0.0;
+    for (i = 0; i < nlocal; i++) {
+      clpm = cs[l][1][i]*cs[m][2][i] - sn[l][1][i]*sn[m][2][i];  // combine y and z factors
+      slpm = sn[l][1][i]*cs[m][2][i] + cs[l][1][i]*sn[m][2][i];
+      cstr1 += q[i]*(cs[k][0][i]*clpm - sn[k][0][i]*slpm);  // then fold in x
+      sstr1 += q[i]*(sn[k][0][i]*clpm + cs[k][0][i]*slpm);
+    }
+    sfacrl[n] = cstr1;
+    sfacim[n] = sstr1;
+  }
+}
+
+/* ----------------------------------------------------------------------
+   pre-compute coefficients (ug, eg, vg) for each Ewald K-vector; sets kcount
+------------------------------------------------------------------------- */
+
+void Ewald::coeffs()
+{
+  int k,l,m;
+  double sqk,vterm;
+
+  double g_ewald_sq_inv = 1.0 / (g_ewald*g_ewald);
+  double preu = 4.0*MY_PI/volume;
+
+  kcount = 0;  // number of K-vectors accepted so far; also the next free slot
+
+  // axis vectors (m,0,0), (0,m,0), (0,0,m); each axis is tested against gsqmx
+
+  for (m = 1; m <= kmax; m++) {
+    sqk = (m*unitk[0]) * (m*unitk[0]);
+    if (sqk <= gsqmx) {
+      kxvecs[kcount] = m;
+      kyvecs[kcount] = 0;
+      kzvecs[kcount] = 0;
+      ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk;
+      eg[kcount][0] = 2.0*unitk[0]*m*ug[kcount];
+      eg[kcount][1] = 0.0;
+      eg[kcount][2] = 0.0;
+      vterm = -2.0*(1.0/sqk + 0.25*g_ewald_sq_inv);
+      vg[kcount][0] = 1.0 + vterm*(unitk[0]*m)*(unitk[0]*m);  // vg order: xx,yy,zz,xy,xz,yz
+      vg[kcount][1] = 1.0;
+      vg[kcount][2] = 1.0;
+      vg[kcount][3] = 0.0;
+      vg[kcount][4] = 0.0;
+      vg[kcount][5] = 0.0;
+      kcount++;
+    }
+    sqk = (m*unitk[1]) * (m*unitk[1]);
+    if (sqk <= gsqmx) {
+      kxvecs[kcount] = 0;
+      kyvecs[kcount] = m;
+      kzvecs[kcount] = 0;
+      ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk;
+      eg[kcount][0] = 0.0;
+      eg[kcount][1] = 2.0*unitk[1]*m*ug[kcount];
+      eg[kcount][2] = 0.0;
+      vterm = -2.0*(1.0/sqk + 0.25*g_ewald_sq_inv);
+      vg[kcount][0] = 1.0;
+      vg[kcount][1] = 1.0 + vterm*(unitk[1]*m)*(unitk[1]*m);
+      vg[kcount][2] = 1.0;
+      vg[kcount][3] = 0.0;
+      vg[kcount][4] = 0.0;
+      vg[kcount][5] = 0.0;
+      kcount++;
+    }
+    sqk = (m*unitk[2]) * (m*unitk[2]);
+    if (sqk <= gsqmx) {
+      kxvecs[kcount] = 0;
+      kyvecs[kcount] = 0;
+      kzvecs[kcount] = m;
+      ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk;
+      eg[kcount][0] = 0.0;
+      eg[kcount][1] = 0.0;
+      eg[kcount][2] = 2.0*unitk[2]*m*ug[kcount];
+      vterm = -2.0*(1.0/sqk + 0.25*g_ewald_sq_inv);
+      vg[kcount][0] = 1.0;
+      vg[kcount][1] = 1.0;
+      vg[kcount][2] = 1.0 + vterm*(unitk[2]*m)*(unitk[2]*m);
+      vg[kcount][3] = 0.0;
+      vg[kcount][4] = 0.0;
+      vg[kcount][5] = 0.0;
+      kcount++;
+    }
+  }
+
+  // 1 = (k,l,0), 2 = (k,-l,0): two entries per accepted (k,l), same sqk and vterm
+
+  for (k = 1; k <= kxmax; k++) {
+    for (l = 1; l <= kymax; l++) {
+      sqk = (unitk[0]*k) * (unitk[0]*k) + (unitk[1]*l) * (unitk[1]*l);
+      if (sqk <= gsqmx) {
+        kxvecs[kcount] = k;
+        kyvecs[kcount] = l;
+        kzvecs[kcount] = 0;
+        ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk;
+        eg[kcount][0] = 2.0*unitk[0]*k*ug[kcount];
+        eg[kcount][1] = 2.0*unitk[1]*l*ug[kcount];
+        eg[kcount][2] = 0.0;
+        vterm = -2.0*(1.0/sqk + 0.25*g_ewald_sq_inv);
+        vg[kcount][0] = 1.0 + vterm*(unitk[0]*k)*(unitk[0]*k);
+        vg[kcount][1] = 1.0 + vterm*(unitk[1]*l)*(unitk[1]*l);
+        vg[kcount][2] = 1.0;
+        vg[kcount][3] = vterm*unitk[0]*k*unitk[1]*l;
+        vg[kcount][4] = 0.0;
+        vg[kcount][5] = 0.0;
+        kcount++;
+
+        kxvecs[kcount] = k;
+        kyvecs[kcount] = -l;
+        kzvecs[kcount] = 0;
+        ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk;
+        eg[kcount][0] = 2.0*unitk[0]*k*ug[kcount];
+        eg[kcount][1] = -2.0*unitk[1]*l*ug[kcount];
+        eg[kcount][2] = 0.0;
+        vg[kcount][0] = 1.0 + vterm*(unitk[0]*k)*(unitk[0]*k);
+        vg[kcount][1] = 1.0 + vterm*(unitk[1]*l)*(unitk[1]*l);
+        vg[kcount][2] = 1.0;
+        vg[kcount][3] = -vterm*unitk[0]*k*unitk[1]*l;
+        vg[kcount][4] = 0.0;
+        vg[kcount][5] = 0.0;
+        kcount++;
+      }
+    }
+  }
+
+  // 1 = (0,l,m), 2 = (0,l,-m): two entries per accepted (l,m)
+
+  for (l = 1; l <= kymax; l++) {
+    for (m = 1; m <= kzmax; m++) {
+      sqk = (unitk[1]*l) * (unitk[1]*l) + (unitk[2]*m) * (unitk[2]*m);
+      if (sqk <= gsqmx) {
+        kxvecs[kcount] = 0;
+        kyvecs[kcount] = l;
+        kzvecs[kcount] = m;
+        ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk;
+        eg[kcount][0] =  0.0;
+        eg[kcount][1] =  2.0*unitk[1]*l*ug[kcount];
+        eg[kcount][2] =  2.0*unitk[2]*m*ug[kcount];
+        vterm = -2.0*(1.0/sqk + 0.25*g_ewald_sq_inv);
+        vg[kcount][0] = 1.0;
+        vg[kcount][1] = 1.0 + vterm*(unitk[1]*l)*(unitk[1]*l);
+        vg[kcount][2] = 1.0 + vterm*(unitk[2]*m)*(unitk[2]*m);
+        vg[kcount][3] = 0.0;
+        vg[kcount][4] = 0.0;
+        vg[kcount][5] = vterm*unitk[1]*l*unitk[2]*m;
+        kcount++;
+
+        kxvecs[kcount] = 0;
+        kyvecs[kcount] = l;
+        kzvecs[kcount] = -m;
+        ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk;
+        eg[kcount][0] =  0.0;
+        eg[kcount][1] =  2.0*unitk[1]*l*ug[kcount];
+        eg[kcount][2] = -2.0*unitk[2]*m*ug[kcount];
+        vg[kcount][0] = 1.0;
+        vg[kcount][1] = 1.0 + vterm*(unitk[1]*l)*(unitk[1]*l);
+        vg[kcount][2] = 1.0 + vterm*(unitk[2]*m)*(unitk[2]*m);
+        vg[kcount][3] = 0.0;
+        vg[kcount][4] = 0.0;
+        vg[kcount][5] = -vterm*unitk[1]*l*unitk[2]*m;
+        kcount++;
+      }
+    }
+  }
+
+  // 1 = (k,0,m), 2 = (k,0,-m): two entries per accepted (k,m)
+
+  for (k = 1; k <= kxmax; k++) {
+    for (m = 1; m <= kzmax; m++) {
+      sqk = (unitk[0]*k) * (unitk[0]*k) + (unitk[2]*m) * (unitk[2]*m);
+      if (sqk <= gsqmx) {
+        kxvecs[kcount] = k;
+        kyvecs[kcount] = 0;
+        kzvecs[kcount] = m;
+        ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk;
+        eg[kcount][0] =  2.0*unitk[0]*k*ug[kcount];
+        eg[kcount][1] =  0.0;
+        eg[kcount][2] =  2.0*unitk[2]*m*ug[kcount];
+        vterm = -2.0*(1.0/sqk + 0.25*g_ewald_sq_inv);
+        vg[kcount][0] = 1.0 + vterm*(unitk[0]*k)*(unitk[0]*k);
+        vg[kcount][1] = 1.0;
+        vg[kcount][2] = 1.0 + vterm*(unitk[2]*m)*(unitk[2]*m);
+        vg[kcount][3] = 0.0;
+        vg[kcount][4] = vterm*unitk[0]*k*unitk[2]*m;
+        vg[kcount][5] = 0.0;
+        kcount++;
+
+        kxvecs[kcount] = k;
+        kyvecs[kcount] = 0;
+        kzvecs[kcount] = -m;
+        ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk;
+        eg[kcount][0] =  2.0*unitk[0]*k*ug[kcount];
+        eg[kcount][1] =  0.0;
+        eg[kcount][2] = -2.0*unitk[2]*m*ug[kcount];
+        vg[kcount][0] = 1.0 + vterm*(unitk[0]*k)*(unitk[0]*k);
+        vg[kcount][1] = 1.0;
+        vg[kcount][2] = 1.0 + vterm*(unitk[2]*m)*(unitk[2]*m);
+        vg[kcount][3] = 0.0;
+        vg[kcount][4] = -vterm*unitk[0]*k*unitk[2]*m;
+        vg[kcount][5] = 0.0;
+        kcount++;
+      }
+    }
+  }
+
+  // 1 = (k,l,m), 2 = (k,-l,m), 3 = (k,l,-m), 4 = (k,-l,-m): four entries per test
+
+  for (k = 1; k <= kxmax; k++) {
+    for (l = 1; l <= kymax; l++) {
+      for (m = 1; m <= kzmax; m++) {
+        sqk = (unitk[0]*k) * (unitk[0]*k) + (unitk[1]*l) * (unitk[1]*l) +
+          (unitk[2]*m) * (unitk[2]*m);
+        if (sqk <= gsqmx) {
+          kxvecs[kcount] = k;
+          kyvecs[kcount] = l;
+          kzvecs[kcount] = m;
+          ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk;
+          eg[kcount][0] = 2.0*unitk[0]*k*ug[kcount];
+          eg[kcount][1] = 2.0*unitk[1]*l*ug[kcount];
+          eg[kcount][2] = 2.0*unitk[2]*m*ug[kcount];
+          vterm = -2.0*(1.0/sqk + 0.25*g_ewald_sq_inv);
+          vg[kcount][0] = 1.0 + vterm*(unitk[0]*k)*(unitk[0]*k);
+          vg[kcount][1] = 1.0 + vterm*(unitk[1]*l)*(unitk[1]*l);
+          vg[kcount][2] = 1.0 + vterm*(unitk[2]*m)*(unitk[2]*m);
+          vg[kcount][3] = vterm*unitk[0]*k*unitk[1]*l;
+          vg[kcount][4] = vterm*unitk[0]*k*unitk[2]*m;
+          vg[kcount][5] = vterm*unitk[1]*l*unitk[2]*m;
+          kcount++;
+
+          kxvecs[kcount] = k;
+          kyvecs[kcount] = -l;
+          kzvecs[kcount] = m;
+          ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk;
+          eg[kcount][0] = 2.0*unitk[0]*k*ug[kcount];
+          eg[kcount][1] = -2.0*unitk[1]*l*ug[kcount];
+          eg[kcount][2] = 2.0*unitk[2]*m*ug[kcount];
+          vg[kcount][0] = 1.0 + vterm*(unitk[0]*k)*(unitk[0]*k);
+          vg[kcount][1] = 1.0 + vterm*(unitk[1]*l)*(unitk[1]*l);
+          vg[kcount][2] = 1.0 + vterm*(unitk[2]*m)*(unitk[2]*m);
+          vg[kcount][3] = -vterm*unitk[0]*k*unitk[1]*l;
+          vg[kcount][4] = vterm*unitk[0]*k*unitk[2]*m;
+          vg[kcount][5] = -vterm*unitk[1]*l*unitk[2]*m;
+          kcount++;
+
+          kxvecs[kcount] = k;
+          kyvecs[kcount] = l;
+          kzvecs[kcount] = -m;
+          ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk;
+          eg[kcount][0] = 2.0*unitk[0]*k*ug[kcount];
+          eg[kcount][1] = 2.0*unitk[1]*l*ug[kcount];
+          eg[kcount][2] = -2.0*unitk[2]*m*ug[kcount];
+          vg[kcount][0] = 1.0 + vterm*(unitk[0]*k)*(unitk[0]*k);
+          vg[kcount][1] = 1.0 + vterm*(unitk[1]*l)*(unitk[1]*l);
+          vg[kcount][2] = 1.0 + vterm*(unitk[2]*m)*(unitk[2]*m);
+          vg[kcount][3] = vterm*unitk[0]*k*unitk[1]*l;
+          vg[kcount][4] = -vterm*unitk[0]*k*unitk[2]*m;
+          vg[kcount][5] = -vterm*unitk[1]*l*unitk[2]*m;
+          kcount++;
+
+          kxvecs[kcount] = k;
+          kyvecs[kcount] = -l;
+          kzvecs[kcount] = -m;
+          ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk;
+          eg[kcount][0] = 2.0*unitk[0]*k*ug[kcount];
+          eg[kcount][1] = -2.0*unitk[1]*l*ug[kcount];
+          eg[kcount][2] = -2.0*unitk[2]*m*ug[kcount];
+          vg[kcount][0] = 1.0 + vterm*(unitk[0]*k)*(unitk[0]*k);
+          vg[kcount][1] = 1.0 + vterm*(unitk[1]*l)*(unitk[1]*l);
+          vg[kcount][2] = 1.0 + vterm*(unitk[2]*m)*(unitk[2]*m);
+          vg[kcount][3] = -vterm*unitk[0]*k*unitk[1]*l;
+          vg[kcount][4] = -vterm*unitk[0]*k*unitk[2]*m;
+          vg[kcount][5] = vterm*unitk[1]*l*unitk[2]*m;
+          kcount++;
+        }
+      }
+    }
+  }
+}
+
+/* ----------------------------------------------------------------------
+   pre-compute coefficients (ug, eg, vg) for each Ewald K-vector for a
+   triclinic system; fills kxvecs/kyvecs/kzvecs and sets kcount
+------------------------------------------------------------------------- */
+
+void Ewald::coeffs_triclinic()
+{
+  int k,l,m;
+  double sqk,vterm;
+
+  double g_ewald_sq_inv = 1.0 / (g_ewald*g_ewald);
+  double preu = 4.0*MY_PI/volume;
+
+  double unitk_lamda[3];  // scratch K-vector, transformed in place by x2lamdaT
+
+  kcount = 0;  // number of K-vectors accepted so far; also the next free slot
+
+  // (k,l,m) with k >= 1 and l,m running over both signs and zero
+
+  for (k = 1; k <= kxmax; k++) {
+    for (l = -kymax; l <= kymax; l++) {
+      for (m = -kzmax; m <= kzmax; m++) {
+        unitk_lamda[0] = 2.0*MY_PI*k;
+        unitk_lamda[1] = 2.0*MY_PI*l;
+        unitk_lamda[2] = 2.0*MY_PI*m;
+        x2lamdaT(&unitk_lamda[0],&unitk_lamda[0]);
+        sqk = unitk_lamda[0]*unitk_lamda[0] + unitk_lamda[1]*unitk_lamda[1] +
+          unitk_lamda[2]*unitk_lamda[2];
+        if (sqk <= gsqmx) {
+          kxvecs[kcount] = k;
+          kyvecs[kcount] = l;
+          kzvecs[kcount] = m;
+          ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk;
+          eg[kcount][0] = 2.0*unitk_lamda[0]*ug[kcount];
+          eg[kcount][1] = 2.0*unitk_lamda[1]*ug[kcount];
+          eg[kcount][2] = 2.0*unitk_lamda[2]*ug[kcount];
+          vterm = -2.0*(1.0/sqk + 0.25*g_ewald_sq_inv);
+          vg[kcount][0] = 1.0 + vterm*unitk_lamda[0]*unitk_lamda[0];  // vg order: xx,yy,zz,xy,xz,yz
+          vg[kcount][1] = 1.0 + vterm*unitk_lamda[1]*unitk_lamda[1];
+          vg[kcount][2] = 1.0 + vterm*unitk_lamda[2]*unitk_lamda[2];
+          vg[kcount][3] = vterm*unitk_lamda[0]*unitk_lamda[1];
+          vg[kcount][4] = vterm*unitk_lamda[0]*unitk_lamda[2];
+          vg[kcount][5] = vterm*unitk_lamda[1]*unitk_lamda[2];
+          kcount++;
+        }
+      }
+    }
+  }
+
+  // (0,l,m) with l >= 1 and m running over both signs and zero
+
+  for (l = 1; l <= kymax; l++) {
+    for (m = -kzmax; m <= kzmax; m++) {
+      unitk_lamda[0] = 0.0;
+      unitk_lamda[1] = 2.0*MY_PI*l;
+      unitk_lamda[2] = 2.0*MY_PI*m;
+      x2lamdaT(&unitk_lamda[0],&unitk_lamda[0]);
+      sqk = unitk_lamda[1]*unitk_lamda[1] + unitk_lamda[2]*unitk_lamda[2];
+      if (sqk <= gsqmx) {
+        kxvecs[kcount] = 0;
+        kyvecs[kcount] = l;
+        kzvecs[kcount] = m;
+        ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk;
+        eg[kcount][0] =  0.0;
+        eg[kcount][1] =  2.0*unitk_lamda[1]*ug[kcount];
+        eg[kcount][2] =  2.0*unitk_lamda[2]*ug[kcount];
+        vterm = -2.0*(1.0/sqk + 0.25*g_ewald_sq_inv);
+        vg[kcount][0] = 1.0;
+        vg[kcount][1] = 1.0 + vterm*unitk_lamda[1]*unitk_lamda[1];
+        vg[kcount][2] = 1.0 + vterm*unitk_lamda[2]*unitk_lamda[2];
+        vg[kcount][3] = 0.0;
+        vg[kcount][4] = 0.0;
+        vg[kcount][5] = vterm*unitk_lamda[1]*unitk_lamda[2];
+        kcount++;
+      }
+    }
+  }
+
+  // (0,0,m) with m >= 1
+
+  for (m = 1; m <= kzmax; m++) {  // kzmax: z-axis cs/sn tables are only built up to kzmax
+    unitk_lamda[0] = 0.0;
+    unitk_lamda[1] = 0.0;
+    unitk_lamda[2] = 2.0*MY_PI*m;
+    x2lamdaT(&unitk_lamda[0],&unitk_lamda[0]);
+    sqk = unitk_lamda[2]*unitk_lamda[2];
+    if (sqk <= gsqmx) {
+      kxvecs[kcount] = 0;
+      kyvecs[kcount] = 0;
+      kzvecs[kcount] = m;
+      ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk;
+      eg[kcount][0] = 0.0;
+      eg[kcount][1] = 0.0;
+      eg[kcount][2] = 2.0*unitk_lamda[2]*ug[kcount];
+      vterm = -2.0*(1.0/sqk + 0.25*g_ewald_sq_inv);
+      vg[kcount][0] = 1.0;
+      vg[kcount][1] = 1.0;
+      vg[kcount][2] = 1.0 + vterm*unitk_lamda[2]*unitk_lamda[2];
+      vg[kcount][3] = 0.0;
+      vg[kcount][4] = 0.0;
+      vg[kcount][5] = 0.0;
+      kcount++;
+    }
+  }
+}
+
+/* ----------------------------------------------------------------------
+   allocate memory that depends on # of K-vectors
+------------------------------------------------------------------------- */
+
+void Ewald::allocate()
+{
+  const int nk = kmax3d;  // every array below gets kmax3d entries (rows)
+  kxvecs = new int[nk];
+  kyvecs = new int[nk];
+  kzvecs = new int[nk];
+  ug = new double[nk];
+  memory->create(eg,nk,3,"ewald:eg");
+  memory->create(vg,nk,6,"ewald:vg");
+
+  sfacrl = new double[nk];
+  sfacim = new double[nk];
+  sfacrl_all = new double[nk];
+  sfacim_all = new double[nk];
+}
+
+/* ----------------------------------------------------------------------
+   deallocate memory that depends on # of K-vectors
+------------------------------------------------------------------------- */
+
+void Ewald::deallocate()
+{
+  // structure-factor buffers
+  delete [] sfacim_all;
+  delete [] sfacrl_all;
+  delete [] sfacim;
+  delete [] sfacrl;
+  // per-K-vector coefficient tables and integer components
+  memory->destroy(vg);
+  memory->destroy(eg);
+  delete [] ug;
+  delete [] kzvecs;
+  delete [] kyvecs;
+  delete [] kxvecs;
+}
+
+/* ----------------------------------------------------------------------
+   Slab-geometry correction term to dampen inter-slab interactions between
+   periodically repeating slabs.  Yields good approximation to 2D Ewald if
+   adequate empty space is left between repeating slabs (J. Chem. Phys.
+   111, 3155).  Slabs defined here to be parallel to the xy plane. Also
+   extended to non-neutral systems (J. Chem. Phys. 131, 094107).
+------------------------------------------------------------------------- */
+
+void Ewald::slabcorr()
+{
+  // per-atom charges/positions and the box length along z
+
+  double **x = atom->x;
+  double *q = atom->q;
+  int nlocal = atom->nlocal;
+  double zprd = domain->zprd;
+
+  // z dipole moment: local sum of q*z, then summed over all processors
+
+  double dipole_local = 0.0;
+  for (int i = 0; i < nlocal; i++) dipole_local += q[i]*x[i][2];
+
+  double dipole_all;
+  MPI_Allreduce(&dipole_local,&dipole_all,1,MPI_DOUBLE,MPI_SUM,world);
+
+  // sum of q*z*z, only needed to make non-neutral systems and/or
+  //  per-atom energy translationally invariant; stays 0.0 otherwise
+
+  double dipole_r2 = 0.0;
+  const bool need_r2 = eflag_atom || fabs(qsum) > SMALL;
+  if (need_r2) {
+    double r2_local = 0.0;
+    for (int i = 0; i < nlocal; i++)
+      r2_local += q[i]*x[i][2]*x[i][2];
+
+    // global value over all processors
+    MPI_Allreduce(&r2_local,&dipole_r2,1,MPI_DOUBLE,MPI_SUM,world);
+  }
+
+  // global energy correction, scaled by qqrd2e*scale
+
+  const double qscale = force->qqrd2e * scale;
+  const double e_slabcorr = MY_2PI*(dipole_all*dipole_all -
+    qsum*dipole_r2 - qsum*qsum*zprd*zprd/12.0)/volume;
+
+  if (eflag_global) energy += qscale * e_slabcorr;
+
+  // per-atom share of the energy correction
+
+  if (eflag_atom) {
+    const double efact = qscale * MY_2PI/volume;
+    for (int i = 0; i < nlocal; i++)
+      eatom[i] += efact * q[i]*(x[i][2]*dipole_all - 0.5*(dipole_r2 +
+        qsum*x[i][2]*x[i][2]) - qsum*zprd*zprd/12.0);
+  }
+
+  // force correction, z component only
+
+  double **f = atom->f;
+  const double ffact = qscale * (-4.0*MY_PI/volume);
+
+  for (int i = 0; i < nlocal; i++)
+    f[i][2] += ffact * q[i]*(dipole_all - qsum*x[i][2]);
+}
+
+/* ----------------------------------------------------------------------
+   memory usage of local arrays
+------------------------------------------------------------------------- */
+
+double Ewald::memory_usage()
+{
+  const double kvec_ints = 3 * kmax3d * sizeof(int);               // kxvecs,kyvecs,kzvecs
+  const double kvec_coef = (1 + 3 + 6) * kmax3d * sizeof(double);  // ug, eg, vg
+  const double sfac = 4 * kmax3d * sizeof(double);                 // sfacrl,sfacim + _all
+  const double efield = nmax*3 * sizeof(double);                   // presumably ek - confirm
+  const double trig = 2 * (2*kmax+1)*3*nmax * sizeof(double);      // presumably cs,sn - confirm
+  return kvec_ints + kvec_coef + sfac + efield + trig;
+}
+
+/* ----------------------------------------------------------------------
+   group-group interactions
+ ------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   compute the Ewald total long-range force and energy for groups A and B
+ ------------------------------------------------------------------------- */
+
+void Ewald::compute_group_group(int groupbit_A, int groupbit_B, int AA_flag)
+{
+  if (slabflag && triclinic)
+    error->all(FLERR,"Cannot (yet) use K-space slab "
+               "correction with compute group/group for triclinic systems");
+
+  int i,k;
+
+  if (!group_allocate_flag) {  // group buffers are created on first use only
+    allocate_groups();
+    group_allocate_flag = 1;
+  }
+
+  e2group = 0.0; // energy
+  f2group[0] = 0.0; // force in x-direction
+  f2group[1] = 0.0; // force in y-direction
+  f2group[2] = 0.0; // force in z-direction
+
+  // zero the partial and total structure factors for groups A and B
+
+  for (k = 0; k < kcount; k++) {
+
+    // group A
+
+    sfacrl_A[k] = 0.0;
+    sfacim_A[k] = 0.0;
+    sfacrl_A_all[k] = 0.0;
+    sfacim_A_all[k] = 0;
+
+    // group B
+
+    sfacrl_B[k] = 0.0;
+    sfacim_B[k] = 0.0;
+    sfacrl_B_all[k] = 0.0;
+    sfacim_B_all[k] = 0.0;
+  }
+
+  double *q = atom->q;
+  int nlocal = atom->nlocal;
+  int *mask = atom->mask;
+
+  int kx,ky,kz;
+  double cypz,sypz,exprl,expim;
+
+  // partial structure factors for groups A and B on each processor
+  // (reads the cs/sn tables as they stand; they are not rebuilt here)
+  for (k = 0; k < kcount; k++) {
+    kx = kxvecs[k];
+    ky = kyvecs[k];
+    kz = kzvecs[k];
+
+    for (i = 0; i < nlocal; i++) {
+
+      if (!((mask[i] & groupbit_A) && (mask[i] & groupbit_B)))  // atom not in both groups ...
+        if (AA_flag) continue;  // ... is skipped when AA_flag is set
+
+      if ((mask[i] & groupbit_A) || (mask[i] & groupbit_B)) {
+
+        cypz = cs[ky][1][i]*cs[kz][2][i] - sn[ky][1][i]*sn[kz][2][i];  // y,z factors combined
+        sypz = sn[ky][1][i]*cs[kz][2][i] + cs[ky][1][i]*sn[kz][2][i];
+        exprl = cs[kx][0][i]*cypz - sn[kx][0][i]*sypz;  // real part after folding in x
+        expim = sn[kx][0][i]*cypz + cs[kx][0][i]*sypz;  // imaginary part
+
+        // group A
+
+        if (mask[i] & groupbit_A) {
+          sfacrl_A[k] += q[i]*exprl;
+          sfacim_A[k] += q[i]*expim;
+        }
+
+        // group B
+
+        if (mask[i] & groupbit_B) {
+          sfacrl_B[k] += q[i]*exprl;
+          sfacim_B[k] += q[i]*expim;
+        }
+      }
+    }
+  }
+
+  // total structure factor by summing over procs
+
+  MPI_Allreduce(sfacrl_A,sfacrl_A_all,kcount,MPI_DOUBLE,MPI_SUM,world);
+  MPI_Allreduce(sfacim_A,sfacim_A_all,kcount,MPI_DOUBLE,MPI_SUM,world);
+
+  MPI_Allreduce(sfacrl_B,sfacrl_B_all,kcount,MPI_DOUBLE,MPI_SUM,world);
+  MPI_Allreduce(sfacim_B,sfacim_B_all,kcount,MPI_DOUBLE,MPI_SUM,world);
+
+  const double qscale = force->qqrd2e * scale;
+  double partial_group;
+
+  // total group A <--> group B energy
+  // self and boundary correction terms are in compute_group_group.cpp
+
+  for (k = 0; k < kcount; k++) {
+    partial_group = sfacrl_A_all[k]*sfacrl_B_all[k] +
+      sfacim_A_all[k]*sfacim_B_all[k];
+    e2group += ug[k]*partial_group;
+  }
+
+  e2group *= qscale;
+
+  // total group A <--> group B force
+
+  for (k = 0; k < kcount; k++) {
+    partial_group = sfacim_A_all[k]*sfacrl_B_all[k] -
+      sfacrl_A_all[k]*sfacim_B_all[k];
+    f2group[0] += eg[k][0]*partial_group;
+    f2group[1] += eg[k][1]*partial_group;
+    if (slabflag != 2) f2group[2] += eg[k][2]*partial_group;  // no z term when slabflag is 2
+  }
+
+  f2group[0] *= qscale;
+  f2group[1] *= qscale;
+  f2group[2] *= qscale;
+
+  // 2d slab correction, added on top of e2group and f2group[2]
+
+  if (slabflag == 1)
+    slabcorr_groups(groupbit_A, groupbit_B, AA_flag);
+}
+
+/* ----------------------------------------------------------------------
+   Slab-geometry correction term to dampen inter-slab interactions between
+   periodically repeating slabs.  Yields good approximation to 2D Ewald if
+   adequate empty space is left between repeating slabs (J. Chem. Phys.
+   111, 3155).  Slabs defined here to be parallel to the xy plane. Also
+   extended to non-neutral systems (J. Chem. Phys. 131, 094107).
+------------------------------------------------------------------------- */
+
+void Ewald::slabcorr_groups(int groupbit_A, int groupbit_B, int AA_flag)
+{
+  // per-atom data and box length along z
+
+  double **x = atom->x;
+  double *q = atom->q;
+  int *mask = atom->mask;
+  int nlocal = atom->nlocal;
+  double zprd = domain->zprd;
+
+  // per-group local sums: charge, q*z and q*z*z
+
+  double qsum_A = 0.0;
+  double qsum_B = 0.0;
+  double dipole_A = 0.0;
+  double dipole_B = 0.0;
+  double dipole_r2_A = 0.0;
+  double dipole_r2_B = 0.0;
+
+  // accumulate over the atoms owned by this processor
+
+  for (int i = 0; i < nlocal; i++) {
+    const int in_A = mask[i] & groupbit_A;
+    const int in_B = mask[i] & groupbit_B;
+
+    // with AA_flag set, skip atoms that are not in both groups
+
+    if (AA_flag && !(in_A && in_B)) continue;
+
+    const double z = x[i][2];
+
+    if (in_A) {
+      qsum_A += q[i];
+      dipole_A += q[i]*z;
+      dipole_r2_A += q[i]*z*z;
+    }
+
+    if (in_B) {
+      qsum_B += q[i];
+      dipole_B += q[i]*z;
+      dipole_r2_B += q[i]*z*z;
+    }
+  }
+
+  // turn each local sum into its total over all processors,
+  //  one reduction per quantity
+
+  double *sums[6] = {&qsum_A,&qsum_B,&dipole_A,&dipole_B,
+                     &dipole_r2_A,&dipole_r2_B};
+
+  for (int s = 0; s < 6; s++) {
+    double total;
+    MPI_Allreduce(sums[s],&total,1,MPI_DOUBLE,MPI_SUM,world);
+    *sums[s] = total;
+  }
+
+  // correction to the group-group energy
+
+  const double qscale = force->qqrd2e * scale;
+  const double efact = qscale * MY_2PI/volume;
+
+  e2group += efact * (dipole_A*dipole_B - 0.5*(qsum_A*dipole_r2_B +
+    qsum_B*dipole_r2_A) - qsum_A*qsum_B*zprd*zprd/12.0);
+
+  // correction to the z component of the group-group force
+
+  const double ffact = qscale * (-4.0*MY_PI/volume);
+  f2group[2] += ffact * (qsum_A*dipole_B - qsum_B*dipole_A);
+}
+
+/* ----------------------------------------------------------------------
+   allocate group-group memory that depends on # of K-vectors
+------------------------------------------------------------------------- */
+
+void Ewald::allocate_groups()
+{
+  const int nk = kmax3d;  // every buffer below gets kmax3d entries
+
+  // per-processor partial sums, real and imaginary parts
+  sfacrl_A = new double[nk];
+  sfacim_A = new double[nk];
+  sfacrl_B = new double[nk];
+  sfacim_B = new double[nk];
+
+  // totals after summing over all processors
+  sfacrl_A_all = new double[nk];
+  sfacim_A_all = new double[nk];
+  sfacrl_B_all = new double[nk];
+  sfacim_B_all = new double[nk];
+}
+
+/* ----------------------------------------------------------------------
+   deallocate group-group memory that depends on # of K-vectors
+------------------------------------------------------------------------- */
+
+void Ewald::deallocate_groups()
+{
+  // totals summed over all processors
+
+  delete [] sfacim_B_all;
+  delete [] sfacrl_B_all;
+  delete [] sfacim_A_all;
+  delete [] sfacrl_A_all;
+
+  // per-processor partial sums
+
+  delete [] sfacim_B;
+  delete [] sfacrl_B;
+  delete [] sfacim_A;
+  delete [] sfacrl_A;
+}
diff --git a/src/KSPACE/ewald_disp.cpp b/src/KSPACE/ewald_disp.cpp
index ba88e40f14..39951b8b0c 100644
--- a/src/KSPACE/ewald_disp.cpp
+++ b/src/KSPACE/ewald_disp.cpp
@@ -1,1475 +1,1475 @@
-/* ----------------------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   http://lammps.sandia.gov, Sandia National Laboratories
-   Steve Plimpton, sjplimp@sandia.gov
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-/* ----------------------------------------------------------------------
-   Contributing authors: Pieter in 't Veld (SNL), Stan Moore (SNL)
-------------------------------------------------------------------------- */
-
-#include "mpi.h"
-#include "string.h"
-#include "stdio.h"
-#include "stdlib.h"
-#include "math.h"
-#include "ewald_disp.h"
-#include "math_vector.h"
-#include "math_const.h"
-#include "math_special.h"
-#include "atom.h"
-#include "comm.h"
-#include "force.h"
-#include "pair.h"
-#include "domain.h"
-#include "memory.h"
-#include "error.h"
-#include "update.h"
-
-using namespace LAMMPS_NS;
-using namespace MathConst;
-using namespace MathSpecial;
-
-#define SMALL 0.00001
-
-enum{GEOMETRIC,ARITHMETIC,SIXTHPOWER};   // same as in pair.h
-
-//#define DEBUG
-
-/* ---------------------------------------------------------------------- */
-
-EwaldDisp::EwaldDisp(LAMMPS *lmp, int narg, char **arg) : KSpace(lmp, narg, arg)
-{
-  if (narg!=1) error->all(FLERR,"Illegal kspace_style ewald/n command");
-
-  ewaldflag = dispersionflag = dipoleflag = 1;
-  accuracy_relative = fabs(force->numeric(FLERR,arg[0]));
-
-  memset(function, 0, EWALD_NORDER*sizeof(int));
-  kenergy = kvirial = NULL;
-  cek_local = cek_global = NULL;
-  ekr_local = NULL;
-  hvec = NULL;
-  kvec = NULL;
-  B = NULL;
-  first_output = 0;
-  energy_self_peratom = NULL;
-  virial_self_peratom = NULL;
-  nmax = 0;
-  q2 = 0;
-  b2 = 0;
-  M2 = 0;
-}
-
-/* ---------------------------------------------------------------------- */
-
-EwaldDisp::~EwaldDisp()
-{
-  deallocate();
-  deallocate_peratom();
-  delete [] ekr_local;
-  delete [] B;
-}
-
-/* --------------------------------------------------------------------- */
-
-void EwaldDisp::init()
-{
-  nkvec = nkvec_max = nevec = nevec_max = 0;
-  nfunctions = nsums = sums = 0;
-  nbox = -1;
-  bytes = 0.0;
-
-  if (!comm->me) {
-    if (screen) fprintf(screen,"EwaldDisp initialization ...\n");
-    if (logfile) fprintf(logfile,"EwaldDisp initialization ...\n");
-  }
-
-  triclinic_check();
-  if (domain->dimension == 2)
-    error->all(FLERR,"Cannot use EwaldDisp with 2d simulation");
-  if (slabflag == 0 && domain->nonperiodic > 0)
-    error->all(FLERR,"Cannot use nonperiodic boundaries with EwaldDisp");
-  if (slabflag == 1) {
-    if (domain->xperiodic != 1 || domain->yperiodic != 1 ||
-        domain->boundary[2][0] != 1 || domain->boundary[2][1] != 1)
-      error->all(FLERR,"Incorrect boundaries with slab EwaldDisp");
-  }
-
-  scale = 1.0;
-  mumurd2e = force->qqrd2e;
-  dielectric = force->dielectric;
-
-  int tmp;
-  Pair *pair = force->pair;
-  int *ptr = pair ? (int *) pair->extract("ewald_order",tmp) : NULL;
-  double *cutoff = pair ? (double *) pair->extract("cut_coul",tmp) : NULL;
-  if (!(ptr||cutoff))
-    error->all(FLERR,"KSpace style is incompatible with Pair style");
-  int ewald_order = ptr ? *((int *) ptr) : 1<<1;
-  int ewald_mix = ptr ? *((int *) pair->extract("ewald_mix",tmp)) : GEOMETRIC;
-  memset(function, 0, EWALD_NFUNCS*sizeof(int));
-  for (int i=0; i<=EWALD_NORDER; ++i)                        // transcribe order
-    if (ewald_order&(1<<i)) {                                // from pair_style
-      int n[] = EWALD_NSUMS, k = 0;
-      char str[128];
-      switch (i) {
-        case 1:
-          k = 0; break;
-        case 3:
-          k = 3; break;
-        case 6:
-          if (ewald_mix==GEOMETRIC) { k = 1; break; }
-          else if (ewald_mix==ARITHMETIC) { k = 2; break; }
-          error->all(FLERR,
-                     "Unsupported mixing rule in kspace_style ewald/disp");
-        default:
-          error->all(FLERR,"Unsupported order in kspace_style ewald/disp");
-      }
-      nfunctions += function[k] = 1;
-      nsums += n[k];
-    }
-
-  if (!gewaldflag) g_ewald = 0.0;
-  pair->init();  // so B is defined
-  init_coeffs();
-  init_coeff_sums();
-
-  double qsum, qsqsum, bsbsum;
-  qsum = qsqsum = bsbsum = 0.0;
-  if (function[0]) {
-    qsum = sum[0].x;
-    qsqsum = sum[0].x2;
-  }
-
-  // turn off coulombic if no charge
-
-  if (function[0] && qsqsum == 0.0) {
-    function[0] = 0;
-    nfunctions -= 1;
-    nsums -= 1;
-  }
-
-  if (function[1]) bsbsum = sum[1].x2;
-  if (function[2]) bsbsum = sum[2].x2;
-
-  if (function[3]) M2 = sum[9].x2;
-
-  if (function[3] && strcmp(update->unit_style,"electron") == 0)
-    error->all(FLERR,"Cannot (yet) use 'electron' units with dipoles");
-
-  if (qsqsum == 0.0 && bsbsum == 0.0 && M2 == 0.0)
-      error->all(FLERR,"Cannot use Ewald/disp solver "
-                 "on system with no charge, dipole, or LJ particles");
-  if (fabs(qsum) > SMALL && comm->me == 0) {
-      char str[128];
-      sprintf(str,"System is not charge neutral, net charge = %g",qsum);
-      error->warning(FLERR,str);
-  }
-
-  if (!function[1] && !function[2])
-    dispersionflag = 0;
-
-  if (!function[3])
-    dipoleflag = 0;
-
-  pair_check();
-
-  // set accuracy (force units) from accuracy_relative or accuracy_absolute
-
-  if (accuracy_absolute >= 0.0) accuracy = accuracy_absolute;
-  else accuracy = accuracy_relative * two_charge_force;
-
-  // setup K-space resolution
-
-  q2 = qsqsum * force->qqrd2e;
-  M2 *= mumurd2e;
-  b2 = bsbsum; //Are these units right?
-  bigint natoms = atom->natoms;
-
-  if (!gewaldflag) {
-    if (function[0]) {
-      g_ewald = accuracy*sqrt(natoms*(*cutoff)*shape_det(domain->h)) / (2.0*q2);
-      if (g_ewald >= 1.0) g_ewald = (1.35 - 0.15*log(accuracy))/(*cutoff);
-      else g_ewald = sqrt(-log(g_ewald)) / (*cutoff);
-    }
-    else if (function[1] || function[2]) {
-      //Try Newton Solver
-      //Use old method to get guess
-      g_ewald = (1.35 - 0.15*log(accuracy))/ *cutoff;
-    
-      double g_ewald_new = 
-        NewtonSolve(g_ewald,(*cutoff),natoms,shape_det(domain->h),b2);
-      if (g_ewald_new > 0.0) g_ewald = g_ewald_new;
-      else error->warning(FLERR,"Ewald/disp Newton solver failed, "
-                          "using old method to estimate g_ewald");
-    } else if (function[3]) {
-      //Try Newton Solver
-      //Use old method to get guess
-      g_ewald = (1.35 - 0.15*log(accuracy))/ *cutoff;
-      double g_ewald_new = 
-        NewtonSolve(g_ewald,(*cutoff),natoms,shape_det(domain->h),M2);
-      if (g_ewald_new > 0.0) g_ewald = g_ewald_new;
-      else error->warning(FLERR,"Ewald/disp Newton solver failed, "
-                          "using old method to estimate g_ewald");
-    }
-  }
-
-  if (!comm->me) {
-      if (screen) fprintf(screen, "  G vector = %g\n", g_ewald);
-      if (logfile) fprintf(logfile, "  G vector = %g\n", g_ewald);
-  }
-
-  g_ewald_6 = g_ewald;
-  deallocate_peratom();
-  peratom_allocate_flag = 0;
-}
-
-/* ----------------------------------------------------------------------
-   adjust EwaldDisp coeffs, called initially and whenever volume has changed
-------------------------------------------------------------------------- */
-
-void EwaldDisp::setup()
-{
-  volume = shape_det(domain->h)*slab_volfactor;
-  memcpy(unit, domain->h_inv, sizeof(shape));
-  shape_scalar_mult(unit, 2.0*MY_PI);
-  unit[2] /= slab_volfactor;
-
-  // int nbox_old = nbox, nkvec_old = nkvec;
-
-  if (accuracy >= 1) {
-    nbox = 0;
-    error->all(FLERR,"KSpace accuracy too low");
-  }
-
-  bigint natoms = atom->natoms;
-  double err;
-  int kxmax = 1;
-  int kymax = 1;
-  int kzmax = 1;
-  err = rms(kxmax,domain->h[0],natoms,q2,b2,M2);
-  while (err > accuracy) {
-    kxmax++;
-    err = rms(kxmax,domain->h[0],natoms,q2,b2,M2);
-  }
-  err = rms(kymax,domain->h[1],natoms,q2,b2,M2);
-  while (err > accuracy) {
-    kymax++;
-    err = rms(kymax,domain->h[1],natoms,q2,b2,M2);
-  }
-  err = rms(kzmax,domain->h[2]*slab_volfactor,natoms,q2,b2,M2);
-  while (err > accuracy) {
-    kzmax++;
-    err = rms(kzmax,domain->h[2]*slab_volfactor,natoms,q2,b2,M2);
-  }
-  nbox = MAX(kxmax,kymax);
-  nbox = MAX(nbox,kzmax);
-  double gsqxmx = unit[0]*unit[0]*kxmax*kxmax;
-  double gsqymx = unit[1]*unit[1]*kymax*kymax;
-  double gsqzmx = unit[2]*unit[2]*kzmax*kzmax;
-  gsqmx = MAX(gsqxmx,gsqymx);
-  gsqmx = MAX(gsqmx,gsqzmx);
-  gsqmx *= 1.00001;
-
-  reallocate();
-  coefficients();
-  init_coeffs();
-  init_coeff_sums();
-  init_self();
-
-  if (!(first_output||comm->me)) {
-    first_output = 1;
-    if (screen) fprintf(screen,
-               "  vectors: nbox = %d, nkvec = %d\n", nbox, nkvec);
-    if (logfile) fprintf(logfile,
-        "  vectors: nbox = %d, nkvec = %d\n", nbox, nkvec);
-  }
-}
-
-/* ----------------------------------------------------------------------
-   compute RMS accuracy for a dimension
-------------------------------------------------------------------------- */
-
-double EwaldDisp::rms(int km, double prd, bigint natoms, double q2, double b2, double M2)
-{
-  double value = 0.0;
-
-  // Coulombic
-
-  double g2 = g_ewald*g_ewald;
-
-  value += 2.0*q2*g_ewald/prd *
-    sqrt(1.0/(MY_PI*km*natoms)) *
-    exp(-MY_PI*MY_PI*km*km/(g2*prd*prd));
-
-  // Lennard-Jones
-
-  double g7 = g2*g2*g2*g_ewald;
-
-  value += 4.0*b2*g7/3.0 *
-    sqrt(1.0/(MY_PI*natoms)) *
-    (exp(-MY_PI*MY_PI*km*km/(g2*prd*prd)) *
-    (MY_PI*km/(g_ewald*prd) + 1));
-
-  // dipole
-
-  value += 8.0*MY_PI*M2/volume*g_ewald *
-    sqrt(2.0*MY_PI*km*km*km/(15.0*natoms)) *
-    exp(-pow(MY_PI*km/(g_ewald*prd),2.0));
-
-  return value;
-}
-
-void EwaldDisp::reallocate()
-{
-  int ix, iy, iz;
-  int nkvec_max = nkvec;
-  vector h;
-
-  nkvec = 0;
-  int *kflag = new int[(nbox+1)*(2*nbox+1)*(2*nbox+1)];
-  int *flag = kflag;
-
-  for (ix=0; ix<=nbox; ++ix)
-    for (iy=-nbox; iy<=nbox; ++iy)
-      for (iz=-nbox; iz<=nbox; ++iz)
-        if (!(ix||iy||iz)) *(flag++) = 0;
-        else if ((!ix)&&(iy<0)) *(flag++) = 0;
-        else if ((!(ix||iy))&&(iz<0)) *(flag++) = 0;        // use symmetry
-        else {
-          h[0] = unit[0]*ix;
-          h[1] = unit[5]*ix+unit[1]*iy;
-          h[2] = unit[4]*ix+unit[3]*iy+unit[2]*iz;
-          if ((*(flag++) = h[0]*h[0]+h[1]*h[1]+h[2]*h[2]<=gsqmx)) ++nkvec;
-        }
-
-  if (nkvec>nkvec_max) {
-    deallocate();                                        // free memory
-    hvec = new hvector[nkvec];                                // hvec
-    bytes += (nkvec-nkvec_max)*sizeof(hvector);
-    kvec = new kvector[nkvec];                                // kvec
-    bytes += (nkvec-nkvec_max)*sizeof(kvector);
-    kenergy = new double[nkvec*nfunctions];                // kenergy
-    bytes += (nkvec-nkvec_max)*nfunctions*sizeof(double);
-    kvirial = new double[6*nkvec*nfunctions];                // kvirial
-    bytes += 6*(nkvec-nkvec_max)*nfunctions*sizeof(double);
-    cek_local = new complex[nkvec*nsums];                // cek_local
-    bytes += (nkvec-nkvec_max)*nsums*sizeof(complex);
-    cek_global = new complex[nkvec*nsums];                // cek_global
-    bytes += (nkvec-nkvec_max)*nsums*sizeof(complex);
-    nkvec_max = nkvec;
-  }
-
-  flag = kflag;                                           // create index and
-  kvector *k = kvec;                                      // wave vectors
-  hvector *hi = hvec;
-  for (ix=0; ix<=nbox; ++ix)
-    for (iy=-nbox; iy<=nbox; ++iy)
-      for (iz=-nbox; iz<=nbox; ++iz)
-        if (*(flag++)) {
-          hi->x = unit[0]*ix;
-          hi->y = unit[5]*ix+unit[1]*iy;
-          (hi++)->z = unit[4]*ix+unit[3]*iy+unit[2]*iz;
-          k->x = ix+nbox; k->y = iy+nbox; (k++)->z = iz+nbox; }
-
-  delete [] kflag;
-}
-
-
-void EwaldDisp::reallocate_atoms()
-{
-  if (eflag_atom || vflag_atom)
-    if (atom->nlocal > nmax) {
-      deallocate_peratom();
-      allocate_peratom();
-      nmax = atom->nmax;
-    }
-
-  if ((nevec = atom->nmax*(2*nbox+1))<=nevec_max) return;
-  delete [] ekr_local;
-  ekr_local = new cvector[nevec];
-  bytes += (nevec-nevec_max)*sizeof(cvector);
-  nevec_max = nevec;
-}
-
-
-void EwaldDisp::allocate_peratom()
-{
-  memory->create(energy_self_peratom,
-      atom->nmax,EWALD_NFUNCS,"ewald/n:energy_self_peratom");
-  memory->create(virial_self_peratom,
-      atom->nmax,EWALD_NFUNCS,"ewald/n:virial_self_peratom");
-}
-
-
-void EwaldDisp::deallocate_peratom()                        // free memory
-{
-  memory->destroy(energy_self_peratom);
-  memory->destroy(virial_self_peratom);
-}
-
-
-void EwaldDisp::deallocate()                                // free memory
-{
-  delete [] hvec;                hvec = NULL;
-  delete [] kvec;                kvec = NULL;
-  delete [] kenergy;                kenergy = NULL;
-  delete [] kvirial;                kvirial = NULL;
-  delete [] cek_local;                cek_local = NULL;
-  delete [] cek_global;                cek_global = NULL;
-}
-
-
-void EwaldDisp::coefficients()
-{
-  vector h;
-  hvector *hi = hvec, *nh;
-  double eta2 = 0.25/(g_ewald*g_ewald);
-  double b1, b2, expb2, h1, h2, c1, c2;
-  double *ke = kenergy, *kv = kvirial;
-  int func0 = function[0], func12 = function[1]||function[2],
-      func3 = function[3];
-
-  for (nh = (hi = hvec)+nkvec; hi<nh; ++hi) {                // wave vectors
-    memcpy(h, hi, sizeof(vector));
-    expb2 = exp(-(b2 = (h2 = vec_dot(h, h))*eta2));
-    if (func0) {                                        // qi*qj/r coeffs
-      *(ke++) = c1 = expb2/h2;
-      *(kv++) = c1-(c2 = 2.0*c1*(1.0+b2)/h2)*h[0]*h[0];
-      *(kv++) = c1-c2*h[1]*h[1];                        // lammps convention
-      *(kv++) = c1-c2*h[2]*h[2];                        // instead of voigt
-      *(kv++) = -c2*h[1]*h[0];
-      *(kv++) = -c2*h[2]*h[0];
-      *(kv++) = -c2*h[2]*h[1];
-    }
-    if (func12) {                                        // -Bij/r^6 coeffs
-      b1 = sqrt(b2);                                        // minus sign folded
-      h1 = sqrt(h2);                                        // into constants
-      *(ke++) = c1 = -h1*h2*((c2=MY_PIS*erfc(b1))+(0.5/b2-1.0)*expb2/b1);
-      *(kv++) = c1-(c2 = 3.0*h1*(c2-expb2/b1))*h[0]*h[0];
-      *(kv++) = c1-c2*h[1]*h[1];                        // lammps convention
-      *(kv++) = c1-c2*h[2]*h[2];                        // instead of voigt
-      *(kv++) = -c2*h[1]*h[0];
-      *(kv++) = -c2*h[2]*h[0];
-      *(kv++) = -c2*h[2]*h[1];
-    }
-    if (func3) {                                        // dipole coeffs
-      *(ke++) = c1 = expb2/h2;
-      *(kv++) = c1-(c2 = 2.0*c1*(1.0+b2)/h2)*h[0]*h[0];
-      *(kv++) = c1-c2*h[1]*h[1];                        // lammps convention
-      *(kv++) = c1-c2*h[2]*h[2];                        // instead of voigt
-      *(kv++) = -c2*h[1]*h[0];
-      *(kv++) = -c2*h[2]*h[0];
-      *(kv++) = -c2*h[2]*h[1];
-    }
-  }
-}
-
-void EwaldDisp::init_coeffs()
-{
-  int tmp;
-  int n = atom->ntypes;
-
-  if (function[1]) {                                        // geometric 1/r^6
-    double **b = (double **) force->pair->extract("B",tmp);
-    delete [] B;
-    B = new double[n+1];
-    bytes += (n+1)*sizeof(double);
-    for (int i=0; i<=n; ++i) B[i] = sqrt(fabs(b[i][i]));
-  }
-  if (function[2]) {                                        // arithmetic 1/r^6
-    double **epsilon = (double **) force->pair->extract("epsilon",tmp);
-    double **sigma = (double **) force->pair->extract("sigma",tmp);
-    double eps_i, sigma_i, sigma_n, *bi = B = new double[7*n+7];
-    double c[7] = {
-      1.0, sqrt(6.0), sqrt(15.0), sqrt(20.0), sqrt(15.0), sqrt(6.0), 1.0};
-
-    if (!(epsilon&&sigma))
-      error->all(
-          FLERR,"Epsilon or sigma reference not set by pair style in ewald/n");
-    for (int i=0; i<=n; ++i) {
-      eps_i = sqrt(epsilon[i][i]);
-      sigma_i = sigma[i][i];
-      sigma_n = 1.0;
-      for (int j=0; j<7; ++j) {
-        *(bi++) = sigma_n*eps_i*c[j]; sigma_n *= sigma_i;
-      }
-    }
-  }
-}
-
-void EwaldDisp::init_coeff_sums()
-{
-  if (sums) return;                            // calculated only once
-  sums = 1;
-
-  Sum sum_local[EWALD_MAX_NSUMS];
-
-  memset(sum_local, 0, EWALD_MAX_NSUMS*sizeof(Sum));
-  if (function[0]) {                                        // 1/r
-    double *q = atom->q, *qn = q+atom->nlocal;
-    for (double *i=q; i<qn; ++i) {
-      sum_local[0].x += i[0]; sum_local[0].x2 += i[0]*i[0]; }
-  }
-  if (function[1]) {                                        // geometric 1/r^6
-    int *type = atom->type, *ntype = type+atom->nlocal;
-    for (int *i=type; i<ntype; ++i) {
-      sum_local[1].x += B[i[0]]; sum_local[1].x2 += B[i[0]]*B[i[0]]; }
-  }
-  if (function[2]) {                                        // arithmetic 1/r^6
-    double *bi;
-    int *type = atom->type, *ntype = type+atom->nlocal;
-    for (int *i=type; i<ntype; ++i) {
-      bi = B+7*i[0];
-      sum_local[2].x2 += bi[0]*bi[6];
-      for (int k=2; k<9; ++k) sum_local[k].x += *(bi++);
-    }
-  }
-  if (function[3]&&atom->mu) {                                // dipole
-    double *mu = atom->mu[0], *nmu = mu+4*atom->nlocal;
-    for (double *i = mu; i < nmu; i += 4)
-      sum_local[9].x2 += i[3]*i[3];
-  }
-  MPI_Allreduce(sum_local, sum, 2*EWALD_MAX_NSUMS, MPI_DOUBLE, MPI_SUM, world);
-}
-
-
-void EwaldDisp::init_self()
-{
-  double g1 = g_ewald, g2 = g1*g1, g3 = g1*g2;
-  const double qscale = force->qqrd2e * scale;
-
-  memset(energy_self, 0, EWALD_NFUNCS*sizeof(double));        // self energy
-  memset(virial_self, 0, EWALD_NFUNCS*sizeof(double));
-
-  if (function[0]) {                                        // 1/r
-    virial_self[0] = -0.5*MY_PI*qscale/(g2*volume)*sum[0].x*sum[0].x;
-    energy_self[0] = sum[0].x2*qscale*g1/MY_PIS-virial_self[0];
-  }
-  if (function[1]) {                                        // geometric 1/r^6
-    virial_self[1] = MY_PI*MY_PIS*g3/(6.0*volume)*sum[1].x*sum[1].x;
-    energy_self[1] = -sum[1].x2*g3*g3/12.0+virial_self[1];
-  }
-  if (function[2]) {                                        // arithmetic 1/r^6
-    virial_self[2] = MY_PI*MY_PIS*g3/(48.0*volume)*(sum[2].x*sum[8].x+
-        sum[3].x*sum[7].x+sum[4].x*sum[6].x+0.5*sum[5].x*sum[5].x);
-    energy_self[2] = -sum[2].x2*g3*g3/3.0+virial_self[2];
-  }
-  if (function[3]) {                                        // dipole
-    virial_self[3] = 0;                                        // in surface
-    energy_self[3] = sum[9].x2*mumurd2e*2.0*g3/3.0/MY_PIS-virial_self[3];
-  }
-}
-
-
-void EwaldDisp::init_self_peratom()
-{
-  if (!(vflag_atom || eflag_atom)) return;
-
-  double g1 = g_ewald, g2 = g1*g1, g3 = g1*g2;
-  const double qscale = force->qqrd2e * scale;
-  double *energy = energy_self_peratom[0];
-  double *virial = virial_self_peratom[0];
-  int nlocal = atom->nlocal;
-
-  memset(energy, 0, EWALD_NFUNCS*nlocal*sizeof(double));
-  memset(virial, 0, EWALD_NFUNCS*nlocal*sizeof(double));
-
-  if (function[0]) {                                        // 1/r
-    double *ei = energy;
-    double *vi = virial;
-    double ce = qscale*g1/MY_PIS;
-    double cv = -0.5*MY_PI*qscale/(g2*volume);
-    double *qi = atom->q, *qn = qi + nlocal;
-    for (; qi < qn; qi++, vi += EWALD_NFUNCS, ei += EWALD_NFUNCS) {
-      double q = *qi;
-      *vi = cv*q*sum[0].x;
-      *ei = ce*q*q-vi[0];
-    }
-  }
-  if (function[1]) {                                        // geometric 1/r^6
-    double *ei = energy+1;
-    double *vi = virial+1;
-    double ce = -g3*g3/12.0;
-    double cv = MY_PI*MY_PIS*g3/(6.0*volume);
-    int *typei = atom->type, *typen = typei + atom->nlocal;
-    for (; typei < typen; typei++, vi += EWALD_NFUNCS, ei += EWALD_NFUNCS) {
-      double b = B[*typei];
-      *vi = cv*b*sum[1].x;
-      *ei = ce*b*b+vi[0];
-    }
-  }
-  if (function[2]) {                                        // arithmetic 1/r^6
-    double *bi;
-    double *ei = energy+2;
-    double *vi = virial+2;
-    double ce = -g3*g3/3.0;
-    double cv = 0.5*MY_PI*MY_PIS*g3/(48.0*volume);
-    int *typei = atom->type, *typen = typei + atom->nlocal;
-    for (; typei < typen; typei++, vi += EWALD_NFUNCS, ei += EWALD_NFUNCS) {
-      bi = B+7*typei[0]+7;
-      for (int k=2; k<9; ++k) *vi += cv*sum[k].x*(--bi)[0];
-
-      /* PJV 20120225:
-         should this be this instead?  above implies an inverse dependence
-         seems to be the above way in original;  i recall having tested
-         arithmetic mixing in the conception phase, but an extra test would
-         be prudent (pattern repeats in multiple functions below)
-
-      bi = B+7*typei[0];
-      for (int k=2; k<9; ++k) *vi += cv*sum[k].x*(bi++)[0];
-
-      */
-
-      *ei = ce*bi[0]*bi[6]+vi[0];
-    }
-  }
-  if (function[3]&&atom->mu) {                                // dipole
-    double *ei = energy+3;
-    double *vi = virial+3;
-    double *imu = atom->mu[0], *nmu = imu+4*atom->nlocal;
-    double ce = mumurd2e*2.0*g3/3.0/MY_PIS;
-    for (; imu < nmu; imu += 4, vi += EWALD_NFUNCS, ei += EWALD_NFUNCS) {
-      *vi = 0;                                                // in surface
-      *ei = ce*imu[3]*imu[3]-vi[0];
-    }
-  }
-}
-
-
-/* ----------------------------------------------------------------------
-   compute the EwaldDisp long-range force, energy, virial
-------------------------------------------------------------------------- */
-
-void EwaldDisp::compute(int eflag, int vflag)
-{
-  if (!nbox) return;
-
-  // set energy/virial flags
-  // invoke allocate_peratom() if needed for first time
-
-  if (eflag || vflag) ev_setup(eflag,vflag);
-  else evflag = eflag_global = vflag_global = eflag_atom = vflag_atom = 0;
-
-  if (!peratom_allocate_flag && (eflag_atom || vflag_atom)) {
-      allocate_peratom();
-      peratom_allocate_flag = 1;
-      nmax = atom->nmax;
-  }
-
-  reallocate_atoms();
-  init_self_peratom();
-  compute_ek();
-  compute_force();
-  //compute_surface(); // assume conducting metal (tinfoil) boundary conditions
-  compute_energy();
-  compute_energy_peratom();
-  compute_virial();
-  compute_virial_dipole();
-  compute_virial_peratom();
-}
-
-
-void EwaldDisp::compute_ek()
-{
-  cvector *ekr = ekr_local;
-  int lbytes = (2*nbox+1)*sizeof(cvector);
-  hvector *h = NULL;
-  kvector *k, *nk = kvec+nkvec;
-  cvector *z = new cvector[2*nbox+1];
-  cvector z1, *zx, *zy, *zz, *zn = z+2*nbox;
-  complex *cek, zxyz, zxy = COMPLEX_NULL, cx = COMPLEX_NULL;
-  vector mui;
-  double *x = atom->x[0], *xn = x+3*atom->nlocal, *q = atom->q, qi = 0.0;
-  double bi = 0.0, ci[7];
-  double *mu = atom->mu ? atom->mu[0] : NULL;
-  int i, kx, ky, n = nkvec*nsums, *type = atom->type, tri = domain->triclinic;
-  int func[EWALD_NFUNCS];
-
-  memcpy(func, function, EWALD_NFUNCS*sizeof(int));
-  memset(cek_local, 0, n*sizeof(complex));                // reset sums
-  while (x<xn) {
-    zx = (zy = (zz = z+nbox)+1)-2;
-    C_SET(zz->x, 1, 0); C_SET(zz->y, 1, 0); C_SET(zz->z, 1, 0);        // z[0]
-    if (tri) {                                                // triclinic z[1]
-      C_ANGLE(z1.x, unit[0]*x[0]+unit[5]*x[1]+unit[4]*x[2]);
-      C_ANGLE(z1.y, unit[1]*x[1]+unit[3]*x[2]);
-      C_ANGLE(z1.z, x[2]*unit[2]); x += 3;
-    }
-    else {                                                // orthogonal z[1]
-      C_ANGLE(z1.x, *(x++)*unit[0]);
-      C_ANGLE(z1.y, *(x++)*unit[1]);
-      C_ANGLE(z1.z, *(x++)*unit[2]);
-    }
-    for (; zz<zn; --zx, ++zy, ++zz) {                  // set up z[k]=e^(ik.r)
-      C_RMULT(zy->x, zz->x, z1.x);                        // 3D k-vector
-      C_RMULT(zy->y, zz->y, z1.y); C_CONJ(zx->y, zy->y);
-      C_RMULT(zy->z, zz->z, z1.z); C_CONJ(zx->z, zy->z);
-    }
-    kx = ky = -1;
-    cek = cek_local;
-    if (func[0]) qi = *(q++);
-    if (func[1]) bi = B[*type];
-    if (func[2]) memcpy(ci, B+7*type[0], 7*sizeof(double));
-    if (func[3]) {
-      memcpy(mui, mu, sizeof(vector));
-      mu += 4;
-      h = hvec;
-    }
-    for (k=kvec; k<nk; ++k) {                                // compute rho(k)
-      if (ky!=k->y) {                                   // based on order in
-        if (kx!=k->x) cx = z[kx = k->x].x;                // reallocate
-        C_RMULT(zxy, z[ky = k->y].y, cx);
-      }
-      C_RMULT(zxyz, z[k->z].z, zxy);
-      if (func[0]) {
-               cek->re += zxyz.re*qi; (cek++)->im += zxyz.im*qi;
-      }
-      if (func[1]) {
-               cek->re += zxyz.re*bi; (cek++)->im += zxyz.im*bi;
-      }
-      if (func[2]) for (i=0; i<7; ++i) {
-        cek->re += zxyz.re*ci[i]; (cek++)->im += zxyz.im*ci[i];
-      }
-      if (func[3]) {
-        register double muk = mui[0]*h->x+mui[1]*h->y+mui[2]*h->z; ++h;
-        cek->re += zxyz.re*muk; (cek++)->im += zxyz.im*muk;
-      }
-    }
-    ekr = (cvector *) ((char *) memcpy(ekr, z, lbytes)+lbytes);
-    ++type;
-  }
-  MPI_Allreduce(cek_local, cek_global, 2*n, MPI_DOUBLE, MPI_SUM, world);
-
-  delete [] z;
-}
-
-
-void EwaldDisp::compute_force()
-{
-  kvector *k;
-  hvector *h, *nh;
-  cvector *z = ekr_local;
-  vector sum[EWALD_MAX_NSUMS], mui = COMPLEX_NULL;
-  complex *cek, zc, zx = COMPLEX_NULL, zxy = COMPLEX_NULL;
-  complex *cek_coul;
-  double *f = atom->f[0], *fn = f+3*atom->nlocal, *q = atom->q, *t = NULL;
-  double *mu = atom->mu ? atom->mu[0] : NULL;
-  const double qscale = force->qqrd2e * scale;
-  double *ke, c[EWALD_NFUNCS] = {
-    8.0*MY_PI*qscale/volume, 2.0*MY_PI*MY_PIS/(12.0*volume),
-    2.0*MY_PI*MY_PIS/(192.0*volume), 8.0*MY_PI*mumurd2e/volume};
-  double kt = 4.0*cube(g_ewald)/3.0/MY_PIS/c[3];
-  int i, kx, ky, lbytes = (2*nbox+1)*sizeof(cvector), *type = atom->type;
-  int func[EWALD_NFUNCS];
-
-  if (atom->torque) t = atom->torque[0];
-  memcpy(func, function, EWALD_NFUNCS*sizeof(int));
-  memset(sum, 0, EWALD_MAX_NSUMS*sizeof(vector));        // fj = -dE/dr =
-  for (; f<fn; f+=3) {                                    //      -i*qj*fac*
-    k = kvec;                                         //       Sum[conj(d)-d]
-    kx = ky = -1;                                        // d = k*conj(ekj)*ek
-    ke = kenergy;
-    cek = cek_global;
-    memset(sum, 0, EWALD_MAX_NSUMS*sizeof(vector));
-    if (func[3]) {
-      register double di = c[3];
-      mui[0] = di*(mu++)[0]; mui[1] = di*(mu++)[0]; mui[2] = di*(mu++)[0];
-      mu++;
-    }
-    for (nh = (h = hvec)+nkvec; h<nh; ++h, ++k) {
-      if (ky!=k->y) {                                   // based on order in
-        if (kx!=k->x) zx = z[kx = k->x].x;                 // reallocate
-        C_RMULT(zxy, z[ky = k->y].y, zx);
-      }
-      C_CRMULT(zc, z[k->z].z, zxy);
-      if (func[0]) {                                        // 1/r
-        register double im = *(ke++)*(zc.im*cek->re+cek->im*zc.re);
-        if (func[3]) cek_coul = cek;
-        ++cek;
-        sum[0][0] += h->x*im; sum[0][1] += h->y*im; sum[0][2] += h->z*im;
-      }
-      if (func[1]) {                                        // geometric 1/r^6
-        register double im = *(ke++)*(zc.im*cek->re+cek->im*zc.re); ++cek;
-        sum[1][0] += h->x*im; sum[1][1] += h->y*im; sum[1][2] += h->z*im;
-      }
-      if (func[2]) {                                        // arithmetic 1/r^6
-        register double im, c = *(ke++);
-        for (i=2; i<9; ++i) {
-          im = c*(zc.im*cek->re+cek->im*zc.re); ++cek;
-          sum[i][0] += h->x*im; sum[i][1] += h->y*im; sum[i][2] += h->z*im;
-        }
-      }
-      if (func[3]) {                                        // dipole
-        register double im = *(ke)*(zc.im*cek->re+
-            cek->im*zc.re)*(mui[0]*h->x+mui[1]*h->y+mui[2]*h->z);
-        register double im2 = *(ke)*(zc.re*cek->re-
-            cek->im*zc.im);
-        sum[9][0] += h->x*im; sum[9][1] += h->y*im; sum[9][2] += h->z*im;
-        t[0] += -mui[1]*h->z*im2 + mui[2]*h->y*im2;        // torque
-        t[1] += -mui[2]*h->x*im2 + mui[0]*h->z*im2;
-        t[2] += -mui[0]*h->y*im2 + mui[1]*h->x*im2;
-        if (func[0]) {                                      // charge-dipole
-          register double qi = *(q)*c[0];
-          im = - *(ke)*(zc.re*cek_coul->re -
-              cek_coul->im*zc.im)*(mui[0]*h->x+mui[1]*h->y+mui[2]*h->z);
-          im += *(ke)*(zc.re*cek->re - cek->im*zc.im)*qi;
-          sum[9][0] += h->x*im; sum[9][1] += h->y*im; sum[9][2] += h->z*im;
-
-          im2 =  *(ke)*(zc.re*cek_coul->im + cek_coul->re*zc.im);
-          im2 += -*(ke)*(zc.re*cek->im - cek->im*zc.re);
-          t[0] += -mui[1]*h->z*im2 + mui[2]*h->y*im2;        // torque
-          t[1] += -mui[2]*h->x*im2 + mui[0]*h->z*im2;
-          t[2] += -mui[0]*h->y*im2 + mui[1]*h->x*im2;
-        }
-        ++cek;
-        ke++;
-      }
-    }
-    if (func[0]) {                                        // 1/r
-      register double qi = *(q++)*c[0];
-      f[0] -= sum[0][0]*qi; f[1] -= sum[0][1]*qi; f[2] -= sum[0][2]*qi;
-    }
-    if (func[1]) {                                        // geometric 1/r^6
-      register double bi = B[*type]*c[1];
-      f[0] -= sum[1][0]*bi; f[1] -= sum[1][1]*bi; f[2] -= sum[1][2]*bi;
-    }
-    if (func[2]) {                                        // arithmetic 1/r^6
-      register double *bi = B+7*type[0]+7;
-      for (i=2; i<9; ++i) {
-        register double c2 = (--bi)[0]*c[2];
-        f[0] -= sum[i][0]*c2; f[1] -= sum[i][1]*c2; f[2] -= sum[i][2]*c2;
-      }
-    }
-    if (func[3]) {                                        // dipole
-      f[0] -= sum[9][0]; f[1] -= sum[9][1]; f[2] -= sum[9][2];
-    }
-    z = (cvector *) ((char *) z+lbytes);
-    ++type;
-    t += 3;
-  }
-}
-
-
-void EwaldDisp::compute_surface()
-{
-  // assume conducting metal (tinfoil) boundary conditions, so this function is
-  // not called because dielectric at the boundary --> infinity, which makes all
-  // the terms here zero.
-
-  if (!function[3]) return;
-  if (!atom->mu) return;
-
-  vector sum_local = VECTOR_NULL, sum_total;
-  memset(sum_local, 0, sizeof(vector));
-  double *i, *n, *mu = atom->mu[0];
-
-  for (n = (i = mu) + 4*atom->nlocal; i < n; ++i) {
-    sum_local[0] += (i++)[0];
-    sum_local[1] += (i++)[0];
-    sum_local[2] += (i++)[0];
-  }
-  MPI_Allreduce(sum_local, sum_total, 3, MPI_DOUBLE, MPI_SUM, world);
-
-  virial_self[3] =
-    mumurd2e*(2.0*MY_PI*vec_dot(sum_total,sum_total)/(2.0*dielectric+1)/volume);
-  energy_self[3] -= virial_self[3];
-
-  if (!(vflag_atom || eflag_atom)) return;
-
-  double *ei = energy_self_peratom[0]+3;
-  double *vi = virial_self_peratom[0]+3;
-  double cv = 2.0*mumurd2e*MY_PI/(2.0*dielectric+1)/volume;
-
-  for (i = mu; i < n; i += 4, ei += EWALD_NFUNCS, vi += EWALD_NFUNCS) {
-    *vi = cv*(i[0]*sum_total[0]+i[1]*sum_total[1]+i[2]*sum_total[2]);
-    *ei -= *vi;
-  }
-}
-
-
-void EwaldDisp::compute_energy()
-{
-  energy = 0.0;
-  if (!eflag_global) return;
-
-  complex *cek = cek_global;
-  complex *cek_coul;
-  double *ke = kenergy;
-  const double qscale = force->qqrd2e * scale;
-  double c[EWALD_NFUNCS] = {
-    4.0*MY_PI*qscale/volume, 2.0*MY_PI*MY_PIS/(24.0*volume),
-    2.0*MY_PI*MY_PIS/(192.0*volume), 4.0*MY_PI*mumurd2e/volume};
-  double sum[EWALD_NFUNCS];
-  int func[EWALD_NFUNCS];
-
-  memcpy(func, function, EWALD_NFUNCS*sizeof(int));
-  memset(sum, 0, EWALD_NFUNCS*sizeof(double));                // reset sums
-  for (int k=0; k<nkvec; ++k) {                       // sum over k vectors
-    if (func[0]) {                                        // 1/r
-      sum[0] += *(ke++)*(cek->re*cek->re+cek->im*cek->im);
-      if (func[3]) cek_coul = cek;
-      ++cek; 
-    }
-    if (func[1]) {                                        // geometric 1/r^6
-      sum[1] += *(ke++)*(cek->re*cek->re+cek->im*cek->im); ++cek; }
-    if (func[2]) {                                        // arithmetic 1/r^6
-      register double r =
-            (cek[0].re*cek[6].re+cek[0].im*cek[6].im)+
-            (cek[1].re*cek[5].re+cek[1].im*cek[5].im)+
-            (cek[2].re*cek[4].re+cek[2].im*cek[4].im)+
-        0.5*(cek[3].re*cek[3].re+cek[3].im*cek[3].im); cek += 7;
-      sum[2] += *(ke++)*r;
-    }
-    if (func[3]) {                                        // dipole
-      sum[3] += *(ke)*(cek->re*cek->re+cek->im*cek->im);
-      if (func[0]) {                                      // charge-dipole
-        sum[3] += *(ke)*2.0*(cek->re*cek_coul->im - cek->im*cek_coul->re);
-      }
-      ke++;
-      ++cek;
-    }
-  }
-  for (int k=0; k<EWALD_NFUNCS; ++k) energy += c[k]*sum[k]-energy_self[k];
-  if (slabflag) compute_slabcorr();
-}
-
-
-void EwaldDisp::compute_energy_peratom()
-{
-  if (!eflag_atom) return;
-
-  kvector *k;
-  hvector *h, *nh;
-  cvector *z = ekr_local;
-  vector  mui = VECTOR_NULL;
-  double sum[EWALD_MAX_NSUMS];
-  complex *cek, zc = COMPLEX_NULL, zx = COMPLEX_NULL, zxy = COMPLEX_NULL;
-  complex *cek_coul;
-  double *q = atom->q;
-  double *eatomj = eatom;
-  double *mu = atom->mu ? atom->mu[0] : NULL;
-  const double qscale = force->qqrd2e * scale;
-  double *ke = kenergy;
-  double c[EWALD_NFUNCS] = {
-      4.0*MY_PI*qscale/volume, 2.0*MY_PI*MY_PIS/(24.0*volume),
-      2.0*MY_PI*MY_PIS/(192.0*volume), 4.0*MY_PI*mumurd2e/volume};
-  int i, kx, ky, lbytes = (2*nbox+1)*sizeof(cvector), *type = atom->type;
-  int func[EWALD_NFUNCS];
-
-  memcpy(func, function, EWALD_NFUNCS*sizeof(int));
-  for (int j = 0; j < atom->nlocal; j++, ++eatomj) {
-    k = kvec;
-    kx = ky = -1;
-    ke = kenergy;
-    cek = cek_global;
-    memset(sum, 0, EWALD_MAX_NSUMS*sizeof(double));
-    if (func[3]) {
-      register double di = c[3];
-      mui[0] = di*(mu++)[0]; mui[1] = di*(mu++)[0]; mui[2] = di*(mu++)[0];
-      mu++;
-    }
-    for (nh = (h = hvec)+nkvec; h<nh; ++h, ++k) {
-      if (ky!=k->y) {                              // based on order in
-        if (kx!=k->x) zx = z[kx = k->x].x;                 // reallocate
-        C_RMULT(zxy, z[ky = k->y].y, zx);
-      }
-      C_CRMULT(zc, z[k->z].z, zxy);
-      if (func[0]) {                                        // 1/r
-        sum[0] += *(ke++)*(cek->re*zc.re - cek->im*zc.im);
-        if (func[3]) cek_coul = cek;
-        ++cek;
-      }
-      if (func[1]) {                                        // geometric 1/r^6
-        sum[1] += *(ke++)*(cek->re*zc.re - cek->im*zc.im); ++cek; }
-      if (func[2]) {                                        // arithmetic 1/r^6
-        register double im, c = *(ke++);
-        for (i=2; i<9; ++i) {
-          im = c*(cek->re*zc.re - cek->im*zc.im); ++cek;
-          sum[i] += im;
-        }
-      }
-      if (func[3]) {                                        // dipole
-        double muk = (mui[0]*h->x+mui[1]*h->y+mui[2]*h->z);
-        sum[9] += *(ke)*(cek->re*zc.re - cek->im*zc.im)*muk;
-        if (func[0]) {                                      // charge-dipole
-          register double qj = *(q)*c[0];
-          sum[9] += *(ke)*(cek_coul->im*zc.re + cek_coul->re*zc.im)*muk;
-          sum[9] -= *(ke)*(cek->re*zc.im + cek->im*zc.re)*qj;
-        }
-        ++cek;
-        ke++;
-      }
-    }
-
-    if (func[0]) {                                        // 1/r
-      register double qj = *(q++)*c[0];
-      *eatomj += sum[0]*qj - energy_self_peratom[j][0];
-    }
-    if (func[1]) {                                        // geometric 1/r^6
-      register double bj = B[*type]*c[1];
-      *eatomj += sum[1]*bj - energy_self_peratom[j][1];
-    }
-    if (func[2]) {                                        // arithmetic 1/r^6
-      register double *bj = B+7*type[0]+7;
-      for (i=2; i<9; ++i) {
-        register double c2 = (--bj)[0]*c[2];
-        *eatomj += 0.5*sum[i]*c2;
-      }
-      *eatomj -= energy_self_peratom[j][2];
-    }
-    if (func[3]) {                                        // dipole
-      *eatomj += sum[9] - energy_self_peratom[j][3];
-    }
-    z = (cvector *) ((char *) z+lbytes);
-    ++type;
-  }
-}
-
-
-#define swap(a, b) { register double t = a; a= b; b = t; }
-
-void EwaldDisp::compute_virial()
-{
-  memset(virial, 0, sizeof(shape));
-  if (!vflag_global) return;
-
-  complex *cek = cek_global;
-  complex *cek_coul;
-  double *kv = kvirial;
-  const double qscale = force->qqrd2e * scale;
-  double c[EWALD_NFUNCS] = {
-    4.0*MY_PI*qscale/volume, 2.0*MY_PI*MY_PIS/(24.0*volume),
-    2.0*MY_PI*MY_PIS/(192.0*volume), 4.0*MY_PI*mumurd2e/volume};
-  shape sum[EWALD_NFUNCS];
-  int func[EWALD_NFUNCS];
-
-  memcpy(func, function, EWALD_NFUNCS*sizeof(int));
-  memset(sum, 0, EWALD_NFUNCS*sizeof(shape));
-  for (int k=0; k<nkvec; ++k) {                      // sum over k vectors
-    if (func[0]) {                                         // 1/r
-      register double r = cek->re*cek->re+cek->im*cek->im;
-      if (func[3]) cek_coul = cek;
-      ++cek;
-      sum[0][0] += *(kv++)*r; sum[0][1] += *(kv++)*r; sum[0][2] += *(kv++)*r;
-      sum[0][3] += *(kv++)*r; sum[0][4] += *(kv++)*r; sum[0][5] += *(kv++)*r;
-    }
-    if (func[1]) {                                        // geometric 1/r^6
-      register double r = cek->re*cek->re+cek->im*cek->im; ++cek;
-      sum[1][0] += *(kv++)*r; sum[1][1] += *(kv++)*r; sum[1][2] += *(kv++)*r;
-      sum[1][3] += *(kv++)*r; sum[1][4] += *(kv++)*r; sum[1][5] += *(kv++)*r;
-    }
-    if (func[2]) {                                        // arithmetic 1/r^6
-      register double r =
-            (cek[0].re*cek[6].re+cek[0].im*cek[6].im)+
-            (cek[1].re*cek[5].re+cek[1].im*cek[5].im)+
-            (cek[2].re*cek[4].re+cek[2].im*cek[4].im)+
-        0.5*(cek[3].re*cek[3].re+cek[3].im*cek[3].im); cek += 7;
-      sum[2][0] += *(kv++)*r; sum[2][1] += *(kv++)*r; sum[2][2] += *(kv++)*r;
-      sum[2][3] += *(kv++)*r; sum[2][4] += *(kv++)*r; sum[2][5] += *(kv++)*r;
-    }
-    if (func[3]) {
-      register double r = cek->re*cek->re+cek->im*cek->im;
-      sum[3][0] += *(kv++)*r; sum[3][1] += *(kv++)*r; sum[3][2] += *(kv++)*r;
-      sum[3][3] += *(kv++)*r; sum[3][4] += *(kv++)*r; sum[3][5] += *(kv++)*r;
-      if (func[0]) {                                      // charge-dipole
-        kv -= 6;
-        register double r = 2.0*(cek->re*cek_coul->im - cek->im*cek_coul->re);
-        sum[3][0] += *(kv++)*r; sum[3][1] += *(kv++)*r; sum[3][2] += *(kv++)*r;
-        sum[3][3] += *(kv++)*r; sum[3][4] += *(kv++)*r; sum[3][5] += *(kv++)*r;
-      }
-      ++cek;
-    }
-  }
-  for (int k=0; k<EWALD_NFUNCS; ++k)
-    if (func[k]) {
-      shape self = {virial_self[k], virial_self[k], virial_self[k], 0, 0, 0};
-      shape_scalar_mult(sum[k], c[k]);
-      shape_add(virial, sum[k]);
-      shape_subtr(virial, self);
-    }
-}
-
-
-void EwaldDisp::compute_virial_dipole()
-{
-  if (!function[3]) return;
-  if (!vflag_atom && !vflag_global) return;
-  double test = 0.0;
-  kvector *k;
-  hvector *h, *nh;
-  cvector *z = ekr_local;
-  vector mui = COMPLEX_NULL;
-  double sum[6];
-  double sum_total[6];
-  complex *cek, zc, zx = COMPLEX_NULL, zxy = COMPLEX_NULL;
-  complex *cek_coul;
-  double *mu = atom->mu ? atom->mu[0] : NULL;
-  double *vatomj = NULL;
-  if (vflag_atom && vatom) vatomj = vatom[0];
-  const double qscale = force->qqrd2e * scale;
-  double *ke, c[EWALD_NFUNCS] = {
-    8.0*MY_PI*qscale/volume, 2.0*MY_PI*MY_PIS/(12.0*volume),
-    2.0*MY_PI*MY_PIS/(192.0*volume), 8.0*MY_PI*mumurd2e/volume};
-  double kt = 4.0*cube(g_ewald)/3.0/MY_PIS/c[3];
-  int i, kx, ky, lbytes = (2*nbox+1)*sizeof(cvector), *type = atom->type;
-  int func[EWALD_NFUNCS];
-
-  memcpy(func, function, EWALD_NFUNCS*sizeof(int));
-  memset(&sum[0], 0, 6*sizeof(double));
-  memset(&sum_total[0], 0, 6*sizeof(double));
-  for (int j = 0; j < atom->nlocal; j++) {
-    k = kvec;
-    kx = ky = -1;
-    ke = kenergy;
-    cek = cek_global;
-    memset(&sum[0], 0, 6*sizeof(double));
-    if (func[3]) {
-      register double di = c[3];
-      mui[0] = di*(mu++)[0]; mui[1] = di*(mu++)[0]; mui[2] = di*(mu++)[0];
-      mu++;
-    }
-    for (nh = (h = hvec)+nkvec; h<nh; ++h, ++k) {
-      if (ky!=k->y) {                                   // based on order in
-        if (kx!=k->x) zx = z[kx = k->x].x;                 // reallocate
-        C_RMULT(zxy, z[ky = k->y].y, zx);
-      }
-      C_CRMULT(zc, z[k->z].z, zxy);
-      double im = 0.0;
-      if (func[0]) {                                        // 1/r
-        ke++;
-        if (func[3]) cek_coul = cek;
-        ++cek;
-      }
-      if (func[1]) {                                        // geometric 1/r^6
-        ke++; 
-        ++cek;
-      }
-      if (func[2]) {                                        // arithmetic 1/r^6
-        ke++;
-        for (i=2; i<9; ++i) {
-          ++cek;
-        }
-      }
-      if (func[3]) {                                        // dipole
-        im = *(ke)*(zc.re*cek->re - cek->im*zc.im);
-        if (func[0]) {                                      // charge-dipole
-          im += *(ke)*(zc.im*cek_coul->re + cek_coul->im*zc.re);
-        }
-        sum[0] -= mui[0]*h->x*im;
-        sum[1] -= mui[1]*h->y*im;
-        sum[2] -= mui[2]*h->z*im;
-        sum[3] -= mui[0]*h->y*im;
-        sum[4] -= mui[0]*h->z*im;
-        sum[5] -= mui[1]*h->z*im;
-        ++cek;
-        ke++;
-      }
-    }
-
-    if (vflag_global)
-      for (int n = 0; n < 6; n++)
-        sum_total[n] -= sum[n];
-
-    if (vflag_atom)
-      for (int n = 0; n < 6; n++)
-        vatomj[n] -= sum[n];
-
-    z = (cvector *) ((char *) z+lbytes);
-    ++type;
-    if (vflag_atom) vatomj += 6;
-  }
-
-  if (vflag_global) {
-    MPI_Allreduce(&sum_total[0],&sum[0],6,MPI_DOUBLE,MPI_SUM,world);
-    for (int n = 0; n < 6; n++)
-      virial[n] += sum[n];
-  }
-
-}
-
-void EwaldDisp::compute_virial_peratom()
-{
-  if (!vflag_atom) return;
-
-  kvector *k;
-  hvector *h, *nh;
-  cvector *z = ekr_local;
-  vector  mui = VECTOR_NULL;
-  complex *cek, zc = COMPLEX_NULL, zx = COMPLEX_NULL, zxy = COMPLEX_NULL;
-  complex *cek_coul;
-  double *kv;
-  double *q = atom->q;
-  double *vatomj = vatom ? vatom[0] : NULL;
-  double *mu = atom->mu ? atom->mu[0] : NULL;
-  const double qscale = force->qqrd2e * scale;
-  double c[EWALD_NFUNCS] = {
-    4.0*MY_PI*qscale/volume, 2.0*MY_PI*MY_PIS/(24.0*volume),
-    2.0*MY_PI*MY_PIS/(192.0*volume), 4.0*MY_PI*mumurd2e/volume};
-  shape sum[EWALD_MAX_NSUMS];
-  int func[EWALD_NFUNCS];
-
-  memcpy(func, function, EWALD_NFUNCS*sizeof(int));
-  int i, kx, ky, lbytes = (2*nbox+1)*sizeof(cvector), *type = atom->type;
-  for (int j = 0; j < atom->nlocal; j++) {
-    k = kvec;
-    kx = ky = -1;
-    kv = kvirial;
-    cek = cek_global;
-    memset(sum, 0, EWALD_MAX_NSUMS*sizeof(shape));
-    if (func[3]) {
-      register double di = c[3];
-      mui[0] = di*(mu++)[0]; mui[1] = di*(mu++)[0]; mui[2] = di*(mu++)[0];
-      mu++;
-    }
-    for (nh = (h = hvec)+nkvec; h<nh; ++h, ++k) {
-      if (ky!=k->y) {                                // based on order in
-          if (kx!=k->x) zx = z[kx = k->x].x;                 // reallocate
-          C_RMULT(zxy, z[ky = k->y].y, zx);
-      }
-      C_CRMULT(zc, z[k->z].z, zxy);
-      if (func[0]) {                                        // 1/r
-          if (func[3]) cek_coul = cek;
-          register double r = cek->re*zc.re - cek->im*zc.im; ++cek;
-          sum[0][0] += *(kv++)*r;
-          sum[0][1] += *(kv++)*r;
-          sum[0][2] += *(kv++)*r;
-          sum[0][3] += *(kv++)*r;
-          sum[0][4] += *(kv++)*r;
-          sum[0][5] += *(kv++)*r;
-      }
-      if (func[1]) {                                        // geometric 1/r^6
-          register double r = cek->re*zc.re - cek->im*zc.im; ++cek;
-          sum[1][0] += *(kv++)*r;
-          sum[1][1] += *(kv++)*r;
-          sum[1][2] += *(kv++)*r;
-          sum[1][3] += *(kv++)*r;
-          sum[1][4] += *(kv++)*r;
-          sum[1][5] += *(kv++)*r;
-      }
-      if (func[2]) {                                        // arithmetic 1/r^6
-        register double r;
-        for (i=2; i<9; ++i) {
-          r = cek->re*zc.re - cek->im*zc.im; ++cek;
-          sum[i][0] += *(kv++)*r;
-          sum[i][1] += *(kv++)*r;
-          sum[i][2] += *(kv++)*r;
-          sum[i][3] += *(kv++)*r;
-          sum[i][4] += *(kv++)*r;
-          sum[i][5] += *(kv++)*r;
-      kv -= 6;
-        }
-    kv += 6;
-      }
-      if (func[3]) {                                        // dipole
-         double muk = (mui[0]*h->x+mui[1]*h->y+mui[2]*h->z);
-         register double
-           r = (cek->re*zc.re - cek->im*zc.im)*muk;
-         sum[9][0] += *(kv++)*r;
-         sum[9][1] += *(kv++)*r;
-         sum[9][2] += *(kv++)*r;
-         sum[9][3] += *(kv++)*r;
-         sum[9][4] += *(kv++)*r;
-         sum[9][5] += *(kv++)*r;
-         if (func[0]) {                                      // charge-dipole
-           kv -= 6;
-           register double qj = *(q)*c[0];
-           r = (cek_coul->im*zc.re + cek_coul->re*zc.im)*muk;
-           r += -(cek->re*zc.im + cek->im*zc.re)*qj;
-           sum[9][0] += *(kv++)*r; sum[9][1] += *(kv++)*r; sum[9][2] += *(kv++)*r;
-           sum[9][3] += *(kv++)*r; sum[9][4] += *(kv++)*r; sum[9][5] += *(kv++)*r;
-         }
-         ++cek;
-      }
-    }
-
-    if (func[0]) {                                        // 1/r
-      register double qi = *(q++)*c[0];
-      for (int n = 0; n < 6; n++) vatomj[n] += sum[0][n]*qi;
-    }
-    if (func[1]) {                                        // geometric 1/r^6
-      register double bi = B[*type]*c[1];
-      for (int n = 0; n < 6; n++) vatomj[n] += sum[1][n]*bi;
-    }
-    if (func[2]) {                                        // arithmetic 1/r^6
-      register double *bj = B+7*type[0]+7;
-      for (i=2; i<9; ++i) {
-        register double c2 = (--bj)[0]*c[2];
-        for (int n = 0; n < 6; n++) vatomj[n] += 0.5*sum[i][n]*c2;
-      }
-    }
-    if (func[3]) {                                        // dipole
-      for (int n = 0; n < 6; n++) vatomj[n] += sum[9][n];
-    }
-
-    for (int k=0; k<EWALD_NFUNCS; ++k) {
-      if (func[k]) {
-        for (int n = 0; n < 3; n++) vatomj[n] -= virial_self_peratom[j][k];
-      }
-    }
-
-    z = (cvector *) ((char *) z+lbytes);
-    ++type;
-    vatomj += 6;
-  }
-}
-
-
-/* ----------------------------------------------------------------------
-   Slab-geometry correction term to dampen inter-slab interactions between
-   periodically repeating slabs.  Yields good approximation to 2D Ewald if
-   adequate empty space is left between repeating slabs (J. Chem. Phys.
-   111, 3155).  Slabs defined here to be parallel to the xy plane. Also
-   extended to non-neutral systems (J. Chem. Phys. 131, 094107).
-------------------------------------------------------------------------- */
-
-void EwaldDisp::compute_slabcorr()
-{
-  // compute local contribution to global dipole moment
-
-  double *q = atom->q;
-  double **x = atom->x;
-  double zprd = domain->zprd;
-  int nlocal = atom->nlocal;
-
-  double qsum = 0.0;
-  if (function[0]) qsum = sum[0].x;
-
-  double dipole = 0.0;
-  for (int i = 0; i < nlocal; i++) dipole += q[i]*x[i][2];
-
-  if (function[3] && atom->mu) {
-    double **mu = atom->mu;
-    for (int i = 0; i < nlocal; i++) dipole += mu[i][2];
-  }
-
-  // sum local contributions to get global dipole moment
-
-  double dipole_all;
-  MPI_Allreduce(&dipole,&dipole_all,1,MPI_DOUBLE,MPI_SUM,world);
-
-  // need to make non-neutral systems and/or
-  //  per-atom energy translationally invariant
-
-  double dipole_r2 = 0.0;
-  if (eflag_atom || fabs(qsum) > SMALL) {
-
-    if (function[3] && atom->mu)
-      error->all(FLERR,"Cannot (yet) use kspace slab correction with "
-        "long-range dipoles and non-neutral systems or per-atom energy");
-
-    for (int i = 0; i < nlocal; i++)
-      dipole_r2 += q[i]*x[i][2]*x[i][2];
-
-    // sum local contributions
-
-    double tmp;
-    MPI_Allreduce(&dipole_r2,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
-    dipole_r2 = tmp;
-  }
-
-  // compute corrections
-
-  const double e_slabcorr = MY_2PI*(dipole_all*dipole_all -
-    qsum*dipole_r2 - qsum*qsum*zprd*zprd/12.0)/volume;
-  const double qscale = force->qqrd2e * scale;
-
-  if (eflag_global) energy += qscale * e_slabcorr;
-
-  // per-atom energy
-
-  if (eflag_atom) {
-    double efact = qscale * MY_2PI/volume;
-    for (int i = 0; i < nlocal; i++)
-      eatom[i] += efact * q[i]*(x[i][2]*dipole_all - 0.5*(dipole_r2 +
-        qsum*x[i][2]*x[i][2]) - qsum*zprd*zprd/12.0);
-  }
-
-  // add on force corrections
-
-  double ffact = qscale * (-4.0*MY_PI/volume);
-  double **f = atom->f;
-
-  for (int i = 0; i < nlocal; i++) f[i][2] += ffact * q[i]*(dipole_all - qsum*x[i][2]);
-
-  // add on torque corrections
-
-  if (function[3] && atom->mu && atom->torque) {
-    double **mu = atom->mu;
-    double **torque = atom->torque;
-    for (int i = 0; i < nlocal; i++) {
-      torque[i][0] += ffact * dipole_all * mu[i][1];
-      torque[i][1] += -ffact * dipole_all * mu[i][0];
-    }
-  }
-}
-
-/* ----------------------------------------------------------------------
-  Newton solver used to find g_ewald for LJ systems
- ------------------------------------------------------------------------- */
-
-double EwaldDisp::NewtonSolve(double x, double Rc, 
-                              bigint natoms, double vol, double b2)
-{
-  double dx,tol;
-  int maxit;
-
-  maxit = 10000; //Maximum number of iterations
-  tol = 0.00001; //Convergence tolerance
-
-  //Begin algorithm
-
-  for (int i = 0; i < maxit; i++) {
-    dx = f(x,Rc,natoms,vol,b2) / derivf(x,Rc,natoms,vol,b2);
-    x = x - dx; //Update x
-    if (fabs(dx) < tol) return x;
-    if (x < 0 || x != x) // solver failed
-      return -1;
-  }
-  return -1;
-}
-
-/* ----------------------------------------------------------------------
- Calculate f(x)
- ------------------------------------------------------------------------- */
-
-double EwaldDisp::f(double x, double Rc, bigint natoms, double vol, double b2)
-{
-  double a = Rc*x;
-  double f = 0.0;
-
-  if (function[1] || function[2]) { // LJ
-    f = (4.0*MY_PI*b2*powint(x,4)/vol/sqrt((double)natoms)*erfc(a) *
-      (6.0*powint(a,-5) + 6.0*powint(a,-3) + 3.0/a + a) - accuracy);
-  } else { // dipole
-    double rg2 = a*a;
-    double rg4 = rg2*rg2;
-    double rg6 = rg4*rg2;
-    double Cc = 4.0*rg4 + 6.0*rg2 + 3.0;
-    double Dc = 8.0*rg6 + 20.0*rg4 + 30.0*rg2 + 15.0;
-    f = (b2/(sqrt(vol*powint(x,4)*powint(Rc,9)*natoms)) *
-      sqrt(13.0/6.0*Cc*Cc + 2.0/15.0*Dc*Dc - 13.0/15.0*Cc*Dc) *
-      exp(-rg2)) - accuracy;
-    }
-
-  return f;
-}
-
-/* ----------------------------------------------------------------------
- Calculate numerical derivative f'(x)
- ------------------------------------------------------------------------- */
-
-double EwaldDisp::derivf(double x, double Rc, 
-                         bigint natoms, double vol, double b2)
-{
-  double h = 0.000001;  //Derivative step-size
-  return (f(x + h,Rc,natoms,vol,b2) - f(x,Rc,natoms,vol,b2)) / h;
-}
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing authors: Pieter in 't Veld (SNL), Stan Moore (SNL)
+------------------------------------------------------------------------- */
+
+#include "mpi.h"
+#include "string.h"
+#include "stdio.h"
+#include "stdlib.h"
+#include "math.h"
+#include "ewald_disp.h"
+#include "math_vector.h"
+#include "math_const.h"
+#include "math_special.h"
+#include "atom.h"
+#include "comm.h"
+#include "force.h"
+#include "pair.h"
+#include "domain.h"
+#include "memory.h"
+#include "error.h"
+#include "update.h"
+
+using namespace LAMMPS_NS;
+using namespace MathConst;
+using namespace MathSpecial;
+
+#define SMALL 0.00001
+
+enum{GEOMETRIC,ARITHMETIC,SIXTHPOWER};   // same as in pair.h
+
+//#define DEBUG
+
+/* ---------------------------------------------------------------------- */
+
+EwaldDisp::EwaldDisp(LAMMPS *lmp, int narg, char **arg) : KSpace(lmp, narg, arg)
+{
+  if (narg!=1) error->all(FLERR,"Illegal kspace_style ewald/n command");
+
+  ewaldflag = dispersionflag = dipoleflag = 1;
+  accuracy_relative = fabs(force->numeric(FLERR,arg[0]));
+
+  memset(function, 0, EWALD_NORDER*sizeof(int));
+  kenergy = kvirial = NULL;
+  cek_local = cek_global = NULL;
+  ekr_local = NULL;
+  hvec = NULL;
+  kvec = NULL;
+  B = NULL;
+  first_output = 0;
+  energy_self_peratom = NULL;
+  virial_self_peratom = NULL;
+  nmax = 0;
+  q2 = 0;
+  b2 = 0;
+  M2 = 0;
+}
+
+/* ---------------------------------------------------------------------- */
+
+EwaldDisp::~EwaldDisp()
+{
+  deallocate();
+  deallocate_peratom();
+  delete [] ekr_local;
+  delete [] B;
+}
+
+/* --------------------------------------------------------------------- */
+
+void EwaldDisp::init()
+{
+  nkvec = nkvec_max = nevec = nevec_max = 0;
+  nfunctions = nsums = sums = 0;
+  nbox = -1;
+  bytes = 0.0;
+
+  if (!comm->me) {
+    if (screen) fprintf(screen,"EwaldDisp initialization ...\n");
+    if (logfile) fprintf(logfile,"EwaldDisp initialization ...\n");
+  }
+
+  triclinic_check();
+  if (domain->dimension == 2)
+    error->all(FLERR,"Cannot use EwaldDisp with 2d simulation");
+  if (slabflag == 0 && domain->nonperiodic > 0)
+    error->all(FLERR,"Cannot use nonperiodic boundaries with EwaldDisp");
+  if (slabflag == 1) {
+    if (domain->xperiodic != 1 || domain->yperiodic != 1 ||
+        domain->boundary[2][0] != 1 || domain->boundary[2][1] != 1)
+      error->all(FLERR,"Incorrect boundaries with slab EwaldDisp");
+  }
+
+  scale = 1.0;
+  mumurd2e = force->qqrd2e;
+  dielectric = force->dielectric;
+
+  int tmp;
+  Pair *pair = force->pair;
+  int *ptr = pair ? (int *) pair->extract("ewald_order",tmp) : NULL;
+  double *cutoff = pair ? (double *) pair->extract("cut_coul",tmp) : NULL;
+  if (!(ptr||cutoff))
+    error->all(FLERR,"KSpace style is incompatible with Pair style");
+  int ewald_order = ptr ? *((int *) ptr) : 1<<1;
+  int ewald_mix = ptr ? *((int *) pair->extract("ewald_mix",tmp)) : GEOMETRIC;
+  memset(function, 0, EWALD_NFUNCS*sizeof(int));
+  for (int i=0; i<=EWALD_NORDER; ++i)                        // transcribe order
+    if (ewald_order&(1<<i)) {                                // from pair_style
+      int n[] = EWALD_NSUMS, k = 0;
+      char str[128];
+      switch (i) {
+        case 1:
+          k = 0; break;
+        case 3:
+          k = 3; break;
+        case 6:
+          if (ewald_mix==GEOMETRIC) { k = 1; break; }
+          else if (ewald_mix==ARITHMETIC) { k = 2; break; }
+          error->all(FLERR,
+                     "Unsupported mixing rule in kspace_style ewald/disp");
+        default:
+          error->all(FLERR,"Unsupported order in kspace_style ewald/disp");
+      }
+      nfunctions += function[k] = 1;
+      nsums += n[k];
+    }
+
+  if (!gewaldflag) g_ewald = 0.0;
+  pair->init();  // so B is defined
+  init_coeffs();
+  init_coeff_sums();
+
+  double qsum, qsqsum, bsbsum;
+  qsum = qsqsum = bsbsum = 0.0;
+  if (function[0]) {
+    qsum = sum[0].x;
+    qsqsum = sum[0].x2;
+  }
+
+  // turn off coulombic if no charge
+
+  if (function[0] && qsqsum == 0.0) {
+    function[0] = 0;
+    nfunctions -= 1;
+    nsums -= 1;
+  }
+
+  if (function[1]) bsbsum = sum[1].x2;
+  if (function[2]) bsbsum = sum[2].x2;
+
+  if (function[3]) M2 = sum[9].x2;
+
+  if (function[3] && strcmp(update->unit_style,"electron") == 0)
+    error->all(FLERR,"Cannot (yet) use 'electron' units with dipoles");
+
+  if (qsqsum == 0.0 && bsbsum == 0.0 && M2 == 0.0)
+      error->all(FLERR,"Cannot use Ewald/disp solver "
+                 "on system with no charge, dipole, or LJ particles");
+  if (fabs(qsum) > SMALL && comm->me == 0) {
+      char str[128];
+      sprintf(str,"System is not charge neutral, net charge = %g",qsum);
+      error->warning(FLERR,str);
+  }
+
+  if (!function[1] && !function[2])
+    dispersionflag = 0;
+
+  if (!function[3])
+    dipoleflag = 0;
+
+  pair_check();
+
+  // set accuracy (force units) from accuracy_relative or accuracy_absolute
+
+  if (accuracy_absolute >= 0.0) accuracy = accuracy_absolute;
+  else accuracy = accuracy_relative * two_charge_force;
+
+  // setup K-space resolution
+
+  q2 = qsqsum * force->qqrd2e;
+  M2 *= mumurd2e;
+  b2 = bsbsum; //Are these units right?
+  bigint natoms = atom->natoms;
+
+  if (!gewaldflag) {
+    if (function[0]) {
+      g_ewald = accuracy*sqrt(natoms*(*cutoff)*shape_det(domain->h)) / (2.0*q2);
+      if (g_ewald >= 1.0) g_ewald = (1.35 - 0.15*log(accuracy))/(*cutoff);
+      else g_ewald = sqrt(-log(g_ewald)) / (*cutoff);
+    }
+    else if (function[1] || function[2]) {
+      //Try Newton Solver
+      //Use old method to get guess
+      g_ewald = (1.35 - 0.15*log(accuracy))/ *cutoff;
+    
+      double g_ewald_new = 
+        NewtonSolve(g_ewald,(*cutoff),natoms,shape_det(domain->h),b2);
+      if (g_ewald_new > 0.0) g_ewald = g_ewald_new;
+      else error->warning(FLERR,"Ewald/disp Newton solver failed, "
+                          "using old method to estimate g_ewald");
+    } else if (function[3]) {
+      //Try Newton Solver
+      //Use old method to get guess
+      g_ewald = (1.35 - 0.15*log(accuracy))/ *cutoff;
+      double g_ewald_new = 
+        NewtonSolve(g_ewald,(*cutoff),natoms,shape_det(domain->h),M2);
+      if (g_ewald_new > 0.0) g_ewald = g_ewald_new;
+      else error->warning(FLERR,"Ewald/disp Newton solver failed, "
+                          "using old method to estimate g_ewald");
+    }
+  }
+
+  if (!comm->me) {
+      if (screen) fprintf(screen, "  G vector = %g\n", g_ewald);
+      if (logfile) fprintf(logfile, "  G vector = %g\n", g_ewald);
+  }
+
+  g_ewald_6 = g_ewald;
+  deallocate_peratom();
+  peratom_allocate_flag = 0;
+}
+
+/* ----------------------------------------------------------------------
+   adjust EwaldDisp coeffs, called initially and whenever volume has changed
+------------------------------------------------------------------------- */
+
+void EwaldDisp::setup()
+{
+  volume = shape_det(domain->h)*slab_volfactor;
+  memcpy(unit, domain->h_inv, sizeof(shape));
+  shape_scalar_mult(unit, 2.0*MY_PI);
+  unit[2] /= slab_volfactor;
+
+  // int nbox_old = nbox, nkvec_old = nkvec;
+
+  if (accuracy >= 1) {
+    nbox = 0;
+    error->all(FLERR,"KSpace accuracy too low");
+  }
+
+  bigint natoms = atom->natoms;
+  double err;
+  int kxmax = 1;
+  int kymax = 1;
+  int kzmax = 1;
+  err = rms(kxmax,domain->h[0],natoms,q2,b2,M2);
+  while (err > accuracy) {
+    kxmax++;
+    err = rms(kxmax,domain->h[0],natoms,q2,b2,M2);
+  }
+  err = rms(kymax,domain->h[1],natoms,q2,b2,M2);
+  while (err > accuracy) {
+    kymax++;
+    err = rms(kymax,domain->h[1],natoms,q2,b2,M2);
+  }
+  err = rms(kzmax,domain->h[2]*slab_volfactor,natoms,q2,b2,M2);
+  while (err > accuracy) {
+    kzmax++;
+    err = rms(kzmax,domain->h[2]*slab_volfactor,natoms,q2,b2,M2);
+  }
+  nbox = MAX(kxmax,kymax);
+  nbox = MAX(nbox,kzmax);
+  double gsqxmx = unit[0]*unit[0]*kxmax*kxmax;
+  double gsqymx = unit[1]*unit[1]*kymax*kymax;
+  double gsqzmx = unit[2]*unit[2]*kzmax*kzmax;
+  gsqmx = MAX(gsqxmx,gsqymx);
+  gsqmx = MAX(gsqmx,gsqzmx);
+  gsqmx *= 1.00001;
+
+  reallocate();
+  coefficients();
+  init_coeffs();
+  init_coeff_sums();
+  init_self();
+
+  if (!(first_output||comm->me)) {
+    first_output = 1;
+    if (screen) fprintf(screen,
+               "  vectors: nbox = %d, nkvec = %d\n", nbox, nkvec);
+    if (logfile) fprintf(logfile,
+        "  vectors: nbox = %d, nkvec = %d\n", nbox, nkvec);
+  }
+}
+
+/* ----------------------------------------------------------------------
+   compute RMS accuracy for a dimension
+------------------------------------------------------------------------- */
+
+double EwaldDisp::rms(int km, double prd, bigint natoms, double q2, double b2, double M2)
+{
+  double value = 0.0;
+
+  // Coulombic
+
+  double g2 = g_ewald*g_ewald;
+
+  value += 2.0*q2*g_ewald/prd *
+    sqrt(1.0/(MY_PI*km*natoms)) *
+    exp(-MY_PI*MY_PI*km*km/(g2*prd*prd));
+
+  // Lennard-Jones
+
+  double g7 = g2*g2*g2*g_ewald;
+
+  value += 4.0*b2*g7/3.0 *
+    sqrt(1.0/(MY_PI*natoms)) *
+    (exp(-MY_PI*MY_PI*km*km/(g2*prd*prd)) *
+    (MY_PI*km/(g_ewald*prd) + 1));
+
+  // dipole
+
+  value += 8.0*MY_PI*M2/volume*g_ewald *
+    sqrt(2.0*MY_PI*km*km*km/(15.0*natoms)) *
+    exp(-pow(MY_PI*km/(g_ewald*prd),2.0));
+
+  return value;
+}
+
+void EwaldDisp::reallocate()                // select k vectors inside cutoff, size k arrays
+{
+  int ix, iy, iz;
+  int nkvec_max = nkvec;                        // count from before this call (local)
+  vector h;
+
+  nkvec = 0;
+  int *kflag = new int[(nbox+1)*(2*nbox+1)*(2*nbox+1)];   // one flag per (ix,iy,iz)
+  int *flag = kflag;
+
+  for (ix=0; ix<=nbox; ++ix)                        // pass 1: flag and count
+    for (iy=-nbox; iy<=nbox; ++iy)
+      for (iz=-nbox; iz<=nbox; ++iz)
+        if (!(ix||iy||iz)) *(flag++) = 0;                // exclude k = 0
+        else if ((!ix)&&(iy<0)) *(flag++) = 0;
+        else if ((!(ix||iy))&&(iz<0)) *(flag++) = 0;        // use symmetry
+        else {
+          h[0] = unit[0]*ix;
+          h[1] = unit[5]*ix+unit[1]*iy;
+          h[2] = unit[4]*ix+unit[3]*iy+unit[2]*iz;
+          if ((*(flag++) = h[0]*h[0]+h[1]*h[1]+h[2]*h[2]<=gsqmx)) ++nkvec;  // flag = (h.h<=gsqmx)
+        }
+
+  if (nkvec>nkvec_max) {                        // more vectors than before: regrow arrays
+    deallocate();                                        // free memory
+    hvec = new hvector[nkvec];                                // hvec
+    bytes += (nkvec-nkvec_max)*sizeof(hvector);
+    kvec = new kvector[nkvec];                                // kvec
+    bytes += (nkvec-nkvec_max)*sizeof(kvector);
+    kenergy = new double[nkvec*nfunctions];                // kenergy
+    bytes += (nkvec-nkvec_max)*nfunctions*sizeof(double);
+    kvirial = new double[6*nkvec*nfunctions];                // kvirial
+    bytes += 6*(nkvec-nkvec_max)*nfunctions*sizeof(double);
+    cek_local = new complex[nkvec*nsums];                // cek_local
+    bytes += (nkvec-nkvec_max)*nsums*sizeof(complex);
+    cek_global = new complex[nkvec*nsums];                // cek_global
+    bytes += (nkvec-nkvec_max)*nsums*sizeof(complex);
+    nkvec_max = nkvec;        // NOTE(review): writes the local declared above; no effect after return - confirm intent
+  }
+
+  flag = kflag;                                           // create index and
+  kvector *k = kvec;                                      // wave vectors
+  hvector *hi = hvec;
+  for (ix=0; ix<=nbox; ++ix)                        // pass 2: same visiting order as pass 1
+    for (iy=-nbox; iy<=nbox; ++iy)
+      for (iz=-nbox; iz<=nbox; ++iz)
+        if (*(flag++)) {
+          hi->x = unit[0]*ix;
+          hi->y = unit[5]*ix+unit[1]*iy;
+          (hi++)->z = unit[4]*ix+unit[3]*iy+unit[2]*iz;
+          k->x = ix+nbox; k->y = iy+nbox; (k++)->z = iz+nbox; }  // indices shifted by nbox
+
+  delete [] kflag;
+}
+
+
+void EwaldDisp::reallocate_atoms()                // resize per-atom storage when needed
+{
+  if ((eflag_atom || vflag_atom) && atom->nlocal > nmax) {  // per-atom tables too small
+    deallocate_peratom();
+    allocate_peratom();
+    nmax = atom->nmax;
+  }
+  nevec = atom->nmax*(2*nbox+1);                // 2*nbox+1 cvector entries per atom
+  if (nevec > nevec_max) {                        // grow only, never shrink
+    delete [] ekr_local;
+    ekr_local = new cvector[nevec];
+    bytes += (nevec-nevec_max)*sizeof(cvector);
+    nevec_max = nevec;
+  }
+}
+
+
+void EwaldDisp::allocate_peratom()                // atom->nmax rows x EWALD_NFUNCS columns each
+{
+  memory->create(energy_self_peratom,atom->nmax,EWALD_NFUNCS,
+      "ewald/n:energy_self_peratom");
+  memory->create(virial_self_peratom,atom->nmax,EWALD_NFUNCS,
+      "ewald/n:virial_self_peratom");
+}
+
+
+void EwaldDisp::deallocate_peratom()                        // release per-atom self-term tables
+{
+  memory->destroy(virial_self_peratom);
+  memory->destroy(energy_self_peratom);
+}
+
+
+void EwaldDisp::deallocate()                                // release k-space arrays
+{
+  delete [] cek_global; cek_global = NULL;        // structure-factor sums
+  delete [] cek_local; cek_local = NULL;
+  delete [] kvirial; kvirial = NULL;                // per-k coefficients
+  delete [] kenergy; kenergy = NULL;
+  delete [] kvec; kvec = NULL;                        // k-vector tables
+  delete [] hvec; hvec = NULL;
+}
+
+
+void EwaldDisp::coefficients()                // fill kenergy/kvirial for every k vector
+{
+  vector h;
+  hvector *hi = hvec, *nh;
+  double eta2 = 0.25/(g_ewald*g_ewald);
+  double b1, b2, expb2, h1, h2, c1, c2;
+  double *ke = kenergy, *kv = kvirial;        // write cursors: 1 energy + 6 virial entries per active term
+  int func0 = function[0], func12 = function[1]||function[2],
+      func3 = function[3];
+
+  for (nh = (hi = hvec)+nkvec; hi<nh; ++hi) {                // wave vectors
+    memcpy(h, hi, sizeof(vector));
+    expb2 = exp(-(b2 = (h2 = vec_dot(h, h))*eta2));        // h2 = h.h, b2 = h2*eta2
+    if (func0) {                                        // qi*qj/r coeffs
+      *(ke++) = c1 = expb2/h2;
+      *(kv++) = c1-(c2 = 2.0*c1*(1.0+b2)/h2)*h[0]*h[0];
+      *(kv++) = c1-c2*h[1]*h[1];                        // lammps convention
+      *(kv++) = c1-c2*h[2]*h[2];                        // instead of voigt
+      *(kv++) = -c2*h[1]*h[0];
+      *(kv++) = -c2*h[2]*h[0];
+      *(kv++) = -c2*h[2]*h[1];
+    }
+    if (func12) {                                        // -Bij/r^6 coeffs
+      b1 = sqrt(b2);                                        // minus sign folded
+      h1 = sqrt(h2);                                        // into constants
+      *(ke++) = c1 = -h1*h2*((c2=MY_PIS*erfc(b1))+(0.5/b2-1.0)*expb2/b1);
+      *(kv++) = c1-(c2 = 3.0*h1*(c2-expb2/b1))*h[0]*h[0];        // c2 is reused: erfc part in, new c2 out
+      *(kv++) = c1-c2*h[1]*h[1];                        // lammps convention
+      *(kv++) = c1-c2*h[2]*h[2];                        // instead of voigt
+      *(kv++) = -c2*h[1]*h[0];
+      *(kv++) = -c2*h[2]*h[0];
+      *(kv++) = -c2*h[2]*h[1];
+    }
+    if (func3) {                                        // dipole coeffs
+      *(ke++) = c1 = expb2/h2;                                // same expressions as the 1/r branch
+      *(kv++) = c1-(c2 = 2.0*c1*(1.0+b2)/h2)*h[0]*h[0];
+      *(kv++) = c1-c2*h[1]*h[1];                        // lammps convention
+      *(kv++) = c1-c2*h[2]*h[2];                        // instead of voigt
+      *(kv++) = -c2*h[1]*h[0];
+      *(kv++) = -c2*h[2]*h[0];
+      *(kv++) = -c2*h[2]*h[1];
+    }
+  }
+}
+
+void EwaldDisp::init_coeffs()                        // (re)build the per-type table B
+{
+  int tmp;
+  int n = atom->ntypes;
+
+  if (function[1]) {                                        // geometric 1/r^6
+    double **b = (double **) force->pair->extract("B",tmp);
+    delete [] B;                                        // drop table from a previous init
+    B = new double[n+1];
+    bytes += (n+1)*sizeof(double);
+    for (int i=0; i<=n; ++i) B[i] = sqrt(fabs(b[i][i]));
+  }
+  if (function[2]) {                                        // arithmetic 1/r^6
+    double **epsilon = (double **) force->pair->extract("epsilon",tmp);
+    double **sigma = (double **) force->pair->extract("sigma",tmp);
+    if (!(epsilon&&sigma))                                // check before allocating
+      error->all(
+          FLERR,"Epsilon or sigma reference not set by pair style in ewald/n");
+    delete [] B;                // was leaked here; geometric branch already frees it
+    double eps_i, sigma_i, sigma_n, *bi = B = new double[7*n+7];
+    double c[7] = {                                        // weights for the 7 terms
+      1.0, sqrt(6.0), sqrt(15.0), sqrt(20.0), sqrt(15.0), sqrt(6.0), 1.0};
+    for (int i=0; i<=n; ++i) {                                // 7 entries per type
+      eps_i = sqrt(epsilon[i][i]);
+      sigma_i = sigma[i][i];
+      sigma_n = 1.0;
+      for (int j=0; j<7; ++j) {                                // sigma_n = sigma_i^j
+        *(bi++) = sigma_n*eps_i*c[j]; sigma_n *= sigma_i;
+      }
+    }
+  }
+}
+
+void EwaldDisp::init_coeff_sums()                // global sums of per-atom coefficients
+{
+  if (sums) return;                            // calculated only once
+  sums = 1;
+
+  Sum sum_local[EWALD_MAX_NSUMS];                // this rank's partial sums
+
+  memset(sum_local, 0, EWALD_MAX_NSUMS*sizeof(Sum));
+  if (function[0]) {                                        // 1/r
+    double *q = atom->q, *qn = q+atom->nlocal;
+    for (double *i=q; i<qn; ++i) {
+      sum_local[0].x += i[0]; sum_local[0].x2 += i[0]*i[0]; }        // sum q, sum q^2
+  }
+  if (function[1]) {                                        // geometric 1/r^6
+    int *type = atom->type, *ntype = type+atom->nlocal;
+    for (int *i=type; i<ntype; ++i) {
+      sum_local[1].x += B[i[0]]; sum_local[1].x2 += B[i[0]]*B[i[0]]; }  // sum B, sum B^2
+  }
+  if (function[2]) {                                        // arithmetic 1/r^6
+    double *bi;
+    int *type = atom->type, *ntype = type+atom->nlocal;
+    for (int *i=type; i<ntype; ++i) {
+      bi = B+7*i[0];                                        // 7 entries per type
+      sum_local[2].x2 += bi[0]*bi[6];
+      for (int k=2; k<9; ++k) sum_local[k].x += *(bi++);        // slots 2..8
+    }
+  }
+  if (function[3]&&atom->mu) {                                // dipole
+    double *mu = atom->mu[0], *nmu = mu+4*atom->nlocal;        // stride of 4 doubles per atom
+    for (double *i = mu; i < nmu; i += 4)
+      sum_local[9].x2 += i[3]*i[3];                        // square of 4th entry
+  }
+  MPI_Allreduce(sum_local, sum, 2*EWALD_MAX_NSUMS, MPI_DOUBLE, MPI_SUM, world);  // count treats each Sum as 2 doubles
+}
+
+
+void EwaldDisp::init_self()                        // global self energy/virial per interaction
+{
+  double g1 = g_ewald, g2 = g1*g1, g3 = g1*g2;        // g_ewald, its square and cube
+  const double qscale = force->qqrd2e * scale;
+
+  memset(energy_self, 0, EWALD_NFUNCS*sizeof(double));        // self energy
+  memset(virial_self, 0, EWALD_NFUNCS*sizeof(double));
+
+  if (function[0]) {                                        // 1/r
+    virial_self[0] = -0.5*MY_PI*qscale/(g2*volume)*sum[0].x*sum[0].x;
+    energy_self[0] = sum[0].x2*qscale*g1/MY_PIS-virial_self[0];
+  }
+  if (function[1]) {                                        // geometric 1/r^6
+    virial_self[1] = MY_PI*MY_PIS*g3/(6.0*volume)*sum[1].x*sum[1].x;
+    energy_self[1] = -sum[1].x2*g3*g3/12.0+virial_self[1];
+  }
+  if (function[2]) {                                        // arithmetic 1/r^6
+    virial_self[2] = MY_PI*MY_PIS*g3/(48.0*volume)*(sum[2].x*sum[8].x+   // mirrored pairs of slots 2..8
+        sum[3].x*sum[7].x+sum[4].x*sum[6].x+0.5*sum[5].x*sum[5].x);
+    energy_self[2] = -sum[2].x2*g3*g3/3.0+virial_self[2];
+  }
+  if (function[3]) {                                        // dipole
+    virial_self[3] = 0;                                        // in surface
+    energy_self[3] = sum[9].x2*mumurd2e*2.0*g3/3.0/MY_PIS-virial_self[3];
+  }
+}
+
+
+void EwaldDisp::init_self_peratom()                // per-atom self energy/virial tables
+{
+  if (!(vflag_atom || eflag_atom)) return;
+
+  double g1 = g_ewald, g2 = g1*g1, g3 = g1*g2;
+  const double qscale = force->qqrd2e * scale;
+  double *energy = energy_self_peratom[0];                // rows accessed as one flat array
+  double *virial = virial_self_peratom[0];
+  int nlocal = atom->nlocal;
+
+  memset(energy, 0, EWALD_NFUNCS*nlocal*sizeof(double));
+  memset(virial, 0, EWALD_NFUNCS*nlocal*sizeof(double));
+
+  if (function[0]) {                                        // 1/r
+    double *ei = energy;                                // column 0
+    double *vi = virial;
+    double ce = qscale*g1/MY_PIS;
+    double cv = -0.5*MY_PI*qscale/(g2*volume);
+    double *qi = atom->q, *qn = qi + nlocal;
+    for (; qi < qn; qi++, vi += EWALD_NFUNCS, ei += EWALD_NFUNCS) {
+      double q = *qi;
+      *vi = cv*q*sum[0].x;
+      *ei = ce*q*q-vi[0];
+    }
+  }
+  if (function[1]) {                                        // geometric 1/r^6
+    double *ei = energy+1;                                // column 1
+    double *vi = virial+1;
+    double ce = -g3*g3/12.0;
+    double cv = MY_PI*MY_PIS*g3/(6.0*volume);
+    int *typei = atom->type, *typen = typei + atom->nlocal;
+    for (; typei < typen; typei++, vi += EWALD_NFUNCS, ei += EWALD_NFUNCS) {
+      double b = B[*typei];
+      *vi = cv*b*sum[1].x;
+      *ei = ce*b*b+vi[0];
+    }
+  }
+  if (function[2]) {                                        // arithmetic 1/r^6
+    double *bi;
+    double *ei = energy+2;                                // column 2
+    double *vi = virial+2;
+    double ce = -g3*g3/3.0;
+    double cv = 0.5*MY_PI*MY_PIS*g3/(48.0*volume);
+    int *typei = atom->type, *typen = typei + atom->nlocal;
+    for (; typei < typen; typei++, vi += EWALD_NFUNCS, ei += EWALD_NFUNCS) {
+      bi = B+7*typei[0]+7;                                // one past this type's 7 entries
+      for (int k=2; k<9; ++k) *vi += cv*sum[k].x*(--bi)[0];        // walks entries 6..0
+
+      /* PJV 20120225:
+         should this be this instead?  above implies an inverse dependence
+         seems to be the above way in original;  i recall having tested
+         arithmetic mixing in the conception phase, but an extra test would
+         be prudent (pattern repeats in multiple functions below)
+
+      bi = B+7*typei[0];
+      for (int k=2; k<9; ++k) *vi += cv*sum[k].x*(bi++)[0];
+
+      */
+
+      *ei = ce*bi[0]*bi[6]+vi[0];                        // bi is back at entry 0 here
+    }
+  }
+  if (function[3]&&atom->mu) {                                // dipole
+    double *ei = energy+3;                                // column 3
+    double *vi = virial+3;
+    double *imu = atom->mu[0], *nmu = imu+4*atom->nlocal;        // stride of 4 doubles per atom
+    double ce = mumurd2e*2.0*g3/3.0/MY_PIS;
+    for (; imu < nmu; imu += 4, vi += EWALD_NFUNCS, ei += EWALD_NFUNCS) {
+      *vi = 0;                                                // in surface
+      *ei = ce*imu[3]*imu[3]-vi[0];
+    }
+  }
+}
+
+
+/* ----------------------------------------------------------------------
+   compute the EwaldDisp long-range force, energy, virial
+------------------------------------------------------------------------- */
+
+void EwaldDisp::compute(int eflag, int vflag)
+{
+  if (nbox == 0) return;                        // nothing to do without a k-space box
+
+  // resolve the global and per-atom tally flags for this call
+
+  if (!(eflag || vflag)) {
+    evflag = eflag_global = vflag_global = eflag_atom = vflag_atom = 0;
+  } else ev_setup(eflag,vflag);
+
+  if ((eflag_atom || vflag_atom) && !peratom_allocate_flag) {  // first per-atom request
+    allocate_peratom();
+    peratom_allocate_flag = 1;
+    nmax = atom->nmax;
+  }
+
+  reallocate_atoms();                        // storage first
+  init_self_peratom();
+  compute_ek();                                // sums used by every stage below
+  compute_force();
+  //compute_surface(); // assume conducting metal (tinfoil) boundary conditions
+  compute_energy();
+  compute_energy_peratom();
+  compute_virial();
+  compute_virial_dipole();
+  compute_virial_peratom();
+}
+
+
+void EwaldDisp::compute_ek()                        // per-atom phase tables and k-space sums
+{
+  cvector *ekr = ekr_local;                        // write cursor into the per-atom tables
+  int lbytes = (2*nbox+1)*sizeof(cvector);        // bytes of one atom's table
+  hvector *h = NULL;
+  kvector *k, *nk = kvec+nkvec;
+  cvector *z = new cvector[2*nbox+1];                // scratch table for the current atom
+  cvector z1, *zx, *zy, *zz, *zn = z+2*nbox;
+  complex *cek, zxyz, zxy = COMPLEX_NULL, cx = COMPLEX_NULL;
+  vector mui;
+  double *x = atom->x[0], *xn = x+3*atom->nlocal, *q = atom->q, qi = 0.0;
+  double bi = 0.0, ci[7];
+  double *mu = atom->mu ? atom->mu[0] : NULL;
+  int i, kx, ky, n = nkvec*nsums, *type = atom->type, tri = domain->triclinic;
+  int func[EWALD_NFUNCS];
+
+  memcpy(func, function, EWALD_NFUNCS*sizeof(int));        // local copy of the flags
+  memset(cek_local, 0, n*sizeof(complex));                // reset sums
+  while (x<xn) {                                        // one pass per local atom
+    zx = (zy = (zz = z+nbox)+1)-2;                        // zz = centre, zy = centre+1, zx = centre-1
+    C_SET(zz->x, 1, 0); C_SET(zz->y, 1, 0); C_SET(zz->z, 1, 0);        // z[0]
+    if (tri) {                                                // triclinic z[1]
+      C_ANGLE(z1.x, unit[0]*x[0]+unit[5]*x[1]+unit[4]*x[2]);
+      C_ANGLE(z1.y, unit[1]*x[1]+unit[3]*x[2]);
+      C_ANGLE(z1.z, x[2]*unit[2]); x += 3;
+    }
+    else {                                                // orthogonal z[1]
+      C_ANGLE(z1.x, *(x++)*unit[0]);
+      C_ANGLE(z1.y, *(x++)*unit[1]);
+      C_ANGLE(z1.z, *(x++)*unit[2]);
+    }
+    for (; zz<zn; --zx, ++zy, ++zz) {                  // set up z[k]=e^(ik.r)
+      C_RMULT(zy->x, zz->x, z1.x);                        // 3D k-vector
+      C_RMULT(zy->y, zz->y, z1.y); C_CONJ(zx->y, zy->y);        // x gets no mirrored entry
+      C_RMULT(zy->z, zz->z, z1.z); C_CONJ(zx->z, zy->z);
+    }
+    kx = ky = -1;                                        // force refresh of cx and zxy
+    cek = cek_local;
+    if (func[0]) qi = *(q++);
+    if (func[1]) bi = B[*type];
+    if (func[2]) memcpy(ci, B+7*type[0], 7*sizeof(double));
+    if (func[3]) {                // NOTE(review): mu is NULL here if atom->mu is unset - confirm callers
+      memcpy(mui, mu, sizeof(vector));
+      mu += 4;
+      h = hvec;
+    }
+    for (k=kvec; k<nk; ++k) {                                // compute rho(k)
+      if (ky!=k->y) {                                   // based on order in
+        if (kx!=k->x) cx = z[kx = k->x].x;                // reallocate
+        C_RMULT(zxy, z[ky = k->y].y, cx);
+      }
+      C_RMULT(zxyz, z[k->z].z, zxy);
+      if (func[0]) {
+               cek->re += zxyz.re*qi; (cek++)->im += zxyz.im*qi;
+      }
+      if (func[1]) {
+               cek->re += zxyz.re*bi; (cek++)->im += zxyz.im*bi;
+      }
+      if (func[2]) for (i=0; i<7; ++i) {
+        cek->re += zxyz.re*ci[i]; (cek++)->im += zxyz.im*ci[i];
+      }
+      if (func[3]) {
+        register double muk = mui[0]*h->x+mui[1]*h->y+mui[2]*h->z; ++h;        // mu.h
+        cek->re += zxyz.re*muk; (cek++)->im += zxyz.im*muk;
+      }
+    }
+    ekr = (cvector *) ((char *) memcpy(ekr, z, lbytes)+lbytes);        // store table, advance cursor
+    ++type;
+  }
+  MPI_Allreduce(cek_local, cek_global, 2*n, MPI_DOUBLE, MPI_SUM, world);  // count treats each complex as 2 doubles
+
+  delete [] z;
+}
+
+
+void EwaldDisp::compute_force()                // add k-space forces (and dipole torques) per atom
+{
+  kvector *k;
+  hvector *h, *nh;
+  cvector *z = ekr_local;                        // current atom's phase table
+  vector sum[EWALD_MAX_NSUMS], mui = COMPLEX_NULL;  // NOTE(review): mui is a vector; VECTOR_NULL is used for this elsewhere in the file
+  complex *cek, zc, zx = COMPLEX_NULL, zxy = COMPLEX_NULL;
+  complex *cek_coul;                                // set only when func[0] and func[3] are both on
+  double *f = atom->f[0], *fn = f+3*atom->nlocal, *q = atom->q, *t = NULL;
+  double *mu = atom->mu ? atom->mu[0] : NULL;
+  const double qscale = force->qqrd2e * scale;
+  double *ke, c[EWALD_NFUNCS] = {
+    8.0*MY_PI*qscale/volume, 2.0*MY_PI*MY_PIS/(12.0*volume),
+    2.0*MY_PI*MY_PIS/(192.0*volume), 8.0*MY_PI*mumurd2e/volume};
+  double kt = 4.0*cube(g_ewald)/3.0/MY_PIS/c[3];        // not referenced again in this function
+  int i, kx, ky, lbytes = (2*nbox+1)*sizeof(cvector), *type = atom->type;
+  int func[EWALD_NFUNCS];
+
+  if (atom->torque) t = atom->torque[0];                // t stays NULL without a torque array
+  memcpy(func, function, EWALD_NFUNCS*sizeof(int));
+  memset(sum, 0, EWALD_MAX_NSUMS*sizeof(vector));        // fj = -dE/dr =
+  for (; f<fn; f+=3) {                                    //      -i*qj*fac*
+    k = kvec;                                         //       Sum[conj(d)-d]
+    kx = ky = -1;                                        // d = k*conj(ekj)*ek
+    ke = kenergy;
+    cek = cek_global;
+    memset(sum, 0, EWALD_MAX_NSUMS*sizeof(vector));        // per-atom reset
+    if (func[3]) {
+      register double di = c[3];
+      mui[0] = di*(mu++)[0]; mui[1] = di*(mu++)[0]; mui[2] = di*(mu++)[0];  // mu scaled by c[3]
+      mu++;                                                // skip 4th entry
+    }
+    for (nh = (h = hvec)+nkvec; h<nh; ++h, ++k) {
+      if (ky!=k->y) {                                   // based on order in
+        if (kx!=k->x) zx = z[kx = k->x].x;                 // reallocate
+        C_RMULT(zxy, z[ky = k->y].y, zx);
+      }
+      C_CRMULT(zc, z[k->z].z, zxy);
+      if (func[0]) {                                        // 1/r
+        register double im = *(ke++)*(zc.im*cek->re+cek->im*zc.re);
+        if (func[3]) cek_coul = cek;                        // keep for charge-dipole terms
+        ++cek;
+        sum[0][0] += h->x*im; sum[0][1] += h->y*im; sum[0][2] += h->z*im;
+      }
+      if (func[1]) {                                        // geometric 1/r^6
+        register double im = *(ke++)*(zc.im*cek->re+cek->im*zc.re); ++cek;
+        sum[1][0] += h->x*im; sum[1][1] += h->y*im; sum[1][2] += h->z*im;
+      }
+      if (func[2]) {                                        // arithmetic 1/r^6
+        register double im, c = *(ke++);                // c shadows the array c[] in this scope
+        for (i=2; i<9; ++i) {
+          im = c*(zc.im*cek->re+cek->im*zc.re); ++cek;
+          sum[i][0] += h->x*im; sum[i][1] += h->y*im; sum[i][2] += h->z*im;
+        }
+      }
+      if (func[3]) {                                        // dipole
+        register double im = *(ke)*(zc.im*cek->re+
+            cek->im*zc.re)*(mui[0]*h->x+mui[1]*h->y+mui[2]*h->z);
+        register double im2 = *(ke)*(zc.re*cek->re-
+            cek->im*zc.im);
+        sum[9][0] += h->x*im; sum[9][1] += h->y*im; sum[9][2] += h->z*im;
+        t[0] += -mui[1]*h->z*im2 + mui[2]*h->y*im2;        // torque; NOTE(review): t is not NULL-checked
+        t[1] += -mui[2]*h->x*im2 + mui[0]*h->z*im2;
+        t[2] += -mui[0]*h->y*im2 + mui[1]*h->x*im2;
+        if (func[0]) {                                      // charge-dipole
+          register double qi = *(q)*c[0];
+          im = - *(ke)*(zc.re*cek_coul->re -
+              cek_coul->im*zc.im)*(mui[0]*h->x+mui[1]*h->y+mui[2]*h->z);
+          im += *(ke)*(zc.re*cek->re - cek->im*zc.im)*qi;
+          sum[9][0] += h->x*im; sum[9][1] += h->y*im; sum[9][2] += h->z*im;
+
+          im2 =  *(ke)*(zc.re*cek_coul->im + cek_coul->re*zc.im);
+          im2 += -*(ke)*(zc.re*cek->im - cek->im*zc.re);  // NOTE(review): both products are equal, term is zero as written
+          t[0] += -mui[1]*h->z*im2 + mui[2]*h->y*im2;        // torque
+          t[1] += -mui[2]*h->x*im2 + mui[0]*h->z*im2;
+          t[2] += -mui[0]*h->y*im2 + mui[1]*h->x*im2;
+        }
+        ++cek;
+        ke++;                                                // dipole coefficient consumed last
+      }
+    }
+    if (func[0]) {                                        // 1/r
+      register double qi = *(q++)*c[0];
+      f[0] -= sum[0][0]*qi; f[1] -= sum[0][1]*qi; f[2] -= sum[0][2]*qi;
+    }
+    if (func[1]) {                                        // geometric 1/r^6
+      register double bi = B[*type]*c[1];
+      f[0] -= sum[1][0]*bi; f[1] -= sum[1][1]*bi; f[2] -= sum[1][2]*bi;
+    }
+    if (func[2]) {                                        // arithmetic 1/r^6
+      register double *bi = B+7*type[0]+7;                // walk this type's 7 entries backwards
+      for (i=2; i<9; ++i) {
+        register double c2 = (--bi)[0]*c[2];
+        f[0] -= sum[i][0]*c2; f[1] -= sum[i][1]*c2; f[2] -= sum[i][2]*c2;
+      }
+    }
+    if (func[3]) {                                        // dipole
+      f[0] -= sum[9][0]; f[1] -= sum[9][1]; f[2] -= sum[9][2];
+    }
+    z = (cvector *) ((char *) z+lbytes);                // next atom's phase table
+    ++type;
+    t += 3;                                                // NOTE(review): also executed when t is NULL
+  }
+}
+
+
+void EwaldDisp::compute_surface()                // dipole surface term (call is commented out in compute())
+{
+  // assume conducting metal (tinfoil) boundary conditions, so this function is
+  // not called because dielectric at the boundary --> infinity, which makes all
+  // the terms here zero.
+
+  if (!function[3]) return;
+  if (!atom->mu) return;
+
+  vector sum_local = VECTOR_NULL, sum_total;
+  memset(sum_local, 0, sizeof(vector));
+  double *i, *n, *mu = atom->mu[0];
+
+  for (n = (i = mu) + 4*atom->nlocal; i < n; ++i) {        // 3 post-increments + ++i = stride 4
+    sum_local[0] += (i++)[0];
+    sum_local[1] += (i++)[0];
+    sum_local[2] += (i++)[0];
+  }
+  MPI_Allreduce(sum_local, sum_total, 3, MPI_DOUBLE, MPI_SUM, world);  // total dipole over all ranks
+
+  virial_self[3] =
+    mumurd2e*(2.0*MY_PI*vec_dot(sum_total,sum_total)/(2.0*dielectric+1)/volume);
+  energy_self[3] -= virial_self[3];
+
+  if (!(vflag_atom || eflag_atom)) return;
+
+  double *ei = energy_self_peratom[0]+3;                // column 3 of the per-atom tables
+  double *vi = virial_self_peratom[0]+3;
+  double cv = 2.0*mumurd2e*MY_PI/(2.0*dielectric+1)/volume;
+
+  for (i = mu; i < n; i += 4, ei += EWALD_NFUNCS, vi += EWALD_NFUNCS) {
+    *vi = cv*(i[0]*sum_total[0]+i[1]*sum_total[1]+i[2]*sum_total[2]);  // mu_i . total dipole
+    *ei -= *vi;
+  }
+}
+
+
+void EwaldDisp::compute_energy()                // global k-space energy from cek_global
+{
+  energy = 0.0;
+  if (!eflag_global) return;
+
+  complex *cek = cek_global;
+  complex *cek_coul;                                // set only when func[0] and func[3] are both on
+  double *ke = kenergy;
+  const double qscale = force->qqrd2e * scale;
+  double c[EWALD_NFUNCS] = {                        // prefactor per interaction
+    4.0*MY_PI*qscale/volume, 2.0*MY_PI*MY_PIS/(24.0*volume),
+    2.0*MY_PI*MY_PIS/(192.0*volume), 4.0*MY_PI*mumurd2e/volume};
+  double sum[EWALD_NFUNCS];
+  int func[EWALD_NFUNCS];
+
+  memcpy(func, function, EWALD_NFUNCS*sizeof(int));
+  memset(sum, 0, EWALD_NFUNCS*sizeof(double));                // reset sums
+  for (int k=0; k<nkvec; ++k) {                       // sum over k vectors
+    if (func[0]) {                                        // 1/r
+      sum[0] += *(ke++)*(cek->re*cek->re+cek->im*cek->im);
+      if (func[3]) cek_coul = cek;
+      ++cek; 
+    }
+    if (func[1]) {                                        // geometric 1/r^6
+      sum[1] += *(ke++)*(cek->re*cek->re+cek->im*cek->im); ++cek; }
+    if (func[2]) {                                        // arithmetic 1/r^6
+      register double r =                                // mirrored pairs of the 7 sums
+            (cek[0].re*cek[6].re+cek[0].im*cek[6].im)+
+            (cek[1].re*cek[5].re+cek[1].im*cek[5].im)+
+            (cek[2].re*cek[4].re+cek[2].im*cek[4].im)+
+        0.5*(cek[3].re*cek[3].re+cek[3].im*cek[3].im); cek += 7;
+      sum[2] += *(ke++)*r;
+    }
+    if (func[3]) {                                        // dipole
+      sum[3] += *(ke)*(cek->re*cek->re+cek->im*cek->im);
+      if (func[0]) {                                      // charge-dipole
+        sum[3] += *(ke)*2.0*(cek->re*cek_coul->im - cek->im*cek_coul->re);
+      }
+      ke++;
+      ++cek;
+    }
+  }
+  for (int k=0; k<EWALD_NFUNCS; ++k) energy += c[k]*sum[k]-energy_self[k];  // scale, remove self term
+  if (slabflag) compute_slabcorr();
+}
+
+
+void EwaldDisp::compute_energy_peratom()        // add k-space energy to eatom for each local atom
+{
+  if (!eflag_atom) return;
+
+  kvector *k;
+  hvector *h, *nh;
+  cvector *z = ekr_local;                        // current atom's phase table
+  vector  mui = VECTOR_NULL;
+  double sum[EWALD_MAX_NSUMS];
+  complex *cek, zc = COMPLEX_NULL, zx = COMPLEX_NULL, zxy = COMPLEX_NULL;
+  complex *cek_coul;                                // set only when func[0] and func[3] are both on
+  double *q = atom->q;
+  double *eatomj = eatom;
+  double *mu = atom->mu ? atom->mu[0] : NULL;
+  const double qscale = force->qqrd2e * scale;
+  double *ke = kenergy;
+  double c[EWALD_NFUNCS] = {                        // prefactor per interaction
+      4.0*MY_PI*qscale/volume, 2.0*MY_PI*MY_PIS/(24.0*volume),
+      2.0*MY_PI*MY_PIS/(192.0*volume), 4.0*MY_PI*mumurd2e/volume};
+  int i, kx, ky, lbytes = (2*nbox+1)*sizeof(cvector), *type = atom->type;
+  int func[EWALD_NFUNCS];
+
+  memcpy(func, function, EWALD_NFUNCS*sizeof(int));
+  for (int j = 0; j < atom->nlocal; j++, ++eatomj) {
+    k = kvec;
+    kx = ky = -1;                                        // force refresh of zx and zxy
+    ke = kenergy;
+    cek = cek_global;
+    memset(sum, 0, EWALD_MAX_NSUMS*sizeof(double));        // per-atom reset
+    if (func[3]) {
+      register double di = c[3];
+      mui[0] = di*(mu++)[0]; mui[1] = di*(mu++)[0]; mui[2] = di*(mu++)[0];  // mu scaled by c[3]
+      mu++;                                                // skip 4th entry
+    }
+    for (nh = (h = hvec)+nkvec; h<nh; ++h, ++k) {
+      if (ky!=k->y) {                              // based on order in
+        if (kx!=k->x) zx = z[kx = k->x].x;                 // reallocate
+        C_RMULT(zxy, z[ky = k->y].y, zx);
+      }
+      C_CRMULT(zc, z[k->z].z, zxy);
+      if (func[0]) {                                        // 1/r
+        sum[0] += *(ke++)*(cek->re*zc.re - cek->im*zc.im);
+        if (func[3]) cek_coul = cek;
+        ++cek;
+      }
+      if (func[1]) {                                        // geometric 1/r^6
+        sum[1] += *(ke++)*(cek->re*zc.re - cek->im*zc.im); ++cek; }
+      if (func[2]) {                                        // arithmetic 1/r^6
+        register double im, c = *(ke++);                // c shadows the array c[] in this scope
+        for (i=2; i<9; ++i) {
+          im = c*(cek->re*zc.re - cek->im*zc.im); ++cek;
+          sum[i] += im;
+        }
+      }
+      if (func[3]) {                                        // dipole
+        double muk = (mui[0]*h->x+mui[1]*h->y+mui[2]*h->z);        // scaled mu . h
+        sum[9] += *(ke)*(cek->re*zc.re - cek->im*zc.im)*muk;
+        if (func[0]) {                                      // charge-dipole
+          register double qj = *(q)*c[0];
+          sum[9] += *(ke)*(cek_coul->im*zc.re + cek_coul->re*zc.im)*muk;
+          sum[9] -= *(ke)*(cek->re*zc.im + cek->im*zc.re)*qj;
+        }
+        ++cek;
+        ke++;
+      }
+    }
+
+    if (func[0]) {                                        // 1/r
+      register double qj = *(q++)*c[0];
+      *eatomj += sum[0]*qj - energy_self_peratom[j][0];
+    }
+    if (func[1]) {                                        // geometric 1/r^6
+      register double bj = B[*type]*c[1];
+      *eatomj += sum[1]*bj - energy_self_peratom[j][1];
+    }
+    if (func[2]) {                                        // arithmetic 1/r^6
+      register double *bj = B+7*type[0]+7;                // walk this type's 7 entries backwards
+      for (i=2; i<9; ++i) {
+        register double c2 = (--bj)[0]*c[2];
+        *eatomj += 0.5*sum[i]*c2;
+      }
+      *eatomj -= energy_self_peratom[j][2];
+    }
+    if (func[3]) {                                        // dipole
+      *eatomj += sum[9] - energy_self_peratom[j][3];
+    }
+    z = (cvector *) ((char *) z+lbytes);                // next atom's phase table
+    ++type;
+  }
+}
+
+
+#define swap(a, b) { register double t = a; a= b; b = t; }  // exchange two double lvalues via a temporary
+
+void EwaldDisp::compute_virial()                // global k-space virial from cek_global
+{
+  memset(virial, 0, sizeof(shape));
+  if (!vflag_global) return;
+
+  complex *cek = cek_global;
+  complex *cek_coul;                                // set only when func[0] and func[3] are both on
+  double *kv = kvirial;                                // 6 coefficients per k per active term
+  const double qscale = force->qqrd2e * scale;
+  double c[EWALD_NFUNCS] = {                        // prefactor per interaction
+    4.0*MY_PI*qscale/volume, 2.0*MY_PI*MY_PIS/(24.0*volume),
+    2.0*MY_PI*MY_PIS/(192.0*volume), 4.0*MY_PI*mumurd2e/volume};
+  shape sum[EWALD_NFUNCS];
+  int func[EWALD_NFUNCS];
+
+  memcpy(func, function, EWALD_NFUNCS*sizeof(int));
+  memset(sum, 0, EWALD_NFUNCS*sizeof(shape));
+  for (int k=0; k<nkvec; ++k) {                      // sum over k vectors
+    if (func[0]) {                                         // 1/r
+      register double r = cek->re*cek->re+cek->im*cek->im;
+      if (func[3]) cek_coul = cek;
+      ++cek;
+      sum[0][0] += *(kv++)*r; sum[0][1] += *(kv++)*r; sum[0][2] += *(kv++)*r;
+      sum[0][3] += *(kv++)*r; sum[0][4] += *(kv++)*r; sum[0][5] += *(kv++)*r;
+    }
+    if (func[1]) {                                        // geometric 1/r^6
+      register double r = cek->re*cek->re+cek->im*cek->im; ++cek;
+      sum[1][0] += *(kv++)*r; sum[1][1] += *(kv++)*r; sum[1][2] += *(kv++)*r;
+      sum[1][3] += *(kv++)*r; sum[1][4] += *(kv++)*r; sum[1][5] += *(kv++)*r;
+    }
+    if (func[2]) {                                        // arithmetic 1/r^6
+      register double r =                                // mirrored pairs of the 7 sums
+            (cek[0].re*cek[6].re+cek[0].im*cek[6].im)+
+            (cek[1].re*cek[5].re+cek[1].im*cek[5].im)+
+            (cek[2].re*cek[4].re+cek[2].im*cek[4].im)+
+        0.5*(cek[3].re*cek[3].re+cek[3].im*cek[3].im); cek += 7;
+      sum[2][0] += *(kv++)*r; sum[2][1] += *(kv++)*r; sum[2][2] += *(kv++)*r;
+      sum[2][3] += *(kv++)*r; sum[2][4] += *(kv++)*r; sum[2][5] += *(kv++)*r;
+    }
+    if (func[3]) {                                        // dipole
+      register double r = cek->re*cek->re+cek->im*cek->im;
+      sum[3][0] += *(kv++)*r; sum[3][1] += *(kv++)*r; sum[3][2] += *(kv++)*r;
+      sum[3][3] += *(kv++)*r; sum[3][4] += *(kv++)*r; sum[3][5] += *(kv++)*r;
+      if (func[0]) {                                      // charge-dipole
+        kv -= 6;                                        // reuse the same 6 dipole coefficients
+        register double r = 2.0*(cek->re*cek_coul->im - cek->im*cek_coul->re);
+        sum[3][0] += *(kv++)*r; sum[3][1] += *(kv++)*r; sum[3][2] += *(kv++)*r;
+        sum[3][3] += *(kv++)*r; sum[3][4] += *(kv++)*r; sum[3][5] += *(kv++)*r;
+      }
+      ++cek;
+    }
+  }
+  for (int k=0; k<EWALD_NFUNCS; ++k)                // scale, accumulate, remove self term
+    if (func[k]) {
+      shape self = {virial_self[k], virial_self[k], virial_self[k], 0, 0, 0};  // diagonal only
+      shape_scalar_mult(sum[k], c[k]);
+      shape_add(virial, sum[k]);
+      shape_subtr(virial, self);
+    }
+}
+
+
+void EwaldDisp::compute_virial_dipole()        // extra dipole term for global and per-atom virial
+{
+  if (!function[3]) return;
+  if (!vflag_atom && !vflag_global) return;
+  double test = 0.0;                                // not referenced again in this function
+  kvector *k;
+  hvector *h, *nh;
+  cvector *z = ekr_local;                        // current atom's phase table
+  vector mui = COMPLEX_NULL;        // NOTE(review): mui is a vector; VECTOR_NULL is used for this elsewhere in the file
+  double sum[6];
+  double sum_total[6];
+  complex *cek, zc, zx = COMPLEX_NULL, zxy = COMPLEX_NULL;
+  complex *cek_coul;                                // set only when func[0] and func[3] are both on
+  double *mu = atom->mu ? atom->mu[0] : NULL;
+  double *vatomj = NULL;
+  if (vflag_atom && vatom) vatomj = vatom[0];        // stays NULL when vatom is unset
+  const double qscale = force->qqrd2e * scale;
+  double *ke, c[EWALD_NFUNCS] = {
+    8.0*MY_PI*qscale/volume, 2.0*MY_PI*MY_PIS/(12.0*volume),
+    2.0*MY_PI*MY_PIS/(192.0*volume), 8.0*MY_PI*mumurd2e/volume};
+  double kt = 4.0*cube(g_ewald)/3.0/MY_PIS/c[3];        // not referenced again in this function
+  int i, kx, ky, lbytes = (2*nbox+1)*sizeof(cvector), *type = atom->type;  // type is advanced but never read
+  int func[EWALD_NFUNCS];
+
+  memcpy(func, function, EWALD_NFUNCS*sizeof(int));
+  memset(&sum[0], 0, 6*sizeof(double));
+  memset(&sum_total[0], 0, 6*sizeof(double));
+  for (int j = 0; j < atom->nlocal; j++) {
+    k = kvec;
+    kx = ky = -1;                                        // force refresh of zx and zxy
+    ke = kenergy;
+    cek = cek_global;
+    memset(&sum[0], 0, 6*sizeof(double));                // per-atom reset
+    if (func[3]) {
+      register double di = c[3];
+      mui[0] = di*(mu++)[0]; mui[1] = di*(mu++)[0]; mui[2] = di*(mu++)[0];  // mu scaled by c[3]
+      mu++;                                                // skip 4th entry
+    }
+    for (nh = (h = hvec)+nkvec; h<nh; ++h, ++k) {
+      if (ky!=k->y) {                                   // based on order in
+        if (kx!=k->x) zx = z[kx = k->x].x;                 // reallocate
+        C_RMULT(zxy, z[ky = k->y].y, zx);
+      }
+      C_CRMULT(zc, z[k->z].z, zxy);
+      double im = 0.0;
+      if (func[0]) {                                        // 1/r
+        ke++;                                                // skip entry, only advance cursors
+        if (func[3]) cek_coul = cek;
+        ++cek;
+      }
+      if (func[1]) {                                        // geometric 1/r^6
+        ke++; 
+        ++cek;
+      }
+      if (func[2]) {                                        // arithmetic 1/r^6
+        ke++;
+        for (i=2; i<9; ++i) {                                // skip the 7 sums
+          ++cek;
+        }
+      }
+      if (func[3]) {                                        // dipole
+        im = *(ke)*(zc.re*cek->re - cek->im*zc.im);
+        if (func[0]) {                                      // charge-dipole
+          im += *(ke)*(zc.im*cek_coul->re + cek_coul->im*zc.re);
+        }
+        sum[0] -= mui[0]*h->x*im;                        // xx
+        sum[1] -= mui[1]*h->y*im;                        // yy
+        sum[2] -= mui[2]*h->z*im;                        // zz
+        sum[3] -= mui[0]*h->y*im;                        // xy
+        sum[4] -= mui[0]*h->z*im;                        // xz
+        sum[5] -= mui[1]*h->z*im;                        // yz
+        ++cek;
+        ke++;
+      }
+    }
+
+    if (vflag_global)
+      for (int n = 0; n < 6; n++)
+        sum_total[n] -= sum[n];
+
+    if (vflag_atom)                // NOTE(review): vatomj is NULL here if vatom was unset above
+      for (int n = 0; n < 6; n++)
+        vatomj[n] -= sum[n];
+
+    z = (cvector *) ((char *) z+lbytes);                // next atom's phase table
+    ++type;
+    if (vflag_atom) vatomj += 6;
+  }
+
+  if (vflag_global) {
+    MPI_Allreduce(&sum_total[0],&sum[0],6,MPI_DOUBLE,MPI_SUM,world);  // sum[] reused as receive buffer
+    for (int n = 0; n < 6; n++)
+      virial[n] += sum[n];
+  }
+
+}
+
+void EwaldDisp::compute_virial_peratom()  // adds this style's k-vector sums into the per-atom virial array vatom
+{
+  if (!vflag_atom) return;  // per-atom virial flag not set: nothing to do
+
+  kvector *k;
+  hvector *h, *nh;
+  cvector *z = ekr_local;  // block of (2*nbox+1) cvectors per atom; advanced by lbytes at loop end
+  vector  mui = VECTOR_NULL;
+  complex *cek, zc = COMPLEX_NULL, zx = COMPLEX_NULL, zxy = COMPLEX_NULL;
+  complex *cek_coul;  // set below only when func[0] and func[3] are both active
+  double *kv;
+  double *q = atom->q;
+  double *vatomj = vatom ? vatom[0] : NULL;  // 6 entries per atom; advanced by 6 at loop end
+  double *mu = atom->mu ? atom->mu[0] : NULL;  // flat walk over mu; 4 doubles consumed per atom below
+  const double qscale = force->qqrd2e * scale;
+  double c[EWALD_NFUNCS] = {  // one prefactor per function, indexed like func[]
+    4.0*MY_PI*qscale/volume, 2.0*MY_PI*MY_PIS/(24.0*volume),
+    2.0*MY_PI*MY_PIS/(192.0*volume), 4.0*MY_PI*mumurd2e/volume};
+  shape sum[EWALD_MAX_NSUMS];
+  int func[EWALD_NFUNCS];
+
+  memcpy(func, function, EWALD_NFUNCS*sizeof(int));  // work on a local copy of the function flags
+  int i, kx, ky, lbytes = (2*nbox+1)*sizeof(cvector), *type = atom->type;
+  for (int j = 0; j < atom->nlocal; j++) {
+    k = kvec;  // restart the k-vector walk for every atom
+    kx = ky = -1;  // forces zx and zxy to be refreshed on the first k-vector
+    kv = kvirial;
+    cek = cek_global;
+    memset(sum, 0, EWALD_MAX_NSUMS*sizeof(shape));  // clear the accumulators for atom j
+    if (func[3]) {
+      register double di = c[3];
+      mui[0] = di*(mu++)[0]; mui[1] = di*(mu++)[0]; mui[2] = di*(mu++)[0];
+      mu++;  // skip the 4th entry of this atom's mu row
+    }
+    for (nh = (h = hvec)+nkvec; h<nh; ++h, ++k) {  // hvec and kvec are walked in lockstep over nkvec entries
+      if (ky!=k->y) {                                // based on order in
+          if (kx!=k->x) zx = z[kx = k->x].x;                 // reallocate
+          C_RMULT(zxy, z[ky = k->y].y, zx);
+      }
+      C_CRMULT(zc, z[k->z].z, zxy);
+      if (func[0]) {                                        // 1/r
+          if (func[3]) cek_coul = cek;  // keep the 1/r entry for the charge-dipole term below
+          register double r = cek->re*zc.re - cek->im*zc.im; ++cek;
+          sum[0][0] += *(kv++)*r;
+          sum[0][1] += *(kv++)*r;
+          sum[0][2] += *(kv++)*r;
+          sum[0][3] += *(kv++)*r;
+          sum[0][4] += *(kv++)*r;
+          sum[0][5] += *(kv++)*r;
+      }
+      if (func[1]) {                                        // geometric 1/r^6
+          register double r = cek->re*zc.re - cek->im*zc.im; ++cek;
+          sum[1][0] += *(kv++)*r;
+          sum[1][1] += *(kv++)*r;
+          sum[1][2] += *(kv++)*r;
+          sum[1][3] += *(kv++)*r;
+          sum[1][4] += *(kv++)*r;
+          sum[1][5] += *(kv++)*r;
+      }
+      if (func[2]) {                                        // arithmetic 1/r^6
+        register double r;
+        for (i=2; i<9; ++i) {  // seven terms, accumulated in sum[2]..sum[8]
+          r = cek->re*zc.re - cek->im*zc.im; ++cek;
+          sum[i][0] += *(kv++)*r;
+          sum[i][1] += *(kv++)*r;
+          sum[i][2] += *(kv++)*r;
+          sum[i][3] += *(kv++)*r;
+          sum[i][4] += *(kv++)*r;
+          sum[i][5] += *(kv++)*r;
+      kv -= 6;  // rewind: all seven terms reuse the same six kv entries
+        }
+    kv += 6;  // step past those six entries once the seven terms are done
+      }
+      if (func[3]) {                                        // dipole
+         double muk = (mui[0]*h->x+mui[1]*h->y+mui[2]*h->z);  // dot product of scaled dipole with h
+         register double
+           r = (cek->re*zc.re - cek->im*zc.im)*muk;
+         sum[9][0] += *(kv++)*r;
+         sum[9][1] += *(kv++)*r;
+         sum[9][2] += *(kv++)*r;
+         sum[9][3] += *(kv++)*r;
+         sum[9][4] += *(kv++)*r;
+         sum[9][5] += *(kv++)*r;
+         if (func[0]) {                                      // charge-dipole
+           kv -= 6;  // rewind to reuse the same six kv entries for the cross term
+           register double qj = *(q)*c[0];  // q is read but not advanced here; it advances after the k loop
+           r = (cek_coul->im*zc.re + cek_coul->re*zc.im)*muk;
+           r += -(cek->re*zc.im + cek->im*zc.re)*qj;
+           sum[9][0] += *(kv++)*r; sum[9][1] += *(kv++)*r; sum[9][2] += *(kv++)*r;
+           sum[9][3] += *(kv++)*r; sum[9][4] += *(kv++)*r; sum[9][5] += *(kv++)*r;
+         }
+         ++cek;  // dipole entry consumed only after the optional cross term
+      }
+    }
+
+    if (func[0]) {                                        // 1/r
+      register double qi = *(q++)*c[0];  // q moves to the next atom only on this path
+      for (int n = 0; n < 6; n++) vatomj[n] += sum[0][n]*qi;
+    }
+    if (func[1]) {                                        // geometric 1/r^6
+      register double bi = B[*type]*c[1];
+      for (int n = 0; n < 6; n++) vatomj[n] += sum[1][n]*bi;
+    }
+    if (func[2]) {                                        // arithmetic 1/r^6
+      register double *bj = B+7*type[0]+7;  // one past this type's 7 coefficients; read backwards via --bj
+      for (i=2; i<9; ++i) {
+        register double c2 = (--bj)[0]*c[2];
+        for (int n = 0; n < 6; n++) vatomj[n] += 0.5*sum[i][n]*c2;
+      }
+    }
+    if (func[3]) {                                        // dipole
+      for (int n = 0; n < 6; n++) vatomj[n] += sum[9][n];  // prefactor c[3] was already folded into mui
+    }
+
+    for (int k=0; k<EWALD_NFUNCS; ++k) {  // note: this k shadows the kvector pointer declared above
+      if (func[k]) {
+        for (int n = 0; n < 3; n++) vatomj[n] -= virial_self_peratom[j][k];  // self term removed from the first three components only
+      }
+    }
+
+    z = (cvector *) ((char *) z+lbytes);  // advance to the next atom's block of cvectors
+    ++type;
+    vatomj += 6;
+  }
+}
+
+
+/* ----------------------------------------------------------------------
+   Slab-geometry correction term to dampen inter-slab interactions between
+   periodically repeating slabs.  Yields good approximation to 2D Ewald if
+   adequate empty space is left between repeating slabs (J. Chem. Phys.
+   111, 3155).  Slabs defined here to be parallel to the xy plane. Also
+   extended to non-neutral systems (J. Chem. Phys. 131, 094107).
+------------------------------------------------------------------------- */
+
+void EwaldDisp::compute_slabcorr()
+{
+  // compute local contribution to global dipole moment
+
+  double *q = atom->q;
+  double **x = atom->x;
+  double zprd = domain->zprd;
+  int nlocal = atom->nlocal;
+
+  double qsum = 0.0;
+  if (function[0]) qsum = sum[0].x;  // qsum stays 0.0 unless function[0] is active
+
+  double dipole = 0.0;
+  for (int i = 0; i < nlocal; i++) dipole += q[i]*x[i][2];  // charge part: sum of q*z over local atoms
+
+  if (function[3] && atom->mu) {
+    double **mu = atom->mu;
+    for (int i = 0; i < nlocal; i++) dipole += mu[i][2];  // plus the z component of each atom's mu
+  }
+
+  // sum local contributions to get global dipole moment
+
+  double dipole_all;
+  MPI_Allreduce(&dipole,&dipole_all,1,MPI_DOUBLE,MPI_SUM,world);
+
+  // need to make non-neutral systems and/or
+  //  per-atom energy translationally invariant
+
+  double dipole_r2 = 0.0;  // remains 0.0 when the branch below is not taken
+  if (eflag_atom || fabs(qsum) > SMALL) {
+
+    if (function[3] && atom->mu)
+      error->all(FLERR,"Cannot (yet) use kspace slab correction with "
+        "long-range dipoles and non-neutral systems or per-atom energy");
+
+    for (int i = 0; i < nlocal; i++)
+      dipole_r2 += q[i]*x[i][2]*x[i][2];  // sum of q*z^2 over local atoms
+
+    // sum local contributions
+
+    double tmp;
+    MPI_Allreduce(&dipole_r2,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
+    dipole_r2 = tmp;
+  }
+
+  // compute corrections
+
+  const double e_slabcorr = MY_2PI*(dipole_all*dipole_all -
+    qsum*dipole_r2 - qsum*qsum*zprd*zprd/12.0)/volume;
+  const double qscale = force->qqrd2e * scale;
+
+  if (eflag_global) energy += qscale * e_slabcorr;
+
+  // per-atom energy
+
+  if (eflag_atom) {
+    double efact = qscale * MY_2PI/volume;
+    for (int i = 0; i < nlocal; i++)
+      eatom[i] += efact * q[i]*(x[i][2]*dipole_all - 0.5*(dipole_r2 +
+        qsum*x[i][2]*x[i][2]) - qsum*zprd*zprd/12.0);
+  }
+
+  // add on force corrections
+
+  double ffact = qscale * (-4.0*MY_PI/volume);
+  double **f = atom->f;
+
+  for (int i = 0; i < nlocal; i++) f[i][2] += ffact * q[i]*(dipole_all - qsum*x[i][2]);  // only the z force component changes
+
+  // add on torque corrections
+
+  if (function[3] && atom->mu && atom->torque) {
+    double **mu = atom->mu;
+    double **torque = atom->torque;
+    for (int i = 0; i < nlocal; i++) {
+      torque[i][0] += ffact * dipole_all * mu[i][1];  // only the x and y torque components change
+      torque[i][1] += -ffact * dipole_all * mu[i][0];
+    }
+  }
+}
+
+/* ----------------------------------------------------------------------
+  Newton solver used to find g_ewald as a root of f(x) (LJ or dipole form)
+ ------------------------------------------------------------------------- */
+
+double EwaldDisp::NewtonSolve(double x, double Rc, 
+                              bigint natoms, double vol, double b2)
+{
+  double dx,tol;
+  int maxit;
+
+  maxit = 10000; //Maximum number of iterations
+  tol = 0.00001; //Convergence tolerance
+
+  //Begin algorithm
+
+  for (int i = 0; i < maxit; i++) {
+    dx = f(x,Rc,natoms,vol,b2) / derivf(x,Rc,natoms,vol,b2);  // Newton step f(x)/f'(x)
+    x = x - dx; //Update x
+    if (fabs(dx) < tol) return x;  // converged: last step smaller than tol (checked before the sign test)
+    if (x < 0 || x != x) // solver failed: x went negative or is NaN (x != x)
+      return -1;
+  }
+  return -1;  // no convergence within maxit iterations
+}
+
+/* ----------------------------------------------------------------------
+ Calculate f(x) = expression(x,Rc,natoms,vol,b2) - accuracy (LJ or dipole)
+ ------------------------------------------------------------------------- */
+
+double EwaldDisp::f(double x, double Rc, bigint natoms, double vol, double b2)
+{
+  double a = Rc*x;
+  double f = 0.0;  // local result; shadows the member function name inside this body
+
+  if (function[1] || function[2]) { // LJ
+    f = (4.0*MY_PI*b2*powint(x,4)/vol/sqrt((double)natoms)*erfc(a) *
+      (6.0*powint(a,-5) + 6.0*powint(a,-3) + 3.0/a + a) - accuracy);
+  } else { // dipole
+    double rg2 = a*a;  // (Rc*x)^2
+    double rg4 = rg2*rg2;
+    double rg6 = rg4*rg2;
+    double Cc = 4.0*rg4 + 6.0*rg2 + 3.0;
+    double Dc = 8.0*rg6 + 20.0*rg4 + 30.0*rg2 + 15.0;
+    f = (b2/(sqrt(vol*powint(x,4)*powint(Rc,9)*natoms)) *
+      sqrt(13.0/6.0*Cc*Cc + 2.0/15.0*Dc*Dc - 13.0/15.0*Cc*Dc) *
+      exp(-rg2)) - accuracy;
+    }
+
+  return f;
+}
+
+/* ----------------------------------------------------------------------
+ Calculate numerical derivative f'(x) by forward difference with step h
+ ------------------------------------------------------------------------- */
+
+double EwaldDisp::derivf(double x, double Rc, 
+                         bigint natoms, double vol, double b2)
+{
+  double h = 0.000001;  //Derivative step-size
+  return (f(x + h,Rc,natoms,vol,b2) - f(x,Rc,natoms,vol,b2)) / h;  // (f(x+h) - f(x)) / h
+}
diff --git a/src/KSPACE/pppm.cpp b/src/KSPACE/pppm.cpp
index 9c5db42ad8..c3b54559a0 100644
--- a/src/KSPACE/pppm.cpp
+++ b/src/KSPACE/pppm.cpp
@@ -1,3501 +1,3501 @@
-/* ----------------------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   http://lammps.sandia.gov, Sandia National Laboratories
-   Steve Plimpton, sjplimp@sandia.gov
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-/* ----------------------------------------------------------------------
-   Contributing authors: Roy Pollock (LLNL), Paul Crozier (SNL)
-     per-atom energy/virial & group/group energy/force added by Stan Moore (BYU)
-     analytic diff (2 FFT) option added by Rolf Isele-Holder (Aachen University)
-     triclinic added by Stan Moore (SNL)
-------------------------------------------------------------------------- */
-
-#include "lmptype.h"
-#include "mpi.h"
-#include "string.h"
-#include "stdio.h"
-#include "stdlib.h"
-#include "math.h"
-#include "pppm.h"
-#include "atom.h"
-#include "comm.h"
-#include "commgrid.h"
-#include "neighbor.h"
-#include "force.h"
-#include "pair.h"
-#include "bond.h"
-#include "angle.h"
-#include "domain.h"
-#include "fft3d_wrap.h"
-#include "remap_wrap.h"
-#include "memory.h"
-#include "error.h"
-
-#include "math_const.h"
-#include "math_special.h"
-
-using namespace LAMMPS_NS;
-using namespace MathConst;
-using namespace MathSpecial;
-
-#define MAXORDER 7
-#define OFFSET 16384
-#define SMALL 0.00001
-#define LARGE 10000.0
-#define EPS_HOC 1.0e-7
-
-enum{REVERSE_RHO};
-enum{FORWARD_IK,FORWARD_AD,FORWARD_IK_PERATOM,FORWARD_AD_PERATOM};
-
-#ifdef FFT_SINGLE
-#define ZEROF 0.0f
-#define ONEF  1.0f
-#else
-#define ZEROF 0.0
-#define ONEF  1.0
-#endif
-
-/* ---------------------------------------------------------------------- */
-
-PPPM::PPPM(LAMMPS *lmp, int narg, char **arg) : KSpace(lmp, narg, arg)
-{
-  if (narg < 1) error->all(FLERR,"Illegal kspace_style pppm command");
- 
-  pppmflag = 1;
-  group_group_enable = 1;
-
-  accuracy_relative = fabs(force->numeric(FLERR,arg[0]));
-
-  nfactors = 3;
-  factors = new int[nfactors];
-  factors[0] = 2;
-  factors[1] = 3;
-  factors[2] = 5;
-
-  MPI_Comm_rank(world,&me);
-  MPI_Comm_size(world,&nprocs);
-
-  density_brick = vdx_brick = vdy_brick = vdz_brick = NULL;
-  density_fft = NULL;
-  u_brick = NULL;
-  v0_brick = v1_brick = v2_brick = v3_brick = v4_brick = v5_brick = NULL;
-  greensfn = NULL;
-  work1 = work2 = NULL;
-  vg = NULL;
-  fkx = fky = fkz = NULL;
-
-  sf_precoeff1 = sf_precoeff2 = sf_precoeff3 = 
-    sf_precoeff4 = sf_precoeff5 = sf_precoeff6 = NULL;
-
-  density_A_brick = density_B_brick = NULL;
-  density_A_fft = density_B_fft = NULL;
-
-  gf_b = NULL;
-  rho1d = rho_coeff = drho1d = drho_coeff = NULL;
-
-  fft1 = fft2 = NULL;
-  remap = NULL;
-  cg = NULL;
-  cg_peratom = NULL;
-
-  nmax = 0;
-  part2grid = NULL;
-
-  peratom_allocate_flag = 0;
-  group_allocate_flag = 0;
-
-  // define acons coefficients for estimation of kspace errors
-  // see JCP 109, pg 7698 for derivation of coefficients
-  // higher order coefficients may be computed if needed
-
-  memory->create(acons,8,7,"pppm:acons");
-  acons[1][0] = 2.0 / 3.0;
-  acons[2][0] = 1.0 / 50.0;
-  acons[2][1] = 5.0 / 294.0;
-  acons[3][0] = 1.0 / 588.0;
-  acons[3][1] = 7.0 / 1440.0;
-  acons[3][2] = 21.0 / 3872.0;
-  acons[4][0] = 1.0 / 4320.0;
-  acons[4][1] = 3.0 / 1936.0;
-  acons[4][2] = 7601.0 / 2271360.0;
-  acons[4][3] = 143.0 / 28800.0;
-  acons[5][0] = 1.0 / 23232.0;
-  acons[5][1] = 7601.0 / 13628160.0;
-  acons[5][2] = 143.0 / 69120.0;
-  acons[5][3] = 517231.0 / 106536960.0;
-  acons[5][4] = 106640677.0 / 11737571328.0;
-  acons[6][0] = 691.0 / 68140800.0;
-  acons[6][1] = 13.0 / 57600.0;
-  acons[6][2] = 47021.0 / 35512320.0;
-  acons[6][3] = 9694607.0 / 2095994880.0;
-  acons[6][4] = 733191589.0 / 59609088000.0;
-  acons[6][5] = 326190917.0 / 11700633600.0;
-  acons[7][0] = 1.0 / 345600.0;
-  acons[7][1] = 3617.0 / 35512320.0;
-  acons[7][2] = 745739.0 / 838397952.0;
-  acons[7][3] = 56399353.0 / 12773376000.0;
-  acons[7][4] = 25091609.0 / 1560084480.0;
-  acons[7][5] = 1755948832039.0 / 36229939200000.0;
-  acons[7][6] = 4887769399.0 / 37838389248.0;
-}
-
-/* ----------------------------------------------------------------------
-   free all memory
-------------------------------------------------------------------------- */
-
-PPPM::~PPPM()
-{
-  delete [] factors;
-  deallocate();
-  if (peratom_allocate_flag) deallocate_peratom();
-  if (group_allocate_flag) deallocate_groups();
-  memory->destroy(part2grid);
-  memory->destroy(acons);
-}
-
-/* ----------------------------------------------------------------------
-   called once before run
-------------------------------------------------------------------------- */
-
-void PPPM::init()
-{
-  if (me == 0) {
-    if (screen) fprintf(screen,"PPPM initialization ...\n");
-    if (logfile) fprintf(logfile,"PPPM initialization ...\n");
-  }
-
-  // error check
-
-  triclinic_check();
-  if (domain->triclinic && differentiation_flag == 1)
-    error->all(FLERR,"Cannot (yet) use PPPM with triclinic box "
-               "and kspace_modify diff ad");
-  if (domain->triclinic && slabflag)
-    error->all(FLERR,"Cannot (yet) use PPPM with triclinic box and "
-               "slab correction");
-  if (domain->dimension == 2) error->all(FLERR,
-                                         "Cannot use PPPM with 2d simulation");
-
-  if (!atom->q_flag) error->all(FLERR,"Kspace style requires atom attribute q");
-
-  if (slabflag == 0 && domain->nonperiodic > 0)
-    error->all(FLERR,"Cannot use nonperiodic boundaries with PPPM");
-  if (slabflag) {
-    if (domain->xperiodic != 1 || domain->yperiodic != 1 ||
-        domain->boundary[2][0] != 1 || domain->boundary[2][1] != 1)
-      error->all(FLERR,"Incorrect boundaries with slab PPPM");
-  }
-
-  if (order < 2 || order > MAXORDER) {
-    char str[128];
-    sprintf(str,"PPPM order cannot be < 2 or > than %d",MAXORDER);
-    error->all(FLERR,str);
-  }
-
-  // extract short-range Coulombic cutoff from pair style
-
-  triclinic = domain->triclinic;
-  scale = 1.0;
-
-  pair_check();
-
-  int itmp = 0;
-  double *p_cutoff = (double *) force->pair->extract("cut_coul",itmp);
-  if (p_cutoff == NULL)
-    error->all(FLERR,"KSpace style is incompatible with Pair style");
-  cutoff = *p_cutoff;
-
-  // if kspace is TIP4P, extract TIP4P params from pair style
-  // bond/angle are not yet init(), so insure equilibrium request is valid
-
-  qdist = 0.0;
-
-  if (tip4pflag) {
-    double *p_qdist = (double *) force->pair->extract("qdist",itmp);
-    int *p_typeO = (int *) force->pair->extract("typeO",itmp);
-    int *p_typeH = (int *) force->pair->extract("typeH",itmp);
-    int *p_typeA = (int *) force->pair->extract("typeA",itmp);
-    int *p_typeB = (int *) force->pair->extract("typeB",itmp);
-    if (!p_qdist || !p_typeO || !p_typeH || !p_typeA || !p_typeB)
-      error->all(FLERR,"KSpace style is incompatible with Pair style");
-    qdist = *p_qdist;
-    typeO = *p_typeO;
-    typeH = *p_typeH;
-    int typeA = *p_typeA;
-    int typeB = *p_typeB;
-
-    if (force->angle == NULL || force->bond == NULL ||
-        force->angle->setflag == NULL || force->bond->setflag == NULL)
-      error->all(FLERR,"Bond and angle potentials must be defined for TIP4P");
-    if (typeA < 1 || typeA > atom->nangletypes ||
-        force->angle->setflag[typeA] == 0)
-      error->all(FLERR,"Bad TIP4P angle type for PPPM/TIP4P");
-    if (typeB < 1 || typeB > atom->nbondtypes ||
-        force->bond->setflag[typeB] == 0)
-      error->all(FLERR,"Bad TIP4P bond type for PPPM/TIP4P");
-    double theta = force->angle->equilibrium_angle(typeA);
-    double blen = force->bond->equilibrium_distance(typeB);
-    alpha = qdist / (cos(0.5*theta) * blen);
-    if (domain->triclinic)
-      error->all(FLERR,"Cannot (yet) use PPPM with triclinic box and TIP4P");
-  }
-
-  // compute qsum & qsqsum and warn if not charge-neutral
-
-  qsum = qsqsum = 0.0;
-  for (int i = 0; i < atom->nlocal; i++) {
-    qsum += atom->q[i];
-    qsqsum += atom->q[i]*atom->q[i];
-  }
-
-  double tmp;
-  MPI_Allreduce(&qsum,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
-  qsum = tmp;
-  MPI_Allreduce(&qsqsum,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
-  qsqsum = tmp;
-  q2 = qsqsum * force->qqrd2e;
-
-  if (qsqsum == 0.0)
-    error->all(FLERR,"Cannot use kspace solver on system with no charge");
-  if (fabs(qsum) > SMALL && me == 0) {
-    char str[128];
-    sprintf(str,"System is not charge neutral, net charge = %g",qsum);
-    error->warning(FLERR,str);
-  }
-
-  // set accuracy (force units) from accuracy_relative or accuracy_absolute
-
-  if (accuracy_absolute >= 0.0) accuracy = accuracy_absolute;
-  else accuracy = accuracy_relative * two_charge_force;
-
-  // free all arrays previously allocated
-
-  deallocate();
-  if (peratom_allocate_flag) deallocate_peratom();
-  if (group_allocate_flag) deallocate_groups();
-
-  // setup FFT grid resolution and g_ewald
-  // normally one iteration thru while loop is all that is required
-  // if grid stencil does not extend beyond neighbor proc
-  //   or overlap is allowed, then done
-  // else reduce order and try again
-
-  int (*procneigh)[2] = comm->procneigh;
-
-  CommGrid *cgtmp = NULL;
-  int iteration = 0;
-
-  while (order >= minorder) {
-    if (iteration && me == 0)
-      error->warning(FLERR,"Reducing PPPM order b/c stencil extends "
-                     "beyond nearest neighbor processor");
-
-    if (stagger_flag && !differentiation_flag) compute_gf_denom();
-    set_grid_global();
-    set_grid_local();
-    if (overlap_allowed) break;
-
-    cgtmp = new CommGrid(lmp,world,1,1,
-                         nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
-                         nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out,
-                         procneigh[0][0],procneigh[0][1],procneigh[1][0],
-                         procneigh[1][1],procneigh[2][0],procneigh[2][1]);
-    cgtmp->ghost_notify();
-    if (!cgtmp->ghost_overlap()) break;
-    delete cgtmp;
-
-    order--;
-    iteration++;
-  }
-  
-  if (order < minorder) error->all(FLERR,"PPPM order < minimum allowed order");
-  if (!overlap_allowed && cgtmp->ghost_overlap())
-    error->all(FLERR,"PPPM grid stencil extends "
-               "beyond nearest neighbor processor");
-  if (cgtmp) delete cgtmp;
-
-  // adjust g_ewald
-
-  if (!gewaldflag) adjust_gewald();
-
-  // calculate the final accuracy
-
-  double estimated_accuracy = final_accuracy();
-
-  // print stats
-
-  int ngrid_max,nfft_both_max;
-  MPI_Allreduce(&ngrid,&ngrid_max,1,MPI_INT,MPI_MAX,world);
-  MPI_Allreduce(&nfft_both,&nfft_both_max,1,MPI_INT,MPI_MAX,world);
-
-  if (me == 0) {
-
-#ifdef FFT_SINGLE
-    const char fft_prec[] = "single";
-#else
-    const char fft_prec[] = "double";
-#endif
-
-    if (screen) {
-      fprintf(screen,"  G vector (1/distance) = %g\n",g_ewald);
-      fprintf(screen,"  grid = %d %d %d\n",nx_pppm,ny_pppm,nz_pppm);
-      fprintf(screen,"  stencil order = %d\n",order);
-      fprintf(screen,"  estimated absolute RMS force accuracy = %g\n",
-              estimated_accuracy);
-      fprintf(screen,"  estimated relative force accuracy = %g\n",
-              estimated_accuracy/two_charge_force);
-      fprintf(screen,"  using %s precision FFTs\n",fft_prec);
-      fprintf(screen,"  3d grid and FFT values/proc = %d %d\n",
-              ngrid_max,nfft_both_max);
-    }
-    if (logfile) {
-      fprintf(logfile,"  G vector (1/distance) = %g\n",g_ewald);
-      fprintf(logfile,"  grid = %d %d %d\n",nx_pppm,ny_pppm,nz_pppm);
-      fprintf(logfile,"  stencil order = %d\n",order);
-      fprintf(logfile,"  estimated absolute RMS force accuracy = %g\n",
-              estimated_accuracy);
-      fprintf(logfile,"  estimated relative force accuracy = %g\n",
-              estimated_accuracy/two_charge_force);
-      fprintf(logfile,"  using %s precision FFTs\n",fft_prec);
-      fprintf(logfile,"  3d grid and FFT values/proc = %d %d\n",
-              ngrid_max,nfft_both_max);
-    }
-  }
-
-  // allocate K-space dependent memory
-  // don't invoke allocate peratom() or group(), will be allocated when needed
-
-  allocate();
-  cg->ghost_notify();
-  cg->setup();
-
-  // pre-compute Green's function denomiator expansion
-  // pre-compute 1d charge distribution coefficients
-
-  compute_gf_denom();
-  if (differentiation_flag == 1) compute_sf_precoeff();
-  compute_rho_coeff();
-}
-
-/* ----------------------------------------------------------------------
-   adjust PPPM coeffs, called initially and whenever volume has changed
-------------------------------------------------------------------------- */
-
-void PPPM::setup()
-{
-  if (triclinic) {
-    setup_triclinic();
-    return;
-  }
-
-  int i,j,k,n;
-  double *prd;
-
-  // volume-dependent factors
-  // adjust z dimension for 2d slab PPPM
-  // z dimension for 3d PPPM is zprd since slab_volfactor = 1.0
-
-  if (triclinic == 0) prd = domain->prd;
-  else prd = domain->prd_lamda;
-
-  double xprd = prd[0];
-  double yprd = prd[1];
-  double zprd = prd[2];
-  double zprd_slab = zprd*slab_volfactor;
-  volume = xprd * yprd * zprd_slab;
-
-  delxinv = nx_pppm/xprd;
-  delyinv = ny_pppm/yprd;
-  delzinv = nz_pppm/zprd_slab;
-
-  delvolinv = delxinv*delyinv*delzinv;
-
-  double unitkx = (MY_2PI/xprd);
-  double unitky = (MY_2PI/yprd);
-  double unitkz = (MY_2PI/zprd_slab);
-
-  // fkx,fky,fkz for my FFT grid pts
-
-  double per;
-
-  for (i = nxlo_fft; i <= nxhi_fft; i++) {
-    per = i - nx_pppm*(2*i/nx_pppm);
-    fkx[i] = unitkx*per;
-  }
-
-  for (i = nylo_fft; i <= nyhi_fft; i++) {
-    per = i - ny_pppm*(2*i/ny_pppm);
-    fky[i] = unitky*per;
-  }
-
-  for (i = nzlo_fft; i <= nzhi_fft; i++) {
-    per = i - nz_pppm*(2*i/nz_pppm);
-    fkz[i] = unitkz*per;
-  }
-
-  // virial coefficients
-
-  double sqk,vterm;
-
-  n = 0;
-  for (k = nzlo_fft; k <= nzhi_fft; k++) {
-    for (j = nylo_fft; j <= nyhi_fft; j++) {
-      for (i = nxlo_fft; i <= nxhi_fft; i++) {
-        sqk = fkx[i]*fkx[i] + fky[j]*fky[j] + fkz[k]*fkz[k];
-        if (sqk == 0.0) {
-          vg[n][0] = 0.0;
-          vg[n][1] = 0.0;
-          vg[n][2] = 0.0;
-          vg[n][3] = 0.0;
-          vg[n][4] = 0.0;
-          vg[n][5] = 0.0;
-        } else {
-          vterm = -2.0 * (1.0/sqk + 0.25/(g_ewald*g_ewald));
-          vg[n][0] = 1.0 + vterm*fkx[i]*fkx[i];
-          vg[n][1] = 1.0 + vterm*fky[j]*fky[j];
-          vg[n][2] = 1.0 + vterm*fkz[k]*fkz[k];
-          vg[n][3] = vterm*fkx[i]*fky[j];
-          vg[n][4] = vterm*fkx[i]*fkz[k];
-          vg[n][5] = vterm*fky[j]*fkz[k];
-        }
-        n++;
-      }
-    }
-  }
-
-  if (differentiation_flag == 1) compute_gf_ad();
-  else compute_gf_ik();
-}
-
-/* ----------------------------------------------------------------------
-   adjust PPPM coeffs, called initially and whenever volume has changed
-   for a triclinic system
-------------------------------------------------------------------------- */
-
-void PPPM::setup_triclinic()
-{
-  int i,j,k,n;
-  double *prd;
-
-  // volume-dependent factors
-  // adjust z dimension for 2d slab PPPM
-  // z dimension for 3d PPPM is zprd since slab_volfactor = 1.0
-
-  prd = domain->prd;
-
-  double xprd = prd[0];
-  double yprd = prd[1];
-  double zprd = prd[2];
-  double zprd_slab = zprd*slab_volfactor;
-  volume = xprd * yprd * zprd_slab;
-
-  // use lamda (0-1) coordinates
-
-  delxinv = nx_pppm;
-  delyinv = ny_pppm;
-  delzinv = nz_pppm;
-  delvolinv = delxinv*delyinv*delzinv/volume;
-
-  // fkx,fky,fkz for my FFT grid pts
-
-  double per_i,per_j,per_k;
-
-  n = 0;
-  for (k = nzlo_fft; k <= nzhi_fft; k++) {
-    per_k = k - nz_pppm*(2*k/nz_pppm);
-    for (j = nylo_fft; j <= nyhi_fft; j++) {
-      per_j = j - ny_pppm*(2*j/ny_pppm);
-      for (i = nxlo_fft; i <= nxhi_fft; i++) {
-        per_i = i - nx_pppm*(2*i/nx_pppm);
-
-        double unitk_lamda[3];
-        unitk_lamda[0] = 2.0*MY_PI*per_i;
-        unitk_lamda[1] = 2.0*MY_PI*per_j;
-        unitk_lamda[2] = 2.0*MY_PI*per_k;
-        x2lamdaT(&unitk_lamda[0],&unitk_lamda[0]);
-        fkx[n] = unitk_lamda[0];
-        fky[n] = unitk_lamda[1];
-        fkz[n] = unitk_lamda[2];
-        n++;
-      }
-    }
-  }
-
-  // virial coefficients
-
-  double sqk,vterm;
-
-  for (n = 0; n < nfft; n++) {
-    sqk = fkx[n]*fkx[n] + fky[n]*fky[n] + fkz[n]*fkz[n];
-    if (sqk == 0.0) {
-      vg[n][0] = 0.0;
-      vg[n][1] = 0.0;
-      vg[n][2] = 0.0;
-      vg[n][3] = 0.0;
-      vg[n][4] = 0.0;
-      vg[n][5] = 0.0;
-    } else {
-      vterm = -2.0 * (1.0/sqk + 0.25/(g_ewald*g_ewald));
-      vg[n][0] = 1.0 + vterm*fkx[n]*fkx[n];
-      vg[n][1] = 1.0 + vterm*fky[n]*fky[n];
-      vg[n][2] = 1.0 + vterm*fkz[n]*fkz[n];
-      vg[n][3] = vterm*fkx[n]*fky[n];
-      vg[n][4] = vterm*fkx[n]*fkz[n];
-      vg[n][5] = vterm*fky[n]*fkz[n];
-    }
-  }
-
-  compute_gf_ik_triclinic();
-}
-
-/* ----------------------------------------------------------------------
-   reset local grid arrays and communication stencils
-   called by fix balance b/c it changed sizes of processor sub-domains
-------------------------------------------------------------------------- */
-
-void PPPM::setup_grid()
-{
-  // free all arrays previously allocated
-
-  deallocate();
-  if (peratom_allocate_flag) deallocate_peratom();
-  if (group_allocate_flag) deallocate_groups();
-
-  // reset portion of global grid that each proc owns
-
-  set_grid_local();
-
-  // reallocate K-space dependent memory
-  // check if grid communication is now overlapping if not allowed
-  // don't invoke allocate peratom() or group(), will be allocated when needed
-
-  allocate();
-
-  cg->ghost_notify();
-  if (overlap_allowed == 0 && cg->ghost_overlap())
-    error->all(FLERR,"PPPM grid stencil extends "
-               "beyond nearest neighbor processor");
-  cg->setup();
-
-  // pre-compute Green's function denomiator expansion
-  // pre-compute 1d charge distribution coefficients
-
-  compute_gf_denom();
-  if (differentiation_flag == 1) compute_sf_precoeff();
-  compute_rho_coeff();
-
-  // pre-compute volume-dependent coeffs
-
-  setup();
-}
-
-/* ----------------------------------------------------------------------
-   compute the PPPM long-range force, energy, virial
-------------------------------------------------------------------------- */
-
-void PPPM::compute(int eflag, int vflag)
-{
-  int i,j;
-
-  // set energy/virial flags
-  // invoke allocate_peratom() if needed for first time
-
-  if (eflag || vflag) ev_setup(eflag,vflag);
-  else evflag = evflag_atom = eflag_global = vflag_global =
-         eflag_atom = vflag_atom = 0;
-
-  if (evflag_atom && !peratom_allocate_flag) {
-    allocate_peratom();
-    cg_peratom->ghost_notify();
-    cg_peratom->setup();
-  }
-
-  // convert atoms from box to lamda coords
-
-  if (triclinic == 0) boxlo = domain->boxlo;
-  else {
-    boxlo = domain->boxlo_lamda;
-    domain->x2lamda(atom->nlocal);
-  }
-
-  // extend size of per-atom arrays if necessary
-
-  if (atom->nlocal > nmax) {
-    memory->destroy(part2grid);
-    nmax = atom->nmax;
-    memory->create(part2grid,nmax,3,"pppm:part2grid");
-  }
-
-  // find grid points for all my particles
-  // map my particle charge onto my local 3d density grid
-
-  particle_map();
-  make_rho();
-
-  // all procs communicate density values from their ghost cells
-  //   to fully sum contribution in their 3d bricks
-  // remap from 3d decomposition to FFT decomposition
-
-  cg->reverse_comm(this,REVERSE_RHO);
-  brick2fft();
-
-  // compute potential gradient on my FFT grid and
-  //   portion of e_long on this proc's FFT grid
-  // return gradients (electric fields) in 3d brick decomposition
-  // also performs per-atom calculations via poisson_peratom()
-
-  poisson();
-
-  // all procs communicate E-field values
-  // to fill ghost cells surrounding their 3d bricks
-
-  if (differentiation_flag == 1) cg->forward_comm(this,FORWARD_AD);
-  else cg->forward_comm(this,FORWARD_IK);
-
-  // extra per-atom energy/virial communication
-
-  if (evflag_atom) {
-    if (differentiation_flag == 1 && vflag_atom) 
-      cg_peratom->forward_comm(this,FORWARD_AD_PERATOM);
-    else if (differentiation_flag == 0)
-      cg_peratom->forward_comm(this,FORWARD_IK_PERATOM);
-  }
-
-  // calculate the force on my particles
-
-  fieldforce();
-
-  // extra per-atom energy/virial communication
-
-  if (evflag_atom) fieldforce_peratom();
-
-  // sum global energy across procs and add in volume-dependent term
-
-  const double qscale = force->qqrd2e * scale;
-
-  if (eflag_global) {
-    double energy_all;
-    MPI_Allreduce(&energy,&energy_all,1,MPI_DOUBLE,MPI_SUM,world);
-    energy = energy_all;
-
-    energy *= 0.5*volume;
-    energy -= g_ewald*qsqsum/MY_PIS +
-      MY_PI2*qsum*qsum / (g_ewald*g_ewald*volume);
-    energy *= qscale;
-  }
-
-  // sum global virial across procs
-
-  if (vflag_global) {
-    double virial_all[6];
-    MPI_Allreduce(virial,virial_all,6,MPI_DOUBLE,MPI_SUM,world);
-    for (i = 0; i < 6; i++) virial[i] = 0.5*qscale*volume*virial_all[i];
-  }
-
-  // per-atom energy/virial
-  // energy includes self-energy correction
-  // notal accounts for TIP4P tallying eatom/vatom for ghost atoms
-
-  if (evflag_atom) {
-    double *q = atom->q;
-    int nlocal = atom->nlocal;
-    int ntotal = nlocal;
-    if (tip4pflag) ntotal += atom->nghost;
-
-    if (eflag_atom) {
-      for (i = 0; i < nlocal; i++) {
-        eatom[i] *= 0.5;
-        eatom[i] -= g_ewald*q[i]*q[i]/MY_PIS + MY_PI2*q[i]*qsum /
-          (g_ewald*g_ewald*volume);
-        eatom[i] *= qscale;
-      }
-      for (i = nlocal; i < ntotal; i++) eatom[i] *= 0.5*qscale;
-    }
-
-    if (vflag_atom) {
-      for (i = 0; i < ntotal; i++)
-        for (j = 0; j < 6; j++) vatom[i][j] *= 0.5*qscale;
-    }
-  }
-
-  // 2d slab correction
-
-  if (slabflag == 1) slabcorr();
-
-  // convert atoms back from lamda to box coords
-
-  if (triclinic) domain->lamda2x(atom->nlocal);
-}
-
-/* ----------------------------------------------------------------------
-   allocate memory that depends on # of K-vectors and order
-
-   creates the charge-density brick, the FFT-sized work arrays
-   (all sized from nfft_both), the wave-vector arrays fkx/fky/fkz,
-   the field bricks for the selected differentiation scheme,
-   the order-dependent stencil coefficient tables, and finally the
-   FFT3d, Remap and CommGrid helper objects
-   everything created here is released again in deallocate()
-------------------------------------------------------------------------- */
-
-void PPPM::allocate()
-{
-  memory->create3d_offset(density_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
-                          nxlo_out,nxhi_out,"pppm:density_brick");
-
-  memory->create(density_fft,nfft_both,"pppm:density_fft");
-  memory->create(greensfn,nfft_both,"pppm:greensfn");
-  memory->create(work1,2*nfft_both,"pppm:work1");  // work arrays are twice nfft_both long
-  memory->create(work2,2*nfft_both,"pppm:work2");
-  memory->create(vg,nfft_both,6,"pppm:vg");  // 6 values per FFT point
-
-  if (triclinic == 0) {  // one offset-indexed 1d array per dimension
-    memory->create1d_offset(fkx,nxlo_fft,nxhi_fft,"pppm:fkx");
-    memory->create1d_offset(fky,nylo_fft,nyhi_fft,"pppm:fky");
-    memory->create1d_offset(fkz,nzlo_fft,nzhi_fft,"pppm:fkz");
-  } else {  // triclinic: each array holds nfft_both entries instead
-    memory->create(fkx,nfft_both,"pppm:fkx");
-    memory->create(fky,nfft_both,"pppm:fky");
-    memory->create(fkz,nfft_both,"pppm:fkz");
-  }
-
-  if (differentiation_flag == 1) {  // u_brick plus six sf_precoeff arrays
-    memory->create3d_offset(u_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
-                          nxlo_out,nxhi_out,"pppm:u_brick");
-
-    memory->create(sf_precoeff1,nfft_both,"pppm:sf_precoeff1");
-    memory->create(sf_precoeff2,nfft_both,"pppm:sf_precoeff2");
-    memory->create(sf_precoeff3,nfft_both,"pppm:sf_precoeff3");
-    memory->create(sf_precoeff4,nfft_both,"pppm:sf_precoeff4");
-    memory->create(sf_precoeff5,nfft_both,"pppm:sf_precoeff5");
-    memory->create(sf_precoeff6,nfft_both,"pppm:sf_precoeff6");
-
-  } else {  // otherwise three bricks vdx/vdy/vdz, same extent as density_brick
-    memory->create3d_offset(vdx_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
-                            nxlo_out,nxhi_out,"pppm:vdx_brick");
-    memory->create3d_offset(vdy_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
-                            nxlo_out,nxhi_out,"pppm:vdy_brick");
-    memory->create3d_offset(vdz_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
-                            nxlo_out,nxhi_out,"pppm:vdz_brick");
-  }
-
-  // summation coeffs
-
-  order_allocated = order;  // remembered so deallocate() can rebuild the same offsets
-  if (!stagger_flag) memory->create(gf_b,order,"pppm:gf_b");  // gf_b is not created when stagger_flag is set
-  memory->create2d_offset(rho1d,3,-order/2,order/2,"pppm:rho1d");
-  memory->create2d_offset(drho1d,3,-order/2,order/2,"pppm:drho1d");
-  memory->create2d_offset(rho_coeff,order,(1-order)/2,order/2,"pppm:rho_coeff");
-  memory->create2d_offset(drho_coeff,order,(1-order)/2,order/2,
-                          "pppm:drho_coeff");
-
-  // create 2 FFTs and a Remap
-  // 1st FFT keeps data in FFT decomposition
-  // 2nd FFT returns data in 3d brick decomposition
-  // remap takes data from 3d brick to FFT decomposition
-
-  int tmp;  // passed by address to both FFT3d constructors; not read in this function
-
-  fft1 = new FFT3d(lmp,world,nx_pppm,ny_pppm,nz_pppm,
-                   nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft,
-                   nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft,
-                   0,0,&tmp);
-
-  fft2 = new FFT3d(lmp,world,nx_pppm,ny_pppm,nz_pppm,
-                   nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft,
-                   nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
-                   0,0,&tmp);
-
-  remap = new Remap(lmp,world,
-                    nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
-                    nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft,
-                    1,0,0,FFT_PRECISION);
-
-  // create ghost grid object for rho and electric field communication
-
-  int (*procneigh)[2] = comm->procneigh;  // indexed [dim][0 or 1] below
-
-  if (differentiation_flag == 1)  // the two branches differ only in the 3rd argument (1 vs 3)
-    cg = new CommGrid(lmp,world,1,1,
-                      nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
-                      nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out,
-                      procneigh[0][0],procneigh[0][1],procneigh[1][0],
-                      procneigh[1][1],procneigh[2][0],procneigh[2][1]);
-  else
-    cg = new CommGrid(lmp,world,3,1,
-                      nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
-                      nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out,
-                      procneigh[0][0],procneigh[0][1],procneigh[1][0],
-                      procneigh[1][1],procneigh[2][0],procneigh[2][1]);
-}
-
-/* ----------------------------------------------------------------------
-   deallocate memory that depends on # of K-vectors and order
-
-   releases the bricks, FFT-sized arrays, wave-vector arrays and
-   stencil tables, choosing the same branches on differentiation_flag
-   and triclinic that allocate() uses, then deletes the FFT3d, Remap
-   and CommGrid objects
-   the stencil-table offsets are rebuilt from order_allocated, not order
-------------------------------------------------------------------------- */
-
-void PPPM::deallocate()
-{
-  memory->destroy3d_offset(density_brick,nzlo_out,nylo_out,nxlo_out);
-
-  if (differentiation_flag == 1) {
-    memory->destroy3d_offset(u_brick,nzlo_out,nylo_out,nxlo_out);
-    memory->destroy(sf_precoeff1);
-    memory->destroy(sf_precoeff2);
-    memory->destroy(sf_precoeff3);
-    memory->destroy(sf_precoeff4);
-    memory->destroy(sf_precoeff5);
-    memory->destroy(sf_precoeff6);
-  } else {
-    memory->destroy3d_offset(vdx_brick,nzlo_out,nylo_out,nxlo_out);
-    memory->destroy3d_offset(vdy_brick,nzlo_out,nylo_out,nxlo_out);
-    memory->destroy3d_offset(vdz_brick,nzlo_out,nylo_out,nxlo_out);
-  }
-
-  memory->destroy(density_fft);
-  memory->destroy(greensfn);
-  memory->destroy(work1);
-  memory->destroy(work2);
-  memory->destroy(vg);
-
-  if (triclinic == 0) {  // offset arrays need their lower index to be freed
-    memory->destroy1d_offset(fkx,nxlo_fft);
-    memory->destroy1d_offset(fky,nylo_fft);
-    memory->destroy1d_offset(fkz,nzlo_fft);
-  } else {
-    memory->destroy(fkx);
-    memory->destroy(fky);
-    memory->destroy(fkz);
-  }
-
-  memory->destroy(gf_b);  // called unconditionally, although allocate() skips gf_b when stagger_flag is set
-  if (stagger_flag) gf_b = NULL;
-  memory->destroy2d_offset(rho1d,-order_allocated/2);  // offsets use the order recorded by allocate()
-  memory->destroy2d_offset(drho1d,-order_allocated/2);
-  memory->destroy2d_offset(rho_coeff,(1-order_allocated)/2);
-  memory->destroy2d_offset(drho_coeff,(1-order_allocated)/2);
-
-  delete fft1;
-  delete fft2;
-  delete remap;
-  delete cg;
-}
-
-/* ----------------------------------------------------------------------
-   allocate per-atom memory that depends on # of K-vectors and order
-
-   sets peratom_allocate_flag, creates the six bricks v0_brick..v5_brick
-   over the same index ranges as the other ghosted bricks, creates
-   u_brick when differentiation_flag != 1, and builds cg_peratom
-------------------------------------------------------------------------- */
-
-void PPPM::allocate_peratom()
-{
-  peratom_allocate_flag = 1;
-
-  if (differentiation_flag != 1)  // allocate() already creates u_brick when the flag is 1
-    memory->create3d_offset(u_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
-                            nxlo_out,nxhi_out,"pppm:u_brick");
-
-  memory->create3d_offset(v0_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
-                          nxlo_out,nxhi_out,"pppm:v0_brick");
-
-  memory->create3d_offset(v1_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
-                          nxlo_out,nxhi_out,"pppm:v1_brick");
-  memory->create3d_offset(v2_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
-                          nxlo_out,nxhi_out,"pppm:v2_brick");
-  memory->create3d_offset(v3_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
-                          nxlo_out,nxhi_out,"pppm:v3_brick");
-  memory->create3d_offset(v4_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
-                          nxlo_out,nxhi_out,"pppm:v4_brick");
-  memory->create3d_offset(v5_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
-                          nxlo_out,nxhi_out,"pppm:v5_brick");
-
-  // create ghost grid object for rho and electric field communication
-
-  int (*procneigh)[2] = comm->procneigh;  // indexed [dim][0 or 1] below
-
-  if (differentiation_flag == 1)  // the two branches differ only in the 3rd argument (6 vs 7)
-    cg_peratom =
-      new CommGrid(lmp,world,6,1,
-                   nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
-                   nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out,
-                   procneigh[0][0],procneigh[0][1],procneigh[1][0],
-                   procneigh[1][1],procneigh[2][0],procneigh[2][1]);
-  else
-    cg_peratom =
-      new CommGrid(lmp,world,7,1,
-                   nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
-                   nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out,
-                   procneigh[0][0],procneigh[0][1],procneigh[1][0],
-                   procneigh[1][1],procneigh[2][0],procneigh[2][1]);
-}
-
-/* ----------------------------------------------------------------------
-   deallocate per-atom memory that depends on # of K-vectors and order
-
-   clears peratom_allocate_flag, frees v0_brick..v5_brick, frees u_brick
-   when differentiation_flag != 1, and deletes cg_peratom
-------------------------------------------------------------------------- */
-
-void PPPM::deallocate_peratom()
-{
-  peratom_allocate_flag = 0;
-
-  memory->destroy3d_offset(v0_brick,nzlo_out,nylo_out,nxlo_out);
-  memory->destroy3d_offset(v1_brick,nzlo_out,nylo_out,nxlo_out);
-  memory->destroy3d_offset(v2_brick,nzlo_out,nylo_out,nxlo_out);
-  memory->destroy3d_offset(v3_brick,nzlo_out,nylo_out,nxlo_out);
-  memory->destroy3d_offset(v4_brick,nzlo_out,nylo_out,nxlo_out);
-  memory->destroy3d_offset(v5_brick,nzlo_out,nylo_out,nxlo_out);
-
-  if (differentiation_flag != 1)  // same condition under which allocate_peratom() creates u_brick
-    memory->destroy3d_offset(u_brick,nzlo_out,nylo_out,nxlo_out);
-
-  delete cg_peratom;
-}
-
-/* ----------------------------------------------------------------------
-   set global size of PPPM grid = nx,ny,nz_pppm
-   used for charge accumulation, FFTs, and electric field interpolation
-
-   also sets g_ewald (unless gewaldflag is set) and the grid spacings
-   h_x,h_y,h_z; the grid counts are only searched for when gridflag
-   is not set, but are always bumped up to factorable values
-   raises an error if accuracy <= 0, if the grid search does not
-   converge, or if any grid dimension reaches OFFSET
-------------------------------------------------------------------------- */
-
-void PPPM::set_grid_global()
-{
-  // use xprd,yprd,zprd (even if triclinic, and then scale later)
-  // adjust z dimension for 2d slab PPPM
-  // 3d PPPM just uses zprd since slab_volfactor = 1.0
-
-  double xprd = domain->xprd;
-  double yprd = domain->yprd;
-  double zprd = domain->zprd;
-  double zprd_slab = zprd*slab_volfactor;
-
-  // make initial g_ewald estimate
-  // based on desired accuracy and real space cutoff
-  // fluid-occupied volume used to estimate real-space error
-  // zprd used rather than zprd_slab
-
-  double h;
-  bigint natoms = atom->natoms;
-
-  if (!gewaldflag) {
-    if (accuracy <= 0.0)
-      error->all(FLERR,"KSpace accuracy must be > 0");
-    g_ewald = accuracy*sqrt(natoms*cutoff*xprd*yprd*zprd) / (2.0*q2);  // intermediate value, replaced on the next two lines
-    if (g_ewald >= 1.0) g_ewald = (1.35 - 0.15*log(accuracy))/cutoff;  // log() of the ratio would be >= 0 here, so a different formula is used
-    else g_ewald = sqrt(-log(g_ewald)) / cutoff;
-  }
-
-  // set optimal nx_pppm,ny_pppm,nz_pppm based on order and accuracy
-  // nz_pppm uses extended zprd_slab instead of zprd
-  // reduce it until accuracy target is met
-
-  if (!gridflag) {
-
-    if (differentiation_flag == 1 || stagger_flag) {
-
-      h = h_x = h_y = h_z = 4.0/g_ewald;  // starting spacing, shrunk by 5% per pass below
-      int count = 0;
-      while (1) {
-
-        // set grid dimension
-        nx_pppm = static_cast<int> (xprd/h_x);
-        ny_pppm = static_cast<int> (yprd/h_y);
-        nz_pppm = static_cast<int> (zprd_slab/h_z);
-
-        if (nx_pppm <= 1) nx_pppm = 2;  // at least 2 grid points per dimension
-        if (ny_pppm <= 1) ny_pppm = 2;
-        if (nz_pppm <= 1) nz_pppm = 2;
-
-        //set local grid dimension
-        int npey_fft,npez_fft;
-        if (nz_pppm >= nprocs) {
-          npey_fft = 1;
-          npez_fft = nprocs;
-        } else procs2grid2d(nprocs,ny_pppm,nz_pppm,&npey_fft,&npez_fft);
-
-        int me_y = me % npey_fft;
-        int me_z = me / npey_fft;
-
-        nxlo_fft = 0;  // the n*_fft bounds are set because compute_qopt() loops over them
-        nxhi_fft = nx_pppm - 1;
-        nylo_fft = me_y*ny_pppm/npey_fft;
-        nyhi_fft = (me_y+1)*ny_pppm/npey_fft - 1;
-        nzlo_fft = me_z*nz_pppm/npez_fft;
-        nzhi_fft = (me_z+1)*nz_pppm/npez_fft - 1;
-
-        double df_kspace = compute_df_kspace();
-
-        count++;
-
-        // break loop if the accuracy has been reached or
-        // too many loops have been performed
-
-        if (df_kspace <= accuracy) break;
-        if (count > 500) error->all(FLERR, "Could not compute grid size");
-        h *= 0.95;
-        h_x = h_y = h_z = h;
-      }
-
-    } else {
-
-      double err;
-      h_x = h_y = h_z = 1.0/g_ewald;  // starting spacing for the per-dimension search
-
-      nx_pppm = static_cast<int> (xprd/h_x) + 1;
-      ny_pppm = static_cast<int> (yprd/h_y) + 1;
-      nz_pppm = static_cast<int> (zprd_slab/h_z) + 1;
-
-      err = estimate_ik_error(h_x,xprd,natoms);
-      while (err > accuracy) {
-        err = estimate_ik_error(h_x,xprd,natoms);  // NOTE(review): evaluated before h_x is updated, so the loop test lags one increment behind nx_pppm
-        nx_pppm++;
-        h_x = xprd/nx_pppm;
-      }
-
-      err = estimate_ik_error(h_y,yprd,natoms);
-      while (err > accuracy) {
-        err = estimate_ik_error(h_y,yprd,natoms);  // same ordering as the x loop
-        ny_pppm++;
-        h_y = yprd/ny_pppm;
-      }
-
-      err = estimate_ik_error(h_z,zprd_slab,natoms);
-      while (err > accuracy) {
-        err = estimate_ik_error(h_z,zprd_slab,natoms);  // same ordering as the x loop
-        nz_pppm++;
-        h_z = zprd_slab/nz_pppm;
-      }
-    }
-
-    // scale grid for triclinic skew
-    
-    if (triclinic) {
-      double tmp[3];
-      tmp[0] = nx_pppm/xprd;
-      tmp[1] = ny_pppm/yprd;
-      tmp[2] = nz_pppm/zprd;  // NOTE(review): divides by zprd, whereas nz_pppm above was sized from zprd_slab - confirm intended
-      lamda2xT(&tmp[0],&tmp[0]);  // transforms tmp in place (input and output are the same array)
-      nx_pppm = static_cast<int>(tmp[0]) + 1;
-      ny_pppm = static_cast<int>(tmp[1]) + 1;
-      nz_pppm = static_cast<int>(tmp[2]) + 1;
-    }
-  }
-
-  // boost grid size until it is factorable
-
-  while (!factorable(nx_pppm)) nx_pppm++;
-  while (!factorable(ny_pppm)) ny_pppm++;
-  while (!factorable(nz_pppm)) nz_pppm++;
-
-  if (triclinic == 0) {  // final spacings recomputed from the final grid counts
-    h_x = xprd/nx_pppm;
-    h_y = yprd/ny_pppm;
-    h_z = zprd_slab/nz_pppm;
-  } else {
-    double tmp[3];
-    tmp[0] = nx_pppm;
-    tmp[1] = ny_pppm;
-    tmp[2] = nz_pppm;
-    x2lamdaT(&tmp[0],&tmp[0]);  // transforms tmp in place
-    h_x = 1.0/tmp[0];
-    h_y = 1.0/tmp[1];
-    h_z = 1.0/tmp[2];
-  }
-
-  if (nx_pppm >= OFFSET || ny_pppm >= OFFSET || nz_pppm >= OFFSET)
-    error->all(FLERR,"PPPM grid is too large");
-}
-
-/* ----------------------------------------------------------------------
-   check if all factors of n are in list of factors
-   return 1 if yes, 0 if no
-
-   n is repeatedly divided by the first entry of factors[0..nfactors-1]
-   that divides it evenly, until it reaches 1 or no entry divides it
-   any n <= 1 returns 1 without entering the loop
-------------------------------------------------------------------------- */
-
-int PPPM::factorable(int n)
-{
-  int i;
-
-  while (n > 1) {
-    for (i = 0; i < nfactors; i++) {
-      if (n % factors[i] == 0) {
-        n /= factors[i];
-        break;  // restart the scan from factors[0] with the reduced n
-      }
-    }
-    if (i == nfactors) return 0;  // scan finished without a break: no listed factor divides n
-  }
-
-  return 1;
-}
-
-/* ----------------------------------------------------------------------
-   compute estimated kspace force error
-
-   with differentiation_flag == 1 or stagger_flag the estimate is built
-   from compute_qopt(); otherwise it combines estimate_ik_error() for
-   the three dimensions using the current h_x,h_y,h_z
-   returns the estimate as a double
-------------------------------------------------------------------------- */
-
-double PPPM::compute_df_kspace()
-{
-  double xprd = domain->xprd;
-  double yprd = domain->yprd;
-  double zprd = domain->zprd;
-  double zprd_slab = zprd*slab_volfactor;
-  bigint natoms = atom->natoms;
-  double df_kspace = 0.0;
-  if (differentiation_flag == 1 || stagger_flag) {
-    double qopt = compute_qopt();  // note: compute_qopt() performs an MPI_Allreduce on world
-    df_kspace = sqrt(qopt/natoms)*q2/(xprd*yprd*zprd_slab);
-  } else {
-    double lprx = estimate_ik_error(h_x,xprd,natoms);
-    double lpry = estimate_ik_error(h_y,yprd,natoms);
-    double lprz = estimate_ik_error(h_z,zprd_slab,natoms);
-    df_kspace = sqrt(lprx*lprx + lpry*lpry + lprz*lprz) / sqrt(3.0);  // root-mean-square of the three per-dimension estimates
-  }
-  return df_kspace;
-}
-
-/* ----------------------------------------------------------------------
-   compute qopt
-
-   loops over this proc's FFT index range (n*lo_fft..n*hi_fft), and for
-   every point with non-zero wave vector accumulates a term built from
-   sums over the 5x5x5 set of images nx,ny,nz = -2..2
-   the per-proc partial sums are added across all procs with
-   MPI_Allreduce and the global total is returned
-   side effect: overwrites the non-local variable volume
-------------------------------------------------------------------------- */
-
-double PPPM::compute_qopt()
-{
-  double qopt = 0.0;
-  double *prd = domain->prd;
-  
-  const double xprd = prd[0];
-  const double yprd = prd[1];
-  const double zprd = prd[2];
-  const double zprd_slab = zprd*slab_volfactor;
-  volume = xprd * yprd * zprd_slab;  // not a local: this assignment is visible outside the function
-
-  const double unitkx = (MY_2PI/xprd);
-  const double unitky = (MY_2PI/yprd);
-  const double unitkz = (MY_2PI/zprd_slab);
-
-  double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz;
-  double u1, u2, sqk;
-  double sum1,sum2,sum3,sum4,dot2;
-
-  int k,l,m,nx,ny,nz;
-  const int twoorder = 2*order;
-
-  for (m = nzlo_fft; m <= nzhi_fft; m++) {
-    const int mper = m - nz_pppm*(2*m/nz_pppm);  // integer division: for 0 <= m < nz_pppm gives m in the lower half, m - nz_pppm in the upper half
-
-    for (l = nylo_fft; l <= nyhi_fft; l++) {
-      const int lper = l - ny_pppm*(2*l/ny_pppm);  // same wrap as mper
-
-      for (k = nxlo_fft; k <= nxhi_fft; k++) {
-        const int kper = k - nx_pppm*(2*k/nx_pppm);  // same wrap as mper
-
-        sqk = square(unitkx*kper) + square(unitky*lper) + square(unitkz*mper);
-
-        if (sqk != 0.0) {  // the zero wave vector contributes nothing
-
-          sum1 = 0.0;
-          sum2 = 0.0;
-          sum3 = 0.0;
-          sum4 = 0.0;
-          for (nx = -2; nx <= 2; nx++) {
-            qx = unitkx*(kper+nx_pppm*nx);
-            sx = exp(-0.25*square(qx/g_ewald));
-            argx = 0.5*qx*xprd/nx_pppm;
-            wx = powsinxx(argx,twoorder);
-            qx *= qx;  // qx now holds its own square (sx, argx already computed from the unsquared value)
-
-            for (ny = -2; ny <= 2; ny++) {
-              qy = unitky*(lper+ny_pppm*ny);
-              sy = exp(-0.25*square(qy/g_ewald));
-              argy = 0.5*qy*yprd/ny_pppm;
-              wy = powsinxx(argy,twoorder);
-              qy *= qy;  // squared in place, as for qx
-
-              for (nz = -2; nz <= 2; nz++) {
-                qz = unitkz*(mper+nz_pppm*nz);
-                sz = exp(-0.25*square(qz/g_ewald));
-                argz = 0.5*qz*zprd_slab/nz_pppm;
-                wz = powsinxx(argz,twoorder);
-                qz *= qz;  // squared in place, as for qx
-
-                dot2 = qx+qy+qz;  // sum of the already-squared components
-                u1   = sx*sy*sz;
-                u2   = wx*wy*wz;
-                sum1 += u1*u1/dot2*MY_4PI*MY_4PI;
-                sum2 += u1 * u2 * MY_4PI;
-                sum3 += u2;
-                sum4 += dot2*u2;
-              }
-            }
-          }
-          sum2 *= sum2;
-          qopt += sum1 - sum2/(sum3*sum4);
-        }
-      }
-    }
-  }
-  double qopt_all;
-  MPI_Allreduce(&qopt,&qopt_all,1,MPI_DOUBLE,MPI_SUM,world);  // collective call on world
-  return qopt_all;
-}
-
-/* ----------------------------------------------------------------------
-   estimate kspace force error for ik method
-
-   h = grid spacing and prd = box length of the dimension being
-   estimated, natoms = atom count used for normalization
-   evaluates a polynomial in (h*g_ewald)^2 with coefficients
-   acons[order][0..order-1], then scales it by q2, (h*g_ewald)^order
-   and 1/prd^2; returns the resulting value
-------------------------------------------------------------------------- */
-
-double PPPM::estimate_ik_error(double h, double prd, bigint natoms)
-{
-  double sum = 0.0;
-  for (int m = 0; m < order; m++)
-    sum += acons[order][m] * pow(h*g_ewald,2.0*m);  // term m carries (h*g_ewald)^(2m)
-  double value = q2 * pow(h*g_ewald,(double)order) *
-    sqrt(g_ewald*prd*sqrt(MY_2PI)*sum/natoms) / (prd*prd);
-
-  return value;
-}
-
-/* ----------------------------------------------------------------------
-   adjust the g_ewald parameter to near its optimal value
-   using a Newton-Raphson solver
-
-   performs at most LARGE updates of g_ewald, each subtracting
-   newton_raphson_f()/derivf(), and returns as soon as
-   |newton_raphson_f()| < SMALL
-   raises an error if no iteration meets that tolerance
-------------------------------------------------------------------------- */
-
-void PPPM::adjust_gewald()
-{
-  double dx;
-
-  for (int i = 0; i < LARGE; i++) {
-    dx = newton_raphson_f() / derivf();  // Newton step f(x)/f'(x); derivf() is not checked for zero
-    g_ewald -= dx;
-    if (fabs(newton_raphson_f()) < SMALL) return;  // converged: residual re-evaluated at the updated g_ewald
-  }
-
-  char str[128];  // only reached when the loop ends without converging
-  sprintf(str, "Could not compute g_ewald");
-  error->all(FLERR, str);
-}
-
-/* ----------------------------------------------------------------------
- Calculate f(x) using Newton-Raphson solver
-
- x is the current g_ewald; the value returned is
- df_rspace - df_kspace, where df_rspace is computed here from
- q2, g_ewald, cutoff, natoms and the box lengths, and df_kspace
- comes from compute_df_kspace()
- ------------------------------------------------------------------------- */
-
-double PPPM::newton_raphson_f()
-{
-  double xprd = domain->xprd;
-  double yprd = domain->yprd;
-  double zprd = domain->zprd;
-  bigint natoms = atom->natoms;
-
-  double df_rspace = 2.0*q2*exp(-g_ewald*g_ewald*cutoff*cutoff) /
-       sqrt(natoms*cutoff*xprd*yprd*zprd);  // uses zprd, not the slab-extended length
-
-  double df_kspace = compute_df_kspace();
-
-  return df_rspace - df_kspace;  // zero when the two error estimates are equal
-}
-
-/* ----------------------------------------------------------------------
- Calculate numerical derivative f'(x) using forward difference
- [f(x + h) - f(x)] / h
-
- f is newton_raphson_f() and x is g_ewald; g_ewald is shifted by h
- for the second evaluation and restored to its old value before
- the function returns
- ------------------------------------------------------------------------- */
-
-double PPPM::derivf()
-{
-  double h = 0.000001;  //Derivative step-size
-  double df,f1,f2,g_ewald_old;
-
-  f1 = newton_raphson_f();  // f(x) at the current g_ewald
-  g_ewald_old = g_ewald;
-  g_ewald += h;
-  f2 = newton_raphson_f();  // f(x + h)
-  g_ewald = g_ewald_old;  // undo the temporary shift
-  df = (f2 - f1)/h;
-
-  return df;
-}
-
-/* ----------------------------------------------------------------------
-   Calculate the final estimate of the accuracy
-
-   combines three error estimates in quadrature: df_kspace from
-   compute_df_kspace(), df_rspace computed here, and df_table from
-   estimate_table_accuracy(); returns the square root of the sum of
-   their squares
-------------------------------------------------------------------------- */
-
-double PPPM::final_accuracy()
-{
-  double xprd = domain->xprd;
-  double yprd = domain->yprd;
-  double zprd = domain->zprd;
-  double zprd_slab = zprd*slab_volfactor;  // computed but not used below
-  bigint natoms = atom->natoms;
-
-  double df_kspace = compute_df_kspace();
-  double q2_over_sqrt = q2 / sqrt(natoms*cutoff*xprd*yprd*zprd);
-  double df_rspace = 2.0 * q2_over_sqrt * exp(-g_ewald*g_ewald*cutoff*cutoff);  // same expression as df_rspace in newton_raphson_f()
-  double df_table = estimate_table_accuracy(q2_over_sqrt,df_rspace);
-  double estimated_accuracy = sqrt(df_kspace*df_kspace + df_rspace*df_rspace +
-   df_table*df_table);
-
-  return estimated_accuracy;
-}
-
-/* ----------------------------------------------------------------------
-   set local subset of PPPM/FFT grid that I own
-   n xyz lo/hi in = 3d brick that I own (inclusive)
-   n xyz lo/hi out = 3d brick + ghost cells in 6 directions (inclusive)
-   n xyz lo/hi fft = FFT columns that I own (all of x dim, 2d decomp in yz)
-
-   also sets nlower,nupper, shift, shiftone, boxlo, and the point
-   counts ngrid, nfft and nfft_both
-------------------------------------------------------------------------- */
-
-void PPPM::set_grid_local()
-{
-  // global indices of PPPM grid range from 0 to N-1
-  // nlo_in,nhi_in = lower/upper limits of the 3d sub-brick of
-  //   global PPPM grid that I own without ghost cells
-  // for slab PPPM, assign z grid as if it were not extended
-
-  nxlo_in = static_cast<int> (comm->xsplit[comm->myloc[0]] * nx_pppm);
-  nxhi_in = static_cast<int> (comm->xsplit[comm->myloc[0]+1] * nx_pppm) - 1;
-
-  nylo_in = static_cast<int> (comm->ysplit[comm->myloc[1]] * ny_pppm);
-  nyhi_in = static_cast<int> (comm->ysplit[comm->myloc[1]+1] * ny_pppm) - 1;
-
-  nzlo_in = static_cast<int>
-      (comm->zsplit[comm->myloc[2]] * nz_pppm/slab_volfactor);  // only z is divided by slab_volfactor
-  nzhi_in = static_cast<int>
-      (comm->zsplit[comm->myloc[2]+1] * nz_pppm/slab_volfactor) - 1;
-
-  // nlower,nupper = stencil size for mapping particles to PPPM grid
-
-  nlower = -(order-1)/2;
-  nupper = order/2;
-
-  // shift values for particle <-> grid mapping
-  // add/subtract OFFSET to avoid int(-0.75) = 0 when want it to be -1
-
-  if (order % 2) shift = OFFSET + 0.5;  // odd order
-  else shift = OFFSET;  // even order
-  if (order % 2) shiftone = 0.0;
-  else shiftone = 0.5;
-
-  // nlo_out,nhi_out = lower/upper limits of the 3d sub-brick of
-  //   global PPPM grid that my particles can contribute charge to
-  // effectively nlo_in,nhi_in + ghost cells
-  // nlo,nhi = global coords of grid pt to "lower left" of smallest/largest
-  //           position a particle in my box can be at
-  // dist[3] = particle position bound = subbox + skin/2.0 + qdist
-  //   qdist = offset due to TIP4P fictitious charge
-  //   convert to triclinic if necessary
-  // nlo_out,nhi_out = nlo,nhi + stencil size for particle mapping
-  // for slab PPPM, assign z grid as if it were not extended
-
-  double *prd,*sublo,*subhi;
-
-  if (triclinic == 0) {
-    prd = domain->prd;
-    boxlo = domain->boxlo;  // boxlo is not a local: assigned here, declared elsewhere
-    sublo = domain->sublo;
-    subhi = domain->subhi;
-  } else {  // triclinic: use the *_lamda versions of the same quantities
-    prd = domain->prd_lamda;
-    boxlo = domain->boxlo_lamda;
-    sublo = domain->sublo_lamda;
-    subhi = domain->subhi_lamda;
-  }
-
-  double xprd = prd[0];
-  double yprd = prd[1];
-  double zprd = prd[2];
-  double zprd_slab = zprd*slab_volfactor;
-
-  double dist[3];
-  double cuthalf = 0.5*neighbor->skin + qdist;
-  if (triclinic == 0) dist[0] = dist[1] = dist[2] = cuthalf;
-  else kspacebbox(cuthalf,&dist[0]);  // fills dist[] for the triclinic case
-
-  int nlo,nhi;
-
-  nlo = static_cast<int> ((sublo[0]-dist[0]-boxlo[0]) *
-                            nx_pppm/xprd + shift) - OFFSET;  // shift makes the cast operand positive; OFFSET is removed afterwards
-  nhi = static_cast<int> ((subhi[0]+dist[0]-boxlo[0]) *
-                            nx_pppm/xprd + shift) - OFFSET;
-  nxlo_out = nlo + nlower;
-  nxhi_out = nhi + nupper;
-
-  nlo = static_cast<int> ((sublo[1]-dist[1]-boxlo[1]) *
-                            ny_pppm/yprd + shift) - OFFSET;
-  nhi = static_cast<int> ((subhi[1]+dist[1]-boxlo[1]) *
-                            ny_pppm/yprd + shift) - OFFSET;
-  nylo_out = nlo + nlower;
-  nyhi_out = nhi + nupper;
-
-  nlo = static_cast<int> ((sublo[2]-dist[2]-boxlo[2]) *
-                            nz_pppm/zprd_slab + shift) - OFFSET;  // z uses the slab-extended length
-  nhi = static_cast<int> ((subhi[2]+dist[2]-boxlo[2]) *
-                            nz_pppm/zprd_slab + shift) - OFFSET;
-  nzlo_out = nlo + nlower;
-  nzhi_out = nhi + nupper;
-
-  if (stagger_flag) {  // one extra grid plane on the upper side of each dimension
-    nxhi_out++;
-    nyhi_out++;
-    nzhi_out++;
-  }
-
-  // for slab PPPM, change the grid boundary for processors at +z end
-  //   to include the empty volume between periodically repeating slabs
-  // for slab PPPM, want charge data communicated from -z proc to +z proc,
-  //   but not vice versa, also want field data communicated from +z proc to
-  //   -z proc, but not vice versa
-  // this is accomplished by nzhi_in = nzhi_out on +z end (no ghost cells)
-  // also ensure no other procs use ghost cells beyond +z limit
-
-  if (slabflag == 1) {
-    if (comm->myloc[2] == comm->procgrid[2]-1)
-      nzhi_in = nzhi_out = nz_pppm - 1;
-    nzhi_out = MIN(nzhi_out,nz_pppm-1);
-  }
-    
-  // decomposition of FFT mesh
-  // global indices range from 0 to N-1
-  // proc owns entire x-dimension, clumps of columns in y,z dimensions
-  // npey_fft,npez_fft = # of procs in y,z dims
-  // if nprocs is small enough, proc can own 1 or more entire xy planes,
-  //   else proc owns 2d sub-blocks of yz plane
-  // me_y,me_z = which proc (0-npe_fft-1) I am in y,z dimensions
-  // nlo_fft,nhi_fft = lower/upper limit of the section
-  //   of the global FFT mesh that I own
-
-  int npey_fft,npez_fft;
-  if (nz_pppm >= nprocs) {
-    npey_fft = 1;
-    npez_fft = nprocs;
-  } else procs2grid2d(nprocs,ny_pppm,nz_pppm,&npey_fft,&npez_fft);
-
-  int me_y = me % npey_fft;
-  int me_z = me / npey_fft;
-
-  nxlo_fft = 0;
-  nxhi_fft = nx_pppm - 1;
-  nylo_fft = me_y*ny_pppm/npey_fft;
-  nyhi_fft = (me_y+1)*ny_pppm/npey_fft - 1;
-  nzlo_fft = me_z*nz_pppm/npez_fft;
-  nzhi_fft = (me_z+1)*nz_pppm/npez_fft - 1;
-
-  // PPPM grid pts owned by this proc, including ghosts
-
-  ngrid = (nxhi_out-nxlo_out+1) * (nyhi_out-nylo_out+1) *
-    (nzhi_out-nzlo_out+1);
-
-  // FFT grids owned by this proc, without ghosts
-  // nfft = FFT points in FFT decomposition on this proc
-  // nfft_brick = FFT points in 3d brick-decomposition on this proc
-  // nfft_both = greater of 2 values
-
-  nfft = (nxhi_fft-nxlo_fft+1) * (nyhi_fft-nylo_fft+1) *
-    (nzhi_fft-nzlo_fft+1);
-  int nfft_brick = (nxhi_in-nxlo_in+1) * (nyhi_in-nylo_in+1) *
-    (nzhi_in-nzlo_in+1);
-  nfft_both = MAX(nfft,nfft_brick);  // allocate() sizes its FFT arrays from this value
-}
-
-/* ----------------------------------------------------------------------
-   pre-compute Green's function denominator expansion coeffs, Gamma(2n)
-
-   fills gf_b[0..order-1]: starts from gf_b = {1,0,...,0}, applies the
-   in-place recurrence below for m = 1..order-1, then divides every
-   entry by (2*order-1)!
-------------------------------------------------------------------------- */
-
-void PPPM::compute_gf_denom()
-{
-  int k,l,m;
-
-  for (l = 1; l < order; l++) gf_b[l] = 0.0;
-  gf_b[0] = 1.0;
-
-  for (m = 1; m < order; m++) {
-    for (l = m; l > 0; l--)  // descending l, so gf_b[l-1] is still the previous pass's value
-      gf_b[l] = 4.0 * (gf_b[l]*(l-m)*(l-m-0.5)-gf_b[l-1]*(l-m-1)*(l-m-1));
-    gf_b[0] = 4.0 * (gf_b[0]*(l-m)*(l-m-0.5));  // l == 0 here, left over from the loop above
-  }
-
-  bigint ifact = 1;
-  for (k = 1; k < 2*order; k++) ifact *= k;  // ifact = (2*order-1)!
-  double gaminv = 1.0/ifact;
-  for (l = 0; l < order; l++) gf_b[l] *= gaminv;
-}
-
-/* ----------------------------------------------------------------------
-   pre-compute modified (Hockney-Eastwood) Coulomb Green's function
-
-   fills greensfn[] for this proc's FFT index range, one entry per
-   (m,l,k) grid point with k varying fastest
-   points with zero wave vector get 0.0; every other point gets
-   numerator*sum1/denominator, where sum1 runs over the images
-   nx,ny,nz in [-nbx,nbx] x [-nby,nby] x [-nbz,nbz]
-------------------------------------------------------------------------- */
-
-void PPPM::compute_gf_ik()
-{
-  const double * const prd = domain->prd;
-
-  const double xprd = prd[0];
-  const double yprd = prd[1];
-  const double zprd = prd[2];
-  const double zprd_slab = zprd*slab_volfactor;
-  const double unitkx = (MY_2PI/xprd);
-  const double unitky = (MY_2PI/yprd);
-  const double unitkz = (MY_2PI/zprd_slab);
-
-  double snx,sny,snz;
-  double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz;
-  double sum1,dot1,dot2;
-  double numerator,denominator;
-  double sqk;
-
-  int k,l,m,n,nx,ny,nz,kper,lper,mper;
-
-  const int nbx = static_cast<int> ((g_ewald*xprd/(MY_PI*nx_pppm)) *
-                                    pow(-log(EPS_HOC),0.25));  // image range in x, derived from g_ewald, grid spacing and EPS_HOC
-  const int nby = static_cast<int> ((g_ewald*yprd/(MY_PI*ny_pppm)) *
-                                    pow(-log(EPS_HOC),0.25));
-  const int nbz = static_cast<int> ((g_ewald*zprd_slab/(MY_PI*nz_pppm)) *
-                                    pow(-log(EPS_HOC),0.25));
-  const int twoorder = 2*order;
-
-  n = 0;  // running index into greensfn
-  for (m = nzlo_fft; m <= nzhi_fft; m++) {
-    mper = m - nz_pppm*(2*m/nz_pppm);  // integer division: for 0 <= m < nz_pppm gives m in the lower half, m - nz_pppm in the upper half
-    snz = square(sin(0.5*unitkz*mper*zprd_slab/nz_pppm));
-
-    for (l = nylo_fft; l <= nyhi_fft; l++) {
-      lper = l - ny_pppm*(2*l/ny_pppm);  // same wrap as mper
-      sny = square(sin(0.5*unitky*lper*yprd/ny_pppm));
-
-      for (k = nxlo_fft; k <= nxhi_fft; k++) {
-        kper = k - nx_pppm*(2*k/nx_pppm);  // same wrap as mper
-        snx = square(sin(0.5*unitkx*kper*xprd/nx_pppm));
-
-        sqk = square(unitkx*kper) + square(unitky*lper) + square(unitkz*mper);
-
-        if (sqk != 0.0) {
-          numerator = 12.5663706/sqk;  // NOTE(review): literal looks like 4*pi to 9 digits - confirm before swapping in a constant
-          denominator = gf_denom(snx,sny,snz);
-          sum1 = 0.0;
-
-          for (nx = -nbx; nx <= nbx; nx++) {
-            qx = unitkx*(kper+nx_pppm*nx);
-            sx = exp(-0.25*square(qx/g_ewald));
-            argx = 0.5*qx*xprd/nx_pppm;
-            wx = powsinxx(argx,twoorder);
-
-            for (ny = -nby; ny <= nby; ny++) {
-              qy = unitky*(lper+ny_pppm*ny);
-              sy = exp(-0.25*square(qy/g_ewald));
-              argy = 0.5*qy*yprd/ny_pppm;
-              wy = powsinxx(argy,twoorder);
-
-              for (nz = -nbz; nz <= nbz; nz++) {
-                qz = unitkz*(mper+nz_pppm*nz);
-                sz = exp(-0.25*square(qz/g_ewald));
-                argz = 0.5*qz*zprd_slab/nz_pppm;
-                wz = powsinxx(argz,twoorder);
-
-                dot1 = unitkx*kper*qx + unitky*lper*qy + unitkz*mper*qz;  // base wave vector dotted with the image wave vector
-                dot2 = qx*qx+qy*qy+qz*qz;  // squared length of the image wave vector
-                sum1 += (dot1/dot2) * sx*sy*sz * wx*wy*wz;
-              }
-            }
-          }
-          greensfn[n++] = numerator*sum1/denominator;
-        } else greensfn[n++] = 0.0;  // zero wave vector
-      }
-    }
-  }
-}
-
-/* ----------------------------------------------------------------------
-   pre-compute modified (Hockney-Eastwood) Coulomb Green's function
-   for a triclinic system
-
-   same loop structure and greensfn[] ordering as compute_gf_ik(), but
-   the wave vectors and the image shifts are passed through x2lamdaT()
-   and the image ranges nbx,nby,nbz through lamda2xT(), instead of
-   being scaled by per-dimension box lengths
-------------------------------------------------------------------------- */
-
-void PPPM::compute_gf_ik_triclinic()
-{
-  double snx,sny,snz;
-  double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz;
-  double sum1,dot1,dot2;
-  double numerator,denominator;
-  double sqk;
-
-  int k,l,m,n,nx,ny,nz,kper,lper,mper;
-
-  double tmp[3];
-  tmp[0] = (g_ewald/(MY_PI*nx_pppm)) * pow(-log(EPS_HOC),0.25);
-  tmp[1] = (g_ewald/(MY_PI*ny_pppm)) * pow(-log(EPS_HOC),0.25);
-  tmp[2] = (g_ewald/(MY_PI*nz_pppm)) * pow(-log(EPS_HOC),0.25);
-  lamda2xT(&tmp[0],&tmp[0]);  // transforms tmp in place (input and output are the same array)
-  const int nbx = static_cast<int> (tmp[0]);  // image range per dimension, truncated to int
-  const int nby = static_cast<int> (tmp[1]);
-  const int nbz = static_cast<int> (tmp[2]);
-
-  const int twoorder = 2*order;
-
-  n = 0;  // running index into greensfn
-  for (m = nzlo_fft; m <= nzhi_fft; m++) {
-    mper = m - nz_pppm*(2*m/nz_pppm);  // integer division: for 0 <= m < nz_pppm gives m in the lower half, m - nz_pppm in the upper half
-    snz = square(sin(MY_PI*mper/nz_pppm));
-
-    for (l = nylo_fft; l <= nyhi_fft; l++) {
-      lper = l - ny_pppm*(2*l/ny_pppm);  // same wrap as mper
-      sny = square(sin(MY_PI*lper/ny_pppm));
-
-      for (k = nxlo_fft; k <= nxhi_fft; k++) {
-        kper = k - nx_pppm*(2*k/nx_pppm);  // same wrap as mper
-        snx = square(sin(MY_PI*kper/nx_pppm));
-
-        double unitk_lamda[3];
-        unitk_lamda[0] = 2.0*MY_PI*kper;
-        unitk_lamda[1] = 2.0*MY_PI*lper;
-        unitk_lamda[2] = 2.0*MY_PI*mper;
-        x2lamdaT(&unitk_lamda[0],&unitk_lamda[0]);  // transforms the wave vector in place
-
-        sqk = square(unitk_lamda[0]) + square(unitk_lamda[1]) + square(unitk_lamda[2]);
-
-        if (sqk != 0.0) {
-          numerator = 12.5663706/sqk;  // NOTE(review): literal looks like 4*pi to 9 digits - confirm before swapping in a constant
-          denominator = gf_denom(snx,sny,snz);
-          sum1 = 0.0;
-
-          for (nx = -nbx; nx <= nbx; nx++) {
-            argx = MY_PI*kper/nx_pppm + MY_PI*nx;
-            wx = powsinxx(argx,twoorder);
-
-            for (ny = -nby; ny <= nby; ny++) {
-              argy = MY_PI*lper/ny_pppm + MY_PI*ny;
-              wy = powsinxx(argy,twoorder);
-
-              for (nz = -nbz; nz <= nbz; nz++) {
-                argz = MY_PI*mper/nz_pppm + MY_PI*nz;
-                wz = powsinxx(argz,twoorder);
-
-                double b[3];  // image shift for (nx,ny,nz), transformed the same way as unitk_lamda
-                b[0] = 2.0*MY_PI*nx_pppm*nx;
-                b[1] = 2.0*MY_PI*ny_pppm*ny;
-                b[2] = 2.0*MY_PI*nz_pppm*nz;
-                x2lamdaT(&b[0],&b[0]);
-
-                qx = unitk_lamda[0]+b[0];
-                sx = exp(-0.25*square(qx/g_ewald));
-
-                qy = unitk_lamda[1]+b[1];
-                sy = exp(-0.25*square(qy/g_ewald));
-
-                qz = unitk_lamda[2]+b[2];
-                sz = exp(-0.25*square(qz/g_ewald));
-
-                dot1 = unitk_lamda[0]*qx + unitk_lamda[1]*qy + unitk_lamda[2]*qz;  // base wave vector dotted with the image wave vector
-                dot2 = qx*qx+qy*qy+qz*qz;  // squared length of the image wave vector
-                sum1 += (dot1/dot2) * sx*sy*sz * wx*wy*wz;
-              }
-            }
-          }
-          greensfn[n++] = numerator*sum1/denominator;
-        } else greensfn[n++] = 0.0;  // zero wave vector
-      }
-    }
-  }
-}
-
-/* ----------------------------------------------------------------------
-   compute optimized Green's function for energy calculation
-------------------------------------------------------------------------- */
-
-void PPPM::compute_gf_ad()
-{
-  const double * const prd = domain->prd;
-
-  const double xprd = prd[0];
-  const double yprd = prd[1];
-  const double zprd = prd[2];
-  const double zprd_slab = zprd*slab_volfactor;
-  const double unitkx = (MY_2PI/xprd);
-  const double unitky = (MY_2PI/yprd);
-  const double unitkz = (MY_2PI/zprd_slab);
-
-  double snx,sny,snz,sqk;
-  double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz;
-  double numerator,denominator;
-  int k,l,m,n,kper,lper,mper;
-
-  const int twoorder = 2*order;
-
-  for (int i = 0; i < 6; i++) sf_coeff[i] = 0.0;
-
-  n = 0;
-  for (m = nzlo_fft; m <= nzhi_fft; m++) {
-    mper = m - nz_pppm*(2*m/nz_pppm);
-    qz = unitkz*mper;
-    snz = square(sin(0.5*qz*zprd_slab/nz_pppm));
-    sz = exp(-0.25*square(qz/g_ewald));
-    argz = 0.5*qz*zprd_slab/nz_pppm;
-    wz = powsinxx(argz,twoorder);
-
-    for (l = nylo_fft; l <= nyhi_fft; l++) {
-      lper = l - ny_pppm*(2*l/ny_pppm);
-      qy = unitky*lper;
-      sny = square(sin(0.5*qy*yprd/ny_pppm));
-      sy = exp(-0.25*square(qy/g_ewald));
-      argy = 0.5*qy*yprd/ny_pppm;
-      wy = powsinxx(argy,twoorder);
-
-      for (k = nxlo_fft; k <= nxhi_fft; k++) {
-        kper = k - nx_pppm*(2*k/nx_pppm);
-        qx = unitkx*kper;
-        snx = square(sin(0.5*qx*xprd/nx_pppm));
-        sx = exp(-0.25*square(qx/g_ewald));
-        argx = 0.5*qx*xprd/nx_pppm;
-        wx = powsinxx(argx,twoorder);
-
-        sqk = qx*qx + qy*qy + qz*qz;
-
-        if (sqk != 0.0) {
-          numerator = MY_4PI/sqk;
-          denominator = gf_denom(snx,sny,snz);
-          greensfn[n] = numerator*sx*sy*sz*wx*wy*wz/denominator;
-          sf_coeff[0] += sf_precoeff1[n]*greensfn[n];
-          sf_coeff[1] += sf_precoeff2[n]*greensfn[n];
-          sf_coeff[2] += sf_precoeff3[n]*greensfn[n];
-          sf_coeff[3] += sf_precoeff4[n]*greensfn[n];
-          sf_coeff[4] += sf_precoeff5[n]*greensfn[n];
-          sf_coeff[5] += sf_precoeff6[n]*greensfn[n];
-          n++;
-        } else {
-          greensfn[n] = 0.0;
-          sf_coeff[0] += sf_precoeff1[n]*greensfn[n];
-          sf_coeff[1] += sf_precoeff2[n]*greensfn[n];
-          sf_coeff[2] += sf_precoeff3[n]*greensfn[n];
-          sf_coeff[3] += sf_precoeff4[n]*greensfn[n];
-          sf_coeff[4] += sf_precoeff5[n]*greensfn[n];
-          sf_coeff[5] += sf_precoeff6[n]*greensfn[n];
-          n++;
-        }
-      }
-    }
-  }
-
-  // compute the coefficients for the self-force correction
-
-  double prex, prey, prez;
-  prex = prey = prez = MY_PI/volume;
-  prex *= nx_pppm/xprd;
-  prey *= ny_pppm/yprd;
-  prez *= nz_pppm/zprd_slab;
-  sf_coeff[0] *= prex;
-  sf_coeff[1] *= prex*2;
-  sf_coeff[2] *= prey;
-  sf_coeff[3] *= prey*2;
-  sf_coeff[4] *= prez;
-  sf_coeff[5] *= prez*2;
-
-  // communicate values with other procs
-
-  double tmp[6];
-  MPI_Allreduce(sf_coeff,tmp,6,MPI_DOUBLE,MPI_SUM,world);
-  for (n = 0; n < 6; n++) sf_coeff[n] = tmp[n];
-}
-
-/* ----------------------------------------------------------------------
-   compute self force coefficients for ad-differentiation scheme
-------------------------------------------------------------------------- */
-
-void PPPM::compute_sf_precoeff()
-{
-  int i,k,l,m,n;
-  int nx,ny,nz,kper,lper,mper;
-  double wx0[5],wy0[5],wz0[5],wx1[5],wy1[5],wz1[5],wx2[5],wy2[5],wz2[5];
-  double qx0,qy0,qz0,qx1,qy1,qz1,qx2,qy2,qz2;
-  double u0,u1,u2,u3,u4,u5,u6;
-  double sum1,sum2,sum3,sum4,sum5,sum6;
-
-  n = 0;
-  for (m = nzlo_fft; m <= nzhi_fft; m++) {
-    mper = m - nz_pppm*(2*m/nz_pppm);
-
-    for (l = nylo_fft; l <= nyhi_fft; l++) {
-      lper = l - ny_pppm*(2*l/ny_pppm);
-
-      for (k = nxlo_fft; k <= nxhi_fft; k++) {
-        kper = k - nx_pppm*(2*k/nx_pppm);
-
-        sum1 = sum2 = sum3 = sum4 = sum5 = sum6 = 0.0;
-        for (i = 0; i < 5; i++) {
-
-          qx0 = MY_2PI*(kper+nx_pppm*(i-2));
-          qx1 = MY_2PI*(kper+nx_pppm*(i-1));
-          qx2 = MY_2PI*(kper+nx_pppm*(i  ));
-          wx0[i] = powsinxx(0.5*qx0/nx_pppm,order);
-          wx1[i] = powsinxx(0.5*qx1/nx_pppm,order);
-          wx2[i] = powsinxx(0.5*qx2/nx_pppm,order);
-
-          qy0 = MY_2PI*(lper+ny_pppm*(i-2));
-          qy1 = MY_2PI*(lper+ny_pppm*(i-1));
-          qy2 = MY_2PI*(lper+ny_pppm*(i  ));
-          wy0[i] = powsinxx(0.5*qy0/ny_pppm,order);
-          wy1[i] = powsinxx(0.5*qy1/ny_pppm,order);
-          wy2[i] = powsinxx(0.5*qy2/ny_pppm,order);
-
-          qz0 = MY_2PI*(mper+nz_pppm*(i-2));
-          qz1 = MY_2PI*(mper+nz_pppm*(i-1));
-          qz2 = MY_2PI*(mper+nz_pppm*(i  ));
-
-          wz0[i] = powsinxx(0.5*qz0/nz_pppm,order);
-          wz1[i] = powsinxx(0.5*qz1/nz_pppm,order);
-          wz2[i] = powsinxx(0.5*qz2/nz_pppm,order);
-        }
-
-        for (nx = 0; nx < 5; nx++) {
-          for (ny = 0; ny < 5; ny++) {
-            for (nz = 0; nz < 5; nz++) {
-              u0 = wx0[nx]*wy0[ny]*wz0[nz];
-              u1 = wx1[nx]*wy0[ny]*wz0[nz];
-              u2 = wx2[nx]*wy0[ny]*wz0[nz];
-              u3 = wx0[nx]*wy1[ny]*wz0[nz];
-              u4 = wx0[nx]*wy2[ny]*wz0[nz];
-              u5 = wx0[nx]*wy0[ny]*wz1[nz];
-              u6 = wx0[nx]*wy0[ny]*wz2[nz];
-
-              sum1 += u0*u1;
-              sum2 += u0*u2;
-              sum3 += u0*u3;
-              sum4 += u0*u4;
-              sum5 += u0*u5;
-              sum6 += u0*u6;
-            }
-          }
-        }
-
-        // store values
-
-        sf_precoeff1[n] = sum1;
-        sf_precoeff2[n] = sum2;
-        sf_precoeff3[n] = sum3;
-        sf_precoeff4[n] = sum4;
-        sf_precoeff5[n] = sum5;
-        sf_precoeff6[n++] = sum6;
-      }
-    }
-  }
-}
-
-/* ----------------------------------------------------------------------
-   find center grid pt for each of my particles
-   check that full stencil for the particle will fit in my 3d brick
-   store central grid pt indices in part2grid array
-------------------------------------------------------------------------- */
-
-void PPPM::particle_map()
-{
-  int nx,ny,nz;
-
-  double **x = atom->x;
-  int nlocal = atom->nlocal;
-
-  int flag = 0;
-  for (int i = 0; i < nlocal; i++) {
-
-    // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
-    // current particle coord can be outside global and local box
-    // add/subtract OFFSET to avoid int(-0.75) = 0 when want it to be -1
-
-    nx = static_cast<int> ((x[i][0]-boxlo[0])*delxinv+shift) - OFFSET;
-    ny = static_cast<int> ((x[i][1]-boxlo[1])*delyinv+shift) - OFFSET;
-    nz = static_cast<int> ((x[i][2]-boxlo[2])*delzinv+shift) - OFFSET;
-
-    part2grid[i][0] = nx;
-    part2grid[i][1] = ny;
-    part2grid[i][2] = nz;
-
-    // check that entire stencil around nx,ny,nz will fit in my 3d brick
-
-    if (nx+nlower < nxlo_out || nx+nupper > nxhi_out ||
-        ny+nlower < nylo_out || ny+nupper > nyhi_out ||
-        nz+nlower < nzlo_out || nz+nupper > nzhi_out)
-      flag = 1;
-  }
-
-  if (flag) error->one(FLERR,"Out of range atoms - cannot compute PPPM");
-}
-
-/* ----------------------------------------------------------------------
-   create discretized "density" on section of global grid due to my particles
-   density(x,y,z) = charge "density" at grid points of my 3d brick
-   (nxlo:nxhi,nylo:nyhi,nzlo:nzhi) is extent of my brick (including ghosts)
-   in global grid
-------------------------------------------------------------------------- */
-
-void PPPM::make_rho()
-{
-  int l,m,n,nx,ny,nz,mx,my,mz;
-  FFT_SCALAR dx,dy,dz,x0,y0,z0;
-
-  // clear 3d density array
-
-  memset(&(density_brick[nzlo_out][nylo_out][nxlo_out]),0,
-         ngrid*sizeof(FFT_SCALAR));
-
-  // loop over my charges, add their contribution to nearby grid points
-  // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
-  // (dx,dy,dz) = distance to "lower left" grid pt
-  // (mx,my,mz) = global coords of moving stencil pt
-
-  double *q = atom->q;
-  double **x = atom->x;
-  int nlocal = atom->nlocal;
-
-  for (int i = 0; i < nlocal; i++) {
-
-    nx = part2grid[i][0];
-    ny = part2grid[i][1];
-    nz = part2grid[i][2];
-    dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv;
-    dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv;
-    dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv;
-
-    compute_rho1d(dx,dy,dz);
-
-    z0 = delvolinv * q[i];
-    for (n = nlower; n <= nupper; n++) {
-      mz = n+nz;
-      y0 = z0*rho1d[2][n];
-      for (m = nlower; m <= nupper; m++) {
-        my = m+ny;
-        x0 = y0*rho1d[1][m];
-        for (l = nlower; l <= nupper; l++) {
-          mx = l+nx;
-          density_brick[mz][my][mx] += x0*rho1d[0][l];
-        }
-      }
-    }
-  }
-}
-
-/* ----------------------------------------------------------------------
-   remap density from 3d brick decomposition to FFT decomposition
-------------------------------------------------------------------------- */
-
-void PPPM::brick2fft()
-{
-  int n,ix,iy,iz;
-
-  // copy grabs inner portion of density from 3d brick
-  // remap could be done as pre-stage of FFT,
-  //   but this works optimally on only double values, not complex values
-
-  n = 0;
-  for (iz = nzlo_in; iz <= nzhi_in; iz++)
-    for (iy = nylo_in; iy <= nyhi_in; iy++)
-      for (ix = nxlo_in; ix <= nxhi_in; ix++)
-        density_fft[n++] = density_brick[iz][iy][ix];
-
-  remap->perform(density_fft,density_fft,work1);
-}
-
-/* ----------------------------------------------------------------------
-   FFT-based Poisson solver
-------------------------------------------------------------------------- */
-
-void PPPM::poisson()
-{
-  if (differentiation_flag == 1) poisson_ad();
-  else poisson_ik();
-}
-
-/* ----------------------------------------------------------------------
-   FFT-based Poisson solver for ik
-------------------------------------------------------------------------- */
-
-void PPPM::poisson_ik()
-{
-  int i,j,k,n;
-  double eng;
-
-  // transform charge density (r -> k)
-
-  n = 0;
-  for (i = 0; i < nfft; i++) {
-    work1[n++] = density_fft[i];
-    work1[n++] = ZEROF;
-  }
-
-  fft1->compute(work1,work1,1);
-
-  // global energy and virial contribution
-
-  double scaleinv = 1.0/(nx_pppm*ny_pppm*nz_pppm);
-  double s2 = scaleinv*scaleinv;
-
-  if (eflag_global || vflag_global) {
-    if (vflag_global) {
-      n = 0;
-      for (i = 0; i < nfft; i++) {
-        eng = s2 * greensfn[i] * (work1[n]*work1[n] + work1[n+1]*work1[n+1]);
-        for (j = 0; j < 6; j++) virial[j] += eng*vg[i][j];
-        if (eflag_global) energy += eng;
-        n += 2;
-      }
-    } else {
-      n = 0;
-      for (i = 0; i < nfft; i++) {
-        energy +=
-          s2 * greensfn[i] * (work1[n]*work1[n] + work1[n+1]*work1[n+1]);
-        n += 2;
-      }
-    }
-  }
-
-  // scale by 1/total-grid-pts to get rho(k)
-  // multiply by Green's function to get V(k)
-
-  n = 0;
-  for (i = 0; i < nfft; i++) {
-    work1[n++] *= scaleinv * greensfn[i];
-    work1[n++] *= scaleinv * greensfn[i];
-  }
-
-  // extra FFTs for per-atom energy/virial
-
-  if (evflag_atom) poisson_peratom();
-
-  // triclinic system
-
-  if (triclinic) {
-    poisson_ik_triclinic();
-    return;
-  }
-
-  // compute gradients of V(r) in each of 3 dims by transformimg -ik*V(k)
-  // FFT leaves data in 3d brick decomposition
-  // copy it into inner portion of vdx,vdy,vdz arrays
-
-  // x direction gradient
-
-  n = 0;
-  for (k = nzlo_fft; k <= nzhi_fft; k++)
-    for (j = nylo_fft; j <= nyhi_fft; j++)
-      for (i = nxlo_fft; i <= nxhi_fft; i++) {
-        work2[n] = fkx[i]*work1[n+1];
-        work2[n+1] = -fkx[i]*work1[n];
-        n += 2;
-      }
-
-  fft2->compute(work2,work2,-1);
-
-  n = 0;
-  for (k = nzlo_in; k <= nzhi_in; k++)
-    for (j = nylo_in; j <= nyhi_in; j++)
-      for (i = nxlo_in; i <= nxhi_in; i++) {
-        vdx_brick[k][j][i] = work2[n];
-        n += 2;
-      }
-
-  // y direction gradient
-
-  n = 0;
-  for (k = nzlo_fft; k <= nzhi_fft; k++)
-    for (j = nylo_fft; j <= nyhi_fft; j++)
-      for (i = nxlo_fft; i <= nxhi_fft; i++) {
-        work2[n] = fky[j]*work1[n+1];
-        work2[n+1] = -fky[j]*work1[n];
-        n += 2;
-      }
-
-  fft2->compute(work2,work2,-1);
-
-  n = 0;
-  for (k = nzlo_in; k <= nzhi_in; k++)
-    for (j = nylo_in; j <= nyhi_in; j++)
-      for (i = nxlo_in; i <= nxhi_in; i++) {
-        vdy_brick[k][j][i] = work2[n];
-        n += 2;
-      }
-
-  // z direction gradient
-
-  n = 0;
-  for (k = nzlo_fft; k <= nzhi_fft; k++)
-    for (j = nylo_fft; j <= nyhi_fft; j++)
-      for (i = nxlo_fft; i <= nxhi_fft; i++) {
-        work2[n] = fkz[k]*work1[n+1];
-        work2[n+1] = -fkz[k]*work1[n];
-        n += 2;
-      }
-
-  fft2->compute(work2,work2,-1);
-
-  n = 0;
-  for (k = nzlo_in; k <= nzhi_in; k++)
-    for (j = nylo_in; j <= nyhi_in; j++)
-      for (i = nxlo_in; i <= nxhi_in; i++) {
-        vdz_brick[k][j][i] = work2[n];
-        n += 2;
-      }
-}
-
-/* ----------------------------------------------------------------------
-   FFT-based Poisson solver for ik for a triclinic system
-------------------------------------------------------------------------- */
-
-void PPPM::poisson_ik_triclinic()
-{
-  int i,j,k,n;
-
-  // compute gradients of V(r) in each of 3 dims by transformimg -ik*V(k)
-  // FFT leaves data in 3d brick decomposition
-  // copy it into inner portion of vdx,vdy,vdz arrays
-
-  // x direction gradient
-
-  n = 0;
-  for (i = 0; i < nfft; i++) {
-    work2[n] = fkx[i]*work1[n+1];
-    work2[n+1] = -fkx[i]*work1[n];
-    n += 2;
-  }
-
-  fft2->compute(work2,work2,-1);
-
-  n = 0;
-  for (k = nzlo_in; k <= nzhi_in; k++)
-    for (j = nylo_in; j <= nyhi_in; j++)
-      for (i = nxlo_in; i <= nxhi_in; i++) {
-        vdx_brick[k][j][i] = work2[n];
-        n += 2;
-      }
-
-  // y direction gradient
-
-  n = 0;
-  for (i = 0; i < nfft; i++) {
-    work2[n] = fky[i]*work1[n+1];
-    work2[n+1] = -fky[i]*work1[n];
-    n += 2;
-  }
-
-  fft2->compute(work2,work2,-1);
-
-  n = 0;
-  for (k = nzlo_in; k <= nzhi_in; k++)
-    for (j = nylo_in; j <= nyhi_in; j++)
-      for (i = nxlo_in; i <= nxhi_in; i++) {
-        vdy_brick[k][j][i] = work2[n];
-        n += 2;
-      }
-
-  // z direction gradient
-
-  n = 0;
-  for (i = 0; i < nfft; i++) {
-    work2[n] = fkz[i]*work1[n+1];
-    work2[n+1] = -fkz[i]*work1[n];
-    n += 2;
-  }
-
-  fft2->compute(work2,work2,-1);
-
-  n = 0;
-  for (k = nzlo_in; k <= nzhi_in; k++)
-    for (j = nylo_in; j <= nyhi_in; j++)
-      for (i = nxlo_in; i <= nxhi_in; i++) {
-        vdz_brick[k][j][i] = work2[n];
-        n += 2;
-      }
-}
-
-/* ----------------------------------------------------------------------
-   FFT-based Poisson solver for ad
-------------------------------------------------------------------------- */
-
-void PPPM::poisson_ad()
-{
-  int i,j,k,n;
-  double eng;
-
-  // transform charge density (r -> k)
-
-  n = 0;
-  for (i = 0; i < nfft; i++) {
-    work1[n++] = density_fft[i];
-    work1[n++] = ZEROF;
-  }
-
-  fft1->compute(work1,work1,1);
-
-  // global energy and virial contribution
-
-  double scaleinv = 1.0/(nx_pppm*ny_pppm*nz_pppm);
-  double s2 = scaleinv*scaleinv;
-
-  if (eflag_global || vflag_global) {
-    if (vflag_global) {
-      n = 0;
-      for (i = 0; i < nfft; i++) {
-        eng = s2 * greensfn[i] * (work1[n]*work1[n] + work1[n+1]*work1[n+1]);
-        for (j = 0; j < 6; j++) virial[j] += eng*vg[i][j];
-        if (eflag_global) energy += eng;
-        n += 2;
-      }
-    } else {
-      n = 0;
-      for (i = 0; i < nfft; i++) {
-        energy +=
-          s2 * greensfn[i] * (work1[n]*work1[n] + work1[n+1]*work1[n+1]);
-        n += 2;
-      }
-    }
-  }
-
-  // scale by 1/total-grid-pts to get rho(k)
-  // multiply by Green's function to get V(k)
-
-  n = 0;
-  for (i = 0; i < nfft; i++) {
-    work1[n++] *= scaleinv * greensfn[i];
-    work1[n++] *= scaleinv * greensfn[i];
-  }
-
-  // extra FFTs for per-atom energy/virial
-
-  if (vflag_atom) poisson_peratom();
-
-  n = 0;
-  for (i = 0; i < nfft; i++) {
-    work2[n] = work1[n];
-    work2[n+1] = work1[n+1];
-    n += 2;
-  }
-
-  fft2->compute(work2,work2,-1);
-
-  n = 0;
-  for (k = nzlo_in; k <= nzhi_in; k++)
-    for (j = nylo_in; j <= nyhi_in; j++)
-      for (i = nxlo_in; i <= nxhi_in; i++) {
-        u_brick[k][j][i] = work2[n];
-        n += 2;
-      }
-}
-
-/* ----------------------------------------------------------------------
-   FFT-based Poisson solver for per-atom energy/virial
-------------------------------------------------------------------------- */
-
-void PPPM::poisson_peratom()
-{
-  int i,j,k,n;
-
-  // energy
-
-  if (eflag_atom && differentiation_flag != 1) {
-    n = 0;
-    for (i = 0; i < nfft; i++) {
-      work2[n] = work1[n];
-      work2[n+1] = work1[n+1];
-      n += 2;
-    }
-
-    fft2->compute(work2,work2,-1);
-
-    n = 0;
-    for (k = nzlo_in; k <= nzhi_in; k++)
-      for (j = nylo_in; j <= nyhi_in; j++)
-        for (i = nxlo_in; i <= nxhi_in; i++) {
-          u_brick[k][j][i] = work2[n];
-          n += 2;
-        }
-  }
-
-  // 6 components of virial in v0 thru v5
-
-  if (!vflag_atom) return;
-
-  n = 0;
-  for (i = 0; i < nfft; i++) {
-    work2[n] = work1[n]*vg[i][0];
-    work2[n+1] = work1[n+1]*vg[i][0];
-    n += 2;
-  }
-
-  fft2->compute(work2,work2,-1);
-
-  n = 0;
-  for (k = nzlo_in; k <= nzhi_in; k++)
-    for (j = nylo_in; j <= nyhi_in; j++)
-      for (i = nxlo_in; i <= nxhi_in; i++) {
-        v0_brick[k][j][i] = work2[n];
-        n += 2;
-      }
-
-  n = 0;
-  for (i = 0; i < nfft; i++) {
-    work2[n] = work1[n]*vg[i][1];
-    work2[n+1] = work1[n+1]*vg[i][1];
-    n += 2;
-  }
-
-  fft2->compute(work2,work2,-1);
-
-  n = 0;
-  for (k = nzlo_in; k <= nzhi_in; k++)
-    for (j = nylo_in; j <= nyhi_in; j++)
-      for (i = nxlo_in; i <= nxhi_in; i++) {
-        v1_brick[k][j][i] = work2[n];
-        n += 2;
-      }
-
-  n = 0;
-  for (i = 0; i < nfft; i++) {
-    work2[n] = work1[n]*vg[i][2];
-    work2[n+1] = work1[n+1]*vg[i][2];
-    n += 2;
-  }
-
-  fft2->compute(work2,work2,-1);
-
-  n = 0;
-  for (k = nzlo_in; k <= nzhi_in; k++)
-    for (j = nylo_in; j <= nyhi_in; j++)
-      for (i = nxlo_in; i <= nxhi_in; i++) {
-        v2_brick[k][j][i] = work2[n];
-        n += 2;
-      }
-
-  n = 0;
-  for (i = 0; i < nfft; i++) {
-    work2[n] = work1[n]*vg[i][3];
-    work2[n+1] = work1[n+1]*vg[i][3];
-    n += 2;
-  }
-
-  fft2->compute(work2,work2,-1);
-
-  n = 0;
-  for (k = nzlo_in; k <= nzhi_in; k++)
-    for (j = nylo_in; j <= nyhi_in; j++)
-      for (i = nxlo_in; i <= nxhi_in; i++) {
-        v3_brick[k][j][i] = work2[n];
-        n += 2;
-      }
-
-  n = 0;
-  for (i = 0; i < nfft; i++) {
-    work2[n] = work1[n]*vg[i][4];
-    work2[n+1] = work1[n+1]*vg[i][4];
-    n += 2;
-  }
-
-  fft2->compute(work2,work2,-1);
-
-  n = 0;
-  for (k = nzlo_in; k <= nzhi_in; k++)
-    for (j = nylo_in; j <= nyhi_in; j++)
-      for (i = nxlo_in; i <= nxhi_in; i++) {
-        v4_brick[k][j][i] = work2[n];
-        n += 2;
-      }
-
-  n = 0;
-  for (i = 0; i < nfft; i++) {
-    work2[n] = work1[n]*vg[i][5];
-    work2[n+1] = work1[n+1]*vg[i][5];
-    n += 2;
-  }
-
-  fft2->compute(work2,work2,-1);
-
-  n = 0;
-  for (k = nzlo_in; k <= nzhi_in; k++)
-    for (j = nylo_in; j <= nyhi_in; j++)
-      for (i = nxlo_in; i <= nxhi_in; i++) {
-        v5_brick[k][j][i] = work2[n];
-        n += 2;
-      }
-}
-
-/* ----------------------------------------------------------------------
-   interpolate from grid to get electric field & force on my particles
-------------------------------------------------------------------------- */
-
-void PPPM::fieldforce()
-{
-  if (differentiation_flag == 1) fieldforce_ad();
-  else fieldforce_ik();
-}
-
-/* ----------------------------------------------------------------------
-   interpolate from grid to get electric field & force on my particles for ik
-------------------------------------------------------------------------- */
-
-void PPPM::fieldforce_ik()
-{
-  int i,l,m,n,nx,ny,nz,mx,my,mz;
-  FFT_SCALAR dx,dy,dz,x0,y0,z0;
-  FFT_SCALAR ekx,eky,ekz;
-
-  // loop over my charges, interpolate electric field from nearby grid points
-  // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
-  // (dx,dy,dz) = distance to "lower left" grid pt
-  // (mx,my,mz) = global coords of moving stencil pt
-  // ek = 3 components of E-field on particle
-
-  double *q = atom->q;
-  double **x = atom->x;
-  double **f = atom->f;
-
-  int nlocal = atom->nlocal;
-
-  for (i = 0; i < nlocal; i++) {
-    nx = part2grid[i][0];
-    ny = part2grid[i][1];
-    nz = part2grid[i][2];
-    dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv;
-    dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv;
-    dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv;
-
-    compute_rho1d(dx,dy,dz);
-
-    ekx = eky = ekz = ZEROF;
-    for (n = nlower; n <= nupper; n++) {
-      mz = n+nz;
-      z0 = rho1d[2][n];
-      for (m = nlower; m <= nupper; m++) {
-        my = m+ny;
-        y0 = z0*rho1d[1][m];
-        for (l = nlower; l <= nupper; l++) {
-          mx = l+nx;
-          x0 = y0*rho1d[0][l];
-          ekx -= x0*vdx_brick[mz][my][mx];
-          eky -= x0*vdy_brick[mz][my][mx];
-          ekz -= x0*vdz_brick[mz][my][mx];
-        }
-      }
-    }
-
-    // convert E-field to force
-
-    const double qfactor = force->qqrd2e * scale * q[i];
-    f[i][0] += qfactor*ekx;
-    f[i][1] += qfactor*eky;
-    if (slabflag != 2) f[i][2] += qfactor*ekz;
-  }
-}
-
-/* ----------------------------------------------------------------------
-   interpolate from grid to get electric field & force on my particles for ad
-------------------------------------------------------------------------- */
-
-void PPPM::fieldforce_ad()
-{
-  int i,l,m,n,nx,ny,nz,mx,my,mz;
-  FFT_SCALAR dx,dy,dz;
-  FFT_SCALAR ekx,eky,ekz;
-  double s1,s2,s3;
-  double sf = 0.0;
-  double *prd;
-
-  prd = domain->prd;
-  double xprd = prd[0];
-  double yprd = prd[1];
-  double zprd = prd[2];
-
-  double hx_inv = nx_pppm/xprd;
-  double hy_inv = ny_pppm/yprd;
-  double hz_inv = nz_pppm/zprd;
-
-  // loop over my charges, interpolate electric field from nearby grid points
-  // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
-  // (dx,dy,dz) = distance to "lower left" grid pt
-  // (mx,my,mz) = global coords of moving stencil pt
-  // ek = 3 components of E-field on particle
-
-  double *q = atom->q;
-  double **x = atom->x;
-  double **f = atom->f;
-
-  int nlocal = atom->nlocal;
-
-  for (i = 0; i < nlocal; i++) {
-    nx = part2grid[i][0];
-    ny = part2grid[i][1];
-    nz = part2grid[i][2];
-    dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv;
-    dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv;
-    dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv;
-
-    compute_rho1d(dx,dy,dz);
-    compute_drho1d(dx,dy,dz);
-
-    ekx = eky = ekz = ZEROF;
-    for (n = nlower; n <= nupper; n++) {
-      mz = n+nz;
-      for (m = nlower; m <= nupper; m++) {
-        my = m+ny;
-        for (l = nlower; l <= nupper; l++) {
-          mx = l+nx;
-          ekx += drho1d[0][l]*rho1d[1][m]*rho1d[2][n]*u_brick[mz][my][mx];
-          eky += rho1d[0][l]*drho1d[1][m]*rho1d[2][n]*u_brick[mz][my][mx];
-          ekz += rho1d[0][l]*rho1d[1][m]*drho1d[2][n]*u_brick[mz][my][mx];
-        }
-      }
-    }
-    ekx *= hx_inv;
-    eky *= hy_inv;
-    ekz *= hz_inv;
-
-    // convert E-field to force and substract self forces
-
-    const double qfactor = force->qqrd2e * scale;
-
-    s1 = x[i][0]*hx_inv;
-    s2 = x[i][1]*hy_inv;
-    s3 = x[i][2]*hz_inv;
-    sf = sf_coeff[0]*sin(2*MY_PI*s1);
-    sf += sf_coeff[1]*sin(4*MY_PI*s1);
-    sf *= 2*q[i]*q[i];
-    f[i][0] += qfactor*(ekx*q[i] - sf);
-
-    sf = sf_coeff[2]*sin(2*MY_PI*s2);
-    sf += sf_coeff[3]*sin(4*MY_PI*s2);
-    sf *= 2*q[i]*q[i];
-    f[i][1] += qfactor*(eky*q[i] - sf);
-
-
-    sf = sf_coeff[4]*sin(2*MY_PI*s3);
-    sf += sf_coeff[5]*sin(4*MY_PI*s3);
-    sf *= 2*q[i]*q[i];
-    if (slabflag != 2) f[i][2] += qfactor*(ekz*q[i] - sf);
-  }
-}
-
-/* ----------------------------------------------------------------------
-   interpolate from grid to get per-atom energy/virial
-------------------------------------------------------------------------- */
-
-void PPPM::fieldforce_peratom()
-{
-  int i,l,m,n,nx,ny,nz,mx,my,mz;
-  FFT_SCALAR dx,dy,dz,x0,y0,z0;
-  FFT_SCALAR u,v0,v1,v2,v3,v4,v5;
-
-  // loop over my charges, interpolate from nearby grid points
-  // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
-  // (dx,dy,dz) = distance to "lower left" grid pt
-  // (mx,my,mz) = global coords of moving stencil pt
-
-  double *q = atom->q;
-  double **x = atom->x;
-
-  int nlocal = atom->nlocal;
-
-  for (i = 0; i < nlocal; i++) {
-    nx = part2grid[i][0];
-    ny = part2grid[i][1];
-    nz = part2grid[i][2];
-    dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv;
-    dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv;
-    dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv;
-
-    compute_rho1d(dx,dy,dz);
-
-    u = v0 = v1 = v2 = v3 = v4 = v5 = ZEROF;
-    for (n = nlower; n <= nupper; n++) {
-      mz = n+nz;
-      z0 = rho1d[2][n];
-      for (m = nlower; m <= nupper; m++) {
-        my = m+ny;
-        y0 = z0*rho1d[1][m];
-        for (l = nlower; l <= nupper; l++) {
-          mx = l+nx;
-          x0 = y0*rho1d[0][l];
-          if (eflag_atom) u += x0*u_brick[mz][my][mx];
-          if (vflag_atom) {
-            v0 += x0*v0_brick[mz][my][mx];
-            v1 += x0*v1_brick[mz][my][mx];
-            v2 += x0*v2_brick[mz][my][mx];
-            v3 += x0*v3_brick[mz][my][mx];
-            v4 += x0*v4_brick[mz][my][mx];
-            v5 += x0*v5_brick[mz][my][mx];
-          }
-        }
-      }
-    }
-
-    if (eflag_atom) eatom[i] += q[i]*u;
-    if (vflag_atom) {
-      vatom[i][0] += q[i]*v0;
-      vatom[i][1] += q[i]*v1;
-      vatom[i][2] += q[i]*v2;
-      vatom[i][3] += q[i]*v3;
-      vatom[i][4] += q[i]*v4;
-      vatom[i][5] += q[i]*v5;
-    }
-  }
-}
-
-/* ----------------------------------------------------------------------
-   pack own values to buf to send to another proc
-------------------------------------------------------------------------- */
-
-void PPPM::pack_forward(int flag, FFT_SCALAR *buf, int nlist, int *list)
-{
-  int n = 0;
-
-  if (flag == FORWARD_IK) {
-    FFT_SCALAR *xsrc = &vdx_brick[nzlo_out][nylo_out][nxlo_out];
-    FFT_SCALAR *ysrc = &vdy_brick[nzlo_out][nylo_out][nxlo_out];
-    FFT_SCALAR *zsrc = &vdz_brick[nzlo_out][nylo_out][nxlo_out];
-    for (int i = 0; i < nlist; i++) {
-      buf[n++] = xsrc[list[i]];
-      buf[n++] = ysrc[list[i]];
-      buf[n++] = zsrc[list[i]];
-    }
-  } else if (flag == FORWARD_AD) {
-    FFT_SCALAR *src = &u_brick[nzlo_out][nylo_out][nxlo_out];
-    for (int i = 0; i < nlist; i++)
-      buf[i] = src[list[i]];
-  } else if (flag == FORWARD_IK_PERATOM) {
-    FFT_SCALAR *esrc = &u_brick[nzlo_out][nylo_out][nxlo_out];
-    FFT_SCALAR *v0src = &v0_brick[nzlo_out][nylo_out][nxlo_out];
-    FFT_SCALAR *v1src = &v1_brick[nzlo_out][nylo_out][nxlo_out];
-    FFT_SCALAR *v2src = &v2_brick[nzlo_out][nylo_out][nxlo_out];
-    FFT_SCALAR *v3src = &v3_brick[nzlo_out][nylo_out][nxlo_out];
-    FFT_SCALAR *v4src = &v4_brick[nzlo_out][nylo_out][nxlo_out];
-    FFT_SCALAR *v5src = &v5_brick[nzlo_out][nylo_out][nxlo_out];
-    for (int i = 0; i < nlist; i++) {
-      if (eflag_atom) buf[n++] = esrc[list[i]];
-      if (vflag_atom) {
-        buf[n++] = v0src[list[i]];
-        buf[n++] = v1src[list[i]];
-        buf[n++] = v2src[list[i]];
-        buf[n++] = v3src[list[i]];
-        buf[n++] = v4src[list[i]];
-        buf[n++] = v5src[list[i]];
-      }
-    }
-  } else if (flag == FORWARD_AD_PERATOM) {
-    FFT_SCALAR *v0src = &v0_brick[nzlo_out][nylo_out][nxlo_out];
-    FFT_SCALAR *v1src = &v1_brick[nzlo_out][nylo_out][nxlo_out];
-    FFT_SCALAR *v2src = &v2_brick[nzlo_out][nylo_out][nxlo_out];
-    FFT_SCALAR *v3src = &v3_brick[nzlo_out][nylo_out][nxlo_out];
-    FFT_SCALAR *v4src = &v4_brick[nzlo_out][nylo_out][nxlo_out];
-    FFT_SCALAR *v5src = &v5_brick[nzlo_out][nylo_out][nxlo_out];
-    for (int i = 0; i < nlist; i++) {
-      buf[n++] = v0src[list[i]];
-      buf[n++] = v1src[list[i]];
-      buf[n++] = v2src[list[i]];
-      buf[n++] = v3src[list[i]];
-      buf[n++] = v4src[list[i]];
-      buf[n++] = v5src[list[i]];
-    }
-  }
-}
-
-/* ----------------------------------------------------------------------
-   unpack another proc's own values from buf and set own ghost values
-------------------------------------------------------------------------- */
-
-void PPPM::unpack_forward(int flag, FFT_SCALAR *buf, int nlist, int *list)
-{
-  int n = 0;
-
-  if (flag == FORWARD_IK) {
-    FFT_SCALAR *xdest = &vdx_brick[nzlo_out][nylo_out][nxlo_out];
-    FFT_SCALAR *ydest = &vdy_brick[nzlo_out][nylo_out][nxlo_out];
-    FFT_SCALAR *zdest = &vdz_brick[nzlo_out][nylo_out][nxlo_out];
-    for (int i = 0; i < nlist; i++) {
-      xdest[list[i]] = buf[n++];
-      ydest[list[i]] = buf[n++];
-      zdest[list[i]] = buf[n++];
-    }
-  } else if (flag == FORWARD_AD) {
-    FFT_SCALAR *dest = &u_brick[nzlo_out][nylo_out][nxlo_out];
-    for (int i = 0; i < nlist; i++)
-      dest[list[i]] = buf[i];
-  } else if (flag == FORWARD_IK_PERATOM) {
-    FFT_SCALAR *esrc = &u_brick[nzlo_out][nylo_out][nxlo_out];
-    FFT_SCALAR *v0src = &v0_brick[nzlo_out][nylo_out][nxlo_out];
-    FFT_SCALAR *v1src = &v1_brick[nzlo_out][nylo_out][nxlo_out];
-    FFT_SCALAR *v2src = &v2_brick[nzlo_out][nylo_out][nxlo_out];
-    FFT_SCALAR *v3src = &v3_brick[nzlo_out][nylo_out][nxlo_out];
-    FFT_SCALAR *v4src = &v4_brick[nzlo_out][nylo_out][nxlo_out];
-    FFT_SCALAR *v5src = &v5_brick[nzlo_out][nylo_out][nxlo_out];
-    for (int i = 0; i < nlist; i++) {
-      if (eflag_atom) esrc[list[i]] = buf[n++];
-      if (vflag_atom) {
-        v0src[list[i]] = buf[n++];
-        v1src[list[i]] = buf[n++];
-        v2src[list[i]] = buf[n++];
-        v3src[list[i]] = buf[n++];
-        v4src[list[i]] = buf[n++];
-        v5src[list[i]] = buf[n++];
-      }
-    }
-  } else if (flag == FORWARD_AD_PERATOM) {
-    FFT_SCALAR *v0src = &v0_brick[nzlo_out][nylo_out][nxlo_out];
-    FFT_SCALAR *v1src = &v1_brick[nzlo_out][nylo_out][nxlo_out];
-    FFT_SCALAR *v2src = &v2_brick[nzlo_out][nylo_out][nxlo_out];
-    FFT_SCALAR *v3src = &v3_brick[nzlo_out][nylo_out][nxlo_out];
-    FFT_SCALAR *v4src = &v4_brick[nzlo_out][nylo_out][nxlo_out];
-    FFT_SCALAR *v5src = &v5_brick[nzlo_out][nylo_out][nxlo_out];
-    for (int i = 0; i < nlist; i++) {
-      v0src[list[i]] = buf[n++];
-      v1src[list[i]] = buf[n++];
-      v2src[list[i]] = buf[n++];
-      v3src[list[i]] = buf[n++];
-      v4src[list[i]] = buf[n++];
-      v5src[list[i]] = buf[n++];
-    }
-  }
-}
-
-/* ----------------------------------------------------------------------
-   pack ghost values into buf to send to another proc
-------------------------------------------------------------------------- */
-
-void PPPM::pack_reverse(int flag, FFT_SCALAR *buf, int nlist, int *list)
-{
-  if (flag == REVERSE_RHO) {
-    FFT_SCALAR *src = &density_brick[nzlo_out][nylo_out][nxlo_out];
-    for (int i = 0; i < nlist; i++)
-      buf[i] = src[list[i]];
-  }
-}
-
-/* ----------------------------------------------------------------------
-   unpack another proc's ghost values from buf and add to own values
-------------------------------------------------------------------------- */
-
-void PPPM::unpack_reverse(int flag, FFT_SCALAR *buf, int nlist, int *list)
-{
-  if (flag == REVERSE_RHO) {
-    FFT_SCALAR *dest = &density_brick[nzlo_out][nylo_out][nxlo_out];
-    for (int i = 0; i < nlist; i++)
-      dest[list[i]] += buf[i];
-  } 
-}
-
-/* ----------------------------------------------------------------------
-   map nprocs to NX by NY grid as PX by PY procs - return optimal px,py
-------------------------------------------------------------------------- */
-
-void PPPM::procs2grid2d(int nprocs, int nx, int ny, int *px, int *py)
-{
-  // loop thru all possible factorizations of nprocs
-  // surf = surface area of largest proc sub-domain
-  // innermost if test minimizes surface area and surface/volume ratio
-
-  int bestsurf = 2 * (nx + ny);
-  int bestboxx = 0;
-  int bestboxy = 0;
-
-  int boxx,boxy,surf,ipx,ipy;
-
-  ipx = 1;
-  while (ipx <= nprocs) {
-    if (nprocs % ipx == 0) {
-      ipy = nprocs/ipx;
-      boxx = nx/ipx;
-      if (nx % ipx) boxx++;
-      boxy = ny/ipy;
-      if (ny % ipy) boxy++;
-      surf = boxx + boxy;
-      if (surf < bestsurf ||
-          (surf == bestsurf && boxx*boxy > bestboxx*bestboxy)) {
-        bestsurf = surf;
-        bestboxx = boxx;
-        bestboxy = boxy;
-        *px = ipx;
-        *py = ipy;
-      }
-    }
-    ipx++;
-  }
-}
-
-/* ----------------------------------------------------------------------
-   charge assignment into rho1d
-   dx,dy,dz = distance of particle from "lower left" grid point
-------------------------------------------------------------------------- */
-
-void PPPM::compute_rho1d(const FFT_SCALAR &dx, const FFT_SCALAR &dy,
-                         const FFT_SCALAR &dz)
-{
-  int k,l;
-  FFT_SCALAR r1,r2,r3;
-
-  for (k = (1-order)/2; k <= order/2; k++) {
-    r1 = r2 = r3 = ZEROF;
-
-    for (l = order-1; l >= 0; l--) {
-      r1 = rho_coeff[l][k] + r1*dx;
-      r2 = rho_coeff[l][k] + r2*dy;
-      r3 = rho_coeff[l][k] + r3*dz;
-    }
-    rho1d[0][k] = r1;
-    rho1d[1][k] = r2;
-    rho1d[2][k] = r3;
-  }
-}
-
-/* ----------------------------------------------------------------------
-   charge assignment into drho1d
-   dx,dy,dz = distance of particle from "lower left" grid point
-------------------------------------------------------------------------- */
-
-void PPPM::compute_drho1d(const FFT_SCALAR &dx, const FFT_SCALAR &dy,
-                          const FFT_SCALAR &dz)
-{
-  int k,l;
-  FFT_SCALAR r1,r2,r3;
-
-  for (k = (1-order)/2; k <= order/2; k++) {
-    r1 = r2 = r3 = ZEROF;
-
-    for (l = order-2; l >= 0; l--) {
-      r1 = drho_coeff[l][k] + r1*dx;
-      r2 = drho_coeff[l][k] + r2*dy;
-      r3 = drho_coeff[l][k] + r3*dz;
-    }
-    drho1d[0][k] = r1;
-    drho1d[1][k] = r2;
-    drho1d[2][k] = r3;
-  }
-}
-
-/* ----------------------------------------------------------------------
-   generate coeffients for the weight function of order n
-
-              (n-1)
-  Wn(x) =     Sum    wn(k,x) , Sum is over every other integer
-           k=-(n-1)
-  For k=-(n-1),-(n-1)+2, ....., (n-1)-2,n-1
-      k is odd integers if n is even and even integers if n is odd
-              ---
-             | n-1
-             | Sum a(l,j)*(x-k/2)**l   if abs(x-k/2) < 1/2
-  wn(k,x) = <  l=0
-             |
-             |  0                       otherwise
-              ---
-  a coeffients are packed into the array rho_coeff to eliminate zeros
-  rho_coeff(l,((k+mod(n+1,2))/2) = a(l,k)
-------------------------------------------------------------------------- */
-
-void PPPM::compute_rho_coeff()
-{
-  int j,k,l,m;
-  FFT_SCALAR s;
-
-  FFT_SCALAR **a;
-  memory->create2d_offset(a,order,-order,order,"pppm:a");
-
-  for (k = -order; k <= order; k++)
-    for (l = 0; l < order; l++)
-      a[l][k] = 0.0;
-
-  a[0][0] = 1.0;
-  for (j = 1; j < order; j++) {
-    for (k = -j; k <= j; k += 2) {
-      s = 0.0;
-      for (l = 0; l < j; l++) {
-        a[l+1][k] = (a[l][k+1]-a[l][k-1]) / (l+1);
-#ifdef FFT_SINGLE
-        s += powf(0.5,(float) l+1) *
-          (a[l][k-1] + powf(-1.0,(float) l) * a[l][k+1]) / (l+1);
-#else
-        s += pow(0.5,(double) l+1) *
-          (a[l][k-1] + pow(-1.0,(double) l) * a[l][k+1]) / (l+1);
-#endif
-      }
-      a[0][k] = s;
-    }
-  }
-
-  m = (1-order)/2;
-  for (k = -(order-1); k < order; k += 2) {
-    for (l = 0; l < order; l++)
-      rho_coeff[l][m] = a[l][k];
-    for (l = 1; l < order; l++)
-      drho_coeff[l-1][m] = l*a[l][k];
-    m++;
-  }
-
-  memory->destroy2d_offset(a,-order);
-}
-
-/* ----------------------------------------------------------------------
-   Slab-geometry correction term to dampen inter-slab interactions between
-   periodically repeating slabs.  Yields good approximation to 2D Ewald if
-   adequate empty space is left between repeating slabs (J. Chem. Phys.
-   111, 3155).  Slabs defined here to be parallel to the xy plane. Also
-   extended to non-neutral systems (J. Chem. Phys. 131, 094107).
-------------------------------------------------------------------------- */
-
-void PPPM::slabcorr()
-{
-  // compute local contribution to global dipole moment
-
-  double *q = atom->q;
-  double **x = atom->x;
-  double zprd = domain->zprd;
-  int nlocal = atom->nlocal;
-
-  double dipole = 0.0;
-  for (int i = 0; i < nlocal; i++) dipole += q[i]*x[i][2];
-
-  // sum local contributions to get global dipole moment
-
-  double dipole_all;
-  MPI_Allreduce(&dipole,&dipole_all,1,MPI_DOUBLE,MPI_SUM,world);
-
-  // need to make non-neutral systems and/or
-  //  per-atom energy translationally invariant
-
-  double dipole_r2 = 0.0;
-  if (eflag_atom || fabs(qsum) > SMALL) {
-    for (int i = 0; i < nlocal; i++)
-      dipole_r2 += q[i]*x[i][2]*x[i][2];
-
-    // sum local contributions
-
-    double tmp;
-    MPI_Allreduce(&dipole_r2,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
-    dipole_r2 = tmp;
-  }
-
-  // compute corrections
-
-  const double e_slabcorr = MY_2PI*(dipole_all*dipole_all -
-    qsum*dipole_r2 - qsum*qsum*zprd*zprd/12.0)/volume;
-  const double qscale = force->qqrd2e * scale;
-
-  if (eflag_global) energy += qscale * e_slabcorr;
-
-  // per-atom energy
-
-  if (eflag_atom) {
-    double efact = qscale * MY_2PI/volume;
-    for (int i = 0; i < nlocal; i++)
-      eatom[i] += efact * q[i]*(x[i][2]*dipole_all - 0.5*(dipole_r2 +
-        qsum*x[i][2]*x[i][2]) - qsum*zprd*zprd/12.0);
-  }
-
-  // add on force corrections
-
-  double ffact = qscale * (-4.0*MY_PI/volume);
-  double **f = atom->f;
-
-  for (int i = 0; i < nlocal; i++) f[i][2] += ffact * q[i]*(dipole_all - qsum*x[i][2]);
-}
-
-/* ----------------------------------------------------------------------
-   perform and time the 1d FFTs required for N timesteps
-------------------------------------------------------------------------- */
-
-int PPPM::timing_1d(int n, double &time1d)
-{
-  double time1,time2;
-
-  for (int i = 0; i < 2*nfft_both; i++) work1[i] = ZEROF;
-
-  MPI_Barrier(world);
-  time1 = MPI_Wtime();
-
-  for (int i = 0; i < n; i++) {
-    fft1->timing1d(work1,nfft_both,1);
-    fft2->timing1d(work1,nfft_both,-1);
-    if (differentiation_flag != 1) {
-      fft2->timing1d(work1,nfft_both,-1);
-      fft2->timing1d(work1,nfft_both,-1);
-    }
-  }
-
-  MPI_Barrier(world);
-  time2 = MPI_Wtime();
-  time1d = time2 - time1;
-
-  if (differentiation_flag) return 2;
-  return 4;
-}
-
-/* ----------------------------------------------------------------------
-   perform and time the 3d FFTs required for N timesteps
-------------------------------------------------------------------------- */
-
-int PPPM::timing_3d(int n, double &time3d)
-{
-  double time1,time2;
-
-  for (int i = 0; i < 2*nfft_both; i++) work1[i] = ZEROF;
-
-  MPI_Barrier(world);
-  time1 = MPI_Wtime();
-
-  for (int i = 0; i < n; i++) {
-    fft1->compute(work1,work1,1);
-    fft2->compute(work1,work1,-1);
-    if (differentiation_flag != 1) {
-      fft2->compute(work1,work1,-1);
-      fft2->compute(work1,work1,-1);
-    }
-  }
-
-  MPI_Barrier(world);
-  time2 = MPI_Wtime();
-  time3d = time2 - time1;
-
-  if (differentiation_flag) return 2;
-  return 4;
-}
-
-/* ----------------------------------------------------------------------
-   memory usage of local arrays
-------------------------------------------------------------------------- */
-
-double PPPM::memory_usage()
-{
-  double bytes = nmax*3 * sizeof(double);
-  int nbrick = (nxhi_out-nxlo_out+1) * (nyhi_out-nylo_out+1) *
-    (nzhi_out-nzlo_out+1);
-  if (differentiation_flag == 1) {
-    bytes += 2 * nbrick * sizeof(FFT_SCALAR);
-  } else {
-    bytes += 4 * nbrick * sizeof(FFT_SCALAR);
-  }
-  if (triclinic) bytes += 3 * nfft_both * sizeof(double);
-  bytes += 6 * nfft_both * sizeof(double);
-  bytes += nfft_both * sizeof(double);
-  bytes += nfft_both*5 * sizeof(FFT_SCALAR);
-
-  if (peratom_allocate_flag)
-    bytes += 6 * nbrick * sizeof(FFT_SCALAR);
-
-  if (group_allocate_flag) {
-    bytes += 2 * nbrick * sizeof(FFT_SCALAR);
-    bytes += 2 * nfft_both * sizeof(FFT_SCALAR);;
-  }
-
-  bytes += cg->memory_usage();
-
-  return bytes;
-}
-
-/* ----------------------------------------------------------------------
-   group-group interactions
- ------------------------------------------------------------------------- */
-
-/* ----------------------------------------------------------------------
-   compute the PPPM total long-range force and energy for groups A and B
- ------------------------------------------------------------------------- */
-
-void PPPM::compute_group_group(int groupbit_A, int groupbit_B, int AA_flag)
-{
-  if (slabflag && triclinic)
-    error->all(FLERR,"Cannot (yet) use K-space slab "
-               "correction with compute group/group for triclinic systems");
-
-  if (differentiation_flag)
-    error->all(FLERR,"Cannot (yet) use kspace_modify "
-               "diff ad with compute group/group");
-
-  if (!group_allocate_flag) allocate_groups();
-
-  // convert atoms from box to lamda coords
-
-  if (triclinic == 0) boxlo = domain->boxlo;
-  else {
-    boxlo = domain->boxlo_lamda;
-    domain->x2lamda(atom->nlocal);
-  }
-
-  e2group = 0.0; //energy
-  f2group[0] = 0.0; //force in x-direction
-  f2group[1] = 0.0; //force in y-direction
-  f2group[2] = 0.0; //force in z-direction
-
-  // map my particle charge onto my local 3d density grid
-
-  make_rho_groups(groupbit_A,groupbit_B,AA_flag);
-
-  // all procs communicate density values from their ghost cells
-  //   to fully sum contribution in their 3d bricks
-  // remap from 3d decomposition to FFT decomposition
-
-  // temporarily store and switch pointers so we can
-  //  use brick2fft() for groups A and B (without
-  //  writing an additional function)
-
-  FFT_SCALAR ***density_brick_real = density_brick;
-  FFT_SCALAR *density_fft_real = density_fft;
-
-  // group A
-
-  density_brick = density_A_brick;
-  density_fft = density_A_fft;
-
-  cg->reverse_comm(this,REVERSE_RHO);
-  brick2fft();
-
-  // group B
-
-  density_brick = density_B_brick;
-  density_fft = density_B_fft;
-
-  cg->reverse_comm(this,REVERSE_RHO);
-  brick2fft();
-
-  // switch back pointers
-
-  density_brick = density_brick_real;
-  density_fft = density_fft_real;
-
-  // compute potential gradient on my FFT grid and
-  //   portion of group-group energy/force on this proc's FFT grid
-
-  poisson_groups(AA_flag);
-
-  const double qscale = force->qqrd2e * scale;
-
-  // total group A <--> group B energy
-  // self and boundary correction terms are in compute_group_group.cpp
-
-  double e2group_all;
-  MPI_Allreduce(&e2group,&e2group_all,1,MPI_DOUBLE,MPI_SUM,world);
-  e2group = e2group_all;
-
-  e2group *= qscale*0.5*volume;
-
-  // total group A <--> group B force
-
-  double f2group_all[3];
-  MPI_Allreduce(f2group,f2group_all,3,MPI_DOUBLE,MPI_SUM,world);
-
-  f2group[0] = qscale*volume*f2group_all[0];
-  f2group[1] = qscale*volume*f2group_all[1];
-  if (slabflag != 2) f2group[2] = qscale*volume*f2group_all[2];
-
-  // convert atoms back from lamda to box coords
-
-  if (triclinic) domain->lamda2x(atom->nlocal);
-
-  if (slabflag == 1)
-    slabcorr_groups(groupbit_A, groupbit_B, AA_flag);
-}
-
-/* ----------------------------------------------------------------------
- allocate group-group memory that depends on # of K-vectors and order
- ------------------------------------------------------------------------- */
-
-void PPPM::allocate_groups()
-{
-  group_allocate_flag = 1;
-
-  memory->create3d_offset(density_A_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
-                          nxlo_out,nxhi_out,"pppm:density_A_brick");
-  memory->create3d_offset(density_B_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
-                          nxlo_out,nxhi_out,"pppm:density_B_brick");
-  memory->create(density_A_fft,nfft_both,"pppm:density_A_fft");
-  memory->create(density_B_fft,nfft_both,"pppm:density_B_fft");
-}
-
-/* ----------------------------------------------------------------------
- deallocate group-group memory that depends on # of K-vectors and order
- ------------------------------------------------------------------------- */
-
-void PPPM::deallocate_groups()
-{
-  group_allocate_flag = 0;
-
-  memory->destroy3d_offset(density_A_brick,nzlo_out,nylo_out,nxlo_out);
-  memory->destroy3d_offset(density_B_brick,nzlo_out,nylo_out,nxlo_out);
-  memory->destroy(density_A_fft);
-  memory->destroy(density_B_fft);
-}
-
-/* ----------------------------------------------------------------------
- create discretized "density" on section of global grid due to my particles
- density(x,y,z) = charge "density" at grid points of my 3d brick
- (nxlo:nxhi,nylo:nyhi,nzlo:nzhi) is extent of my brick (including ghosts)
- in global grid for group-group interactions
- ------------------------------------------------------------------------- */
-
-void PPPM::make_rho_groups(int groupbit_A, int groupbit_B, int AA_flag)
-{
-  int l,m,n,nx,ny,nz,mx,my,mz;
-  FFT_SCALAR dx,dy,dz,x0,y0,z0;
-
-  // clear 3d density arrays
-
-  memset(&(density_A_brick[nzlo_out][nylo_out][nxlo_out]),0,
-         ngrid*sizeof(FFT_SCALAR));
-
-  memset(&(density_B_brick[nzlo_out][nylo_out][nxlo_out]),0,
-         ngrid*sizeof(FFT_SCALAR));
-
-  // loop over my charges, add their contribution to nearby grid points
-  // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
-  // (dx,dy,dz) = distance to "lower left" grid pt
-  // (mx,my,mz) = global coords of moving stencil pt
-
-  double *q = atom->q;
-  double **x = atom->x;
-  int nlocal = atom->nlocal;
-  int *mask = atom->mask;
-
-  for (int i = 0; i < nlocal; i++) {
-
-    if (!((mask[i] & groupbit_A) && (mask[i] & groupbit_B)))
-      if (AA_flag) continue;
-
-    if ((mask[i] & groupbit_A) || (mask[i] & groupbit_B)) {
-
-      nx = part2grid[i][0];
-      ny = part2grid[i][1];
-      nz = part2grid[i][2];
-      dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv;
-      dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv;
-      dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv;
-
-      compute_rho1d(dx,dy,dz);
-
-      z0 = delvolinv * q[i];
-      for (n = nlower; n <= nupper; n++) {
-        mz = n+nz;
-        y0 = z0*rho1d[2][n];
-        for (m = nlower; m <= nupper; m++) {
-          my = m+ny;
-          x0 = y0*rho1d[1][m];
-          for (l = nlower; l <= nupper; l++) {
-            mx = l+nx;
-
-            // group A
-
-            if (mask[i] & groupbit_A)
-              density_A_brick[mz][my][mx] += x0*rho1d[0][l];
-
-            // group B
-
-            if (mask[i] & groupbit_B)
-              density_B_brick[mz][my][mx] += x0*rho1d[0][l];
-          }
-        }
-      }
-    }
-  }
-}
-
-/* ----------------------------------------------------------------------
-   FFT-based Poisson solver for group-group interactions
- ------------------------------------------------------------------------- */
-
-void PPPM::poisson_groups(int AA_flag)
-{
-  int i,j,k,n;
-
-  // reuse memory (already declared)
-
-  FFT_SCALAR *work_A = work1;
-  FFT_SCALAR *work_B = work2;
-
-  // transform charge density (r -> k)
-
-  // group A
-
-  n = 0;
-  for (i = 0; i < nfft; i++) {
-    work_A[n++] = density_A_fft[i];
-    work_A[n++] = ZEROF;
-  }
-
-  fft1->compute(work_A,work_A,1);
-
-  // group B
-
-  n = 0;
-  for (i = 0; i < nfft; i++) {
-    work_B[n++] = density_B_fft[i];
-    work_B[n++] = ZEROF;
-  }
-
-  fft1->compute(work_B,work_B,1);
-
-  // group-group energy and force contribution,
-  //  keep everything in reciprocal space so
-  //  no inverse FFTs needed
-
-  double scaleinv = 1.0/(nx_pppm*ny_pppm*nz_pppm);
-  double s2 = scaleinv*scaleinv;
-
-  // energy
-
-  n = 0;
-  for (i = 0; i < nfft; i++) {
-    e2group += s2 * greensfn[i] *
-      (work_A[n]*work_B[n] + work_A[n+1]*work_B[n+1]);
-    n += 2;
-  }
-
-  if (AA_flag) return;
-
-
-  // multiply by Green's function and s2
-  //  (only for work_A so it is not squared below)
-
-  n = 0;
-  for (i = 0; i < nfft; i++) {
-    work_A[n++] *= s2 * greensfn[i];
-    work_A[n++] *= s2 * greensfn[i];
-  }
-
-  // triclinic system
-  
-  if (triclinic) {
-    poisson_groups_triclinic();
-    return;
-  }
-
-  double partial_group;
-
-  // force, x direction
-
-  n = 0;
-  for (k = nzlo_fft; k <= nzhi_fft; k++)
-    for (j = nylo_fft; j <= nyhi_fft; j++)
-      for (i = nxlo_fft; i <= nxhi_fft; i++) {
-        partial_group = work_A[n+1]*work_B[n] - work_A[n]*work_B[n+1];
-        f2group[0] += fkx[i] * partial_group;
-        n += 2;
-      }
-
-  // force, y direction
-
-  n = 0;
-  for (k = nzlo_fft; k <= nzhi_fft; k++)
-    for (j = nylo_fft; j <= nyhi_fft; j++)
-      for (i = nxlo_fft; i <= nxhi_fft; i++) {
-        partial_group = work_A[n+1]*work_B[n] - work_A[n]*work_B[n+1];
-        f2group[1] += fky[j] * partial_group;
-        n += 2;
-      }
-
-  // force, z direction
-
-  n = 0;
-  for (k = nzlo_fft; k <= nzhi_fft; k++)
-    for (j = nylo_fft; j <= nyhi_fft; j++)
-      for (i = nxlo_fft; i <= nxhi_fft; i++) {
-        partial_group = work_A[n+1]*work_B[n] - work_A[n]*work_B[n+1];
-        f2group[2] += fkz[k] * partial_group;
-        n += 2;
-      }
-}
-
-/* ----------------------------------------------------------------------
-   FFT-based Poisson solver for group-group interactions
-   for a triclinic system
- ------------------------------------------------------------------------- */
-
-void PPPM::poisson_groups_triclinic()
-{
-  int i,j,k,n;
-
-  // reuse memory (already declared)
-
-  FFT_SCALAR *work_A = work1;
-  FFT_SCALAR *work_B = work2;
-
-  double partial_group;
-
-  // force, x direction
-
-  n = 0;
-  for (i = 0; i < nfft; i++) {
-    partial_group = work_A[n+1]*work_B[n] - work_A[n]*work_B[n+1];
-    f2group[0] += fkx[i] * partial_group;
-    n += 2;
-  }
-
-  // force, y direction
-
-  n = 0;
-  for (i = 0; i < nfft; i++) {
-    partial_group = work_A[n+1]*work_B[n] - work_A[n]*work_B[n+1];
-    f2group[1] += fky[i] * partial_group;
-    n += 2;
-  }
-
-  // force, z direction
-
-  n = 0;
-  for (i = 0; i < nfft; i++) {
-    partial_group = work_A[n+1]*work_B[n] - work_A[n]*work_B[n+1];
-    f2group[2] += fkz[i] * partial_group;
-    n += 2;
-  }
-}
-
-/* ----------------------------------------------------------------------
-   Slab-geometry correction term to dampen inter-slab interactions between
-   periodically repeating slabs.  Yields good approximation to 2D Ewald if
-   adequate empty space is left between repeating slabs (J. Chem. Phys.
-   111, 3155).  Slabs defined here to be parallel to the xy plane. Also
-   extended to non-neutral systems (J. Chem. Phys. 131, 094107).
-------------------------------------------------------------------------- */
-
-void PPPM::slabcorr_groups(int groupbit_A, int groupbit_B, int AA_flag)
-{
-  // compute local contribution to global dipole moment
-
-  double *q = atom->q;
-  double **x = atom->x;
-  double zprd = domain->zprd;
-  int *mask = atom->mask;
-  int nlocal = atom->nlocal;
-
-  double qsum_A = 0.0;
-  double qsum_B = 0.0;
-  double dipole_A = 0.0;
-  double dipole_B = 0.0;
-  double dipole_r2_A = 0.0;
-  double dipole_r2_B = 0.0;
-
-  for (int i = 0; i < nlocal; i++) {
-    if (!((mask[i] & groupbit_A) && (mask[i] & groupbit_B)))
-      if (AA_flag) continue;
-
-    if (mask[i] & groupbit_A) { 
-      qsum_A += q[i];
-      dipole_A += q[i]*x[i][2];
-      dipole_r2_A += q[i]*x[i][2]*x[i][2];
-    }
-
-    if (mask[i] & groupbit_B) {
-      qsum_B += q[i];
-      dipole_B += q[i]*x[i][2];
-      dipole_r2_B += q[i]*x[i][2]*x[i][2];
-    }
-  }
-
-  // sum local contributions to get total charge and global dipole moment
-  //  for each group
-
-  double tmp;
-  MPI_Allreduce(&qsum_A,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
-  qsum_A = tmp;
-
-  MPI_Allreduce(&qsum_B,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
-  qsum_B = tmp;
-
-  MPI_Allreduce(&dipole_A,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
-  dipole_A = tmp;
-
-  MPI_Allreduce(&dipole_B,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
-  dipole_B = tmp;
-
-  MPI_Allreduce(&dipole_r2_A,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
-  dipole_r2_A = tmp;
-
-  MPI_Allreduce(&dipole_r2_B,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
-  dipole_r2_B = tmp;
-
-  // compute corrections
-
-  const double qscale = force->qqrd2e * scale;
-  const double efact = qscale * MY_2PI/volume;
-
-  e2group += efact * (dipole_A*dipole_B - 0.5*(qsum_A*dipole_r2_B +
-    qsum_B*dipole_r2_A) - qsum_A*qsum_B*zprd*zprd/12.0);
-
-  // add on force corrections
-
-  const double ffact = qscale * (-4.0*MY_PI/volume);
-  f2group[2] += ffact * (qsum_A*dipole_B - qsum_B*dipole_A);
-}
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing authors: Roy Pollock (LLNL), Paul Crozier (SNL)
+     per-atom energy/virial & group/group energy/force added by Stan Moore (BYU)
+     analytic diff (2 FFT) option added by Rolf Isele-Holder (Aachen University)
+     triclinic added by Stan Moore (SNL)
+------------------------------------------------------------------------- */
+
+#include "lmptype.h"
+#include "mpi.h"
+#include "string.h"
+#include "stdio.h"
+#include "stdlib.h"
+#include "math.h"
+#include "pppm.h"
+#include "atom.h"
+#include "comm.h"
+#include "commgrid.h"
+#include "neighbor.h"
+#include "force.h"
+#include "pair.h"
+#include "bond.h"
+#include "angle.h"
+#include "domain.h"
+#include "fft3d_wrap.h"
+#include "remap_wrap.h"
+#include "memory.h"
+#include "error.h"
+
+#include "math_const.h"
+#include "math_special.h"
+
+using namespace LAMMPS_NS;
+using namespace MathConst;
+using namespace MathSpecial;
+
+#define MAXORDER 7
+#define OFFSET 16384
+#define SMALL 0.00001
+#define LARGE 10000.0
+#define EPS_HOC 1.0e-7
+
+enum{REVERSE_RHO};
+enum{FORWARD_IK,FORWARD_AD,FORWARD_IK_PERATOM,FORWARD_AD_PERATOM};
+
+#ifdef FFT_SINGLE
+#define ZEROF 0.0f
+#define ONEF  1.0f
+#else
+#define ZEROF 0.0
+#define ONEF  1.0
+#endif
+
+/* ---------------------------------------------------------------------- */
+
+PPPM::PPPM(LAMMPS *lmp, int narg, char **arg) : KSpace(lmp, narg, arg)
+{
+  if (narg < 1) error->all(FLERR,"Illegal kspace_style pppm command");
+ 
+  pppmflag = 1;
+  group_group_enable = 1;
+
+  accuracy_relative = fabs(force->numeric(FLERR,arg[0]));
+
+  nfactors = 3;
+  factors = new int[nfactors];
+  factors[0] = 2;
+  factors[1] = 3;
+  factors[2] = 5;
+
+  MPI_Comm_rank(world,&me);
+  MPI_Comm_size(world,&nprocs);
+
+  density_brick = vdx_brick = vdy_brick = vdz_brick = NULL;
+  density_fft = NULL;
+  u_brick = NULL;
+  v0_brick = v1_brick = v2_brick = v3_brick = v4_brick = v5_brick = NULL;
+  greensfn = NULL;
+  work1 = work2 = NULL;
+  vg = NULL;
+  fkx = fky = fkz = NULL;
+
+  sf_precoeff1 = sf_precoeff2 = sf_precoeff3 = 
+    sf_precoeff4 = sf_precoeff5 = sf_precoeff6 = NULL;
+
+  density_A_brick = density_B_brick = NULL;
+  density_A_fft = density_B_fft = NULL;
+
+  gf_b = NULL;
+  rho1d = rho_coeff = drho1d = drho_coeff = NULL;
+
+  fft1 = fft2 = NULL;
+  remap = NULL;
+  cg = NULL;
+  cg_peratom = NULL;
+
+  nmax = 0;
+  part2grid = NULL;
+
+  peratom_allocate_flag = 0;
+  group_allocate_flag = 0;
+
+  // define acons coefficients for estimation of kspace errors
+  // see JCP 109, pg 7698 for derivation of coefficients
+  // higher order coefficients may be computed if needed
+
+  memory->create(acons,8,7,"pppm:acons");
+  acons[1][0] = 2.0 / 3.0;
+  acons[2][0] = 1.0 / 50.0;
+  acons[2][1] = 5.0 / 294.0;
+  acons[3][0] = 1.0 / 588.0;
+  acons[3][1] = 7.0 / 1440.0;
+  acons[3][2] = 21.0 / 3872.0;
+  acons[4][0] = 1.0 / 4320.0;
+  acons[4][1] = 3.0 / 1936.0;
+  acons[4][2] = 7601.0 / 2271360.0;
+  acons[4][3] = 143.0 / 28800.0;
+  acons[5][0] = 1.0 / 23232.0;
+  acons[5][1] = 7601.0 / 13628160.0;
+  acons[5][2] = 143.0 / 69120.0;
+  acons[5][3] = 517231.0 / 106536960.0;
+  acons[5][4] = 106640677.0 / 11737571328.0;
+  acons[6][0] = 691.0 / 68140800.0;
+  acons[6][1] = 13.0 / 57600.0;
+  acons[6][2] = 47021.0 / 35512320.0;
+  acons[6][3] = 9694607.0 / 2095994880.0;
+  acons[6][4] = 733191589.0 / 59609088000.0;
+  acons[6][5] = 326190917.0 / 11700633600.0;
+  acons[7][0] = 1.0 / 345600.0;
+  acons[7][1] = 3617.0 / 35512320.0;
+  acons[7][2] = 745739.0 / 838397952.0;
+  acons[7][3] = 56399353.0 / 12773376000.0;
+  acons[7][4] = 25091609.0 / 1560084480.0;
+  acons[7][5] = 1755948832039.0 / 36229939200000.0;
+  acons[7][6] = 4887769399.0 / 37838389248.0;
+}
+
+/* ----------------------------------------------------------------------
+   free all memory
+------------------------------------------------------------------------- */
+
+PPPM::~PPPM()
+{
+  delete [] factors;
+  deallocate();
+  if (peratom_allocate_flag) deallocate_peratom();
+  if (group_allocate_flag) deallocate_groups();
+  memory->destroy(part2grid);
+  memory->destroy(acons);
+}
+
+/* ----------------------------------------------------------------------
+   called once before run
+------------------------------------------------------------------------- */
+
+void PPPM::init()
+{
+  if (me == 0) {
+    if (screen) fprintf(screen,"PPPM initialization ...\n");
+    if (logfile) fprintf(logfile,"PPPM initialization ...\n");
+  }
+
+  // error check
+
+  triclinic_check();
+  if (domain->triclinic && differentiation_flag == 1)
+    error->all(FLERR,"Cannot (yet) use PPPM with triclinic box "
+               "and kspace_modify diff ad");
+  if (domain->triclinic && slabflag)
+    error->all(FLERR,"Cannot (yet) use PPPM with triclinic box and "
+               "slab correction");
+  if (domain->dimension == 2) error->all(FLERR,
+                                         "Cannot use PPPM with 2d simulation");
+
+  if (!atom->q_flag) error->all(FLERR,"Kspace style requires atom attribute q");
+
+  if (slabflag == 0 && domain->nonperiodic > 0)
+    error->all(FLERR,"Cannot use nonperiodic boundaries with PPPM");
+  if (slabflag) {
+    if (domain->xperiodic != 1 || domain->yperiodic != 1 ||
+        domain->boundary[2][0] != 1 || domain->boundary[2][1] != 1)
+      error->all(FLERR,"Incorrect boundaries with slab PPPM");
+  }
+
+  if (order < 2 || order > MAXORDER) {
+    char str[128];
+    sprintf(str,"PPPM order cannot be < 2 or > than %d",MAXORDER);
+    error->all(FLERR,str);
+  }
+
+  // extract short-range Coulombic cutoff from pair style
+
+  triclinic = domain->triclinic;
+  scale = 1.0;
+
+  pair_check();
+
+  int itmp = 0;
+  double *p_cutoff = (double *) force->pair->extract("cut_coul",itmp);
+  if (p_cutoff == NULL)
+    error->all(FLERR,"KSpace style is incompatible with Pair style");
+  cutoff = *p_cutoff;
+
+  // if kspace is TIP4P, extract TIP4P params from pair style
+  // bond/angle are not yet init(), so insure equilibrium request is valid
+
+  qdist = 0.0;
+
+  if (tip4pflag) {
+    double *p_qdist = (double *) force->pair->extract("qdist",itmp);
+    int *p_typeO = (int *) force->pair->extract("typeO",itmp);
+    int *p_typeH = (int *) force->pair->extract("typeH",itmp);
+    int *p_typeA = (int *) force->pair->extract("typeA",itmp);
+    int *p_typeB = (int *) force->pair->extract("typeB",itmp);
+    if (!p_qdist || !p_typeO || !p_typeH || !p_typeA || !p_typeB)
+      error->all(FLERR,"KSpace style is incompatible with Pair style");
+    qdist = *p_qdist;
+    typeO = *p_typeO;
+    typeH = *p_typeH;
+    int typeA = *p_typeA;
+    int typeB = *p_typeB;
+
+    if (force->angle == NULL || force->bond == NULL ||
+        force->angle->setflag == NULL || force->bond->setflag == NULL)
+      error->all(FLERR,"Bond and angle potentials must be defined for TIP4P");
+    if (typeA < 1 || typeA > atom->nangletypes ||
+        force->angle->setflag[typeA] == 0)
+      error->all(FLERR,"Bad TIP4P angle type for PPPM/TIP4P");
+    if (typeB < 1 || typeB > atom->nbondtypes ||
+        force->bond->setflag[typeB] == 0)
+      error->all(FLERR,"Bad TIP4P bond type for PPPM/TIP4P");
+    double theta = force->angle->equilibrium_angle(typeA);
+    double blen = force->bond->equilibrium_distance(typeB);
+    alpha = qdist / (cos(0.5*theta) * blen);
+    if (domain->triclinic)
+      error->all(FLERR,"Cannot (yet) use PPPM with triclinic box and TIP4P");
+  }
+
+  // compute qsum & qsqsum and warn if not charge-neutral
+
+  qsum = qsqsum = 0.0;
+  for (int i = 0; i < atom->nlocal; i++) {
+    qsum += atom->q[i];
+    qsqsum += atom->q[i]*atom->q[i];
+  }
+
+  double tmp;
+  MPI_Allreduce(&qsum,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
+  qsum = tmp;
+  MPI_Allreduce(&qsqsum,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
+  qsqsum = tmp;
+  q2 = qsqsum * force->qqrd2e;
+
+  if (qsqsum == 0.0)
+    error->all(FLERR,"Cannot use kspace solver on system with no charge");
+  if (fabs(qsum) > SMALL && me == 0) {
+    char str[128];
+    sprintf(str,"System is not charge neutral, net charge = %g",qsum);
+    error->warning(FLERR,str);
+  }
+
+  // set accuracy (force units) from accuracy_relative or accuracy_absolute
+
+  if (accuracy_absolute >= 0.0) accuracy = accuracy_absolute;
+  else accuracy = accuracy_relative * two_charge_force;
+
+  // free all arrays previously allocated
+
+  deallocate();
+  if (peratom_allocate_flag) deallocate_peratom();
+  if (group_allocate_flag) deallocate_groups();
+
+  // setup FFT grid resolution and g_ewald
+  // normally one iteration thru while loop is all that is required
+  // if grid stencil does not extend beyond neighbor proc
+  //   or overlap is allowed, then done
+  // else reduce order and try again
+
+  int (*procneigh)[2] = comm->procneigh;
+
+  CommGrid *cgtmp = NULL;
+  int iteration = 0;
+
+  while (order >= minorder) {
+    if (iteration && me == 0)
+      error->warning(FLERR,"Reducing PPPM order b/c stencil extends "
+                     "beyond nearest neighbor processor");
+
+    if (stagger_flag && !differentiation_flag) compute_gf_denom();
+    set_grid_global();
+    set_grid_local();
+    if (overlap_allowed) break;
+
+    cgtmp = new CommGrid(lmp,world,1,1,
+                         nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
+                         nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out,
+                         procneigh[0][0],procneigh[0][1],procneigh[1][0],
+                         procneigh[1][1],procneigh[2][0],procneigh[2][1]);
+    cgtmp->ghost_notify();
+    if (!cgtmp->ghost_overlap()) break;
+    delete cgtmp;
+
+    order--;
+    iteration++;
+  }
+  
+  if (order < minorder) error->all(FLERR,"PPPM order < minimum allowed order");
+  if (!overlap_allowed && cgtmp->ghost_overlap())
+    error->all(FLERR,"PPPM grid stencil extends "
+               "beyond nearest neighbor processor");
+  if (cgtmp) delete cgtmp;
+
+  // adjust g_ewald
+
+  if (!gewaldflag) adjust_gewald();
+
+  // calculate the final accuracy
+
+  double estimated_accuracy = final_accuracy();
+
+  // print stats
+
+  int ngrid_max,nfft_both_max;
+  MPI_Allreduce(&ngrid,&ngrid_max,1,MPI_INT,MPI_MAX,world);
+  MPI_Allreduce(&nfft_both,&nfft_both_max,1,MPI_INT,MPI_MAX,world);
+
+  if (me == 0) {
+
+#ifdef FFT_SINGLE
+    const char fft_prec[] = "single";
+#else
+    const char fft_prec[] = "double";
+#endif
+
+    if (screen) {
+      fprintf(screen,"  G vector (1/distance) = %g\n",g_ewald);
+      fprintf(screen,"  grid = %d %d %d\n",nx_pppm,ny_pppm,nz_pppm);
+      fprintf(screen,"  stencil order = %d\n",order);
+      fprintf(screen,"  estimated absolute RMS force accuracy = %g\n",
+              estimated_accuracy);
+      fprintf(screen,"  estimated relative force accuracy = %g\n",
+              estimated_accuracy/two_charge_force);
+      fprintf(screen,"  using %s precision FFTs\n",fft_prec);
+      fprintf(screen,"  3d grid and FFT values/proc = %d %d\n",
+              ngrid_max,nfft_both_max);
+    }
+    if (logfile) {
+      fprintf(logfile,"  G vector (1/distance) = %g\n",g_ewald);
+      fprintf(logfile,"  grid = %d %d %d\n",nx_pppm,ny_pppm,nz_pppm);
+      fprintf(logfile,"  stencil order = %d\n",order);
+      fprintf(logfile,"  estimated absolute RMS force accuracy = %g\n",
+              estimated_accuracy);
+      fprintf(logfile,"  estimated relative force accuracy = %g\n",
+              estimated_accuracy/two_charge_force);
+      fprintf(logfile,"  using %s precision FFTs\n",fft_prec);
+      fprintf(logfile,"  3d grid and FFT values/proc = %d %d\n",
+              ngrid_max,nfft_both_max);
+    }
+  }
+
+  // allocate K-space dependent memory
+  // don't invoke allocate peratom() or group(), will be allocated when needed
+
+  allocate();
+  cg->ghost_notify();
+  cg->setup();
+
+  // pre-compute Green's function denomiator expansion
+  // pre-compute 1d charge distribution coefficients
+
+  compute_gf_denom();
+  if (differentiation_flag == 1) compute_sf_precoeff();
+  compute_rho_coeff();
+}
+
+/* ----------------------------------------------------------------------
+   adjust PPPM coeffs, called initially and whenever volume has changed
+------------------------------------------------------------------------- */
+
+void PPPM::setup()
+{
+  // recompute volume-dependent coeffs; triclinic boxes use setup_triclinic()
+  if (triclinic) {
+    setup_triclinic();
+    return;
+  }
+
+  int i,j,k,n;
+
+  // volume-dependent factors
+  // only orthogonal boxes get here (triclinic returned above), so use prd
+  // adjust z dimension for 2d slab PPPM
+  // z dimension for 3d PPPM is zprd since slab_volfactor = 1.0
+
+  double *prd = domain->prd;
+
+  double xprd = prd[0];
+  double yprd = prd[1];
+  double zprd = prd[2];
+  double zprd_slab = zprd*slab_volfactor;
+  volume = xprd * yprd * zprd_slab;
+  // inverse grid spacing per dimension and inverse volume of one grid cell
+  delxinv = nx_pppm/xprd;
+  delyinv = ny_pppm/yprd;
+  delzinv = nz_pppm/zprd_slab;
+
+  delvolinv = delxinv*delyinv*delzinv;
+  // wavevector increment 2*pi/L per dimension (z uses the slab length)
+  double unitkx = (MY_2PI/xprd);
+  double unitky = (MY_2PI/yprd);
+  double unitkz = (MY_2PI/zprd_slab);
+
+  // fkx,fky,fkz for my FFT grid pts
+  // per = grid index folded by integer division: i if 2*i < n, else i-n
+  double per;
+
+  for (i = nxlo_fft; i <= nxhi_fft; i++) {
+    per = i - nx_pppm*(2*i/nx_pppm);
+    fkx[i] = unitkx*per;
+  }
+
+  for (i = nylo_fft; i <= nyhi_fft; i++) {
+    per = i - ny_pppm*(2*i/ny_pppm);
+    fky[i] = unitky*per;
+  }
+
+  for (i = nzlo_fft; i <= nzhi_fft; i++) {
+    per = i - nz_pppm*(2*i/nz_pppm);
+    fkz[i] = unitkz*per;
+  }
+
+  // virial coefficients
+  // n counts my FFT grid pts with x varying fastest, then y, then z
+  double sqk,vterm;
+
+  n = 0;
+  for (k = nzlo_fft; k <= nzhi_fft; k++) {
+    for (j = nylo_fft; j <= nyhi_fft; j++) {
+      for (i = nxlo_fft; i <= nxhi_fft; i++) {
+        sqk = fkx[i]*fkx[i] + fky[j]*fky[j] + fkz[k]*fkz[k];
+        if (sqk == 0.0) {      // k = 0 point: all six coefficients are zero
+          vg[n][0] = 0.0;
+          vg[n][1] = 0.0;
+          vg[n][2] = 0.0;
+          vg[n][3] = 0.0;
+          vg[n][4] = 0.0;
+          vg[n][5] = 0.0;
+        } else {               // stored as xx,yy,zz,xy,xz,yz
+          vterm = -2.0 * (1.0/sqk + 0.25/(g_ewald*g_ewald));
+          vg[n][0] = 1.0 + vterm*fkx[i]*fkx[i];
+          vg[n][1] = 1.0 + vterm*fky[j]*fky[j];
+          vg[n][2] = 1.0 + vterm*fkz[k]*fkz[k];
+          vg[n][3] = vterm*fkx[i]*fky[j];
+          vg[n][4] = vterm*fkx[i]*fkz[k];
+          vg[n][5] = vterm*fky[j]*fkz[k];
+        }
+        n++;
+      }
+    }
+  }
+  // Green's function depends on the differentiation scheme in use
+  if (differentiation_flag == 1) compute_gf_ad();
+  else compute_gf_ik();
+}
+
+/* ----------------------------------------------------------------------
+   adjust PPPM coeffs, called initially and whenever volume has changed
+   for a triclinic system
+------------------------------------------------------------------------- */
+
+void PPPM::setup_triclinic()
+{
+  int i,j,k,n;
+  double *prd;
+
+  // volume-dependent factors
+  // adjust z dimension for 2d slab PPPM
+  // z dimension for 3d PPPM is zprd since slab_volfactor = 1.0
+
+  prd = domain->prd;
+
+  double xprd = prd[0];
+  double yprd = prd[1];
+  double zprd = prd[2];
+  double zprd_slab = zprd*slab_volfactor;
+  volume = xprd * yprd * zprd_slab;
+
+  // use lamda (0-1) coordinates
+  // so the inverse grid spacings are just the grid counts
+  delxinv = nx_pppm;
+  delyinv = ny_pppm;
+  delzinv = nz_pppm;
+  delvolinv = delxinv*delyinv*delzinv/volume;
+
+  // fkx,fky,fkz for my FFT grid pts
+  // stored per flattened grid pt n (x fastest), unlike the 1d arrays of setup()
+  double per_i,per_j,per_k;
+
+  n = 0;
+  for (k = nzlo_fft; k <= nzhi_fft; k++) {
+    per_k = k - nz_pppm*(2*k/nz_pppm);
+    for (j = nylo_fft; j <= nyhi_fft; j++) {
+      per_j = j - ny_pppm*(2*j/ny_pppm);
+      for (i = nxlo_fft; i <= nxhi_fft; i++) {
+        per_i = i - nx_pppm*(2*i/nx_pppm);
+        // wavevector in lamda units, converted in place by x2lamdaT()
+        double unitk_lamda[3];
+        unitk_lamda[0] = 2.0*MY_PI*per_i;
+        unitk_lamda[1] = 2.0*MY_PI*per_j;
+        unitk_lamda[2] = 2.0*MY_PI*per_k;
+        x2lamdaT(&unitk_lamda[0],&unitk_lamda[0]);
+        fkx[n] = unitk_lamda[0];
+        fky[n] = unitk_lamda[1];
+        fkz[n] = unitk_lamda[2];
+        n++;
+      }
+    }
+  }
+
+  // virial coefficients
+  // one set of six per grid pt, using the per-pt wavevector stored above
+  double sqk,vterm;
+
+  for (n = 0; n < nfft; n++) {
+    sqk = fkx[n]*fkx[n] + fky[n]*fky[n] + fkz[n]*fkz[n];
+    if (sqk == 0.0) {          // k = 0 point: all six coefficients are zero
+      vg[n][0] = 0.0;
+      vg[n][1] = 0.0;
+      vg[n][2] = 0.0;
+      vg[n][3] = 0.0;
+      vg[n][4] = 0.0;
+      vg[n][5] = 0.0;
+    } else {                   // stored as xx,yy,zz,xy,xz,yz
+      vterm = -2.0 * (1.0/sqk + 0.25/(g_ewald*g_ewald));
+      vg[n][0] = 1.0 + vterm*fkx[n]*fkx[n];
+      vg[n][1] = 1.0 + vterm*fky[n]*fky[n];
+      vg[n][2] = 1.0 + vterm*fkz[n]*fkz[n];
+      vg[n][3] = vterm*fkx[n]*fky[n];
+      vg[n][4] = vterm*fkx[n]*fkz[n];
+      vg[n][5] = vterm*fky[n]*fkz[n];
+    }
+  }
+  // only the ik Green's function is computed for triclinic boxes here
+  compute_gf_ik_triclinic();
+}
+
+/* ----------------------------------------------------------------------
+   reset local grid arrays and communication stencils
+   called by fix balance b/c it changed sizes of processor sub-domains
+------------------------------------------------------------------------- */
+
+void PPPM::setup_grid()
+{
+  // free all arrays previously allocated
+  // per-atom and group arrays are freed only if they currently exist
+  deallocate();
+  if (peratom_allocate_flag) deallocate_peratom();
+  if (group_allocate_flag) deallocate_groups();
+
+  // reset portion of global grid that each proc owns
+  // global grid size (nx,ny,nz_pppm) is not changed here
+  set_grid_local();
+
+  // reallocate K-space dependent memory
+  // check if grid communication is now overlapping if not allowed
+  // don't invoke allocate peratom() or group(), will be allocated when needed
+
+  allocate();
+
+  cg->ghost_notify();
+  if (overlap_allowed == 0 && cg->ghost_overlap())
+    error->all(FLERR,"PPPM grid stencil extends "
+               "beyond nearest neighbor processor");
+  cg->setup();
+
+  // pre-compute Green's function denominator expansion
+  // pre-compute 1d charge distribution coefficients
+  // self-force pre-coefficients are only computed when differentiation_flag = 1
+  compute_gf_denom();
+  if (differentiation_flag == 1) compute_sf_precoeff();
+  compute_rho_coeff();
+
+  // pre-compute volume-dependent coeffs
+
+  setup();
+}
+
+/* ----------------------------------------------------------------------
+   compute the PPPM long-range force, energy, virial
+------------------------------------------------------------------------- */
+
+void PPPM::compute(int eflag, int vflag)
+{
+  int i,j;
+
+  // set energy/virial flags
+  // invoke allocate_peratom() if needed for first time
+  // with both flags zero, ev_setup() is skipped and all tally flags cleared
+  if (eflag || vflag) ev_setup(eflag,vflag);
+  else evflag = evflag_atom = eflag_global = vflag_global =
+         eflag_atom = vflag_atom = 0;
+
+  if (evflag_atom && !peratom_allocate_flag) {
+    allocate_peratom();
+    cg_peratom->ghost_notify();
+    cg_peratom->setup();
+  }
+
+  // convert atoms from box to lamda coords
+  // only for triclinic boxes; undone again at the end of this function
+  if (triclinic == 0) boxlo = domain->boxlo;
+  else {
+    boxlo = domain->boxlo_lamda;
+    domain->x2lamda(atom->nlocal);
+  }
+
+  // extend size of per-atom arrays if necessary
+  // triggered by nlocal exceeding nmax, then sized to atom->nmax
+  if (atom->nlocal > nmax) {
+    memory->destroy(part2grid);
+    nmax = atom->nmax;
+    memory->create(part2grid,nmax,3,"pppm:part2grid");
+  }
+
+  // find grid points for all my particles
+  // map my particle charge onto my local 3d density grid
+
+  particle_map();
+  make_rho();
+
+  // all procs communicate density values from their ghost cells
+  //   to fully sum contribution in their 3d bricks
+  // remap from 3d decomposition to FFT decomposition
+
+  cg->reverse_comm(this,REVERSE_RHO);
+  brick2fft();
+
+  // compute potential gradient on my FFT grid and
+  //   portion of e_long on this proc's FFT grid
+  // return gradients (electric fields) in 3d brick decomposition
+  // also performs per-atom calculations via poisson_peratom()
+
+  poisson();
+
+  // all procs communicate E-field values
+  // to fill ghost cells surrounding their 3d bricks
+
+  if (differentiation_flag == 1) cg->forward_comm(this,FORWARD_AD);
+  else cg->forward_comm(this,FORWARD_IK);
+
+  // extra per-atom energy/virial communication
+  // with differentiation_flag = 1 this is done only if vflag_atom is set
+  if (evflag_atom) {
+    if (differentiation_flag == 1 && vflag_atom) 
+      cg_peratom->forward_comm(this,FORWARD_AD_PERATOM);
+    else if (differentiation_flag == 0)
+      cg_peratom->forward_comm(this,FORWARD_IK_PERATOM);
+  }
+
+  // calculate the force on my particles
+
+  fieldforce();
+
+  // per-atom counterpart of fieldforce(), only when evflag_atom is set
+
+  if (evflag_atom) fieldforce_peratom();
+
+  // sum global energy across procs and add in volume-dependent term
+  // qscale converts to energy units and applies the scale factor
+  const double qscale = force->qqrd2e * scale;
+
+  if (eflag_global) {
+    double energy_all;
+    MPI_Allreduce(&energy,&energy_all,1,MPI_DOUBLE,MPI_SUM,world);
+    energy = energy_all;
+
+    energy *= 0.5*volume;
+    energy -= g_ewald*qsqsum/MY_PIS +
+      MY_PI2*qsum*qsum / (g_ewald*g_ewald*volume);
+    energy *= qscale;
+  }
+
+  // sum global virial across procs
+
+  if (vflag_global) {
+    double virial_all[6];
+    MPI_Allreduce(virial,virial_all,6,MPI_DOUBLE,MPI_SUM,world);
+    for (i = 0; i < 6; i++) virial[i] = 0.5*qscale*volume*virial_all[i];
+  }
+
+  // per-atom energy/virial
+  // energy includes self-energy correction
+  // ntotal accounts for TIP4P tallying eatom/vatom for ghost atoms
+
+  if (evflag_atom) {
+    double *q = atom->q;
+    int nlocal = atom->nlocal;
+    int ntotal = nlocal;
+    if (tip4pflag) ntotal += atom->nghost;
+
+    if (eflag_atom) {
+      for (i = 0; i < nlocal; i++) {
+        eatom[i] *= 0.5;
+        eatom[i] -= g_ewald*q[i]*q[i]/MY_PIS + MY_PI2*q[i]*qsum /
+          (g_ewald*g_ewald*volume);
+        eatom[i] *= qscale;
+      }
+      for (i = nlocal; i < ntotal; i++) eatom[i] *= 0.5*qscale;  // ghosts: scale only
+    }
+
+    if (vflag_atom) {
+      for (i = 0; i < ntotal; i++)
+        for (j = 0; j < 6; j++) vatom[i][j] *= 0.5*qscale;
+    }
+  }
+
+  // 2d slab correction
+
+  if (slabflag == 1) slabcorr();
+
+  // convert atoms back from lamda to box coords
+
+  if (triclinic) domain->lamda2x(atom->nlocal);
+}
+
+/* ----------------------------------------------------------------------
+   allocate memory that depends on # of K-vectors and order
+------------------------------------------------------------------------- */
+
+void PPPM::allocate()
+{
+  memory->create3d_offset(density_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
+                          nxlo_out,nxhi_out,"pppm:density_brick");
+  // flat arrays sized by nfft_both; work1/work2 hold 2 values per grid pt
+  memory->create(density_fft,nfft_both,"pppm:density_fft");
+  memory->create(greensfn,nfft_both,"pppm:greensfn");
+  memory->create(work1,2*nfft_both,"pppm:work1");
+  memory->create(work2,2*nfft_both,"pppm:work2");
+  memory->create(vg,nfft_both,6,"pppm:vg");
+  // orthogonal: 1d wavevector arrays per dimension; triclinic: one per grid pt
+  if (triclinic == 0) {
+    memory->create1d_offset(fkx,nxlo_fft,nxhi_fft,"pppm:fkx");
+    memory->create1d_offset(fky,nylo_fft,nyhi_fft,"pppm:fky");
+    memory->create1d_offset(fkz,nzlo_fft,nzhi_fft,"pppm:fkz");
+  } else {
+    memory->create(fkx,nfft_both,"pppm:fkx");
+    memory->create(fky,nfft_both,"pppm:fky");
+    memory->create(fkz,nfft_both,"pppm:fkz");
+  }
+  // differentiation_flag = 1 needs u_brick + sf_precoeff, else 3 vd*_brick
+  if (differentiation_flag == 1) {
+    memory->create3d_offset(u_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
+                          nxlo_out,nxhi_out,"pppm:u_brick");
+
+    memory->create(sf_precoeff1,nfft_both,"pppm:sf_precoeff1");
+    memory->create(sf_precoeff2,nfft_both,"pppm:sf_precoeff2");
+    memory->create(sf_precoeff3,nfft_both,"pppm:sf_precoeff3");
+    memory->create(sf_precoeff4,nfft_both,"pppm:sf_precoeff4");
+    memory->create(sf_precoeff5,nfft_both,"pppm:sf_precoeff5");
+    memory->create(sf_precoeff6,nfft_both,"pppm:sf_precoeff6");
+
+  } else {
+    memory->create3d_offset(vdx_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
+                            nxlo_out,nxhi_out,"pppm:vdx_brick");
+    memory->create3d_offset(vdy_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
+                            nxlo_out,nxhi_out,"pppm:vdy_brick");
+    memory->create3d_offset(vdz_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
+                            nxlo_out,nxhi_out,"pppm:vdz_brick");
+  }
+
+  // summation coeffs
+  // order_allocated records the order used here so deallocate() can match it
+  order_allocated = order;
+  if (!stagger_flag) memory->create(gf_b,order,"pppm:gf_b");
+  memory->create2d_offset(rho1d,3,-order/2,order/2,"pppm:rho1d");
+  memory->create2d_offset(drho1d,3,-order/2,order/2,"pppm:drho1d");
+  memory->create2d_offset(rho_coeff,order,(1-order)/2,order/2,"pppm:rho_coeff");
+  memory->create2d_offset(drho_coeff,order,(1-order)/2,order/2,
+                          "pppm:drho_coeff");
+
+  // create 2 FFTs and a Remap
+  // 1st FFT keeps data in FFT decomposition
+  // 2nd FFT returns data in 3d brick decomposition
+  // remap takes data from 3d brick to FFT decomposition
+
+  int tmp;   // receives a value from the FFT3d constructors, not used afterwards
+
+  fft1 = new FFT3d(lmp,world,nx_pppm,ny_pppm,nz_pppm,
+                   nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft,
+                   nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft,
+                   0,0,&tmp);
+
+  fft2 = new FFT3d(lmp,world,nx_pppm,ny_pppm,nz_pppm,
+                   nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft,
+                   nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
+                   0,0,&tmp);
+
+  remap = new Remap(lmp,world,
+                    nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
+                    nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft,
+                    1,0,0,FFT_PRECISION);
+
+  // create ghost grid object for rho and electric field communication
+  // the two branches differ only in the 3rd CommGrid argument (1 vs 3)
+  int (*procneigh)[2] = comm->procneigh;
+
+  if (differentiation_flag == 1)
+    cg = new CommGrid(lmp,world,1,1,
+                      nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
+                      nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out,
+                      procneigh[0][0],procneigh[0][1],procneigh[1][0],
+                      procneigh[1][1],procneigh[2][0],procneigh[2][1]);
+  else
+    cg = new CommGrid(lmp,world,3,1,
+                      nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
+                      nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out,
+                      procneigh[0][0],procneigh[0][1],procneigh[1][0],
+                      procneigh[1][1],procneigh[2][0],procneigh[2][1]);
+}
+
+/* ----------------------------------------------------------------------
+   deallocate memory that depends on # of K-vectors and order
+------------------------------------------------------------------------- */
+
+void PPPM::deallocate()
+{
+  memory->destroy3d_offset(density_brick,nzlo_out,nylo_out,nxlo_out);
+  // mirror of the differentiation_flag branches in allocate()
+  if (differentiation_flag == 1) {
+    memory->destroy3d_offset(u_brick,nzlo_out,nylo_out,nxlo_out);
+    memory->destroy(sf_precoeff1);
+    memory->destroy(sf_precoeff2);
+    memory->destroy(sf_precoeff3);
+    memory->destroy(sf_precoeff4);
+    memory->destroy(sf_precoeff5);
+    memory->destroy(sf_precoeff6);
+  } else {
+    memory->destroy3d_offset(vdx_brick,nzlo_out,nylo_out,nxlo_out);
+    memory->destroy3d_offset(vdy_brick,nzlo_out,nylo_out,nxlo_out);
+    memory->destroy3d_offset(vdz_brick,nzlo_out,nylo_out,nxlo_out);
+  }
+
+  memory->destroy(density_fft);
+  memory->destroy(greensfn);
+  memory->destroy(work1);
+  memory->destroy(work2);
+  memory->destroy(vg);
+  // wavevector arrays were created with offsets only for orthogonal boxes
+  if (triclinic == 0) {
+    memory->destroy1d_offset(fkx,nxlo_fft);
+    memory->destroy1d_offset(fky,nylo_fft);
+    memory->destroy1d_offset(fkz,nzlo_fft);
+  } else {
+    memory->destroy(fkx);
+    memory->destroy(fky);
+    memory->destroy(fkz);
+  }
+  // offsets below use order_allocated, the order recorded by allocate()
+  memory->destroy(gf_b);
+  if (stagger_flag) gf_b = NULL;
+  memory->destroy2d_offset(rho1d,-order_allocated/2);
+  memory->destroy2d_offset(drho1d,-order_allocated/2);
+  memory->destroy2d_offset(rho_coeff,(1-order_allocated)/2);
+  memory->destroy2d_offset(drho_coeff,(1-order_allocated)/2);
+  // NOTE(review): these pointers are not reset to NULL after delete
+  delete fft1;
+  delete fft2;
+  delete remap;
+  delete cg;
+}
+
+/* ----------------------------------------------------------------------
+   allocate per-atom memory that depends on # of K-vectors and order
+------------------------------------------------------------------------- */
+
+void PPPM::allocate_peratom()
+{
+  // per-atom grid storage is created on demand; record that it now exists
+  peratom_allocate_flag = 1;
+
+  // with differentiation_flag = 1, u_brick is already created by allocate(),
+  // so it is only created here in the other case
+
+  const int use_ad = (differentiation_flag == 1);
+  if (!use_ad)
+    memory->create3d_offset(u_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
+                            nxlo_out,nxhi_out,"pppm:u_brick");
+
+  // six bricks v0..v5, each indexed over the *_out grid bounds
+
+  memory->create3d_offset(v0_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
+                          nxlo_out,nxhi_out,"pppm:v0_brick");
+  memory->create3d_offset(v1_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
+                          nxlo_out,nxhi_out,"pppm:v1_brick");
+  memory->create3d_offset(v2_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
+                          nxlo_out,nxhi_out,"pppm:v2_brick");
+  memory->create3d_offset(v3_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
+                          nxlo_out,nxhi_out,"pppm:v3_brick");
+  memory->create3d_offset(v4_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
+                          nxlo_out,nxhi_out,"pppm:v4_brick");
+  memory->create3d_offset(v5_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
+                          nxlo_out,nxhi_out,"pppm:v5_brick");
+
+  // ghost grid object for the per-atom bricks
+  // 3rd CommGrid argument is 6 with differentiation_flag = 1, else 7
+  // (presumably values per grid pt -- confirm against CommGrid)
+  int (*nbr)[2] = comm->procneigh;
+  const int nfields = use_ad ? 6 : 7;
+
+  cg_peratom =
+    new CommGrid(lmp,world,nfields,1,
+                 nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
+                 nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out,
+                 nbr[0][0],nbr[0][1],nbr[1][0],
+                 nbr[1][1],nbr[2][0],nbr[2][1]);
+}
+
+/* ----------------------------------------------------------------------
+   deallocate per-atom memory that depends on # of K-vectors and order
+------------------------------------------------------------------------- */
+
+void PPPM::deallocate_peratom()
+{
+  // u_brick is part of the per-atom set only when differentiation_flag != 1
+  if (differentiation_flag != 1) {
+    memory->destroy3d_offset(u_brick,nzlo_out,nylo_out,nxlo_out);
+  }
+  memory->destroy3d_offset(v5_brick,nzlo_out,nylo_out,nxlo_out);
+  memory->destroy3d_offset(v4_brick,nzlo_out,nylo_out,nxlo_out);
+  memory->destroy3d_offset(v3_brick,nzlo_out,nylo_out,nxlo_out);
+  memory->destroy3d_offset(v2_brick,nzlo_out,nylo_out,nxlo_out);
+  memory->destroy3d_offset(v1_brick,nzlo_out,nylo_out,nxlo_out);
+  memory->destroy3d_offset(v0_brick,nzlo_out,nylo_out,nxlo_out);
+  delete cg_peratom;
+  // mark per-atom storage as released
+  peratom_allocate_flag = 0;
+}
+
+/* ----------------------------------------------------------------------
+   set global size of PPPM grid = nx,ny,nz_pppm
+   used for charge accumulation, FFTs, and electric field interpolation
+------------------------------------------------------------------------- */
+
+void PPPM::set_grid_global()
+{
+  // use xprd,yprd,zprd (even if triclinic, and then scale later)
+  // adjust z dimension for 2d slab PPPM
+  // 3d PPPM just uses zprd since slab_volfactor = 1.0
+
+  double xprd = domain->xprd;
+  double yprd = domain->yprd;
+  double zprd = domain->zprd;
+  double zprd_slab = zprd*slab_volfactor;
+
+  // make initial g_ewald estimate
+  // based on desired accuracy and real space cutoff
+  // fluid-occupied volume used to estimate real-space error
+  // zprd used rather than zprd_slab
+
+  double h;
+  bigint natoms = atom->natoms;
+  // skipped when gewaldflag is set, i.e. g_ewald is kept as it is
+  if (!gewaldflag) {
+    if (accuracy <= 0.0)
+      error->all(FLERR,"KSpace accuracy must be > 0");
+    g_ewald = accuracy*sqrt(natoms*cutoff*xprd*yprd*zprd) / (2.0*q2);
+    if (g_ewald >= 1.0) g_ewald = (1.35 - 0.15*log(accuracy))/cutoff;
+    else g_ewald = sqrt(-log(g_ewald)) / cutoff;
+  }
+
+  // set optimal nx_pppm,ny_pppm,nz_pppm based on order and accuracy
+  // nz_pppm uses extended zprd_slab instead of zprd
+  // reduce it until accuracy target is met
+  // skipped when gridflag is set, i.e. the grid counts are kept as they are
+  if (!gridflag) {
+
+    if (differentiation_flag == 1 || stagger_flag) {
+      // start from a coarse spacing 4/g_ewald, shrink it 5% per pass
+      h = h_x = h_y = h_z = 4.0/g_ewald;
+      int count = 0;
+      while (1) {
+
+        // set grid dimension
+        nx_pppm = static_cast<int> (xprd/h_x);
+        ny_pppm = static_cast<int> (yprd/h_y);
+        nz_pppm = static_cast<int> (zprd_slab/h_z);
+        // at least 2 grid pts per dimension
+        if (nx_pppm <= 1) nx_pppm = 2;
+        if (ny_pppm <= 1) ny_pppm = 2;
+        if (nz_pppm <= 1) nz_pppm = 2;
+
+        //set local grid dimension
+        int npey_fft,npez_fft;
+        if (nz_pppm >= nprocs) {
+          npey_fft = 1;
+          npez_fft = nprocs;
+        } else procs2grid2d(nprocs,ny_pppm,nz_pppm,&npey_fft,&npez_fft);
+
+        int me_y = me % npey_fft;
+        int me_z = me / npey_fft;
+        // my FFT portion: all of x, a slice of y and z
+        nxlo_fft = 0;
+        nxhi_fft = nx_pppm - 1;
+        nylo_fft = me_y*ny_pppm/npey_fft;
+        nyhi_fft = (me_y+1)*ny_pppm/npey_fft - 1;
+        nzlo_fft = me_z*nz_pppm/npez_fft;
+        nzhi_fft = (me_z+1)*nz_pppm/npez_fft - 1;
+
+        double df_kspace = compute_df_kspace();
+
+        count++;
+
+        // break loop if the accuracy has been reached or
+        // too many loops have been performed
+
+        if (df_kspace <= accuracy) break;
+        if (count > 500) error->all(FLERR, "Could not compute grid size");
+        h *= 0.95;
+        h_x = h_y = h_z = h;
+      }
+
+    } else {
+      // ik differentiation: each dimension is refined independently
+      double err;
+      h_x = h_y = h_z = 1.0/g_ewald;
+      // initial counts give a spacing no larger than 1/g_ewald
+      nx_pppm = static_cast<int> (xprd/h_x) + 1;
+      ny_pppm = static_cast<int> (yprd/h_y) + 1;
+      nz_pppm = static_cast<int> (zprd_slab/h_z) + 1;
+      // NOTE(review): err is evaluated before h is updated, so it lags h by one step
+      err = estimate_ik_error(h_x,xprd,natoms);
+      while (err > accuracy) {
+        err = estimate_ik_error(h_x,xprd,natoms);
+        nx_pppm++;
+        h_x = xprd/nx_pppm;
+      }
+
+      err = estimate_ik_error(h_y,yprd,natoms);
+      while (err > accuracy) {
+        err = estimate_ik_error(h_y,yprd,natoms);
+        ny_pppm++;
+        h_y = yprd/ny_pppm;
+      }
+
+      err = estimate_ik_error(h_z,zprd_slab,natoms);
+      while (err > accuracy) {
+        err = estimate_ik_error(h_z,zprd_slab,natoms);
+        nz_pppm++;
+        h_z = zprd_slab/nz_pppm;
+      }
+    }
+
+    // scale grid for triclinic skew
+    // NOTE(review): tmp[2] uses zprd, not zprd_slab -- confirm this is intended
+    if (triclinic) {
+      double tmp[3];
+      tmp[0] = nx_pppm/xprd;
+      tmp[1] = ny_pppm/yprd;
+      tmp[2] = nz_pppm/zprd;
+      lamda2xT(&tmp[0],&tmp[0]);
+      nx_pppm = static_cast<int>(tmp[0]) + 1;
+      ny_pppm = static_cast<int>(tmp[1]) + 1;
+      nz_pppm = static_cast<int>(tmp[2]) + 1;
+    }
+  }
+
+  // boost grid size until it is factorable
+
+  while (!factorable(nx_pppm)) nx_pppm++;
+  while (!factorable(ny_pppm)) ny_pppm++;
+  while (!factorable(nz_pppm)) nz_pppm++;
+  // recompute grid spacings from the final grid counts
+  if (triclinic == 0) {
+    h_x = xprd/nx_pppm;
+    h_y = yprd/ny_pppm;
+    h_z = zprd_slab/nz_pppm;
+  } else {
+    double tmp[3];
+    tmp[0] = nx_pppm;
+    tmp[1] = ny_pppm;
+    tmp[2] = nz_pppm;
+    x2lamdaT(&tmp[0],&tmp[0]);
+    h_x = 1.0/tmp[0];
+    h_y = 1.0/tmp[1];
+    h_z = 1.0/tmp[2];
+  }
+  // grid counts must stay below OFFSET in every dimension
+  if (nx_pppm >= OFFSET || ny_pppm >= OFFSET || nz_pppm >= OFFSET)
+    error->all(FLERR,"PPPM grid is too large");
+}
+
+/* ----------------------------------------------------------------------
+   check if all factors of n are in list of factors
+   return 1 if yes, 0 if no
+------------------------------------------------------------------------- */
+
+int PPPM::factorable(int n)
+{
+  // strip one allowed factor from n per pass
+  // n is factorable once it has been reduced to 1 (or was <= 1 to begin with)
+
+  while (n > 1) {
+    int which = 0;
+    while (which < nfactors && n % factors[which] != 0) which++;
+
+    // no entry of factors[] divides what is left of n
+    if (which == nfactors) return 0;
+
+    n /= factors[which];
+  }
+  return 1;
+}
+
+/* ----------------------------------------------------------------------
+   compute estimated kspace force error
+------------------------------------------------------------------------- */
+
+double PPPM::compute_df_kspace()
+{
+  const double xprd = domain->xprd;
+  const double yprd = domain->yprd;
+  const double zprd_slab = domain->zprd*slab_volfactor;
+  const bigint natoms = atom->natoms;
+
+  // differentiation_flag = 1 or staggered grid: estimate from compute_qopt()
+  if (differentiation_flag == 1 || stagger_flag) {
+    const double qopt = compute_qopt();
+    return sqrt(qopt/natoms)*q2/(xprd*yprd*zprd_slab);
+  }
+
+  // otherwise: root-mean-square of the three per-dimension ik estimates
+  const double err_x = estimate_ik_error(h_x,xprd,natoms);
+  const double err_y = estimate_ik_error(h_y,yprd,natoms);
+  const double err_z = estimate_ik_error(h_z,zprd_slab,natoms);
+  return sqrt(err_x*err_x + err_y*err_y + err_z*err_z) / sqrt(3.0);
+}
+
+/* ----------------------------------------------------------------------
+   compute qopt
+------------------------------------------------------------------------- */
+
+double PPPM::compute_qopt()
+{
+  double qopt = 0.0;
+  double *prd = domain->prd;
+  // NOTE: the member "volume" is overwritten below as a side effect
+  const double xprd = prd[0];
+  const double yprd = prd[1];
+  const double zprd = prd[2];
+  const double zprd_slab = zprd*slab_volfactor;
+  volume = xprd * yprd * zprd_slab;
+
+  const double unitkx = (MY_2PI/xprd);
+  const double unitky = (MY_2PI/yprd);
+  const double unitkz = (MY_2PI/zprd_slab);
+
+  double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz;
+  double u1, u2, sqk;
+  double sum1,sum2,sum3,sum4,dot2;
+
+  int k,l,m,nx,ny,nz;
+  const int twoorder = 2*order;
+  // loop over my FFT grid pts; kper,lper,mper are the folded grid indices
+  for (m = nzlo_fft; m <= nzhi_fft; m++) {
+    const int mper = m - nz_pppm*(2*m/nz_pppm);
+
+    for (l = nylo_fft; l <= nyhi_fft; l++) {
+      const int lper = l - ny_pppm*(2*l/ny_pppm);
+
+      for (k = nxlo_fft; k <= nxhi_fft; k++) {
+        const int kper = k - nx_pppm*(2*k/nx_pppm);
+
+        sqk = square(unitkx*kper) + square(unitky*lper) + square(unitkz*mper);
+        // the k = 0 point contributes nothing
+        if (sqk != 0.0) {
+          // sums over image indices nx,ny,nz = -2..2 in each dimension
+          sum1 = 0.0;
+          sum2 = 0.0;
+          sum3 = 0.0;
+          sum4 = 0.0;
+          for (nx = -2; nx <= 2; nx++) {
+            qx = unitkx*(kper+nx_pppm*nx);
+            sx = exp(-0.25*square(qx/g_ewald));
+            argx = 0.5*qx*xprd/nx_pppm;
+            wx = powsinxx(argx,twoorder);
+            qx *= qx;           // from here on qx holds the squared component
+
+            for (ny = -2; ny <= 2; ny++) {
+              qy = unitky*(lper+ny_pppm*ny);
+              sy = exp(-0.25*square(qy/g_ewald));
+              argy = 0.5*qy*yprd/ny_pppm;
+              wy = powsinxx(argy,twoorder);
+              qy *= qy;         // squared component
+
+              for (nz = -2; nz <= 2; nz++) {
+                qz = unitkz*(mper+nz_pppm*nz);
+                sz = exp(-0.25*square(qz/g_ewald));
+                argz = 0.5*qz*zprd_slab/nz_pppm;
+                wz = powsinxx(argz,twoorder);
+                qz *= qz;       // squared component
+                // dot2 = squared length of the shifted wavevector
+                dot2 = qx+qy+qz;
+                u1   = sx*sy*sz;
+                u2   = wx*wy*wz;
+                sum1 += u1*u1/dot2*MY_4PI*MY_4PI;
+                sum2 += u1 * u2 * MY_4PI;
+                sum3 += u2;
+                sum4 += dot2*u2;
+              }
+            }
+          }
+          sum2 *= sum2;
+          qopt += sum1 - sum2/(sum3*sum4);
+        }
+      }
+    }
+  }
+  double qopt_all;   // sum of the per-proc partial results over all procs
+  MPI_Allreduce(&qopt,&qopt_all,1,MPI_DOUBLE,MPI_SUM,world);
+  return qopt_all;
+}
+
+/* ----------------------------------------------------------------------
+   estimate kspace force error for ik method
+------------------------------------------------------------------------- */
+
+double PPPM::estimate_ik_error(double h, double prd, bigint natoms)
+{
+  const double hg = h*g_ewald;   // h scaled by g_ewald
+  double series = 0.0;
+  for (int m = 0; m < order; m++) series += acons[order][m] * pow(hg,2.0*m);
+
+  // h = spacing, prd = box length, natoms = atom count for one dimension
+  return q2 * pow(hg,static_cast<double>(order)) *
+    sqrt(g_ewald*prd*sqrt(MY_2PI)*series/natoms) / (prd*prd);
+}
+
+/* ----------------------------------------------------------------------
+   adjust the g_ewald parameter to near its optimal value
+   using a Newton-Raphson solver
+------------------------------------------------------------------------- */
+
+void PPPM::adjust_gewald()
+{
+  // Newton-Raphson update of the member g_ewald:  x <- x - f(x)/f'(x)
+  //   f  = newton_raphson_f(), evaluated at the current g_ewald
+  //   f' = derivf(), a forward-difference estimate
+  // return as soon as |f| < SMALL, give up after LARGE iterations
+
+  for (int iter = 0; iter < LARGE; iter++) {
+    const double dx = newton_raphson_f() / derivf();
+    g_ewald -= dx;
+    if (fabs(newton_raphson_f()) < SMALL) return;
+  }
+
+  // no convergence
+  // message is a constant, so hand it to error->all() directly
+  //   instead of copying it through a fixed-size sprintf() buffer
+
+  error->all(FLERR,"Could not compute g_ewald");
+}
+
+/* ----------------------------------------------------------------------
+ Calculate f(x) = df_rspace - df_kspace at the current g_ewald,
+ the function whose root the Newton-Raphson solver searches for
+ ------------------------------------------------------------------------- */
+
+double PPPM::newton_raphson_f()
+{
+  // box lengths and total atom count
+
+  const double lx = domain->xprd;
+  const double ly = domain->yprd;
+  const double lz = domain->zprd;
+  const bigint ntotal = atom->natoms;
+
+  // real-space error estimate at the current g_ewald and cutoff
+
+  const double damping = exp(-g_ewald*g_ewald*cutoff*cutoff);
+  const double norm = sqrt(ntotal*cutoff*lx*ly*lz);
+  const double rspace_err = 2.0*q2*damping / norm;
+
+  // k-space error estimate comes from compute_df_kspace()
+  // result is the signed difference: real-space minus k-space
+
+  const double kspace_err = compute_df_kspace();
+
+  return rspace_err - kspace_err;
+}
+
+/* ----------------------------------------------------------------------
+ Calculate numerical derivative f'(x) using forward difference
+ [f(x + h) - f(x)] / h
+ ------------------------------------------------------------------------- */
+
+double PPPM::derivf()
+{
+  const double step = 0.000001;   // finite-difference step in g_ewald
+
+  // f at the current g_ewald
+
+  const double f_here = newton_raphson_f();
+
+  // f at g_ewald + step, then put the member g_ewald back
+
+  const double g_saved = g_ewald;
+  g_ewald += step;
+  const double f_ahead = newton_raphson_f();
+  g_ewald = g_saved;
+
+  return (f_ahead - f_here)/step;
+}
+
+/* ----------------------------------------------------------------------
+   Calculate the final estimate of the accuracy
+------------------------------------------------------------------------- */
+
+double PPPM::final_accuracy()
+{
+  // box lengths and total atom count
+  // (the slab-extended z length is not needed by any term below,
+  //   so the formerly unused zprd_slab local has been dropped)
+
+  const double xprd = domain->xprd;
+  const double yprd = domain->yprd;
+  const double zprd = domain->zprd;
+  const bigint natoms = atom->natoms;
+
+  // three error contributions:
+  //   df_kspace = k-space estimate from compute_df_kspace()
+  //   df_rspace = real-space estimate at the current g_ewald and cutoff
+  //   df_table  = estimate from estimate_table_accuracy()
+
+  const double df_kspace = compute_df_kspace();
+  const double q2_over_sqrt = q2 / sqrt(natoms*cutoff*xprd*yprd*zprd);
+  const double df_rspace =
+    2.0 * q2_over_sqrt * exp(-g_ewald*g_ewald*cutoff*cutoff);
+  const double df_table = estimate_table_accuracy(q2_over_sqrt,df_rspace);
+
+  // combine as root of the sum of squares
+
+  return sqrt(df_kspace*df_kspace + df_rspace*df_rspace +
+              df_table*df_table);
+}
+
+/* ----------------------------------------------------------------------
+   set local subset of PPPM/FFT grid that I own
+   n xyz lo/hi in = 3d brick that I own (inclusive)
+   n xyz lo/hi out = 3d brick + ghost cells in 6 directions (inclusive)
+   n xyz lo/hi fft = FFT columns that I own (all of x dim, 2d decomp in yz)
+------------------------------------------------------------------------- */
+
+void PPPM::set_grid_local()
+{
+  // sets, in order: n{x,y,z}{lo,hi}_in, nlower/nupper, shift/shiftone,
+  //   n{x,y,z}{lo,hi}_out, n{x,y,z}{lo,hi}_fft, ngrid, nfft, nfft_both
+  // also points the member boxlo at the orthogonal or lamda box origin
+
+  // global indices of PPPM grid range from 0 to N-1
+  // nlo_in,nhi_in = lower/upper limits of the 3d sub-brick of
+  //   global PPPM grid that I own without ghost cells
+  // for slab PPPM, assign z grid as if it were not extended
+
+  nxlo_in = static_cast<int> (comm->xsplit[comm->myloc[0]] * nx_pppm);
+  nxhi_in = static_cast<int> (comm->xsplit[comm->myloc[0]+1] * nx_pppm) - 1;
+
+  nylo_in = static_cast<int> (comm->ysplit[comm->myloc[1]] * ny_pppm);
+  nyhi_in = static_cast<int> (comm->ysplit[comm->myloc[1]+1] * ny_pppm) - 1;
+
+  nzlo_in = static_cast<int>
+      (comm->zsplit[comm->myloc[2]] * nz_pppm/slab_volfactor);
+  nzhi_in = static_cast<int>
+      (comm->zsplit[comm->myloc[2]+1] * nz_pppm/slab_volfactor) - 1;
+
+  // nlower,nupper = stencil size for mapping particles to PPPM grid
+
+  nlower = -(order-1)/2;
+  nupper = order/2;
+
+  // shift values for particle <-> grid mapping
+  // add/subtract OFFSET to avoid int(-0.75) = 0 when want it to be -1
+  // odd order: shift = OFFSET + 0.5, shiftone = 0.0
+  // even order: shift = OFFSET, shiftone = 0.5
+
+  if (order % 2) shift = OFFSET + 0.5;
+  else shift = OFFSET;
+  if (order % 2) shiftone = 0.0;
+  else shiftone = 0.5;
+
+  // nlo_out,nhi_out = lower/upper limits of the 3d sub-brick of
+  //   global PPPM grid that my particles can contribute charge to
+  // effectively nlo_in,nhi_in + ghost cells
+  // nlo,nhi = global coords of grid pt to "lower left" of smallest/largest
+  //           position a particle in my box can be at
+  // dist[3] = particle position bound = subbox + skin/2.0 + qdist
+  //   qdist = offset due to TIP4P fictitious charge
+  //   convert to triclinic if necessary
+  // nlo_out,nhi_out = nlo,nhi + stencil size for particle mapping
+  // for slab PPPM, assign z grid as if it were not extended
+
+  double *prd,*sublo,*subhi;
+
+  // orthogonal box uses box-unit domain fields, triclinic uses lamda fields
+
+  if (triclinic == 0) {
+    prd = domain->prd;
+    boxlo = domain->boxlo;
+    sublo = domain->sublo;
+    subhi = domain->subhi;
+  } else {
+    prd = domain->prd_lamda;
+    boxlo = domain->boxlo_lamda;
+    sublo = domain->sublo_lamda;
+    subhi = domain->subhi_lamda;
+  }
+
+  double xprd = prd[0];
+  double yprd = prd[1];
+  double zprd = prd[2];
+  double zprd_slab = zprd*slab_volfactor;
+
+  // same bound in all 3 dims for orthogonal box,
+  //   per-dim bounds filled in by kspacebbox() for triclinic
+
+  double dist[3];
+  double cuthalf = 0.5*neighbor->skin + qdist;
+  if (triclinic == 0) dist[0] = dist[1] = dist[2] = cuthalf;
+  else kspacebbox(cuthalf,&dist[0]);
+
+  int nlo,nhi;
+
+  // x dimension
+
+  nlo = static_cast<int> ((sublo[0]-dist[0]-boxlo[0]) *
+                            nx_pppm/xprd + shift) - OFFSET;
+  nhi = static_cast<int> ((subhi[0]+dist[0]-boxlo[0]) *
+                            nx_pppm/xprd + shift) - OFFSET;
+  nxlo_out = nlo + nlower;
+  nxhi_out = nhi + nupper;
+
+  // y dimension
+
+  nlo = static_cast<int> ((sublo[1]-dist[1]-boxlo[1]) *
+                            ny_pppm/yprd + shift) - OFFSET;
+  nhi = static_cast<int> ((subhi[1]+dist[1]-boxlo[1]) *
+                            ny_pppm/yprd + shift) - OFFSET;
+  nylo_out = nlo + nlower;
+  nyhi_out = nhi + nupper;
+
+  // z dimension, scaled by the slab-extended box length
+
+  nlo = static_cast<int> ((sublo[2]-dist[2]-boxlo[2]) *
+                            nz_pppm/zprd_slab + shift) - OFFSET;
+  nhi = static_cast<int> ((subhi[2]+dist[2]-boxlo[2]) *
+                            nz_pppm/zprd_slab + shift) - OFFSET;
+  nzlo_out = nlo + nlower;
+  nzhi_out = nhi + nupper;
+
+  // when stagger_flag is set, extend the upper ghost limit
+  //   by one grid point in each dimension
+
+  if (stagger_flag) {
+    nxhi_out++;
+    nyhi_out++;
+    nzhi_out++;
+  }
+
+  // for slab PPPM, change the grid boundary for processors at +z end
+  //   to include the empty volume between periodically repeating slabs
+  // for slab PPPM, want charge data communicated from -z proc to +z proc,
+  //   but not vice versa, also want field data communicated from +z proc to
+  //   -z proc, but not vice versa
+  // this is accomplished by nzhi_in = nzhi_out on +z end (no ghost cells)
+  // also ensure no other procs use ghost cells beyond +z limit
+
+  if (slabflag == 1) {
+    if (comm->myloc[2] == comm->procgrid[2]-1)
+      nzhi_in = nzhi_out = nz_pppm - 1;
+    nzhi_out = MIN(nzhi_out,nz_pppm-1);
+  }
+    
+  // decomposition of FFT mesh
+  // global indices range from 0 to N-1
+  // proc owns entire x-dimension, clumps of columns in y,z dimensions
+  // npey_fft,npez_fft = # of procs in y,z dims
+  // if nprocs is small enough, proc can own 1 or more entire xy planes,
+  //   else proc owns 2d sub-blocks of yz plane
+  // me_y,me_z = which proc (0-npe_fft-1) I am in y,z dimensions
+  // nlo_fft,nhi_fft = lower/upper limit of the section
+  //   of the global FFT mesh that I own
+
+  int npey_fft,npez_fft;
+  if (nz_pppm >= nprocs) {
+    npey_fft = 1;
+    npez_fft = nprocs;
+  } else procs2grid2d(nprocs,ny_pppm,nz_pppm,&npey_fft,&npez_fft);
+
+  // rank "me" is laid out with y varying fastest
+
+  int me_y = me % npey_fft;
+  int me_z = me / npey_fft;
+
+  nxlo_fft = 0;
+  nxhi_fft = nx_pppm - 1;
+  nylo_fft = me_y*ny_pppm/npey_fft;
+  nyhi_fft = (me_y+1)*ny_pppm/npey_fft - 1;
+  nzlo_fft = me_z*nz_pppm/npez_fft;
+  nzhi_fft = (me_z+1)*nz_pppm/npez_fft - 1;
+
+  // PPPM grid pts owned by this proc, including ghosts
+
+  ngrid = (nxhi_out-nxlo_out+1) * (nyhi_out-nylo_out+1) *
+    (nzhi_out-nzlo_out+1);
+
+  // FFT grids owned by this proc, without ghosts
+  // nfft = FFT points in FFT decomposition on this proc
+  // nfft_brick = FFT points in 3d brick-decomposition on this proc
+  // nfft_both = greater of 2 values
+
+  nfft = (nxhi_fft-nxlo_fft+1) * (nyhi_fft-nylo_fft+1) *
+    (nzhi_fft-nzlo_fft+1);
+  int nfft_brick = (nxhi_in-nxlo_in+1) * (nyhi_in-nylo_in+1) *
+    (nzhi_in-nzlo_in+1);
+  nfft_both = MAX(nfft,nfft_brick);
+}
+
+/* ----------------------------------------------------------------------
+   pre-compute Green's function denominator expansion coeffs, Gamma(2n)
+------------------------------------------------------------------------- */
+
+void PPPM::compute_gf_denom()
+{
+  // start from gf_b = (1,0,0,...) over the first "order" entries
+
+  gf_b[0] = 1.0;
+  for (int idx = 1; idx < order; idx++) gf_b[idx] = 0.0;
+
+  // recurrence over m = 1..order-1
+  // entries are updated from index m down to 1 so that gf_b[l-1]
+  //   is still the value from the previous pass when it is read
+  // entry 0 has no lower neighbor, so it is handled separately
+
+  for (int m = 1; m < order; m++) {
+    for (int l = m; l >= 1; l--)
+      gf_b[l] = 4.0 * (gf_b[l]*(l-m)*(l-m-0.5)-gf_b[l-1]*(l-m-1)*(l-m-1));
+    gf_b[0] = 4.0 * (gf_b[0]*(-m)*(-m-0.5));
+  }
+
+  // divide every coefficient by (2*order-1)!
+  // factorial is accumulated in a bigint
+
+  bigint factorial = 1;
+  for (int k = 2; k < 2*order; k++) factorial *= k;
+
+  const double gaminv = 1.0/factorial;
+  for (int idx = 0; idx < order; idx++) gf_b[idx] *= gaminv;
+}
+
+/* ----------------------------------------------------------------------
+   pre-compute modified (Hockney-Eastwood) Coulomb Green's function
+------------------------------------------------------------------------- */
+
+void PPPM::compute_gf_ik()
+{
+  // fills greensfn[] with one value per point of my FFT decomposition,
+  //   stored in the same order as the m,l,k loops below (x fastest)
+
+  // box lengths, z stretched by slab_volfactor
+  // unitk = 2*pi/L in each dimension
+
+  const double * const prd = domain->prd;
+
+  const double xprd = prd[0];
+  const double yprd = prd[1];
+  const double zprd = prd[2];
+  const double zprd_slab = zprd*slab_volfactor;
+  const double unitkx = (MY_2PI/xprd);
+  const double unitky = (MY_2PI/yprd);
+  const double unitkz = (MY_2PI/zprd_slab);
+
+  double snx,sny,snz;
+  double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz;
+  double sum1,dot1,dot2;
+  double numerator,denominator;
+  double sqk;
+
+  int k,l,m,n,nx,ny,nz,kper,lper,mper;
+
+  // nbx,nby,nbz = number of shifted wavevectors (k + N*n) summed
+  //   on each side of n = 0 in each dimension
+  // the pow() factor is identical for all 3 dims, so evaluate it once
+
+  const double hoc_factor = pow(-log(EPS_HOC),0.25);
+  const int nbx = static_cast<int> ((g_ewald*xprd/(MY_PI*nx_pppm)) *
+                                    hoc_factor);
+  const int nby = static_cast<int> ((g_ewald*yprd/(MY_PI*ny_pppm)) *
+                                    hoc_factor);
+  const int nbz = static_cast<int> ((g_ewald*zprd_slab/(MY_PI*nz_pppm)) *
+                                    hoc_factor);
+  const int twoorder = 2*order;
+
+  // kper,lper,mper = signed grid index
+  //   index i in [0,N) maps to i for 2*i < N, else to i-N
+
+  n = 0;
+  for (m = nzlo_fft; m <= nzhi_fft; m++) {
+    mper = m - nz_pppm*(2*m/nz_pppm);
+    snz = square(sin(0.5*unitkz*mper*zprd_slab/nz_pppm));
+
+    for (l = nylo_fft; l <= nyhi_fft; l++) {
+      lper = l - ny_pppm*(2*l/ny_pppm);
+      sny = square(sin(0.5*unitky*lper*yprd/ny_pppm));
+
+      for (k = nxlo_fft; k <= nxhi_fft; k++) {
+        kper = k - nx_pppm*(2*k/nx_pppm);
+        snx = square(sin(0.5*unitkx*kper*xprd/nx_pppm));
+
+        sqk = square(unitkx*kper) + square(unitky*lper) + square(unitkz*mper);
+
+        if (sqk != 0.0) {
+
+          // 4*pi/k^2 using the full-precision MY_4PI constant,
+          //   the same constant compute_gf_ad() uses
+          // the former literal 12.5663706 carried only 9 digits
+
+          numerator = MY_4PI/sqk;
+          denominator = gf_denom(snx,sny,snz);
+          sum1 = 0.0;
+
+          // sum over shifted wavevectors q = k + N*n*unitk
+          //   s = exp(-q^2/(4*g_ewald^2)) per dimension
+          //   w = powsinxx(q*h/2,2*order) per dimension
+
+          for (nx = -nbx; nx <= nbx; nx++) {
+            qx = unitkx*(kper+nx_pppm*nx);
+            sx = exp(-0.25*square(qx/g_ewald));
+            argx = 0.5*qx*xprd/nx_pppm;
+            wx = powsinxx(argx,twoorder);
+
+            for (ny = -nby; ny <= nby; ny++) {
+              qy = unitky*(lper+ny_pppm*ny);
+              sy = exp(-0.25*square(qy/g_ewald));
+              argy = 0.5*qy*yprd/ny_pppm;
+              wy = powsinxx(argy,twoorder);
+
+              for (nz = -nbz; nz <= nbz; nz++) {
+                qz = unitkz*(mper+nz_pppm*nz);
+                sz = exp(-0.25*square(qz/g_ewald));
+                argz = 0.5*qz*zprd_slab/nz_pppm;
+                wz = powsinxx(argz,twoorder);
+
+                dot1 = unitkx*kper*qx + unitky*lper*qy + unitkz*mper*qz;
+                dot2 = qx*qx+qy*qy+qz*qz;
+                sum1 += (dot1/dot2) * sx*sy*sz * wx*wy*wz;
+              }
+            }
+          }
+          greensfn[n++] = numerator*sum1/denominator;
+
+        // k = 0 term is set to zero
+
+        } else greensfn[n++] = 0.0;
+      }
+    }
+  }
+}
+
+/* ----------------------------------------------------------------------
+   pre-compute modified (Hockney-Eastwood) Coulomb Green's function
+   for a triclinic system
+------------------------------------------------------------------------- */
+
+void PPPM::compute_gf_ik_triclinic()
+{
+  // fills greensfn[] with one value per point of my FFT decomposition,
+  //   stored in the same order as the m,l,k loops below (x fastest)
+
+  double snx,sny,snz;
+  double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz;
+  double sum1,dot1,dot2;
+  double numerator,denominator;
+  double sqk;
+
+  int k,l,m,n,nx,ny,nz,kper,lper,mper;
+
+  // nbx,nby,nbz = number of shifted wavevectors summed on each side
+  // per-dim values are passed through lamda2xT() before truncation
+  // the pow() factor is identical for all 3 dims, so evaluate it once
+
+  const double hoc_factor = pow(-log(EPS_HOC),0.25);
+
+  double tmp[3];
+  tmp[0] = (g_ewald/(MY_PI*nx_pppm)) * hoc_factor;
+  tmp[1] = (g_ewald/(MY_PI*ny_pppm)) * hoc_factor;
+  tmp[2] = (g_ewald/(MY_PI*nz_pppm)) * hoc_factor;
+  lamda2xT(&tmp[0],&tmp[0]);
+  const int nbx = static_cast<int> (tmp[0]);
+  const int nby = static_cast<int> (tmp[1]);
+  const int nbz = static_cast<int> (tmp[2]);
+
+  const int twoorder = 2*order;
+
+  // kper,lper,mper = signed grid index
+  //   index i in [0,N) maps to i for 2*i < N, else to i-N
+
+  n = 0;
+  for (m = nzlo_fft; m <= nzhi_fft; m++) {
+    mper = m - nz_pppm*(2*m/nz_pppm);
+    snz = square(sin(MY_PI*mper/nz_pppm));
+
+    for (l = nylo_fft; l <= nyhi_fft; l++) {
+      lper = l - ny_pppm*(2*l/ny_pppm);
+      sny = square(sin(MY_PI*lper/ny_pppm));
+
+      for (k = nxlo_fft; k <= nxhi_fft; k++) {
+        kper = k - nx_pppm*(2*k/nx_pppm);
+        snx = square(sin(MY_PI*kper/nx_pppm));
+
+        // wavevector of this grid point, transformed in place by x2lamdaT()
+
+        double unitk_lamda[3];
+        unitk_lamda[0] = 2.0*MY_PI*kper;
+        unitk_lamda[1] = 2.0*MY_PI*lper;
+        unitk_lamda[2] = 2.0*MY_PI*mper;
+        x2lamdaT(&unitk_lamda[0],&unitk_lamda[0]);
+
+        sqk = square(unitk_lamda[0]) + square(unitk_lamda[1]) + square(unitk_lamda[2]);
+
+        if (sqk != 0.0) {
+
+          // 4*pi/k^2 using the full-precision MY_4PI constant,
+          //   the same constant compute_gf_ad() uses
+          // the former literal 12.5663706 carried only 9 digits
+
+          numerator = MY_4PI/sqk;
+          denominator = gf_denom(snx,sny,snz);
+          sum1 = 0.0;
+
+          for (nx = -nbx; nx <= nbx; nx++) {
+            argx = MY_PI*kper/nx_pppm + MY_PI*nx;
+            wx = powsinxx(argx,twoorder);
+
+            for (ny = -nby; ny <= nby; ny++) {
+              argy = MY_PI*lper/ny_pppm + MY_PI*ny;
+              wy = powsinxx(argy,twoorder);
+
+              for (nz = -nbz; nz <= nbz; nz++) {
+                argz = MY_PI*mper/nz_pppm + MY_PI*nz;
+                wz = powsinxx(argz,twoorder);
+
+                // b = shift of the wavevector by (nx,ny,nz) grid periods,
+                //   transformed in place by x2lamdaT()
+                // q = wavevector of this grid point plus b
+
+                double b[3];
+                b[0] = 2.0*MY_PI*nx_pppm*nx;
+                b[1] = 2.0*MY_PI*ny_pppm*ny;
+                b[2] = 2.0*MY_PI*nz_pppm*nz;
+                x2lamdaT(&b[0],&b[0]);
+
+                qx = unitk_lamda[0]+b[0];
+                sx = exp(-0.25*square(qx/g_ewald));
+
+                qy = unitk_lamda[1]+b[1];
+                sy = exp(-0.25*square(qy/g_ewald));
+
+                qz = unitk_lamda[2]+b[2];
+                sz = exp(-0.25*square(qz/g_ewald));
+
+                dot1 = unitk_lamda[0]*qx + unitk_lamda[1]*qy + unitk_lamda[2]*qz;
+                dot2 = qx*qx+qy*qy+qz*qz;
+                sum1 += (dot1/dot2) * sx*sy*sz * wx*wy*wz;
+              }
+            }
+          }
+          greensfn[n++] = numerator*sum1/denominator;
+
+        // k = 0 term is set to zero
+
+        } else greensfn[n++] = 0.0;
+      }
+    }
+  }
+}
+
+/* ----------------------------------------------------------------------
+   compute optimized Green's function for energy calculation
+------------------------------------------------------------------------- */
+
+void PPPM::compute_gf_ad()
+{
+  // box lengths, z stretched by slab_volfactor
+  // unitk = 2*pi/L in each dimension
+
+  const double * const prd = domain->prd;
+
+  const double xprd = prd[0];
+  const double yprd = prd[1];
+  const double zprd = prd[2];
+  const double zprd_slab = zprd*slab_volfactor;
+  const double unitkx = (MY_2PI/xprd);
+  const double unitky = (MY_2PI/yprd);
+  const double unitkz = (MY_2PI/zprd_slab);
+
+  const int twoorder = 2*order;
+
+  // self-force sums are accumulated locally, then reduced over all procs
+
+  for (int c = 0; c < 6; c++) sf_coeff[c] = 0.0;
+
+  // per dimension and grid index:
+  //   per = signed grid index, q = wavevector component
+  //   sn = sin^2(arg), s = exp(-q^2/(4*g_ewald^2)), w = powsinxx(arg,2*order)
+  //   with arg = q*L/(2*N)
+
+  int n = 0;
+  for (int m = nzlo_fft; m <= nzhi_fft; m++) {
+    const int mper = m - nz_pppm*(2*m/nz_pppm);
+    const double qz = unitkz*mper;
+    const double argz = 0.5*qz*zprd_slab/nz_pppm;
+    const double snz = square(sin(argz));
+    const double sz = exp(-0.25*square(qz/g_ewald));
+    const double wz = powsinxx(argz,twoorder);
+
+    for (int l = nylo_fft; l <= nyhi_fft; l++) {
+      const int lper = l - ny_pppm*(2*l/ny_pppm);
+      const double qy = unitky*lper;
+      const double argy = 0.5*qy*yprd/ny_pppm;
+      const double sny = square(sin(argy));
+      const double sy = exp(-0.25*square(qy/g_ewald));
+      const double wy = powsinxx(argy,twoorder);
+
+      for (int k = nxlo_fft; k <= nxhi_fft; k++) {
+        const int kper = k - nx_pppm*(2*k/nx_pppm);
+        const double qx = unitkx*kper;
+        const double argx = 0.5*qx*xprd/nx_pppm;
+        const double snx = square(sin(argx));
+        const double sx = exp(-0.25*square(qx/g_ewald));
+        const double wx = powsinxx(argx,twoorder);
+
+        const double sqk = qx*qx + qy*qy + qz*qz;
+
+        // Green's function at this grid point, zero for k = 0
+
+        if (sqk != 0.0) {
+          const double numerator = MY_4PI/sqk;
+          const double denominator = gf_denom(snx,sny,snz);
+          greensfn[n] = numerator*sx*sy*sz*wx*wy*wz/denominator;
+        } else {
+          greensfn[n] = 0.0;
+        }
+
+        // both cases add precoeff*greensfn to the six sums
+
+        sf_coeff[0] += sf_precoeff1[n]*greensfn[n];
+        sf_coeff[1] += sf_precoeff2[n]*greensfn[n];
+        sf_coeff[2] += sf_precoeff3[n]*greensfn[n];
+        sf_coeff[3] += sf_precoeff4[n]*greensfn[n];
+        sf_coeff[4] += sf_precoeff5[n]*greensfn[n];
+        sf_coeff[5] += sf_precoeff6[n]*greensfn[n];
+        n++;
+      }
+    }
+  }
+
+  // compute the coefficients for the self-force correction
+  // per-dim prefactor = (pi/volume) * (N/L), odd-indexed entries get twice it
+
+  const double base = MY_PI/volume;
+  const double prex = base * (nx_pppm/xprd);
+  const double prey = base * (ny_pppm/yprd);
+  const double prez = base * (nz_pppm/zprd_slab);
+
+  sf_coeff[0] *= prex;
+  sf_coeff[1] *= prex*2;
+  sf_coeff[2] *= prey;
+  sf_coeff[3] *= prey*2;
+  sf_coeff[4] *= prez;
+  sf_coeff[5] *= prez*2;
+
+  // communicate values with other procs
+  // sum the six coefficients over all procs and store the totals back
+
+  double total[6];
+  MPI_Allreduce(sf_coeff,total,6,MPI_DOUBLE,MPI_SUM,world);
+  for (int c = 0; c < 6; c++) sf_coeff[c] = total[c];
+}
+
+/* ----------------------------------------------------------------------
+   compute self force coefficients for ad-differentiation scheme
+------------------------------------------------------------------------- */
+
+void PPPM::compute_sf_precoeff()
+{
+  // per-dimension weight tables over 5 entries
+  //   w?0[i] uses grid-period offset i-2
+  //   w?1[i] uses grid-period offset i-1
+  //   w?2[i] uses grid-period offset i
+
+  double wx0[5],wy0[5],wz0[5],wx1[5],wy1[5],wz1[5],wx2[5],wy2[5],wz2[5];
+
+  int n = 0;
+  for (int m = nzlo_fft; m <= nzhi_fft; m++) {
+    const int mper = m - nz_pppm*(2*m/nz_pppm);
+
+    for (int l = nylo_fft; l <= nyhi_fft; l++) {
+      const int lper = l - ny_pppm*(2*l/ny_pppm);
+
+      for (int k = nxlo_fft; k <= nxhi_fft; k++) {
+        const int kper = k - nx_pppm*(2*k/nx_pppm);
+
+        // tabulate the weights for this grid point
+
+        for (int i = 0; i < 5; i++) {
+          const double qx0 = MY_2PI*(kper+nx_pppm*(i-2));
+          const double qx1 = MY_2PI*(kper+nx_pppm*(i-1));
+          const double qx2 = MY_2PI*(kper+nx_pppm*(i  ));
+          wx0[i] = powsinxx(0.5*qx0/nx_pppm,order);
+          wx1[i] = powsinxx(0.5*qx1/nx_pppm,order);
+          wx2[i] = powsinxx(0.5*qx2/nx_pppm,order);
+
+          const double qy0 = MY_2PI*(lper+ny_pppm*(i-2));
+          const double qy1 = MY_2PI*(lper+ny_pppm*(i-1));
+          const double qy2 = MY_2PI*(lper+ny_pppm*(i  ));
+          wy0[i] = powsinxx(0.5*qy0/ny_pppm,order);
+          wy1[i] = powsinxx(0.5*qy1/ny_pppm,order);
+          wy2[i] = powsinxx(0.5*qy2/ny_pppm,order);
+
+          const double qz0 = MY_2PI*(mper+nz_pppm*(i-2));
+          const double qz1 = MY_2PI*(mper+nz_pppm*(i-1));
+          const double qz2 = MY_2PI*(mper+nz_pppm*(i  ));
+          wz0[i] = powsinxx(0.5*qz0/nz_pppm,order);
+          wz1[i] = powsinxx(0.5*qz1/nz_pppm,order);
+          wz2[i] = powsinxx(0.5*qz2/nz_pppm,order);
+        }
+
+        // six sums of u0*u over all 5x5x5 index triples
+        //   u0 = all three dims from the "0" tables
+        //   u  = u0 with one dim switched to its "1" or "2" table
+        // x*y products do not depend on nz, so form them outside that loop
+
+        double sum1 = 0.0;
+        double sum2 = 0.0;
+        double sum3 = 0.0;
+        double sum4 = 0.0;
+        double sum5 = 0.0;
+        double sum6 = 0.0;
+
+        for (int nx = 0; nx < 5; nx++) {
+          for (int ny = 0; ny < 5; ny++) {
+            const double xy00 = wx0[nx]*wy0[ny];
+            const double xy10 = wx1[nx]*wy0[ny];
+            const double xy20 = wx2[nx]*wy0[ny];
+            const double xy01 = wx0[nx]*wy1[ny];
+            const double xy02 = wx0[nx]*wy2[ny];
+
+            for (int nz = 0; nz < 5; nz++) {
+              const double u0 = xy00*wz0[nz];
+
+              sum1 += u0*(xy10*wz0[nz]);
+              sum2 += u0*(xy20*wz0[nz]);
+              sum3 += u0*(xy01*wz0[nz]);
+              sum4 += u0*(xy02*wz0[nz]);
+              sum5 += u0*(xy00*wz1[nz]);
+              sum6 += u0*(xy00*wz2[nz]);
+            }
+          }
+        }
+
+        // store values
+
+        sf_precoeff1[n] = sum1;
+        sf_precoeff2[n] = sum2;
+        sf_precoeff3[n] = sum3;
+        sf_precoeff4[n] = sum4;
+        sf_precoeff5[n] = sum5;
+        sf_precoeff6[n] = sum6;
+        n++;
+      }
+    }
+  }
+}
+
+/* ----------------------------------------------------------------------
+   find center grid pt for each of my particles
+   check that full stencil for the particle will fit in my 3d brick
+   store central grid pt indices in part2grid array
+------------------------------------------------------------------------- */
+
+void PPPM::particle_map()
+{
+  double **x = atom->x;
+  const int nlocal = atom->nlocal;
+
+  // outside = 1 once any atom's stencil leaves my brick
+
+  int outside = 0;
+
+  for (int i = 0; i < nlocal; i++) {
+
+    // (gx,gy,gz) = global coords of grid pt to "lower left" of charge
+    // current particle coord can be outside global and local box
+    // add/subtract OFFSET to avoid int(-0.75) = 0 when want it to be -1
+
+    const int gx = static_cast<int> ((x[i][0]-boxlo[0])*delxinv+shift) - OFFSET;
+    const int gy = static_cast<int> ((x[i][1]-boxlo[1])*delyinv+shift) - OFFSET;
+    const int gz = static_cast<int> ((x[i][2]-boxlo[2])*delzinv+shift) - OFFSET;
+
+    part2grid[i][0] = gx;
+    part2grid[i][1] = gy;
+    part2grid[i][2] = gz;
+
+    // entire stencil around gx,gy,gz must fit in my 3d brick incl ghosts
+
+    const bool fits =
+      gx+nlower >= nxlo_out && gx+nupper <= nxhi_out &&
+      gy+nlower >= nylo_out && gy+nupper <= nyhi_out &&
+      gz+nlower >= nzlo_out && gz+nupper <= nzhi_out;
+
+    if (!fits) outside = 1;
+  }
+
+  if (outside) error->one(FLERR,"Out of range atoms - cannot compute PPPM");
+}
+
+/* ----------------------------------------------------------------------
+   create discretized "density" on section of global grid due to my particles
+   density(x,y,z) = charge "density" at grid points of my 3d brick
+   (nxlo:nxhi,nylo:nyhi,nzlo:nzhi) is extent of my brick (including ghosts)
+   in global grid
+------------------------------------------------------------------------- */
+
+void PPPM::make_rho()
+{
+  // clear 3d density array
+  // ngrid values starting at the lowest-index corner of my brick
+
+  FFT_SCALAR *grid_start = &(density_brick[nzlo_out][nylo_out][nxlo_out]);
+  memset(grid_start,0,ngrid*sizeof(FFT_SCALAR));
+
+  // loop over my charges, add their contribution to nearby grid points
+  // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
+  // (dx,dy,dz) = distance to "lower left" grid pt
+
+  double *q = atom->q;
+  double **x = atom->x;
+  const int nlocal = atom->nlocal;
+
+  for (int i = 0; i < nlocal; i++) {
+
+    const int nx = part2grid[i][0];
+    const int ny = part2grid[i][1];
+    const int nz = part2grid[i][2];
+
+    const FFT_SCALAR dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv;
+    const FFT_SCALAR dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv;
+    const FFT_SCALAR dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv;
+
+    // fills rho1d[dim][nlower..nupper] with this atom's 1d weights
+
+    compute_rho1d(dx,dy,dz);
+
+    // product of the 3 weights times q*delvolinv is built up per loop level
+
+    const FFT_SCALAR z0 = delvolinv * q[i];
+    for (int n = nlower; n <= nupper; n++) {
+      const FFT_SCALAR y0 = z0*rho1d[2][n];
+      for (int m = nlower; m <= nupper; m++) {
+        const FFT_SCALAR x0 = y0*rho1d[1][m];
+        FFT_SCALAR *row = density_brick[n+nz][m+ny];
+        for (int l = nlower; l <= nupper; l++)
+          row[l+nx] += x0*rho1d[0][l];
+      }
+    }
+  }
+}
+
+/* ----------------------------------------------------------------------
+   remap density from 3d brick decomposition to FFT decomposition
+------------------------------------------------------------------------- */
+
+void PPPM::brick2fft()
+{
+  // copy grabs inner portion of density from 3d brick
+  //   into the contiguous density_fft buffer, x index fastest
+  // remap could be done as pre-stage of FFT,
+  //   but this works optimally on only double values, not complex values
+
+  int count = 0;
+  for (int iz = nzlo_in; iz <= nzhi_in; iz++) {
+    for (int iy = nylo_in; iy <= nyhi_in; iy++) {
+      const FFT_SCALAR * const row = density_brick[iz][iy];
+      for (int ix = nxlo_in; ix <= nxhi_in; ix++) {
+        density_fft[count] = row[ix];
+        count++;
+      }
+    }
+  }
+
+  // in-place remap of density_fft, with work1 as scratch
+
+  remap->perform(density_fft,density_fft,work1);
+}
+
+/* ----------------------------------------------------------------------
+   FFT-based Poisson solver
+------------------------------------------------------------------------- */
+
+void PPPM::poisson()
+{
+  // differentiation_flag == 1 selects the ad solver, anything else the ik one
+
+  if (differentiation_flag != 1) {
+    poisson_ik();
+    return;
+  }
+
+  poisson_ad();
+}
+
+/* ----------------------------------------------------------------------
+   FFT-based Poisson solver for ik
+------------------------------------------------------------------------- */
+
+void PPPM::poisson_ik()
+{
+  int i,j,k,n;
+  double eng;
+
+  // transform charge density (r -> k)
+  // work1 holds interleaved (real,imag) pairs, imaginary part set to zero
+
+  n = 0;
+  for (i = 0; i < nfft; i++) {
+    work1[n++] = density_fft[i];
+    work1[n++] = ZEROF;
+  }
+
+  fft1->compute(work1,work1,1);
+
+  // global energy and virial contribution
+  // total grid-point count is formed in double precision:
+  //   as an int product nx_pppm*ny_pppm*nz_pppm overflows
+  //   once the global mesh has more than 2^31-1 points
+
+  const double scaleinv =
+    1.0/(static_cast<double> (nx_pppm)*ny_pppm*nz_pppm);
+  const double s2 = scaleinv*scaleinv;
+
+  // virial requested: accumulate virial, and energy too if requested
+  // energy only: accumulate energy
+
+  if (vflag_global) {
+    n = 0;
+    for (i = 0; i < nfft; i++) {
+      eng = s2 * greensfn[i] * (work1[n]*work1[n] + work1[n+1]*work1[n+1]);
+      for (j = 0; j < 6; j++) virial[j] += eng*vg[i][j];
+      if (eflag_global) energy += eng;
+      n += 2;
+    }
+  } else if (eflag_global) {
+    n = 0;
+    for (i = 0; i < nfft; i++) {
+      energy +=
+        s2 * greensfn[i] * (work1[n]*work1[n] + work1[n+1]*work1[n+1]);
+      n += 2;
+    }
+  }
+
+  // scale by 1/total-grid-pts to get rho(k)
+  // multiply by Green's function to get V(k)
+
+  n = 0;
+  for (i = 0; i < nfft; i++) {
+    work1[n++] *= scaleinv * greensfn[i];
+    work1[n++] *= scaleinv * greensfn[i];
+  }
+
+  // extra FFTs for per-atom energy/virial
+
+  if (evflag_atom) poisson_peratom();
+
+  // triclinic system
+  // gradients are computed by the triclinic routine instead
+
+  if (triclinic) {
+    poisson_ik_triclinic();
+    return;
+  }
+
+  // compute gradients of V(r) in each of 3 dims by transforming -ik*V(k)
+  //   (re,im) -> (fk*im,-fk*re) for each complex value of work1
+  // FFT leaves data in 3d brick decomposition
+  // copy it into inner portion of vdx,vdy,vdz arrays
+  //   only the real part (every 2nd value) of work2 is kept
+
+  // x direction gradient
+
+  n = 0;
+  for (k = nzlo_fft; k <= nzhi_fft; k++)
+    for (j = nylo_fft; j <= nyhi_fft; j++)
+      for (i = nxlo_fft; i <= nxhi_fft; i++) {
+        work2[n] = fkx[i]*work1[n+1];
+        work2[n+1] = -fkx[i]*work1[n];
+        n += 2;
+      }
+
+  fft2->compute(work2,work2,-1);
+
+  n = 0;
+  for (k = nzlo_in; k <= nzhi_in; k++)
+    for (j = nylo_in; j <= nyhi_in; j++)
+      for (i = nxlo_in; i <= nxhi_in; i++) {
+        vdx_brick[k][j][i] = work2[n];
+        n += 2;
+      }
+
+  // y direction gradient
+
+  n = 0;
+  for (k = nzlo_fft; k <= nzhi_fft; k++)
+    for (j = nylo_fft; j <= nyhi_fft; j++)
+      for (i = nxlo_fft; i <= nxhi_fft; i++) {
+        work2[n] = fky[j]*work1[n+1];
+        work2[n+1] = -fky[j]*work1[n];
+        n += 2;
+      }
+
+  fft2->compute(work2,work2,-1);
+
+  n = 0;
+  for (k = nzlo_in; k <= nzhi_in; k++)
+    for (j = nylo_in; j <= nyhi_in; j++)
+      for (i = nxlo_in; i <= nxhi_in; i++) {
+        vdy_brick[k][j][i] = work2[n];
+        n += 2;
+      }
+
+  // z direction gradient
+
+  n = 0;
+  for (k = nzlo_fft; k <= nzhi_fft; k++)
+    for (j = nylo_fft; j <= nyhi_fft; j++)
+      for (i = nxlo_fft; i <= nxhi_fft; i++) {
+        work2[n] = fkz[k]*work1[n+1];
+        work2[n+1] = -fkz[k]*work1[n];
+        n += 2;
+      }
+
+  fft2->compute(work2,work2,-1);
+
+  n = 0;
+  for (k = nzlo_in; k <= nzhi_in; k++)
+    for (j = nylo_in; j <= nyhi_in; j++)
+      for (i = nxlo_in; i <= nxhi_in; i++) {
+        vdz_brick[k][j][i] = work2[n];
+        n += 2;
+      }
+}
+
+/* ----------------------------------------------------------------------
+   FFT-based Poisson solver for ik for a triclinic system
+------------------------------------------------------------------------- */
+
+void PPPM::poisson_ik_triclinic()
+{
+  int ix,iy,iz,pt,idx;
+
+  // compute gradients of V(r) in each of 3 dims by transforming -ik*V(k)
+  //   (re,im) -> (fk*im,-fk*re) for each complex value of work1
+  // fkx,fky,fkz are indexed per FFT point here, not per grid line
+  // FFT leaves data in 3d brick decomposition
+  // copy it into inner portion of vdx,vdy,vdz arrays
+  //   only the real part (every 2nd value) of work2 is kept
+
+  // x direction gradient
+
+  for (pt = 0; pt < nfft; pt++) {
+    idx = 2*pt;
+    work2[idx] = fkx[pt]*work1[idx+1];
+    work2[idx+1] = -fkx[pt]*work1[idx];
+  }
+
+  fft2->compute(work2,work2,-1);
+
+  idx = 0;
+  for (iz = nzlo_in; iz <= nzhi_in; iz++)
+    for (iy = nylo_in; iy <= nyhi_in; iy++)
+      for (ix = nxlo_in; ix <= nxhi_in; ix++) {
+        vdx_brick[iz][iy][ix] = work2[idx];
+        idx += 2;
+      }
+
+  // y direction gradient
+
+  for (pt = 0; pt < nfft; pt++) {
+    idx = 2*pt;
+    work2[idx] = fky[pt]*work1[idx+1];
+    work2[idx+1] = -fky[pt]*work1[idx];
+  }
+
+  fft2->compute(work2,work2,-1);
+
+  idx = 0;
+  for (iz = nzlo_in; iz <= nzhi_in; iz++)
+    for (iy = nylo_in; iy <= nyhi_in; iy++)
+      for (ix = nxlo_in; ix <= nxhi_in; ix++) {
+        vdy_brick[iz][iy][ix] = work2[idx];
+        idx += 2;
+      }
+
+  // z direction gradient
+
+  for (pt = 0; pt < nfft; pt++) {
+    idx = 2*pt;
+    work2[idx] = fkz[pt]*work1[idx+1];
+    work2[idx+1] = -fkz[pt]*work1[idx];
+  }
+
+  fft2->compute(work2,work2,-1);
+
+  idx = 0;
+  for (iz = nzlo_in; iz <= nzhi_in; iz++)
+    for (iy = nylo_in; iy <= nyhi_in; iy++)
+      for (ix = nxlo_in; ix <= nxhi_in; ix++) {
+        vdz_brick[iz][iy][ix] = work2[idx];
+        idx += 2;
+      }
+}
+
+/* ----------------------------------------------------------------------
+   FFT-based Poisson solver for ad
+------------------------------------------------------------------------- */
+
+void PPPM::poisson_ad()
+{
+  int i,j,k,n;
+  double eng;
+
+  // transform charge density (r -> k)
+  // work1 holds interleaved (real,imag) pairs, imaginary part set to zero
+
+  n = 0;
+  for (i = 0; i < nfft; i++) {
+    work1[n++] = density_fft[i];
+    work1[n++] = ZEROF;
+  }
+
+  fft1->compute(work1,work1,1);
+
+  // global energy and virial contribution
+  // total grid-point count is formed in double precision:
+  //   as an int product nx_pppm*ny_pppm*nz_pppm overflows
+  //   once the global mesh has more than 2^31-1 points
+
+  const double scaleinv =
+    1.0/(static_cast<double> (nx_pppm)*ny_pppm*nz_pppm);
+  const double s2 = scaleinv*scaleinv;
+
+  // virial requested: accumulate virial, and energy too if requested
+  // energy only: accumulate energy
+
+  if (vflag_global) {
+    n = 0;
+    for (i = 0; i < nfft; i++) {
+      eng = s2 * greensfn[i] * (work1[n]*work1[n] + work1[n+1]*work1[n+1]);
+      for (j = 0; j < 6; j++) virial[j] += eng*vg[i][j];
+      if (eflag_global) energy += eng;
+      n += 2;
+    }
+  } else if (eflag_global) {
+    n = 0;
+    for (i = 0; i < nfft; i++) {
+      energy +=
+        s2 * greensfn[i] * (work1[n]*work1[n] + work1[n+1]*work1[n+1]);
+      n += 2;
+    }
+  }
+
+  // scale by 1/total-grid-pts to get rho(k)
+  // multiply by Green's function to get V(k)
+
+  n = 0;
+  for (i = 0; i < nfft; i++) {
+    work1[n++] *= scaleinv * greensfn[i];
+    work1[n++] *= scaleinv * greensfn[i];
+  }
+
+  // extra FFTs for per-atom virial
+  // u_brick is filled below in every case, so only vflag_atom is tested
+
+  if (vflag_atom) poisson_peratom();
+
+  // transform V(k) back to real space via a copy in work2
+
+  n = 0;
+  for (i = 0; i < nfft; i++) {
+    work2[n] = work1[n];
+    work2[n+1] = work1[n+1];
+    n += 2;
+  }
+
+  fft2->compute(work2,work2,-1);
+
+  // keep the real part (every 2nd value) in the inner portion of u_brick
+
+  n = 0;
+  for (k = nzlo_in; k <= nzhi_in; k++)
+    for (j = nylo_in; j <= nyhi_in; j++)
+      for (i = nxlo_in; i <= nxhi_in; i++) {
+        u_brick[k][j][i] = work2[n];
+        n += 2;
+      }
+}
+
+/* ----------------------------------------------------------------------
+   FFT-based Poisson solver for per-atom energy/virial
+------------------------------------------------------------------------- */
+
+void PPPM::poisson_peratom()  // fills u_brick and v0..v5_brick from work1
+{
+  int i,j,k,n;  // n walks work1/work2 two values (one pair) at a time
+
+  // energy: only if eflag_atom is set and differentiation_flag != 1
+
+  if (eflag_atom && differentiation_flag != 1) {
+    n = 0;
+    for (i = 0; i < nfft; i++) {
+      work2[n] = work1[n];
+      work2[n+1] = work1[n+1];
+      n += 2;
+    }
+
+    fft2->compute(work2,work2,-1);  // work2 is both input and output
+
+    n = 0;
+    for (k = nzlo_in; k <= nzhi_in; k++)
+      for (j = nylo_in; j <= nyhi_in; j++)
+        for (i = nxlo_in; i <= nxhi_in; i++) {
+          u_brick[k][j][i] = work2[n];  // 1st value of each pair only
+          n += 2;
+        }
+  }
+
+  // 6 components of virial in v0 thru v5: same steps with work1 scaled by vg
+
+  if (!vflag_atom) return;  // no virial bricks are touched without vflag_atom
+
+  n = 0;
+  for (i = 0; i < nfft; i++) {
+    work2[n] = work1[n]*vg[i][0];  // component 0 -> v0_brick
+    work2[n+1] = work1[n+1]*vg[i][0];
+    n += 2;
+  }
+
+  fft2->compute(work2,work2,-1);
+
+  n = 0;
+  for (k = nzlo_in; k <= nzhi_in; k++)
+    for (j = nylo_in; j <= nyhi_in; j++)
+      for (i = nxlo_in; i <= nxhi_in; i++) {
+        v0_brick[k][j][i] = work2[n];
+        n += 2;
+      }
+
+  n = 0;
+  for (i = 0; i < nfft; i++) {
+    work2[n] = work1[n]*vg[i][1];  // component 1 -> v1_brick
+    work2[n+1] = work1[n+1]*vg[i][1];
+    n += 2;
+  }
+
+  fft2->compute(work2,work2,-1);
+
+  n = 0;
+  for (k = nzlo_in; k <= nzhi_in; k++)
+    for (j = nylo_in; j <= nyhi_in; j++)
+      for (i = nxlo_in; i <= nxhi_in; i++) {
+        v1_brick[k][j][i] = work2[n];
+        n += 2;
+      }
+
+  n = 0;
+  for (i = 0; i < nfft; i++) {
+    work2[n] = work1[n]*vg[i][2];  // component 2 -> v2_brick
+    work2[n+1] = work1[n+1]*vg[i][2];
+    n += 2;
+  }
+
+  fft2->compute(work2,work2,-1);
+
+  n = 0;
+  for (k = nzlo_in; k <= nzhi_in; k++)
+    for (j = nylo_in; j <= nyhi_in; j++)
+      for (i = nxlo_in; i <= nxhi_in; i++) {
+        v2_brick[k][j][i] = work2[n];
+        n += 2;
+      }
+
+  n = 0;
+  for (i = 0; i < nfft; i++) {
+    work2[n] = work1[n]*vg[i][3];  // component 3 -> v3_brick
+    work2[n+1] = work1[n+1]*vg[i][3];
+    n += 2;
+  }
+
+  fft2->compute(work2,work2,-1);
+
+  n = 0;
+  for (k = nzlo_in; k <= nzhi_in; k++)
+    for (j = nylo_in; j <= nyhi_in; j++)
+      for (i = nxlo_in; i <= nxhi_in; i++) {
+        v3_brick[k][j][i] = work2[n];
+        n += 2;
+      }
+
+  n = 0;
+  for (i = 0; i < nfft; i++) {
+    work2[n] = work1[n]*vg[i][4];  // component 4 -> v4_brick
+    work2[n+1] = work1[n+1]*vg[i][4];
+    n += 2;
+  }
+
+  fft2->compute(work2,work2,-1);
+
+  n = 0;
+  for (k = nzlo_in; k <= nzhi_in; k++)
+    for (j = nylo_in; j <= nyhi_in; j++)
+      for (i = nxlo_in; i <= nxhi_in; i++) {
+        v4_brick[k][j][i] = work2[n];
+        n += 2;
+      }
+
+  n = 0;
+  for (i = 0; i < nfft; i++) {
+    work2[n] = work1[n]*vg[i][5];  // component 5 -> v5_brick
+    work2[n+1] = work1[n+1]*vg[i][5];
+    n += 2;
+  }
+
+  fft2->compute(work2,work2,-1);
+
+  n = 0;
+  for (k = nzlo_in; k <= nzhi_in; k++)
+    for (j = nylo_in; j <= nyhi_in; j++)
+      for (i = nxlo_in; i <= nxhi_in; i++) {
+        v5_brick[k][j][i] = work2[n];
+        n += 2;
+      }
+}
+
+/* ----------------------------------------------------------------------
+   interpolate from grid to get electric field & force on my particles
+------------------------------------------------------------------------- */
+
+void PPPM::fieldforce()
+{
+  if (differentiation_flag != 1) fieldforce_ik();
+  else fieldforce_ad();
+}
+
+/* ----------------------------------------------------------------------
+   interpolate from grid to get electric field & force on my particles for ik
+------------------------------------------------------------------------- */
+
+void PPPM::fieldforce_ik()  // reads vdx/vdy/vdz_brick, adds to f of local atoms
+{
+  int i,l,m,n,nx,ny,nz,mx,my,mz;
+  FFT_SCALAR dx,dy,dz,x0,y0,z0;
+  FFT_SCALAR ekx,eky,ekz;
+
+  // loop over my charges, interpolate electric field from nearby grid points
+  // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
+  // (dx,dy,dz) = distance to "lower left" grid pt
+  // (mx,my,mz) = global coords of moving stencil pt
+  // ek = 3 components of E-field on particle
+
+  double *q = atom->q;
+  double **x = atom->x;
+  double **f = atom->f;
+
+  int nlocal = atom->nlocal;
+
+  for (i = 0; i < nlocal; i++) {
+    nx = part2grid[i][0];
+    ny = part2grid[i][1];
+    nz = part2grid[i][2];
+    dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv;
+    dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv;
+    dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv;
+
+    compute_rho1d(dx,dy,dz);  // refreshes the rho1d weights for this atom
+
+    ekx = eky = ekz = ZEROF;
+    for (n = nlower; n <= nupper; n++) {
+      mz = n+nz;
+      z0 = rho1d[2][n];  // z weight
+      for (m = nlower; m <= nupper; m++) {
+        my = m+ny;
+        y0 = z0*rho1d[1][m];  // z*y weight
+        for (l = nlower; l <= nupper; l++) {
+          mx = l+nx;
+          x0 = y0*rho1d[0][l];  // full z*y*x weight of this stencil point
+          ekx -= x0*vdx_brick[mz][my][mx];  // weighted brick values are subtracted
+          eky -= x0*vdy_brick[mz][my][mx];
+          ekz -= x0*vdz_brick[mz][my][mx];
+        }
+      }
+    }
+
+    // convert E-field to force: f += qqrd2e * scale * q * E
+
+    const double qfactor = force->qqrd2e * scale * q[i];
+    f[i][0] += qfactor*ekx;
+    f[i][1] += qfactor*eky;
+    if (slabflag != 2) f[i][2] += qfactor*ekz;  // no z force when slabflag == 2
+  }
+}
+
+/* ----------------------------------------------------------------------
+   interpolate from grid to get electric field & force on my particles for ad
+------------------------------------------------------------------------- */
+
+void PPPM::fieldforce_ad()  // reads u_brick, adds to f of local atoms
+{
+  int i,l,m,n,nx,ny,nz,mx,my,mz;
+  FFT_SCALAR dx,dy,dz;
+  FFT_SCALAR ekx,eky,ekz;
+  double s1,s2,s3;
+  double sf = 0.0;
+  double *prd;
+
+  prd = domain->prd;
+  double xprd = prd[0];
+  double yprd = prd[1];
+  double zprd = prd[2];
+
+  double hx_inv = nx_pppm/xprd;  // grid points per unit of prd[0]
+  double hy_inv = ny_pppm/yprd;
+  double hz_inv = nz_pppm/zprd;
+
+  // loop over my charges, interpolate electric field from nearby grid points
+  // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
+  // (dx,dy,dz) = distance to "lower left" grid pt
+  // (mx,my,mz) = global coords of moving stencil pt
+  // ek = 3 components of E-field on particle
+
+  double *q = atom->q;
+  double **x = atom->x;
+  double **f = atom->f;
+
+  int nlocal = atom->nlocal;
+
+  for (i = 0; i < nlocal; i++) {
+    nx = part2grid[i][0];
+    ny = part2grid[i][1];
+    nz = part2grid[i][2];
+    dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv;
+    dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv;
+    dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv;
+
+    compute_rho1d(dx,dy,dz);  // weights, stored in rho1d
+    compute_drho1d(dx,dy,dz);  // derivative weights, stored in drho1d
+
+    ekx = eky = ekz = ZEROF;
+    for (n = nlower; n <= nupper; n++) {
+      mz = n+nz;
+      for (m = nlower; m <= nupper; m++) {
+        my = m+ny;
+        for (l = nlower; l <= nupper; l++) {
+          mx = l+nx;
+          ekx += drho1d[0][l]*rho1d[1][m]*rho1d[2][n]*u_brick[mz][my][mx];  // drho in x
+          eky += rho1d[0][l]*drho1d[1][m]*rho1d[2][n]*u_brick[mz][my][mx];  // drho in y
+          ekz += rho1d[0][l]*rho1d[1][m]*drho1d[2][n]*u_brick[mz][my][mx];  // drho in z
+        }
+      }
+    }
+    ekx *= hx_inv;
+    eky *= hy_inv;
+    ekz *= hz_inv;
+
+    // convert E-field to force and subtract self forces
+
+    const double qfactor = force->qqrd2e * scale;
+
+    s1 = x[i][0]*hx_inv;
+    s2 = x[i][1]*hy_inv;
+    s3 = x[i][2]*hz_inv;
+    sf = sf_coeff[0]*sin(2*MY_PI*s1);  // x self force: sf_coeff[0] and [1]
+    sf += sf_coeff[1]*sin(4*MY_PI*s1);
+    sf *= 2*q[i]*q[i];
+    f[i][0] += qfactor*(ekx*q[i] - sf);
+
+    sf = sf_coeff[2]*sin(2*MY_PI*s2);  // y self force: sf_coeff[2] and [3]
+    sf += sf_coeff[3]*sin(4*MY_PI*s2);
+    sf *= 2*q[i]*q[i];
+    f[i][1] += qfactor*(eky*q[i] - sf);
+
+
+    sf = sf_coeff[4]*sin(2*MY_PI*s3);  // z self force: sf_coeff[4] and [5]
+    sf += sf_coeff[5]*sin(4*MY_PI*s3);
+    sf *= 2*q[i]*q[i];
+    if (slabflag != 2) f[i][2] += qfactor*(ekz*q[i] - sf);  // no z force when slabflag == 2
+  }
+}
+
+/* ----------------------------------------------------------------------
+   interpolate from grid to get per-atom energy/virial
+------------------------------------------------------------------------- */
+
+void PPPM::fieldforce_peratom()  // adds to eatom[] / vatom[][] of local atoms
+{
+  int i,l,m,n,nx,ny,nz,mx,my,mz;
+  FFT_SCALAR dx,dy,dz,x0,y0,z0;
+  FFT_SCALAR u,v0,v1,v2,v3,v4,v5;  // per-atom sums interpolated from the bricks
+
+  // loop over my charges, interpolate from nearby grid points
+  // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
+  // (dx,dy,dz) = distance to "lower left" grid pt
+  // (mx,my,mz) = global coords of moving stencil pt
+
+  double *q = atom->q;
+  double **x = atom->x;
+
+  int nlocal = atom->nlocal;
+
+  for (i = 0; i < nlocal; i++) {
+    nx = part2grid[i][0];
+    ny = part2grid[i][1];
+    nz = part2grid[i][2];
+    dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv;
+    dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv;
+    dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv;
+
+    compute_rho1d(dx,dy,dz);  // refreshes the rho1d weights for this atom
+
+    u = v0 = v1 = v2 = v3 = v4 = v5 = ZEROF;
+    for (n = nlower; n <= nupper; n++) {
+      mz = n+nz;
+      z0 = rho1d[2][n];
+      for (m = nlower; m <= nupper; m++) {
+        my = m+ny;
+        y0 = z0*rho1d[1][m];
+        for (l = nlower; l <= nupper; l++) {
+          mx = l+nx;
+          x0 = y0*rho1d[0][l];  // full z*y*x weight of this stencil point
+          if (eflag_atom) u += x0*u_brick[mz][my][mx];
+          if (vflag_atom) {
+            v0 += x0*v0_brick[mz][my][mx];
+            v1 += x0*v1_brick[mz][my][mx];
+            v2 += x0*v2_brick[mz][my][mx];
+            v3 += x0*v3_brick[mz][my][mx];
+            v4 += x0*v4_brick[mz][my][mx];
+            v5 += x0*v5_brick[mz][my][mx];
+          }
+        }
+      }
+    }
+
+    if (eflag_atom) eatom[i] += q[i]*u;  // interpolated values are weighted by q
+    if (vflag_atom) {
+      vatom[i][0] += q[i]*v0;
+      vatom[i][1] += q[i]*v1;
+      vatom[i][2] += q[i]*v2;
+      vatom[i][3] += q[i]*v3;
+      vatom[i][4] += q[i]*v4;
+      vatom[i][5] += q[i]*v5;
+    }
+  }
+}
+
+/* ----------------------------------------------------------------------
+   pack own values to buf to send to another proc
+------------------------------------------------------------------------- */
+
+void PPPM::pack_forward(int flag, FFT_SCALAR *buf, int nlist, int *list)
+{
+  int nbuf = 0;  // next free slot in buf; list[] holds offsets into each brick
+
+  if (flag == FORWARD_IK) {  // 3 values per listed grid point
+    FFT_SCALAR *gridx = &vdx_brick[nzlo_out][nylo_out][nxlo_out];
+    FFT_SCALAR *gridy = &vdy_brick[nzlo_out][nylo_out][nxlo_out];
+    FFT_SCALAR *gridz = &vdz_brick[nzlo_out][nylo_out][nxlo_out];
+    for (int m = 0; m < nlist; m++) {
+      buf[nbuf++] = gridx[list[m]];
+      buf[nbuf++] = gridy[list[m]];
+      buf[nbuf++] = gridz[list[m]];
+    }
+  } else if (flag == FORWARD_AD) {  // 1 value per listed grid point
+    FFT_SCALAR *gridu = &u_brick[nzlo_out][nylo_out][nxlo_out];
+    for (int m = 0; m < nlist; m++)
+      buf[nbuf++] = gridu[list[m]];
+  } else if (flag == FORWARD_IK_PERATOM) {  // 0, 1, 6 or 7 values per point
+    FFT_SCALAR *gridu = &u_brick[nzlo_out][nylo_out][nxlo_out];
+    FFT_SCALAR *grid0 = &v0_brick[nzlo_out][nylo_out][nxlo_out];
+    FFT_SCALAR *grid1 = &v1_brick[nzlo_out][nylo_out][nxlo_out];
+    FFT_SCALAR *grid2 = &v2_brick[nzlo_out][nylo_out][nxlo_out];
+    FFT_SCALAR *grid3 = &v3_brick[nzlo_out][nylo_out][nxlo_out];
+    FFT_SCALAR *grid4 = &v4_brick[nzlo_out][nylo_out][nxlo_out];
+    FFT_SCALAR *grid5 = &v5_brick[nzlo_out][nylo_out][nxlo_out];
+    for (int m = 0; m < nlist; m++) {
+      if (eflag_atom) buf[nbuf++] = gridu[list[m]];
+      if (vflag_atom) {
+        buf[nbuf++] = grid0[list[m]];
+        buf[nbuf++] = grid1[list[m]];
+        buf[nbuf++] = grid2[list[m]];
+        buf[nbuf++] = grid3[list[m]];
+        buf[nbuf++] = grid4[list[m]];
+        buf[nbuf++] = grid5[list[m]];
+      }
+    }
+  } else if (flag == FORWARD_AD_PERATOM) {  // 6 values per listed grid point
+    FFT_SCALAR *grid0 = &v0_brick[nzlo_out][nylo_out][nxlo_out];
+    FFT_SCALAR *grid1 = &v1_brick[nzlo_out][nylo_out][nxlo_out];
+    FFT_SCALAR *grid2 = &v2_brick[nzlo_out][nylo_out][nxlo_out];
+    FFT_SCALAR *grid3 = &v3_brick[nzlo_out][nylo_out][nxlo_out];
+    FFT_SCALAR *grid4 = &v4_brick[nzlo_out][nylo_out][nxlo_out];
+    FFT_SCALAR *grid5 = &v5_brick[nzlo_out][nylo_out][nxlo_out];
+    for (int m = 0; m < nlist; m++) {
+      buf[nbuf++] = grid0[list[m]];
+      buf[nbuf++] = grid1[list[m]];
+      buf[nbuf++] = grid2[list[m]];
+      buf[nbuf++] = grid3[list[m]];
+      buf[nbuf++] = grid4[list[m]];
+      buf[nbuf++] = grid5[list[m]];
+    }
+  }
+}
+
+/* ----------------------------------------------------------------------
+   unpack another proc's own values from buf and set own ghost values
+------------------------------------------------------------------------- */
+
+void PPPM::unpack_forward(int flag, FFT_SCALAR *buf, int nlist, int *list)
+{
+  int nbuf = 0;  // next unread slot in buf; list[] holds offsets into each brick
+
+  if (flag == FORWARD_IK) {  // 3 values per listed grid point
+    FFT_SCALAR *gridx = &vdx_brick[nzlo_out][nylo_out][nxlo_out];
+    FFT_SCALAR *gridy = &vdy_brick[nzlo_out][nylo_out][nxlo_out];
+    FFT_SCALAR *gridz = &vdz_brick[nzlo_out][nylo_out][nxlo_out];
+    for (int m = 0; m < nlist; m++) {
+      gridx[list[m]] = buf[nbuf++];
+      gridy[list[m]] = buf[nbuf++];
+      gridz[list[m]] = buf[nbuf++];
+    }
+  } else if (flag == FORWARD_AD) {  // 1 value per listed grid point
+    FFT_SCALAR *gridu = &u_brick[nzlo_out][nylo_out][nxlo_out];
+    for (int m = 0; m < nlist; m++)
+      gridu[list[m]] = buf[nbuf++];
+  } else if (flag == FORWARD_IK_PERATOM) {  // 0, 1, 6 or 7 values per point
+    FFT_SCALAR *udest = &u_brick[nzlo_out][nylo_out][nxlo_out];
+    FFT_SCALAR *v0dest = &v0_brick[nzlo_out][nylo_out][nxlo_out];
+    FFT_SCALAR *v1dest = &v1_brick[nzlo_out][nylo_out][nxlo_out];
+    FFT_SCALAR *v2dest = &v2_brick[nzlo_out][nylo_out][nxlo_out];
+    FFT_SCALAR *v3dest = &v3_brick[nzlo_out][nylo_out][nxlo_out];
+    FFT_SCALAR *v4dest = &v4_brick[nzlo_out][nylo_out][nxlo_out];
+    FFT_SCALAR *v5dest = &v5_brick[nzlo_out][nylo_out][nxlo_out];
+    for (int m = 0; m < nlist; m++) {
+      if (eflag_atom) udest[list[m]] = buf[nbuf++];
+      if (vflag_atom) {
+        v0dest[list[m]] = buf[nbuf++];
+        v1dest[list[m]] = buf[nbuf++];
+        v2dest[list[m]] = buf[nbuf++];
+        v3dest[list[m]] = buf[nbuf++];
+        v4dest[list[m]] = buf[nbuf++];
+        v5dest[list[m]] = buf[nbuf++];
+      }
+    }
+  } else if (flag == FORWARD_AD_PERATOM) {  // 6 values per listed grid point
+    FFT_SCALAR *v0dest = &v0_brick[nzlo_out][nylo_out][nxlo_out];
+    FFT_SCALAR *v1dest = &v1_brick[nzlo_out][nylo_out][nxlo_out];
+    FFT_SCALAR *v2dest = &v2_brick[nzlo_out][nylo_out][nxlo_out];
+    FFT_SCALAR *v3dest = &v3_brick[nzlo_out][nylo_out][nxlo_out];
+    FFT_SCALAR *v4dest = &v4_brick[nzlo_out][nylo_out][nxlo_out];
+    FFT_SCALAR *v5dest = &v5_brick[nzlo_out][nylo_out][nxlo_out];
+    for (int m = 0; m < nlist; m++) {
+      v0dest[list[m]] = buf[nbuf++];
+      v1dest[list[m]] = buf[nbuf++];
+      v2dest[list[m]] = buf[nbuf++];
+      v3dest[list[m]] = buf[nbuf++];
+      v4dest[list[m]] = buf[nbuf++];
+      v5dest[list[m]] = buf[nbuf++];
+    }
+  }
+}
+
+/* ----------------------------------------------------------------------
+   pack ghost values into buf to send to another proc
+------------------------------------------------------------------------- */
+
+void PPPM::pack_reverse(int flag, FFT_SCALAR *buf, int nlist, int *list)
+{
+  if (flag != REVERSE_RHO) return;
+
+  // copy the listed density_brick entries (offsets from its first element)
+  FFT_SCALAR *rho = &density_brick[nzlo_out][nylo_out][nxlo_out];
+  for (int m = 0; m < nlist; m++) buf[m] = rho[list[m]];
+}
+
+/* ----------------------------------------------------------------------
+   unpack another proc's ghost values from buf and add to own values
+------------------------------------------------------------------------- */
+
+void PPPM::unpack_reverse(int flag, FFT_SCALAR *buf, int nlist, int *list)
+{
+  if (flag != REVERSE_RHO) return;
+
+  // sum the received values onto the listed density_brick entries
+  FFT_SCALAR *rho = &density_brick[nzlo_out][nylo_out][nxlo_out];
+  for (int m = 0; m < nlist; m++) rho[list[m]] += buf[m];
+}
+
+/* ----------------------------------------------------------------------
+   map nprocs to NX by NY grid as PX by PY procs - return optimal px,py
+------------------------------------------------------------------------- */
+
+void PPPM::procs2grid2d(int nprocs, int nx, int ny, int *px, int *py)
+{
+  // visit every ipx that divides nprocs, paired with ipy = nprocs/ipx
+  // boxx,boxy = sub-grid extents per proc, with the divisions rounded up
+  // surf = boxx + boxy is the quantity being minimized
+  // ties in surf go to the factorization with the larger boxx*boxy
+  // px,py are left untouched if no factorization is accepted
+
+  int bestsurf = 2 * (nx + ny);
+  int bestboxx = 0;
+  int bestboxy = 0;
+
+  for (int ipx = 1; ipx <= nprocs; ipx++) {
+    if (nprocs % ipx != 0) continue;
+
+    const int ipy = nprocs/ipx;
+    const int boxx = nx/ipx + ((nx % ipx) ? 1 : 0);
+    const int boxy = ny/ipy + ((ny % ipy) ? 1 : 0);
+    const int surf = boxx + boxy;
+
+    // keep this factorization only if it beats the best one so far
+
+    const bool better = (surf < bestsurf) ||
+      (surf == bestsurf && boxx*boxy > bestboxx*bestboxy);
+    if (!better) continue;
+
+    bestsurf = surf;
+    bestboxx = boxx;
+    bestboxy = boxy;
+    *px = ipx;
+    *py = ipy;
+  }
+}
+
+/* ----------------------------------------------------------------------
+   charge assignment into rho1d
+   dx,dy,dz = distance of particle from "lower left" grid point
+------------------------------------------------------------------------- */
+
+void PPPM::compute_rho1d(const FFT_SCALAR &dx, const FFT_SCALAR &dy,
+                         const FFT_SCALAR &dz)
+{
+  // Horner evaluation in dx,dy,dz of the rho_coeff polynomial of each point k
+
+  for (int k = (1-order)/2; k <= order/2; k++) {
+    FFT_SCALAR wx = ZEROF;
+    FFT_SCALAR wy = ZEROF;
+    FFT_SCALAR wz = ZEROF;
+    for (int l = order-1; l >= 0; l--) {
+      wx = rho_coeff[l][k] + wx*dx;
+      wy = rho_coeff[l][k] + wy*dy;
+      wz = rho_coeff[l][k] + wz*dz;
+    }
+    rho1d[0][k] = wx;
+    rho1d[1][k] = wy;
+    rho1d[2][k] = wz;
+  }
+}
+
+/* ----------------------------------------------------------------------
+   charge assignment into drho1d
+   dx,dy,dz = distance of particle from "lower left" grid point
+------------------------------------------------------------------------- */
+
+void PPPM::compute_drho1d(const FFT_SCALAR &dx, const FFT_SCALAR &dy,
+                          const FFT_SCALAR &dz)
+{
+  // Horner evaluation in dx,dy,dz of the drho_coeff polynomial of each point k
+
+  for (int k = (1-order)/2; k <= order/2; k++) {
+    FFT_SCALAR wx = ZEROF;
+    FFT_SCALAR wy = ZEROF;
+    FFT_SCALAR wz = ZEROF;
+    for (int l = order-2; l >= 0; l--) {
+      wx = drho_coeff[l][k] + wx*dx;
+      wy = drho_coeff[l][k] + wy*dy;
+      wz = drho_coeff[l][k] + wz*dz;
+    }
+    drho1d[0][k] = wx;
+    drho1d[1][k] = wy;
+    drho1d[2][k] = wz;
+  }
+}
+
+/* ----------------------------------------------------------------------
+   generate coefficients for the weight function of order n
+
+              (n-1)
+  Wn(x) =     Sum    wn(k,x) , Sum is over every other integer
+           k=-(n-1)
+  For k=-(n-1),-(n-1)+2, ....., (n-1)-2,n-1
+      k is odd integers if n is even and even integers if n is odd
+              ---
+             | n-1
+             | Sum a(l,j)*(x-k/2)**l   if abs(x-k/2) < 1/2
+  wn(k,x) = <  l=0
+             |
+             |  0                       otherwise
+              ---
+  a coefficients are packed into the array rho_coeff to eliminate zeros
+  rho_coeff(l,((k+mod(n+1,2))/2)) = a(l,k)
+------------------------------------------------------------------------- */
+
+void PPPM::compute_rho_coeff()  // fills rho_coeff and drho_coeff for "order"
+{
+  int j,k,l,m;
+  FFT_SCALAR s;
+
+  FFT_SCALAR **a;  // scratch table a[l][k], used for l = 0..order-1, k = -order..order
+  memory->create2d_offset(a,order,-order,order,"pppm:a");
+
+  for (k = -order; k <= order; k++)
+    for (l = 0; l < order; l++)
+      a[l][k] = 0.0;
+
+  a[0][0] = 1.0;  // seed; pass j below rewrites columns k = -j..j in steps of 2
+  for (j = 1; j < order; j++) {
+    for (k = -j; k <= j; k += 2) {
+      s = 0.0;
+      for (l = 0; l < j; l++) {
+        a[l+1][k] = (a[l][k+1]-a[l][k-1]) / (l+1);  // from neighbor columns k+1, k-1
+#ifdef FFT_SINGLE
+        s += powf(0.5,(float) l+1) *
+          (a[l][k-1] + powf(-1.0,(float) l) * a[l][k+1]) / (l+1);
+#else
+        s += pow(0.5,(double) l+1) *
+          (a[l][k-1] + pow(-1.0,(double) l) * a[l][k+1]) / (l+1);
+#endif
+      }
+      a[0][k] = s;  // l = 0 entry, summed from the same neighbor columns
+    }
+  }
+
+  m = (1-order)/2;  // same starting index as the k loop in compute_rho1d
+  for (k = -(order-1); k < order; k += 2) {  // every other k -> consecutive m
+    for (l = 0; l < order; l++)
+      rho_coeff[l][m] = a[l][k];
+    for (l = 1; l < order; l++)
+      drho_coeff[l-1][m] = l*a[l][k];  // coefficients of the derivative polynomial
+    m++;
+  }
+
+  memory->destroy2d_offset(a,-order);
+}
+
+/* ----------------------------------------------------------------------
+   Slab-geometry correction term to dampen inter-slab interactions between
+   periodically repeating slabs.  Yields good approximation to 2D Ewald if
+   adequate empty space is left between repeating slabs (J. Chem. Phys.
+   111, 3155).  Slabs defined here to be parallel to the xy plane. Also
+   extended to non-neutral systems (J. Chem. Phys. 131, 094107).
+------------------------------------------------------------------------- */
+
+void PPPM::slabcorr()  // adds slab terms to energy, eatom[] and the z forces
+{
+  // compute local contribution to global dipole moment
+
+  double *q = atom->q;
+  double **x = atom->x;
+  double zprd = domain->zprd;
+  int nlocal = atom->nlocal;
+
+  double dipole = 0.0;
+  for (int i = 0; i < nlocal; i++) dipole += q[i]*x[i][2];  // sum of q*z
+
+  // sum local contributions to get global dipole moment
+
+  double dipole_all;
+  MPI_Allreduce(&dipole,&dipole_all,1,MPI_DOUBLE,MPI_SUM,world);
+
+  // need to make non-neutral systems and/or
+  //  per-atom energy translationally invariant
+
+  double dipole_r2 = 0.0;  // stays 0.0 unless the branch below is taken
+  if (eflag_atom || fabs(qsum) > SMALL) {
+    for (int i = 0; i < nlocal; i++)
+      dipole_r2 += q[i]*x[i][2]*x[i][2];  // sum of q*z*z
+
+    // sum local contributions
+
+    double tmp;
+    MPI_Allreduce(&dipole_r2,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
+    dipole_r2 = tmp;
+  }
+
+  // compute corrections
+
+  const double e_slabcorr = MY_2PI*(dipole_all*dipole_all -
+    qsum*dipole_r2 - qsum*qsum*zprd*zprd/12.0)/volume;
+  const double qscale = force->qqrd2e * scale;
+
+  if (eflag_global) energy += qscale * e_slabcorr;
+
+  // per-atom energy
+
+  if (eflag_atom) {
+    double efact = qscale * MY_2PI/volume;
+    for (int i = 0; i < nlocal; i++)
+      eatom[i] += efact * q[i]*(x[i][2]*dipole_all - 0.5*(dipole_r2 +
+        qsum*x[i][2]*x[i][2]) - qsum*zprd*zprd/12.0);
+  }
+
+  // add on force corrections (z component only, applied to every local atom)
+
+  double ffact = qscale * (-4.0*MY_PI/volume);
+  double **f = atom->f;
+
+  for (int i = 0; i < nlocal; i++) f[i][2] += ffact * q[i]*(dipole_all - qsum*x[i][2]);
+}
+
+/* ----------------------------------------------------------------------
+   perform and time the 1d FFTs required for N timesteps
+------------------------------------------------------------------------- */
+
+int PPPM::timing_1d(int n, double &time1d)
+{
+  // FFTs per timestep: 1 fft1 pass (flag 1) plus fft2 passes (flag -1),
+  //  2 in total if differentiation_flag == 1, else 4; also the return value
+
+  const int nffts = (differentiation_flag == 1) ? 2 : 4;
+
+  for (int i = 0; i < 2*nfft_both; i++) work1[i] = ZEROF;
+
+  MPI_Barrier(world);
+  const double time1 = MPI_Wtime();
+
+  for (int i = 0; i < n; i++) {
+    fft1->timing1d(work1,nfft_both,1);
+    for (int m = 1; m < nffts; m++) fft2->timing1d(work1,nfft_both,-1);
+  }
+
+  MPI_Barrier(world);
+  const double time2 = MPI_Wtime();
+
+  // wall-clock time between the two barriers is returned through time1d
+
+  time1d = time2 - time1;
+  return nffts;
+}
+
+/* ----------------------------------------------------------------------
+   perform and time the 3d FFTs required for N timesteps
+------------------------------------------------------------------------- */
+
+int PPPM::timing_3d(int n, double &time3d)
+{
+  // FFTs per timestep: 1 fft1 pass (flag 1) plus fft2 passes (flag -1),
+  //  2 in total if differentiation_flag == 1, else 4; also the return value
+
+  const int nffts = (differentiation_flag == 1) ? 2 : 4;
+
+  for (int i = 0; i < 2*nfft_both; i++) work1[i] = ZEROF;
+
+  MPI_Barrier(world);
+  const double time1 = MPI_Wtime();
+
+  for (int i = 0; i < n; i++) {
+    fft1->compute(work1,work1,1);
+    for (int m = 1; m < nffts; m++) fft2->compute(work1,work1,-1);
+  }
+
+  MPI_Barrier(world);
+  const double time2 = MPI_Wtime();
+
+  // wall-clock time between the two barriers is returned through time3d
+
+  time3d = time2 - time1;
+  return nffts;
+}
+
+/* ----------------------------------------------------------------------
+   memory usage of local arrays
+------------------------------------------------------------------------- */
+
+double PPPM::memory_usage()
+{
+  // nbrick = number of points in my brick, nxlo_out..nxhi_out etc
+
+  const int nbrick = (nxhi_out-nxlo_out+1) * (nyhi_out-nylo_out+1) *
+    (nzhi_out-nzlo_out+1);
+  const int nbrick_arrays = (differentiation_flag == 1) ? 2 : 4;
+
+  double bytes = nmax*3 * sizeof(double);
+  bytes += nbrick_arrays * nbrick * sizeof(FFT_SCALAR);
+  if (triclinic) bytes += 3 * nfft_both * sizeof(double);
+  bytes += 6 * nfft_both * sizeof(double);
+  bytes += nfft_both * sizeof(double);
+  bytes += nfft_both*5 * sizeof(FFT_SCALAR);
+
+  // arrays that only exist once their allocate flag has been set
+
+  if (peratom_allocate_flag) bytes += 6 * nbrick * sizeof(FFT_SCALAR);
+  if (group_allocate_flag) {
+    bytes += 2 * nbrick * sizeof(FFT_SCALAR);
+    bytes += 2 * nfft_both * sizeof(FFT_SCALAR);
+  }
+
+  // plus whatever cg reports for itself
+
+  return bytes + cg->memory_usage();
+}
+
+/* ----------------------------------------------------------------------
+   group-group interactions
+ ------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   compute the PPPM total long-range force and energy for groups A and B
+ ------------------------------------------------------------------------- */
+
+void PPPM::compute_group_group(int groupbit_A, int groupbit_B, int AA_flag)
+{
+  if (slabflag && triclinic)
+    error->all(FLERR,"Cannot (yet) use K-space slab "
+               "correction with compute group/group for triclinic systems");
+
+  if (differentiation_flag)  // any nonzero setting is rejected here
+    error->all(FLERR,"Cannot (yet) use kspace_modify "
+               "diff ad with compute group/group");
+
+  if (!group_allocate_flag) allocate_groups();  // allocated on first use
+
+  // convert atoms from box to lamda coords (triclinic only)
+
+  if (triclinic == 0) boxlo = domain->boxlo;
+  else {
+    boxlo = domain->boxlo_lamda;
+    domain->x2lamda(atom->nlocal);
+  }
+
+  e2group = 0.0; //energy
+  f2group[0] = 0.0; //force in x-direction
+  f2group[1] = 0.0; //force in y-direction
+  f2group[2] = 0.0; //force in z-direction
+
+  // map my particle charge onto my local 3d density grid
+
+  make_rho_groups(groupbit_A,groupbit_B,AA_flag);
+
+  // all procs communicate density values from their ghost cells
+  //   to fully sum contribution in their 3d bricks
+  // remap from 3d decomposition to FFT decomposition
+
+  // temporarily store and switch pointers so we can
+  //  use brick2fft() for groups A and B (without
+  //  writing an additional function)
+
+  FFT_SCALAR ***density_brick_real = density_brick;
+  FFT_SCALAR *density_fft_real = density_fft;
+
+  // group A
+
+  density_brick = density_A_brick;
+  density_fft = density_A_fft;
+
+  cg->reverse_comm(this,REVERSE_RHO);  // presumably via pack/unpack_reverse()
+  brick2fft();
+
+  // group B
+
+  density_brick = density_B_brick;
+  density_fft = density_B_fft;
+
+  cg->reverse_comm(this,REVERSE_RHO);
+  brick2fft();
+
+  // switch back pointers
+
+  density_brick = density_brick_real;
+  density_fft = density_fft_real;
+
+  // compute potential gradient on my FFT grid and
+  //   portion of group-group energy/force on this proc's FFT grid
+
+  poisson_groups(AA_flag);  // accumulates into e2group and f2group
+
+  const double qscale = force->qqrd2e * scale;
+
+  // total group A <--> group B energy
+  // self and boundary correction terms are in compute_group_group.cpp
+
+  double e2group_all;
+  MPI_Allreduce(&e2group,&e2group_all,1,MPI_DOUBLE,MPI_SUM,world);
+  e2group = e2group_all;
+
+  e2group *= qscale*0.5*volume;
+
+  // total group A <--> group B force
+  // NOTE(review): for slabflag == 2, f2group[2] keeps its local unscaled sum
+
+  double f2group_all[3];
+  MPI_Allreduce(f2group,f2group_all,3,MPI_DOUBLE,MPI_SUM,world);
+
+  f2group[0] = qscale*volume*f2group_all[0];
+  f2group[1] = qscale*volume*f2group_all[1];
+  if (slabflag != 2) f2group[2] = qscale*volume*f2group_all[2];
+
+  // convert atoms back from lamda to box coords
+
+  if (triclinic) domain->lamda2x(atom->nlocal);
+
+  if (slabflag == 1)  // slab term is added after the coords are restored
+    slabcorr_groups(groupbit_A, groupbit_B, AA_flag);
+}
+
+/* ----------------------------------------------------------------------
+ allocate group-group memory that depends on # of K-vectors and order
+ ------------------------------------------------------------------------- */
+
+void PPPM::allocate_groups()
+{
+  // two bricks over the *_out index ranges, two arrays of nfft_both values
+  memory->create3d_offset(density_A_brick,nzlo_out,nzhi_out,
+                          nylo_out,nyhi_out,nxlo_out,nxhi_out,"pppm:density_A_brick");
+  memory->create3d_offset(density_B_brick,nzlo_out,nzhi_out,
+                          nylo_out,nyhi_out,nxlo_out,nxhi_out,"pppm:density_B_brick");
+  memory->create(density_A_fft,nfft_both,"pppm:density_A_fft");
+  memory->create(density_B_fft,nfft_both,"pppm:density_B_fft");
+  group_allocate_flag = 1;
+}
+
+/* ----------------------------------------------------------------------
+ deallocate group-group memory that depends on # of K-vectors and order
+ ------------------------------------------------------------------------- */
+
+void PPPM::deallocate_groups()
+{
+  // release the four arrays created by allocate_groups(), then clear the flag
+  memory->destroy3d_offset(density_A_brick,nzlo_out,nylo_out,nxlo_out);
+  memory->destroy3d_offset(density_B_brick,nzlo_out,nylo_out,nxlo_out);
+  memory->destroy(density_A_fft);
+  memory->destroy(density_B_fft);
+  group_allocate_flag = 0;
+}
+
+/* ----------------------------------------------------------------------
+ create discretized "density" on section of global grid due to my particles
+ density(x,y,z) = charge "density" at grid points of my 3d brick
+ (nxlo:nxhi,nylo:nyhi,nzlo:nzhi) is extent of my brick (including ghosts)
+ in global grid for group-group interactions
+ ------------------------------------------------------------------------- */
+
+void PPPM::make_rho_groups(int groupbit_A, int groupbit_B, int AA_flag)
+{
+  // clear 3d density arrays of both groups
+  //  (ngrid values each, starting at the [nzlo_out][nylo_out][nxlo_out] entry)
+
+  memset(&(density_A_brick[nzlo_out][nylo_out][nxlo_out]),0,
+         ngrid*sizeof(FFT_SCALAR));
+  memset(&(density_B_brick[nzlo_out][nylo_out][nxlo_out]),0,
+         ngrid*sizeof(FFT_SCALAR));
+
+  double *q = atom->q;
+  double **x = atom->x;
+  int *mask = atom->mask;
+  const int nlocal = atom->nlocal;
+
+  // loop over my charges, add their contribution to nearby grid points
+  // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
+  // (dx,dy,dz) = distance to "lower left" grid pt
+  // (mx,my,mz) = global coords of moving stencil pt
+
+  for (int i = 0; i < nlocal; i++) {
+    const int in_A = mask[i] & groupbit_A;
+    const int in_B = mask[i] & groupbit_B;
+
+    // AA_flag set: only atoms that are in both groups are mapped
+    // AA_flag unset: atoms in at least one of the groups are mapped
+
+    if (AA_flag && !(in_A && in_B)) continue;
+    if (!in_A && !in_B) continue;
+
+    const int nx = part2grid[i][0];
+    const int ny = part2grid[i][1];
+    const int nz = part2grid[i][2];
+    const FFT_SCALAR dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv;
+    const FFT_SCALAR dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv;
+    const FFT_SCALAR dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv;
+
+    compute_rho1d(dx,dy,dz);
+
+    // z0 -> y0 -> x0 -> wt: delvolinv*q[i] times the z, y and x weights
+
+    const FFT_SCALAR z0 = delvolinv * q[i];
+    for (int n = nlower; n <= nupper; n++) {
+      const int mz = n+nz;
+      const FFT_SCALAR y0 = z0*rho1d[2][n];
+      for (int m = nlower; m <= nupper; m++) {
+        const int my = m+ny;
+        const FFT_SCALAR x0 = y0*rho1d[1][m];
+        for (int l = nlower; l <= nupper; l++) {
+          const int mx = l+nx;
+          const FFT_SCALAR wt = x0*rho1d[0][l];
+
+          // group A
+
+          if (in_A) density_A_brick[mz][my][mx] += wt;
+
+          // group B
+
+          if (in_B) density_B_brick[mz][my][mx] += wt;
+        }
+      }
+    }
+  }
+}
+
+/* ----------------------------------------------------------------------
+   FFT-based Poisson solver for group-group interactions
+ ------------------------------------------------------------------------- */
+
+void PPPM::poisson_groups(int AA_flag)  // accumulates into e2group and f2group
+{
+  int i,j,k,n;
+
+  // reuse memory (already declared)
+
+  FFT_SCALAR *work_A = work1;
+  FFT_SCALAR *work_B = work2;
+
+  // transform charge density (r -> k)
+
+  // group A
+
+  n = 0;
+  for (i = 0; i < nfft; i++) {
+    work_A[n++] = density_A_fft[i];  // 1st value of the pair: the density
+    work_A[n++] = ZEROF;  // 2nd value of the pair starts at zero
+  }
+
+  fft1->compute(work_A,work_A,1);  // work_A is both input and output
+
+  // group B
+
+  n = 0;
+  for (i = 0; i < nfft; i++) {
+    work_B[n++] = density_B_fft[i];
+    work_B[n++] = ZEROF;
+  }
+
+  fft1->compute(work_B,work_B,1);
+
+  // group-group energy and force contribution,
+  //  keep everything in reciprocal space so
+  //  no inverse FFTs needed
+
+  double scaleinv = 1.0/(nx_pppm*ny_pppm*nz_pppm);
+  double s2 = scaleinv*scaleinv;
+
+  // energy
+
+  n = 0;
+  for (i = 0; i < nfft; i++) {
+    e2group += s2 * greensfn[i] *
+      (work_A[n]*work_B[n] + work_A[n+1]*work_B[n+1]);
+    n += 2;
+  }
+
+  if (AA_flag) return;  // with AA_flag set only the energy is accumulated
+
+
+  // multiply by Green's function and s2
+  //  (only for work_A so it is not squared below)
+
+  n = 0;
+  for (i = 0; i < nfft; i++) {
+    work_A[n++] *= s2 * greensfn[i];
+    work_A[n++] *= s2 * greensfn[i];
+  }
+
+  // triclinic system: forces are accumulated by poisson_groups_triclinic()
+  
+  if (triclinic) {
+    poisson_groups_triclinic();
+    return;
+  }
+
+  double partial_group;  // same pair product is recomputed in each loop below
+
+  // force, x direction (fkx indexed by i)
+
+  n = 0;
+  for (k = nzlo_fft; k <= nzhi_fft; k++)
+    for (j = nylo_fft; j <= nyhi_fft; j++)
+      for (i = nxlo_fft; i <= nxhi_fft; i++) {
+        partial_group = work_A[n+1]*work_B[n] - work_A[n]*work_B[n+1];
+        f2group[0] += fkx[i] * partial_group;
+        n += 2;
+      }
+
+  // force, y direction (fky indexed by j)
+
+  n = 0;
+  for (k = nzlo_fft; k <= nzhi_fft; k++)
+    for (j = nylo_fft; j <= nyhi_fft; j++)
+      for (i = nxlo_fft; i <= nxhi_fft; i++) {
+        partial_group = work_A[n+1]*work_B[n] - work_A[n]*work_B[n+1];
+        f2group[1] += fky[j] * partial_group;
+        n += 2;
+      }
+
+  // force, z direction (fkz indexed by k)
+
+  n = 0;
+  for (k = nzlo_fft; k <= nzhi_fft; k++)
+    for (j = nylo_fft; j <= nyhi_fft; j++)
+      for (i = nxlo_fft; i <= nxhi_fft; i++) {
+        partial_group = work_A[n+1]*work_B[n] - work_A[n]*work_B[n+1];
+        f2group[2] += fkz[k] * partial_group;
+        n += 2;
+      }
+}
+
+/* ----------------------------------------------------------------------
+   force part of the group-group Poisson solve for a triclinic system,
+   reads work1/work2 without refilling them and accumulates into f2group
+ ------------------------------------------------------------------------- */
+
+void PPPM::poisson_groups_triclinic()
+{
+  int i,j,k,n;   // j,k are not used in this routine
+
+  // reuse memory (already declared); contents are read, not recomputed here
+
+  FFT_SCALAR *work_A = work1;
+  FFT_SCALAR *work_B = work2;
+
+  double partial_group;   // A[n+1]*B[n] - A[n]*B[n+1] at the current point
+
+  // force, x direction: fkx is indexed by the flat point index 0..nfft-1
+
+  n = 0;
+  for (i = 0; i < nfft; i++) {
+    partial_group = work_A[n+1]*work_B[n] - work_A[n]*work_B[n+1];
+    f2group[0] += fkx[i] * partial_group;
+    n += 2;
+  }
+
+  // force, y direction: fky is indexed by the flat point index as well
+
+  n = 0;
+  for (i = 0; i < nfft; i++) {
+    partial_group = work_A[n+1]*work_B[n] - work_A[n]*work_B[n+1];
+    f2group[1] += fky[i] * partial_group;
+    n += 2;
+  }
+
+  // force, z direction: fkz is indexed by the flat point index as well
+
+  n = 0;
+  for (i = 0; i < nfft; i++) {
+    partial_group = work_A[n+1]*work_B[n] - work_A[n]*work_B[n+1];
+    f2group[2] += fkz[i] * partial_group;
+    n += 2;
+  }
+}
+
+/* ----------------------------------------------------------------------
+   Slab-geometry correction term to dampen inter-slab interactions between
+   periodically repeating slabs.  Yields good approximation to 2D Ewald if
+   adequate empty space is left between repeating slabs (J. Chem. Phys.
+   111, 3155).  Slabs defined here to be parallel to the xy plane. Also
+   extended to non-neutral systems (J. Chem. Phys. 131, 094107).
+------------------------------------------------------------------------- */
+
+void PPPM::slabcorr_groups(int groupbit_A, int groupbit_B, int AA_flag)
+{
+  // compute local contribution to per-group charge, z-dipole and q*z*z sums
+
+  double *q = atom->q;
+  double **x = atom->x;
+  double zprd = domain->zprd;   // NOTE(review): raw z length, no slab_volfactor applied here - confirm intended
+  int *mask = atom->mask;
+  int nlocal = atom->nlocal;
+
+  double qsum_A = 0.0;
+  double qsum_B = 0.0;
+  double dipole_A = 0.0;
+  double dipole_B = 0.0;
+  double dipole_r2_A = 0.0;
+  double dipole_r2_B = 0.0;
+
+  for (int i = 0; i < nlocal; i++) {
+    if (!((mask[i] & groupbit_A) && (mask[i] & groupbit_B)))
+      if (AA_flag) continue;   // AA_flag set: only atoms in both groups count
+
+    if (mask[i] & groupbit_A) { 
+      qsum_A += q[i];
+      dipole_A += q[i]*x[i][2];
+      dipole_r2_A += q[i]*x[i][2]*x[i][2];
+    }
+
+    if (mask[i] & groupbit_B) {
+      qsum_B += q[i];
+      dipole_B += q[i]*x[i][2];
+      dipole_r2_B += q[i]*x[i][2]*x[i][2];
+    }
+  }
+
+  // sum local contributions to get total charge and global dipole moment
+  //  for each group (one MPI_Allreduce per scalar, tmp is the receive buffer)
+
+  double tmp;
+  MPI_Allreduce(&qsum_A,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
+  qsum_A = tmp;
+
+  MPI_Allreduce(&qsum_B,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
+  qsum_B = tmp;
+
+  MPI_Allreduce(&dipole_A,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
+  dipole_A = tmp;
+
+  MPI_Allreduce(&dipole_B,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
+  dipole_B = tmp;
+
+  MPI_Allreduce(&dipole_r2_A,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
+  dipole_r2_A = tmp;
+
+  MPI_Allreduce(&dipole_r2_B,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
+  dipole_r2_B = tmp;
+
+  // compute corrections: energy term is added to e2group
+
+  const double qscale = force->qqrd2e * scale;
+  const double efact = qscale * MY_2PI/volume;
+
+  e2group += efact * (dipole_A*dipole_B - 0.5*(qsum_A*dipole_r2_B +
+    qsum_B*dipole_r2_A) - qsum_A*qsum_B*zprd*zprd/12.0);
+
+  // add on force corrections (only the z component of f2group is changed)
+
+  const double ffact = qscale * (-4.0*MY_PI/volume);
+  f2group[2] += ffact * (qsum_A*dipole_B - qsum_B*dipole_A);
+}
diff --git a/src/KSPACE/pppm_disp.cpp b/src/KSPACE/pppm_disp.cpp
index 426dbf0e7b..a15cf38515 100755
--- a/src/KSPACE/pppm_disp.cpp
+++ b/src/KSPACE/pppm_disp.cpp
@@ -1,8209 +1,8209 @@
-/* ----------------------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   http://lammps.sandia.gov, Sandia National Laboratories
-   Steve Plimpton, sjplimp@sandia.gov
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under 
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-/* ----------------------------------------------------------------------
-   Contributing authors: Rolf Isele-Holder (Aachen University)
-                         Paul Crozier (SNL)
-------------------------------------------------------------------------- */
-
-#include "lmptype.h"
-#include "mpi.h"
-#include "string.h"
-#include "stdio.h"
-#include "stdlib.h"
-#include "math.h"
-#include "pppm_disp.h"
-#include "math_const.h"
-#include "atom.h"
-#include "comm.h"
-#include "commgrid.h"
-#include "neighbor.h"
-#include "force.h"
-#include "pair.h"
-#include "bond.h"
-#include "angle.h"
-#include "domain.h"
-#include "fft3d_wrap.h"
-#include "remap_wrap.h"
-#include "memory.h"
-#include "error.h"
-
-using namespace LAMMPS_NS;
-using namespace MathConst;
-
-#define MAXORDER   7
-#define OFFSET 16384
-#define SMALL 0.00001
-#define LARGE 10000.0
-#define EPS_HOC 1.0e-7
-
-enum{GEOMETRIC,ARITHMETIC,SIXTHPOWER};
-enum{REVERSE_RHO, REVERSE_RHO_G, REVERSE_RHO_A, REVERSE_RHO_NONE};
-enum{FORWARD_IK, FORWARD_AD, FORWARD_IK_PERATOM, FORWARD_AD_PERATOM,
-     FORWARD_IK_G, FORWARD_AD_G, FORWARD_IK_PERATOM_G, FORWARD_AD_PERATOM_G,
-     FORWARD_IK_A, FORWARD_AD_A, FORWARD_IK_PERATOM_A, FORWARD_AD_PERATOM_A,
-     FORWARD_IK_NONE, FORWARD_AD_NONE, FORWARD_IK_PERATOM_NONE, FORWARD_AD_PERATOM_NONE};
-
-
-#ifdef FFT_SINGLE
-#define ZEROF 0.0f
-#define ONEF  1.0f
-#else
-#define ZEROF 0.0
-#define ONEF  1.0
-#endif
-
-/* ---------------------------------------------------------------------- */
-
-PPPMDisp::PPPMDisp(LAMMPS *lmp, int narg, char **arg) : KSpace(lmp, narg, arg)
-{
-  if (narg < 1) error->all(FLERR,"Illegal kspace_style pppm/disp command");
-
-  triclinic_support = 0;
-  pppmflag = dispersionflag = 1;
-  accuracy_relative = fabs(force->numeric(FLERR,arg[0]));
-  
-  nfactors = 3;
-  factors = new int[nfactors];
-  factors[0] = 2;
-  factors[1] = 3;
-  factors[2] = 5;
-
-  MPI_Comm_rank(world,&me);
-  MPI_Comm_size(world,&nprocs);
-
-  csumflag = 0;
-  B = NULL;
-  cii = NULL;
-  csumi = NULL;
-  peratom_allocate_flag = 0;
-
-  density_brick = vdx_brick = vdy_brick = vdz_brick = NULL;
-  density_fft = NULL;
-  u_brick = v0_brick = v1_brick = v2_brick = v3_brick = 
-    v4_brick = v5_brick = NULL;
-
-  density_brick_g = vdx_brick_g = vdy_brick_g = vdz_brick_g = NULL;
-  density_fft_g = NULL;
-  u_brick_g = v0_brick_g = v1_brick_g = v2_brick_g = v3_brick_g = 
-    v4_brick_g = v5_brick_g = NULL;
-
-  density_brick_a0 = vdx_brick_a0 = vdy_brick_a0 = vdz_brick_a0 = NULL;
-  density_fft_a0 = NULL;
-  u_brick_a0 = v0_brick_a0 = v1_brick_a0 = v2_brick_a0 = v3_brick_a0 = 
-    v4_brick_a0 = v5_brick_a0 = NULL;
-
-  density_brick_a1 = vdx_brick_a1 = vdy_brick_a1 = vdz_brick_a1 = NULL;
-  density_fft_a1 = NULL;
-  u_brick_a1 = v0_brick_a1 = v1_brick_a1 = v2_brick_a1 = v3_brick_a1 = 
-    v4_brick_a1 = v5_brick_a1 = NULL;
-
-  density_brick_a2 = vdx_brick_a2 = vdy_brick_a2 = vdz_brick_a2 = NULL;
-  density_fft_a2 = NULL;
-  u_brick_a2 = v0_brick_a2 = v1_brick_a2 = v2_brick_a2 = v3_brick_a2 = 
-    v4_brick_a2 = v5_brick_a2 = NULL;
-
-  density_brick_a3 = vdx_brick_a3 = vdy_brick_a3 = vdz_brick_a3 = NULL;
-  density_fft_a3 = NULL;
-  u_brick_a3 = v0_brick_a3 = v1_brick_a3 = v2_brick_a3 = v3_brick_a3 = 
-    v4_brick_a3 = v5_brick_a3 = NULL;
-
-  density_brick_a4 = vdx_brick_a4 = vdy_brick_a4 = vdz_brick_a4 = NULL;
-  density_fft_a4 = NULL;
-  u_brick_a4 = v0_brick_a4 = v1_brick_a4 = v2_brick_a4 = v3_brick_a4 = 
-    v4_brick_a4 = v5_brick_a4 = NULL;
-
-  density_brick_a5 = vdx_brick_a5 = vdy_brick_a5 = vdz_brick_a5 = NULL;
-  density_fft_a5 = NULL;
-  u_brick_a5 = v0_brick_a5 = v1_brick_a5 = v2_brick_a5 = v3_brick_a5 = 
-    v4_brick_a5 = v5_brick_a5 = NULL;
-
-  density_brick_a6 = vdx_brick_a6 = vdy_brick_a6 = vdz_brick_a6 = NULL;
-  density_fft_a6 = NULL;
-  u_brick_a6 = v0_brick_a6 = v1_brick_a6 = v2_brick_a6 = v3_brick_a6 = 
-    v4_brick_a6 = v5_brick_a6 = NULL;
-
-  density_brick_none = vdx_brick_none = vdy_brick_none = vdz_brick_none = NULL;
-  density_fft_none = NULL;
-  u_brick_none = v0_brick_none = v1_brick_none = v2_brick_none = v3_brick_none = 
-    v4_brick_none = v5_brick_none = NULL;
-
-  greensfn = NULL;
-  greensfn_6 = NULL;
-  work1 = work2 = NULL;
-  work1_6 = work2_6 = NULL;
-  vg = NULL;
-  vg2 = NULL;
-  vg_6 = NULL;
-  vg2_6 = NULL;
-  fkx = fky = fkz = NULL;
-  fkx2 = fky2 = fkz2 = NULL;
-  fkx_6 = fky_6 = fkz_6 = NULL;
-  fkx2_6 = fky2_6 = fkz2_6 = NULL;
-
-  sf_precoeff1 = sf_precoeff2 = sf_precoeff3 = sf_precoeff4 = 
-    sf_precoeff5 = sf_precoeff6 = NULL;
-  sf_precoeff1_6 = sf_precoeff2_6 = sf_precoeff3_6 = sf_precoeff4_6 = 
-    sf_precoeff5_6 = sf_precoeff6_6 = NULL;
-
-  gf_b = NULL;
-  gf_b_6 = NULL;
-  rho1d = rho_coeff = NULL;
-  drho1d = drho_coeff = NULL;
-  rho1d_6 = rho_coeff_6 = NULL;
-  drho1d_6 = drho_coeff_6 = NULL;
-  fft1 = fft2 = NULL;
-  fft1_6 = fft2_6 = NULL;
-  remap = NULL;
-  remap_6 = NULL;
-
-  nmax = 0;
-  part2grid = NULL;
-  part2grid_6 = NULL;
-
-  cg = NULL;
-  cg_peratom = NULL;
-  cg_6 = NULL;
-  cg_peratom_6 = NULL;
-
-  memset(function, 0, EWALD_FUNCS*sizeof(int));
-}
-
-/* ----------------------------------------------------------------------
-   free all memory 
-------------------------------------------------------------------------- */
-
-PPPMDisp::~PPPMDisp()
-{
-  delete [] factors;
-  delete [] B;
-  B = NULL;
-  delete [] cii;
-  cii = NULL;
-  delete [] csumi;
-  csumi = NULL;
-  deallocate();
-  deallocate_peratom();
-  memory->destroy(part2grid);
-  memory->destroy(part2grid_6);
-  part2grid = part2grid_6 = NULL;
-}
-
-/* ----------------------------------------------------------------------
-   called once before run 
-------------------------------------------------------------------------- */
-
-void PPPMDisp::init()
-{
-  if (me == 0) {
-    if (screen) fprintf(screen,"PPPMDisp initialization ...\n");
-    if (logfile) fprintf(logfile,"PPPMDisp initialization ...\n");
-  }
-
-  triclinic_check();
-  if (domain->dimension == 2)
-    error->all(FLERR,"Cannot use PPPMDisp with 2d simulation");
-
-  if (slabflag == 0 && domain->nonperiodic > 0)
-    error->all(FLERR,"Cannot use nonperiodic boundaries with PPPMDisp");
-  if (slabflag == 1) {
-    if (domain->xperiodic != 1 || domain->yperiodic != 1 || 
-	domain->boundary[2][0] != 1 || domain->boundary[2][1] != 1)
-      error->all(FLERR,"Incorrect boundaries with slab PPPMDisp");
-  }
- 
-  if (order > MAXORDER || order_6 > MAXORDER) {
-    char str[128];
-    sprintf(str,"PPPMDisp coulomb order cannot be greater than %d",MAXORDER);
-    error->all(FLERR,str);
-  }
-
-  // free all arrays previously allocated
-
-  deallocate();
-  deallocate_peratom(); 
-
-  // set scale
-
-  scale = 1.0;
-
-  triclinic = domain->triclinic;
-
-  // check whether cutoff and pair style are set
-
-  pair_check();
-
-  int tmp;
-  Pair *pair = force->pair;
-  int *ptr = pair ? (int *) pair->extract("ewald_order",tmp) : NULL;
-  double *p_cutoff = pair ? (double *) pair->extract("cut_coul",tmp) : NULL;
-  double *p_cutoff_lj = pair ? (double *) pair->extract("cut_LJ",tmp) : NULL;
-  if (!(ptr||*p_cutoff||*p_cutoff_lj)) 
-    error->all(FLERR,"KSpace style is incompatible with Pair style");
-  cutoff = *p_cutoff;
-  cutoff_lj = *p_cutoff_lj;
-
-  double tmp2;
-  MPI_Allreduce(&cutoff, &tmp2,1,MPI_DOUBLE,MPI_SUM,world); 
-
-  // check out which types of potentials will have to be calculated
-
-  int ewald_order = ptr ? *((int *) ptr) : 1<<1;
-  int ewald_mix = ptr ? *((int *) pair->extract("ewald_mix",tmp)) : GEOMETRIC;
-  memset(function, 0, EWALD_FUNCS*sizeof(int));
-  for (int i=0; i<=EWALD_MAXORDER; ++i)			// transcribe order
-    if (ewald_order&(1<<i)) {				// from pair_style
-      int  k=-1;
-      char str[128];
-      switch (i) {
-	case 1:
-	  k = 0; break;
-	case 6:
-	  if ((ewald_mix==GEOMETRIC || ewald_mix==SIXTHPOWER|| mixflag == 1) && mixflag!= 2) { k = 1; break; }
-	  else if (ewald_mix==ARITHMETIC && mixflag!=2) { k = 2; break; }
-	  else if (mixflag == 2) { k = 3; break; }
-	default:
-	  sprintf(str, "Unsupported order in kspace_style "
-                  "pppm/disp, pair_style %s", force->pair_style);
-	  error->all(FLERR,str);
-      }
-      function[k] = 1;
-    }
- 
-
-  // warn, if function[0] is not set but charge attribute is set!
-  if (!function[0] && atom->q_flag && me == 0) {
-    char str[128];
-    sprintf(str, "Charges are set, but coulombic solver is not used");
-    error->warning(FLERR, str);
-  }
-
-  // compute qsum & qsqsum, if function[0] is set, print error if no charges are set or warn if not charge-neutral  
- 
-  if (function[0]) {
-    if (!atom->q_flag) 
-      error->all(FLERR,"Kspace style with selected options "
-                 "requires atom attribute q");
- 
-    qsum = qsqsum = 0.0;
-    for (int i = 0; i < atom->nlocal; i++) {
-      qsum += atom->q[i];
-      qsqsum += atom->q[i]*atom->q[i];
-
-    }
-
-    double tmp;
-    MPI_Allreduce(&qsum,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
-    qsum = tmp;
-    MPI_Allreduce(&qsqsum,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
-    qsqsum = tmp;
-
-    if (qsqsum == 0.0)
-      error->all(FLERR,"Cannot use kspace solver with selected options "
-                 "on system with no charge");
-    if (fabs(qsum) > SMALL && me == 0) {
-      char str[128];
-      sprintf(str,"System is not charge neutral, net charge = %g",qsum);
-      error->warning(FLERR,str);
-    }
-  }
-
-  // if kspace is TIP4P, extract TIP4P params from pair style
-  // bond/angle are not yet init(), so insure equilibrium request is valid
-
-  qdist = 0.0;
- 
-  if (tip4pflag) {
-    int itmp;
-    double *p_qdist = (double *) force->pair->extract("qdist",itmp);
-    int *p_typeO = (int *) force->pair->extract("typeO",itmp);
-    int *p_typeH = (int *) force->pair->extract("typeH",itmp);
-    int *p_typeA = (int *) force->pair->extract("typeA",itmp);
-    int *p_typeB = (int *) force->pair->extract("typeB",itmp);
-    if (!p_qdist || !p_typeO || !p_typeH || !p_typeA || !p_typeB)
-      error->all(FLERR,"KSpace style is incompatible with Pair style");
-    qdist = *p_qdist;
-    typeO = *p_typeO;
-    typeH = *p_typeH;
-    int typeA = *p_typeA;
-    int typeB = *p_typeB;
-
-    if (force->angle == NULL || force->bond == NULL)
-      error->all(FLERR,"Bond and angle potentials must be defined for TIP4P");
-    if (typeA < 1 || typeA > atom->nangletypes || 
-	force->angle->setflag[typeA] == 0)
-      error->all(FLERR,"Bad TIP4P angle type for PPPMDisp/TIP4P");
-    if (typeB < 1 || typeB > atom->nbondtypes || 
-	force->bond->setflag[typeB] == 0)
-      error->all(FLERR,"Bad TIP4P bond type for PPPMDisp/TIP4P");
-    double theta = force->angle->equilibrium_angle(typeA);
-    double blen = force->bond->equilibrium_distance(typeB);
-    alpha = qdist / (cos(0.5*theta) * blen);
-  }
-
-
-  // initialize the pair style to get the coefficients
-  neighrequest_flag = 0;
-  pair->init();
-  neighrequest_flag = 1;
-  init_coeffs();
-
-  //if g_ewald and g_ewald_6 have not been specified, set some initial value
-  //  to avoid problems when calculating the energies!
-
-  if (!gewaldflag) g_ewald = 1;
-  if (!gewaldflag_6) g_ewald_6 = 1;
-
-  // set accuracy (force units) from accuracy_relative or accuracy_absolute
-  
-  if (accuracy_absolute >= 0.0) accuracy = accuracy_absolute;
-  else accuracy = accuracy_relative * two_charge_force;
-
-  int (*procneigh)[2] = comm->procneigh;
-
-  int iteration = 0;
-  if (function[0]) {
-    CommGrid *cgtmp = NULL;
-    while (order >= minorder) {
-
-      if (iteration && me == 0)
-          error->warning(FLERR,"Reducing PPPMDisp Coulomb order "
-                         "b/c stencil extends beyond neighbor processor");
-      iteration++;
-
-      // set grid for dispersion interaction and coulomb interactions
- 
-      set_grid();
-
-      if (nx_pppm >= OFFSET || ny_pppm >= OFFSET || nz_pppm >= OFFSET)
-      error->all(FLERR,"PPPMDisp Coulomb grid is too large");
-
-      set_fft_parameters(nx_pppm, ny_pppm, nz_pppm,
-                         nxlo_fft, nylo_fft, nzlo_fft,
-                         nxhi_fft, nyhi_fft, nzhi_fft,
-                         nxlo_in, nylo_in, nzlo_in,
-                         nxhi_in, nyhi_in, nzhi_in,
-                         nxlo_out, nylo_out, nzlo_out,
-                         nxhi_out, nyhi_out, nzhi_out,
-                         nlower, nupper,
-                         ngrid, nfft, nfft_both,
-                         shift, shiftone, order);
-
-      if (overlap_allowed) break;
-
-      cgtmp = new CommGrid(lmp, world,1,1,
-                           nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
-                           nxlo_out,nxhi_out,nylo_out,nyhi_out,
-                           nzlo_out,nzhi_out,
-                           procneigh[0][0],procneigh[0][1],procneigh[1][0],
-                           procneigh[1][1],procneigh[2][0],procneigh[2][1]);
-      cgtmp->ghost_notify();
-      if (!cgtmp->ghost_overlap()) break;
-      delete cgtmp;
-
-      order--;
-    }
-
-    if (order < minorder)
-      error->all(FLERR,
-                 "Coulomb PPPMDisp order has been reduced below minorder");
-    if (cgtmp) delete cgtmp;
-
-    // adjust g_ewald
-  
-    if (!gewaldflag) adjust_gewald();
-
-    // calculate the final accuracy
-  
-    double acc = final_accuracy();
-  
-    // print stats
-
-    int ngrid_max,nfft_both_max,nbuf_max;
-    MPI_Allreduce(&ngrid,&ngrid_max,1,MPI_INT,MPI_MAX,world);
-    MPI_Allreduce(&nfft_both,&nfft_both_max,1,MPI_INT,MPI_MAX,world);
-
-    if (me == 0) {
-    #ifdef FFT_SINGLE
-      const char fft_prec[] = "single";
-    #else
-      const char fft_prec[] = "double";
-    #endif
-  
-      if (screen) {
-        fprintf(screen,"  Coulomb G vector (1/distance)= %g\n",g_ewald);
-        fprintf(screen,"  Coulomb grid = %d %d %d\n",nx_pppm,ny_pppm,nz_pppm);
-        fprintf(screen,"  Coulomb stencil order = %d\n",order);
-        fprintf(screen,"  Coulomb estimated absolute RMS force accuracy = %g\n",
-                acc);
-        fprintf(screen,"  Coulomb estimated relative force accuracy = %g\n",
-                acc/two_charge_force);
-        fprintf(screen,"  using %s precision FFTs\n",fft_prec);
-        fprintf(screen,"  3d grid and FFT values/proc = %d %d\n",
-		ngrid_max, nfft_both_max);
-      }
-      if (logfile) {
-        fprintf(logfile,"  Coulomb G vector (1/distance) = %g\n",g_ewald);
-        fprintf(logfile,"  Coulomb grid = %d %d %d\n",nx_pppm,ny_pppm,nz_pppm);
-        fprintf(logfile,"  Coulomb stencil order = %d\n",order);
-        fprintf(logfile,
-                "  Coulomb estimated absolute RMS force accuracy = %g\n",
-                acc);
-        fprintf(logfile,"  Coulomb estimated relative force accuracy = %g\n",
-                acc/two_charge_force);
-        fprintf(logfile,"  using %s precision FFTs\n",fft_prec);
-        fprintf(logfile,"  3d grid and FFT values/proc = %d %d\n",
-		ngrid_max, nfft_both_max);
-      }
-    }
-  }
-
-  iteration = 0;
-  if (function[1] + function[2] + function[3]) {
-    CommGrid *cgtmp = NULL;
-    while (order_6 >= minorder) {
-
-      if (iteration && me == 0)
-          error->warning(FLERR,"Reducing PPPMDisp dispersion order "
-                         "b/c stencil extends beyond neighbor processor");
-      iteration++;
-
-      set_grid_6();
-   
-      if (nx_pppm_6 >= OFFSET || ny_pppm_6 >= OFFSET || nz_pppm_6 >= OFFSET)
-      error->all(FLERR,"PPPMDisp Dispersion grid is too large");
-
-      set_fft_parameters(nx_pppm_6, ny_pppm_6, nz_pppm_6,
-                         nxlo_fft_6, nylo_fft_6, nzlo_fft_6,
-                         nxhi_fft_6, nyhi_fft_6, nzhi_fft_6,
-                         nxlo_in_6, nylo_in_6, nzlo_in_6,
-                         nxhi_in_6, nyhi_in_6, nzhi_in_6,
-                         nxlo_out_6, nylo_out_6, nzlo_out_6,
-                         nxhi_out_6, nyhi_out_6, nzhi_out_6,
-                         nlower_6, nupper_6,
-                         ngrid_6, nfft_6, nfft_both_6,
-                         shift_6, shiftone_6, order_6);
-
-      if (overlap_allowed) break;
-
-      cgtmp = new CommGrid(lmp,world,1,1,
-                           nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,
-                           nzlo_in_6,nzhi_in_6,
-                           nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,
-                           nzlo_out_6,nzhi_out_6,
-                           procneigh[0][0],procneigh[0][1],procneigh[1][0],
-                           procneigh[1][1],procneigh[2][0],procneigh[2][1]);
-      cgtmp->ghost_notify();
-      if (!cgtmp->ghost_overlap()) break;
-      delete cgtmp;
-      order_6--;
-    }
-
-    if (order_6 < minorder) 
-      error->all(FLERR,"Dispersion PPPMDisp order has been "
-                 "reduced below minorder");
-    if (cgtmp) delete cgtmp;
-
-    // adjust g_ewald_6
-
-    if (!gewaldflag_6 && accuracy_kspace_6 == accuracy_real_6) 
-      adjust_gewald_6();
-
-    // calculate the final accuracy
-
-    double acc, acc_real, acc_kspace;
-    final_accuracy_6(acc, acc_real, acc_kspace);
-
-
-    // print stats
-
-    int ngrid_max,nfft_both_max,nbuf_max;
-    MPI_Allreduce(&ngrid_6,&ngrid_max,1,MPI_INT,MPI_MAX,world);
-    MPI_Allreduce(&nfft_both_6,&nfft_both_max,1,MPI_INT,MPI_MAX,world);
-
-    if (me == 0) {
-    #ifdef FFT_SINGLE
-      const char fft_prec[] = "single";
-    #else
-      const char fft_prec[] = "double";
-    #endif
-  
-      if (screen) {
-        fprintf(screen,"  Dispersion G vector (1/distance)= %g\n",g_ewald_6);
-        fprintf(screen,"  Dispersion grid = %d %d %d\n",
-                nx_pppm_6,ny_pppm_6,nz_pppm_6);
-        fprintf(screen,"  Dispersion stencil order = %d\n",order_6);
-        fprintf(screen,"  Dispersion estimated absolute "
-                "RMS force accuracy = %g\n",acc);
-        fprintf(screen,"  Dispersion estimated absolute "
-                "real space RMS force accuracy = %g\n",acc_real);
-        fprintf(screen,"  Dispersion estimated absolute "
-                "kspace RMS force accuracy = %g\n",acc_kspace);
-        fprintf(screen,"  Dispersion estimated relative force accuracy = %g\n",
-                acc/two_charge_force);
-        fprintf(screen,"  using %s precision FFTs\n",fft_prec);
-        fprintf(screen,"  3d grid and FFT values/proc dispersion = %d %d\n",
-                          ngrid_max,nfft_both_max);
-      }
-      if (logfile) {
-        fprintf(logfile,"  Dispersion G vector (1/distance) = %g\n",g_ewald_6);
-        fprintf(logfile,"  Dispersion grid = %d %d %d\n",
-                nx_pppm_6,ny_pppm_6,nz_pppm_6);
-        fprintf(logfile,"  Dispersion stencil order = %d\n",order_6);
-        fprintf(logfile,"  Dispersion estimated absolute "
-                "RMS force accuracy = %g\n",acc);
-        fprintf(logfile,"  Dispersion estimated absolute "
-                "real space RMS force accuracy = %g\n",acc_real);
-        fprintf(logfile,"  Dispersion estimated absolute "
-                "kspace RMS force accuracy = %g\n",acc_kspace);
-        fprintf(logfile,"  Disperion estimated relative force accuracy = %g\n",
-                acc/two_charge_force);
-        fprintf(logfile,"  using %s precision FFTs\n",fft_prec);
-        fprintf(logfile,"  3d grid and FFT values/proc dispersion = %d %d\n",
-                           ngrid_max,nfft_both_max);
-      }
-    }
-  }
-
-  // allocate K-space dependent memory
-
-  allocate();
-
-  // pre-compute Green's function denomiator expansion
-  // pre-compute 1d charge distribution coefficients
-
-  if (function[0]) {
-    compute_gf_denom(gf_b, order);
-    compute_rho_coeff(rho_coeff, drho_coeff, order);
-    cg->ghost_notify();
-    cg->setup();
-    if (differentiation_flag == 1)
-      compute_sf_precoeff(nx_pppm, ny_pppm, nz_pppm, order,
-                          nxlo_fft, nylo_fft, nzlo_fft, 
-                          nxhi_fft, nyhi_fft, nzhi_fft,
-                          sf_precoeff1, sf_precoeff2, sf_precoeff3,
-                          sf_precoeff4, sf_precoeff5, sf_precoeff6);
-  }
-  if (function[1] + function[2] + function[3]) {
-    compute_gf_denom(gf_b_6, order_6);
-    compute_rho_coeff(rho_coeff_6, drho_coeff_6, order_6);
-    cg_6->ghost_notify();
-    cg_6->setup();
-    if (differentiation_flag == 1)
-      compute_sf_precoeff(nx_pppm_6, ny_pppm_6, nz_pppm_6, order_6,
-                          nxlo_fft_6, nylo_fft_6, nzlo_fft_6, 
-                          nxhi_fft_6, nyhi_fft_6, nzhi_fft_6,
-                          sf_precoeff1_6, sf_precoeff2_6, sf_precoeff3_6,
-                          sf_precoeff4_6, sf_precoeff5_6, sf_precoeff6_6);
-  }
-
-}
-
-/* ----------------------------------------------------------------------
-   adjust PPPM coeffs, called initially and whenever volume has changed 
-------------------------------------------------------------------------- */
-
-void PPPMDisp::setup()
-{
-  double *prd;
-
-  // volume-dependent factors
-  // adjust z dimension for 2d slab PPPM
-  // z dimension for 3d PPPM is zprd since slab_volfactor = 1.0
-
-  if (triclinic == 0) prd = domain->prd;
-  else prd = domain->prd_lamda;
-
-  double xprd = prd[0];
-  double yprd = prd[1];
-  double zprd = prd[2];
-  double zprd_slab = zprd*slab_volfactor;
-  volume = xprd * yprd * zprd_slab;
-
- // compute fkx,fky,fkz for my FFT grid pts
-
-  double unitkx = (2.0*MY_PI/xprd);
-  double unitky = (2.0*MY_PI/yprd);
-  double unitkz = (2.0*MY_PI/zprd_slab);
-
-  //compute the virial coefficients and green functions
-  if (function[0]){
-
-    delxinv = nx_pppm/xprd;
-    delyinv = ny_pppm/yprd;
-    delzinv = nz_pppm/zprd_slab;
-
-    delvolinv = delxinv*delyinv*delzinv;
-
-    double per;
-    int i, j, k, n;
-
-    for (i = nxlo_fft; i <= nxhi_fft; i++) {
-      per = i - nx_pppm*(2*i/nx_pppm);
-      fkx[i] = unitkx*per;
-      j = (nx_pppm - i) % nx_pppm;
-      per = j - nx_pppm*(2*j/nx_pppm);
-      fkx2[i] = unitkx*per;
-    }
-
-    for (i = nylo_fft; i <= nyhi_fft; i++) {
-      per = i - ny_pppm*(2*i/ny_pppm);
-      fky[i] = unitky*per;
-      j = (ny_pppm - i) % ny_pppm;
-      per = j - ny_pppm*(2*j/ny_pppm);
-      fky2[i] = unitky*per;
-    }
-
-    for (i = nzlo_fft; i <= nzhi_fft; i++) {
-      per = i - nz_pppm*(2*i/nz_pppm);
-      fkz[i] = unitkz*per;
-      j = (nz_pppm - i) % nz_pppm;
-      per = j - nz_pppm*(2*j/nz_pppm);
-      fkz2[i] = unitkz*per;
-    }
-
-    double sqk,vterm;
-    double gew2inv = 1/(g_ewald*g_ewald);
-    n = 0;
-    for (k = nzlo_fft; k <= nzhi_fft; k++) {
-      for (j = nylo_fft; j <= nyhi_fft; j++) {
-        for (i = nxlo_fft; i <= nxhi_fft; i++) {
-	  sqk = fkx[i]*fkx[i] + fky[j]*fky[j] + fkz[k]*fkz[k];
-	  if (sqk == 0.0) {
-	    vg[n][0] = 0.0;
-	    vg[n][1] = 0.0;
-	    vg[n][2] = 0.0;
-	    vg[n][3] = 0.0;
-	    vg[n][4] = 0.0;
-	    vg[n][5] = 0.0;
-	  } else {
-	    vterm = -2.0 * (1.0/sqk + 0.25*gew2inv);
-	    vg[n][0] = 1.0 + vterm*fkx[i]*fkx[i];
-	    vg[n][1] = 1.0 + vterm*fky[j]*fky[j];
-	    vg[n][2] = 1.0 + vterm*fkz[k]*fkz[k];
-	    vg[n][3] = vterm*fkx[i]*fky[j];
-	    vg[n][4] = vterm*fkx[i]*fkz[k];
-	    vg[n][5] = vterm*fky[j]*fkz[k];
-            vg2[n][0] = vterm*0.5*(fkx[i]*fky[j] + fkx2[i]*fky2[j]);
-            vg2[n][1] = vterm*0.5*(fkx[i]*fkz[k] + fkx2[i]*fkz2[k]);
-            vg2[n][2] = vterm*0.5*(fky[j]*fkz[k] + fky2[j]*fkz2[k]);
-  	  }
-	  n++;
-        }
-      }
-    }
-    compute_gf();
-    if (differentiation_flag == 1) compute_sf_coeff();
-  }
-
-  if (function[1] + function[2] + function[3]) {
-    delxinv_6 = nx_pppm_6/xprd;
-    delyinv_6 = ny_pppm_6/yprd;
-    delzinv_6 = nz_pppm_6/zprd_slab;
-    delvolinv_6 = delxinv_6*delyinv_6*delzinv_6;
-
-    double per;
-    int i, j, k, n;
-    for (i = nxlo_fft_6; i <= nxhi_fft_6; i++) {
-      per = i - nx_pppm_6*(2*i/nx_pppm_6);
-      fkx_6[i] = unitkx*per;
-      j = (nx_pppm_6 - i) % nx_pppm_6;
-      per = j - nx_pppm_6*(2*j/nx_pppm_6);
-      fkx2_6[i] = unitkx*per;
-    }
-    for (i = nylo_fft_6; i <= nyhi_fft_6; i++) {
-      per = i - ny_pppm_6*(2*i/ny_pppm_6);
-      fky_6[i] = unitky*per;
-      j = (ny_pppm_6 - i) % ny_pppm_6;
-      per = j - ny_pppm_6*(2*j/ny_pppm_6);
-      fky2_6[i] = unitky*per;
-    }
-    for (i = nzlo_fft_6; i <= nzhi_fft_6; i++) {
-      per = i - nz_pppm_6*(2*i/nz_pppm_6);
-      fkz_6[i] = unitkz*per;
-      j = (nz_pppm_6 - i) % nz_pppm_6;
-      per = j - nz_pppm_6*(2*j/nz_pppm_6);
-      fkz2_6[i] = unitkz*per;
-    }
-    double sqk,vterm;
-    long double erft, expt,nom, denom;
-    long double b, bs, bt;
-    double rtpi = sqrt(MY_PI);
-    double gewinv = 1/g_ewald_6;
-    n = 0;
-    for (k = nzlo_fft_6; k <= nzhi_fft_6; k++) {
-      for (j = nylo_fft_6; j <= nyhi_fft_6; j++) {
-        for (i = nxlo_fft_6; i <= nxhi_fft_6; i++) {
-	  sqk = fkx_6[i]*fkx_6[i] + fky_6[j]*fky_6[j] + fkz_6[k]*fkz_6[k];
-	  if (sqk == 0.0) {
-	    vg_6[n][0] = 0.0;
-	    vg_6[n][1] = 0.0;
-	    vg_6[n][2] = 0.0;
-	    vg_6[n][3] = 0.0;
-	    vg_6[n][4] = 0.0;
-	    vg_6[n][5] = 0.0;
-	  } else {
-            b = 0.5*sqrt(sqk)*gewinv;
-            bs = b*b;
-            bt = bs*b;
-            erft = 2*bt*rtpi*erfc(b);
-            expt = exp(-bs);
-            nom = erft - 2*bs*expt;
-            denom = nom + expt;
-            if (denom == 0) vterm = 3.0/sqk;
-            else vterm = 3.0*nom/(sqk*denom);
-	    vg_6[n][0] = 1.0 + vterm*fkx_6[i]*fkx_6[i];
-	    vg_6[n][1] = 1.0 + vterm*fky_6[j]*fky_6[j];
-	    vg_6[n][2] = 1.0 + vterm*fkz_6[k]*fkz_6[k];
-	    vg_6[n][3] = vterm*fkx_6[i]*fky_6[j];
-	    vg_6[n][4] = vterm*fkx_6[i]*fkz_6[k];
-	    vg_6[n][5] = vterm*fky_6[j]*fkz_6[k];
-            vg2_6[n][0] = vterm*0.5*(fkx_6[i]*fky_6[j] + fkx2_6[i]*fky2_6[j]);
-            vg2_6[n][1] = vterm*0.5*(fkx_6[i]*fkz_6[k] + fkx2_6[i]*fkz2_6[k]);
-            vg2_6[n][2] = vterm*0.5*(fky_6[j]*fkz_6[k] + fky2_6[j]*fkz2_6[k]);
-	  }
-	  n++;
-        }
-      }
-    }
-    compute_gf_6();
-    if (differentiation_flag == 1) compute_sf_coeff_6();
-  }
-}
-
-/* ----------------------------------------------------------------------
-   reset local grid arrays and communication stencils
-   called by fix balance b/c it changed sizes of processor sub-domains
-   sequence: free the existing arrays, recompute the per-processor grid
-   and FFT index ranges, reallocate, set up the ghost-grid communication,
-   recompute the order-dependent coefficients, then call setup()
-   quantities without suffix are used when function[0] is set,
-   quantities with suffix _6 when any of function[1], [2], [3] is set
-------------------------------------------------------------------------- */
-
-void PPPMDisp::setup_grid()
-{
-  // free all arrays previously allocated
-
-  deallocate();
-  deallocate_peratom();
-
-  // reset portion of global grid that each proc owns
-  // first call handles the function[0] grid, second call the _6 grid
-
-  if (function[0])
-    set_fft_parameters(nx_pppm, ny_pppm, nz_pppm,
-                       nxlo_fft, nylo_fft, nzlo_fft,
-                       nxhi_fft, nyhi_fft, nzhi_fft,
-                       nxlo_in, nylo_in, nzlo_in,
-                       nxhi_in, nyhi_in, nzhi_in,
-                       nxlo_out, nylo_out, nzlo_out,
-                       nxhi_out, nyhi_out, nzhi_out,
-                       nlower, nupper,
-                       ngrid, nfft, nfft_both,
-                       shift, shiftone, order);
-
-  if (function[1] + function[2] + function[3])
-    set_fft_parameters(nx_pppm_6, ny_pppm_6, nz_pppm_6,
-                       nxlo_fft_6, nylo_fft_6, nzlo_fft_6,
-                       nxhi_fft_6, nyhi_fft_6, nzhi_fft_6,
-                       nxlo_in_6, nylo_in_6, nzlo_in_6,
-                       nxhi_in_6, nyhi_in_6, nzhi_in_6,
-                       nxlo_out_6, nylo_out_6, nzlo_out_6,
-                       nxhi_out_6, nyhi_out_6, nzhi_out_6,
-                       nlower_6, nupper_6,
-                       ngrid_6, nfft_6, nfft_both_6,
-                       shift_6, shiftone_6, order_6);
-
-  // reallocate K-space dependent memory
-  // check if grid communication is now overlapping if not allowed
-  // don't invoke allocate_peratom(), compute() will allocate when needed
-
-  allocate();
-
-  // the overlap check is fatal only if overlap_allowed is 0
-
-  if (function[0]) {
-    cg->ghost_notify();
-    if (overlap_allowed == 0 && cg->ghost_overlap())
-      error->all(FLERR,"PPPM grid stencil extends "
-                 "beyond nearest neighbor processor");
-    cg->setup();
-  }
-  if (function[1] + function[2] + function[3]) {
-    cg_6->ghost_notify();
-    if (overlap_allowed == 0 && cg_6->ghost_overlap())
-      error->all(FLERR,"PPPM grid stencil extends "
-                 "beyond nearest neighbor processor");
-    cg_6->setup();
-  }
-
-  // pre-compute Green's function denominator expansion
-  // pre-compute 1d charge distribution coefficients
-  // the sf_precoeff arrays are only computed if differentiation_flag is 1
-
-  if (function[0]) {
-    compute_gf_denom(gf_b, order);
-    compute_rho_coeff(rho_coeff, drho_coeff, order);
-    if (differentiation_flag == 1) 
-      compute_sf_precoeff(nx_pppm, ny_pppm, nz_pppm, order,
-                          nxlo_fft, nylo_fft, nzlo_fft, 
-                          nxhi_fft, nyhi_fft, nzhi_fft,
-                          sf_precoeff1, sf_precoeff2, sf_precoeff3,
-                          sf_precoeff4, sf_precoeff5, sf_precoeff6);
-  }
-  if (function[1] + function[2] + function[3]) {
-    compute_gf_denom(gf_b_6, order_6);
-    compute_rho_coeff(rho_coeff_6, drho_coeff_6, order_6);
-    if (differentiation_flag == 1)
-      compute_sf_precoeff(nx_pppm_6, ny_pppm_6, nz_pppm_6, order_6,
-                          nxlo_fft_6, nylo_fft_6, nzlo_fft_6, 
-                          nxhi_fft_6, nyhi_fft_6, nzhi_fft_6,
-                          sf_precoeff1_6, sf_precoeff2_6, sf_precoeff3_6,
-                          sf_precoeff4_6, sf_precoeff5_6, sf_precoeff6_6);
-  }
-
-  // pre-compute volume-dependent coeffs
-
-  setup();
-}
-
-/* ----------------------------------------------------------------------
-   compute the PPPM long-range force, energy, virial 
-   eflag/vflag are passed to ev_setup() to select which energy and
-   virial quantities are tallied
-   one section is executed per active solver:
-     function[0] = Coulomb
-     function[1] = dispersion with geometric mixing
-     function[2] = dispersion with arithmetic mixing
-     function[3] = dispersion without mixing rule
-   each section maps particles to the grid, builds the grid density,
-   solves in k-space (poisson_ad if differentiation_flag == 1,
-   else poisson_ik) and interpolates the result back to the particles
-   the Coulomb result is accumulated in energy_1/virial_1, the
-   dispersion result in energy_6/virial_6
-------------------------------------------------------------------------- */
-
-void PPPMDisp::compute(int eflag, int vflag)
-{
-
-  int i;
-  // set energy/virial flags, clear all of them if nothing is requested
-
-  if (eflag || vflag) ev_setup(eflag,vflag);
-  else evflag = evflag_atom = eflag_global = vflag_global = 
-	 eflag_atom = vflag_atom = 0;
-
-  // allocate per-atom grid arrays and set up their ghost communication
-  // only the first time per-atom quantities are requested
-
-  if (evflag_atom && !peratom_allocate_flag) {
-    allocate_peratom();
-    if (function[0]) {
-      cg_peratom->ghost_notify();
-      cg_peratom->setup();
-    }
-    if (function[1] + function[2] + function[3]) {
-      cg_peratom_6->ghost_notify();
-      cg_peratom_6->setup();
-    }
-    peratom_allocate_flag = 1;
-  }
-  
-  // convert atoms from box to lamda coords (triclinic only)
-
-  if (triclinic == 0) boxlo = domain->boxlo;
-  else {
-    boxlo = domain->boxlo_lamda;
-    domain->x2lamda(atom->nlocal);
-  }
-  // extend size of per-atom arrays if necessary
-
-  if (atom->nlocal > nmax) {
-
-    if (function[0]) memory->destroy(part2grid);
-    if (function[1] + function[2] + function[3]) memory->destroy(part2grid_6);
-    nmax = atom->nmax;
-    if (function[0]) memory->create(part2grid,nmax,3,"pppm/disp:part2grid");
-    if (function[1] + function[2] + function[3]) 
-      memory->create(part2grid_6,nmax,3,"pppm/disp:part2grid_6");
-  }
-
-
-  // reset the accumulators of this call
-
-  energy = 0.0;
-  energy_1 = 0.0;
-  energy_6 = 0.0;
-  if (vflag) for (i = 0; i < 6; i++) virial_6[i] = virial_1[i] = 0.0;
-
-  // find grid points for all my particles
-  // distribute particles' charges/dispersion coefficients on the grid
-  // communication between processors and remapping to the FFT layout
-  // solution of Poisson's equation in k-space and back transformation
-  // communication between processors
-  // calculation of forces
-
-  if (function[0]) {
-
-    // perform calculations for coulomb interactions only
-
-    particle_map_c(delxinv, delyinv, delzinv, shift, part2grid, nupper, nlower,
-                 nxlo_out, nylo_out, nzlo_out, nxhi_out, nyhi_out, nzhi_out);
-
-    make_rho_c();
-
-    cg->reverse_comm(this,REVERSE_RHO);
- 
-    brick2fft(nxlo_in, nylo_in, nzlo_in, nxhi_in, nyhi_in, nzhi_in,
-	      density_brick, density_fft, work1,remap); 
- 
-    if (differentiation_flag == 1) {
-
-      poisson_ad(work1, work2, density_fft, fft1, fft2,
-                 nx_pppm, ny_pppm, nz_pppm, nfft,
-                 nxlo_fft, nylo_fft, nzlo_fft, nxhi_fft, nyhi_fft, nzhi_fft,
-                 nxlo_in, nylo_in, nzlo_in, nxhi_in, nyhi_in, nzhi_in,
-                 energy_1, greensfn, 
-                 virial_1, vg,vg2,
-                 u_brick, v0_brick, v1_brick, v2_brick, v3_brick, v4_brick, v5_brick);
-
-      cg->forward_comm(this,FORWARD_AD);
-
-      fieldforce_c_ad(); 
-
-      // NOTE(review): this branch tests vflag_atom, the ik branch below
-      // tests evflag_atom - confirm that the difference is intended
-      if (vflag_atom) cg_peratom->forward_comm(this, FORWARD_AD_PERATOM);
-
-    } else {
-      poisson_ik(work1, work2, density_fft, fft1, fft2,
-                 nx_pppm, ny_pppm, nz_pppm, nfft,
-                 nxlo_fft, nylo_fft, nzlo_fft, nxhi_fft, nyhi_fft, nzhi_fft,
-                 nxlo_in, nylo_in, nzlo_in, nxhi_in, nyhi_in, nzhi_in,
-                 energy_1, greensfn, 
-	         fkx, fky, fkz,fkx2, fky2, fkz2,
-                 vdx_brick, vdy_brick, vdz_brick, virial_1, vg,vg2,
-                 u_brick, v0_brick, v1_brick, v2_brick, v3_brick, v4_brick, v5_brick);
-
-      cg->forward_comm(this, FORWARD_IK);
-
-      fieldforce_c_ik(); 
-
-      if (evflag_atom) cg_peratom->forward_comm(this, FORWARD_IK_PERATOM);
-    }
-    if (evflag_atom) fieldforce_c_peratom();
-  }
-
-  if (function[1]) {
-    // perform calculations for geometric mixing
-    particle_map(delxinv_6, delyinv_6, delzinv_6, shift_6, part2grid_6, nupper_6, nlower_6,
-                 nxlo_out_6, nylo_out_6, nzlo_out_6, nxhi_out_6, nyhi_out_6, nzhi_out_6);
-    make_rho_g();
-
-
-    cg_6->reverse_comm(this, REVERSE_RHO_G);
-
-    brick2fft(nxlo_in_6, nylo_in_6, nzlo_in_6, nxhi_in_6, nyhi_in_6, nzhi_in_6,
-	      density_brick_g, density_fft_g, work1_6,remap_6);
- 
-    if (differentiation_flag == 1) {
-
-      poisson_ad(work1_6, work2_6, density_fft_g, fft1_6, fft2_6,
-                 nx_pppm_6, ny_pppm_6, nz_pppm_6, nfft_6,
-                 nxlo_fft_6, nylo_fft_6, nzlo_fft_6, nxhi_fft_6, nyhi_fft_6, nzhi_fft_6,
-                 nxlo_in_6, nylo_in_6, nzlo_in_6, nxhi_in_6, nyhi_in_6, nzhi_in_6,
-                 energy_6, greensfn_6, 
-                 virial_6, vg_6, vg2_6,
-                 u_brick_g, v0_brick_g, v1_brick_g, v2_brick_g, v3_brick_g, v4_brick_g, v5_brick_g);
-
-      cg_6->forward_comm(this,FORWARD_AD_G);
-
-      fieldforce_g_ad();
-
-      if (vflag_atom) cg_peratom_6->forward_comm(this,FORWARD_AD_PERATOM_G);
-
-    } else {
-      poisson_ik(work1_6, work2_6, density_fft_g, fft1_6, fft2_6,
-                 nx_pppm_6, ny_pppm_6, nz_pppm_6, nfft_6,
-                 nxlo_fft_6, nylo_fft_6, nzlo_fft_6, nxhi_fft_6, nyhi_fft_6, nzhi_fft_6,
-                 nxlo_in_6, nylo_in_6, nzlo_in_6, nxhi_in_6, nyhi_in_6, nzhi_in_6,
-                 energy_6, greensfn_6, 
-	         fkx_6, fky_6, fkz_6,fkx2_6, fky2_6, fkz2_6,
-                 vdx_brick_g, vdy_brick_g, vdz_brick_g, virial_6, vg_6, vg2_6,
-                 u_brick_g, v0_brick_g, v1_brick_g, v2_brick_g, v3_brick_g, v4_brick_g, v5_brick_g);
- 
-      cg_6->forward_comm(this,FORWARD_IK_G);
- 
-      fieldforce_g_ik();
-
-
-      if (evflag_atom) cg_peratom_6->forward_comm(this, FORWARD_IK_PERATOM_G);
-    }
-    if (evflag_atom) fieldforce_g_peratom();
-  }
-
-  if (function[2]) {
-    // perform calculations for arithmetic mixing
-    // seven densities a0..a6 are used: a3 is solved on its own,
-    // the pairs (a0,a6), (a1,a5), (a2,a4) are solved two at a time
-    particle_map(delxinv_6, delyinv_6, delzinv_6, shift_6, part2grid_6, nupper_6, nlower_6,
-                 nxlo_out_6, nylo_out_6, nzlo_out_6, nxhi_out_6, nyhi_out_6, nzhi_out_6);
-    make_rho_a();
-
-    cg_6->reverse_comm(this, REVERSE_RHO_A);
-
-    brick2fft_a();
-
-    if ( differentiation_flag == 1) {
-
-      poisson_ad(work1_6, work2_6, density_fft_a3, fft1_6, fft2_6,
-                 nx_pppm_6, ny_pppm_6, nz_pppm_6, nfft_6,
-                 nxlo_fft_6, nylo_fft_6, nzlo_fft_6, nxhi_fft_6, nyhi_fft_6, nzhi_fft_6,
-                 nxlo_in_6, nylo_in_6, nzlo_in_6, nxhi_in_6, nyhi_in_6, nzhi_in_6,
-                 energy_6, greensfn_6, 
-                 virial_6, vg_6, vg2_6,
-                 u_brick_a3, v0_brick_a3, v1_brick_a3, v2_brick_a3, v3_brick_a3, v4_brick_a3, v5_brick_a3);
-      poisson_2s_ad(density_fft_a0, density_fft_a6,
-                    u_brick_a0, v0_brick_a0, v1_brick_a0, v2_brick_a0, v3_brick_a0, v4_brick_a0, v5_brick_a0,
-                    u_brick_a6, v0_brick_a6, v1_brick_a6, v2_brick_a6, v3_brick_a6, v4_brick_a6, v5_brick_a6);
-      poisson_2s_ad(density_fft_a1, density_fft_a5,
-                    u_brick_a1, v0_brick_a1, v1_brick_a1, v2_brick_a1, v3_brick_a1, v4_brick_a1, v5_brick_a1,
-                    u_brick_a5, v0_brick_a5, v1_brick_a5, v2_brick_a5, v3_brick_a5, v4_brick_a5, v5_brick_a5);
-      poisson_2s_ad(density_fft_a2, density_fft_a4,
-                    u_brick_a2, v0_brick_a2, v1_brick_a2, v2_brick_a2, v3_brick_a2, v4_brick_a2, v5_brick_a2,
-                    u_brick_a4, v0_brick_a4, v1_brick_a4, v2_brick_a4, v3_brick_a4, v4_brick_a4, v5_brick_a4);
-
-      cg_6->forward_comm(this, FORWARD_AD_A);
-
-      fieldforce_a_ad();
-
-      // NOTE(review): the other ad branches test vflag_atom here,
-      // this one tests evflag_atom - confirm which one is intended
-      if (evflag_atom) cg_peratom_6->forward_comm(this, FORWARD_AD_PERATOM_A);
-
-    }  else {
-    
-      poisson_ik(work1_6, work2_6, density_fft_a3, fft1_6, fft2_6,
-                 nx_pppm_6, ny_pppm_6, nz_pppm_6, nfft_6,
-                 nxlo_fft_6, nylo_fft_6, nzlo_fft_6, nxhi_fft_6, nyhi_fft_6, nzhi_fft_6,
-                 nxlo_in_6, nylo_in_6, nzlo_in_6, nxhi_in_6, nyhi_in_6, nzhi_in_6,
-                 energy_6, greensfn_6, 
-	         fkx_6, fky_6, fkz_6,fkx2_6, fky2_6, fkz2_6,
-                 vdx_brick_a3, vdy_brick_a3, vdz_brick_a3, virial_6, vg_6, vg2_6,
-                 u_brick_a3, v0_brick_a3, v1_brick_a3, v2_brick_a3, v3_brick_a3, v4_brick_a3, v5_brick_a3);
-      poisson_2s_ik(density_fft_a0, density_fft_a6,
-                    vdx_brick_a0, vdy_brick_a0, vdz_brick_a0,
-                    vdx_brick_a6, vdy_brick_a6, vdz_brick_a6,
-                    u_brick_a0, v0_brick_a0, v1_brick_a0, v2_brick_a0, v3_brick_a0, v4_brick_a0, v5_brick_a0,
-                    u_brick_a6, v0_brick_a6, v1_brick_a6, v2_brick_a6, v3_brick_a6, v4_brick_a6, v5_brick_a6);
-      poisson_2s_ik(density_fft_a1, density_fft_a5,
-                    vdx_brick_a1, vdy_brick_a1, vdz_brick_a1,
-                    vdx_brick_a5, vdy_brick_a5, vdz_brick_a5,
-                    u_brick_a1, v0_brick_a1, v1_brick_a1, v2_brick_a1, v3_brick_a1, v4_brick_a1, v5_brick_a1,
-                    u_brick_a5, v0_brick_a5, v1_brick_a5, v2_brick_a5, v3_brick_a5, v4_brick_a5, v5_brick_a5);
-      poisson_2s_ik(density_fft_a2, density_fft_a4,
-                    vdx_brick_a2, vdy_brick_a2, vdz_brick_a2,
-                    vdx_brick_a4, vdy_brick_a4, vdz_brick_a4,
-                    u_brick_a2, v0_brick_a2, v1_brick_a2, v2_brick_a2, v3_brick_a2, v4_brick_a2, v5_brick_a2,
-                    u_brick_a4, v0_brick_a4, v1_brick_a4, v2_brick_a4, v3_brick_a4, v4_brick_a4, v5_brick_a4);
-
-      cg_6->forward_comm(this, FORWARD_IK_A);
-
-      fieldforce_a_ik();
-
-      if (evflag_atom) cg_peratom_6->forward_comm(this, FORWARD_IK_PERATOM_A);
-    }
-    if (evflag_atom) fieldforce_a_peratom();
-  }
-
-  if (function[3]) {
-    // perform calculations if no mixing rule applies
-    // the densities are processed in pairs (n,n+1), nsplit_alloc/2 pairs
-    particle_map(delxinv_6, delyinv_6, delzinv_6, shift_6, part2grid_6, nupper_6, nlower_6,
-                 nxlo_out_6, nylo_out_6, nzlo_out_6, nxhi_out_6, nyhi_out_6, nzhi_out_6);
-
-    make_rho_none();
-
-    cg_6->reverse_comm(this, REVERSE_RHO_NONE);
-
-    brick2fft_none();
-
-    if (differentiation_flag == 1) {
-
-      int n = 0;
-      for (int k = 0; k<nsplit_alloc/2; k++) {
-        poisson_none_ad(n,n+1,density_fft_none[n],density_fft_none[n+1],
-                        u_brick_none[n],u_brick_none[n+1],
-                        v0_brick_none, v1_brick_none, v2_brick_none,
-                        v3_brick_none, v4_brick_none, v5_brick_none);
-        n += 2;
-      }
-
-      cg_6->forward_comm(this,FORWARD_AD_NONE);
-
-      fieldforce_none_ad();
-
-      if (vflag_atom) cg_peratom_6->forward_comm(this,FORWARD_AD_PERATOM_NONE);
-
-    } else {
-      int n = 0;
-      for (int k = 0; k<nsplit_alloc/2; k++) {
-
-        poisson_none_ik(n,n+1,density_fft_none[n], density_fft_none[n+1],
-                        vdx_brick_none[n], vdy_brick_none[n], vdz_brick_none[n],
-                        vdx_brick_none[n+1], vdy_brick_none[n+1], vdz_brick_none[n+1],
-                        u_brick_none, v0_brick_none, v1_brick_none, v2_brick_none,
-                        v3_brick_none, v4_brick_none, v5_brick_none);
-        n += 2;
-      }
-
-      cg_6->forward_comm(this,FORWARD_IK_NONE);
-
-      fieldforce_none_ik();
-
-
-      if (evflag_atom) cg_peratom_6->forward_comm(this, FORWARD_IK_PERATOM_NONE);
-    }
-    if (evflag_atom) fieldforce_none_peratom();
-  }
-
-  // sum energy across procs and add in volume-dependent term
-  // only the Coulomb energy is multiplied by qscale
-
-  const double qscale = force->qqrd2e * scale;
-  if (eflag_global) {
-    double energy_all;
-    MPI_Allreduce(&energy_1,&energy_all,1,MPI_DOUBLE,MPI_SUM,world);
-    energy_1 = energy_all;
-    MPI_Allreduce(&energy_6,&energy_all,1,MPI_DOUBLE,MPI_SUM,world);
-    energy_6 = energy_all;
-   
-    energy_1 *= 0.5*volume;
-    energy_6 *= 0.5*volume;
-    
-    energy_1 -= g_ewald*qsqsum/MY_PIS +
-      MY_PI2*qsum*qsum / (g_ewald*g_ewald*volume);
-    energy_6 += - MY_PI*MY_PIS/(6*volume)*pow(g_ewald_6,3)*csumij +
-      1.0/12.0*pow(g_ewald_6,6)*csum;
-    energy_1 *= qscale;
-  }
-
-  // sum virial across procs
-  // virial_all is reused: first for the Coulomb part (scaled by qscale),
-  // then for the dispersion part, which is added on top
-
-  if (vflag_global) {
-    double virial_all[6];
-    MPI_Allreduce(virial_1,virial_all,6,MPI_DOUBLE,MPI_SUM,world);
-    for (i = 0; i < 6; i++) virial[i] = 0.5*qscale*volume*virial_all[i];
-    MPI_Allreduce(virial_6,virial_all,6,MPI_DOUBLE,MPI_SUM,world);
-    for (i = 0; i < 6; i++) virial[i] += 0.5*volume*virial_all[i];
-    if (function[1]+function[2]+function[3]){
-      // same csumij term as in energy_6, applied to the diagonal only
-      double a =  MY_PI*MY_PIS/(6*volume)*pow(g_ewald_6,3)*csumij;
-      virial[0] -= a;
-      virial[1] -= a;
-      virial[2] -= a;
-    }
-  }
-
-  // per-atom counterparts of the global energy corrections above
-
-  if (eflag_atom) {
-    if (function[0]) {
-      double *q = atom->q;
-      for (i = 0; i < atom->nlocal; i++) {
-        eatom[i] -= qscale*g_ewald*q[i]*q[i]/MY_PIS + qscale*MY_PI2*q[i]*qsum / (g_ewald*g_ewald*volume); //coulomb self energy correction
-      }
-    }
-    if (function[1] + function[2] + function[3]) {
-      int tmp;
-      for (i = 0; i < atom->nlocal; i++) {
-        // tmp is the atom type, used to index the per-type sums
-        tmp = atom->type[i];
-        eatom[i] += - MY_PI*MY_PIS/(6*volume)*pow(g_ewald_6,3)*csumi[tmp] +
-                      1.0/12.0*pow(g_ewald_6,6)*cii[tmp];
-      }
-    }
-  }
-            
-  if (vflag_atom) {
-    if (function[1] + function[2] + function[3]) {
-      int tmp;
-      for (i = 0; i < atom->nlocal; i++) {
-        tmp = atom->type[i];
-        for (int n = 0; n < 3; n++) vatom[i][n] -= MY_PI*MY_PIS/(6*volume)*pow(g_ewald_6,3)*csumi[tmp]; //dispersion self virial correction
-      }
-    }
-  }
-
-
-  // 2d slab correction
-  // total energy = contributions of the active solvers
-
-  if (slabflag) slabcorr(eflag);
-  if (function[0]) energy += energy_1;
-  if (function[1] + function[2] + function[3]) energy += energy_6;
-
-  // convert atoms back from lamda to box coords
-  
-  if (triclinic) domain->lamda2x(atom->nlocal);
-}
-
-/* ----------------------------------------------------------------------
-   initialize coefficients needed for the dispersion density on the grids
-   fills the array B according to the active dispersion solver:
-     function[1] (geometric):  B[i] = sqrt(|b[i][i]|), n+1 entries
-     function[2] (arithmetic): 7 entries per type built from epsilon, sigma
-     function[3] (none):       nsplit eigenvalues followed by the
-                               corresponding eigenvector components
-   for function[2] and function[3] the coefficient matrix of the pair
-   style is decomposed with qr_alg(); depending on the number of
-   significant eigenvalues (nsplit) the solver may be switched:
-     nsplit == 1                   -> geometric mixing
-     function[2] and nsplit <= 6   -> no-mixing solver with nsplit terms
-   a switch is only made if the decomposition converged; arithmetic
-   mixing does not need the decomposition and is kept otherwise
-   side effects: sets B, nsplit, nsplit_alloc and may modify function[]
-------------------------------------------------------------------------- */
-
-void PPPMDisp::init_coeffs()				// local pair coeffs
-{
-  int tmp;
-  int n = atom->ntypes;
-  int converged;
-  // release the coefficients of a previous call
-  // reset the pointer so that it never dangles between the branches below
-  delete [] B;
-  B = NULL;
-  if (function[3] + function[2]) {                     // no mixing rule or arithmetic
-    if (function[2] && me == 0) {
-      if (screen) fprintf(screen,"  Optimizing splitting of Dispersion coefficients\n");
-      if (logfile) fprintf(logfile,"  Optimizing splitting of Dispersion coefficients\n");
-    }
-    // get dispersion coefficients
-    double **b = (double **) force->pair->extract("B",tmp);
-    // allocate data for eigenvalue decomposition
-    double **A;
-    double **Q;
-    memory->create(A,n,n,"pppm/disp:A");
-    memory->create(Q,n,n,"pppm/disp:Q");
-    // fill coefficients to matrix a (pair coefficients are indexed from 1)
-    for (int i = 1; i <= n; i++)
-      for (int j = 1; j <= n; j++)
-        A[i-1][j-1] = b[i][j];
-    // transform q to a unity matrix
-    for (int i = 0; i < n; i++)
-      for (int j = 0; j < n; j++)
-        Q[i][j] = 0.0;
-    for (int i = 0; i < n; i++)
-      Q[i][i] = 1.0;
-    // perform eigenvalue decomposition with QR algorithm
-    // afterwards the diagonal of A holds the eigenvalues,
-    // the columns of Q the eigenvectors
-    converged = qr_alg(A,Q,n);
-    if (function[3] && !converged) {
-      error->all(FLERR,"Matrix factorization to split dispersion coefficients failed");
-    }
-    // determine number of used eigenvalues 
-    //   based on maximum allowed number or cutoff criterion
-    //   sort eigenvalues according to their size with bubble sort
-    //   (descending magnitude, eigenvector columns are swapped along)
-    double t;
-    for (int i = 0; i < n; i++) {
-      for (int j = 0; j < n-1-i; j++) {
-        if (fabs(A[j][j]) < fabs(A[j+1][j+1])) {
-          t = A[j][j];
-          A[j][j] = A[j+1][j+1];
-          A[j+1][j+1] = t;
-          for (int k = 0; k < n; k++) {
-            t = Q[k][j];
-            Q[k][j] = Q[k][j+1];
-            Q[k][j+1] = t;
-          }
-        }
-      }
-    }
-
-    //   check which eigenvalue is the first that is smaller
-    //   than a specified tolerance
-    //   check how many are maximum allowed by the user
-    double amax = fabs(A[0][0]);
-    double acrit = amax*splittol;
-    double bmax = 0;
-    double err = 0;
-    nsplit = 0;
-    for (int i = 0; i < n; i++) {
-      if (fabs(A[i][i]) > acrit) nsplit++;
-      else {
-        bmax = fabs(A[i][i]);
-        break;
-      }
-    }
-
-    // relative size of the first neglected eigenvalue
-    // an all-zero matrix has no truncation error (avoids 0/0)
-    err = (amax > 0.0) ? bmax/amax : 0.0;
-    if (err > 1.0e-4) {
-      char str[128];
-      sprintf(str,"Error in splitting of dispersion coeffs is estimated %g",err);
-      error->warning(FLERR, str);
-    }
-    // set B
-    // layout: B[0..nsplit-1] eigenvalues,
-    //         B[nsplit*(j+1)+i] component j of eigenvector i
-    B = new double[nsplit*n+nsplit];
-    for (int i = 0; i< nsplit; i++) {
-      B[i] = A[i][i];
-      for (int j = 0; j < n; j++) {
-        B[nsplit*(j+1) + i] = Q[j][i];
-      }
-    }
-
-    // the number of allocated terms is rounded up to an even number
-    nsplit_alloc = nsplit;
-    if (nsplit%2 == 1) nsplit_alloc = nsplit + 1;
-    // check if the function should preferably be [1] or [2] or [3]
-    // the decomposition is only trusted if the QR iteration converged
-    // function[3] has already aborted above otherwise, so the converged
-    // tests only affect arithmetic mixing, which is then kept unchanged
-    if (converged && nsplit == 1) {
-      delete [] B;
-      B = NULL;
-      function[3] = 0;
-      function[2] = 0;
-      function[1] = 1;
-      if (me == 0) {
-        if (screen) fprintf(screen,"  Using geometric mixing for reciprocal space\n");
-        if (logfile) fprintf(logfile,"  Using geometric mixing for reciprocal space\n");
-      }
-    }
-    if (function[2] && converged && nsplit <= 6) {
-      if (me == 0) {
-        if (screen) fprintf(screen,"  Using %d instead of 7 structure factors\n",nsplit);
-        if (logfile) fprintf(logfile,"  Using %d instead of 7 structure factors\n",nsplit);
-      }
-      function[3] = 1;
-      function[2] = 0;
-    }
-    if (function[2] && (nsplit > 6 || !converged)) {
-      if (me == 0) {
-        if (screen) fprintf(screen,"  Using 7 structure factors\n");
-        if (logfile) fprintf(logfile,"  Using 7 structure factors\n");
-      }
-      // the eigen-decomposition is not used, B is rebuilt below
-      delete [] B;
-      B = NULL;
-    }
-    if (function[3]) {
-      if (me == 0) {
-        if (screen) fprintf(screen,"  Using %d structure factors\n",nsplit);
-        if (logfile) fprintf(logfile,"  Using %d structure factors\n",nsplit);
-      }
-      if (nsplit > 9) error->warning(FLERR, "Simulations might be very slow because of large number of structure factors");
-    }
-
-    memory->destroy(A);
-    memory->destroy(Q);
-  }
-  if (function[1]) {					// geometric 1/r^6
-    double **b = (double **) force->pair->extract("B",tmp);
-    // free a possibly existing array first so that it cannot leak
-    delete [] B;
-    B = new double[n+1];
-    for (int i=0; i<=n; ++i) B[i] = sqrt(fabs(b[i][i]));
-  }
-  if (function[2]) {					// arithmetic 1/r^6
-    //cannot use epsilon, because this has not been set yet
-    double **epsilon = (double **) force->pair->extract("epsilon",tmp);  
-    //cannot use sigma, because this has not been set yet
-    double **sigma = (double **) force->pair->extract("sigma",tmp);  
-    if (!(epsilon&&sigma))
-      error->all(FLERR,"Epsilon or sigma reference not set by pair style in PPPMDisp");
-    // free a possibly existing array first so that it cannot leak
-    delete [] B;
-    B = new double[7*n+7];
-    double eps_i, sigma_i, sigma_n, *bi = B;
-    // square roots of the binomial coefficients (6 choose j)
-    double c[7] = {
-      1.0, sqrt(6.0), sqrt(15.0), sqrt(20.0), sqrt(15.0), sqrt(6.0), 1.0};
-    for (int i=0; i<=n; ++i) {
-      eps_i = sqrt(epsilon[i][i]);
-      sigma_i = sigma[i][i];
-      sigma_n = 1.0;
-      // entry j of type i is 0.25*c[j]*sqrt(epsilon_ii)*sigma_ii^j
-      for (int j=0; j<7; ++j) {
-        *(bi++) = sigma_n*eps_i*c[j]*0.25;
-        sigma_n *= sigma_i;
-      }
-    }
-  }
-}
-
-/* ----------------------------------------------------------------------
-   Eigenvalue decomposition of a real, symmetric matrix with the QR
-   method (includes transformation to Tridiagonal Matrix + Wilkinson
-   shift)
-   A = n x n input matrix, overwritten in place; after convergence its
-       diagonal holds the eigenvalues
-   Q = n x n matrix that accumulates the applied rotations; the caller
-       passes the unity matrix to obtain the eigenvectors as columns
-   returns 1 if the iteration converged, 0 if it was stopped after
-   countmax iterations
-------------------------------------------------------------------------- */
-
-int PPPMDisp::qr_alg(double **A, double **Q, int n)
-{
-  // a matrix with fewer than two rows is already diagonal
-  // the Wilkinson shift below reads A[n-2][n-2] and A[n-2][n-1],
-  // which would be out of bounds, so return before touching A or Q
-  if (n < 2) return 1;
-
-  int converged = 0;
-  double an1, an, bn1, d, mue;
-  // allocate some memory for the required operations
-  double **A0,**Qi,**C,**D,**E;
-  // make a copy of A for convergence check
-  memory->create(A0,n,n,"pppm/disp:A0");
-  for (int i = 0; i < n; i++)
-    for (int j = 0; j < n; j++)
-      A0[i][j] = A[i][j];
-
-  // allocate an auxiliary matrix Qi
-  memory->create(Qi,n,n,"pppm/disp:Qi");
-
-  // allocate auxiliary matrices for the matrix multiplication
-  memory->create(C,n,n,"pppm/disp:C");
-  memory->create(D,n,n,"pppm/disp:D");
-  memory->create(E,n,n,"pppm/disp:E");
-
-  // transform Matrix A to Tridiagonal form
-  hessenberg(A,Q,n);
-
-  // start loop for the matrix factorization
-  int count = 0;
-  int countmax = 100000;
-  while (1) {
-    // make a Wilkinson shift
-    // mue is computed from the trailing 2x2 block of A
-    an1 = A[n-2][n-2];
-    an = A[n-1][n-1];
-    bn1 = A[n-2][n-1];
-    d = (an1-an)/2;
-    mue = an + d - copysign(1.,d)*sqrt(d*d + bn1*bn1);
-    for (int i = 0; i < n; i++) 
-      A[i][i] -= mue;
-
-    // perform a QR factorization for a tridiagonal matrix A
-    qr_tri(Qi,A,n);
-
-    // update the matrices: A = A*Qi, Q = Q*Qi (C is scratch space)
-    mmult(A,Qi,C,n);
-    mmult(Q,Qi,C,n);
-
-    // backward Wilkinson shift
-    for (int i = 0; i < n; i++)
-      A[i][i] += mue;
-
-    // check the convergence
-    converged = check_convergence(A,Q,A0,C,D,E,n);
-    if (converged) break;
-    count = count + 1;
-    if (count == countmax) break;
-  }
-  
-  // free allocated memory
-  memory->destroy(Qi);
-  memory->destroy(A0);
-  memory->destroy(C);
-  memory->destroy(D);
-  memory->destroy(E);
-  
-  return converged;
-}
-
-/* ----------------------------------------------------------------------
-   Transform a Matrix to Hessenberg form (for symmetric Matrices, the 
-   result will be a tridiagonal matrix)
-   A = n x n matrix, transformed in place by plane rotations applied
-       from the left and from the right
-   Q = n x n matrix, the same rotations are applied to it from the right
-   each rotation mixes rows/columns i+1 and j so that A[j][i] becomes 0
-------------------------------------------------------------------------- */
-
-void PPPMDisp::hessenberg(double **A, double **Q, int n)
-{
-  double r,a,b,c,s,x1,x2;
-  for (int i = 0; i < n-1; i++) {
-    for (int j = i+2; j < n; j++) {
-      // compute coeffs for the rotation matrix
-      a = A[i+1][i];
-      b = A[j][i];
-      r = sqrt(a*a + b*b);
-      // both entries are already zero: there is nothing to eliminate
-      // and a/r, b/r would be NaN and spread through A and Q
-      if (r == 0.0) continue;
-      c = a/r;
-      s = b/r;
-      // update the entries of A with multiplication from the left
-      for (int k = 0; k < n; k++) {
-        x1 = A[i+1][k];
-        x2 = A[j][k];
-        A[i+1][k] = c*x1 + s*x2;
-        A[j][k] = -s*x1 + c*x2;
-      }
-      // update the entries of A and Q with a multiplication from the right
-      for (int k = 0; k < n; k++) {
-        x1 = A[k][i+1];
-        x2 = A[k][j];
-        A[k][i+1] = c*x1 + s*x2;
-        A[k][j] = -s*x1 + c*x2;
-        x1 = Q[k][i+1];
-        x2 = Q[k][j];
-        Q[k][i+1] = c*x1 + s*x2;
-        Q[k][j] = -s*x1 + c*x2;
-      }
-    }
-  }
-}
-
-/* ----------------------------------------------------------------------
-   QR factorization for a tridiagonal matrix; Result of the factorization
-   is stored in A and Qi
-   Qi = n x n matrix, reset to the unity matrix and then multiplied
-        from the right by every applied rotation
-   A  = n x n matrix, overwritten in place; rotation i mixes rows i and
-        i+1 so that the subdiagonal entry A[i+1][i] becomes 0
-------------------------------------------------------------------------- */
-
-void PPPMDisp::qr_tri(double** Qi,double** A,int n)
-{
-  double r,a,b,c,s,x1,x2;
-  int j,k,k0,kmax;
-  // make Qi a unity matrix
-  for (int row = 0; row < n; row++)
-    for (int col = 0; col < n; col++)
-      Qi[row][col] = 0.0;
-  for (int row = 0; row < n; row++)
-    Qi[row][row] = 1.0;
-  // loop over main diagonal and first off-diagonal of A
-  for (int i = 0; i < n-1; i++) {
-    j = i+1;
-    // coefficients of the rotation matrix
-    a = A[i][i];
-    b = A[j][i];
-    r = sqrt(a*a + b*b);
-    // both entries are zero: the rotation is the identity
-    // and a/r, b/r would be NaN and spread through A and Qi
-    if (r == 0.0) continue;
-    c = a/r;
-    s = b/r;
-    // update the entries of A and Q
-    // only columns k0 .. kmax-1 of rows i and j are rotated
-    k0 = (i-1>0)?i-1:0;   //max(i-1,0);
-    kmax = (i+3<n)?i+3:n;  //min(i+3,n);
-    for (k = k0; k < kmax; k++) {
-      x1 = A[i][k];
-      x2 = A[j][k];
-      A[i][k] = c*x1 + s*x2;
-      A[j][k] = -s*x1 + c*x2;
-    }
-    for (k = 0; k < n; k++) {
-      x1 = Qi[k][i];
-      x2 = Qi[k][j];
-      Qi[k][i] = c*x1 + s*x2;
-      Qi[k][j] = -s*x1 + c*x2;
-    }
-  }
-}
-
-/* ----------------------------------------------------------------------
-   Matrix product A*B of two n x n matrices; the product overwrites A
-   C is caller-provided n x n scratch space in which the product is
-   accumulated before it is copied back to A
-------------------------------------------------------------------------- */
-
-void PPPMDisp::mmult(double** A, double** B, double** C, int n)
-{
-  int row,col,inner;
-
-  // clear the scratch matrix
-  for (row = 0; row < n; row++) {
-    double *crow = C[row];
-    for (col = 0; col < n; col++) crow[col] = 0.0;
-  }
-
-  // accumulate the product C = A*B
-  for (row = 0; row < n; row++) {
-    const double *arow = A[row];
-    double *crow = C[row];
-    for (col = 0; col < n; col++)
-      for (inner = 0; inner < n; inner++)
-        crow[col] += arow[inner] * B[inner][col];
-  }
-
-  // overwrite A with the product
-  for (row = 0; row < n; row++) {
-    double *arow = A[row];
-    const double *crow = C[row];
-    for (col = 0; col < n; col++) arow[col] = crow[col];
-  }
-}
-
-/* ----------------------------------------------------------------------
-   Check if the factorization has converged by comparing all elements of the
-   original matrix and the new matrix
-   A  = current iterate, only its diagonal is used
-   Q  = accumulated transformation matrix
-   A0 = copy of the original matrix
-   C,D,E = n x n scratch matrices, overwritten
-   the matrix Q*diag(A)*Q.t is rebuilt in E; returns 1 if every element
-   differs from A0 by at most 1.0e-8 times the largest magnitude of an
-   element of A0, else 0
-------------------------------------------------------------------------- */
-
-int PPPMDisp::check_convergence(double** A,double** Q,double** A0,
-                                double** C,double** D,double** E,int n)
-{
-  double eps = 1.0e-8;
-  int converged = 1;
-  double epsmax = -1;
-  double Bmax = 0.0;
-  double diff;
-  // get the scale of the original matrix
-  // use magnitudes: with the signed maximum a matrix without positive
-  // entries would get a zero tolerance and could never converge
-  for (int i = 0; i < n; i++)
-    for (int j = 0; j < n; j++)
-      Bmax = (Bmax>fabs(A0[i][j]))?Bmax:fabs(A0[i][j]);  //max(Bmax,|A0[i][j]|);
-  double epsabs = eps*Bmax;
-  
-  // reconstruct the original matrix
-  // store the diagonal elements in D
-  for (int i = 0; i < n; i++)
-    for (int j = 0; j < n; j++)
-      D[i][j] = 0.0;
-  for (int i = 0; i < n; i++)
-    D[i][i] = A[i][i];
-  // store matrix Q in E
-  for (int i = 0; i < n; i++)
-    for (int j = 0; j < n; j++)
-      E[i][j] = Q[i][j];
-  // E = Q*A
-  mmult(E,D,C,n);
-  // store transpose of Q in D
-  for (int i = 0; i < n; i++)
-    for (int j = 0; j < n; j++)
-      D[i][j] = Q[j][i];
-  // E = Q*A*Q.t
-  mmult(E,D,C,n);
-
-  //compare the original matrix and the final matrix
-  // epsmax = largest absolute element-wise deviation
-  for (int i = 0; i < n; i++) {
-    for (int j = 0; j < n; j++) {
-      diff = A0[i][j] - E[i][j];
-      epsmax = (epsmax>fabs(diff))?epsmax:fabs(diff);//max(epsmax,fabs(diff));
-    }
-  }
-  if (epsmax > epsabs) converged = 0;
-  return converged;
-}
-
-/* ----------------------------------------------------------------------
-   allocate memory that depends on # of K-vectors and order 
-------------------------------------------------------------------------- */
-
-void PPPMDisp::allocate()
-{
-
-  int (*procneigh)[2] = comm->procneigh;
-
-  if (function[0]) {
-    memory->create(work1,2*nfft_both,"pppm/disp:work1");
-    memory->create(work2,2*nfft_both,"pppm/disp:work2");
-
-    memory->create1d_offset(fkx,nxlo_fft,nxhi_fft,"pppm/disp:fkx");
-    memory->create1d_offset(fky,nylo_fft,nyhi_fft,"pppm/disp:fky");
-    memory->create1d_offset(fkz,nzlo_fft,nzhi_fft,"pppm/disp:fkz");
-
-    memory->create1d_offset(fkx2,nxlo_fft,nxhi_fft,"pppm/disp:fkx2");
-    memory->create1d_offset(fky2,nylo_fft,nyhi_fft,"pppm/disp:fky2");
-    memory->create1d_offset(fkz2,nzlo_fft,nzhi_fft,"pppm/disp:fkz2");
-
-
-    memory->create(gf_b,order,"pppm/disp:gf_b");
-    memory->create2d_offset(rho1d,3,-order/2,order/2,"pppm/disp:rho1d");
-    memory->create2d_offset(rho_coeff,order,(1-order)/2,order/2,"pppm/disp:rho_coeff");
-    memory->create2d_offset(drho1d,3,-order/2,order/2,"pppm/disp:rho1d");
-    memory->create2d_offset(drho_coeff,order,(1-order)/2,order/2,"pppm/disp:drho_coeff");
-
-    memory->create(greensfn,nfft_both,"pppm/disp:greensfn");
-    memory->create(vg,nfft_both,6,"pppm/disp:vg");
-    memory->create(vg2,nfft_both,3,"pppm/disp:vg2");
-
-    memory->create3d_offset(density_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
-  			    nxlo_out,nxhi_out,"pppm/disp:density_brick");
-    if ( differentiation_flag == 1) {
-      memory->create3d_offset(u_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
-  		  	      nxlo_out,nxhi_out,"pppm/disp:u_brick");
-      memory->create(sf_precoeff1,nfft_both,"pppm/disp:sf_precoeff1");
-      memory->create(sf_precoeff2,nfft_both,"pppm/disp:sf_precoeff2");
-      memory->create(sf_precoeff3,nfft_both,"pppm/disp:sf_precoeff3");
-      memory->create(sf_precoeff4,nfft_both,"pppm/disp:sf_precoeff4");
-      memory->create(sf_precoeff5,nfft_both,"pppm/disp:sf_precoeff5");
-      memory->create(sf_precoeff6,nfft_both,"pppm/disp:sf_precoeff6");
-
-    } else {
-      memory->create3d_offset(vdx_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
-  			      nxlo_out,nxhi_out,"pppm/disp:vdx_brick");
-      memory->create3d_offset(vdy_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
-			      nxlo_out,nxhi_out,"pppm/disp:vdy_brick");
-      memory->create3d_offset(vdz_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
-			      nxlo_out,nxhi_out,"pppm/disp:vdz_brick");
-    }
-    memory->create(density_fft,nfft_both,"pppm/disp:density_fft");
-
-    int tmp;
-
-    fft1 = new FFT3d(lmp,world,nx_pppm,ny_pppm,nz_pppm,
-		     nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft,
-		     nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft,
-		     0,0,&tmp);
-
-    fft2 = new FFT3d(lmp,world,nx_pppm,ny_pppm,nz_pppm,
-		     nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft,
-		     nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
-		     0,0,&tmp);
-
-    remap = new Remap(lmp,world,
-		      nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
-		      nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft,
-		      1,0,0,FFT_PRECISION);
-
-  // create ghost grid object for rho and electric field communication
-
-  if (differentiation_flag == 1)
-    cg = new CommGrid(lmp,world,1,1,
-                      nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
-                      nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out,
-                      procneigh[0][0],procneigh[0][1],procneigh[1][0],
-                      procneigh[1][1],procneigh[2][0],procneigh[2][1]);
-  else
-    cg = new CommGrid(lmp,world,3,1,
-                      nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
-                      nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out,
-                      procneigh[0][0],procneigh[0][1],procneigh[1][0],
-                      procneigh[1][1],procneigh[2][0],procneigh[2][1]);
-  }
-
-  if (function[1]) {
-    memory->create(work1_6,2*nfft_both_6,"pppm/disp:work1_6");
-    memory->create(work2_6,2*nfft_both_6,"pppm/disp:work2_6");
-
-    memory->create1d_offset(fkx_6,nxlo_fft_6,nxhi_fft_6,"pppm/disp:fkx_6");
-    memory->create1d_offset(fky_6,nylo_fft_6,nyhi_fft_6,"pppm/disp:fky_6");
-    memory->create1d_offset(fkz_6,nzlo_fft_6,nzhi_fft_6,"pppm/disp:fkz_6");
-
-    memory->create1d_offset(fkx2_6,nxlo_fft_6,nxhi_fft_6,"pppm/disp:fkx2_6");
-    memory->create1d_offset(fky2_6,nylo_fft_6,nyhi_fft_6,"pppm/disp:fky2_6");
-    memory->create1d_offset(fkz2_6,nzlo_fft_6,nzhi_fft_6,"pppm/disp:fkz2_6");
-
-    memory->create(gf_b_6,order_6,"pppm/disp:gf_b_6");
-    memory->create2d_offset(rho1d_6,3,-order_6/2,order_6/2,"pppm/disp:rho1d_6");
-    memory->create2d_offset(rho_coeff_6,order_6,(1-order_6)/2,order_6/2,"pppm/disp:rho_coeff_6");
-    memory->create2d_offset(drho1d_6,3,-order_6/2,order_6/2,"pppm/disp:drho1d_6");
-    memory->create2d_offset(drho_coeff_6,order_6,(1-order_6)/2,order_6/2,"pppm/disp:drho_coeff_6");
-
-    memory->create(greensfn_6,nfft_both_6,"pppm/disp:greensfn_6");
-    memory->create(vg_6,nfft_both_6,6,"pppm/disp:vg_6");
-    memory->create(vg2_6,nfft_both_6,3,"pppm/disp:vg2_6");
-
-    memory->create3d_offset(density_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
-  			    nxlo_out_6,nxhi_out_6,"pppm/disp:density_brick_g");
-    if ( differentiation_flag == 1) {
-      memory->create3d_offset(u_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
-  		  	      nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_g");
-
-      memory->create(sf_precoeff1_6,nfft_both_6,"pppm/disp:sf_precoeff1_6");
-      memory->create(sf_precoeff2_6,nfft_both_6,"pppm/disp:sf_precoeff2_6");
-      memory->create(sf_precoeff3_6,nfft_both_6,"pppm/disp:sf_precoeff3_6");
-      memory->create(sf_precoeff4_6,nfft_both_6,"pppm/disp:sf_precoeff4_6");
-      memory->create(sf_precoeff5_6,nfft_both_6,"pppm/disp:sf_precoeff5_6");
-      memory->create(sf_precoeff6_6,nfft_both_6,"pppm/disp:sf_precoeff6_6");
-
-    }  else {
-      memory->create3d_offset(vdx_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
-			      nxlo_out_6,nxhi_out_6,"pppm/disp:vdx_brick_g");
-      memory->create3d_offset(vdy_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
-			      nxlo_out_6,nxhi_out_6,"pppm/disp:vdy_brick_g");
-      memory->create3d_offset(vdz_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
-			      nxlo_out_6,nxhi_out_6,"pppm/disp:vdz_brick_g");
-    }
-    memory->create(density_fft_g,nfft_both_6,"pppm/disp:density_fft_g");
-
-
-    int tmp;
-
-    fft1_6 = new FFT3d(lmp,world,nx_pppm_6,ny_pppm_6,nz_pppm_6,
-		     nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6,
-		     nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6,
-		     0,0,&tmp);
-
-    fft2_6 = new FFT3d(lmp,world,nx_pppm_6,ny_pppm_6,nz_pppm_6,
-		     nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6,
-		     nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6,
-		     0,0,&tmp);
-
-    remap_6 = new Remap(lmp,world,
-		      nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6,
-		      nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6,
-		      1,0,0,FFT_PRECISION);
-
-    // create ghost grid object for rho and electric field communication
-
-    if (differentiation_flag == 1)
-      cg_6 = new CommGrid(lmp,world,1,1,
-                        nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6,
-                        nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6,
-                        procneigh[0][0],procneigh[0][1],procneigh[1][0],
-                        procneigh[1][1],procneigh[2][0],procneigh[2][1]);
-    else
-      cg_6 = new CommGrid(lmp,world,3,1,
-                        nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6,
-                        nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6,
-                        procneigh[0][0],procneigh[0][1],procneigh[1][0],
-                        procneigh[1][1],procneigh[2][0],procneigh[2][1]);
-  }
-
-  if (function[2]) {
-    memory->create(work1_6,2*nfft_both_6,"pppm/disp:work1_6");
-    memory->create(work2_6,2*nfft_both_6,"pppm/disp:work2_6");
-
-    memory->create1d_offset(fkx_6,nxlo_fft_6,nxhi_fft_6,"pppm/disp:fkx_6");
-    memory->create1d_offset(fky_6,nylo_fft_6,nyhi_fft_6,"pppm/disp:fky_6");
-    memory->create1d_offset(fkz_6,nzlo_fft_6,nzhi_fft_6,"pppm/disp:fkz_6");
-
-    memory->create1d_offset(fkx2_6,nxlo_fft_6,nxhi_fft_6,"pppm/disp:fkx2_6");
-    memory->create1d_offset(fky2_6,nylo_fft_6,nyhi_fft_6,"pppm/disp:fky2_6");
-    memory->create1d_offset(fkz2_6,nzlo_fft_6,nzhi_fft_6,"pppm/disp:fkz2_6");
-
-    memory->create(gf_b_6,order_6,"pppm/disp:gf_b_6");
-    memory->create2d_offset(rho1d_6,3,-order_6/2,order_6/2,"pppm/disp:rho1d_6");
-    memory->create2d_offset(rho_coeff_6,order_6,(1-order_6)/2,order_6/2,"pppm/disp:rho_coeff_6");
-    memory->create2d_offset(drho1d_6,3,-order_6/2,order_6/2,"pppm/disp:drho1d_6");
-    memory->create2d_offset(drho_coeff_6,order_6,(1-order_6)/2,order_6/2,"pppm/disp:drho_coeff_6");
-
-    memory->create(greensfn_6,nfft_both_6,"pppm/disp:greensfn_6");
-    memory->create(vg_6,nfft_both_6,6,"pppm/disp:vg_6");
-    memory->create(vg2_6,nfft_both_6,3,"pppm/disp:vg2_6");
-
-    memory->create3d_offset(density_brick_a0,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
-  			    nxlo_out_6,nxhi_out_6,"pppm/disp:density_brick_a0");
-    memory->create3d_offset(density_brick_a1,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
-  			    nxlo_out_6,nxhi_out_6,"pppm/disp:density_brick_a1");
-    memory->create3d_offset(density_brick_a2,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
-  			    nxlo_out_6,nxhi_out_6,"pppm/disp:density_brick_a2");
-    memory->create3d_offset(density_brick_a3,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
-  			    nxlo_out_6,nxhi_out_6,"pppm/disp:density_brick_a3");
-    memory->create3d_offset(density_brick_a4,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
-  			    nxlo_out_6,nxhi_out_6,"pppm/disp:density_brick_a4");
-    memory->create3d_offset(density_brick_a5,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
-  			    nxlo_out_6,nxhi_out_6,"pppm/disp:density_brick_a5");
-    memory->create3d_offset(density_brick_a6,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
-  			    nxlo_out_6,nxhi_out_6,"pppm/disp:density_brick_a6");
-
-    memory->create(density_fft_a0,nfft_both_6,"pppm/disp:density_fft_a0");
-    memory->create(density_fft_a1,nfft_both_6,"pppm/disp:density_fft_a1");
-    memory->create(density_fft_a2,nfft_both_6,"pppm/disp:density_fft_a2");
-    memory->create(density_fft_a3,nfft_both_6,"pppm/disp:density_fft_a3");
-    memory->create(density_fft_a4,nfft_both_6,"pppm/disp:density_fft_a4");
-    memory->create(density_fft_a5,nfft_both_6,"pppm/disp:density_fft_a5");
-    memory->create(density_fft_a6,nfft_both_6,"pppm/disp:density_fft_a6");
-
-
-    if ( differentiation_flag == 1 ) {
-      memory->create3d_offset(u_brick_a0,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
-  		  	      nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a0");
-      memory->create3d_offset(u_brick_a1,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
-  		  	      nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a1");
-      memory->create3d_offset(u_brick_a2,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
-  		  	      nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a2");
-      memory->create3d_offset(u_brick_a3,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
-  		  	      nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a3");
-      memory->create3d_offset(u_brick_a4,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
-  		  	      nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a4");
-      memory->create3d_offset(u_brick_a5,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
-  		  	      nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a5");
-      memory->create3d_offset(u_brick_a6,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
-  		  	      nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a6");
-
-      memory->create(sf_precoeff1_6,nfft_both_6,"pppm/disp:sf_precoeff1_6");
-      memory->create(sf_precoeff2_6,nfft_both_6,"pppm/disp:sf_precoeff2_6");
-      memory->create(sf_precoeff3_6,nfft_both_6,"pppm/disp:sf_precoeff3_6");
-      memory->create(sf_precoeff4_6,nfft_both_6,"pppm/disp:sf_precoeff4_6");
-      memory->create(sf_precoeff5_6,nfft_both_6,"pppm/disp:sf_precoeff5_6");
-      memory->create(sf_precoeff6_6,nfft_both_6,"pppm/disp:sf_precoeff6_6");
-
-    } else {
-
-      memory->create3d_offset(vdx_brick_a0,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
-			      nxlo_out_6,nxhi_out_6,"pppm/disp:vdx_brick_a0");
-      memory->create3d_offset(vdy_brick_a0,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
-			      nxlo_out_6,nxhi_out_6,"pppm/disp:vdy_brick_a0");
-      memory->create3d_offset(vdz_brick_a0,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
-			      nxlo_out_6,nxhi_out_6,"pppm/disp:vdz_brick_a0");
-
-      memory->create3d_offset(vdx_brick_a1,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
-			      nxlo_out_6,nxhi_out_6,"pppm/disp:vdx_brick_a1");
-      memory->create3d_offset(vdy_brick_a1,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
-			      nxlo_out_6,nxhi_out_6,"pppm/disp:vdy_brick_a1");
-      memory->create3d_offset(vdz_brick_a1,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
-			      nxlo_out_6,nxhi_out_6,"pppm/disp:vdz_brick_a1");
-
-      memory->create3d_offset(vdx_brick_a2,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
-			      nxlo_out_6,nxhi_out_6,"pppm/disp:vdx_brick_a2");
-      memory->create3d_offset(vdy_brick_a2,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
-			      nxlo_out_6,nxhi_out_6,"pppm/disp:vdy_brick_a2");
-      memory->create3d_offset(vdz_brick_a2,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
-			      nxlo_out_6,nxhi_out_6,"pppm/disp:vdz_brick_a2");
-
-      memory->create3d_offset(vdx_brick_a3,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
-			      nxlo_out_6,nxhi_out_6,"pppm/disp:vdx_brick_a3");
-      memory->create3d_offset(vdy_brick_a3,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
-			      nxlo_out_6,nxhi_out_6,"pppm/disp:vdy_brick_a3");
-      memory->create3d_offset(vdz_brick_a3,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
-			      nxlo_out_6,nxhi_out_6,"pppm/disp:vdz_brick_a3");
-
-      memory->create3d_offset(vdx_brick_a4,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
-			      nxlo_out_6,nxhi_out_6,"pppm/disp:vdx_brick_a4");
-      memory->create3d_offset(vdy_brick_a4,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
-			      nxlo_out_6,nxhi_out_6,"pppm/disp:vdy_brick_a4");
-      memory->create3d_offset(vdz_brick_a4,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
-			      nxlo_out_6,nxhi_out_6,"pppm/disp:vdz_brick_a4");
-
-      memory->create3d_offset(vdx_brick_a5,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
-			      nxlo_out_6,nxhi_out_6,"pppm/disp:vdx_brick_a5");
-      memory->create3d_offset(vdy_brick_a5,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
-			      nxlo_out_6,nxhi_out_6,"pppm/disp:vdy_brick_a5");
-      memory->create3d_offset(vdz_brick_a5,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
-			      nxlo_out_6,nxhi_out_6,"pppm/disp:vdz_brick_a5");
-
-      memory->create3d_offset(vdx_brick_a6,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
-			      nxlo_out_6,nxhi_out_6,"pppm/disp:vdx_brick_a6");
-      memory->create3d_offset(vdy_brick_a6,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
-			      nxlo_out_6,nxhi_out_6,"pppm/disp:vdy_brick_a6");
-      memory->create3d_offset(vdz_brick_a6,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
-			      nxlo_out_6,nxhi_out_6,"pppm/disp:vdz_brick_a6");
-    }
-
-
-
-    int tmp;
-
-    fft1_6 = new FFT3d(lmp,world,nx_pppm_6,ny_pppm_6,nz_pppm_6,
-		     nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6,
-		     nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6,
-		     0,0,&tmp);
-
-    fft2_6 = new FFT3d(lmp,world,nx_pppm_6,ny_pppm_6,nz_pppm_6,
-		     nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6,
-		     nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6,
-		     0,0,&tmp);
-
-    remap_6 = new Remap(lmp,world,
-		      nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6,
-		      nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6,
-		      1,0,0,FFT_PRECISION);
-
-    // create ghost grid object for rho and electric field communication
-
-
-    if (differentiation_flag == 1)
-      cg_6 = new CommGrid(lmp,world,7,7,
-                        nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6,
-                        nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6,
-                        procneigh[0][0],procneigh[0][1],procneigh[1][0],
-                        procneigh[1][1],procneigh[2][0],procneigh[2][1]);
-    else
-      cg_6 = new CommGrid(lmp,world,21,7,
-                        nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6,
-                        nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6,
-                        procneigh[0][0],procneigh[0][1],procneigh[1][0],
-                        procneigh[1][1],procneigh[2][0],procneigh[2][1]);
-  }  
-
-  if (function[3]) {
-    memory->create(work1_6,2*nfft_both_6,"pppm/disp:work1_6");
-    memory->create(work2_6,2*nfft_both_6,"pppm/disp:work2_6");
-
-    memory->create1d_offset(fkx_6,nxlo_fft_6,nxhi_fft_6,"pppm/disp:fkx_6");
-    memory->create1d_offset(fky_6,nylo_fft_6,nyhi_fft_6,"pppm/disp:fky_6");
-    memory->create1d_offset(fkz_6,nzlo_fft_6,nzhi_fft_6,"pppm/disp:fkz_6");
-
-    memory->create1d_offset(fkx2_6,nxlo_fft_6,nxhi_fft_6,"pppm/disp:fkx2_6");
-    memory->create1d_offset(fky2_6,nylo_fft_6,nyhi_fft_6,"pppm/disp:fky2_6");
-    memory->create1d_offset(fkz2_6,nzlo_fft_6,nzhi_fft_6,"pppm/disp:fkz2_6");
-
-    memory->create(gf_b_6,order_6,"pppm/disp:gf_b_6");
-    memory->create2d_offset(rho1d_6,3,-order_6/2,order_6/2,"pppm/disp:rho1d_6");
-    memory->create2d_offset(rho_coeff_6,order_6,(1-order_6)/2,order_6/2,"pppm/disp:rho_coeff_6");
-    memory->create2d_offset(drho1d_6,3,-order_6/2,order_6/2,"pppm/disp:drho1d_6");
-    memory->create2d_offset(drho_coeff_6,order_6,(1-order_6)/2,order_6/2,"pppm/disp:drho_coeff_6");
-
-    memory->create(greensfn_6,nfft_both_6,"pppm/disp:greensfn_6");
-    memory->create(vg_6,nfft_both_6,6,"pppm/disp:vg_6");
-    memory->create(vg2_6,nfft_both_6,3,"pppm/disp:vg2_6");
-
-    memory->create4d_offset(density_brick_none,nsplit_alloc,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
-  			    nxlo_out_6,nxhi_out_6,"pppm/disp:density_brick_none");
-    if ( differentiation_flag == 1) {
-      memory->create4d_offset(u_brick_none,nsplit_alloc,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
-  		  	      nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_none");
-
-      memory->create(sf_precoeff1_6,nfft_both_6,"pppm/disp:sf_precoeff1_6");
-      memory->create(sf_precoeff2_6,nfft_both_6,"pppm/disp:sf_precoeff2_6");
-      memory->create(sf_precoeff3_6,nfft_both_6,"pppm/disp:sf_precoeff3_6");
-      memory->create(sf_precoeff4_6,nfft_both_6,"pppm/disp:sf_precoeff4_6");
-      memory->create(sf_precoeff5_6,nfft_both_6,"pppm/disp:sf_precoeff5_6");
-      memory->create(sf_precoeff6_6,nfft_both_6,"pppm/disp:sf_precoeff6_6");
-
-    }  else {
-      memory->create4d_offset(vdx_brick_none,nsplit_alloc,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
-			      nxlo_out_6,nxhi_out_6,"pppm/disp:vdx_brick_none");
-      memory->create4d_offset(vdy_brick_none,nsplit_alloc,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
-			      nxlo_out_6,nxhi_out_6,"pppm/disp:vdy_brick_none");
-      memory->create4d_offset(vdz_brick_none,nsplit_alloc,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
-			      nxlo_out_6,nxhi_out_6,"pppm/disp:vdz_brick_none");
-    }
-    memory->create(density_fft_none,nsplit_alloc,nfft_both_6,"pppm/disp:density_fft_none");
-
-
-    int tmp;
-
-    fft1_6 = new FFT3d(lmp,world,nx_pppm_6,ny_pppm_6,nz_pppm_6,
-		     nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6,
-		     nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6,
-		     0,0,&tmp);
-
-    fft2_6 = new FFT3d(lmp,world,nx_pppm_6,ny_pppm_6,nz_pppm_6,
-		     nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6,
-		     nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6,
-		     0,0,&tmp);
-
-    remap_6 = new Remap(lmp,world,
-		      nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6,
-		      nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6,
-		      1,0,0,FFT_PRECISION);
-
-    // create ghost grid object for rho and electric field communication
-
-    if (differentiation_flag == 1)
-      cg_6 = new CommGrid(lmp,world,nsplit_alloc,nsplit_alloc,
-                        nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6,
-                        nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6,
-                        procneigh[0][0],procneigh[0][1],procneigh[1][0],
-                        procneigh[1][1],procneigh[2][0],procneigh[2][1]);
-    else
-      cg_6 = new CommGrid(lmp,world,3*nsplit_alloc,nsplit_alloc,
-                        nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6,
-                        nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6,
-                        procneigh[0][0],procneigh[0][1],procneigh[1][0],
-                        procneigh[1][1],procneigh[2][0],procneigh[2][1]);
-  }
-
-}
-
-/* ----------------------------------------------------------------------
-   allocate memory that depends on # of K-vectors and order
-   for per atom calculations 
-------------------------------------------------------------------------- */
-
-void PPPMDisp::allocate_peratom()
-{
-
-  int (*procneigh)[2] = comm->procneigh;
-
-  if (function[0]) {
-
-    if (differentiation_flag != 1)
-      memory->create3d_offset(u_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
-    	                      nxlo_out,nxhi_out,"pppm/disp:u_brick");
-
-    memory->create3d_offset(v0_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
-			    nxlo_out,nxhi_out,"pppm/disp:v0_brick");
-    memory->create3d_offset(v1_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
-  			    nxlo_out,nxhi_out,"pppm/disp:v1_brick");
-    memory->create3d_offset(v2_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
-  			    nxlo_out,nxhi_out,"pppm/disp:v2_brick");
-    memory->create3d_offset(v3_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
-  			    nxlo_out,nxhi_out,"pppm/disp:v3_brick");
-    memory->create3d_offset(v4_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
-  			    nxlo_out,nxhi_out,"pppm/disp:v4_brick");
-    memory->create3d_offset(v5_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
-  			    nxlo_out,nxhi_out,"pppm/disp:v5_brick");
-
-    // create ghost grid object for rho and electric field communication
-
-    if (differentiation_flag == 1)
-      cg_peratom =
-        new CommGrid(lmp,world,6,1,
-                     nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
-                     nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out,
-                     procneigh[0][0],procneigh[0][1],procneigh[1][0],
-                     procneigh[1][1],procneigh[2][0],procneigh[2][1]);
-    else
-      cg_peratom =
-        new CommGrid(lmp,world,7,1,
-                     nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
-                     nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out,
-                     procneigh[0][0],procneigh[0][1],procneigh[1][0],
-                     procneigh[1][1],procneigh[2][0],procneigh[2][1]);
-
-  }
-
-
-  if (function[1]) {
-
-    if ( differentiation_flag != 1 )
-      memory->create3d_offset(u_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
-  		  	      nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_g");
-
-    memory->create3d_offset(v0_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
-  		  	    nxlo_out_6,nxhi_out_6,"pppm/disp:v0_brick_g");
-    memory->create3d_offset(v1_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
-  		  	    nxlo_out_6,nxhi_out_6,"pppm/disp:v1_brick_g");
-    memory->create3d_offset(v2_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
-  		  	    nxlo_out_6,nxhi_out_6,"pppm/disp:v2_brick_g");
-    memory->create3d_offset(v3_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
-  		  	    nxlo_out_6,nxhi_out_6,"pppm/disp:v3_brick_g");
-    memory->create3d_offset(v4_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
-  		  	    nxlo_out_6,nxhi_out_6,"pppm/disp:v4_brick_g");
-    memory->create3d_offset(v5_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
-  		  	    nxlo_out_6,nxhi_out_6,"pppm/disp:v5_brick_g");
-
-    // create ghost grid object for rho and electric field communication
-
-    if (differentiation_flag == 1)
-      cg_peratom_6 =
-        new CommGrid(lmp,world,6,1,
-                     nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6,
-                     nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6,
-                     procneigh[0][0],procneigh[0][1],procneigh[1][0],
-                     procneigh[1][1],procneigh[2][0],procneigh[2][1]);
-    else
-      cg_peratom_6 =
-        new CommGrid(lmp,world,7,1,
-                     nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6,
-                     nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6,
-                     procneigh[0][0],procneigh[0][1],procneigh[1][0],
-                     procneigh[1][1],procneigh[2][0],procneigh[2][1]);
-
-  }
-
-  if (function[2]) {
-   
-    if ( differentiation_flag != 1 ) {
-      memory->create3d_offset(u_brick_a0,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
-  		  	      nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a0");
-      memory->create3d_offset(u_brick_a1,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
-  		  	      nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a1");
-      memory->create3d_offset(u_brick_a2,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
-  		  	      nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a2");
-      memory->create3d_offset(u_brick_a3,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
-  		  	      nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a3");
-      memory->create3d_offset(u_brick_a4,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
-  		  	      nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a4");
-      memory->create3d_offset(u_brick_a5,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
-  		  	      nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a5");
-      memory->create3d_offset(u_brick_a6,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
-  		  	      nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a6");
-    }
-
-    memory->create3d_offset(v0_brick_a0,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
-  		  	        nxlo_out_6,nxhi_out_6,"pppm/disp:v0_brick_a0");
-    memory->create3d_offset(v1_brick_a0,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
-    	                        nxlo_out_6,nxhi_out_6,"pppm/disp:v1_brick_a0");
-    memory->create3d_offset(v2_brick_a0,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
-  		  	        nxlo_out_6,nxhi_out_6,"pppm/disp:v2_brick_a0");
-    memory->create3d_offset(v3_brick_a0,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
-  		  	        nxlo_out_6,nxhi_out_6,"pppm/disp:v3_brick_a0");
-    memory->create3d_offset(v4_brick_a0,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
-  		  	        nxlo_out_6,nxhi_out_6,"pppm/disp:v4_brick_a0");
-    memory->create3d_offset(v5_brick_a0,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
-  		  	        nxlo_out_6,nxhi_out_6,"pppm/disp:v5_brick_a0");
-
-    memory->create3d_offset(v0_brick_a1,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
-  		  	        nxlo_out_6,nxhi_out_6,"pppm/disp:v0_brick_a1");
-    memory->create3d_offset(v1_brick_a1,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
-   	                        nxlo_out_6,nxhi_out_6,"pppm/disp:v1_brick_a1");
-    memory->create3d_offset(v2_brick_a1,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
-  		  	        nxlo_out_6,nxhi_out_6,"pppm/disp:v2_brick_a1");
-    memory->create3d_offset(v3_brick_a1,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
-  		  	        nxlo_out_6,nxhi_out_6,"pppm/disp:v3_brick_a1");
-    memory->create3d_offset(v4_brick_a1,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
-  	  	                nxlo_out_6,nxhi_out_6,"pppm/disp:v4_brick_a1");
-    memory->create3d_offset(v5_brick_a1,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
-  		  	        nxlo_out_6,nxhi_out_6,"pppm/disp:v5_brick_a1");
-
-    memory->create3d_offset(v0_brick_a2,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
-  		  	        nxlo_out_6,nxhi_out_6,"pppm/disp:v0_brick_a2");
-    memory->create3d_offset(v1_brick_a2,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
-  		  	        nxlo_out_6,nxhi_out_6,"pppm/disp:v1_brick_a2");
-    memory->create3d_offset(v2_brick_a2,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
-  		  	        nxlo_out_6,nxhi_out_6,"pppm/disp:v2_brick_a2");
-    memory->create3d_offset(v3_brick_a2,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
-  		  	        nxlo_out_6,nxhi_out_6,"pppm/disp:v3_brick_a2");
-    memory->create3d_offset(v4_brick_a2,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
-  		  	        nxlo_out_6,nxhi_out_6,"pppm/disp:v4_brick_a2");
-    memory->create3d_offset(v5_brick_a2,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
-  		  	        nxlo_out_6,nxhi_out_6,"pppm/disp:v5_brick_a2");
-
-    memory->create3d_offset(v0_brick_a3,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
-  		  	        nxlo_out_6,nxhi_out_6,"pppm/disp:v0_brick_a3");
-    memory->create3d_offset(v1_brick_a3,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
-  		  	        nxlo_out_6,nxhi_out_6,"pppm/disp:v1_brick_a3");
-    memory->create3d_offset(v2_brick_a3,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
-  		  	        nxlo_out_6,nxhi_out_6,"pppm/disp:v2_brick_a3");
-    memory->create3d_offset(v3_brick_a3,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
-  	  	                nxlo_out_6,nxhi_out_6,"pppm/disp:v3_brick_a3");
-    memory->create3d_offset(v4_brick_a3,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
-  		  	        nxlo_out_6,nxhi_out_6,"pppm/disp:v4_brick_a3");
-    memory->create3d_offset(v5_brick_a3,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
-  		  	        nxlo_out_6,nxhi_out_6,"pppm/disp:v5_brick_a3");
-
-    memory->create3d_offset(v0_brick_a4,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
-  		  	        nxlo_out_6,nxhi_out_6,"pppm/disp:v0_brick_a4");
-    memory->create3d_offset(v1_brick_a4,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
-  		  	        nxlo_out_6,nxhi_out_6,"pppm/disp:v1_brick_a4");
-    memory->create3d_offset(v2_brick_a4,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
-  		  	        nxlo_out_6,nxhi_out_6,"pppm/disp:v2_brick_a4");
-    memory->create3d_offset(v3_brick_a4,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
-  		  	        nxlo_out_6,nxhi_out_6,"pppm/disp:v3_brick_a4");
-    memory->create3d_offset(v4_brick_a4,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
-  		  	        nxlo_out_6,nxhi_out_6,"pppm/disp:v4_brick_a4");
-    memory->create3d_offset(v5_brick_a4,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
-  		  	        nxlo_out_6,nxhi_out_6,"pppm/disp:v5_brick_a4");
-
-    memory->create3d_offset(v0_brick_a5,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
-  		  	        nxlo_out_6,nxhi_out_6,"pppm/disp:v0_brick_a5");
-    memory->create3d_offset(v1_brick_a5,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
-  		  	        nxlo_out_6,nxhi_out_6,"pppm/disp:v1_brick_a5");
-    memory->create3d_offset(v2_brick_a5,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
-  		  	        nxlo_out_6,nxhi_out_6,"pppm/disp:v2_brick_a5");
-    memory->create3d_offset(v3_brick_a5,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
-  		  	        nxlo_out_6,nxhi_out_6,"pppm/disp:v3_brick_a5");
-    memory->create3d_offset(v4_brick_a5,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
-  		  	        nxlo_out_6,nxhi_out_6,"pppm/disp:v4_brick_a5");
-    memory->create3d_offset(v5_brick_a5,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
-  		  	        nxlo_out_6,nxhi_out_6,"pppm/disp:v5_brick_a5");
-
-    memory->create3d_offset(v0_brick_a6,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
-  	  	                nxlo_out_6,nxhi_out_6,"pppm/disp:v0_brick_a6");
-    memory->create3d_offset(v1_brick_a6,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
-  		  	        nxlo_out_6,nxhi_out_6,"pppm/disp:v1_brick_a6");
-    memory->create3d_offset(v2_brick_a6,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
-  		  	        nxlo_out_6,nxhi_out_6,"pppm/disp:v2_brick_a6");
-    memory->create3d_offset(v3_brick_a6,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
-  		  	        nxlo_out_6,nxhi_out_6,"pppm/disp:v3_brick_a6");
-    memory->create3d_offset(v4_brick_a6,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
-  		  	        nxlo_out_6,nxhi_out_6,"pppm/disp:v4_brick_a6");
-    memory->create3d_offset(v5_brick_a6,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
-  		  	        nxlo_out_6,nxhi_out_6,"pppm/disp:v5_brick_a6");
-
-    // create ghost grid object for rho and electric field communication
-
-    if (differentiation_flag == 1)
-      cg_peratom_6 =
-        new CommGrid(lmp,world,42,1,
-                     nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6,
-                     nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6,
-                     procneigh[0][0],procneigh[0][1],procneigh[1][0],
-                     procneigh[1][1],procneigh[2][0],procneigh[2][1]);
-    else
-      cg_peratom_6 =
-        new CommGrid(lmp,world,49,1,
-                     nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6,
-                     nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6,
-                     procneigh[0][0],procneigh[0][1],procneigh[1][0],
-                     procneigh[1][1],procneigh[2][0],procneigh[2][1]);
-
-  }  
-
-  if (function[3]) {
-
-    if ( differentiation_flag != 1 )
-      memory->create4d_offset(u_brick_none,nsplit_alloc,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
-  		  	      nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_none");
-
-    memory->create4d_offset(v0_brick_none,nsplit_alloc,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
-  		  	    nxlo_out_6,nxhi_out_6,"pppm/disp:v0_brick_none");
-    memory->create4d_offset(v1_brick_none,nsplit_alloc,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
-  		  	    nxlo_out_6,nxhi_out_6,"pppm/disp:v1_brick_none");
-    memory->create4d_offset(v2_brick_none,nsplit_alloc,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
-  		  	    nxlo_out_6,nxhi_out_6,"pppm/disp:v2_brick_none");
-    memory->create4d_offset(v3_brick_none,nsplit_alloc,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
-  		  	    nxlo_out_6,nxhi_out_6,"pppm/disp:v3_brick_none");
-    memory->create4d_offset(v4_brick_none,nsplit_alloc,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
-  		  	    nxlo_out_6,nxhi_out_6,"pppm/disp:v4_brick_none");
-    memory->create4d_offset(v5_brick_none,nsplit_alloc,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
-  		  	    nxlo_out_6,nxhi_out_6,"pppm/disp:v5_brick_none");
-
-    // create ghost grid object for rho and electric field communication
-
-    if (differentiation_flag == 1)
-      cg_peratom_6 =
-        new CommGrid(lmp,world,6*nsplit_alloc,1,
-                     nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6,
-                     nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6,
-                     procneigh[0][0],procneigh[0][1],procneigh[1][0],
-                     procneigh[1][1],procneigh[2][0],procneigh[2][1]);
-    else
-      cg_peratom_6 =
-        new CommGrid(lmp,world,7*nsplit_alloc,1,
-                     nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6,
-                     nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6,
-                     procneigh[0][0],procneigh[0][1],procneigh[1][0],
-                     procneigh[1][1],procneigh[2][0],procneigh[2][1]);
-
-  }
-}
-
-
-/* ----------------------------------------------------------------------
-   deallocate memory that depends on # of K-vectors and order 
-------------------------------------------------------------------------- */
-
-void PPPMDisp::deallocate()
-{
-  memory->destroy3d_offset(density_brick,nzlo_out,nylo_out,nxlo_out);
-  memory->destroy3d_offset(vdx_brick,nzlo_out,nylo_out,nxlo_out);
-  memory->destroy3d_offset(vdy_brick,nzlo_out,nylo_out,nxlo_out);
-  memory->destroy3d_offset(vdz_brick,nzlo_out,nylo_out,nxlo_out);
-  memory->destroy(density_fft);
-  density_brick = vdx_brick = vdy_brick = vdz_brick = NULL;
-  density_fft = NULL;
-
-  memory->destroy3d_offset(density_brick_g,nzlo_out_6,nylo_out_6,nxlo_out_6);
-  memory->destroy3d_offset(vdx_brick_g,nzlo_out_6,nylo_out_6,nxlo_out_6);
-  memory->destroy3d_offset(vdy_brick_g,nzlo_out_6,nylo_out_6,nxlo_out_6);
-  memory->destroy3d_offset(vdz_brick_g,nzlo_out_6,nylo_out_6,nxlo_out_6);
-  memory->destroy(density_fft_g);
-  density_brick_g = vdx_brick_g = vdy_brick_g = vdz_brick_g = NULL;
-  density_fft_g = NULL;
-
-  memory->destroy3d_offset(density_brick_a0,nzlo_out_6,nylo_out_6,nxlo_out_6);
-  memory->destroy3d_offset(vdx_brick_a0,nzlo_out_6,nylo_out_6,nxlo_out_6);
-  memory->destroy3d_offset(vdy_brick_a0,nzlo_out_6,nylo_out_6,nxlo_out_6);
-  memory->destroy3d_offset(vdz_brick_a0,nzlo_out_6,nylo_out_6,nxlo_out_6);
-  memory->destroy(density_fft_a0);
-  density_brick_a0 = vdx_brick_a0 = vdy_brick_a0 = vdz_brick_a0 = NULL;
-  density_fft_a0 = NULL;
-
-  memory->destroy3d_offset(density_brick_a1,nzlo_out_6,nylo_out_6,nxlo_out_6);
-  memory->destroy3d_offset(vdx_brick_a1,nzlo_out_6,nylo_out_6,nxlo_out_6);
-  memory->destroy3d_offset(vdy_brick_a1,nzlo_out_6,nylo_out_6,nxlo_out_6);
-  memory->destroy3d_offset(vdz_brick_a1,nzlo_out_6,nylo_out_6,nxlo_out_6);
-  memory->destroy(density_fft_a1);
-  density_brick_a1 = vdx_brick_a1 = vdy_brick_a1 = vdz_brick_a1 = NULL;
-  density_fft_a1 = NULL;
-
-  memory->destroy3d_offset(density_brick_a2,nzlo_out_6,nylo_out_6,nxlo_out_6);
-  memory->destroy3d_offset(vdx_brick_a2,nzlo_out_6,nylo_out_6,nxlo_out_6);
-  memory->destroy3d_offset(vdy_brick_a2,nzlo_out_6,nylo_out_6,nxlo_out_6);
-  memory->destroy3d_offset(vdz_brick_a2,nzlo_out_6,nylo_out_6,nxlo_out_6);
-  memory->destroy(density_fft_a2);
-  density_brick_a2 = vdx_brick_a2 = vdy_brick_a2 = vdz_brick_a2 = NULL;
-  density_fft_a2 = NULL;
-
-  memory->destroy3d_offset(density_brick_a3,nzlo_out_6,nylo_out_6,nxlo_out_6);
-  memory->destroy3d_offset(vdx_brick_a3,nzlo_out_6,nylo_out_6,nxlo_out_6);
-  memory->destroy3d_offset(vdy_brick_a3,nzlo_out_6,nylo_out_6,nxlo_out_6);
-  memory->destroy3d_offset(vdz_brick_a3,nzlo_out_6,nylo_out_6,nxlo_out_6);
-  memory->destroy(density_fft_a3);
-  density_brick_a3 = vdx_brick_a3 = vdy_brick_a3 = vdz_brick_a3 = NULL;
-  density_fft_a3 = NULL;
- 
-  memory->destroy3d_offset(density_brick_a4,nzlo_out_6,nylo_out_6,nxlo_out_6);
-  memory->destroy3d_offset(vdx_brick_a4,nzlo_out_6,nylo_out_6,nxlo_out_6);
-  memory->destroy3d_offset(vdy_brick_a4,nzlo_out_6,nylo_out_6,nxlo_out_6);
-  memory->destroy3d_offset(vdz_brick_a4,nzlo_out_6,nylo_out_6,nxlo_out_6);
-  memory->destroy(density_fft_a4);
-  density_brick_a4 = vdx_brick_a4 = vdy_brick_a4 = vdz_brick_a4 = NULL;
-  density_fft_a4 = NULL;
-
-  memory->destroy3d_offset(density_brick_a5,nzlo_out_6,nylo_out_6,nxlo_out_6);
-  memory->destroy3d_offset(vdx_brick_a5,nzlo_out_6,nylo_out_6,nxlo_out_6);
-  memory->destroy3d_offset(vdy_brick_a5,nzlo_out_6,nylo_out_6,nxlo_out_6);
-  memory->destroy3d_offset(vdz_brick_a5,nzlo_out_6,nylo_out_6,nxlo_out_6);
-  memory->destroy(density_fft_a5);
-  density_brick_a5 = vdx_brick_a5 = vdy_brick_a5 = vdz_brick_a5 = NULL;
-  density_fft_a5 = NULL;
-
-  memory->destroy3d_offset(density_brick_a6,nzlo_out_6,nylo_out_6,nxlo_out_6);
-  memory->destroy3d_offset(vdx_brick_a6,nzlo_out_6,nylo_out_6,nxlo_out_6);
-  memory->destroy3d_offset(vdy_brick_a6,nzlo_out_6,nylo_out_6,nxlo_out_6);
-  memory->destroy3d_offset(vdz_brick_a6,nzlo_out_6,nylo_out_6,nxlo_out_6);
-  memory->destroy(density_fft_a6);
-  density_brick_a6 = vdx_brick_a6 = vdy_brick_a6 = vdz_brick_a6 = NULL;
-  density_fft_a6 = NULL;
-
-  memory->destroy4d_offset(density_brick_none,nzlo_out_6,nylo_out_6,nxlo_out_6);
-  memory->destroy4d_offset(vdx_brick_none,nzlo_out_6,nylo_out_6,nxlo_out_6);
-  memory->destroy4d_offset(vdy_brick_none,nzlo_out_6,nylo_out_6,nxlo_out_6);
-  memory->destroy4d_offset(vdz_brick_none,nzlo_out_6,nylo_out_6,nxlo_out_6);
-  memory->destroy(density_fft_none);
-  density_brick_none = vdx_brick_none = vdy_brick_none = vdz_brick_none = NULL;
-  density_fft_none = NULL;
-
-  memory->destroy(sf_precoeff1);
-  memory->destroy(sf_precoeff2);
-  memory->destroy(sf_precoeff3);
-  memory->destroy(sf_precoeff4);
-  memory->destroy(sf_precoeff5);
-  memory->destroy(sf_precoeff6);
-  sf_precoeff1 = sf_precoeff2 = sf_precoeff3 = sf_precoeff4 = sf_precoeff5 = sf_precoeff6 = NULL;
-
-  memory->destroy(sf_precoeff1_6);
-  memory->destroy(sf_precoeff2_6);
-  memory->destroy(sf_precoeff3_6);
-  memory->destroy(sf_precoeff4_6);
-  memory->destroy(sf_precoeff5_6);
-  memory->destroy(sf_precoeff6_6);
-  sf_precoeff1_6 = sf_precoeff2_6 = sf_precoeff3_6 = sf_precoeff4_6 = sf_precoeff5_6 = sf_precoeff6_6 = NULL;
-
-  memory->destroy(greensfn);
-  memory->destroy(greensfn_6);
-  memory->destroy(work1);
-  memory->destroy(work2);
-  memory->destroy(work1_6);
-  memory->destroy(work2_6);
-  memory->destroy(vg);
-  memory->destroy(vg2);
-  memory->destroy(vg_6);
-  memory->destroy(vg2_6);
-  greensfn = greensfn_6 = NULL;
-  work1 = work2 = work1_6 = work2_6 = NULL;
-  vg = vg2 = vg_6 = vg2_6 = NULL;
-
-  memory->destroy1d_offset(fkx,nxlo_fft);
-  memory->destroy1d_offset(fky,nylo_fft);
-  memory->destroy1d_offset(fkz,nzlo_fft);
-  fkx = fky = fkz = NULL;
-
-  memory->destroy1d_offset(fkx2,nxlo_fft);
-  memory->destroy1d_offset(fky2,nylo_fft);
-  memory->destroy1d_offset(fkz2,nzlo_fft);
-  fkx2 = fky2 = fkz2 = NULL;
-
-  memory->destroy1d_offset(fkx_6,nxlo_fft_6);
-  memory->destroy1d_offset(fky_6,nylo_fft_6);
-  memory->destroy1d_offset(fkz_6,nzlo_fft_6);
-  fkx_6 = fky_6 = fkz_6 = NULL;
-
-  memory->destroy1d_offset(fkx2_6,nxlo_fft_6);
-  memory->destroy1d_offset(fky2_6,nylo_fft_6);
-  memory->destroy1d_offset(fkz2_6,nzlo_fft_6);
-  fkx2_6 = fky2_6 = fkz2_6 = NULL;
-
-
-  memory->destroy(gf_b);
-  memory->destroy2d_offset(rho1d,-order/2);
-  memory->destroy2d_offset(rho_coeff,(1-order)/2);
-  memory->destroy2d_offset(drho1d,-order/2);
-  memory->destroy2d_offset(drho_coeff, (1-order)/2);
-  gf_b = NULL;
-  rho1d = rho_coeff = drho1d = drho_coeff = NULL;
-
-  memory->destroy(gf_b_6);
-  memory->destroy2d_offset(rho1d_6,-order_6/2);
-  memory->destroy2d_offset(rho_coeff_6,(1-order_6)/2);
-  memory->destroy2d_offset(drho1d_6,-order_6/2); 
-  memory->destroy2d_offset(drho_coeff_6,(1-order_6)/2);
-  gf_b_6 = NULL;
-  rho1d_6 = rho_coeff_6 = drho1d_6 = drho_coeff_6 = NULL;
-
-  delete fft1;
-  delete fft2;
-  delete remap;
-  delete cg;
-  fft1 = fft2 = NULL;
-  remap = NULL;
-  cg = NULL;
-
-  delete fft1_6;
-  delete fft2_6;
-  delete remap_6;
-  delete cg_6;
-  fft1_6 = fft2_6 = NULL;
-  remap_6 = NULL;
-  cg_6 = NULL;
-}
-
-
-/* ----------------------------------------------------------------------
-   deallocate memory that depends on # of K-vectors and order
-   for per atom calculations 
-------------------------------------------------------------------------- */
-
-void PPPMDisp::deallocate_peratom()
-{
-  peratom_allocate_flag = 0;
-
-  memory->destroy3d_offset(u_brick, nzlo_out, nylo_out, nxlo_out);
-  memory->destroy3d_offset(v0_brick, nzlo_out, nylo_out, nxlo_out);
-  memory->destroy3d_offset(v1_brick, nzlo_out, nylo_out, nxlo_out);
-  memory->destroy3d_offset(v2_brick, nzlo_out, nylo_out, nxlo_out);
-  memory->destroy3d_offset(v3_brick, nzlo_out, nylo_out, nxlo_out);
-  memory->destroy3d_offset(v4_brick, nzlo_out, nylo_out, nxlo_out);
-  memory->destroy3d_offset(v5_brick, nzlo_out, nylo_out, nxlo_out);
-  u_brick = v0_brick = v1_brick = v2_brick = v3_brick = v4_brick = v5_brick = NULL;
-
-  memory->destroy3d_offset(u_brick_g, nzlo_out_6, nylo_out_6, nxlo_out_6);
-  memory->destroy3d_offset(v0_brick_g, nzlo_out_6, nylo_out_6, nxlo_out_6);
-  memory->destroy3d_offset(v1_brick_g, nzlo_out_6, nylo_out_6, nxlo_out_6);
-  memory->destroy3d_offset(v2_brick_g, nzlo_out_6, nylo_out_6, nxlo_out_6);
-  memory->destroy3d_offset(v3_brick_g, nzlo_out_6, nylo_out_6, nxlo_out_6);
-  memory->destroy3d_offset(v4_brick_g, nzlo_out_6, nylo_out_6, nxlo_out_6);
-  memory->destroy3d_offset(v5_brick_g, nzlo_out_6, nylo_out_6, nxlo_out_6);
-  u_brick_g = v0_brick_g = v1_brick_g = v2_brick_g = v3_brick_g = v4_brick_g = v5_brick_g = NULL;
-
-  memory->destroy3d_offset(u_brick_a0, nzlo_out_6, nylo_out_6, nxlo_out_6);
-  memory->destroy3d_offset(v0_brick_a0, nzlo_out_6, nylo_out_6, nxlo_out_6);
-  memory->destroy3d_offset(v1_brick_a0, nzlo_out_6, nylo_out_6, nxlo_out_6);
-  memory->destroy3d_offset(v2_brick_a0, nzlo_out_6, nylo_out_6, nxlo_out_6);
-  memory->destroy3d_offset(v3_brick_a0, nzlo_out_6, nylo_out_6, nxlo_out_6);
-  memory->destroy3d_offset(v4_brick_a0, nzlo_out_6, nylo_out_6, nxlo_out_6);
-  memory->destroy3d_offset(v5_brick_a0, nzlo_out_6, nylo_out_6, nxlo_out_6);
-  u_brick_a0 = v0_brick_a0 = v1_brick_a0 = v2_brick_a0 = v3_brick_a0 = v4_brick_a0 = v5_brick_a0 = NULL;
-
-  memory->destroy3d_offset(u_brick_a1, nzlo_out_6, nylo_out_6, nxlo_out_6);
-  memory->destroy3d_offset(v0_brick_a1, nzlo_out_6, nylo_out_6, nxlo_out_6);
-  memory->destroy3d_offset(v1_brick_a1, nzlo_out_6, nylo_out_6, nxlo_out_6);
-  memory->destroy3d_offset(v2_brick_a1, nzlo_out_6, nylo_out_6, nxlo_out_6);
-  memory->destroy3d_offset(v3_brick_a1, nzlo_out_6, nylo_out_6, nxlo_out_6);
-  memory->destroy3d_offset(v4_brick_a1, nzlo_out_6, nylo_out_6, nxlo_out_6);
-  memory->destroy3d_offset(v5_brick_a1, nzlo_out_6, nylo_out_6, nxlo_out_6);
-  u_brick_a1 = v0_brick_a1 = v1_brick_a1 = v2_brick_a1 = v3_brick_a1 = v4_brick_a1 = v5_brick_a1 = NULL;
-
-  memory->destroy3d_offset(u_brick_a2, nzlo_out_6, nylo_out_6, nxlo_out_6);
-  memory->destroy3d_offset(v0_brick_a2, nzlo_out_6, nylo_out_6, nxlo_out_6);
-  memory->destroy3d_offset(v1_brick_a2, nzlo_out_6, nylo_out_6, nxlo_out_6);
-  memory->destroy3d_offset(v2_brick_a2, nzlo_out_6, nylo_out_6, nxlo_out_6);
-  memory->destroy3d_offset(v3_brick_a2, nzlo_out_6, nylo_out_6, nxlo_out_6);
-  memory->destroy3d_offset(v4_brick_a2, nzlo_out_6, nylo_out_6, nxlo_out_6);
-  memory->destroy3d_offset(v5_brick_a2, nzlo_out_6, nylo_out_6, nxlo_out_6);
-  u_brick_a2 = v0_brick_a2 = v1_brick_a2 = v2_brick_a2 = v3_brick_a2 = v4_brick_a2 = v5_brick_a2 = NULL;
-
-  memory->destroy3d_offset(u_brick_a3, nzlo_out_6, nylo_out_6, nxlo_out_6);
-  memory->destroy3d_offset(v0_brick_a3, nzlo_out_6, nylo_out_6, nxlo_out_6);
-  memory->destroy3d_offset(v1_brick_a3, nzlo_out_6, nylo_out_6, nxlo_out_6);
-  memory->destroy3d_offset(v2_brick_a3, nzlo_out_6, nylo_out_6, nxlo_out_6);
-  memory->destroy3d_offset(v3_brick_a3, nzlo_out_6, nylo_out_6, nxlo_out_6);
-  memory->destroy3d_offset(v4_brick_a3, nzlo_out_6, nylo_out_6, nxlo_out_6);
-  memory->destroy3d_offset(v5_brick_a3, nzlo_out_6, nylo_out_6, nxlo_out_6);
-  u_brick_a3 = v0_brick_a3 = v1_brick_a3 = v2_brick_a3 = v3_brick_a3 = v4_brick_a3 = v5_brick_a3 = NULL;
- 
-  memory->destroy3d_offset(u_brick_a4, nzlo_out_6, nylo_out_6, nxlo_out_6);
-  memory->destroy3d_offset(v0_brick_a4, nzlo_out_6, nylo_out_6, nxlo_out_6);
-  memory->destroy3d_offset(v1_brick_a4, nzlo_out_6, nylo_out_6, nxlo_out_6);
-  memory->destroy3d_offset(v2_brick_a4, nzlo_out_6, nylo_out_6, nxlo_out_6);
-  memory->destroy3d_offset(v3_brick_a4, nzlo_out_6, nylo_out_6, nxlo_out_6);
-  memory->destroy3d_offset(v4_brick_a4, nzlo_out_6, nylo_out_6, nxlo_out_6);
-  memory->destroy3d_offset(v5_brick_a4, nzlo_out_6, nylo_out_6, nxlo_out_6);
-  u_brick_a4 = v0_brick_a4 = v1_brick_a4 = v2_brick_a4 = v3_brick_a4 = v4_brick_a4 = v5_brick_a4 = NULL;
- 
-  memory->destroy3d_offset(u_brick_a5, nzlo_out_6, nylo_out_6, nxlo_out_6);
-  memory->destroy3d_offset(v0_brick_a5, nzlo_out_6, nylo_out_6, nxlo_out_6);
-  memory->destroy3d_offset(v1_brick_a5, nzlo_out_6, nylo_out_6, nxlo_out_6);
-  memory->destroy3d_offset(v2_brick_a5, nzlo_out_6, nylo_out_6, nxlo_out_6);
-  memory->destroy3d_offset(v3_brick_a5, nzlo_out_6, nylo_out_6, nxlo_out_6);
-  memory->destroy3d_offset(v4_brick_a5, nzlo_out_6, nylo_out_6, nxlo_out_6);
-  memory->destroy3d_offset(v5_brick_a5, nzlo_out_6, nylo_out_6, nxlo_out_6);
-  u_brick_a5 = v0_brick_a5 = v1_brick_a5 = v2_brick_a5 = v3_brick_a5 = v4_brick_a5 = v5_brick_a5 = NULL;
-
-  memory->destroy3d_offset(u_brick_a6, nzlo_out_6, nylo_out_6, nxlo_out_6);
-  memory->destroy3d_offset(v0_brick_a6, nzlo_out_6, nylo_out_6, nxlo_out_6);
-  memory->destroy3d_offset(v1_brick_a6, nzlo_out_6, nylo_out_6, nxlo_out_6);
-  memory->destroy3d_offset(v2_brick_a6, nzlo_out_6, nylo_out_6, nxlo_out_6);
-  memory->destroy3d_offset(v3_brick_a6, nzlo_out_6, nylo_out_6, nxlo_out_6);
-  memory->destroy3d_offset(v4_brick_a6, nzlo_out_6, nylo_out_6, nxlo_out_6);
-  memory->destroy3d_offset(v5_brick_a6, nzlo_out_6, nylo_out_6, nxlo_out_6);
-  u_brick_a6 = v0_brick_a6 = v1_brick_a6 = v2_brick_a6 = v3_brick_a6 = v4_brick_a6 = v5_brick_a6 = NULL;
-
-  memory->destroy4d_offset(u_brick_none, nzlo_out_6, nylo_out_6, nxlo_out_6);
-  memory->destroy4d_offset(v0_brick_none, nzlo_out_6, nylo_out_6, nxlo_out_6);
-  memory->destroy4d_offset(v1_brick_none, nzlo_out_6, nylo_out_6, nxlo_out_6);
-  memory->destroy4d_offset(v2_brick_none, nzlo_out_6, nylo_out_6, nxlo_out_6);
-  memory->destroy4d_offset(v3_brick_none, nzlo_out_6, nylo_out_6, nxlo_out_6);
-  memory->destroy4d_offset(v4_brick_none, nzlo_out_6, nylo_out_6, nxlo_out_6);
-  memory->destroy4d_offset(v5_brick_none, nzlo_out_6, nylo_out_6, nxlo_out_6);
-  u_brick_none = v0_brick_none = v1_brick_none = v2_brick_none = v3_brick_none = v4_brick_none = v5_brick_none = NULL;
-
-  delete cg_peratom;
-  delete cg_peratom_6;
-  cg_peratom = cg_peratom_6 = NULL;
-}
-
-/* ----------------------------------------------------------------------
-   set size of FFT grid (nx,ny,nz_pppm) and g_ewald
-   for Coulomb interactions
-------------------------------------------------------------------------- */
-
-void PPPMDisp::set_grid()
-{
-  double q2 = qsqsum * force->qqrd2e;
-
-  // use xprd,yprd,zprd even if triclinic so grid size is the same
-  // adjust z dimension for 2d slab PPPM
-  // 3d PPPM just uses zprd since slab_volfactor = 1.0
-
-  double xprd = domain->xprd;
-  double yprd = domain->yprd;
-  double zprd = domain->zprd;
-  double zprd_slab = zprd*slab_volfactor;
-  
-  // make initial g_ewald estimate
-  // based on desired accuracy and real space cutoff
-  // fluid-occupied volume used to estimate real-space error
-  // zprd used rather than zprd_slab
-
-  double h, h_x,h_y,h_z;
-  bigint natoms = atom->natoms;
-
-  if (!gewaldflag) {
-    g_ewald = accuracy*sqrt(natoms*cutoff*xprd*yprd*zprd) / (2.0*q2);
-    if (g_ewald >= 1.0)  
-      error->all(FLERR,"KSpace accuracy too large to estimate G vector");
-    g_ewald = sqrt(-log(g_ewald)) / cutoff;
-  } 
-
-  // set optimal nx_pppm,ny_pppm,nz_pppm based on order and accuracy
-  // nz_pppm uses extended zprd_slab instead of zprd
-  // reduce it until accuracy target is met
-
-  if (!gridflag) {
-    h = h_x = h_y = h_z = 4.0/g_ewald;  
-    int count = 0;
-    while (1) {
-      
-      // set grid dimension
-      nx_pppm = static_cast<int> (xprd/h_x);
-      ny_pppm = static_cast<int> (yprd/h_y);
-      nz_pppm = static_cast<int> (zprd_slab/h_z);
-
-      if (nx_pppm <= 1) nx_pppm = 2;
-      if (ny_pppm <= 1) ny_pppm = 2;
-      if (nz_pppm <= 1) nz_pppm = 2;
-
-      //set local grid dimension
-      int npey_fft,npez_fft;
-      if (nz_pppm >= nprocs) {
-        npey_fft = 1;
-        npez_fft = nprocs;
-      } else procs2grid2d(nprocs,ny_pppm,nz_pppm,&npey_fft,&npez_fft);
-
-      int me_y = me % npey_fft;
-      int me_z = me / npey_fft;
-
-      nxlo_fft = 0;
-      nxhi_fft = nx_pppm - 1;
-      nylo_fft = me_y*ny_pppm/npey_fft;
-      nyhi_fft = (me_y+1)*ny_pppm/npey_fft - 1;
-      nzlo_fft = me_z*nz_pppm/npez_fft;
-      nzhi_fft = (me_z+1)*nz_pppm/npez_fft - 1;
-
-      double qopt = compute_qopt();
-   
-      double dfkspace = sqrt(qopt/natoms)*q2/(xprd*yprd*zprd_slab);
-
-      count++;
-
-      // break loop if the accuracy has been reached or too many loops have been performed
-      if (dfkspace <= accuracy) break;
-      if (count > 500) error->all(FLERR, "Could not compute grid size for Coulomb interaction");
-      h *= 0.95;
-      h_x = h_y = h_z = h;
-    }
-  }
-  
-  // boost grid size until it is factorable
-
-  while (!factorable(nx_pppm)) nx_pppm++;
-  while (!factorable(ny_pppm)) ny_pppm++;
-  while (!factorable(nz_pppm)) nz_pppm++;
-}
-
-/* ----------------------------------------------------------------------
-   set the FFT parameters 
-------------------------------------------------------------------------- */
-
-void PPPMDisp::set_fft_parameters(int& nx_p,int& ny_p,int& nz_p,
-                                   int& nxlo_f,int& nylo_f,int& nzlo_f,
-                                   int& nxhi_f,int& nyhi_f,int& nzhi_f,
-                                   int& nxlo_i,int& nylo_i,int& nzlo_i,
-                                   int& nxhi_i,int& nyhi_i,int& nzhi_i,
-                                   int& nxlo_o,int& nylo_o,int& nzlo_o,
-                                   int& nxhi_o,int& nyhi_o,int& nzhi_o,
-		                   int& nlow, int& nupp,
-                                   int& ng, int& nf, int& nfb,
-		                   double& sft,double& sftone, int& ord)
-{
-  // global indices of PPPM grid range from 0 to N-1
-  // nlo_in,nhi_in = lower/upper limits of the 3d sub-brick of
-  //   global PPPM grid that I own without ghost cells
-  // for slab PPPM, assign z grid as if it were not extended
-
-  nxlo_i = static_cast<int> (comm->xsplit[comm->myloc[0]] * nx_p);
-  nxhi_i = static_cast<int> (comm->xsplit[comm->myloc[0]+1] * nx_p) - 1;
-
-  nylo_i = static_cast<int> (comm->ysplit[comm->myloc[1]] * ny_p);
-  nyhi_i = static_cast<int> (comm->ysplit[comm->myloc[1]+1] * ny_p) - 1;
-
-  nzlo_i = static_cast<int> 
-      (comm->zsplit[comm->myloc[2]] * nz_p/slab_volfactor);
-  nzhi_i = static_cast<int> 
-      (comm->zsplit[comm->myloc[2]+1] * nz_p/slab_volfactor) - 1;
-
-
-  // nlow,nupp = stencil size for mapping particles to PPPM grid
-
-  nlow = -(ord-1)/2;
-  nupp = ord/2;
-
-  // sft values for particle <-> grid mapping
-  // add/subtract OFFSET to avoid int(-0.75) = 0 when want it to be -1
-
-  if (ord % 2) sft = OFFSET + 0.5;
-  else sft = OFFSET;
-  if (ord % 2) sftone = 0.0;
-  else sftone = 0.5;
-
-  // nlo_out,nhi_out = lower/upper limits of the 3d sub-brick of
-  //   global PPPM grid that my particles can contribute charge to
-  // effectively nlo_in,nhi_in + ghost cells
-  // nlo,nhi = global coords of grid pt to "lower left" of smallest/largest
-  //           position a particle in my box can be at
-  // dist[3] = particle position bound = subbox + skin/2.0 + qdist
-  //   qdist = offset due to TIP4P fictitious charge
-  //   convert to triclinic if necessary
-  // nlo_out,nhi_out = nlo,nhi + stencil size for particle mapping
-  // for slab PPPM, assign z grid as if it were not extended
-
-  double *prd,*sublo,*subhi;
-
-  if (triclinic == 0) {
-    prd = domain->prd;
-    boxlo = domain->boxlo;
-    sublo = domain->sublo;
-    subhi = domain->subhi;
-  } else {
-    prd = domain->prd_lamda;
-    boxlo = domain->boxlo_lamda;
-    sublo = domain->sublo_lamda;
-    subhi = domain->subhi_lamda;
-  }
-
-  double xprd = prd[0];
-  double yprd = prd[1];
-  double zprd = prd[2];
-  double zprd_slab = zprd*slab_volfactor;
-
-  double dist[3];
-  double cuthalf = 0.5*neighbor->skin + qdist;
-  if (triclinic == 0) dist[0] = dist[1] = dist[2] = cuthalf;
-  else {
-    dist[0] = cuthalf/domain->prd[0];
-    dist[1] = cuthalf/domain->prd[1];
-    dist[2] = cuthalf/domain->prd[2];
-  }
-    
-  int nlo,nhi;
-    
-  nlo = static_cast<int> ((sublo[0]-dist[0]-boxlo[0]) * 
-                            nx_p/xprd + sft) - OFFSET;
-  nhi = static_cast<int> ((subhi[0]+dist[0]-boxlo[0]) * 
-                            nx_p/xprd + sft) - OFFSET;
-  nxlo_o = nlo + nlow;
-  nxhi_o = nhi + nupp;
-
-  nlo = static_cast<int> ((sublo[1]-dist[1]-boxlo[1]) * 
-                            ny_p/yprd + sft) - OFFSET;
-  nhi = static_cast<int> ((subhi[1]+dist[1]-boxlo[1]) * 
-                            ny_p/yprd + sft) - OFFSET;
-  nylo_o = nlo + nlow;
-  nyhi_o = nhi + nupp;
-
-  nlo = static_cast<int> ((sublo[2]-dist[2]-boxlo[2]) * 
-                            nz_p/zprd_slab + sft) - OFFSET;
-  nhi = static_cast<int> ((subhi[2]+dist[2]-boxlo[2]) * 
-                            nz_p/zprd_slab + sft) - OFFSET;
-  nzlo_o = nlo + nlow;
-  nzhi_o = nhi + nupp;
-
-  // for slab PPPM, change the grid boundary for processors at +z end
-  //   to include the empty volume between periodically repeating slabs
-  // for slab PPPM, want charge data communicated from -z proc to +z proc,
-  //   but not vice versa, also want field data communicated from +z proc to
-  //   -z proc, but not vice versa
-  // this is accomplished by nzhi_i = nzhi_o on +z end (no ghost cells)
-
-  if (slabflag && (comm->myloc[2] == comm->procgrid[2]-1)) {
-    nzhi_i = nz_p - 1;
-    nzhi_o = nz_p - 1;
-  }
-  
-  // decomposition of FFT mesh
-  // global indices range from 0 to N-1
-  // proc owns entire x-dimension, clump of columns in y,z dimensions
-  // npey_fft,npez_fft = # of procs in y,z dims
-  // if nprocs is small enough, proc can own 1 or more entire xy planes,
-  //   else proc owns 2d sub-blocks of yz plane
-  // me_y,me_z = which proc (0-npe_fft-1) I am in y,z dimensions
-  // nlo_fft,nhi_fft = lower/upper limit of the section
-  //   of the global FFT mesh that I own
-
-  int npey_fft,npez_fft;
-  if (nz_p >= nprocs) {
-    npey_fft = 1;
-    npez_fft = nprocs;
-  } else procs2grid2d(nprocs,ny_p,nz_p,&npey_fft,&npez_fft);
-
-  int me_y = me % npey_fft;
-  int me_z = me / npey_fft;
-
-  nxlo_f = 0;
-  nxhi_f = nx_p - 1;
-  nylo_f = me_y*ny_p/npey_fft;
-  nyhi_f = (me_y+1)*ny_p/npey_fft - 1;
-  nzlo_f = me_z*nz_p/npez_fft;
-  nzhi_f = (me_z+1)*nz_p/npez_fft - 1;
-
-  // PPPM grid for this proc, including ghosts
-
-  ng = (nxhi_o-nxlo_o+1) * (nyhi_o-nylo_o+1) *
-    (nzhi_o-nzlo_o+1);
-
-  // FFT arrays on this proc, without ghosts
-  // nfft = FFT points in FFT decomposition on this proc
-  // nfft_brick = FFT points in 3d brick-decomposition on this proc
-  // nfft_both = greater of 2 values
-
-  nf = (nxhi_f-nxlo_f+1) * (nyhi_f-nylo_f+1) *
-    (nzhi_f-nzlo_f+1);
-  int nfft_brick = (nxhi_i-nxlo_i+1) * (nyhi_i-nylo_i+1) *
-    (nzhi_i-nzlo_i+1);
-  nfb = MAX(nf,nfft_brick);
-
-}
-
-/* ----------------------------------------------------------------------
-   check if all factors of n are in list of factors
-   return 1 if yes, 0 if no 
-------------------------------------------------------------------------- */
-
-int PPPMDisp::factorable(int n)
-{
-  int i;
-
-  while (n > 1) {
-    for (i = 0; i < nfactors; i++) {
-      if (n % factors[i] == 0) {
-	n /= factors[i];
-	break;
-      }
-    }
-    if (i == nfactors) return 0;
-  }
-
-  return 1;
-}
-
-/* ----------------------------------------------------------------------
-   pre-compute Green's function denominator expansion coeffs, Gamma(2n) 
-------------------------------------------------------------------------- */
-void PPPMDisp::adjust_gewald()
-{
-  
-  // Use Newton solver to find g_ewald
-
-  double dx;
-        
-  // Begin algorithm
-  
-  for (int i = 0; i < LARGE; i++) {
-    dx = f() / derivf(); 
-    g_ewald -= dx; //Update g_ewald
-    if (fabs(f()) < SMALL) return;
-  }
-   
-  // Failed to converge
-  
-  char str[128];
-  sprintf(str, "Could not compute g_ewald");
-  error->all(FLERR, str);
-
-}
-
-/* ----------------------------------------------------------------------
- Calculate f(x)
- ------------------------------------------------------------------------- */
-
-double PPPMDisp::f()
-{
-  double df_rspace, df_kspace;
-  double q2 = qsqsum * force->qqrd2e;
-  double xprd = domain->xprd;
-  double yprd = domain->yprd;
-  double zprd = domain->zprd;
-  double zprd_slab = zprd*slab_volfactor;
-  bigint natoms = atom->natoms;
-
-  df_rspace = 2.0*q2*exp(-g_ewald*g_ewald*cutoff*cutoff) / 
-       sqrt(natoms*cutoff*xprd*yprd*zprd);
-   
-  double qopt = compute_qopt();
-  df_kspace = sqrt(qopt/natoms)*q2/(xprd*yprd*zprd_slab);
-   
-  return df_rspace - df_kspace;
-}
-
-/* ----------------------------------------------------------------------
- Calculate numerical derivative f'(x) using forward difference
- [f(x + h) - f(x)] / h
- ------------------------------------------------------------------------- */
-            
-double PPPMDisp::derivf()
-{  
-  double h = 0.000001;  //Derivative step-size
-  double df,f1,f2,g_ewald_old;
-  
-  f1 = f();
-  g_ewald_old = g_ewald;
-  g_ewald += h;
-  f2 = f();
-  g_ewald = g_ewald_old;
-  df = (f2 - f1)/h;
-  
-  return df;
-} 
-
-/* ----------------------------------------------------------------------
-   Calculate the final estimator for the accuracy
-------------------------------------------------------------------------- */
-
-double PPPMDisp::final_accuracy()
-{
-  double df_rspace, df_kspace;
-  double q2 = qsqsum * force->qqrd2e;
-  double xprd = domain->xprd;
-  double yprd = domain->yprd;
-  double zprd = domain->zprd;
-  double zprd_slab = zprd*slab_volfactor;
-  bigint natoms = atom->natoms;
-  df_rspace = 2.0*q2 * exp(-g_ewald*g_ewald*cutoff*cutoff) / 
-             sqrt(natoms*cutoff*xprd*yprd*zprd);
-
-  double qopt = compute_qopt();
-
-  df_kspace = sqrt(qopt/natoms)*q2/(xprd*yprd*zprd_slab);
-
-  double acc = sqrt(df_rspace*df_rspace + df_kspace*df_kspace);
-  return acc;
-}
-
-/* ----------------------------------------------------------------------
-   Calculate the final estimator for the Dispersion accuracy
-------------------------------------------------------------------------- */
-
-void PPPMDisp::final_accuracy_6(double& acc, double& acc_real, double& acc_kspace)
-{
-  double df_rspace, df_kspace;
-  double xprd = domain->xprd;
-  double yprd = domain->yprd;
-  double zprd = domain->zprd;
-  double zprd_slab = zprd*slab_volfactor;
-  bigint natoms = atom->natoms;
-  acc_real = lj_rspace_error();
-
-  double qopt = compute_qopt_6();
-
-  acc_kspace = sqrt(qopt/natoms)*csum/(xprd*yprd*zprd_slab);
-
-  acc = sqrt(acc_real*acc_real + acc_kspace*acc_kspace);
-  return;
-}
-
-/* ----------------------------------------------------------------------
-   Compute qopt for Coulomb interactions
-------------------------------------------------------------------------- */
-
-double PPPMDisp::compute_qopt()
-{
-  double qopt;
-  if (differentiation_flag == 1) {
-    qopt = compute_qopt_ad();
-  } else {
-    qopt = compute_qopt_ik();
-  }
-  double qopt_all;
-  MPI_Allreduce(&qopt,&qopt_all,1,MPI_DOUBLE,MPI_SUM,world);
-  return qopt_all;
-}
-
-/* ----------------------------------------------------------------------
-   Compute qopt for Dispersion interactions
-------------------------------------------------------------------------- */
-
-double PPPMDisp::compute_qopt_6()
-{
-  double qopt;
-  if (differentiation_flag == 1) {
-    qopt = compute_qopt_6_ad();
-  } else {
-    qopt = compute_qopt_6_ik();
-  }
-  double qopt_all;
-  MPI_Allreduce(&qopt,&qopt_all,1,MPI_DOUBLE,MPI_SUM,world);
-  return qopt_all;
-}
-
-/* ----------------------------------------------------------------------
-   Compute qopt for the ik differentiation scheme and Coulomb interaction
-------------------------------------------------------------------------- */
-
-double PPPMDisp::compute_qopt_ik()
-{
-  double qopt = 0.0;
-  int k,l,m;
-  double *prd;
-
-  if (triclinic == 0) prd = domain->prd;
-  else prd = domain->prd_lamda;
-
-  double xprd = prd[0];
-  double yprd = prd[1];
-  double zprd = prd[2];
-  double zprd_slab = zprd*slab_volfactor;
-
-  double unitkx = (2.0*MY_PI/xprd);
-  double unitky = (2.0*MY_PI/yprd);
-  double unitkz = (2.0*MY_PI/zprd_slab);
-
-  int nx,ny,nz,kper,lper,mper;
-  double sqk, u2;
-  double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz;
-  double sum1,sum2, sum3,dot1,dot2;
-
-  int nbx = 2;
-  int nby = 2;
-  int nbz = 2;
-
-  for (m = nzlo_fft; m <= nzhi_fft; m++) {
-    mper = m - nz_pppm*(2*m/nz_pppm);
-
-    for (l = nylo_fft; l <= nyhi_fft; l++) {
-      lper = l - ny_pppm*(2*l/ny_pppm);
-
-      for (k = nxlo_fft; k <= nxhi_fft; k++) {
-        kper = k - nx_pppm*(2*k/nx_pppm);
-      
-        sqk = pow(unitkx*kper,2.0) + pow(unitky*lper,2.0) + 
-          pow(unitkz*mper,2.0);
-
-        if (sqk != 0.0) {
-          sum1 = 0.0;
-          sum2 = 0.0;
-          sum3 = 0.0;
-          for (nx = -nbx; nx <= nbx; nx++) {
-            qx = unitkx*(kper+nx_pppm*nx);
-            sx = exp(-0.25*pow(qx/g_ewald,2.0));
-            wx = 1.0;
-            argx = 0.5*qx*xprd/nx_pppm;
-            if (argx != 0.0) wx = pow(sin(argx)/argx,order);
-            for (ny = -nby; ny <= nby; ny++) {
-              qy = unitky*(lper+ny_pppm*ny);
-              sy = exp(-0.25*pow(qy/g_ewald,2.0));
-              wy = 1.0;
-              argy = 0.5*qy*yprd/ny_pppm;
-              if (argy != 0.0) wy = pow(sin(argy)/argy,order);
-              for (nz = -nbz; nz <= nbz; nz++) {
-                qz = unitkz*(mper+nz_pppm*nz);
-                sz = exp(-0.25*pow(qz/g_ewald,2.0));
-                wz = 1.0;
-                argz = 0.5*qz*zprd_slab/nz_pppm;
-                if (argz != 0.0) wz = pow(sin(argz)/argz,order);
-
-                dot1 = unitkx*kper*qx + unitky*lper*qy + unitkz*mper*qz;
-                dot2 = qx*qx+qy*qy+qz*qz;
-                u2 =  pow(wx*wy*wz,2.0);
-                sum1 += sx*sy*sz*sx*sy*sz/dot2*4.0*4.0*MY_PI*MY_PI;
-                sum2 += u2*sx*sy*sz*4.0*MY_PI/dot2*dot1;
-		sum3 += u2;
-              }
-            }
-          }
-	  sum2 *= sum2;
-	  sum3 *= sum3*sqk;
-          qopt += sum1 -sum2/sum3;
-        }
-      }
-    }
-  }
-  return qopt;
-}
-
-/* ----------------------------------------------------------------------
-   Compute qopt for the ad differentiation scheme and Coulomb interaction
-------------------------------------------------------------------------- */
-
-double PPPMDisp::compute_qopt_ad()
-{
-  double qopt = 0.0;
-  int k,l,m;
-  double *prd;
-
-  if (triclinic == 0) prd = domain->prd;
-  else prd = domain->prd_lamda;
-
-  double xprd = prd[0];
-  double yprd = prd[1];
-  double zprd = prd[2];
-  double zprd_slab = zprd*slab_volfactor;
-
-
-  double unitkx = (2.0*MY_PI/xprd);
-  double unitky = (2.0*MY_PI/yprd);
-  double unitkz = (2.0*MY_PI/zprd_slab);
-
-  int nx,ny,nz,kper,lper,mper;
-  double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz;
-  double u2, sqk;
-  double sum1,sum2,sum3,sum4,dot2;
-  double numerator;
-
-  int nbx = 2;
-  int nby = 2;
-  int nbz = 2;
-  double form = 1.0;
-
-  for (m = nzlo_fft; m <= nzhi_fft; m++) {
-    mper = m - nz_pppm*(2*m/nz_pppm);
-
-    for (l = nylo_fft; l <= nyhi_fft; l++) {
-      lper = l - ny_pppm*(2*l/ny_pppm);
-
-      for (k = nxlo_fft; k <= nxhi_fft; k++) {
-        kper = k - nx_pppm*(2*k/nx_pppm);
-      
-        sqk = pow(unitkx*kper,2.0) + pow(unitky*lper,2.0) + 
-          pow(unitkz*mper,2.0);
-
-        if (sqk != 0.0) {
-          numerator = form*12.5663706;
-    
-          sum1 = 0.0;
-          sum2 = 0.0;
-          sum3 = 0.0;
-          sum4 = 0.0;
-          for (nx = -nbx; nx <= nbx; nx++) {
-            qx = unitkx*(kper+nx_pppm*nx);
-            sx = exp(-0.25*pow(qx/g_ewald,2.0));
-            wx = 1.0;
-            argx = 0.5*qx*xprd/nx_pppm;
-            if (argx != 0.0) wx = pow(sin(argx)/argx,order);
-            for (ny = -nby; ny <= nby; ny++) {
-              qy = unitky*(lper+ny_pppm*ny);
-              sy = exp(-0.25*pow(qy/g_ewald,2.0));
-              wy = 1.0;
-              argy = 0.5*qy*yprd/ny_pppm;
-              if (argy != 0.0) wy = pow(sin(argy)/argy,order);
-              for (nz = -nbz; nz <= nbz; nz++) {
-                qz = unitkz*(mper+nz_pppm*nz);
-                sz = exp(-0.25*pow(qz/g_ewald,2.0));
-                wz = 1.0;
-                argz = 0.5*qz*zprd_slab/nz_pppm;
-                if (argz != 0.0) wz = pow(sin(argz)/argz,order);
-
-                dot2 = qx*qx+qy*qy+qz*qz;
-                u2 =  pow(wx*wy*wz,2.0);
-                sum1 += sx*sy*sz*sx*sy*sz/dot2*4.0*4.0*MY_PI*MY_PI;
-                sum2 += sx*sy*sz * u2*4.0*MY_PI;
-                sum3 += u2;
-                sum4 += dot2*u2;
-              }
-            }
-          }
-          sum2 *= sum2;
-          qopt += sum1 - sum2/(sum3*sum4);
-        }
-      }
-    }
-  }
-  return qopt;
-}
-
-/* ----------------------------------------------------------------------
-   Compute qopt for the ik differentiation scheme and Dispersion interaction
-------------------------------------------------------------------------- */
-
-double PPPMDisp::compute_qopt_6_ik()
-{
-  double qopt = 0.0;
-  int k,l,m,n;
-  double *prd;
-
-  if (triclinic == 0) prd = domain->prd;
-  else prd = domain->prd_lamda;
-
-  double xprd = prd[0];
-  double yprd = prd[1];
-  double zprd = prd[2];
-  double zprd_slab = zprd*slab_volfactor;
-
-  double unitkx = (2.0*MY_PI/xprd);
-  double unitky = (2.0*MY_PI/yprd);
-  double unitkz = (2.0*MY_PI/zprd_slab);
-
-  int nx,ny,nz,kper,lper,mper;
-  double sqk, u2;
-  double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz;
-  double sum1,sum2, sum3;
-  double dot1,dot2, rtdot2, term;
-  double inv2ew = 2*g_ewald_6;
-  inv2ew = 1.0/inv2ew;
-  double rtpi = sqrt(MY_PI);
-
-  int nbx = 2;
-  int nby = 2;
-  int nbz = 2;
-
-  n = 0;
-  for (m = nzlo_fft_6; m <= nzhi_fft_6; m++) {
-    mper = m - nz_pppm_6*(2*m/nz_pppm_6);
-
-    for (l = nylo_fft_6; l <= nyhi_fft_6; l++) {
-      lper = l - ny_pppm_6*(2*l/ny_pppm_6);
-
-      for (k = nxlo_fft_6; k <= nxhi_fft_6; k++) {
-        kper = k - nx_pppm_6*(2*k/nx_pppm_6);
-      
-        sqk = pow(unitkx*kper,2.0) + pow(unitky*lper,2.0) + 
-          pow(unitkz*mper,2.0);
-
-        if (sqk != 0.0) {
-          sum1 = 0.0;
-          sum2 = 0.0;
-          sum3 = 0.0;
-          for (nx = -nbx; nx <= nbx; nx++) {
-            qx = unitkx*(kper+nx_pppm_6*nx);
-            sx = exp(-qx*qx*inv2ew*inv2ew);
-            wx = 1.0;
-            argx = 0.5*qx*xprd/nx_pppm_6;
-            if (argx != 0.0) wx = pow(sin(argx)/argx,order_6);
-            for (ny = -nby; ny <= nby; ny++) {
-              qy = unitky*(lper+ny_pppm_6*ny);
-              sy = exp(-qy*qy*inv2ew*inv2ew);
-              wy = 1.0;
-              argy = 0.5*qy*yprd/ny_pppm_6;
-              if (argy != 0.0) wy = pow(sin(argy)/argy,order_6);
-              for (nz = -nbz; nz <= nbz; nz++) {
-                qz = unitkz*(mper+nz_pppm_6*nz);
-                sz = exp(-qz*qz*inv2ew*inv2ew);
-                wz = 1.0;
-                argz = 0.5*qz*zprd_slab/nz_pppm_6;
-                if (argz != 0.0) wz = pow(sin(argz)/argz,order_6);
-
-                dot1 = unitkx*kper*qx + unitky*lper*qy + unitkz*mper*qz;
-                dot2 = qx*qx+qy*qy+qz*qz;
-                rtdot2 = sqrt(dot2);
-                term = (1-2*dot2*inv2ew*inv2ew)*sx*sy*sz +
-		       2*dot2*rtdot2*inv2ew*inv2ew*inv2ew*rtpi*erfc(rtdot2*inv2ew);
-                term *= g_ewald_6*g_ewald_6*g_ewald_6;
-                u2 =  pow(wx*wy*wz,2.0);
-                sum1 += term*term*MY_PI*MY_PI*MY_PI/9.0 * dot2;
-                sum2 += -u2*term*MY_PI*rtpi/3.0*dot1;
-		sum3 += u2;
-              }
-            }
-          }
-	  sum2 *= sum2;
-	  sum3 *= sum3*sqk;
-          qopt += sum1 -sum2/sum3;
-        }
-      }
-    }
-  }
-  return qopt;
-}
-
-/* ----------------------------------------------------------------------
-   Compute qopt for the ad differentiation scheme and Dispersion interaction
-------------------------------------------------------------------------- */
-
-double PPPMDisp::compute_qopt_6_ad()
-{
-  double qopt = 0.0;
-  int k,l,m;
-  double *prd;
-
-  if (triclinic == 0) prd = domain->prd;
-  else prd = domain->prd_lamda;
-
-  double xprd = prd[0];
-  double yprd = prd[1];
-  double zprd = prd[2];
-  double zprd_slab = zprd*slab_volfactor;
-
-  double unitkx = (2.0*MY_PI/xprd);
-  double unitky = (2.0*MY_PI/yprd);
-  double unitkz = (2.0*MY_PI/zprd_slab);
-
-  int nx,ny,nz,kper,lper,mper;
-  double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz;
-  double u2, sqk;
-  double sum1,sum2,sum3,sum4;
-  double dot2, rtdot2, term;
-  double inv2ew = 2*g_ewald_6;
-  inv2ew = 1/inv2ew;
-  double rtpi = sqrt(MY_PI);
-
-  int nbx = 2;
-  int nby = 2;
-  int nbz = 2;
-
-  for (m = nzlo_fft_6; m <= nzhi_fft_6; m++) {
-    mper = m - nz_pppm_6*(2*m/nz_pppm_6);
-
-    for (l = nylo_fft_6; l <= nyhi_fft_6; l++) {
-      lper = l - ny_pppm_6*(2*l/ny_pppm_6);
-
-      for (k = nxlo_fft_6; k <= nxhi_fft_6; k++) {
-        kper = k - nx_pppm_6*(2*k/nx_pppm_6);
-      
-        sqk = pow(unitkx*kper,2.0) + pow(unitky*lper,2.0) + 
-          pow(unitkz*mper,2.0);
-
-        if (sqk != 0.0) {
-    
-          sum1 = 0.0;
-          sum2 = 0.0;
-          sum3 = 0.0;
-          sum4 = 0.0;
-          for (nx = -nbx; nx <= nbx; nx++) {
-            qx = unitkx*(kper+nx_pppm_6*nx);
-            sx = exp(-qx*qx*inv2ew*inv2ew);
-            wx = 1.0;
-            argx = 0.5*qx*xprd/nx_pppm_6;
-            if (argx != 0.0) wx = pow(sin(argx)/argx,order_6);
-            for (ny = -nby; ny <= nby; ny++) {
-              qy = unitky*(lper+ny_pppm_6*ny);
-              sy = exp(-qy*qy*inv2ew*inv2ew);
-              wy = 1.0;
-              argy = 0.5*qy*yprd/ny_pppm_6;
-              if (argy != 0.0) wy = pow(sin(argy)/argy,order_6);
-              for (nz = -nbz; nz <= nbz; nz++) {
-                qz = unitkz*(mper+nz_pppm_6*nz);
-                sz = exp(-qz*qz*inv2ew*inv2ew);
-                wz = 1.0;
-                argz = 0.5*qz*zprd_slab/nz_pppm_6;
-                if (argz != 0.0) wz = pow(sin(argz)/argz,order_6);
-
-                dot2 = qx*qx+qy*qy+qz*qz;
-                rtdot2 = sqrt(dot2);
-                term = (1-2*dot2*inv2ew*inv2ew)*sx*sy*sz +
-		       2*dot2*rtdot2*inv2ew*inv2ew*inv2ew*rtpi*erfc(rtdot2*inv2ew);
-                term *= g_ewald_6*g_ewald_6*g_ewald_6;
-                u2 =  pow(wx*wy*wz,2.0);
-                sum1 += term*term*MY_PI*MY_PI*MY_PI/9.0 * dot2;
-                sum2 += -term*MY_PI*rtpi/3.0 * u2 * dot2;
-                sum3 += u2;
-                sum4 += dot2*u2;
-              }
-            }
-          }
-          sum2 *= sum2;
-          qopt += sum1 - sum2/(sum3*sum4);
-        }
-      }
-    }
-  }
-  return qopt;
-}
-
-/* ----------------------------------------------------------------------
-   set size of FFT grid  and g_ewald_6
-   for Dispersion interactions
-------------------------------------------------------------------------- */
-
-void PPPMDisp::set_grid_6()
-{
-  // Calculate csum
-  if (!csumflag) calc_csum();
-  if (!gewaldflag_6) set_init_g6();
-  if (!gridflag_6) set_n_pppm_6();
-  while (!factorable(nx_pppm_6)) nx_pppm_6++;
-  while (!factorable(ny_pppm_6)) ny_pppm_6++;
-  while (!factorable(nz_pppm_6)) nz_pppm_6++;
-  
-}
-
-/* ----------------------------------------------------------------------
-   Calculate the sum of the squared dispersion coefficients and other 
-   related quantities required for the calculations
-------------------------------------------------------------------------- */
-
-void PPPMDisp::calc_csum()
-{
-  csumij = 0.0;
-  csum = 0.0;
-
-  int ntypes = atom->ntypes;   
-  int i,j,k;
-
-  delete [] cii;
-  cii = new double[ntypes +1];
-  for (i = 0; i<=ntypes; i++) cii[i] = 0.0;
-  delete [] csumi; 
-  csumi = new double[ntypes +1];
-  for (i = 0; i<=ntypes; i++) csumi[i] = 0.0; 
-  int *neach = new int[ntypes+1];
-  for (i = 0; i<=ntypes; i++) neach[i] = 0; 
-
-  //the following variables are needed to distinguish between arithmetic
-  //  and geometric mixing
-
-  double mix1;    // scales 20/16 to 4
-  int mix2;       // shifts the value to the sigma^3 value
-  int mix3;       // shifts the value to the right atom type
-  if (function[1]) {
-    for (i = 1; i <= ntypes; i++)
-      cii[i] = B[i]*B[i];
-    int tmp;
-    for (i = 0; i < atom->nlocal; i++) {
-      tmp = atom->type[i];
-      neach[tmp]++;
-      csum += B[tmp]*B[tmp];
-    }
-  }
-  if (function[2]) {
-    for (i = 1; i <= ntypes; i++)
-      cii[i] = 64.0/20.0*B[7*i+3]*B[7*i+3];
-    int tmp;
-    for (i = 0; i < atom->nlocal; i++) {
-      tmp = atom->type[i];
-      neach[tmp]++;
-      csum += 64.0/20.0*B[7*tmp+3]*B[7*tmp+3];
-    }
-  }
-  if (function[3]) {
-    for (i = 1; i <= ntypes; i++)
-      for (j = 0; j < nsplit; j++)
-        cii[i] += B[j]*B[nsplit*i + j]*B[nsplit*i + j];
-    int tmp;
-    for (i = 0; i < atom->nlocal; i++) {
-      tmp = atom->type[i];
-      neach[tmp]++;
-      for (j = 0; j < nsplit; j++)
-        csum += B[j]*B[nsplit*tmp + j]*B[nsplit*tmp + j];
-    }
-  }
-
-
-  double tmp2;
-  MPI_Allreduce(&csum,&tmp2,1,MPI_DOUBLE,MPI_SUM,world);
-  csum = tmp2;
-  csumflag = 1;
-
-  int *neach_all = new int[ntypes+1];
-  MPI_Allreduce(neach,neach_all,ntypes+1,MPI_INT,MPI_SUM,world);
-
-  // copmute csumij and csumi
-  double d1, d2;
-  if (function[1]){
-    for (i=1; i<=ntypes; i++) {
-      for (j=1; j<=ntypes; j++) {
-        csumi[i] += neach_all[j]*B[i]*B[j];
-        d1 = neach_all[i]*B[i];
-        d2 = neach_all[j]*B[j];
-        csumij += d1*d2;
-        //csumij += neach_all[i]*neach_all[j]*B[i]*B[j]; 
-      }
-    }
-  }
-  if (function[2]) {
-    for (i=1; i<=ntypes; i++) {
-      for (j=1; j<=ntypes; j++) {
-        for (k=0; k<=6; k++) {
-          csumi[i] += neach_all[j]*B[7*i + k]*B[7*(j+1)-k-1];
-          d1 = neach_all[i]*B[7*i + k];
-          d2 = neach_all[j]*B[7*(j+1)-k-1];
-          csumij += d1*d2;
-          //csumij += neach_all[i]*neach_all[j]*B[7*i + k]*B[7*(j+1)-k-1];
-        }
-      }
-    }
-  }
-  if (function[3]) {
-    for (i=1; i<=ntypes; i++) {
-      for (j=1; j<=ntypes; j++) {
-        for (k=0; k<nsplit; k++) {
-	  csumi[i] += neach_all[j]*B[k]*B[nsplit*i+k]*B[nsplit*j+k];
-	  d1 = neach_all[i]*B[nsplit*i+k];
-	  d2 = neach_all[j]*B[nsplit*j+k];
-          csumij += B[k]*d1*d2;
-	}
-      }
-    }
-  }    
-
-  delete [] neach;
-  delete [] neach_all;
-}
-
-/* ----------------------------------------------------------------------
-   adjust g_ewald_6 to the new grid size
-------------------------------------------------------------------------- */
-
-void PPPMDisp::adjust_gewald_6()
-{
-  // Use Newton solver to find g_ewald_6
-  double dx;
-
-  // Start loop
-
-  for (int i = 0; i <  LARGE; i++) {
-    dx = f_6() / derivf_6();
-    g_ewald_6 -= dx; //update g_ewald_6
-    if (fabs(f_6()) < SMALL) return;
-  }
-
-  // Failed to converge
-
-  char str[128];
-  sprintf(str, "Could not adjust g_ewald_6");
-  error->all(FLERR, str);
-
-}
-
-/* ----------------------------------------------------------------------
- Calculate f(x) for Dispersion interaction
- ------------------------------------------------------------------------- */
-
-double PPPMDisp::f_6()
-{
-  double df_rspace, df_kspace;
-  double *prd;
-
-  if (triclinic == 0) prd = domain->prd;
-  else prd = domain->prd_lamda;
-
-  double xprd = prd[0];
-  double yprd = prd[1];
-  double zprd = prd[2];
-  double zprd_slab = zprd*slab_volfactor;
-  bigint natoms = atom->natoms;
-
-  df_rspace = lj_rspace_error();
-   
-  double qopt = compute_qopt_6();
-  df_kspace = sqrt(qopt/natoms)*csum/(xprd*yprd*zprd_slab);
-   
-  return df_rspace - df_kspace;
-}
-
-/* ----------------------------------------------------------------------
- Calculate numerical derivative f'(x) using forward difference
- [f(x + h) - f(x)] / h
- ------------------------------------------------------------------------- */
-            
-double PPPMDisp::derivf_6()
-{  
-  double h = 0.000001;  //Derivative step-size
-  double df,f1,f2,g_ewald_old;
-  
-  f1 = f_6();
-  g_ewald_old = g_ewald_6;
-  g_ewald_6 += h;
-  f2 = f_6();
-  g_ewald_6 = g_ewald_old;
-  df = (f2 - f1)/h;
-  
-  return df;
-} 
-
-
-/* ----------------------------------------------------------------------
-   calculate an initial value for g_ewald_6
-   ---------------------------------------------------------------------- */
-
-void PPPMDisp::set_init_g6()
-{
-  // use xprd,yprd,zprd even if triclinic so grid size is the same
-  // adjust z dimension for 2d slab PPPM
-  // 3d PPPM just uses zprd since slab_volfactor = 1.0
-
-  // make initial g_ewald estimate
-  // based on desired error and real space cutoff
- 
-  // compute initial value for df_real with g_ewald_6 = 1/cutoff_lj
-  // if df_real > 0, repeat divide g_ewald_6 by 2 until df_real < 0
-  // else, repeat multiply g_ewald_6 by 2 until df_real > 0
-  // perform bisection for the last two values of
-  double df_real;
-  double g_ewald_old; 
-  double gmin, gmax;
-
-  // check if there is a user defined accuracy
-  double acc_rspace = accuracy;
-  if (accuracy_real_6 > 0) acc_rspace = accuracy_real_6;
-
-  g_ewald_6 = 1.0/cutoff_lj;
-  df_real = lj_rspace_error() - acc_rspace;
-  int counter = 0;
-  if (df_real > 0) {
-    while (df_real > 0 && counter < LARGE) {
-      counter++;
-      g_ewald_old = g_ewald_6;
-      g_ewald_6 *= 2;
-      df_real = lj_rspace_error() - acc_rspace;
-    }
-  }
-
-  if (df_real < 0) {
-    while (df_real < 0 && counter < LARGE) {
-      counter++;
-      g_ewald_old = g_ewald_6;
-      g_ewald_6 *= 0.5;
-      df_real = lj_rspace_error() - acc_rspace;
-    }
-  }
-
-  if (counter >= LARGE-1) error->all(FLERR,"Cannot compute initial g_ewald_disp");
-
-  gmin = MIN(g_ewald_6, g_ewald_old);
-  gmax = MAX(g_ewald_6, g_ewald_old);
-  g_ewald_6 = gmin + 0.5*(gmax-gmin);
-  counter = 0;
-  while (gmax-gmin > SMALL && counter < LARGE) {
-    counter++;
-    df_real = lj_rspace_error() -acc_rspace;
-    if (df_real < 0) gmax = g_ewald_6;
-    else gmin = g_ewald_6;
-    g_ewald_6 = gmin + 0.5*(gmax-gmin);
-  }
-  if (counter >= LARGE-1) error->all(FLERR,"Cannot compute initial g_ewald_disp");
-
-}
-
-/* ----------------------------------------------------------------------
-   calculate nx_pppm, ny_pppm, nz_pppm for dispersion interaction
-   ---------------------------------------------------------------------- */
-
-void PPPMDisp::set_n_pppm_6()
-{
-  bigint natoms = atom->natoms;
-
-  double *prd;
-
-  if (triclinic == 0) prd = domain->prd;
-  else prd = domain->prd_lamda;
-
-  double xprd = prd[0];
-  double yprd = prd[1];
-  double zprd = prd[2];
-  double zprd_slab = zprd*slab_volfactor;
-  double h, h_x,h_y,h_z;
-
-  double acc_kspace = accuracy;
-  if (accuracy_kspace_6 > 0.0) acc_kspace = accuracy_kspace_6;
-
-  // initial value for the grid spacing
-  h = h_x = h_y = h_z = 4.0/g_ewald_6;
-  // decrease grid spacing untill required precision is obtained
-  int count = 0;
-  while(1) {
-  
-    // set grid dimension
-    nx_pppm_6 = static_cast<int> (xprd/h_x);
-    ny_pppm_6 = static_cast<int> (yprd/h_y);
-    nz_pppm_6 = static_cast<int> (zprd_slab/h_z);
-
-    if (nx_pppm_6 <= 1) nx_pppm_6 = 2;
-    if (ny_pppm_6 <= 1) ny_pppm_6 = 2;
-    if (nz_pppm_6 <= 1) nz_pppm_6 = 2;
-
-    //set local grid dimension
-    int npey_fft,npez_fft;
-    if (nz_pppm_6 >= nprocs) {
-      npey_fft = 1;
-      npez_fft = nprocs;
-    } else procs2grid2d(nprocs,ny_pppm_6,nz_pppm_6,&npey_fft,&npez_fft);
-
-    int me_y = me % npey_fft;
-    int me_z = me / npey_fft;
-
-    nxlo_fft_6 = 0;
-    nxhi_fft_6 = nx_pppm_6 - 1;
-    nylo_fft_6 = me_y*ny_pppm_6/npey_fft;
-    nyhi_fft_6 = (me_y+1)*ny_pppm_6/npey_fft - 1;
-    nzlo_fft_6 = me_z*nz_pppm_6/npez_fft;
-    nzhi_fft_6 = (me_z+1)*nz_pppm_6/npez_fft - 1;
-
-    double qopt = compute_qopt_6();
- 
-    double df_kspace = sqrt(qopt/natoms)*csum/(xprd*yprd*zprd_slab);
-
-    count++;
-
-    // break loop if the accuracy has been reached or too many loops have been performed
-    if (df_kspace <= acc_kspace) break;
-    if (count > 500) error->all(FLERR, "Could not compute grid size for Dispersion");
-    h *= 0.95;
-    h_x = h_y = h_z = h;
-  }
-}
-
-/* ----------------------------------------------------------------------
-   calculate the real space error for dispersion interactions
-   ---------------------------------------------------------------------- */
-
-double PPPMDisp::lj_rspace_error()
-{
-  bigint natoms = atom->natoms;
-  double xprd = domain->xprd;
-  double yprd = domain->yprd;
-  double zprd = domain->zprd;
-  double zprd_slab = zprd*slab_volfactor;
-
-  double deltaf;
-  double rgs = (cutoff_lj*g_ewald_6);
-  rgs *= rgs;
-  double rgs_inv = 1.0/rgs;
-  deltaf = csum/sqrt(natoms*xprd*yprd*zprd_slab*cutoff_lj)*sqrt(MY_PI)*pow(g_ewald_6, 5)*
-    exp(-rgs)*(1+rgs_inv*(3+rgs_inv*(6+rgs_inv*6)));
-  return deltaf;
-}
-
-
-/* ----------------------------------------------------------------------
-   Compyute the modified (hockney-eastwood) coulomb green function
-   ---------------------------------------------------------------------- */ 
-
-void PPPMDisp::compute_gf()
-{
-  int k,l,m,n;
-  double *prd;
-
-  if (triclinic == 0) prd = domain->prd;
-  else prd = domain->prd_lamda;
-
-  double xprd = prd[0];
-  double yprd = prd[1];
-  double zprd = prd[2];
-  double zprd_slab = zprd*slab_volfactor;
-  volume = xprd * yprd * zprd_slab;
-
-  double unitkx = (2.0*MY_PI/xprd);
-  double unitky = (2.0*MY_PI/yprd);
-  double unitkz = (2.0*MY_PI/zprd_slab);
-
-  int kper,lper,mper;
-  double snx,sny,snz,snx2,sny2,snz2;
-  double sqk;
-  double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz;
-  double numerator,denominator;
-
-
-  n = 0;
-  for (m = nzlo_fft; m <= nzhi_fft; m++) {
-    mper = m - nz_pppm*(2*m/nz_pppm);
-    qz = unitkz*mper;
-    snz = sin(0.5*qz*zprd_slab/nz_pppm);
-    snz2 = snz*snz;
-    sz = exp(-0.25*pow(qz/g_ewald,2.0));
-    wz = 1.0;
-    argz = 0.5*qz*zprd_slab/nz_pppm;
-    if (argz != 0.0) wz = pow(sin(argz)/argz,order);
-    wz *= wz;
-
-    for (l = nylo_fft; l <= nyhi_fft; l++) {
-      lper = l - ny_pppm*(2*l/ny_pppm);
-      qy = unitky*lper;
-      sny = sin(0.5*qy*yprd/ny_pppm);
-      sny2 = sny*sny;
-      sy = exp(-0.25*pow(qy/g_ewald,2.0));
-      wy = 1.0;
-      argy = 0.5*qy*yprd/ny_pppm;
-      if (argy != 0.0) wy = pow(sin(argy)/argy,order);
-      wy *= wy;
-
-      for (k = nxlo_fft; k <= nxhi_fft; k++) {
-        kper = k - nx_pppm*(2*k/nx_pppm);
-        qx = unitkx*kper;
-        snx = sin(0.5*qx*xprd/nx_pppm);
-        snx2 = snx*snx;
-        sx = exp(-0.25*pow(qx/g_ewald,2.0));
-        wx = 1.0;
-        argx = 0.5*qx*xprd/nx_pppm;
-        if (argx != 0.0) wx = pow(sin(argx)/argx,order);
-        wx *= wx;
-
-        sqk = pow(qx,2.0) + pow(qy,2.0) + pow(qz,2.0);
-
-        if (sqk != 0.0) {
-          numerator = 4.0*MY_PI/sqk;
-          denominator = gf_denom(snx2,sny2,snz2, gf_b, order);  
-          greensfn[n++] = numerator*sx*sy*sz*wx*wy*wz/denominator;
-        } else greensfn[n++] = 0.0;
-      }
-    }
-  }
-}
-
-/* ----------------------------------------------------------------------
-   compute self force coefficients for ad-differentiation scheme
-   and Coulomb interaction 
-------------------------------------------------------------------------- */
-
-void PPPMDisp::compute_sf_precoeff(int nxp, int nyp, int nzp, int ord, 
-                                    int nxlo_ft, int nylo_ft, int nzlo_ft,
-                                    int nxhi_ft, int nyhi_ft, int nzhi_ft,
-                                    double *sf_pre1, double *sf_pre2, double *sf_pre3,
-                                    double *sf_pre4, double *sf_pre5, double *sf_pre6)
-{
-
-  int i,k,l,m,n;
-  double *prd;
-
-  // volume-dependent factors
-  // adjust z dimension for 2d slab PPPM
-  // z dimension for 3d PPPM is zprd since slab_volfactor = 1.0
-
-  if (triclinic == 0) prd = domain->prd;
-  else prd = domain->prd_lamda;
-
-  double xprd = prd[0];
-  double yprd = prd[1];
-  double zprd = prd[2];
-  double zprd_slab = zprd*slab_volfactor;
-
-  double unitkx = (2.0*MY_PI/xprd);
-  double unitky = (2.0*MY_PI/yprd);
-  double unitkz = (2.0*MY_PI/zprd_slab);
-
-  int nx,ny,nz,kper,lper,mper;
-  double argx,argy,argz;
-  double wx0[5],wy0[5],wz0[5],wx1[5],wy1[5],wz1[5],wx2[5],wy2[5],wz2[5];
-  double qx0,qy0,qz0,qx1,qy1,qz1,qx2,qy2,qz2;
-  double u0,u1,u2,u3,u4,u5,u6;
-  double sum1,sum2,sum3,sum4,sum5,sum6;
-
-  int nb = 2;
-
-  n = 0;
-  for (m = nzlo_ft; m <= nzhi_ft; m++) {
-    mper = m - nzp*(2*m/nzp);
-
-    for (l = nylo_ft; l <= nyhi_ft; l++) {
-      lper = l - nyp*(2*l/nyp);
-
-      for (k = nxlo_ft; k <= nxhi_ft; k++) {
-        kper = k - nxp*(2*k/nxp);
-      
-        sum1 = sum2 = sum3 = sum4 = sum5 = sum6 = 0.0;
-        for (i = -nb; i <= nb; i++) {
-
-          qx0 = unitkx*(kper+nxp*i);
-          qx1 = unitkx*(kper+nxp*(i+1));
-          qx2 = unitkx*(kper+nxp*(i+2));
-          wx0[i+2] = 1.0;
-          wx1[i+2] = 1.0;
-          wx2[i+2] = 1.0;
-          argx = 0.5*qx0*xprd/nxp;
-          if (argx != 0.0) wx0[i+2] = pow(sin(argx)/argx,ord);
-          argx = 0.5*qx1*xprd/nxp;
-          if (argx != 0.0) wx1[i+2] = pow(sin(argx)/argx,ord);
-          argx = 0.5*qx2*xprd/nxp;
-          if (argx != 0.0) wx2[i+2] = pow(sin(argx)/argx,ord);
-
-          qy0 = unitky*(lper+nyp*i);
-          qy1 = unitky*(lper+nyp*(i+1));
-          qy2 = unitky*(lper+nyp*(i+2));
-          wy0[i+2] = 1.0;
-          wy1[i+2] = 1.0;
-          wy2[i+2] = 1.0;
-          argy = 0.5*qy0*yprd/nyp;
-          if (argy != 0.0) wy0[i+2] = pow(sin(argy)/argy,ord);
-          argy = 0.5*qy1*yprd/nyp;
-          if (argy != 0.0) wy1[i+2] = pow(sin(argy)/argy,ord);
-          argy = 0.5*qy2*yprd/nyp;
-          if (argy != 0.0) wy2[i+2] = pow(sin(argy)/argy,ord);
-   
-          qz0 = unitkz*(mper+nzp*i);
-          qz1 = unitkz*(mper+nzp*(i+1));
-          qz2 = unitkz*(mper+nzp*(i+2));
-          wz0[i+2] = 1.0;
-          wz1[i+2] = 1.0;
-          wz2[i+2] = 1.0;
-          argz = 0.5*qz0*zprd_slab/nzp;
-          if (argz != 0.0) wz0[i+2] = pow(sin(argz)/argz,ord);
-          argz = 0.5*qz1*zprd_slab/nzp;
-          if (argz != 0.0) wz1[i+2] = pow(sin(argz)/argz,ord);
-           argz = 0.5*qz2*zprd_slab/nzp;
-          if (argz != 0.0) wz2[i+2] = pow(sin(argz)/argz,ord);
-        }
-    
-        for (nx = 0; nx <= 4; nx++) {
-          for (ny = 0; ny <= 4; ny++) {
-            for (nz = 0; nz <= 4; nz++) {
-              u0 = wx0[nx]*wy0[ny]*wz0[nz];
-              u1 = wx1[nx]*wy0[ny]*wz0[nz];
-              u2 = wx2[nx]*wy0[ny]*wz0[nz];
-              u3 = wx0[nx]*wy1[ny]*wz0[nz];
-              u4 = wx0[nx]*wy2[ny]*wz0[nz];
-              u5 = wx0[nx]*wy0[ny]*wz1[nz];
-              u6 = wx0[nx]*wy0[ny]*wz2[nz];
-
-              sum1 += u0*u1;
-              sum2 += u0*u2;
-              sum3 += u0*u3;
-              sum4 += u0*u4;
-              sum5 += u0*u5;
-              sum6 += u0*u6;
-            }
-          }
-        }
-        
-        // store values
-
-        sf_pre1[n] = sum1;
-        sf_pre2[n] = sum2;
-        sf_pre3[n] = sum3;
-        sf_pre4[n] = sum4;
-        sf_pre5[n] = sum5;
-        sf_pre6[n++] = sum6;
-      }
-    }
-  }
-}
-
-/* ----------------------------------------------------------------------
-   Compute the modified (hockney-eastwood) dispersion green function
-   ---------------------------------------------------------------------- */
-
-void PPPMDisp::compute_gf_6()
-{
-  double *prd;
-  int k,l,m,n;
-
-  // volume-dependent factors
-  // adjust z dimension for 2d slab PPPM
-  // z dimension for 3d PPPM is zprd since slab_volfactor = 1.0
-
-  if (triclinic == 0) prd = domain->prd;
-  else prd = domain->prd_lamda;
-
-  double xprd = prd[0];
-  double yprd = prd[1];
-  double zprd = prd[2];
-  double zprd_slab = zprd*slab_volfactor;
-
-  double unitkx = (2.0*MY_PI/xprd);
-  double unitky = (2.0*MY_PI/yprd);
-  double unitkz = (2.0*MY_PI/zprd_slab);
-
-  int kper,lper,mper;
-  double sqk;
-  double snx,sny,snz,snx2,sny2,snz2;
-  double argx,argy,argz,wx,wy,wz,sx,sy,sz;
-  double qx,qy,qz;
-  double rtsqk, term;
-  double numerator,denominator;
-  double inv2ew = 2*g_ewald_6;
-  inv2ew = 1/inv2ew;
-  double rtpi = sqrt(MY_PI);
-
-  numerator = -MY_PI*rtpi*g_ewald_6*g_ewald_6*g_ewald_6/(3.0);
-
-  n = 0;
-  for (m = nzlo_fft_6; m <= nzhi_fft_6; m++) {
-    mper = m - nz_pppm_6*(2*m/nz_pppm_6);
-    qz = unitkz*mper;
-    snz = sin(0.5*unitkz*mper*zprd_slab/nz_pppm_6);
-    snz2 = snz*snz;
-    sz = exp(-qz*qz*inv2ew*inv2ew);
-    wz = 1.0;
-    argz = 0.5*qz*zprd_slab/nz_pppm_6;
-    if (argz != 0.0) wz = pow(sin(argz)/argz,order_6);
-    wz *= wz;
-              
-    for (l = nylo_fft_6; l <= nyhi_fft_6; l++) {
-      lper = l - ny_pppm_6*(2*l/ny_pppm_6);
-      qy = unitky*lper;
-      sny = sin(0.5*unitky*lper*yprd/ny_pppm_6);
-      sny2 = sny*sny;
-      sy = exp(-qy*qy*inv2ew*inv2ew);
-      wy = 1.0;
-      argy = 0.5*qy*yprd/ny_pppm_6;
-      if (argy != 0.0) wy = pow(sin(argy)/argy,order_6);
-      wy *= wy;
-
-      for (k = nxlo_fft_6; k <= nxhi_fft_6; k++) {
-	kper = k - nx_pppm_6*(2*k/nx_pppm_6);
-        qx = unitkx*kper;
-	snx = sin(0.5*unitkx*kper*xprd/nx_pppm_6);
-	snx2 = snx*snx;
-        sx = exp(-qx*qx*inv2ew*inv2ew);
-	wx = 1.0;
-	argx = 0.5*qx*xprd/nx_pppm_6;
-	if (argx != 0.0) wx = pow(sin(argx)/argx,order_6);
-        wx *= wx;
-      
-	sqk = pow(qx,2.0) + pow(qy,2.0) + pow(qz,2.0);
-
-        if (sqk != 0.0) {
-	  denominator = gf_denom(snx2,sny2,snz2, gf_b_6, order_6); 
-	  rtsqk = sqrt(sqk);
-          term = (1-2*sqk*inv2ew*inv2ew)*sx*sy*sz +
-                  2*sqk*rtsqk*inv2ew*inv2ew*inv2ew*rtpi*erfc(rtsqk*inv2ew);
-	  greensfn_6[n++] = numerator*term*wx*wy*wz/denominator;
-        } else greensfn_6[n++] = 0.0;
-      }
-    }
-  }
-}
-
-/* ----------------------------------------------------------------------
-   compute self force coefficients for ad-differentiation scheme
-   and Coulomb interaction 
-------------------------------------------------------------------------- */
-void PPPMDisp::compute_sf_coeff()
-{
-  int i,k,l,m,n;
-  double *prd;
-
-  if (triclinic == 0) prd = domain->prd;
-  else prd = domain->prd_lamda;
-
-  double xprd = prd[0];
-  double yprd = prd[1];
-  double zprd = prd[2];
-  double zprd_slab = zprd*slab_volfactor;
-  volume = xprd * yprd * zprd_slab;
-
-  for (i = 0; i <= 5; i++) sf_coeff[i] = 0.0;
-
-  n = 0;
-  for (m = nzlo_fft; m <= nzhi_fft; m++) {
-    for (l = nylo_fft; l <= nyhi_fft; l++) {
-      for (k = nxlo_fft; k <= nxhi_fft; k++) {
-        sf_coeff[0] += sf_precoeff1[n]*greensfn[n];
-        sf_coeff[1] += sf_precoeff2[n]*greensfn[n];
-        sf_coeff[2] += sf_precoeff3[n]*greensfn[n];
-        sf_coeff[3] += sf_precoeff4[n]*greensfn[n];
-        sf_coeff[4] += sf_precoeff5[n]*greensfn[n];
-        sf_coeff[5] += sf_precoeff6[n]*greensfn[n];
-        ++n;
-      }
-    }
-  }
-
-  // Compute the coefficients for the self-force correction
-
-  double prex, prey, prez;
-  prex = prey = prez = MY_PI/volume;
-  prex *= nx_pppm/xprd;
-  prey *= ny_pppm/yprd;
-  prez *= nz_pppm/zprd_slab;
-  sf_coeff[0] *= prex;
-  sf_coeff[1] *= prex*2;
-  sf_coeff[2] *= prey;
-  sf_coeff[3] *= prey*2;
-  sf_coeff[4] *= prez;
-  sf_coeff[5] *= prez*2;
-
-  // communicate values with other procs
-
-  double tmp[6];
-  MPI_Allreduce(sf_coeff,tmp,6,MPI_DOUBLE,MPI_SUM,world);
-  for (n = 0; n < 6; n++) sf_coeff[n] = tmp[n];
-}
-
-/* ----------------------------------------------------------------------
-   compute self force coefficients for ad-differentiation scheme
-   and Dispersion interaction 
-------------------------------------------------------------------------- */
-
-void PPPMDisp::compute_sf_coeff_6()
-{
-  int i,k,l,m,n;
-  double *prd;
-
-  if (triclinic == 0) prd = domain->prd;
-  else prd = domain->prd_lamda;
-
-  double xprd = prd[0];
-  double yprd = prd[1];
-  double zprd = prd[2];
-  double zprd_slab = zprd*slab_volfactor;
-  volume = xprd * yprd * zprd_slab;
-
-  for (i = 0; i <= 5; i++) sf_coeff_6[i] = 0.0;
-
-  n = 0;
-  for (m = nzlo_fft_6; m <= nzhi_fft_6; m++) {
-    for (l = nylo_fft_6; l <= nyhi_fft_6; l++) {
-      for (k = nxlo_fft_6; k <= nxhi_fft_6; k++) {
-        sf_coeff_6[0] += sf_precoeff1_6[n]*greensfn_6[n];
-        sf_coeff_6[1] += sf_precoeff2_6[n]*greensfn_6[n];
-        sf_coeff_6[2] += sf_precoeff3_6[n]*greensfn_6[n];
-        sf_coeff_6[3] += sf_precoeff4_6[n]*greensfn_6[n];
-        sf_coeff_6[4] += sf_precoeff5_6[n]*greensfn_6[n];
-        sf_coeff_6[5] += sf_precoeff6_6[n]*greensfn_6[n];
-        ++n;
-      }
-    }
-  }
-
-  
-  // perform multiplication with prefactors
-  
-  double prex, prey, prez;
-  prex = prey = prez = MY_PI/volume;
-  prex *= nx_pppm_6/xprd;
-  prey *= ny_pppm_6/yprd;
-  prez *= nz_pppm_6/zprd_slab;
-  sf_coeff_6[0] *= prex;
-  sf_coeff_6[1] *= prex*2;
-  sf_coeff_6[2] *= prey;
-  sf_coeff_6[3] *= prey*2;
-  sf_coeff_6[4] *= prez;
-  sf_coeff_6[5] *= prez*2;
-  
-  // communicate values with other procs
-  
-  double tmp[6];
-  MPI_Allreduce(sf_coeff_6,tmp,6,MPI_DOUBLE,MPI_SUM,world);
-  for (n = 0; n < 6; n++) sf_coeff_6[n] = tmp[n];
-
-}
-
-/* ----------------------------------------------------------------------
-   denominator for Hockney-Eastwood Green's function
-     of x,y,z = sin(kx*deltax/2), etc
-
-            inf                 n-1
-   S(n,k) = Sum  W(k+pi*j)**2 = Sum b(l)*(z*z)**l
-           j=-inf               l=0
-
-          = -(z*z)**n /(2n-1)! * (d/dx)**(2n-1) cot(x)  at z = sin(x)
-   gf_b = denominator expansion coeffs 
-------------------------------------------------------------------------- */
-
-double PPPMDisp::gf_denom(double x, double y, double z, double *g_b, int ord)
-{
-  double sx,sy,sz;
-  sz = sy = sx = 0.0;
-  for (int l = ord-1; l >= 0; l--) {
-    sx = g_b[l] + sx*x;
-    sy = g_b[l] + sy*y;
-    sz = g_b[l] + sz*z;
-  }
-  double s = sx*sy*sz;
-  return s*s;
-}
-
-/* ----------------------------------------------------------------------
-   pre-compute Green's function denominator expansion coeffs, Gamma(2n) 
-------------------------------------------------------------------------- */
-
-void PPPMDisp::compute_gf_denom(double* gf, int ord)
-{
-  int k,l,m;
-  
-  for (l = 1; l < ord; l++) gf[l] = 0.0;
-  gf[0] = 1.0;
-  
-  for (m = 1; m < ord; m++) {
-    for (l = m; l > 0; l--) 
-      gf[l] = 4.0 * (gf[l]*(l-m)*(l-m-0.5)-gf[l-1]*(l-m-1)*(l-m-1));
-    gf[0] = 4.0 * (gf[0]*(l-m)*(l-m-0.5));
-  }
-
-  bigint ifact = 1;
-  for (k = 1; k < 2*ord; k++) ifact *= k;
-  double gaminv = 1.0/ifact;
-  for (l = 0; l < ord; l++) gf[l] *= gaminv;
-}
-
-/* ----------------------------------------------------------------------
-   ghost-swap to accumulate full density in brick decomposition 
-   remap density from 3d brick decomposition to FFTdecomposition
-   for coulomb interaction or dispersion interaction with geometric
-   mixing
-------------------------------------------------------------------------- */
-
-void PPPMDisp::brick2fft(int nxlo_i, int nylo_i, int nzlo_i,
-                         int nxhi_i, int nyhi_i, int nzhi_i,
-                         FFT_SCALAR*** dbrick, FFT_SCALAR* dfft, FFT_SCALAR* work,
-                         LAMMPS_NS::Remap* rmp)
-{
-  int n,ix,iy,iz;
-
-  // copy grabs inner portion of density from 3d brick
-  // remap could be done as pre-stage of FFT,
-  //   but this works optimally on only double values, not complex values
-
-  n = 0;
-  for (iz = nzlo_i; iz <= nzhi_i; iz++)
-    for (iy = nylo_i; iy <= nyhi_i; iy++)
-      for (ix = nxlo_i; ix <= nxhi_i; ix++)
-	dfft[n++] = dbrick[iz][iy][ix];
-
-  rmp->perform(dfft,dfft,work);
-}
-
-
-/* ----------------------------------------------------------------------
-   ghost-swap to accumulate full density in brick decomposition 
-   remap density from 3d brick decomposition to FFTdecomposition
-   for dispersion with arithmetic mixing rule
-------------------------------------------------------------------------- */
-
-void PPPMDisp::brick2fft_a()
-{
-  int n,ix,iy,iz;
-
-  // copy grabs inner portion of density from 3d brick
-  // remap could be done as pre-stage of FFT,
-  //   but this works optimally on only double values, not complex values
-
-  n = 0;
-  for (iz = nzlo_in_6; iz <= nzhi_in_6; iz++)
-    for (iy = nylo_in_6; iy <= nyhi_in_6; iy++)
-      for (ix = nxlo_in_6; ix <= nxhi_in_6; ix++) {
-        density_fft_a0[n] = density_brick_a0[iz][iy][ix];
-        density_fft_a1[n] = density_brick_a1[iz][iy][ix];
-        density_fft_a2[n] = density_brick_a2[iz][iy][ix];
-        density_fft_a3[n] = density_brick_a3[iz][iy][ix];
-        density_fft_a4[n] = density_brick_a4[iz][iy][ix];
-        density_fft_a5[n] = density_brick_a5[iz][iy][ix];
-        density_fft_a6[n++] = density_brick_a6[iz][iy][ix];
-      }
-
-  remap_6->perform(density_fft_a0,density_fft_a0,work1_6);
-  remap_6->perform(density_fft_a1,density_fft_a1,work1_6);
-  remap_6->perform(density_fft_a2,density_fft_a2,work1_6);
-  remap_6->perform(density_fft_a3,density_fft_a3,work1_6);
-  remap_6->perform(density_fft_a4,density_fft_a4,work1_6);
-  remap_6->perform(density_fft_a5,density_fft_a5,work1_6);
-  remap_6->perform(density_fft_a6,density_fft_a6,work1_6);
-
-}
-
-/* ----------------------------------------------------------------------
-   ghost-swap to accumulate full density in brick decomposition 
-   remap density from 3d brick decomposition to FFTdecomposition
-   for dispersion with special case
-------------------------------------------------------------------------- */
-
-void PPPMDisp::brick2fft_none()
-{
-  int k,n,ix,iy,iz;
-
-  // copy grabs inner portion of density from 3d brick
-  // remap could be done as pre-stage of FFT,
-  //   but this works optimally on only double values, not complex values
-
-  for (k = 0; k<nsplit_alloc; k++) {
-    n = 0;
-    for (iz = nzlo_in_6; iz <= nzhi_in_6; iz++)
-      for (iy = nylo_in_6; iy <= nyhi_in_6; iy++)
-        for (ix = nxlo_in_6; ix <= nxhi_in_6; ix++) 
-          density_fft_none[k][n++] = density_brick_none[k][iz][iy][ix];
-  }
-
-  for (k=0; k<nsplit_alloc; k++)
-    remap_6->perform(density_fft_none[k],density_fft_none[k],work1_6);
-}
-
-/* ----------------------------------------------------------------------
-   find center grid pt for each of my particles
-   check that full stencil for the particle will fit in my 3d brick
-   store central grid pt indices in part2grid array 
-------------------------------------------------------------------------- */
-
-void PPPMDisp::particle_map(double delx, double dely, double delz,
-                             double sft, int** p2g, int nup, int nlow,
-                             int nxlo, int nylo, int nzlo,
-                             int nxhi, int nyhi, int nzhi)
-{
-  int nx,ny,nz;
-
-  double **x = atom->x;
-  int nlocal = atom->nlocal;
-
-  int flag = 0;
-  for (int i = 0; i < nlocal; i++) {
-    
-    // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
-    // current particle coord can be outside global and local box
-    // add/subtract OFFSET to avoid int(-0.75) = 0 when want it to be -1
-
-    nx = static_cast<int> ((x[i][0]-boxlo[0])*delx+sft) - OFFSET;
-    ny = static_cast<int> ((x[i][1]-boxlo[1])*dely+sft) - OFFSET;
-    nz = static_cast<int> ((x[i][2]-boxlo[2])*delz+sft) - OFFSET;
-
-    p2g[i][0] = nx;
-    p2g[i][1] = ny;
-    p2g[i][2] = nz;
-
-    // check that entire stencil around nx,ny,nz will fit in my 3d brick
-
-    if (nx+nlow < nxlo || nx+nup > nxhi ||
-	ny+nlow < nylo || ny+nup > nyhi ||
-	nz+nlow < nzlo || nz+nup > nzhi)
-      flag = 1;
-  }
-
-  if (flag) error->one(FLERR,"Out of range atoms - cannot compute PPPMDisp");
-}
-
-
-void PPPMDisp::particle_map_c(double delx, double dely, double delz,
-                               double sft, int** p2g, int nup, int nlow,
-                               int nxlo, int nylo, int nzlo,
-                               int nxhi, int nyhi, int nzhi)
-{
-  particle_map(delx, dely, delz, sft, p2g, nup, nlow,
-               nxlo, nylo, nzlo, nxhi, nyhi, nzhi);
-}
-
-/* ----------------------------------------------------------------------
-   create discretized "density" on section of global grid due to my particles
-   density(x,y,z) = charge "density" at grid points of my 3d brick
-   (nxlo:nxhi,nylo:nyhi,nzlo:nzhi) is extent of my brick (including ghosts)
-   in global grid 
-------------------------------------------------------------------------- */
-
-void PPPMDisp::make_rho_c()
-{
-  int l,m,n,nx,ny,nz,mx,my,mz;
-  FFT_SCALAR dx,dy,dz,x0,y0,z0;
-
-  // clear 3d density array
-
-  memset(&(density_brick[nzlo_out][nylo_out][nxlo_out]),0,
-	 ngrid*sizeof(FFT_SCALAR));
-
-  // loop over my charges, add their contribution to nearby grid points
-  // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
-  // (dx,dy,dz) = distance to "lower left" grid pt
-  // (mx,my,mz) = global coords of moving stencil pt
-
-  double *q = atom->q;
-  double **x = atom->x;
-  int nlocal = atom->nlocal;
-
-  for (int i = 0; i < nlocal; i++) {
-
-    nx = part2grid[i][0];
-    ny = part2grid[i][1];
-    nz = part2grid[i][2];
-    dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv;
-    dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv;
-    dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv;
-
-    compute_rho1d(dx,dy,dz, order, rho_coeff, rho1d);
-
-    z0 = delvolinv * q[i];
-    for (n = nlower; n <= nupper; n++) {
-      mz = n+nz;
-      y0 = z0*rho1d[2][n];
-      for (m = nlower; m <= nupper; m++) {
-	my = m+ny;
-	x0 = y0*rho1d[1][m];
-	for (l = nlower; l <= nupper; l++) {
-	  mx = l+nx;
-	  density_brick[mz][my][mx] += x0*rho1d[0][l];
-	}
-      }
-    }
-  }
-}
-
-/* ----------------------------------------------------------------------
-   create discretized "density" on section of global grid due to my particles
-   density(x,y,z) = dispersion "density" at grid points of my 3d brick
-   (nxlo:nxhi,nylo:nyhi,nzlo:nzhi) is extent of my brick (including ghosts)
-   in global grid --- geometric mixing
-------------------------------------------------------------------------- */
-
-void PPPMDisp::make_rho_g()
-{
-  int l,m,n,nx,ny,nz,mx,my,mz;
-  FFT_SCALAR dx,dy,dz,x0,y0,z0;
-
-  // clear 3d density array
-
-  memset(&(density_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]),0,
-	 ngrid_6*sizeof(FFT_SCALAR));
-
-  // loop over my charges, add their contribution to nearby grid points
-  // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
-  // (dx,dy,dz) = distance to "lower left" grid pt
-  // (mx,my,mz) = global coords of moving stencil pt
-  int type;
-  double **x = atom->x;
-  int nlocal = atom->nlocal;
-
-  for (int i = 0; i < nlocal; i++) {
-
-    nx = part2grid_6[i][0];
-    ny = part2grid_6[i][1];
-    nz = part2grid_6[i][2];
-    dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6;
-    dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6;
-    dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6;
-
-    compute_rho1d(dx,dy,dz, order_6, rho_coeff_6, rho1d_6);
-    type = atom->type[i];
-    z0 = delvolinv_6 * B[type];
-    for (n = nlower_6; n <= nupper_6; n++) {
-      mz = n+nz;
-      y0 = z0*rho1d_6[2][n];
-      for (m = nlower_6; m <= nupper_6; m++) {
-	my = m+ny;
-	x0 = y0*rho1d_6[1][m];
-	for (l = nlower_6; l <= nupper_6; l++) {
-	  mx = l+nx;
-	  density_brick_g[mz][my][mx] += x0*rho1d_6[0][l];
-	}
-      }
-    }
-  }
-}
-
-
-/* ----------------------------------------------------------------------
-   create discretized "density" on section of global grid due to my particles
-   density(x,y,z) = dispersion "density" at grid points of my 3d brick
-   (nxlo:nxhi,nylo:nyhi,nzlo:nzhi) is extent of my brick (including ghosts)
-   in global grid --- arithmetic mixing
-------------------------------------------------------------------------- */
-
-void PPPMDisp::make_rho_a()
-{
-  int l,m,n,nx,ny,nz,mx,my,mz;
-  FFT_SCALAR dx,dy,dz,x0,y0,z0,w;
-
-  // clear 3d density array
-
-  memset(&(density_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]),0,
-	 ngrid_6*sizeof(FFT_SCALAR));
-  memset(&(density_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]),0,
-	 ngrid_6*sizeof(FFT_SCALAR));
-  memset(&(density_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]),0,
-	 ngrid_6*sizeof(FFT_SCALAR));
-  memset(&(density_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]),0,
-	 ngrid_6*sizeof(FFT_SCALAR));
-  memset(&(density_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]),0,
-	 ngrid_6*sizeof(FFT_SCALAR));
-  memset(&(density_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]),0,
-	 ngrid_6*sizeof(FFT_SCALAR));
-  memset(&(density_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]),0,
-	 ngrid_6*sizeof(FFT_SCALAR));
-
-  // loop over my particles, add their contribution to nearby grid points
-  // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
-  // (dx,dy,dz) = distance to "lower left" grid pt
-  // (mx,my,mz) = global coords of moving stencil pt
-  int type;
-  double **x = atom->x;
-  int nlocal = atom->nlocal;
-  
-  for (int i = 0; i < nlocal; i++) {
-
-    //do the following for all 4 grids
-    nx = part2grid_6[i][0];
-    ny = part2grid_6[i][1];
-    nz = part2grid_6[i][2];
-    dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6;
-    dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6;
-    dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6;
-    compute_rho1d(dx,dy,dz, order_6, rho_coeff_6, rho1d_6);
-    type = atom->type[i];
-    z0 = delvolinv_6;
-    for (n = nlower_6; n <= nupper_6; n++) {
-      mz = n+nz;
-      y0 = z0*rho1d_6[2][n];
-      for (m = nlower_6; m <= nupper_6; m++) {
-	my = m+ny;
-	x0 = y0*rho1d_6[1][m];
-	for (l = nlower_6; l <= nupper_6; l++) {
-	  mx = l+nx;
-          w = x0*rho1d_6[0][l];
-	  density_brick_a0[mz][my][mx] += w*B[7*type];
-	  density_brick_a1[mz][my][mx] += w*B[7*type+1];
-	  density_brick_a2[mz][my][mx] += w*B[7*type+2];
-	  density_brick_a3[mz][my][mx] += w*B[7*type+3];
-	  density_brick_a4[mz][my][mx] += w*B[7*type+4];
-	  density_brick_a5[mz][my][mx] += w*B[7*type+5];
-	  density_brick_a6[mz][my][mx] += w*B[7*type+6];
-	}
-      }
-    }
-  }
-}
-
-/* ----------------------------------------------------------------------
-   create discretized "density" on section of global grid due to my particles
-   density(x,y,z) = dispersion "density" at grid points of my 3d brick
-   (nxlo:nxhi,nylo:nyhi,nzlo:nzhi) is extent of my brick (including ghosts)
-   in global grid --- case when mixing rules don't apply
-------------------------------------------------------------------------- */
-
-void PPPMDisp::make_rho_none()
-{
-  int k,l,m,n,nx,ny,nz,mx,my,mz;
-  FFT_SCALAR dx,dy,dz,x0,y0,z0,w;
-
-  // clear 3d density array
-  for (k = 0; k < nsplit_alloc; k++)
-    memset(&(density_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]),0,
-	   ngrid_6*sizeof(FFT_SCALAR));
-
-
-  // loop over my particles, add their contribution to nearby grid points
-  // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
-  // (dx,dy,dz) = distance to "lower left" grid pt
-  // (mx,my,mz) = global coords of moving stencil pt
-  int type;
-  double **x = atom->x;
-  int nlocal = atom->nlocal;
-  
-  for (int i = 0; i < nlocal; i++) {
-
-    //do the following for all 4 grids
-    nx = part2grid_6[i][0];
-    ny = part2grid_6[i][1];
-    nz = part2grid_6[i][2];
-    dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6;
-    dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6;
-    dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6;
-    compute_rho1d(dx,dy,dz, order_6, rho_coeff_6, rho1d_6);
-    type = atom->type[i];
-    z0 = delvolinv_6;
-    for (n = nlower_6; n <= nupper_6; n++) {
-      mz = n+nz;
-      y0 = z0*rho1d_6[2][n];
-      for (m = nlower_6; m <= nupper_6; m++) {
-	my = m+ny;
-	x0 = y0*rho1d_6[1][m];
-	for (l = nlower_6; l <= nupper_6; l++) {
-	  mx = l+nx;
-          w = x0*rho1d_6[0][l];
-          for (k = 0; k < nsplit; k++)
-	    density_brick_none[k][mz][my][mx] += w*B[nsplit*type + k];
-	}
-      }
-    }
-  }
-}
-
-
-/* ----------------------------------------------------------------------
-   FFT-based Poisson solver for ik differentiation
-------------------------------------------------------------------------- */
-
-void PPPMDisp::poisson_ik(FFT_SCALAR* wk1, FFT_SCALAR* wk2,
-                           FFT_SCALAR* dfft, LAMMPS_NS::FFT3d* ft1,LAMMPS_NS::FFT3d* ft2, 
-                           int nx_p, int ny_p, int nz_p, int nft,
-                           int nxlo_ft, int nylo_ft, int nzlo_ft,
-                           int nxhi_ft, int nyhi_ft, int nzhi_ft,
-                           int nxlo_i, int nylo_i, int nzlo_i,
-                           int nxhi_i, int nyhi_i, int nzhi_i,
-                           double& egy, double* gfn,
-                           double* kx, double* ky, double* kz,
-                           double* kx2, double* ky2, double* kz2,
-                           FFT_SCALAR*** vx_brick, FFT_SCALAR*** vy_brick, FFT_SCALAR*** vz_brick,
-                           double* vir, double** vcoeff, double** vcoeff2,
-                           FFT_SCALAR*** u_pa, FFT_SCALAR*** v0_pa, FFT_SCALAR*** v1_pa, FFT_SCALAR*** v2_pa,
-                           FFT_SCALAR*** v3_pa, FFT_SCALAR*** v4_pa, FFT_SCALAR*** v5_pa)
-
-
-{
-  int i,j,k,n;
-  double eng;
-
-  // transform charge/dispersion density (r -> k) 
-  n = 0;
-  for (i = 0; i < nft; i++) {
-    wk1[n++] = dfft[i];
-    wk1[n++] = ZEROF;
-  }
-
-  ft1->compute(wk1,wk1,1);
-
-  // if requested, compute energy and virial contribution
-
-  double scaleinv = 1.0/(nx_p*ny_p*nz_p);
-  double s2 = scaleinv*scaleinv;
-
-  if (eflag_global || vflag_global) {
-    if (vflag_global) {
-      n = 0;
-      for (i = 0; i < nft; i++) {
-	eng = s2 * gfn[i] * (wk1[n]*wk1[n] + wk1[n+1]*wk1[n+1]);
-	for (j = 0; j < 6; j++) vir[j] += eng*vcoeff[i][j];
-	if (eflag_global) egy += eng;
-	n += 2;
-      }
-    } else {
-      n = 0;
-      for (i = 0; i < nft; i++) {
-	egy += 
-	  s2 * gfn[i] * (wk1[n]*wk1[n] + wk1[n+1]*wk1[n+1]);
-	n += 2;
-      }
-    }
-  }
-
-  // scale by 1/total-grid-pts to get rho(k)
-  // multiply by Green's function to get V(k)
-
-  n = 0;
-  for (i = 0; i < nft; i++) {
-    wk1[n++] *= scaleinv * gfn[i];
-    wk1[n++] *= scaleinv * gfn[i];
-  }
-
-  // compute gradients of V(r) in each of 3 dims by transformimg -ik*V(k)
-  // FFT leaves data in 3d brick decomposition
-  // copy it into inner portion of vdx,vdy,vdz arrays
-
-  // x & y direction gradient
-
-  n = 0;
-  for (k = nzlo_ft; k <= nzhi_ft; k++)
-    for (j = nylo_ft; j <= nyhi_ft; j++)
-      for (i = nxlo_ft; i <= nxhi_ft; i++) {
-	wk2[n] = 0.5*(kx[i]-kx2[i])*wk1[n+1] + 0.5*(ky[j]-ky2[j])*wk1[n];
-	wk2[n+1] = -0.5*(kx[i]-kx2[i])*wk1[n] + 0.5*(ky[j]-ky2[j])*wk1[n+1];
-	n += 2;
-      }
-
-  ft2->compute(wk2,wk2,-1);
-
-  n = 0;
-  for (k = nzlo_i; k <= nzhi_i; k++)
-    for (j = nylo_i; j <= nyhi_i; j++)
-      for (i = nxlo_i; i <= nxhi_i; i++) {
-	vx_brick[k][j][i] = wk2[n++];
-	vy_brick[k][j][i] = wk2[n++];
-      }
-
-  if (!eflag_atom) {
-    // z direction gradient only
-
-    n = 0;
-    for (k = nzlo_ft; k <= nzhi_ft; k++)
-      for (j = nylo_ft; j <= nyhi_ft; j++)
-        for (i = nxlo_ft; i <= nxhi_ft; i++) {
-	  wk2[n] = kz[k]*wk1[n+1];
-	  wk2[n+1] = -kz[k]*wk1[n];
-	  n += 2;
-        }
-
-    ft2->compute(wk2,wk2,-1);
-
-
-    n = 0;
-    for (k = nzlo_i; k <= nzhi_i; k++)
-      for (j = nylo_i; j <= nyhi_i; j++)
-        for (i = nxlo_i; i <= nxhi_i; i++) {
-	  vz_brick[k][j][i] = wk2[n];
-	  n += 2;
-        }
-
-  }
-
-  else {
-    // z direction gradient & per-atom energy
-
-    n = 0;
-    for (k = nzlo_ft; k <= nzhi_ft; k++)
-      for (j = nylo_ft; j <= nyhi_ft; j++)
-        for (i = nxlo_ft; i <= nxhi_ft; i++) {
-	  wk2[n] = 0.5*(kz[k]-kz2[k])*wk1[n+1] - wk1[n+1];
-	  wk2[n+1] = -0.5*(kz[k]-kz2[k])*wk1[n] + wk1[n];
-	  n += 2;
-        }
-
-    ft2->compute(wk2,wk2,-1);
-
-    n = 0;
-    for (k = nzlo_i; k <= nzhi_i; k++)
-      for (j = nylo_i; j <= nyhi_i; j++)
-        for (i = nxlo_i; i <= nxhi_i; i++) {
-	  vz_brick[k][j][i] = wk2[n++];
-	  u_pa[k][j][i] = wk2[n++];;
-        }
-  }
-
-  if (vflag_atom) poisson_peratom(wk1, wk2, ft2, vcoeff, vcoeff2, nft,
-                                  nxlo_i, nylo_i, nzlo_i, nxhi_i, nyhi_i, nzhi_i,
-                                  v0_pa, v1_pa, v2_pa, v3_pa, v4_pa, v5_pa);
-}
-
-/* ----------------------------------------------------------------------
-   FFT-based Poisson solver for ad differentiation
-------------------------------------------------------------------------- */
-
-void PPPMDisp::poisson_ad(FFT_SCALAR* wk1, FFT_SCALAR* wk2,
-                           FFT_SCALAR* dfft, LAMMPS_NS::FFT3d* ft1,LAMMPS_NS::FFT3d* ft2, 
-                           int nx_p, int ny_p, int nz_p, int nft,
-                           int nxlo_ft, int nylo_ft, int nzlo_ft,
-                           int nxhi_ft, int nyhi_ft, int nzhi_ft,
-                           int nxlo_i, int nylo_i, int nzlo_i,
-                           int nxhi_i, int nyhi_i, int nzhi_i,
-                           double& egy, double* gfn,
-                           double* vir, double** vcoeff, double** vcoeff2,
-                           FFT_SCALAR*** u_pa, FFT_SCALAR*** v0_pa, FFT_SCALAR*** v1_pa, FFT_SCALAR*** v2_pa,
-                           FFT_SCALAR*** v3_pa, FFT_SCALAR*** v4_pa, FFT_SCALAR*** v5_pa)
-
-
-{
-  int i,j,k,n;
-  double eng;
-
-  // transform charge/dispersion density (r -> k) 
-  n = 0;
-  for (i = 0; i < nft; i++) {
-    wk1[n++] = dfft[i];
-    wk1[n++] = ZEROF;
-  }
-
-  ft1->compute(wk1,wk1,1);
-
-  // if requested, compute energy and virial contribution
-
-  double scaleinv = 1.0/(nx_p*ny_p*nz_p);
-  double s2 = scaleinv*scaleinv;
-
-  if (eflag_global || vflag_global) {
-    if (vflag_global) {
-      n = 0;
-      for (i = 0; i < nft; i++) {
-	eng = s2 * gfn[i] * (wk1[n]*wk1[n] + wk1[n+1]*wk1[n+1]);
-	for (j = 0; j < 6; j++) vir[j] += eng*vcoeff[i][j];
-	if (eflag_global) egy += eng;
-	n += 2;
-      }
-    } else {
-      n = 0;
-      for (i = 0; i < nft; i++) {
-	egy += 
-	  s2 * gfn[i] * (wk1[n]*wk1[n] + wk1[n+1]*wk1[n+1]);
-	n += 2;
-      }
-    }
-  }
-
-  // scale by 1/total-grid-pts to get rho(k)
-  // multiply by Green's function to get V(k)
-
-  n = 0;
-  for (i = 0; i < nft; i++) {
-    wk1[n++] *= scaleinv * gfn[i];
-    wk1[n++] *= scaleinv * gfn[i];
-  }
-
-
-  n = 0;
-  for (k = nzlo_ft; k <= nzhi_ft; k++)
-    for (j = nylo_ft; j <= nyhi_ft; j++)
-      for (i = nxlo_ft; i <= nxhi_ft; i++) {
-        wk2[n] = wk1[n];
-	wk2[n+1] = wk1[n+1];
-	n += 2;
-      }
-
-  ft2->compute(wk2,wk2,-1);
-
-
-  n = 0;
-  for (k = nzlo_i; k <= nzhi_i; k++)
-    for (j = nylo_i; j <= nyhi_i; j++)
-      for (i = nxlo_i; i <= nxhi_i; i++) {
-	u_pa[k][j][i] = wk2[n++];
-        n++;
-      }
-
-
-  if (vflag_atom) poisson_peratom(wk1, wk2, ft2, vcoeff, vcoeff2, nft,
-                                  nxlo_i, nylo_i, nzlo_i, nxhi_i, nyhi_i, nzhi_i,
-                                  v0_pa, v1_pa, v2_pa, v3_pa, v4_pa, v5_pa);
-
-}
-
-/* ----------------------------------------------------------------------
-   Fourier Transform for per atom virial calculations
-------------------------------------------------------------------------- */
-
-void PPPMDisp:: poisson_peratom(FFT_SCALAR* wk1, FFT_SCALAR* wk2, LAMMPS_NS::FFT3d* ft2, 
-                                 double** vcoeff, double** vcoeff2, int nft,
-                                 int nxlo_i, int nylo_i, int nzlo_i,
-                                 int nxhi_i, int nyhi_i, int nzhi_i,
-                                 FFT_SCALAR*** v0_pa, FFT_SCALAR*** v1_pa, FFT_SCALAR*** v2_pa,
-                                 FFT_SCALAR*** v3_pa, FFT_SCALAR*** v4_pa, FFT_SCALAR*** v5_pa)
-{
- //v0 & v1 term
-  int n, i, j, k;
-  n = 0;
-  for (i = 0; i < nft; i++) {
-    wk2[n] = wk1[n]*vcoeff[i][0] - wk1[n+1]*vcoeff[i][1];
-    wk2[n+1] = wk1[n+1]*vcoeff[i][0] +  wk1[n]*vcoeff[i][1];
-    n += 2;
-  }
-
-  ft2->compute(wk2,wk2,-1); 
-
-  n = 0;
-  for (k = nzlo_i; k <= nzhi_i; k++)
-    for (j = nylo_i; j <= nyhi_i; j++)
-      for (i = nxlo_i; i <= nxhi_i; i++) {
-        v0_pa[k][j][i] = wk2[n++];
-        v1_pa[k][j][i] = wk2[n++];
-      }
-
-  //v2 & v3 term
-   
-  n = 0;
-  for (i = 0; i < nft; i++) {
-    wk2[n] = wk1[n]*vcoeff[i][2] - wk1[n+1]*vcoeff2[i][0];
-    wk2[n+1] = wk1[n+1]*vcoeff[i][2] + wk1[n]*vcoeff2[i][0];
-    n += 2;
-  }
-
-  ft2->compute(wk2,wk2,-1); 
-
-  n = 0;
-  for (k = nzlo_i; k <= nzhi_i; k++)
-    for (j = nylo_i; j <= nyhi_i; j++)
-      for (i = nxlo_i; i <= nxhi_i; i++) {
-        v2_pa[k][j][i] = wk2[n++];
-        v3_pa[k][j][i] = wk2[n++];
-      }
-
-  //v4 & v5 term
-   
-  n = 0;
-  for (i = 0; i < nft; i++) {
-    wk2[n] = wk1[n]*vcoeff2[i][1] - wk1[n+1]*vcoeff2[i][2];
-    wk2[n+1] = wk1[n+1]*vcoeff2[i][1] + wk1[n]*vcoeff2[i][2];
-    n += 2;
-  }
-
-  ft2->compute(wk2,wk2,-1); 
-
-  n = 0;
-  for (k = nzlo_i; k <= nzhi_i; k++)
-    for (j = nylo_i; j <= nyhi_i; j++)
-      for (i = nxlo_i; i <= nxhi_i; i++) {
-        v4_pa[k][j][i] = wk2[n++];
-        v5_pa[k][j][i] = wk2[n++];
-      }	 
- 
-}
-
-/* ----------------------------------------------------------------------
-   Poisson solver for one mesh with 2 different dispersion densities 
-   for ik scheme
-------------------------------------------------------------------------- */
-
-void PPPMDisp::poisson_2s_ik(FFT_SCALAR* dfft_1, FFT_SCALAR* dfft_2,
-                              FFT_SCALAR*** vxbrick_1, FFT_SCALAR*** vybrick_1, FFT_SCALAR*** vzbrick_1,
-                              FFT_SCALAR*** vxbrick_2, FFT_SCALAR*** vybrick_2, FFT_SCALAR*** vzbrick_2,
-                              FFT_SCALAR*** u_pa_1, FFT_SCALAR*** v0_pa_1, FFT_SCALAR*** v1_pa_1, FFT_SCALAR*** v2_pa_1,
-                              FFT_SCALAR*** v3_pa_1, FFT_SCALAR*** v4_pa_1, FFT_SCALAR*** v5_pa_1,
-                              FFT_SCALAR*** u_pa_2, FFT_SCALAR*** v0_pa_2, FFT_SCALAR*** v1_pa_2, FFT_SCALAR*** v2_pa_2,
-                              FFT_SCALAR*** v3_pa_2, FFT_SCALAR*** v4_pa_2, FFT_SCALAR*** v5_pa_2)
-
-{
-  int i,j,k,n;
-  double eng;
-
-  double scaleinv = 1.0/(nx_pppm_6*ny_pppm_6*nz_pppm_6);
-
-  // transform charge/dispersion density (r -> k)
-  // only one tansform required when energies and pressures do not
-  //  need to be calculated 
-  if (eflag_global + vflag_global == 0) {
-    n = 0;
-    for (i = 0; i < nfft_6; i++) {
-      work1_6[n++] = dfft_1[i];
-      work1_6[n++] = dfft_2[i];
-    }
-  
-    fft1_6->compute(work1_6,work1_6,1);
-  }
-  // two transforms are required when energies and pressures are
-  //   calculated
-  else {
-    n = 0;
-    for (i = 0; i < nfft_6; i++) {
-      work1_6[n] = dfft_1[i];
-      work2_6[n++] = ZEROF;
-      work1_6[n] = ZEROF;
-      work2_6[n++] = dfft_2[i];
-    }
-
-    fft1_6->compute(work1_6,work1_6,1);
-    fft1_6->compute(work2_6,work2_6,1);
-
-    double s2 = scaleinv*scaleinv;
-
-    if (vflag_global) {
-      n = 0;
-      for (i = 0; i < nfft_6; i++) {
-	eng = 2 * s2 * greensfn_6[i] * (work1_6[n]*work2_6[n+1] - work1_6[n+1]*work2_6[n]);
-	for (j = 0; j < 6; j++) virial_6[j] += eng*vg_6[i][j];
-	if (eflag_global)energy_6 += eng;
-	n += 2;
-      }
-    } else {
-      n = 0;
-      for (i = 0; i < nfft_6; i++) {
-	energy_6 += 
-	  2 * s2 * greensfn_6[i] * (work1_6[n]*work2_6[n+1] - work1_6[n+1]*work2_6[n]);
-	n += 2;
-      }
-    }
-    // unify the two transformed vectors for efficient calculations later
-    for ( i = 0; i < 2*nfft_6; i++) {
-      work1_6[i] += work2_6[i];
-    }
-  }
-
-  n = 0;
-  for (i = 0; i < nfft_6; i++) {
-    work1_6[n++] *= scaleinv * greensfn_6[i];
-    work1_6[n++] *= scaleinv * greensfn_6[i];
-  }
-
-  // compute gradients of V(r) in each of 3 dims by transformimg -ik*V(k)
-  // FFT leaves data in 3d brick decomposition
-  // copy it into inner portion of vdx,vdy,vdz arrays
-
-  // x direction gradient
-
-  n = 0;
-  for (k = nzlo_fft_6; k <= nzhi_fft_6; k++)
-    for (j = nylo_fft_6; j <= nyhi_fft_6; j++)
-      for (i = nxlo_fft_6; i <= nxhi_fft_6; i++) {
-	work2_6[n] = 0.5*(fkx_6[i]-fkx2_6[i])*work1_6[n+1];
-	work2_6[n+1] = -0.5*(fkx_6[i]-fkx2_6[i])*work1_6[n];
-	n += 2;
-      }
-
-  fft2_6->compute(work2_6,work2_6,-1);
-  
-  n = 0;
-  for (k = nzlo_in_6; k <= nzhi_in_6; k++)
-    for (j = nylo_in_6; j <= nyhi_in_6; j++)
-      for (i = nxlo_in_6; i <= nxhi_in_6; i++) {
-	vxbrick_1[k][j][i] = work2_6[n++];
-        vxbrick_2[k][j][i] = work2_6[n++];
-      }
-
-  // y direction gradient
-
-  n = 0;
-  for (k = nzlo_fft_6; k <= nzhi_fft_6; k++)
-    for (j = nylo_fft_6; j <= nyhi_fft_6; j++)
-      for (i = nxlo_fft_6; i <= nxhi_fft_6; i++) {
-	work2_6[n] = 0.5*(fky_6[j]-fky2_6[j])*work1_6[n+1];
-	work2_6[n+1] = -0.5*(fky_6[j]-fky2_6[j])*work1_6[n];
-	n += 2;
-      }
-
-  fft2_6->compute(work2_6,work2_6,-1);
-
-  n = 0;
-  for (k = nzlo_in_6; k <= nzhi_in_6; k++)
-    for (j = nylo_in_6; j <= nyhi_in_6; j++)
-      for (i = nxlo_in_6; i <= nxhi_in_6; i++) {
-	vybrick_1[k][j][i] = work2_6[n++];
-        vybrick_2[k][j][i] = work2_6[n++];
-      }
-
-  // z direction gradient
-
-  n = 0;
-  for (k = nzlo_fft_6; k <= nzhi_fft_6; k++)
-    for (j = nylo_fft_6; j <= nyhi_fft_6; j++)
-      for (i = nxlo_fft_6; i <= nxhi_fft_6; i++) {
-	work2_6[n] = 0.5*(fkz_6[k]-fkz2_6[k])*work1_6[n+1];
-	work2_6[n+1] = -0.5*(fkz_6[k]-fkz2_6[k])*work1_6[n];
-	n += 2;
-      }
-
-  fft2_6->compute(work2_6,work2_6,-1);
-
-  n = 0;
-  for (k = nzlo_in_6; k <= nzhi_in_6; k++)
-    for (j = nylo_in_6; j <= nyhi_in_6; j++)
-      for (i = nxlo_in_6; i <= nxhi_in_6; i++) {
-	vzbrick_1[k][j][i] = work2_6[n++];
-	vzbrick_2[k][j][i] = work2_6[n++];
-      }
-
-  //Per-atom energy
-    
-  if (eflag_atom) {
-    n = 0;
-    for (i = 0; i < nfft_6; i++) {
-      work2_6[n] = work1_6[n];
-      work2_6[n+1] = work1_6[n+1];
-      n += 2;
-    }
-    
-    fft2_6->compute(work2_6,work2_6,-1); 
-    
-    n = 0;
-    for (k = nzlo_in_6; k <= nzhi_in_6; k++)
-      for (j = nylo_in_6; j <= nyhi_in_6; j++)
-        for (i = nxlo_in_6; i <= nxhi_in_6; i++) {
-          u_pa_1[k][j][i] = work2_6[n++];
-          u_pa_2[k][j][i] = work2_6[n++];
-        }
-  } 
-
-  if (vflag_atom) poisson_2s_peratom(v0_pa_1, v1_pa_1, v2_pa_1, v3_pa_1, v4_pa_1, v5_pa_1,
-                                     v0_pa_2, v1_pa_2, v2_pa_2, v3_pa_2, v4_pa_2, v5_pa_2);
-}
-
-
-/* ----------------------------------------------------------------------
-   Poisson solver for one mesh with 2 different dispersion densities 
-   for ik scheme
-------------------------------------------------------------------------- */
-
-void PPPMDisp::poisson_none_ik(int n1, int n2,FFT_SCALAR* dfft_1, FFT_SCALAR* dfft_2,
-                              FFT_SCALAR*** vxbrick_1, FFT_SCALAR*** vybrick_1, FFT_SCALAR*** vzbrick_1,
-                              FFT_SCALAR*** vxbrick_2, FFT_SCALAR*** vybrick_2, FFT_SCALAR*** vzbrick_2,
-                              FFT_SCALAR**** u_pa, FFT_SCALAR**** v0_pa, FFT_SCALAR**** v1_pa, FFT_SCALAR**** v2_pa,
-                              FFT_SCALAR**** v3_pa, FFT_SCALAR**** v4_pa, FFT_SCALAR**** v5_pa)
-{
-  int i,j,k,n;
-  double eng;
-
-  double scaleinv = 1.0/(nx_pppm_6*ny_pppm_6*nz_pppm_6);
-
-  // transform charge/dispersion density (r -> k)
-  // only one tansform required when energies and pressures do not
-  //  need to be calculated 
-  if (eflag_global + vflag_global == 0) {
-    n = 0;
-    for (i = 0; i < nfft_6; i++) {
-      work1_6[n++] = dfft_1[i];
-      work1_6[n++] = dfft_2[i];
-    }
-  
-    fft1_6->compute(work1_6,work1_6,1);
-  }
-
-
-  // two transforms are required when energies and pressures are
-  //   calculated
-  else {
-    n = 0;
-    for (i = 0; i < nfft_6; i++) {
-      work1_6[n] = dfft_1[i];
-      work2_6[n++] = ZEROF;
-      work1_6[n] = ZEROF;
-      work2_6[n++] = dfft_2[i];
-    }
-   
-
-    fft1_6->compute(work1_6,work1_6,1);
-    fft1_6->compute(work2_6,work2_6,1);
-
-    double s2 = scaleinv*scaleinv;
-
-    if (vflag_global) {
-      n = 0;
-      for (i = 0; i < nfft_6; i++) {
-	eng = s2 * greensfn_6[i] * (B[n1]*(work1_6[n]*work1_6[n] + work1_6[n+1]*work1_6[n+1]) + B[n2]*(work2_6[n]*work2_6[n] + work2_6[n+1]*work2_6[n+1]));
-	for (j = 0; j < 6; j++) virial_6[j] += eng*vg_6[i][j];
-	if (eflag_global)energy_6 += eng;
-	n += 2;
-      }
-    } else {
-      n = 0;
-      for (i = 0; i < nfft_6; i++) {
-	energy_6 += 
-	  s2 * greensfn_6[i] * (B[n1]*(work1_6[n]*work1_6[n] + work1_6[n+1]*work1_6[n+1]) + B[n2]*(work2_6[n]*work2_6[n] + work2_6[n+1]*work2_6[n+1]));
-	n += 2;
-      }
-    }
-    // unify the two transformed vectors for efficient calculations later
-    for ( i = 0; i < 2*nfft_6; i++) {
-      work1_6[i] += work2_6[i];
-    }
-  }
-
-  n = 0;
-  for (i = 0; i < nfft_6; i++) {
-    work1_6[n++] *= scaleinv * greensfn_6[i];
-    work1_6[n++] *= scaleinv * greensfn_6[i];
-  }
-
-  // compute gradients of V(r) in each of 3 dims by transformimg -ik*V(k)
-  // FFT leaves data in 3d brick decomposition
-  // copy it into inner portion of vdx,vdy,vdz arrays
-
-  // x direction gradient
-
-  n = 0;
-  for (k = nzlo_fft_6; k <= nzhi_fft_6; k++)
-    for (j = nylo_fft_6; j <= nyhi_fft_6; j++)
-      for (i = nxlo_fft_6; i <= nxhi_fft_6; i++) {
-	work2_6[n] = 0.5*(fkx_6[i]-fkx2_6[i])*work1_6[n+1];
-	work2_6[n+1] = -0.5*(fkx_6[i]-fkx2_6[i])*work1_6[n];
-	n += 2;
-      }
-
-  fft2_6->compute(work2_6,work2_6,-1);
-  
-  n = 0;
-  for (k = nzlo_in_6; k <= nzhi_in_6; k++)
-    for (j = nylo_in_6; j <= nyhi_in_6; j++)
-      for (i = nxlo_in_6; i <= nxhi_in_6; i++) {
-	vxbrick_1[k][j][i] = B[n1]*work2_6[n++];
-        vxbrick_2[k][j][i] = B[n2]*work2_6[n++];
-      }
-
-  // y direction gradient
-
-  n = 0;
-  for (k = nzlo_fft_6; k <= nzhi_fft_6; k++)
-    for (j = nylo_fft_6; j <= nyhi_fft_6; j++)
-      for (i = nxlo_fft_6; i <= nxhi_fft_6; i++) {
-	work2_6[n] = 0.5*(fky_6[j]-fky2_6[j])*work1_6[n+1];
-	work2_6[n+1] = -0.5*(fky_6[j]-fky2_6[j])*work1_6[n];
-	n += 2;
-      }
-
-  fft2_6->compute(work2_6,work2_6,-1);
-
-  n = 0;
-  for (k = nzlo_in_6; k <= nzhi_in_6; k++)
-    for (j = nylo_in_6; j <= nyhi_in_6; j++)
-      for (i = nxlo_in_6; i <= nxhi_in_6; i++) {
-	vybrick_1[k][j][i] = B[n1]*work2_6[n++];
-        vybrick_2[k][j][i] = B[n2]*work2_6[n++];
-      }
-
-  // z direction gradient
-
-  n = 0;
-  for (k = nzlo_fft_6; k <= nzhi_fft_6; k++)
-    for (j = nylo_fft_6; j <= nyhi_fft_6; j++)
-      for (i = nxlo_fft_6; i <= nxhi_fft_6; i++) {
-	work2_6[n] = 0.5*(fkz_6[k]-fkz2_6[k])*work1_6[n+1];
-	work2_6[n+1] = -0.5*(fkz_6[k]-fkz2_6[k])*work1_6[n];
-	n += 2;
-      }
-
-  fft2_6->compute(work2_6,work2_6,-1);
-
-  n = 0;
-  for (k = nzlo_in_6; k <= nzhi_in_6; k++)
-    for (j = nylo_in_6; j <= nyhi_in_6; j++)
-      for (i = nxlo_in_6; i <= nxhi_in_6; i++) {
-	vzbrick_1[k][j][i] = B[n1]*work2_6[n++];
-	vzbrick_2[k][j][i] = B[n2]*work2_6[n++];
-      }
-
-  //Per-atom energy
-    
-  if (eflag_atom) {
-    n = 0;
-    for (i = 0; i < nfft_6; i++) {
-      work2_6[n] = work1_6[n];
-      work2_6[n+1] = work1_6[n+1];
-      n += 2;
-    }
-    
-    fft2_6->compute(work2_6,work2_6,-1); 
-    
-    n = 0;
-    for (k = nzlo_in_6; k <= nzhi_in_6; k++)
-      for (j = nylo_in_6; j <= nyhi_in_6; j++)
-        for (i = nxlo_in_6; i <= nxhi_in_6; i++) {
-          u_pa[n1][k][j][i] = B[n1]*work2_6[n++];
-          u_pa[n2][k][j][i] = B[n2]*work2_6[n++];
-        }
-  } 
-
-  if (vflag_atom) poisson_none_peratom(n1,n2,
-                                       v0_pa[n1], v1_pa[n1], v2_pa[n1], v3_pa[n1], v4_pa[n1], v5_pa[n1],
-                                       v0_pa[n2], v1_pa[n2], v2_pa[n2], v3_pa[n2], v4_pa[n2], v5_pa[n2]);
-}
-
-/* ----------------------------------------------------------------------
-   Poisson solver for one mesh with 2 different dispersion densities 
-   for ad scheme
-------------------------------------------------------------------------- */
-
-void PPPMDisp::poisson_2s_ad(FFT_SCALAR* dfft_1, FFT_SCALAR* dfft_2,
-                              FFT_SCALAR*** u_pa_1, FFT_SCALAR*** v0_pa_1, FFT_SCALAR*** v1_pa_1, FFT_SCALAR*** v2_pa_1,
-                              FFT_SCALAR*** v3_pa_1, FFT_SCALAR*** v4_pa_1, FFT_SCALAR*** v5_pa_1,
-                              FFT_SCALAR*** u_pa_2, FFT_SCALAR*** v0_pa_2, FFT_SCALAR*** v1_pa_2, FFT_SCALAR*** v2_pa_2,
-                              FFT_SCALAR*** v3_pa_2, FFT_SCALAR*** v4_pa_2, FFT_SCALAR*** v5_pa_2)
-
-{
-  int i,j,k,n;
-  double eng;
-
-  double scaleinv = 1.0/(nx_pppm_6*ny_pppm_6*nz_pppm_6);
-
-  // transform charge/dispersion density (r -> k)
-  // only one tansform required when energies and pressures do not
-  //  need to be calculated 
-  if (eflag_global + vflag_global == 0) {
-    n = 0;
-    for (i = 0; i < nfft_6; i++) {
-      work1_6[n++] = dfft_1[i];
-      work1_6[n++] = dfft_2[i];
-    }
-  
-    fft1_6->compute(work1_6,work1_6,1);
-  }
-  // two transforms are required when energies and pressures are
-  //   calculated
-  else {
-    n = 0;
-    for (i = 0; i < nfft_6; i++) {
-      work1_6[n] = dfft_1[i];
-      work2_6[n++] = ZEROF;
-      work1_6[n] = ZEROF;
-      work2_6[n++] = dfft_2[i];
-    }
-
-    fft1_6->compute(work1_6,work1_6,1);
-    fft1_6->compute(work2_6,work2_6,1);
-
-    double s2 = scaleinv*scaleinv;
-
-    if (vflag_global) {
-      n = 0;
-      for (i = 0; i < nfft_6; i++) {
-	eng = 2 * s2 * greensfn_6[i] * (work1_6[n]*work2_6[n+1] - work1_6[n+1]*work2_6[n]);
-	for (j = 0; j < 6; j++) virial_6[j] += eng*vg_6[i][j];
-	if (eflag_global)energy_6 += eng;
-	n += 2;
-      }
-    } else {
-      n = 0;
-      for (i = 0; i < nfft_6; i++) {
-	energy_6 += 
-	  2 * s2 * greensfn_6[i] * (work1_6[n]*work2_6[n+1] - work1_6[n+1]*work2_6[n]);
-	n += 2;
-      }
-    }
-    // unify the two transformed vectors for efficient calculations later
-    for ( i = 0; i < 2*nfft_6; i++) {
-      work1_6[i] += work2_6[i];
-    }
-  }
-
-
-  n = 0;
-  for (i = 0; i < nfft_6; i++) {
-    work1_6[n++] *= scaleinv * greensfn_6[i];
-    work1_6[n++] *= scaleinv * greensfn_6[i];
-  }
-
-
-  n = 0;
-  for (i = 0; i < nfft_6; i++) {
-    work2_6[n] = work1_6[n];
-    work2_6[n+1] = work1_6[n+1];
-    n += 2;
-  }
-    
-  fft2_6->compute(work2_6,work2_6,-1); 
-    
-  n = 0;
-  for (k = nzlo_in_6; k <= nzhi_in_6; k++)
-    for (j = nylo_in_6; j <= nyhi_in_6; j++)
-      for (i = nxlo_in_6; i <= nxhi_in_6; i++) {
-        u_pa_1[k][j][i] = work2_6[n++];
-        u_pa_2[k][j][i] = work2_6[n++];
-      } 
-
-  if (vflag_atom) poisson_2s_peratom(v0_pa_1, v1_pa_1, v2_pa_1, v3_pa_1, v4_pa_1, v5_pa_1,
-                                     v0_pa_2, v1_pa_2, v2_pa_2, v3_pa_2, v4_pa_2, v5_pa_2);
-}
-
-/* ----------------------------------------------------------------------
-   Poisson solver for one mesh with 2 different dispersion densities 
-   for ad scheme
-------------------------------------------------------------------------- */
-
-void PPPMDisp::poisson_none_ad(int n1, int n2, FFT_SCALAR* dfft_1, FFT_SCALAR* dfft_2,
-                               FFT_SCALAR*** u_pa_1, FFT_SCALAR*** u_pa_2,
-                               FFT_SCALAR**** v0_pa, FFT_SCALAR**** v1_pa, FFT_SCALAR**** v2_pa,
-                               FFT_SCALAR**** v3_pa, FFT_SCALAR**** v4_pa, FFT_SCALAR**** v5_pa)
-{
-  int i,j,k,n;
-  double eng;
-
-  double scaleinv = 1.0/(nx_pppm_6*ny_pppm_6*nz_pppm_6);
-
-  // transform charge/dispersion density (r -> k)
-  // only one tansform required when energies and pressures do not
-  //  need to be calculated 
-  if (eflag_global + vflag_global == 0) {
-    n = 0;
-    for (i = 0; i < nfft_6; i++) {
-      work1_6[n++] = dfft_1[i];
-      work1_6[n++] = dfft_2[i];
-    }
-  
-    fft1_6->compute(work1_6,work1_6,1);
-  }
-  // two transforms are required when energies and pressures are
-  //   calculated
-  else {
-    n = 0;
-    for (i = 0; i < nfft_6; i++) {
-      work1_6[n] = dfft_1[i];
-      work2_6[n++] = ZEROF;
-      work1_6[n] = ZEROF;
-      work2_6[n++] = dfft_2[i];
-    }
-
-    fft1_6->compute(work1_6,work1_6,1);
-    fft1_6->compute(work2_6,work2_6,1);
-
-    double s2 = scaleinv*scaleinv;
-
-    if (vflag_global) {
-      n = 0;
-      for (i = 0; i < nfft_6; i++) {
-	eng = s2 * greensfn_6[i] * (B[n1]*(work1_6[n]*work1_6[n] + work1_6[n+1]*work1_6[n+1]) + B[n2]*(work2_6[n]*work2_6[n] + work2_6[n+1]*work2_6[n+1]));
-	for (j = 0; j < 6; j++) virial_6[j] += eng*vg_6[i][j];
-	if (eflag_global)energy_6 += eng;
-	n += 2;
-      }
-    } else {
-      n = 0;
-      for (i = 0; i < nfft_6; i++) {
-	energy_6 += 
-	  s2 * greensfn_6[i] * (B[n1]*(work1_6[n]*work1_6[n] + work1_6[n+1]*work1_6[n+1]) + B[n2]*(work2_6[n]*work2_6[n] + work2_6[n+1]*work2_6[n+1]));
-	n += 2;
-      }
-    }
-    // unify the two transformed vectors for efficient calculations later
-    for ( i = 0; i < 2*nfft_6; i++) {
-      work1_6[i] += work2_6[i];
-    }
-  }
-
-
-  n = 0;
-  for (i = 0; i < nfft_6; i++) {
-    work1_6[n++] *= scaleinv * greensfn_6[i];
-    work1_6[n++] *= scaleinv * greensfn_6[i];
-  }
-
-
-  n = 0;
-  for (i = 0; i < nfft_6; i++) {
-    work2_6[n] = work1_6[n];
-    work2_6[n+1] = work1_6[n+1];
-    n += 2;
-  }
-  
-  fft2_6->compute(work2_6,work2_6,-1); 
-    
-  n = 0;
-  for (k = nzlo_in_6; k <= nzhi_in_6; k++)
-    for (j = nylo_in_6; j <= nyhi_in_6; j++)
-      for (i = nxlo_in_6; i <= nxhi_in_6; i++) {
-        u_pa_1[k][j][i] = B[n1]*work2_6[n++];
-        u_pa_2[k][j][i] = B[n2]*work2_6[n++];
-      } 
-
-  if (vflag_atom) poisson_none_peratom(n1,n2,
-                                       v0_pa[n1], v1_pa[n1], v2_pa[n1], v3_pa[n1], v4_pa[n1], v5_pa[n1],
-                                       v0_pa[n2], v1_pa[n2], v2_pa[n2], v3_pa[n2], v4_pa[n2], v5_pa[n2]);
-}
-
-/* ----------------------------------------------------------------------
-   Fourier Transform for per atom virial calculations
-------------------------------------------------------------------------- */
-
-void PPPMDisp::poisson_2s_peratom(FFT_SCALAR*** v0_pa_1, FFT_SCALAR*** v1_pa_1, FFT_SCALAR*** v2_pa_1,
-                                   FFT_SCALAR*** v3_pa_1, FFT_SCALAR*** v4_pa_1, FFT_SCALAR*** v5_pa_1,
-                                   FFT_SCALAR*** v0_pa_2, FFT_SCALAR*** v1_pa_2, FFT_SCALAR*** v2_pa_2,
-                                   FFT_SCALAR*** v3_pa_2, FFT_SCALAR*** v4_pa_2, FFT_SCALAR*** v5_pa_2)
-{
-  //Compute first virial term v0
-  int n, i, j, k;
-
-  n = 0;
-  for (i = 0; i < nfft_6; i++) {
-    work2_6[n] = work1_6[n]*vg_6[i][0];
-    work2_6[n+1] = work1_6[n+1]*vg_6[i][0];
-    n += 2;
-  }
-   
-  fft2_6->compute(work2_6,work2_6,-1); 
-    
-  n = 0;
-  for (k = nzlo_in_6; k <= nzhi_in_6; k++)
-    for (j = nylo_in_6; j <= nyhi_in_6; j++)
-      for (i = nxlo_in_6; i <= nxhi_in_6; i++) {
-        v0_pa_1[k][j][i] = work2_6[n++];
-        v0_pa_2[k][j][i] = work2_6[n++];
-      }
-	 
-  //Compute second virial term v1  
-  
-  n = 0;
-  for (i = 0; i < nfft_6; i++) {
-    work2_6[n] = work1_6[n]*vg_6[i][1];
-    work2_6[n+1] = work1_6[n+1]*vg_6[i][1];
-    n += 2;
-  }
-    
-  fft2_6->compute(work2_6,work2_6,-1); 
-  
-  n = 0;
-  for (k = nzlo_in_6; k <= nzhi_in_6; k++)
-    for (j = nylo_in_6; j <= nyhi_in_6; j++)
-      for (i = nxlo_in_6; i <= nxhi_in_6; i++) {
-        v1_pa_1[k][j][i] = work2_6[n++];
-        v1_pa_2[k][j][i] = work2_6[n++];
-      }
-	  
-  //Compute third virial term v2
-   
-  n = 0;
-  for (i = 0; i < nfft_6; i++) {
-    work2_6[n] = work1_6[n]*vg_6[i][2];
-    work2_6[n+1] = work1_6[n+1]*vg_6[i][2];
-    n += 2;
-  }
-    
-  fft2_6->compute(work2_6,work2_6,-1); 
-    
-  n = 0;
-  for (k = nzlo_in_6; k <= nzhi_in_6; k++)
-    for (j = nylo_in_6; j <= nyhi_in_6; j++)
-      for (i = nxlo_in_6; i <= nxhi_in_6; i++) {
-        v2_pa_1[k][j][i] = work2_6[n++];
-        v2_pa_2[k][j][i] = work2_6[n++];
-      }
-
-  //Compute fourth virial term v3
-   
-  n = 0;
-  for (i = 0; i < nfft_6; i++) {
-    work2_6[n] = work1_6[n]*vg2_6[i][0];
-    work2_6[n+1] = work1_6[n+1]*vg2_6[i][0];
-    n += 2;
-  }
-    
-  fft2_6->compute(work2_6,work2_6,-1); 
-    
-  n = 0;
-  for (k = nzlo_in_6; k <= nzhi_in_6; k++)
-    for (j = nylo_in_6; j <= nyhi_in_6; j++)
-      for (i = nxlo_in_6; i <= nxhi_in_6; i++) {
-        v3_pa_1[k][j][i] = work2_6[n++];
-        v3_pa_2[k][j][i] = work2_6[n++];
-      }
-
-  //Compute fifth virial term v4
-   
-  n = 0;
-  for (i = 0; i < nfft_6; i++) {
-    work2_6[n] = work1_6[n]*vg2_6[i][1];
-    work2_6[n+1] = work1_6[n+1]*vg2_6[i][1];
-    n += 2;
-  }
-    
-  fft2_6->compute(work2_6,work2_6,-1); 
-    
-  n = 0;
-  for (k = nzlo_in_6; k <= nzhi_in_6; k++)
-    for (j = nylo_in_6; j <= nyhi_in_6; j++)
-      for (i = nxlo_in_6; i <= nxhi_in_6; i++) {
-        v4_pa_1[k][j][i] = work2_6[n++];
-        v4_pa_2[k][j][i] = work2_6[n++];
-      }
-   
-  //Compute last virial term v5
-   
-  n = 0;
-  for (i = 0; i < nfft_6; i++) {
-    work2_6[n] = work1_6[n]*vg2_6[i][2];
-    work2_6[n+1] = work1_6[n+1]*vg2_6[i][2];
-    n += 2;
-  }
-    
-  fft2_6->compute(work2_6,work2_6,-1); 
-    
-  n = 0;
-  for (k = nzlo_in_6; k <= nzhi_in_6; k++)
-    for (j = nylo_in_6; j <= nyhi_in_6; j++)
-      for (i = nxlo_in_6; i <= nxhi_in_6; i++) {
-        v5_pa_1[k][j][i] = work2_6[n++];
-        v5_pa_2[k][j][i] = work2_6[n++];
-      }
-}
-
-/* ----------------------------------------------------------------------
-   Fourier Transform for per atom virial calculations
-------------------------------------------------------------------------- */
-
-void PPPMDisp::poisson_none_peratom(int n1, int n2,                              
-                                 FFT_SCALAR*** v0_pa_1, FFT_SCALAR*** v1_pa_1, FFT_SCALAR*** v2_pa_1,
-                                 FFT_SCALAR*** v3_pa_1, FFT_SCALAR*** v4_pa_1, FFT_SCALAR*** v5_pa_1,
-                                 FFT_SCALAR*** v0_pa_2, FFT_SCALAR*** v1_pa_2, FFT_SCALAR*** v2_pa_2,
-                                 FFT_SCALAR*** v3_pa_2, FFT_SCALAR*** v4_pa_2, FFT_SCALAR*** v5_pa_2)
-{
-  //Compute first virial term v0
-  int n, i, j, k;
-
-  n = 0;
-  for (i = 0; i < nfft_6; i++) {
-    work2_6[n] = work1_6[n]*vg_6[i][0];
-    work2_6[n+1] = work1_6[n+1]*vg_6[i][0];
-    n += 2;
-  }
-   
-  fft2_6->compute(work2_6,work2_6,-1); 
-    
-  n = 0;
-  for (k = nzlo_in_6; k <= nzhi_in_6; k++)
-    for (j = nylo_in_6; j <= nyhi_in_6; j++)
-      for (i = nxlo_in_6; i <= nxhi_in_6; i++) {
-        v0_pa_1[k][j][i] = B[n1]*work2_6[n++];
-        v0_pa_2[k][j][i] = B[n2]*work2_6[n++];
-      }
-	 
-  //Compute second virial term v1  
-  
-  n = 0;
-  for (i = 0; i < nfft_6; i++) {
-    work2_6[n] = work1_6[n]*vg_6[i][1];
-    work2_6[n+1] = work1_6[n+1]*vg_6[i][1];
-    n += 2;
-  }
-    
-  fft2_6->compute(work2_6,work2_6,-1); 
-  
-  n = 0;
-  for (k = nzlo_in_6; k <= nzhi_in_6; k++)
-    for (j = nylo_in_6; j <= nyhi_in_6; j++)
-      for (i = nxlo_in_6; i <= nxhi_in_6; i++) {
-        v1_pa_1[k][j][i] = B[n1]*work2_6[n++];
-        v1_pa_2[k][j][i] = B[n2]*work2_6[n++];
-      }
-	  
-  //Compute third virial term v2
-   
-  n = 0;
-  for (i = 0; i < nfft_6; i++) {
-    work2_6[n] = work1_6[n]*vg_6[i][2];
-    work2_6[n+1] = work1_6[n+1]*vg_6[i][2];
-    n += 2;
-  }
-    
-  fft2_6->compute(work2_6,work2_6,-1); 
-    
-  n = 0;
-  for (k = nzlo_in_6; k <= nzhi_in_6; k++)
-    for (j = nylo_in_6; j <= nyhi_in_6; j++)
-      for (i = nxlo_in_6; i <= nxhi_in_6; i++) {
-        v2_pa_1[k][j][i] = B[n1]*work2_6[n++];
-        v2_pa_2[k][j][i] = B[n2]*work2_6[n++];
-      }
-
-  //Compute fourth virial term v3
-   
-  n = 0;
-  for (i = 0; i < nfft_6; i++) {
-    work2_6[n] = work1_6[n]*vg2_6[i][0];
-    work2_6[n+1] = work1_6[n+1]*vg2_6[i][0];
-    n += 2;
-  }
-    
-  fft2_6->compute(work2_6,work2_6,-1); 
-    
-  n = 0;
-  for (k = nzlo_in_6; k <= nzhi_in_6; k++)
-    for (j = nylo_in_6; j <= nyhi_in_6; j++)
-      for (i = nxlo_in_6; i <= nxhi_in_6; i++) {
-        v3_pa_1[k][j][i] = B[n1]*work2_6[n++];
-        v3_pa_2[k][j][i] = B[n2]*work2_6[n++];
-      }
-
-  //Compute fifth virial term v4
-   
-  n = 0;
-  for (i = 0; i < nfft_6; i++) {
-    work2_6[n] = work1_6[n]*vg2_6[i][1];
-    work2_6[n+1] = work1_6[n+1]*vg2_6[i][1];
-    n += 2;
-  }
-    
-  fft2_6->compute(work2_6,work2_6,-1); 
-    
-  n = 0;
-  for (k = nzlo_in_6; k <= nzhi_in_6; k++)
-    for (j = nylo_in_6; j <= nyhi_in_6; j++)
-      for (i = nxlo_in_6; i <= nxhi_in_6; i++) {
-        v4_pa_1[k][j][i] = B[n1]*work2_6[n++];
-        v4_pa_2[k][j][i] = B[n2]*work2_6[n++];
-      }
-   
-  //Compute last virial term v5
-   
-  n = 0;
-  for (i = 0; i < nfft_6; i++) {
-    work2_6[n] = work1_6[n]*vg2_6[i][2];
-    work2_6[n+1] = work1_6[n+1]*vg2_6[i][2];
-    n += 2;
-  }
-    
-  fft2_6->compute(work2_6,work2_6,-1); 
-    
-  n = 0;
-  for (k = nzlo_in_6; k <= nzhi_in_6; k++)
-    for (j = nylo_in_6; j <= nyhi_in_6; j++)
-      for (i = nxlo_in_6; i <= nxhi_in_6; i++) {
-        v5_pa_1[k][j][i] = B[n1]*work2_6[n++];
-        v5_pa_2[k][j][i] = B[n2]*work2_6[n++];
-      }
-}
- 
-/* ----------------------------------------------------------------------
-   interpolate from grid to get electric field & force on my particles 
-   for ik scheme
-------------------------------------------------------------------------- */
-
-void PPPMDisp::fieldforce_c_ik()
-{
-  int i,l,m,n,nx,ny,nz,mx,my,mz;
-  FFT_SCALAR dx,dy,dz,x0,y0,z0;
-  FFT_SCALAR ekx,eky,ekz;
-
-  // loop over my charges, interpolate electric field from nearby grid points
-  // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
-  // (dx,dy,dz) = distance to "lower left" grid pt
-  // (mx,my,mz) = global coords of moving stencil pt
-  // ek = 3 components of E-field on particle
-
-  double *q = atom->q;
-  double **x = atom->x;
-  double **f = atom->f;
-
-  int nlocal = atom->nlocal;
-
-  for (i = 0; i < nlocal; i++) {
-    nx = part2grid[i][0];
-    ny = part2grid[i][1];
-    nz = part2grid[i][2];
-    dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv;
-    dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv;
-    dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv;
-
-    compute_rho1d(dx,dy,dz, order, rho_coeff, rho1d);
-
-    ekx = eky = ekz = ZEROF;
-    for (n = nlower; n <= nupper; n++) {
-      mz = n+nz;
-      z0 = rho1d[2][n];
-      for (m = nlower; m <= nupper; m++) {
-	my = m+ny;
-	y0 = z0*rho1d[1][m];
-	for (l = nlower; l <= nupper; l++) {
-	  mx = l+nx;
-	  x0 = y0*rho1d[0][l];
-	  ekx -= x0*vdx_brick[mz][my][mx];
-	  eky -= x0*vdy_brick[mz][my][mx];
-	  ekz -= x0*vdz_brick[mz][my][mx];
-	}
-      }
-    }
-
-    // convert E-field to force
-
-    const double qfactor = force->qqrd2e * scale * q[i];
-    f[i][0] += qfactor*ekx;
-    f[i][1] += qfactor*eky;
-    if (slabflag != 2) f[i][2] += qfactor*ekz;
-  }
-}
-/* ----------------------------------------------------------------------
-   interpolate from grid to get electric field & force on my particles
-   for ad scheme 
-------------------------------------------------------------------------- */
-
-void PPPMDisp::fieldforce_c_ad()
-{
-  int i,l,m,n,nx,ny,nz,mx,my,mz;
-  FFT_SCALAR dx,dy,dz;
-  FFT_SCALAR ekx,eky,ekz;
-  double s1,s2,s3;
-  double sf = 0.0;
-
-  double *prd;
-
-  if (triclinic == 0) prd = domain->prd;
-  else prd = domain->prd_lamda;
-
-  double xprd = prd[0];
-  double yprd = prd[1];
-  double zprd = prd[2];
-  double zprd_slab = zprd*slab_volfactor;
-
-  double hx_inv = nx_pppm/xprd;
-  double hy_inv = ny_pppm/yprd;
-  double hz_inv = nz_pppm/zprd_slab;
-
-  // loop over my charges, interpolate electric field from nearby grid points
-  // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
-  // (dx,dy,dz) = distance to "lower left" grid pt
-  // (mx,my,mz) = global coords of moving stencil pt
-  // ek = 3 components of E-field on particle
-
-  double *q = atom->q;
-  double **x = atom->x;
-  double **f = atom->f;
-
-  int nlocal = atom->nlocal;
-
-  for (i = 0; i < nlocal; i++) {
-    nx = part2grid[i][0];
-    ny = part2grid[i][1];
-    nz = part2grid[i][2];
-    dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv;
-    dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv;
-    dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv;
-
-    compute_rho1d(dx,dy,dz, order, rho_coeff, rho1d);
-    compute_drho1d(dx,dy,dz, order, drho_coeff, drho1d);
-
-    ekx = eky = ekz = ZEROF;
-    for (n = nlower; n <= nupper; n++) {
-      mz = n+nz;
-      for (m = nlower; m <= nupper; m++) {
-        my = m+ny;
-        for (l = nlower; l <= nupper; l++) {
-          mx = l+nx;
-          ekx += drho1d[0][l]*rho1d[1][m]*rho1d[2][n]*u_brick[mz][my][mx];
-          eky += rho1d[0][l]*drho1d[1][m]*rho1d[2][n]*u_brick[mz][my][mx];
-          ekz += rho1d[0][l]*rho1d[1][m]*drho1d[2][n]*u_brick[mz][my][mx];
-        }
-      }
-    }
-    ekx *= hx_inv;
-    eky *= hy_inv;
-    ekz *= hz_inv;
-    // convert E-field to force and substract self forces
-    const double qfactor = force->qqrd2e * scale;
-
-    s1 = x[i][0]*hx_inv;
-    s2 = x[i][1]*hy_inv;
-    s3 = x[i][2]*hz_inv;
-    sf = sf_coeff[0]*sin(2*MY_PI*s1);
-    sf += sf_coeff[1]*sin(4*MY_PI*s1);
-    sf *= 2*q[i]*q[i];
-    f[i][0] += qfactor*(ekx*q[i] - sf);
-
-    sf = sf_coeff[2]*sin(2*MY_PI*s2);
-    sf += sf_coeff[3]*sin(4*MY_PI*s2);
-    sf *= 2*q[i]*q[i];
-    f[i][1] += qfactor*(eky*q[i] - sf);
-
-
-    sf = sf_coeff[4]*sin(2*MY_PI*s3);
-    sf += sf_coeff[5]*sin(4*MY_PI*s3);
-    sf *= 2*q[i]*q[i];
-    if (slabflag != 2) f[i][2] += qfactor*(ekz*q[i] - sf);
-  }
-}
-
-/* ----------------------------------------------------------------------
-   interpolate from grid to get electric field & force on my particles 
-------------------------------------------------------------------------- */
-
-void PPPMDisp::fieldforce_c_peratom()
-{
-  int i,l,m,n,nx,ny,nz,mx,my,mz;
-  FFT_SCALAR dx,dy,dz,x0,y0,z0;
-  FFT_SCALAR u_pa,v0,v1,v2,v3,v4,v5;
-
-  // loop over my charges, interpolate electric field from nearby grid points
-  // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
-  // (dx,dy,dz) = distance to "lower left" grid pt
-  // (mx,my,mz) = global coords of moving stencil pt
-  // ek = 3 components of E-field on particle
-
-  double *q = atom->q;
-  double **x = atom->x;
-
-  int nlocal = atom->nlocal;
-
-  for (i = 0; i < nlocal; i++) {
-    nx = part2grid[i][0];
-    ny = part2grid[i][1];
-    nz = part2grid[i][2];
-    dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv;
-    dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv;
-    dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv;
-
-    compute_rho1d(dx,dy,dz, order, rho_coeff, rho1d);
-
-    u_pa = v0 = v1 = v2 = v3 = v4 = v5 = ZEROF;
-    for (n = nlower; n <= nupper; n++) {
-      mz = n+nz;
-      z0 = rho1d[2][n];
-      for (m = nlower; m <= nupper; m++) {
-	my = m+ny;
-	y0 = z0*rho1d[1][m];
-	for (l = nlower; l <= nupper; l++) {
-	  mx = l+nx;
-	  x0 = y0*rho1d[0][l];
-	  if (eflag_atom) u_pa += x0*u_brick[mz][my][mx];	
-	  if (vflag_atom) {
-            v0 += x0*v0_brick[mz][my][mx];
-            v1 += x0*v1_brick[mz][my][mx];
-            v2 += x0*v2_brick[mz][my][mx];
-            v3 += x0*v3_brick[mz][my][mx];
-            v4 += x0*v4_brick[mz][my][mx];
-            v5 += x0*v5_brick[mz][my][mx];
-          }
-	}
-      }
-    }
-
-    // convert E-field to force
-
-    const double qfactor = 0.5*force->qqrd2e * scale * q[i];
-
-    if (eflag_atom) eatom[i] += u_pa*qfactor;
-    if (vflag_atom) {
-      vatom[i][0] += v0*qfactor;
-      vatom[i][1] += v1*qfactor;
-      vatom[i][2] += v2*qfactor;
-      vatom[i][3] += v3*qfactor;
-      vatom[i][4] += v4*qfactor;
-      vatom[i][5] += v5*qfactor;
-    }
-  }
-}
-
-/* ----------------------------------------------------------------------
-   interpolate from grid to get dispersion field & force on my particles
-   for geometric mixing rule 
-------------------------------------------------------------------------- */
-
-void PPPMDisp::fieldforce_g_ik()
-{
-  int i,l,m,n,nx,ny,nz,mx,my,mz;
-  FFT_SCALAR dx,dy,dz,x0,y0,z0;
-  FFT_SCALAR ekx,eky,ekz;
-
-  // loop over my charges, interpolate electric field from nearby grid points
-  // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
-  // (dx,dy,dz) = distance to "lower left" grid pt
-  // (mx,my,mz) = global coords of moving stencil pt
-  // ek = 3 components of dispersion field on particle
-
-  double **x = atom->x;
-  double **f = atom->f;
-  int type;
-  double lj;
-
-  int nlocal = atom->nlocal;
-
-  for (i = 0; i < nlocal; i++) {
-    nx = part2grid_6[i][0];
-    ny = part2grid_6[i][1];
-    nz = part2grid_6[i][2];
-    dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6;
-    dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6;
-    dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6;
-
-    compute_rho1d(dx,dy,dz, order_6, rho_coeff_6, rho1d_6);
-
-    ekx = eky = ekz = ZEROF;
-    for (n = nlower_6; n <= nupper_6; n++) {
-      mz = n+nz;
-      z0 = rho1d_6[2][n];
-      for (m = nlower_6; m <= nupper_6; m++) {
-	my = m+ny;
-	y0 = z0*rho1d_6[1][m];
-	for (l = nlower_6; l <= nupper_6; l++) {
-	  mx = l+nx;
-	  x0 = y0*rho1d_6[0][l];
-	  ekx -= x0*vdx_brick_g[mz][my][mx];
-	  eky -= x0*vdy_brick_g[mz][my][mx];
-	  ekz -= x0*vdz_brick_g[mz][my][mx];
-	}
-      }
-    }
-
-    // convert E-field to force
-    type = atom->type[i];
-    lj = B[type];
-    f[i][0] += lj*ekx;
-    f[i][1] += lj*eky;
-    if (slabflag != 2) f[i][2] += lj*ekz;
-  }
-}
-
-/* ----------------------------------------------------------------------
-   interpolate from grid to get dispersion field & force on my particles
-   for geometric mixing rule for ad scheme
-------------------------------------------------------------------------- */
-
-void PPPMDisp::fieldforce_g_ad()
-{
-  int i,l,m,n,nx,ny,nz,mx,my,mz;
-  FFT_SCALAR dx,dy,dz;
-  FFT_SCALAR ekx,eky,ekz;
-  double s1,s2,s3;
-  double sf = 0.0;
-  double *prd;
-
-  if (triclinic == 0) prd = domain->prd;
-  else prd = domain->prd_lamda;
-
-  double xprd = prd[0];
-  double yprd = prd[1];
-  double zprd = prd[2];
-  double zprd_slab = zprd*slab_volfactor;
-
-  double hx_inv = nx_pppm_6/xprd;
-  double hy_inv = ny_pppm_6/yprd;
-  double hz_inv = nz_pppm_6/zprd_slab;
-
-  // loop over my charges, interpolate electric field from nearby grid points
-  // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
-  // (dx,dy,dz) = distance to "lower left" grid pt
-  // (mx,my,mz) = global coords of moving stencil pt
-  // ek = 3 components of dispersion field on particle
-
-  double **x = atom->x;
-  double **f = atom->f;
-  int type;
-  double lj;
-
-  int nlocal = atom->nlocal;
-
- 
-  for (i = 0; i < nlocal; i++) {
-    nx = part2grid_6[i][0];
-    ny = part2grid_6[i][1];
-    nz = part2grid_6[i][2];
-    dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6;
-    dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6;
-    dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6;
-
-    compute_rho1d(dx,dy,dz, order_6, rho_coeff_6, rho1d_6);
-    compute_drho1d(dx,dy,dz, order_6, drho_coeff_6, drho1d_6);
-
-
-    ekx = eky = ekz = ZEROF;
-    for (n = nlower_6; n <= nupper_6; n++) {
-      mz = n+nz;
-      for (m = nlower_6; m <= nupper_6; m++) {
-        my = m+ny;
-        for (l = nlower_6; l <= nupper_6; l++) {
-          mx = l+nx;
-          ekx += drho1d_6[0][l]*rho1d_6[1][m]*rho1d_6[2][n]*u_brick_g[mz][my][mx];
-          eky += rho1d_6[0][l]*drho1d_6[1][m]*rho1d_6[2][n]*u_brick_g[mz][my][mx];
-          ekz += rho1d_6[0][l]*rho1d_6[1][m]*drho1d_6[2][n]*u_brick_g[mz][my][mx];
-        }
-      }
-    }
-    ekx *= hx_inv;
-    eky *= hy_inv;
-    ekz *= hz_inv;
-
-    // convert E-field to force
-    type = atom->type[i];
-    lj = B[type];
-
-    s1 = x[i][0]*hx_inv;
-    s2 = x[i][1]*hy_inv;
-    s3 = x[i][2]*hz_inv;
-
-    sf = sf_coeff_6[0]*sin(2*MY_PI*s1);
-    sf += sf_coeff_6[1]*sin(4*MY_PI*s1);
-    sf *= 2*lj*lj;
-    f[i][0] += ekx*lj - sf;
-
-    sf = sf_coeff_6[2]*sin(2*MY_PI*s2);
-    sf += sf_coeff_6[3]*sin(4*MY_PI*s2);
-    sf *= 2*lj*lj;
-    f[i][1] += eky*lj - sf;
-
-
-    sf = sf_coeff_6[4]*sin(2*MY_PI*s3);
-    sf += sf_coeff_6[5]*sin(4*MY_PI*s3);
-    sf *= 2*lj*lj;
-    if (slabflag != 2) f[i][2] += ekz*lj - sf;
-
-  }
-}
-
-/* ----------------------------------------------------------------------
-   interpolate from grid to get dispersion field & force on my particles
-   for geometric mixing rule for per atom quantities
-------------------------------------------------------------------------- */
-
-void PPPMDisp::fieldforce_g_peratom()
-{
-  int i,l,m,n,nx,ny,nz,mx,my,mz;
-  FFT_SCALAR dx,dy,dz,x0,y0,z0;
-  FFT_SCALAR u_pa,v0,v1,v2,v3,v4,v5;
-
-  // loop over my charges, interpolate electric field from nearby grid points
-  // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
-  // (dx,dy,dz) = distance to "lower left" grid pt
-  // (mx,my,mz) = global coords of moving stencil pt
-  // ek = 3 components of dispersion field on particle
-
-  double **x = atom->x;
-  int type;
-  double lj;
-
-  int nlocal = atom->nlocal;
-
-  for (i = 0; i < nlocal; i++) {
-    nx = part2grid_6[i][0];
-    ny = part2grid_6[i][1];
-    nz = part2grid_6[i][2];
-    dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6;
-    dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6;
-    dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6;
-
-    compute_rho1d(dx,dy,dz, order_6, rho_coeff_6, rho1d_6);
-
-    u_pa = v0 = v1 = v2 = v3 = v4 = v5 = ZEROF;
-    for (n = nlower_6; n <= nupper_6; n++) {
-      mz = n+nz;
-      z0 = rho1d_6[2][n];
-      for (m = nlower_6; m <= nupper_6; m++) {
-	my = m+ny;
-	y0 = z0*rho1d_6[1][m];
-	for (l = nlower_6; l <= nupper_6; l++) {
-	  mx = l+nx;
-	  x0 = y0*rho1d_6[0][l];
-	  if (eflag_atom) u_pa += x0*u_brick_g[mz][my][mx];	
-	  if (vflag_atom) {
-            v0 += x0*v0_brick_g[mz][my][mx];
-            v1 += x0*v1_brick_g[mz][my][mx];
-            v2 += x0*v2_brick_g[mz][my][mx];
-            v3 += x0*v3_brick_g[mz][my][mx];
-            v4 += x0*v4_brick_g[mz][my][mx];
-            v5 += x0*v5_brick_g[mz][my][mx];
-          }
-	}
-      }
-    }
-
-    // convert E-field to force
-    type = atom->type[i];
-    lj = B[type]*0.5;
-
-    if (eflag_atom) eatom[i] += u_pa*lj;
-    if (vflag_atom) {
-      vatom[i][0] += v0*lj;
-      vatom[i][1] += v1*lj;
-      vatom[i][2] += v2*lj;
-      vatom[i][3] += v3*lj;
-      vatom[i][4] += v4*lj;
-      vatom[i][5] += v5*lj;
-    }
-  }
-}
-
-/* ----------------------------------------------------------------------
-   interpolate from grid to get dispersion field & force on my particles
-   for arithmetic mixing rule and ik scheme
-------------------------------------------------------------------------- */
-
-void PPPMDisp::fieldforce_a_ik()
-{
-  int i,l,m,n,nx,ny,nz,mx,my,mz;
-  FFT_SCALAR dx,dy,dz,x0,y0,z0;
-  FFT_SCALAR ekx0, eky0, ekz0, ekx1, eky1, ekz1, ekx2, eky2, ekz2;
-  FFT_SCALAR ekx3, eky3, ekz3, ekx4, eky4, ekz4, ekx5, eky5, ekz5;
-  FFT_SCALAR ekx6, eky6, ekz6;
-
-  // loop over my charges, interpolate electric field from nearby grid points
-  // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
-  // (dx,dy,dz) = distance to "lower left" grid pt
-  // (mx,my,mz) = global coords of moving stencil pt
-  // ek = 3 components of dispersion field on particle
-
-  double **x = atom->x;
-  double **f = atom->f;
-  int type;
-  double lj0, lj1, lj2, lj3, lj4, lj5, lj6;
-
-  int nlocal = atom->nlocal;
-
-  for (i = 0; i < nlocal; i++) {
-
-    nx = part2grid_6[i][0];
-    ny = part2grid_6[i][1];
-    nz = part2grid_6[i][2];
-    dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6;
-    dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6;
-    dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6;
-    compute_rho1d(dx,dy,dz, order_6, rho_coeff_6, rho1d_6);
-    ekx0 = eky0 = ekz0 = ZEROF;
-    ekx1 = eky1 = ekz1 = ZEROF;
-    ekx2 = eky2 = ekz2 = ZEROF;
-    ekx3 = eky3 = ekz3 = ZEROF;
-    ekx4 = eky4 = ekz4 = ZEROF;
-    ekx5 = eky5 = ekz5 = ZEROF;
-    ekx6 = eky6 = ekz6 = ZEROF;
-    for (n = nlower_6; n <= nupper_6; n++) {
-      mz = n+nz;
-      z0 = rho1d_6[2][n];
-      for (m = nlower_6; m <= nupper_6; m++) {
-	my = m+ny;
-	y0 = z0*rho1d_6[1][m];
-	for (l = nlower_6; l <= nupper_6; l++) {
-	  mx = l+nx;
-	  x0 = y0*rho1d_6[0][l];
-	  ekx0 -= x0*vdx_brick_a0[mz][my][mx];
-	  eky0 -= x0*vdy_brick_a0[mz][my][mx];
-	  ekz0 -= x0*vdz_brick_a0[mz][my][mx];
-	  ekx1 -= x0*vdx_brick_a1[mz][my][mx];
-	  eky1 -= x0*vdy_brick_a1[mz][my][mx];
-	  ekz1 -= x0*vdz_brick_a1[mz][my][mx];
-          ekx2 -= x0*vdx_brick_a2[mz][my][mx];
-	  eky2 -= x0*vdy_brick_a2[mz][my][mx];
-	  ekz2 -= x0*vdz_brick_a2[mz][my][mx];
-	  ekx3 -= x0*vdx_brick_a3[mz][my][mx];
-	  eky3 -= x0*vdy_brick_a3[mz][my][mx];
-	  ekz3 -= x0*vdz_brick_a3[mz][my][mx];
-	  ekx4 -= x0*vdx_brick_a4[mz][my][mx];
-	  eky4 -= x0*vdy_brick_a4[mz][my][mx];
-	  ekz4 -= x0*vdz_brick_a4[mz][my][mx];
-          ekx5 -= x0*vdx_brick_a5[mz][my][mx];
-	  eky5 -= x0*vdy_brick_a5[mz][my][mx];
-	  ekz5 -= x0*vdz_brick_a5[mz][my][mx];
-          ekx6 -= x0*vdx_brick_a6[mz][my][mx];
-	  eky6 -= x0*vdy_brick_a6[mz][my][mx];
-	  ekz6 -= x0*vdz_brick_a6[mz][my][mx];
-	}
-      }
-    }
-    // convert D-field to force
-    type = atom->type[i];
-    lj0 = B[7*type+6];
-    lj1 = B[7*type+5];
-    lj2 = B[7*type+4];
-    lj3 = B[7*type+3];
-    lj4 = B[7*type+2];
-    lj5 = B[7*type+1];
-    lj6 = B[7*type];
-    f[i][0] += lj0*ekx0 + lj1*ekx1 + lj2*ekx2 + lj3*ekx3 + lj4*ekx4 + lj5*ekx5 + lj6*ekx6;
-    f[i][1] += lj0*eky0 + lj1*eky1 + lj2*eky2 + lj3*eky3 + lj4*eky4 + lj5*eky5 + lj6*eky6;
-    if (slabflag != 2) f[i][2] += lj0*ekz0 + lj1*ekz1 + lj2*ekz2 + lj3*ekz3 + lj4*ekz4 + lj5*ekz5 + lj6*ekz6;
-  }
-}
-
-/* ----------------------------------------------------------------------
-   interpolate from grid to get dispersion field & force on my particles
-   for arithmetic mixing rule for the ad scheme
-------------------------------------------------------------------------- */
-
-void PPPMDisp::fieldforce_a_ad()
-{
-  int i,l,m,n,nx,ny,nz,mx,my,mz;
-  FFT_SCALAR dx,dy,dz,x0,y0,z0;
-  FFT_SCALAR ekx0, eky0, ekz0, ekx1, eky1, ekz1, ekx2, eky2, ekz2;
-  FFT_SCALAR ekx3, eky3, ekz3, ekx4, eky4, ekz4, ekx5, eky5, ekz5;
-  FFT_SCALAR ekx6, eky6, ekz6;
-
-  double s1,s2,s3;
-  double sf = 0.0;
-  double *prd;
-
-  if (triclinic == 0) prd = domain->prd;
-  else prd = domain->prd_lamda;
-
-  double xprd = prd[0];
-  double yprd = prd[1];
-  double zprd = prd[2];
-  double zprd_slab = zprd*slab_volfactor;
-
-  double hx_inv = nx_pppm_6/xprd;
-  double hy_inv = ny_pppm_6/yprd;
-  double hz_inv = nz_pppm_6/zprd_slab;
-
-  // loop over my charges, interpolate electric field from nearby grid points
-  // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
-  // (dx,dy,dz) = distance to "lower left" grid pt
-  // (mx,my,mz) = global coords of moving stencil pt
-  // ek = 3 components of dispersion field on particle
-
-  double **x = atom->x;
-  double **f = atom->f;
-  int type;
-  double lj0, lj1, lj2, lj3, lj4, lj5, lj6;
-
-  int nlocal = atom->nlocal;
-
-  for (i = 0; i < nlocal; i++) {
-
-    nx = part2grid_6[i][0];
-    ny = part2grid_6[i][1];
-    nz = part2grid_6[i][2];
-    dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6;
-    dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6;
-    dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6;
-
-    compute_rho1d(dx,dy,dz, order_6, rho_coeff_6, rho1d_6);
-    compute_drho1d(dx,dy,dz, order_6, drho_coeff_6, drho1d_6);
-
-    ekx0 = eky0 = ekz0 = ZEROF;
-    ekx1 = eky1 = ekz1 = ZEROF;
-    ekx2 = eky2 = ekz2 = ZEROF;
-    ekx3 = eky3 = ekz3 = ZEROF;
-    ekx4 = eky4 = ekz4 = ZEROF;
-    ekx5 = eky5 = ekz5 = ZEROF;
-    ekx6 = eky6 = ekz6 = ZEROF;
-    for (n = nlower_6; n <= nupper_6; n++) {
-      mz = n+nz;
-      for (m = nlower_6; m <= nupper_6; m++) {
-	my = m+ny;
-	for (l = nlower_6; l <= nupper_6; l++) {
-	  mx = l+nx;
-          x0 = drho1d_6[0][l]*rho1d_6[1][m]*rho1d_6[2][n];
-          y0 = rho1d_6[0][l]*drho1d_6[1][m]*rho1d_6[2][n];
-          z0 = rho1d_6[0][l]*rho1d_6[1][m]*drho1d_6[2][n];
-
-          ekx0 += x0*u_brick_a0[mz][my][mx];
-          eky0 += y0*u_brick_a0[mz][my][mx];
-          ekz0 += z0*u_brick_a0[mz][my][mx];
-
-          ekx1 += x0*u_brick_a1[mz][my][mx];
-          eky1 += y0*u_brick_a1[mz][my][mx];
-          ekz1 += z0*u_brick_a1[mz][my][mx];
-
-          ekx2 += x0*u_brick_a2[mz][my][mx];
-          eky2 += y0*u_brick_a2[mz][my][mx];
-          ekz2 += z0*u_brick_a2[mz][my][mx];
-
-          ekx3 += x0*u_brick_a3[mz][my][mx];
-          eky3 += y0*u_brick_a3[mz][my][mx];
-          ekz3 += z0*u_brick_a3[mz][my][mx];
-
-          ekx4 += x0*u_brick_a4[mz][my][mx];
-          eky4 += y0*u_brick_a4[mz][my][mx];
-          ekz4 += z0*u_brick_a4[mz][my][mx];
-
-          ekx5 += x0*u_brick_a5[mz][my][mx];
-          eky5 += y0*u_brick_a5[mz][my][mx];
-          ekz5 += z0*u_brick_a5[mz][my][mx];
-
-          ekx6 += x0*u_brick_a6[mz][my][mx];
-          eky6 += y0*u_brick_a6[mz][my][mx];
-          ekz6 += z0*u_brick_a6[mz][my][mx];
-	}
-      }
-    }
-
-    ekx0 *= hx_inv;
-    eky0 *= hy_inv;
-    ekz0 *= hz_inv;
-
-    ekx1 *= hx_inv;
-    eky1 *= hy_inv;
-    ekz1 *= hz_inv;
-
-    ekx2 *= hx_inv;
-    eky2 *= hy_inv;
-    ekz2 *= hz_inv;
-
-    ekx3 *= hx_inv;
-    eky3 *= hy_inv;
-    ekz3 *= hz_inv;
-
-    ekx4 *= hx_inv;
-    eky4 *= hy_inv;
-    ekz4 *= hz_inv;
-
-    ekx5 *= hx_inv;
-    eky5 *= hy_inv;
-    ekz5 *= hz_inv;
-
-    ekx6 *= hx_inv;
-    eky6 *= hy_inv;
-    ekz6 *= hz_inv;
-
-    // convert D-field to force
-    type = atom->type[i];
-    lj0 = B[7*type+6];
-    lj1 = B[7*type+5];
-    lj2 = B[7*type+4];
-    lj3 = B[7*type+3];
-    lj4 = B[7*type+2];
-    lj5 = B[7*type+1];
-    lj6 = B[7*type];
-
-    s1 = x[i][0]*hx_inv;
-    s2 = x[i][1]*hy_inv;
-    s3 = x[i][2]*hz_inv;
-
-    sf = sf_coeff_6[0]*sin(2*MY_PI*s1);
-    sf += sf_coeff_6[1]*sin(4*MY_PI*s1);
-    sf *= 4*lj0*lj6 + 4*lj1*lj5 + 4*lj2*lj4 + 2*lj3*lj3;
-    f[i][0] += lj0*ekx0 + lj1*ekx1 + lj2*ekx2 + lj3*ekx3 + lj4*ekx4 + lj5*ekx5 + lj6*ekx6 - sf;
-
-    sf = sf_coeff_6[2]*sin(2*MY_PI*s2);
-    sf += sf_coeff_6[3]*sin(4*MY_PI*s2);
-    sf *= 4*lj0*lj6 + 4*lj1*lj5 + 4*lj2*lj4 + 2*lj3*lj3;
-    f[i][1] += lj0*eky0 + lj1*eky1 + lj2*eky2 + lj3*eky3 + lj4*eky4 + lj5*eky5 + lj6*eky6 - sf;
-
-    sf = sf_coeff_6[4]*sin(2*MY_PI*s3);
-    sf += sf_coeff_6[5]*sin(4*MY_PI*s3);
-    sf *= 4*lj0*lj6 + 4*lj1*lj5 + 4*lj2*lj4 + 2*lj3*lj3;
-    if (slabflag != 2) f[i][2] += lj0*ekz0 + lj1*ekz1 + lj2*ekz2 + lj3*ekz3 + lj4*ekz4 + lj5*ekz5 + lj6*ekz6 - sf;
-  }
-}
-
-/* ----------------------------------------------------------------------
-   interpolate from grid to get dispersion field & force on my particles
-   for arithmetic mixing rule for per atom quantities
-------------------------------------------------------------------------- */
-
-void PPPMDisp::fieldforce_a_peratom()
-{
-  int i,l,m,n,nx,ny,nz,mx,my,mz;
-  FFT_SCALAR dx,dy,dz,x0,y0,z0;
-  FFT_SCALAR u_pa0,v00,v10,v20,v30,v40,v50;
-  FFT_SCALAR u_pa1,v01,v11,v21,v31,v41,v51;
-  FFT_SCALAR u_pa2,v02,v12,v22,v32,v42,v52;
-  FFT_SCALAR u_pa3,v03,v13,v23,v33,v43,v53;
-  FFT_SCALAR u_pa4,v04,v14,v24,v34,v44,v54;
-  FFT_SCALAR u_pa5,v05,v15,v25,v35,v45,v55;
-  FFT_SCALAR u_pa6,v06,v16,v26,v36,v46,v56;
-
-  // loop over my charges, interpolate electric field from nearby grid points
-  // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
-  // (dx,dy,dz) = distance to "lower left" grid pt
-  // (mx,my,mz) = global coords of moving stencil pt
-  // ek = 3 components of dispersion field on particle
-
-  double **x = atom->x;
-  int type;
-  double lj0, lj1, lj2, lj3, lj4, lj5, lj6;
-
-  int nlocal = atom->nlocal;
-
-  for (i = 0; i < nlocal; i++) {
-
-    nx = part2grid_6[i][0];
-    ny = part2grid_6[i][1];
-    nz = part2grid_6[i][2];
-    dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6;
-    dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6;
-    dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6;
-    compute_rho1d(dx,dy,dz, order_6, rho_coeff_6, rho1d_6);
-
-    u_pa0 = v00 = v10 = v20 = v30 = v40 = v50 = ZEROF;
-    u_pa1 = v01 = v11 = v21 = v31 = v41 = v51 = ZEROF;
-    u_pa2 = v02 = v12 = v22 = v32 = v42 = v52 = ZEROF;
-    u_pa3 = v03 = v13 = v23 = v33 = v43 = v53 = ZEROF;
-    u_pa4 = v04 = v14 = v24 = v34 = v44 = v54 = ZEROF;
-    u_pa5 = v05 = v15 = v25 = v35 = v45 = v55 = ZEROF;
-    u_pa6 = v06 = v16 = v26 = v36 = v46 = v56 = ZEROF;
-    for (n = nlower_6; n <= nupper_6; n++) {
-      mz = n+nz;
-      z0 = rho1d_6[2][n];
-      for (m = nlower_6; m <= nupper_6; m++) {
-	my = m+ny;
-	y0 = z0*rho1d_6[1][m];
-	for (l = nlower_6; l <= nupper_6; l++) {
-	  mx = l+nx;
-	  x0 = y0*rho1d_6[0][l];
-          if (eflag_atom) {
-            u_pa0 += x0*u_brick_a0[mz][my][mx];
-            u_pa1 += x0*u_brick_a1[mz][my][mx];
-            u_pa2 += x0*u_brick_a2[mz][my][mx];
-            u_pa3 += x0*u_brick_a3[mz][my][mx];
-            u_pa4 += x0*u_brick_a4[mz][my][mx];
-            u_pa5 += x0*u_brick_a5[mz][my][mx];
-            u_pa6 += x0*u_brick_a6[mz][my][mx];
-	  }
-          if (vflag_atom) {
-            v00 += x0*v0_brick_a0[mz][my][mx];
-            v10 += x0*v1_brick_a0[mz][my][mx];
-            v20 += x0*v2_brick_a0[mz][my][mx];
-            v30 += x0*v3_brick_a0[mz][my][mx];
-            v40 += x0*v4_brick_a0[mz][my][mx];
-            v50 += x0*v5_brick_a0[mz][my][mx];
-            v01 += x0*v0_brick_a1[mz][my][mx];
-            v11 += x0*v1_brick_a1[mz][my][mx];
-            v21 += x0*v2_brick_a1[mz][my][mx];
-            v31 += x0*v3_brick_a1[mz][my][mx];
-            v41 += x0*v4_brick_a1[mz][my][mx];
-            v51 += x0*v5_brick_a1[mz][my][mx];
-            v02 += x0*v0_brick_a2[mz][my][mx];
-            v12 += x0*v1_brick_a2[mz][my][mx];
-            v22 += x0*v2_brick_a2[mz][my][mx];
-            v32 += x0*v3_brick_a2[mz][my][mx];
-            v42 += x0*v4_brick_a2[mz][my][mx];
-            v52 += x0*v5_brick_a2[mz][my][mx];
-            v03 += x0*v0_brick_a3[mz][my][mx];
-            v13 += x0*v1_brick_a3[mz][my][mx];
-            v23 += x0*v2_brick_a3[mz][my][mx];
-            v33 += x0*v3_brick_a3[mz][my][mx];
-            v43 += x0*v4_brick_a3[mz][my][mx];
-            v53 += x0*v5_brick_a3[mz][my][mx];
-            v04 += x0*v0_brick_a4[mz][my][mx];
-            v14 += x0*v1_brick_a4[mz][my][mx];
-            v24 += x0*v2_brick_a4[mz][my][mx];
-            v34 += x0*v3_brick_a4[mz][my][mx];
-            v44 += x0*v4_brick_a4[mz][my][mx];
-            v54 += x0*v5_brick_a4[mz][my][mx];
-            v05 += x0*v0_brick_a5[mz][my][mx];
-            v15 += x0*v1_brick_a5[mz][my][mx];
-            v25 += x0*v2_brick_a5[mz][my][mx];
-            v35 += x0*v3_brick_a5[mz][my][mx];
-            v45 += x0*v4_brick_a5[mz][my][mx];
-            v55 += x0*v5_brick_a5[mz][my][mx];
-            v06 += x0*v0_brick_a6[mz][my][mx];
-            v16 += x0*v1_brick_a6[mz][my][mx];
-            v26 += x0*v2_brick_a6[mz][my][mx];
-            v36 += x0*v3_brick_a6[mz][my][mx];
-            v46 += x0*v4_brick_a6[mz][my][mx];
-            v56 += x0*v5_brick_a6[mz][my][mx];
-          }
-	}
-      }
-    }
-    // convert D-field to force
-    type = atom->type[i];
-    lj0 = B[7*type+6]*0.5;
-    lj1 = B[7*type+5]*0.5;
-    lj2 = B[7*type+4]*0.5;
-    lj3 = B[7*type+3]*0.5;
-    lj4 = B[7*type+2]*0.5;
-    lj5 = B[7*type+1]*0.5;
-    lj6 = B[7*type]*0.5;
-
- 
-    if (eflag_atom) 
-      eatom[i] += u_pa0*lj0 + u_pa1*lj1 + u_pa2*lj2 + 
-        u_pa3*lj3 + u_pa4*lj4 + u_pa5*lj5 + u_pa6*lj6;
-    if (vflag_atom) {
-      vatom[i][0] += v00*lj0 + v01*lj1 + v02*lj2 + v03*lj3 + 
-        v04*lj4 + v05*lj5 + v06*lj6;
-      vatom[i][1] += v10*lj0 + v11*lj1 + v12*lj2 + v13*lj3 + 
-        v14*lj4 + v15*lj5 + v16*lj6;
-      vatom[i][2] += v20*lj0 + v21*lj1 + v22*lj2 + v23*lj3 + 
-        v24*lj4 + v25*lj5 + v26*lj6;
-      vatom[i][3] += v30*lj0 + v31*lj1 + v32*lj2 + v33*lj3 + 
-        v34*lj4 + v35*lj5 + v36*lj6;
-      vatom[i][4] += v40*lj0 + v41*lj1 + v42*lj2 + v43*lj3 + 
-        v44*lj4 + v45*lj5 + v46*lj6;
-      vatom[i][5] += v50*lj0 + v51*lj1 + v52*lj2 + v53*lj3 + 
-        v54*lj4 + v55*lj5 + v56*lj6;
-    }
-  }
-}
-
-/* ----------------------------------------------------------------------
-   interpolate from grid to get dispersion field & force on my particles
-   for arithmetic mixing rule and ik scheme
-------------------------------------------------------------------------- */
-
-void PPPMDisp::fieldforce_none_ik()
-{
-  int i,k,l,m,n,nx,ny,nz,mx,my,mz;
-  FFT_SCALAR dx,dy,dz,x0,y0,z0;
-  FFT_SCALAR *ekx, *eky, *ekz;
-
-  ekx = new FFT_SCALAR[nsplit];
-  eky = new FFT_SCALAR[nsplit];
-  ekz = new FFT_SCALAR[nsplit];
-  // loop over my charges, interpolate electric field from nearby grid points
-  // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
-  // (dx,dy,dz) = distance to "lower left" grid pt
-  // (mx,my,mz) = global coords of moving stencil pt
-  // ek = 3 components of dispersion field on particle
-
-  double **x = atom->x;
-  double **f = atom->f;
-  int type;
-  double lj;
-
-  int nlocal = atom->nlocal;
-
-  for (i = 0; i < nlocal; i++) {
-
-    nx = part2grid_6[i][0];
-    ny = part2grid_6[i][1];
-    nz = part2grid_6[i][2];
-    dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6;
-    dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6;
-    dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6;
-    compute_rho1d(dx,dy,dz, order_6, rho_coeff_6, rho1d_6);
-    for (k = 0; k < nsplit; k++)
-      ekx[k] = eky[k] = ekz[k] = ZEROF;
-    for (n = nlower_6; n <= nupper_6; n++) {
-      mz = n+nz;
-      z0 = rho1d_6[2][n];
-      for (m = nlower_6; m <= nupper_6; m++) {
-	my = m+ny;
-	y0 = z0*rho1d_6[1][m];
-	for (l = nlower_6; l <= nupper_6; l++) {
-	  mx = l+nx;
-	  x0 = y0*rho1d_6[0][l];
-          for (k = 0; k < nsplit; k++) {
-	    ekx[k] -= x0*vdx_brick_none[k][mz][my][mx];
-	    eky[k] -= x0*vdy_brick_none[k][mz][my][mx];
-	    ekz[k] -= x0*vdz_brick_none[k][mz][my][mx];
-          }
-	}
-      }
-    }
-    // convert D-field to force
-    type = atom->type[i];
-    for (k = 0; k < nsplit; k++) {
-      lj = B[nsplit*type + k];
-      f[i][0] += lj*ekx[k];
-      f[i][1] +=lj*eky[k];
-      if (slabflag != 2) f[i][2] +=lj*ekz[k];
-    }
-  }
-
-  delete [] ekx;
-  delete [] eky;
-  delete [] ekz;
-}
-
-/* ----------------------------------------------------------------------
-   interpolate from grid to get dispersion field & force on my particles
-   for arithmetic mixing rule for the ad scheme
-------------------------------------------------------------------------- */
-
-void PPPMDisp::fieldforce_none_ad()
-{
-  int i,k,l,m,n,nx,ny,nz,mx,my,mz;
-  FFT_SCALAR dx,dy,dz,x0,y0,z0;
-  FFT_SCALAR *ekx, *eky, *ekz;
-
-  ekx = new FFT_SCALAR[nsplit];
-  eky = new FFT_SCALAR[nsplit];
-  ekz = new FFT_SCALAR[nsplit];
-
-
-  double s1,s2,s3;
-  double sf1,sf2,sf3;
-  double sf = 0.0;
-  double *prd;
-
-  if (triclinic == 0) prd = domain->prd;
-  else prd = domain->prd_lamda;
-
-  double xprd = prd[0];
-  double yprd = prd[1];
-  double zprd = prd[2];
-  double zprd_slab = zprd*slab_volfactor;
-
-  double hx_inv = nx_pppm_6/xprd;
-  double hy_inv = ny_pppm_6/yprd;
-  double hz_inv = nz_pppm_6/zprd_slab;
-
-  // loop over my charges, interpolate electric field from nearby grid points
-  // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
-  // (dx,dy,dz) = distance to "lower left" grid pt
-  // (mx,my,mz) = global coords of moving stencil pt
-  // ek = 3 components of dispersion field on particle
-
-  double **x = atom->x;
-  double **f = atom->f;
-  int type;
-  double lj;
-
-  int nlocal = atom->nlocal;
-
-  for (i = 0; i < nlocal; i++) {
-
-    nx = part2grid_6[i][0];
-    ny = part2grid_6[i][1];
-    nz = part2grid_6[i][2];
-    dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6;
-    dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6;
-    dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6;
-
-    compute_rho1d(dx,dy,dz, order_6, rho_coeff_6, rho1d_6);
-    compute_drho1d(dx,dy,dz, order_6, drho_coeff_6, drho1d_6);
-
-    for (k = 0; k < nsplit; k++)
-      ekx[k] = eky[k] = ekz[k] = ZEROF;
-
-    for (n = nlower_6; n <= nupper_6; n++) {
-      mz = n+nz;
-      for (m = nlower_6; m <= nupper_6; m++) {
-	my = m+ny;
-	for (l = nlower_6; l <= nupper_6; l++) {
-	  mx = l+nx;
-          x0 = drho1d_6[0][l]*rho1d_6[1][m]*rho1d_6[2][n];
-          y0 = rho1d_6[0][l]*drho1d_6[1][m]*rho1d_6[2][n];
-          z0 = rho1d_6[0][l]*rho1d_6[1][m]*drho1d_6[2][n];
-          
-          for (k = 0; k < nsplit; k++) {
-            ekx[k] += x0*u_brick_none[k][mz][my][mx];
-            eky[k] += y0*u_brick_none[k][mz][my][mx];
-            ekz[k] += z0*u_brick_none[k][mz][my][mx];
-          }
-	}
-      }
-    }
-
-    for (k = 0; k < nsplit; k++) {
-      ekx[k] *= hx_inv;
-      eky[k] *= hy_inv;
-      ekz[k] *= hz_inv;
-    }
-
-    // convert D-field to force
-    type = atom->type[i];
-
-    s1 = x[i][0]*hx_inv;
-    s2 = x[i][1]*hy_inv;
-    s3 = x[i][2]*hz_inv;
-
-    sf1 = sf_coeff_6[0]*sin(2*MY_PI*s1);
-    sf1 += sf_coeff_6[1]*sin(4*MY_PI*s1);
-
-    sf2 = sf_coeff_6[2]*sin(2*MY_PI*s2);
-    sf2 += sf_coeff_6[3]*sin(4*MY_PI*s2);
-
-    sf3 = sf_coeff_6[4]*sin(2*MY_PI*s3);
-    sf3 += sf_coeff_6[5]*sin(4*MY_PI*s3);
-
-    for (k = 0; k < nsplit; k++) {
-      lj = B[nsplit*type + k];
-
-      sf = sf1*B[k]*2*lj*lj;
-      f[i][0] += lj*ekx[k] - sf;
-
-
-      sf = sf2*B[k]*2*lj*lj;
-      f[i][1] += lj*eky[k] - sf;
-
-      sf = sf3*B[k]*2*lj*lj;
-      if (slabflag != 2) f[i][2] += lj*ekz[k] - sf;
-    }
-  }
-
-  delete [] ekx;
-  delete [] eky;
-  delete [] ekz;
-}
-
-/* ----------------------------------------------------------------------
-   interpolate from grid to get dispersion field & force on my particles
-   for arithmetic mixing rule for per atom quantities
-------------------------------------------------------------------------- */
-
-void PPPMDisp::fieldforce_none_peratom()
-{
-  int i,k,l,m,n,nx,ny,nz,mx,my,mz;
-  FFT_SCALAR dx,dy,dz,x0,y0,z0;
-  FFT_SCALAR *u_pa,*v0,*v1,*v2,*v3,*v4,*v5;
-  
-  u_pa = new FFT_SCALAR[nsplit];
-  v0 = new FFT_SCALAR[nsplit];
-  v1 = new FFT_SCALAR[nsplit];
-  v2 = new FFT_SCALAR[nsplit];
-  v3 = new FFT_SCALAR[nsplit];
-  v4 = new FFT_SCALAR[nsplit];
-  v5 = new FFT_SCALAR[nsplit];
-
-  // loop over my charges, interpolate electric field from nearby grid points
-  // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
-  // (dx,dy,dz) = distance to "lower left" grid pt
-  // (mx,my,mz) = global coords of moving stencil pt
-  // ek = 3 components of dispersion field on particle
-
-  double **x = atom->x;
-  int type;
-  double lj;
-
-  int nlocal = atom->nlocal;
-
-  for (i = 0; i < nlocal; i++) {
-
-    nx = part2grid_6[i][0];
-    ny = part2grid_6[i][1];
-    nz = part2grid_6[i][2];
-    dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6;
-    dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6;
-    dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6;
-    compute_rho1d(dx,dy,dz, order_6, rho_coeff_6, rho1d_6);
-
-    for (k = 0; k < nsplit; k++) 
-      u_pa[k] = v0[k] = v1[k] = v2[k] = v3[k] = v4[k] = v5[k] = ZEROF;
- 
-    for (n = nlower_6; n <= nupper_6; n++) {
-      mz = n+nz;
-      z0 = rho1d_6[2][n];
-      for (m = nlower_6; m <= nupper_6; m++) {
-	my = m+ny;
-	y0 = z0*rho1d_6[1][m];
-	for (l = nlower_6; l <= nupper_6; l++) {
-	  mx = l+nx;
-	  x0 = y0*rho1d_6[0][l];
-          if (eflag_atom) {
-            for (k = 0; k < nsplit; k++)
-              u_pa[k] += x0*u_brick_none[k][mz][my][mx];
-	  }
-          if (vflag_atom) {
-            for (k = 0; k < nsplit; k++) {
-              v0[k] += x0*v0_brick_none[k][mz][my][mx];
-              v1[k] += x0*v1_brick_none[k][mz][my][mx];
-              v2[k] += x0*v2_brick_none[k][mz][my][mx];
-              v3[k] += x0*v3_brick_none[k][mz][my][mx];
-              v4[k] += x0*v4_brick_none[k][mz][my][mx];
-              v5[k] += x0*v5_brick_none[k][mz][my][mx];
-            }
-          }
-	}
-      }
-    }
-    // convert D-field to force
-    type = atom->type[i];
-    for (k = 0; k < nsplit; k++) {
-      lj = B[nsplit*type + k]*0.5;
- 
-      if (eflag_atom) {
-        eatom[i] += u_pa[k]*lj;
-      }
-      if (vflag_atom) {
-        vatom[i][0] += v0[k]*lj;
-        vatom[i][1] += v1[k]*lj;
-        vatom[i][2] += v2[k]*lj;
-        vatom[i][3] += v3[k]*lj;
-        vatom[i][4] += v4[k]*lj;
-        vatom[i][5] += v5[k]*lj;
-      }
-    }
-  }
-
-  delete [] u_pa;
-  delete [] v0;
-  delete [] v1;
-  delete [] v2;
-  delete [] v3;
-  delete [] v4;
-  delete [] v5;
-}
-
-/* ----------------------------------------------------------------------
-   pack values to buf to send to another proc
-------------------------------------------------------------------------- */
-
-void PPPMDisp::pack_forward(int flag, FFT_SCALAR *buf, int nlist, int *list)
-{
-  int n = 0;
-
-  switch (flag) {
-
-  // Coulomb interactions
-
-  case FORWARD_IK: {
-    FFT_SCALAR *xsrc = &vdx_brick[nzlo_out][nylo_out][nxlo_out];
-    FFT_SCALAR *ysrc = &vdy_brick[nzlo_out][nylo_out][nxlo_out];
-    FFT_SCALAR *zsrc = &vdz_brick[nzlo_out][nylo_out][nxlo_out];
-    for (int i = 0; i < nlist; i++) {
-      buf[n++] = xsrc[list[i]];
-      buf[n++] = ysrc[list[i]];
-      buf[n++] = zsrc[list[i]];
-    }
-    break;
-  }
-
-  case FORWARD_AD: {
-    FFT_SCALAR *src = &u_brick[nzlo_out][nylo_out][nxlo_out];
-    for (int i = 0; i < nlist; i++)
-      buf[i] = src[list[i]];
-    break;
-  }
-
-  case FORWARD_IK_PERATOM: {
-    FFT_SCALAR *esrc = &u_brick[nzlo_out][nylo_out][nxlo_out];
-    FFT_SCALAR *v0src = &v0_brick[nzlo_out][nylo_out][nxlo_out];
-    FFT_SCALAR *v1src = &v1_brick[nzlo_out][nylo_out][nxlo_out];
-    FFT_SCALAR *v2src = &v2_brick[nzlo_out][nylo_out][nxlo_out];
-    FFT_SCALAR *v3src = &v3_brick[nzlo_out][nylo_out][nxlo_out];
-    FFT_SCALAR *v4src = &v4_brick[nzlo_out][nylo_out][nxlo_out];
-    FFT_SCALAR *v5src = &v5_brick[nzlo_out][nylo_out][nxlo_out];
-    for (int i = 0; i < nlist; i++) {
-      if (eflag_atom) buf[n++] = esrc[list[i]];
-      if (vflag_atom) {
-        buf[n++] = v0src[list[i]];
-        buf[n++] = v1src[list[i]];
-        buf[n++] = v2src[list[i]];
-        buf[n++] = v3src[list[i]];
-        buf[n++] = v4src[list[i]];
-        buf[n++] = v5src[list[i]];
-      }
-    }
-    break;
-  }
-
-  case FORWARD_AD_PERATOM: {
-    FFT_SCALAR *v0src = &v0_brick[nzlo_out][nylo_out][nxlo_out];
-    FFT_SCALAR *v1src = &v1_brick[nzlo_out][nylo_out][nxlo_out];
-    FFT_SCALAR *v2src = &v2_brick[nzlo_out][nylo_out][nxlo_out];
-    FFT_SCALAR *v3src = &v3_brick[nzlo_out][nylo_out][nxlo_out];
-    FFT_SCALAR *v4src = &v4_brick[nzlo_out][nylo_out][nxlo_out];
-    FFT_SCALAR *v5src = &v5_brick[nzlo_out][nylo_out][nxlo_out];
-    for (int i = 0; i < nlist; i++) {
-      buf[n++] = v0src[list[i]];
-      buf[n++] = v1src[list[i]];
-      buf[n++] = v2src[list[i]];
-      buf[n++] = v3src[list[i]];
-      buf[n++] = v4src[list[i]];
-      buf[n++] = v5src[list[i]];
-    }
-    break;
-  }
-
-  // Dispersion interactions, geometric mixing
-
-  case FORWARD_IK_G: {
-    FFT_SCALAR *xsrc = &vdx_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *ysrc = &vdy_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *zsrc = &vdz_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    for (int i = 0; i < nlist; i++) {
-      buf[n++] = xsrc[list[i]];
-      buf[n++] = ysrc[list[i]];
-      buf[n++] = zsrc[list[i]];
-    }
-    break;
-  }
-
-  case FORWARD_AD_G: {
-    FFT_SCALAR *src = &u_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    for (int i = 0; i < nlist; i++)
-      buf[i] = src[list[i]];
-    break;
-  }
-
-  case FORWARD_IK_PERATOM_G: {
-    FFT_SCALAR *esrc = &u_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v0src = &v0_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v1src = &v1_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v2src = &v2_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v3src = &v3_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v4src = &v4_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v5src = &v5_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    for (int i = 0; i < nlist; i++) {
-      if (eflag_atom) buf[n++] = esrc[list[i]];
-      if (vflag_atom) {
-        buf[n++] = v0src[list[i]];
-        buf[n++] = v1src[list[i]];
-        buf[n++] = v2src[list[i]];
-        buf[n++] = v3src[list[i]];
-        buf[n++] = v4src[list[i]];
-        buf[n++] = v5src[list[i]];
-      }
-    }
-    break;
-  }
-
-  case FORWARD_AD_PERATOM_G: {
-    FFT_SCALAR *v0src = &v0_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v1src = &v1_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v2src = &v2_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v3src = &v3_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v4src = &v4_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v5src = &v5_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    for (int i = 0; i < nlist; i++) {
-      buf[n++] = v0src[list[i]];
-      buf[n++] = v1src[list[i]];
-      buf[n++] = v2src[list[i]];
-      buf[n++] = v3src[list[i]];
-      buf[n++] = v4src[list[i]];
-      buf[n++] = v5src[list[i]];
-    }
-    break;
-  }
-
-  // Dispersion interactions, arithmetic mixing
-
-  case FORWARD_IK_A: {
-    FFT_SCALAR *xsrc0 = &vdx_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *ysrc0 = &vdy_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *zsrc0 = &vdz_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
-
-    FFT_SCALAR *xsrc1 = &vdx_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *ysrc1 = &vdy_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *zsrc1 = &vdz_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
-
-    FFT_SCALAR *xsrc2 = &vdx_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *ysrc2 = &vdy_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *zsrc2 = &vdz_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
-
-    FFT_SCALAR *xsrc3 = &vdx_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *ysrc3 = &vdy_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *zsrc3 = &vdz_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
-
-    FFT_SCALAR *xsrc4 = &vdx_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *ysrc4 = &vdy_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *zsrc4 = &vdz_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
-
-    FFT_SCALAR *xsrc5 = &vdx_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *ysrc5 = &vdy_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *zsrc5 = &vdz_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
-
-    FFT_SCALAR *xsrc6 = &vdx_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *ysrc6 = &vdy_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *zsrc6 = &vdz_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
-
-    for (int i = 0; i < nlist; i++) {
-      buf[n++] = xsrc0[list[i]];
-      buf[n++] = ysrc0[list[i]];
-      buf[n++] = zsrc0[list[i]];
-
-      buf[n++] = xsrc1[list[i]];
-      buf[n++] = ysrc1[list[i]];
-      buf[n++] = zsrc1[list[i]];
-
-      buf[n++] = xsrc2[list[i]];
-      buf[n++] = ysrc2[list[i]];
-      buf[n++] = zsrc2[list[i]];
-
-      buf[n++] = xsrc3[list[i]];
-      buf[n++] = ysrc3[list[i]];
-      buf[n++] = zsrc3[list[i]];
-
-      buf[n++] = xsrc4[list[i]];
-      buf[n++] = ysrc4[list[i]];
-      buf[n++] = zsrc4[list[i]];
-
-      buf[n++] = xsrc5[list[i]];
-      buf[n++] = ysrc5[list[i]];
-      buf[n++] = zsrc5[list[i]];
-
-      buf[n++] = xsrc6[list[i]];
-      buf[n++] = ysrc6[list[i]];
-      buf[n++] = zsrc6[list[i]];
-    }
-    break;
-  }
-
-  case FORWARD_AD_A: {
-    FFT_SCALAR *src0 = &u_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *src1 = &u_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *src2 = &u_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *src3 = &u_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *src4 = &u_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *src5 = &u_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *src6 = &u_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
-
-    for (int i = 0; i < nlist; i++) {
-      buf[n++] = src0[list[i]];
-      buf[n++] = src1[list[i]];
-      buf[n++] = src2[list[i]];
-      buf[n++] = src3[list[i]];
-      buf[n++] = src4[list[i]];
-      buf[n++] = src5[list[i]];
-      buf[n++] = src6[list[i]];
-    }
-    break;
-  }
-
-  case FORWARD_IK_PERATOM_A: {
-    FFT_SCALAR *esrc0 = &u_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v0src0 = &v0_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v1src0 = &v1_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v2src0 = &v2_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v3src0 = &v3_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v4src0 = &v4_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v5src0 = &v5_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
-
-    FFT_SCALAR *esrc1 = &u_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v0src1 = &v0_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v1src1 = &v1_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v2src1 = &v2_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v3src1 = &v3_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v4src1 = &v4_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v5src1 = &v5_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
-
-    FFT_SCALAR *esrc2 = &u_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v0src2 = &v0_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v1src2 = &v1_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v2src2 = &v2_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v3src2 = &v3_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v4src2 = &v4_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v5src2 = &v5_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
-
-    FFT_SCALAR *esrc3 = &u_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v0src3 = &v0_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v1src3 = &v1_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v2src3 = &v2_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v3src3 = &v3_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v4src3 = &v4_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v5src3 = &v5_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
-
-    FFT_SCALAR *esrc4 = &u_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v0src4 = &v0_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v1src4 = &v1_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v2src4 = &v2_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v3src4 = &v3_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v4src4 = &v4_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v5src4 = &v5_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
-
-    FFT_SCALAR *esrc5 = &u_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v0src5 = &v0_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v1src5 = &v1_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v2src5 = &v2_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v3src5 = &v3_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v4src5 = &v4_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v5src5 = &v5_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
-
-    FFT_SCALAR *esrc6 = &u_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v0src6 = &v0_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v1src6 = &v1_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v2src6 = &v2_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v3src6 = &v3_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v4src6 = &v4_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v5src6 = &v5_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
-
-    for (int i = 0; i < nlist; i++) {
-      if (eflag_atom) {
-        buf[n++] = esrc0[list[i]];
-        buf[n++] = esrc1[list[i]];
-        buf[n++] = esrc2[list[i]];
-        buf[n++] = esrc3[list[i]];
-        buf[n++] = esrc4[list[i]];
-        buf[n++] = esrc5[list[i]];
-        buf[n++] = esrc6[list[i]];
-      }
-      if (vflag_atom) {
-        buf[n++] = v0src0[list[i]];
-        buf[n++] = v1src0[list[i]];
-        buf[n++] = v2src0[list[i]];
-        buf[n++] = v3src0[list[i]];
-        buf[n++] = v4src0[list[i]];
-        buf[n++] = v5src0[list[i]];
-
-        buf[n++] = v0src1[list[i]];
-        buf[n++] = v1src1[list[i]];
-        buf[n++] = v2src1[list[i]];
-        buf[n++] = v3src1[list[i]];
-        buf[n++] = v4src1[list[i]];
-        buf[n++] = v5src1[list[i]];
-
-        buf[n++] = v0src2[list[i]];
-        buf[n++] = v1src2[list[i]];
-        buf[n++] = v2src2[list[i]];
-        buf[n++] = v3src2[list[i]];
-        buf[n++] = v4src2[list[i]];
-        buf[n++] = v5src2[list[i]];
-
-        buf[n++] = v0src3[list[i]];
-        buf[n++] = v1src3[list[i]];
-        buf[n++] = v2src3[list[i]];
-        buf[n++] = v3src3[list[i]];
-        buf[n++] = v4src3[list[i]];
-        buf[n++] = v5src3[list[i]];
-
-        buf[n++] = v0src4[list[i]];
-        buf[n++] = v1src4[list[i]];
-        buf[n++] = v2src4[list[i]];
-        buf[n++] = v3src4[list[i]];
-        buf[n++] = v4src4[list[i]];
-        buf[n++] = v5src4[list[i]];
-
-        buf[n++] = v0src5[list[i]];
-        buf[n++] = v1src5[list[i]];
-        buf[n++] = v2src5[list[i]];
-        buf[n++] = v3src5[list[i]];
-        buf[n++] = v4src5[list[i]];
-        buf[n++] = v5src5[list[i]];
-
-        buf[n++] = v0src6[list[i]];
-        buf[n++] = v1src6[list[i]];
-        buf[n++] = v2src6[list[i]];
-        buf[n++] = v3src6[list[i]];
-        buf[n++] = v4src6[list[i]];
-        buf[n++] = v5src6[list[i]];
-      }
-    }
-    break;
-  }
-
-  case FORWARD_AD_PERATOM_A: {
-    FFT_SCALAR *v0src0 = &v0_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v1src0 = &v1_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v2src0 = &v2_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v3src0 = &v3_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v4src0 = &v4_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v5src0 = &v5_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
-
-    FFT_SCALAR *v0src1 = &v0_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v1src1 = &v1_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v2src1 = &v2_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v3src1 = &v3_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v4src1 = &v4_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v5src1 = &v5_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
-
-    FFT_SCALAR *v0src2 = &v0_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v1src2 = &v1_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v2src2 = &v2_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v3src2 = &v3_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v4src2 = &v4_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v5src2 = &v5_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
-
-    FFT_SCALAR *v0src3 = &v0_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v1src3 = &v1_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v2src3 = &v2_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v3src3 = &v3_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v4src3 = &v4_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v5src3 = &v5_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
-
-    FFT_SCALAR *v0src4 = &v0_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v1src4 = &v1_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v2src4 = &v2_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v3src4 = &v3_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v4src4 = &v4_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v5src4 = &v5_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
-
-    FFT_SCALAR *v0src5 = &v0_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v1src5 = &v1_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v2src5 = &v2_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v3src5 = &v3_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v4src5 = &v4_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v5src5 = &v5_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
-
-    FFT_SCALAR *v0src6 = &v0_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v1src6 = &v1_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v2src6 = &v2_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v3src6 = &v3_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v4src6 = &v4_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v5src6 = &v5_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
-
-    for (int i = 0; i < nlist; i++) {
-      buf[n++] = v0src0[list[i]];
-      buf[n++] = v1src0[list[i]];
-      buf[n++] = v2src0[list[i]];
-      buf[n++] = v3src0[list[i]];
-      buf[n++] = v4src0[list[i]];
-      buf[n++] = v5src0[list[i]];
-
-      buf[n++] = v0src1[list[i]];
-      buf[n++] = v1src1[list[i]];
-      buf[n++] = v2src1[list[i]];
-      buf[n++] = v3src1[list[i]];
-      buf[n++] = v4src1[list[i]];
-      buf[n++] = v5src1[list[i]];
-
-      buf[n++] = v0src2[list[i]];
-      buf[n++] = v1src2[list[i]];
-      buf[n++] = v2src2[list[i]];
-      buf[n++] = v3src2[list[i]];
-      buf[n++] = v4src2[list[i]];
-      buf[n++] = v5src2[list[i]];
-
-      buf[n++] = v0src3[list[i]];
-      buf[n++] = v1src3[list[i]];
-      buf[n++] = v2src3[list[i]];
-      buf[n++] = v3src3[list[i]];
-      buf[n++] = v4src3[list[i]];
-      buf[n++] = v5src3[list[i]];
-
-      buf[n++] = v0src4[list[i]];
-      buf[n++] = v1src4[list[i]];
-      buf[n++] = v2src4[list[i]];
-      buf[n++] = v3src4[list[i]];
-      buf[n++] = v4src4[list[i]];
-      buf[n++] = v5src4[list[i]];
-
-      buf[n++] = v0src5[list[i]];
-      buf[n++] = v1src5[list[i]];
-      buf[n++] = v2src5[list[i]];
-      buf[n++] = v3src5[list[i]];
-      buf[n++] = v4src5[list[i]];
-      buf[n++] = v5src5[list[i]];
-
-      buf[n++] = v0src6[list[i]];
-      buf[n++] = v1src6[list[i]];
-      buf[n++] = v2src6[list[i]];
-      buf[n++] = v3src6[list[i]];
-      buf[n++] = v4src6[list[i]];
-      buf[n++] = v5src6[list[i]];
-    }
-    break;
-  }
-
-  // Dispersion interactions, no mixing
-
-  case FORWARD_IK_NONE: {
-    for (int k = 0; k < nsplit_alloc; k++) {
-      FFT_SCALAR *xsrc = &vdx_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
-      FFT_SCALAR *ysrc = &vdy_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
-      FFT_SCALAR *zsrc = &vdz_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
-      for (int i = 0; i < nlist; i++) {
-        buf[n++] = xsrc[list[i]];
-        buf[n++] = ysrc[list[i]];
-        buf[n++] = zsrc[list[i]];
-      }
-    }
-    break;
-  }
-
-  case FORWARD_AD_NONE: {
-    for (int k = 0; k < nsplit_alloc; k++) {
-      FFT_SCALAR *src = &u_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
-      for (int i = 0; i < nlist; i++)
-        buf[n++] = src[list[i]];
-    }
-    break;
-  }
-
-  case FORWARD_IK_PERATOM_NONE: {
-    for (int k = 0; k < nsplit_alloc; k++) {
-      FFT_SCALAR *esrc = &u_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
-      FFT_SCALAR *v0src = &v0_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
-      FFT_SCALAR *v1src = &v1_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
-      FFT_SCALAR *v2src = &v2_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
-      FFT_SCALAR *v3src = &v3_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
-      FFT_SCALAR *v4src = &v4_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
-      FFT_SCALAR *v5src = &v5_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
-      for (int i = 0; i < nlist; i++) {
-        if (eflag_atom) buf[n++] = esrc[list[i]];
-        if (vflag_atom) {
-          buf[n++] = v0src[list[i]];
-          buf[n++] = v1src[list[i]];
-          buf[n++] = v2src[list[i]];
-          buf[n++] = v3src[list[i]];
-          buf[n++] = v4src[list[i]];
-          buf[n++] = v5src[list[i]];
-        }
-      }
-    }
-    break;
-  }
-
-  case FORWARD_AD_PERATOM_NONE: {
-    for (int k = 0; k < nsplit_alloc; k++) {
-      FFT_SCALAR *v0src = &v0_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
-      FFT_SCALAR *v1src = &v1_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
-      FFT_SCALAR *v2src = &v2_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
-      FFT_SCALAR *v3src = &v3_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
-      FFT_SCALAR *v4src = &v4_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
-      FFT_SCALAR *v5src = &v5_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
-      for (int i = 0; i < nlist; i++) {
-        buf[n++] = v0src[list[i]];
-        buf[n++] = v1src[list[i]];
-        buf[n++] = v2src[list[i]];
-        buf[n++] = v3src[list[i]];
-        buf[n++] = v4src[list[i]];
-        buf[n++] = v5src[list[i]];
-      }
-    }
-    break;
-  }
-
-  }
-}
-
-/* ----------------------------------------------------------------------
-   unpack another proc's own values from buf and set own ghost values
-------------------------------------------------------------------------- */
-
-void PPPMDisp::unpack_forward(int flag, FFT_SCALAR *buf, int nlist, int *list)
-{
-  int n = 0;
-
-  switch (flag) {
-
-  // Coulomb interactions
-
-  case FORWARD_IK: {
-    FFT_SCALAR *xdest = &vdx_brick[nzlo_out][nylo_out][nxlo_out];
-    FFT_SCALAR *ydest = &vdy_brick[nzlo_out][nylo_out][nxlo_out];
-    FFT_SCALAR *zdest = &vdz_brick[nzlo_out][nylo_out][nxlo_out];
-    for (int i = 0; i < nlist; i++) {
-      xdest[list[i]] = buf[n++];
-      ydest[list[i]] = buf[n++];
-      zdest[list[i]] = buf[n++];
-    }
-    break;
-  }
-
-  case FORWARD_AD: {
-    FFT_SCALAR *dest = &u_brick[nzlo_out][nylo_out][nxlo_out];
-    for (int i = 0; i < nlist; i++)
-      dest[list[i]] = buf[n++];
-    break;
-  }
-
-  case FORWARD_IK_PERATOM: {
-    FFT_SCALAR *esrc = &u_brick[nzlo_out][nylo_out][nxlo_out];
-    FFT_SCALAR *v0src = &v0_brick[nzlo_out][nylo_out][nxlo_out];
-    FFT_SCALAR *v1src = &v1_brick[nzlo_out][nylo_out][nxlo_out];
-    FFT_SCALAR *v2src = &v2_brick[nzlo_out][nylo_out][nxlo_out];
-    FFT_SCALAR *v3src = &v3_brick[nzlo_out][nylo_out][nxlo_out];
-    FFT_SCALAR *v4src = &v4_brick[nzlo_out][nylo_out][nxlo_out];
-    FFT_SCALAR *v5src = &v5_brick[nzlo_out][nylo_out][nxlo_out];
-    for (int i = 0; i < nlist; i++) {
-      if (eflag_atom) esrc[list[i]] = buf[n++];
-      if (vflag_atom) {
-        v0src[list[i]] = buf[n++];
-        v1src[list[i]] = buf[n++];
-        v2src[list[i]] = buf[n++];
-        v3src[list[i]] = buf[n++];
-        v4src[list[i]] = buf[n++];
-        v5src[list[i]] = buf[n++];
-      }
-    }
-    break;
-  }
-
-  case FORWARD_AD_PERATOM: {
-    FFT_SCALAR *v0src = &v0_brick[nzlo_out][nylo_out][nxlo_out];
-    FFT_SCALAR *v1src = &v1_brick[nzlo_out][nylo_out][nxlo_out];
-    FFT_SCALAR *v2src = &v2_brick[nzlo_out][nylo_out][nxlo_out];
-    FFT_SCALAR *v3src = &v3_brick[nzlo_out][nylo_out][nxlo_out];
-    FFT_SCALAR *v4src = &v4_brick[nzlo_out][nylo_out][nxlo_out];
-    FFT_SCALAR *v5src = &v5_brick[nzlo_out][nylo_out][nxlo_out];
-    for (int i = 0; i < nlist; i++) {
-      v0src[list[i]] = buf[n++];
-      v1src[list[i]] = buf[n++];
-      v2src[list[i]] = buf[n++];
-      v3src[list[i]] = buf[n++];
-      v4src[list[i]] = buf[n++];
-      v5src[list[i]] = buf[n++];
-    }
-    break;
-  }
-
-  // Dispersion interactions, geometric mixing
-
-  case FORWARD_IK_G: {
-    FFT_SCALAR *xdest = &vdx_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *ydest = &vdy_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *zdest = &vdz_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    for (int i = 0; i < nlist; i++) {
-      xdest[list[i]] = buf[n++];
-      ydest[list[i]] = buf[n++];
-      zdest[list[i]] = buf[n++];
-    }
-    break;
-  }
-
-  case FORWARD_AD_G: {
-    FFT_SCALAR *dest = &u_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    for (int i = 0; i < nlist; i++)
-      dest[list[i]] = buf[n++];
-    break;
-  }
-
-  case FORWARD_IK_PERATOM_G: {
-    FFT_SCALAR *esrc = &u_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v0src = &v0_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v1src = &v1_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v2src = &v2_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v3src = &v3_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v4src = &v4_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v5src = &v5_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    for (int i = 0; i < nlist; i++) {
-      if (eflag_atom) esrc[list[i]] = buf[n++];
-      if (vflag_atom) {
-        v0src[list[i]] = buf[n++];
-        v1src[list[i]] = buf[n++];
-        v2src[list[i]] = buf[n++];
-        v3src[list[i]] = buf[n++];
-        v4src[list[i]] = buf[n++];
-        v5src[list[i]] = buf[n++];
-      }
-    }
-    break;
-  }
-
-  case FORWARD_AD_PERATOM_G: {
-    FFT_SCALAR *v0src = &v0_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v1src = &v1_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v2src = &v2_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v3src = &v3_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v4src = &v4_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v5src = &v5_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    for (int i = 0; i < nlist; i++) {
-      v0src[list[i]] = buf[n++];
-      v1src[list[i]] = buf[n++];
-      v2src[list[i]] = buf[n++];
-      v3src[list[i]] = buf[n++];
-      v4src[list[i]] = buf[n++];
-      v5src[list[i]] = buf[n++];
-    }
-    break;
-  }
-
-  // Dispersion interactions, arithmetic mixing
-
-  case FORWARD_IK_A: {
-    FFT_SCALAR *xdest0 = &vdx_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *ydest0 = &vdy_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *zdest0 = &vdz_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
-
-    FFT_SCALAR *xdest1 = &vdx_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *ydest1 = &vdy_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *zdest1 = &vdz_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
-
-    FFT_SCALAR *xdest2 = &vdx_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *ydest2 = &vdy_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *zdest2 = &vdz_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
-
-    FFT_SCALAR *xdest3 = &vdx_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *ydest3 = &vdy_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *zdest3 = &vdz_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
-
-    FFT_SCALAR *xdest4 = &vdx_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *ydest4 = &vdy_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *zdest4 = &vdz_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
-
-    FFT_SCALAR *xdest5 = &vdx_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *ydest5 = &vdy_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *zdest5 = &vdz_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
-
-    FFT_SCALAR *xdest6 = &vdx_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *ydest6 = &vdy_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *zdest6 = &vdz_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
-
-    for (int i = 0; i < nlist; i++) {
-      xdest0[list[i]] = buf[n++];
-      ydest0[list[i]] = buf[n++];
-      zdest0[list[i]] = buf[n++];
-
-      xdest1[list[i]] = buf[n++];
-      ydest1[list[i]] = buf[n++];
-      zdest1[list[i]] = buf[n++];
-
-      xdest2[list[i]] = buf[n++];
-      ydest2[list[i]] = buf[n++];
-      zdest2[list[i]] = buf[n++];
-
-      xdest3[list[i]] = buf[n++];
-      ydest3[list[i]] = buf[n++];
-      zdest3[list[i]] = buf[n++];
-
-      xdest4[list[i]] = buf[n++];
-      ydest4[list[i]] = buf[n++];
-      zdest4[list[i]] = buf[n++];
-
-      xdest5[list[i]] = buf[n++];
-      ydest5[list[i]] = buf[n++];
-      zdest5[list[i]] = buf[n++];
-
-      xdest6[list[i]] = buf[n++];
-      ydest6[list[i]] = buf[n++];
-      zdest6[list[i]] = buf[n++];
-    }
-    break;
-  }
-
-  case FORWARD_AD_A: {
-    FFT_SCALAR *dest0 = &u_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *dest1 = &u_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *dest2 = &u_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *dest3 = &u_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *dest4 = &u_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *dest5 = &u_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *dest6 = &u_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
-
-    for (int i = 0; i < nlist; i++) {
-      dest0[list[i]] = buf[n++];
-      dest1[list[i]] = buf[n++];
-      dest2[list[i]] = buf[n++];
-      dest3[list[i]] = buf[n++];
-      dest4[list[i]] = buf[n++];
-      dest5[list[i]] = buf[n++];
-      dest6[list[i]] = buf[n++];
-    }
-    break;
-  }
-
-  case FORWARD_IK_PERATOM_A: {
-    FFT_SCALAR *esrc0 = &u_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v0src0 = &v0_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v1src0 = &v1_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v2src0 = &v2_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v3src0 = &v3_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v4src0 = &v4_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v5src0 = &v5_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
-
-    FFT_SCALAR *esrc1 = &u_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v0src1 = &v0_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v1src1 = &v1_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v2src1 = &v2_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v3src1 = &v3_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v4src1 = &v4_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v5src1 = &v5_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
-
-    FFT_SCALAR *esrc2 = &u_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v0src2 = &v0_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v1src2 = &v1_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v2src2 = &v2_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v3src2 = &v3_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v4src2 = &v4_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v5src2 = &v5_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
-
-    FFT_SCALAR *esrc3 = &u_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v0src3 = &v0_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v1src3 = &v1_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v2src3 = &v2_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v3src3 = &v3_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v4src3 = &v4_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v5src3 = &v5_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
-
-    FFT_SCALAR *esrc4 = &u_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v0src4 = &v0_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v1src4 = &v1_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v2src4 = &v2_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v3src4 = &v3_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v4src4 = &v4_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v5src4 = &v5_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
-
-    FFT_SCALAR *esrc5 = &u_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v0src5 = &v0_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v1src5 = &v1_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v2src5 = &v2_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v3src5 = &v3_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v4src5 = &v4_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v5src5 = &v5_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
-
-    FFT_SCALAR *esrc6 = &u_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v0src6 = &v0_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v1src6 = &v1_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v2src6 = &v2_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v3src6 = &v3_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v4src6 = &v4_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v5src6 = &v5_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
-
-    for (int i = 0; i < nlist; i++) {
-      if (eflag_atom) {
-        esrc0[list[i]] = buf[n++];
-        esrc1[list[i]] = buf[n++];
-        esrc2[list[i]] = buf[n++];
-        esrc3[list[i]] = buf[n++];
-        esrc4[list[i]] = buf[n++];
-        esrc5[list[i]] = buf[n++];
-        esrc6[list[i]] = buf[n++];
-      }
-      if (vflag_atom) {
-        v0src0[list[i]] = buf[n++];
-        v1src0[list[i]] = buf[n++];
-        v2src0[list[i]] = buf[n++];
-        v3src0[list[i]] = buf[n++];
-        v4src0[list[i]] = buf[n++];
-        v5src0[list[i]] = buf[n++];
-
-        v0src1[list[i]] = buf[n++];
-        v1src1[list[i]] = buf[n++];
-        v2src1[list[i]] = buf[n++];
-        v3src1[list[i]] = buf[n++];
-        v4src1[list[i]] = buf[n++];
-        v5src1[list[i]] = buf[n++];
-
-        v0src2[list[i]] = buf[n++];
-        v1src2[list[i]] = buf[n++];
-        v2src2[list[i]] = buf[n++];
-        v3src2[list[i]] = buf[n++];
-        v4src2[list[i]] = buf[n++];
-        v5src2[list[i]] = buf[n++];
-
-        v0src3[list[i]] = buf[n++];
-        v1src3[list[i]] = buf[n++];
-        v2src3[list[i]] = buf[n++];
-        v3src3[list[i]] = buf[n++];
-        v4src3[list[i]] = buf[n++];
-        v5src3[list[i]] = buf[n++];
-
-        v0src4[list[i]] = buf[n++];
-        v1src4[list[i]] = buf[n++];
-        v2src4[list[i]] = buf[n++];
-        v3src4[list[i]] = buf[n++];
-        v4src4[list[i]] = buf[n++];
-        v5src4[list[i]] = buf[n++];
-
-        v0src5[list[i]] = buf[n++];
-        v1src5[list[i]] = buf[n++];
-        v2src5[list[i]] = buf[n++];
-        v3src5[list[i]] = buf[n++];
-        v4src5[list[i]] = buf[n++];
-        v5src5[list[i]] = buf[n++];
-
-        v0src6[list[i]] = buf[n++];
-        v1src6[list[i]] = buf[n++];
-        v2src6[list[i]] = buf[n++];
-        v3src6[list[i]] = buf[n++];
-        v4src6[list[i]] = buf[n++];
-        v5src6[list[i]] = buf[n++];
-      }
-    }
-    break;
-  }
-
-  case FORWARD_AD_PERATOM_A: {
-    FFT_SCALAR *v0src0 = &v0_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v1src0 = &v1_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v2src0 = &v2_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v3src0 = &v3_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v4src0 = &v4_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v5src0 = &v5_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
-
-    FFT_SCALAR *v0src1 = &v0_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v1src1 = &v1_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v2src1 = &v2_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v3src1 = &v3_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v4src1 = &v4_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v5src1 = &v5_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
-
-    FFT_SCALAR *v0src2 = &v0_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v1src2 = &v1_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v2src2 = &v2_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v3src2 = &v3_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v4src2 = &v4_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v5src2 = &v5_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
-
-    FFT_SCALAR *v0src3 = &v0_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v1src3 = &v1_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v2src3 = &v2_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v3src3 = &v3_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v4src3 = &v4_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v5src3 = &v5_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
-
-    FFT_SCALAR *v0src4 = &v0_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v1src4 = &v1_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v2src4 = &v2_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v3src4 = &v3_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v4src4 = &v4_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v5src4 = &v5_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
-
-    FFT_SCALAR *v0src5 = &v0_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v1src5 = &v1_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v2src5 = &v2_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v3src5 = &v3_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v4src5 = &v4_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v5src5 = &v5_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
-
-    FFT_SCALAR *v0src6 = &v0_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v1src6 = &v1_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v2src6 = &v2_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v3src6 = &v3_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v4src6 = &v4_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *v5src6 = &v5_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
-
-    for (int i = 0; i < nlist; i++) {
-      v0src0[list[i]] = buf[n++];
-      v1src0[list[i]] = buf[n++];
-      v2src0[list[i]] = buf[n++];
-      v3src0[list[i]] = buf[n++];
-      v4src0[list[i]] = buf[n++];
-      v5src0[list[i]] = buf[n++];
-
-      v0src1[list[i]] = buf[n++];
-      v1src1[list[i]] = buf[n++];
-      v2src1[list[i]] = buf[n++];
-      v3src1[list[i]] = buf[n++];
-      v4src1[list[i]] = buf[n++];
-      v5src1[list[i]] = buf[n++];
-
-      v0src2[list[i]] = buf[n++];
-      v1src2[list[i]] = buf[n++];
-      v2src2[list[i]] = buf[n++];
-      v3src2[list[i]] = buf[n++];
-      v4src2[list[i]] = buf[n++];
-      v5src2[list[i]] = buf[n++];
-
-      v0src3[list[i]] = buf[n++];
-      v1src3[list[i]] = buf[n++];
-      v2src3[list[i]] = buf[n++];
-      v3src3[list[i]] = buf[n++];
-      v4src3[list[i]] = buf[n++];
-      v5src3[list[i]] = buf[n++];
-
-      v0src4[list[i]] = buf[n++];
-      v1src4[list[i]] = buf[n++];
-      v2src4[list[i]] = buf[n++];
-      v3src4[list[i]] = buf[n++];
-      v4src4[list[i]] = buf[n++];
-      v5src4[list[i]] = buf[n++];
-
-      v0src5[list[i]] = buf[n++];
-      v1src5[list[i]] = buf[n++];
-      v2src5[list[i]] = buf[n++];
-      v3src5[list[i]] = buf[n++];
-      v4src5[list[i]] = buf[n++];
-      v5src5[list[i]] = buf[n++];
-
-      v0src6[list[i]] = buf[n++];
-      v1src6[list[i]] = buf[n++];
-      v2src6[list[i]] = buf[n++];
-      v3src6[list[i]] = buf[n++];
-      v4src6[list[i]] = buf[n++];
-      v5src6[list[i]] = buf[n++];
-    }
-    break;
-  }
-
-  // Disperion interactions, geometric mixing
-
-  case FORWARD_IK_NONE: {
-    for (int k = 0; k < nsplit_alloc; k++) {
-      FFT_SCALAR *xdest = &vdx_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
-      FFT_SCALAR *ydest = &vdy_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
-      FFT_SCALAR *zdest = &vdz_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
-      for (int i = 0; i < nlist; i++) {
-        xdest[list[i]] = buf[n++];
-        ydest[list[i]] = buf[n++];
-        zdest[list[i]] = buf[n++];
-      }
-    }
-    break;
-  }
-
-  case FORWARD_AD_NONE: {
-    for (int k = 0; k < nsplit_alloc; k++) {
-      FFT_SCALAR *dest = &u_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
-      for (int i = 0; i < nlist; i++)
-        dest[list[i]] = buf[n++];
-    }
-    break;
-  }
-
-  case FORWARD_IK_PERATOM_NONE: {
-    for (int k = 0; k < nsplit_alloc; k++) {
-      FFT_SCALAR *esrc = &u_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
-      FFT_SCALAR *v0src = &v0_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
-      FFT_SCALAR *v1src = &v1_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
-      FFT_SCALAR *v2src = &v2_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
-      FFT_SCALAR *v3src = &v3_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
-      FFT_SCALAR *v4src = &v4_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
-      FFT_SCALAR *v5src = &v5_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
-      for (int i = 0; i < nlist; i++) {
-        if (eflag_atom) esrc[list[i]] = buf[n++];
-        if (vflag_atom) {
-          v0src[list[i]] = buf[n++];
-          v1src[list[i]] = buf[n++];
-          v2src[list[i]] = buf[n++];
-          v3src[list[i]] = buf[n++];
-          v4src[list[i]] = buf[n++];
-          v5src[list[i]] = buf[n++];
-        }
-      }
-    }
-    break;
-  }
-
-  case FORWARD_AD_PERATOM_NONE: {
-    for (int k = 0; k < nsplit_alloc; k++) {
-      FFT_SCALAR *v0src = &v0_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
-      FFT_SCALAR *v1src = &v1_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
-      FFT_SCALAR *v2src = &v2_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
-      FFT_SCALAR *v3src = &v3_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
-      FFT_SCALAR *v4src = &v4_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
-      FFT_SCALAR *v5src = &v5_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
-      for (int i = 0; i < nlist; i++) {
-        v0src[list[i]] = buf[n++];
-        v1src[list[i]] = buf[n++];
-        v2src[list[i]] = buf[n++];
-        v3src[list[i]] = buf[n++];
-        v4src[list[i]] = buf[n++];
-        v5src[list[i]] = buf[n++];
-      }
-    }
-    break;
-  }
-
-  }
-}
-
-/* ----------------------------------------------------------------------
-   pack ghost values into buf to send to another proc
-------------------------------------------------------------------------- */
-
-void PPPMDisp::pack_reverse(int flag, FFT_SCALAR *buf, int nlist, int *list)
-{
-  int n = 0;
-
-  //Coulomb interactions
-
-  if (flag == REVERSE_RHO) {
-    FFT_SCALAR *src = &density_brick[nzlo_out][nylo_out][nxlo_out];
-    for (int i = 0; i < nlist; i++)
-      buf[i] = src[list[i]];
-
-  //Dispersion interactions, geometric mixing
-
-  } else if (flag == REVERSE_RHO_G) {
-    FFT_SCALAR *src = &density_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    for (int i = 0; i < nlist; i++)
-      buf[i] = src[list[i]];
-
-  //Dispersion interactions, arithmetic mixing
-
-  } else if (flag == REVERSE_RHO_A) {
-    FFT_SCALAR *src0 = &density_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *src1 = &density_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *src2 = &density_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *src3 = &density_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *src4 = &density_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *src5 = &density_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *src6 = &density_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    for (int i = 0; i < nlist; i++) {
-      buf[n++] = src0[list[i]];
-      buf[n++] = src1[list[i]];
-      buf[n++] = src2[list[i]];
-      buf[n++] = src3[list[i]];
-      buf[n++] = src4[list[i]];
-      buf[n++] = src5[list[i]];
-      buf[n++] = src6[list[i]];
-    }
-
-  //Dispersion interactions, no mixing
-
-  } else if (flag == REVERSE_RHO_NONE) {
-    for (int k = 0; k < nsplit_alloc; k++) {
-      FFT_SCALAR *src = &density_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
-      for (int i = 0; i < nlist; i++) {
-        buf[n++] = src[list[i]];
-      }
-    }
-  }
-}
-
-/* ----------------------------------------------------------------------
-   unpack another proc's ghost values from buf and add to own values
-------------------------------------------------------------------------- */
-
-void PPPMDisp::unpack_reverse(int flag, FFT_SCALAR *buf, int nlist, int *list)
-{
-  int n = 0;
-
-  //Coulomb interactions
-
-  if (flag == REVERSE_RHO) {
-    FFT_SCALAR *dest = &density_brick[nzlo_out][nylo_out][nxlo_out];
-    for (int i = 0; i < nlist; i++)
-      dest[list[i]] += buf[i];
-
-  //Dispersion interactions, geometric mixing
-
-  } else if (flag == REVERSE_RHO_G) {
-    FFT_SCALAR *dest = &density_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    for (int i = 0; i < nlist; i++)
-      dest[list[i]] += buf[i];
-
-  //Dispersion interactions, arithmetic mixing
-
-  } else if (flag == REVERSE_RHO_A) {
-    FFT_SCALAR *dest0 = &density_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *dest1 = &density_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *dest2 = &density_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *dest3 = &density_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *dest4 = &density_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *dest5 = &density_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    FFT_SCALAR *dest6 = &density_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
-    for (int i = 0; i < nlist; i++) {
-      dest0[list[i]] += buf[n++];
-      dest1[list[i]] += buf[n++];
-      dest2[list[i]] += buf[n++];
-      dest3[list[i]] += buf[n++];
-      dest4[list[i]] += buf[n++];
-      dest5[list[i]] += buf[n++];
-      dest6[list[i]] += buf[n++];
-    }
-
-  //Dispersion interactions, no mixing
-
-  } else if (flag == REVERSE_RHO_NONE) {
-    for (int k = 0; k < nsplit_alloc; k++) {
-      FFT_SCALAR *dest = &density_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
-      for (int i = 0; i < nlist; i++)
-        dest[list[i]] += buf[n++];
-    }
-  }
-}
-
-/* ----------------------------------------------------------------------
-   map nprocs to NX by NY grid as PX by PY procs - return optimal px,py 
-------------------------------------------------------------------------- */
-
-void PPPMDisp::procs2grid2d(int nprocs, int nx, int ny, int *px, int *py)
-{
-  // loop thru all possible factorizations of nprocs
-  // surf = surface area of largest proc sub-domain
-  // innermost if test minimizes surface area and surface/volume ratio
-
-  int bestsurf = 2 * (nx + ny);
-  int bestboxx = 0;
-  int bestboxy = 0;
-
-  int boxx,boxy,surf,ipx,ipy;
-
-  ipx = 1;
-  while (ipx <= nprocs) {
-    if (nprocs % ipx == 0) {
-      ipy = nprocs/ipx;
-      boxx = nx/ipx;
-      if (nx % ipx) boxx++;
-      boxy = ny/ipy;
-      if (ny % ipy) boxy++;
-      surf = boxx + boxy;
-      if (surf < bestsurf || 
-	  (surf == bestsurf && boxx*boxy > bestboxx*bestboxy)) {
-	bestsurf = surf;
-	bestboxx = boxx;
-	bestboxy = boxy;
-	*px = ipx;
-	*py = ipy;
-      }
-    }
-    ipx++;
-  }
-}
-
-/* ----------------------------------------------------------------------
-   charge assignment into rho1d
-   dx,dy,dz = distance of particle from "lower left" grid point 
-------------------------------------------------------------------------- */
-
-void PPPMDisp::compute_rho1d(const FFT_SCALAR &dx, const FFT_SCALAR &dy,
-			      const FFT_SCALAR &dz, int ord, 
-                             FFT_SCALAR **rho_c, FFT_SCALAR **r1d)
-{
-  int k,l;
-  FFT_SCALAR r1,r2,r3;
-
-  for (k = (1-ord)/2; k <= ord/2; k++) {
-    r1 = r2 = r3 = ZEROF;
-
-    for (l = ord-1; l >= 0; l--) {
-      r1 = rho_c[l][k] + r1*dx;
-      r2 = rho_c[l][k] + r2*dy;
-      r3 = rho_c[l][k] + r3*dz;
-    }
-    r1d[0][k] = r1;
-    r1d[1][k] = r2;
-    r1d[2][k] = r3;
-  }
-}
-
-/* ----------------------------------------------------------------------
-   charge assignment into drho1d
-   dx,dy,dz = distance of particle from "lower left" grid point
-------------------------------------------------------------------------- */
-
-void PPPMDisp::compute_drho1d(const FFT_SCALAR &dx, const FFT_SCALAR &dy,
-                          const FFT_SCALAR &dz, int ord, 
-                              FFT_SCALAR **drho_c, FFT_SCALAR **dr1d)
-{
-  int k,l;
-  FFT_SCALAR r1,r2,r3;
-
-  for (k = (1-ord)/2; k <= ord/2; k++) {
-    r1 = r2 = r3 = ZEROF;
-
-    for (l = ord-2; l >= 0; l--) {
-      r1 = drho_c[l][k] + r1*dx;
-      r2 = drho_c[l][k] + r2*dy;
-      r3 = drho_c[l][k] + r3*dz;
-    }
-    dr1d[0][k] = r1;
-    dr1d[1][k] = r2;
-    dr1d[2][k] = r3;
-  }
-}
-
-/* ----------------------------------------------------------------------
-   generate coeffients for the weight function of order n
-
-              (n-1)
-  Wn(x) =     Sum    wn(k,x) , Sum is over every other integer
-           k=-(n-1)
-  For k=-(n-1),-(n-1)+2, ....., (n-1)-2,n-1
-      k is odd integers if n is even and even integers if n is odd
-              ---
-             | n-1
-             | Sum a(l,j)*(x-k/2)**l   if abs(x-k/2) < 1/2
-  wn(k,x) = <  l=0
-             |
-             |  0                       otherwise
-              ---
-  a coeffients are packed into the array rho_coeff to eliminate zeros
-  rho_coeff(l,((k+mod(n+1,2))/2) = a(l,k) 
-------------------------------------------------------------------------- */
-
-void PPPMDisp::compute_rho_coeff(FFT_SCALAR **coeff , FFT_SCALAR **dcoeff, 
-                                 int ord)
-{
-  int j,k,l,m;
-  FFT_SCALAR s;
-
-  FFT_SCALAR **a;
-  memory->create2d_offset(a,ord,-ord,ord,"pppm/disp:a");
-
-  for (k = -ord; k <= ord; k++) 
-    for (l = 0; l < ord; l++)
-      a[l][k] = 0.0;
-        
-  a[0][0] = 1.0;
-  for (j = 1; j < ord; j++) {
-    for (k = -j; k <= j; k += 2) {
-      s = 0.0;
-      for (l = 0; l < j; l++) {
-	a[l+1][k] = (a[l][k+1]-a[l][k-1]) / (l+1);
-#ifdef FFT_SINGLE
-	s += powf(0.5,(float) l+1) *
-	  (a[l][k-1] + powf(-1.0,(float) l) * a[l][k+1]) / (l+1);
-#else
-	s += pow(0.5,(double) l+1) * 
-	  (a[l][k-1] + pow(-1.0,(double) l) * a[l][k+1]) / (l+1);
-#endif
-      }
-      a[0][k] = s;
-    }
-  }
-
-  m = (1-ord)/2;
-  for (k = -(ord-1); k < ord; k += 2) {
-    for (l = 0; l < ord; l++)
-      coeff[l][m] = a[l][k];
-    for (l = 1; l < ord; l++)
-      dcoeff[l-1][m] = l*a[l][k];
-    m++;
-  }
-
-  memory->destroy2d_offset(a,-ord);
-}
-
-/* ----------------------------------------------------------------------
-   Slab-geometry correction term to dampen inter-slab interactions between
-   periodically repeating slabs.  Yields good approximation to 2D Ewald if
-   adequate empty space is left between repeating slabs (J. Chem. Phys.
-   111, 3155).  Slabs defined here to be parallel to the xy plane. Also
-   extended to non-neutral systems (J. Chem. Phys. 131, 094107).
-------------------------------------------------------------------------- */
-
-void PPPMDisp::slabcorr(int eflag)
-{
-  // compute local contribution to global dipole moment
-
-  double *q = atom->q;
-  double **x = atom->x;
-  double zprd = domain->zprd;
-  int nlocal = atom->nlocal;
-
-  double dipole = 0.0;
-  for (int i = 0; i < nlocal; i++) dipole += q[i]*x[i][2];
-
-  // sum local contributions to get global dipole moment
-
-  double dipole_all;
-  MPI_Allreduce(&dipole,&dipole_all,1,MPI_DOUBLE,MPI_SUM,world);
-
-  // need to make non-neutral systems and/or
-  //  per-atom energy translationally invariant
-
-  double dipole_r2 = 0.0;
-  if (eflag_atom || fabs(qsum) > SMALL) {
-    for (int i = 0; i < nlocal; i++)
-      dipole_r2 += q[i]*x[i][2]*x[i][2];
-
-    // sum local contributions
-
-    double tmp;
-    MPI_Allreduce(&dipole_r2,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
-    dipole_r2 = tmp;
-  }
-
-  // compute corrections
-
-  const double e_slabcorr = MY_2PI*(dipole_all*dipole_all -
-    qsum*dipole_r2 - qsum*qsum*zprd*zprd/12.0)/volume;
-  const double qscale = force->qqrd2e * scale;
-
-  if (eflag_global) energy_1 += qscale * e_slabcorr;
-
-  // per-atom energy
-
-  if (eflag_atom) {
-    double efact = qscale * MY_2PI/volume;
-    for (int i = 0; i < nlocal; i++)
-      eatom[i] += efact * q[i]*(x[i][2]*dipole_all - 0.5*(dipole_r2 +
-        qsum*x[i][2]*x[i][2]) - qsum*zprd*zprd/12.0);
-  }
-
-  // add on force corrections
-
-  double ffact = qscale * (-4.0*MY_PI/volume);
-  double **f = atom->f;
-
-  for (int i = 0; i < nlocal; i++) f[i][2] += ffact * q[i]*(dipole_all - qsum*x[i][2]);
-}
-
-/* ----------------------------------------------------------------------
-   perform and time the 1d FFTs required for N timesteps
-------------------------------------------------------------------------- */
-
-int PPPMDisp::timing_1d(int n, double &time1d)
-{
-  double time1,time2;
-  int mixing = 1;
-  if (function[2]) mixing = 4;
-  if (function[3]) mixing = nsplit_alloc/2;
-
-  if (function[0]) for (int i = 0; i < 2*nfft_both; i++) work1[i] = ZEROF;
-  if (function[1] + function[2] + function[3])
-    for (int i = 0; i < 2*nfft_both_6; i++) work1_6[i] = ZEROF;
-
-  MPI_Barrier(world);
-  time1 = MPI_Wtime();
-
-  if (function[0]) {
-    for (int i = 0; i < n; i++) {
-      fft1->timing1d(work1,nfft_both,1);
-      fft2->timing1d(work1,nfft_both,-1);
-      if (differentiation_flag != 1){
-        fft2->timing1d(work1,nfft_both,-1);
-        fft2->timing1d(work1,nfft_both,-1);
-      }
-    }
-  }
-
-  MPI_Barrier(world);
-  time2 = MPI_Wtime();
-  time1d = time2 - time1;
-
-  MPI_Barrier(world);
-  time1 = MPI_Wtime();
-
-  if (function[1] + function[2] + function[3]) {
-    for (int i = 0; i < n; i++) {
-      fft1_6->timing1d(work1_6,nfft_both_6,1);
-      fft2_6->timing1d(work1_6,nfft_both_6,-1);
-      if (differentiation_flag != 1){
-        fft2_6->timing1d(work1_6,nfft_both_6,-1);
-        fft2_6->timing1d(work1_6,nfft_both_6,-1);
-      }
-    }
-  }
-
-  MPI_Barrier(world);
-  time2 = MPI_Wtime();
-  time1d += (time2 - time1)*mixing;
-
-  if (differentiation_flag) return 2;
-  return 4;
-}
-
-/* ----------------------------------------------------------------------
-   perform and time the 3d FFTs required for N timesteps
-------------------------------------------------------------------------- */
-
-int PPPMDisp::timing_3d(int n, double &time3d)
-{
-  double time1,time2;
-  int mixing = 1;
-  if (function[2]) mixing = 4;
-  if (function[3]) mixing = nsplit_alloc/2;
-
-  if (function[0]) for (int i = 0; i < 2*nfft_both; i++) work1[i] = ZEROF;
-  if (function[1] + function[2] + function[3]) 
-    for (int i = 0; i < 2*nfft_both_6; i++) work1_6[i] = ZEROF;
-
-
-
-  MPI_Barrier(world);
-  time1 = MPI_Wtime();
-
-  if (function[0]) {
-    for (int i = 0; i < n; i++) {
-      fft1->compute(work1,work1,1);
-      fft2->compute(work1,work1,-1);
-      if (differentiation_flag != 1) {
-        fft2->compute(work1,work1,-1);
-        fft2->compute(work1,work1,-1);
-      }
-    }
-  }
-
-  MPI_Barrier(world);
-  time2 = MPI_Wtime();
-  time3d = time2 - time1;
-
-  MPI_Barrier(world);
-  time1 = MPI_Wtime();
-  
-  if (function[1] + function[2] + function[3]) {
-    for (int i = 0; i < n; i++) {
-      fft1_6->compute(work1_6,work1_6,1);
-      fft2_6->compute(work1_6,work1_6,-1);
-      if (differentiation_flag != 1) {
-        fft2_6->compute(work1_6,work1_6,-1);
-        fft2_6->compute(work1_6,work1_6,-1);
-      }
-    }
-  }
-
-  MPI_Barrier(world);
-  time2 = MPI_Wtime();
-  time3d += (time2 - time1) * mixing;
-
-  if (differentiation_flag) return 2;
-  return 4;
-}
-
-/* ----------------------------------------------------------------------
-   memory usage of local arrays 
-------------------------------------------------------------------------- */
-
-double PPPMDisp::memory_usage()
-{
-  double bytes = nmax*3 * sizeof(double);
-  int mixing = 1;
-  int diff = 3;     //depends on differentiation
-  int per = 7;      //depends on per atom calculations
-  if (differentiation_flag) {
-    diff = 1;
-    per = 6;
-  }
-  if (!evflag_atom) per = 0;
-  if (function[2]) mixing = 7;
-  if (function[3]) mixing = nsplit_alloc;
-
-  if (function[0]) {
-    int nbrick = (nxhi_out-nxlo_out+1) * (nyhi_out-nylo_out+1) * 
-      (nzhi_out-nzlo_out+1);
-    bytes += (1 + diff +  per) * nbrick * sizeof(FFT_SCALAR);     //brick memory
-    bytes += 6 * nfft_both * sizeof(double);      // vg
-    bytes += nfft_both * sizeof(double);          // greensfn
-    bytes += nfft_both * 3 * sizeof(FFT_SCALAR);    // density_FFT, work1, work2 
-    bytes += cg->memory_usage();
-  }
-
-  if (function[1] + function[2] + function[3]) {
-    int nbrick = (nxhi_out_6-nxlo_out_6+1) * (nyhi_out_6-nylo_out_6+1) * 
-      (nzhi_out_6-nzlo_out_6+1);
-    bytes += (1 + diff + per ) * nbrick * sizeof(FFT_SCALAR) * mixing;     // density_brick + vd_brick + per atom bricks
-    bytes += 6 * nfft_both_6 * sizeof(double);      // vg
-    bytes += nfft_both_6 * sizeof(double);          // greensfn
-    bytes += nfft_both_6 * (mixing + 2) * sizeof(FFT_SCALAR);    // density_FFT, work1, work2 
-    bytes += cg_6->memory_usage();
-  }
-  return bytes;
-}
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing authors: Rolf Isele-Holder (Aachen University)
+                         Paul Crozier (SNL)
+------------------------------------------------------------------------- */
+
+#include "lmptype.h"
+#include "mpi.h"
+#include "string.h"
+#include "stdio.h"
+#include "stdlib.h"
+#include "math.h"
+#include "pppm_disp.h"
+#include "math_const.h"
+#include "atom.h"
+#include "comm.h"
+#include "commgrid.h"
+#include "neighbor.h"
+#include "force.h"
+#include "pair.h"
+#include "bond.h"
+#include "angle.h"
+#include "domain.h"
+#include "fft3d_wrap.h"
+#include "remap_wrap.h"
+#include "memory.h"
+#include "error.h"
+
+using namespace LAMMPS_NS;
+using namespace MathConst;
+
+#define MAXORDER   7      // init() rejects order or order_6 above this
+#define OFFSET 16384      // init() rejects a Coulomb grid with any dimension >= this
+#define SMALL 0.00001     // neutrality threshold compared against fabs(qsum)
+#define LARGE 10000.0
+#define EPS_HOC 1.0e-7
+// mixing-rule ids (compared with ewald_mix in init()), then grid-communication flags
+enum{GEOMETRIC,ARITHMETIC,SIXTHPOWER};
+enum{REVERSE_RHO, REVERSE_RHO_G, REVERSE_RHO_A, REVERSE_RHO_NONE};
+enum{FORWARD_IK, FORWARD_AD, FORWARD_IK_PERATOM, FORWARD_AD_PERATOM,
+     FORWARD_IK_G, FORWARD_AD_G, FORWARD_IK_PERATOM_G, FORWARD_AD_PERATOM_G,
+     FORWARD_IK_A, FORWARD_AD_A, FORWARD_IK_PERATOM_A, FORWARD_AD_PERATOM_A,
+     FORWARD_IK_NONE, FORWARD_AD_NONE, FORWARD_IK_PERATOM_NONE, FORWARD_AD_PERATOM_NONE};
+
+// zero/one literals: float when FFT_SINGLE is defined, double otherwise
+#ifdef FFT_SINGLE
+#define ZEROF 0.0f
+#define ONEF  1.0f
+#else
+#define ZEROF 0.0
+#define ONEF  1.0
+#endif
+
+/* ---------------------------------------------------------------------- */
+// constructor: check the argument count, record the accuracy, set all pointers to NULL
+PPPMDisp::PPPMDisp(LAMMPS *lmp, int narg, char **arg) : KSpace(lmp, narg, arg)
+{
+  if (narg < 1) error->all(FLERR,"Illegal kspace_style pppm/disp command");
+  // solver flags, then the relative accuracy = absolute value of the number in arg[0]
+  triclinic_support = 0;
+  pppmflag = dispersionflag = 1;
+  accuracy_relative = fabs(force->numeric(FLERR,arg[0]));
+  // factors = {2,3,5}; presumably the factors allowed in grid sizes -- TODO confirm
+  nfactors = 3;
+  factors = new int[nfactors];
+  factors[0] = 2;
+  factors[1] = 3;
+  factors[2] = 5;
+  // rank of this proc and number of procs in world
+  MPI_Comm_rank(world,&me);
+  MPI_Comm_size(world,&nprocs);
+  // B, cii, csumi are released with delete [] in the destructor, so they start as NULL
+  csumflag = 0;
+  B = NULL;
+  cii = NULL;
+  csumi = NULL;
+  peratom_allocate_flag = 0;
+  // un-suffixed grid arrays (Coulomb grid, going by the pack/unpack routines)
+  density_brick = vdx_brick = vdy_brick = vdz_brick = NULL;
+  density_fft = NULL;
+  u_brick = v0_brick = v1_brick = v2_brick = v3_brick = 
+    v4_brick = v5_brick = NULL;
+  // _g: dispersion, geometric mixing
+  density_brick_g = vdx_brick_g = vdy_brick_g = vdz_brick_g = NULL;
+  density_fft_g = NULL;
+  u_brick_g = v0_brick_g = v1_brick_g = v2_brick_g = v3_brick_g = 
+    v4_brick_g = v5_brick_g = NULL;
+  // _a0 ... _a6: dispersion, arithmetic mixing (seven sets of grids)
+  density_brick_a0 = vdx_brick_a0 = vdy_brick_a0 = vdz_brick_a0 = NULL;
+  density_fft_a0 = NULL;
+  u_brick_a0 = v0_brick_a0 = v1_brick_a0 = v2_brick_a0 = v3_brick_a0 = 
+    v4_brick_a0 = v5_brick_a0 = NULL;
+
+  density_brick_a1 = vdx_brick_a1 = vdy_brick_a1 = vdz_brick_a1 = NULL;
+  density_fft_a1 = NULL;
+  u_brick_a1 = v0_brick_a1 = v1_brick_a1 = v2_brick_a1 = v3_brick_a1 = 
+    v4_brick_a1 = v5_brick_a1 = NULL;
+
+  density_brick_a2 = vdx_brick_a2 = vdy_brick_a2 = vdz_brick_a2 = NULL;
+  density_fft_a2 = NULL;
+  u_brick_a2 = v0_brick_a2 = v1_brick_a2 = v2_brick_a2 = v3_brick_a2 = 
+    v4_brick_a2 = v5_brick_a2 = NULL;
+
+  density_brick_a3 = vdx_brick_a3 = vdy_brick_a3 = vdz_brick_a3 = NULL;
+  density_fft_a3 = NULL;
+  u_brick_a3 = v0_brick_a3 = v1_brick_a3 = v2_brick_a3 = v3_brick_a3 = 
+    v4_brick_a3 = v5_brick_a3 = NULL;
+
+  density_brick_a4 = vdx_brick_a4 = vdy_brick_a4 = vdz_brick_a4 = NULL;
+  density_fft_a4 = NULL;
+  u_brick_a4 = v0_brick_a4 = v1_brick_a4 = v2_brick_a4 = v3_brick_a4 = 
+    v4_brick_a4 = v5_brick_a4 = NULL;
+
+  density_brick_a5 = vdx_brick_a5 = vdy_brick_a5 = vdz_brick_a5 = NULL;
+  density_fft_a5 = NULL;
+  u_brick_a5 = v0_brick_a5 = v1_brick_a5 = v2_brick_a5 = v3_brick_a5 = 
+    v4_brick_a5 = v5_brick_a5 = NULL;
+
+  density_brick_a6 = vdx_brick_a6 = vdy_brick_a6 = vdz_brick_a6 = NULL;
+  density_fft_a6 = NULL;
+  u_brick_a6 = v0_brick_a6 = v1_brick_a6 = v2_brick_a6 = v3_brick_a6 = 
+    v4_brick_a6 = v5_brick_a6 = NULL;
+  // _none: dispersion, no mixing
+  density_brick_none = vdx_brick_none = vdy_brick_none = vdz_brick_none = NULL;
+  density_fft_none = NULL;
+  u_brick_none = v0_brick_none = v1_brick_none = v2_brick_none = v3_brick_none = 
+    v4_brick_none = v5_brick_none = NULL;
+  // remaining arrays and FFT/remap objects; a trailing _6 marks the dispersion-grid twin
+  greensfn = NULL;
+  greensfn_6 = NULL;
+  work1 = work2 = NULL;
+  work1_6 = work2_6 = NULL;
+  vg = NULL;
+  vg2 = NULL;
+  vg_6 = NULL;
+  vg2_6 = NULL;
+  fkx = fky = fkz = NULL;
+  fkx2 = fky2 = fkz2 = NULL;
+  fkx_6 = fky_6 = fkz_6 = NULL;
+  fkx2_6 = fky2_6 = fkz2_6 = NULL;
+
+  sf_precoeff1 = sf_precoeff2 = sf_precoeff3 = sf_precoeff4 = 
+    sf_precoeff5 = sf_precoeff6 = NULL;
+  sf_precoeff1_6 = sf_precoeff2_6 = sf_precoeff3_6 = sf_precoeff4_6 = 
+    sf_precoeff5_6 = sf_precoeff6_6 = NULL;
+
+  gf_b = NULL;
+  gf_b_6 = NULL;
+  rho1d = rho_coeff = NULL;
+  drho1d = drho_coeff = NULL;
+  rho1d_6 = rho_coeff_6 = NULL;
+  drho1d_6 = drho_coeff_6 = NULL;
+  fft1 = fft2 = NULL;
+  fft1_6 = fft2_6 = NULL;
+  remap = NULL;
+  remap_6 = NULL;
+
+  nmax = 0;
+  part2grid = NULL;
+  part2grid_6 = NULL;
+
+  cg = NULL;
+  cg_peratom = NULL;
+  cg_6 = NULL;
+  cg_peratom_6 = NULL;
+  // no interaction selected yet; init() sets function[k] from the pair style's ewald_order
+  memset(function, 0, EWALD_FUNCS*sizeof(int));
+}
+
+/* ----------------------------------------------------------------------
+   free all memory: new[] arrays, arrays freed by deallocate*(), part2grid arrays
+------------------------------------------------------------------------- */
+
+PPPMDisp::~PPPMDisp()
+{
+  delete [] factors;          // new int[nfactors] from the constructor
+  delete [] B;                // delete [] of a NULL pointer is a no-op
+  B = NULL;
+  delete [] cii;
+  cii = NULL;
+  delete [] csumi;
+  csumi = NULL;
+  deallocate();               // same two calls init() uses to free previously allocated arrays
+  deallocate_peratom();
+  memory->destroy(part2grid);
+  memory->destroy(part2grid_6);
+  part2grid = part2grid_6 = NULL;
+}
+
+/* ----------------------------------------------------------------------
+   called once before run 
+------------------------------------------------------------------------- */
+
+void PPPMDisp::init()
+{
+  if (me == 0) {
+    if (screen) fprintf(screen,"PPPMDisp initialization ...\n");
+    if (logfile) fprintf(logfile,"PPPMDisp initialization ...\n");
+  }
+
+  triclinic_check();
+  if (domain->dimension == 2)
+    error->all(FLERR,"Cannot use PPPMDisp with 2d simulation");
+
+  if (slabflag == 0 && domain->nonperiodic > 0)
+    error->all(FLERR,"Cannot use nonperiodic boundaries with PPPMDisp");
+  if (slabflag == 1) {
+    if (domain->xperiodic != 1 || domain->yperiodic != 1 || 
+	domain->boundary[2][0] != 1 || domain->boundary[2][1] != 1)
+      error->all(FLERR,"Incorrect boundaries with slab PPPMDisp");
+  }
+ 
+  if (order > MAXORDER || order_6 > MAXORDER) {
+    char str[128];
+    sprintf(str,"PPPMDisp coulomb order cannot be greater than %d",MAXORDER);
+    error->all(FLERR,str);
+  }
+
+  // free all arrays previously allocated
+
+  deallocate();
+  deallocate_peratom(); 
+
+  // set scale
+
+  scale = 1.0;
+
+  triclinic = domain->triclinic;
+
+  // check whether cutoff and pair style are set
+
+  pair_check();
+
+  int tmp;
+  Pair *pair = force->pair;
+  int *ptr = pair ? (int *) pair->extract("ewald_order",tmp) : NULL;
+  double *p_cutoff = pair ? (double *) pair->extract("cut_coul",tmp) : NULL;
+  double *p_cutoff_lj = pair ? (double *) pair->extract("cut_LJ",tmp) : NULL;
+  if (!(ptr||*p_cutoff||*p_cutoff_lj)) 
+    error->all(FLERR,"KSpace style is incompatible with Pair style");
+  cutoff = *p_cutoff;
+  cutoff_lj = *p_cutoff_lj;
+
+  double tmp2;
+  MPI_Allreduce(&cutoff, &tmp2,1,MPI_DOUBLE,MPI_SUM,world); 
+
+  // check out which types of potentials will have to be calculated
+
+  int ewald_order = ptr ? *((int *) ptr) : 1<<1;
+  int ewald_mix = ptr ? *((int *) pair->extract("ewald_mix",tmp)) : GEOMETRIC;
+  memset(function, 0, EWALD_FUNCS*sizeof(int));
+  for (int i=0; i<=EWALD_MAXORDER; ++i)			// transcribe order
+    if (ewald_order&(1<<i)) {				// from pair_style
+      int  k=-1;
+      char str[128];
+      switch (i) {
+	case 1:
+	  k = 0; break;
+	case 6:
+	  if ((ewald_mix==GEOMETRIC || ewald_mix==SIXTHPOWER|| mixflag == 1) && mixflag!= 2) { k = 1; break; }
+	  else if (ewald_mix==ARITHMETIC && mixflag!=2) { k = 2; break; }
+	  else if (mixflag == 2) { k = 3; break; }
+	default:
+	  sprintf(str, "Unsupported order in kspace_style "
+                  "pppm/disp, pair_style %s", force->pair_style);
+	  error->all(FLERR,str);
+      }
+      function[k] = 1;
+    }
+ 
+
+  // warn, if function[0] is not set but charge attribute is set!
+  if (!function[0] && atom->q_flag && me == 0) {
+    char str[128];
+    sprintf(str, "Charges are set, but coulombic solver is not used");
+    error->warning(FLERR, str);
+  }
+
+  // compute qsum & qsqsum, if function[0] is set, print error if no charges are set or warn if not charge-neutral  
+ 
+  if (function[0]) {
+    if (!atom->q_flag) 
+      error->all(FLERR,"Kspace style with selected options "
+                 "requires atom attribute q");
+ 
+    qsum = qsqsum = 0.0;
+    for (int i = 0; i < atom->nlocal; i++) {
+      qsum += atom->q[i];
+      qsqsum += atom->q[i]*atom->q[i];
+
+    }
+
+    double tmp;
+    MPI_Allreduce(&qsum,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
+    qsum = tmp;
+    MPI_Allreduce(&qsqsum,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
+    qsqsum = tmp;
+
+    if (qsqsum == 0.0)
+      error->all(FLERR,"Cannot use kspace solver with selected options "
+                 "on system with no charge");
+    if (fabs(qsum) > SMALL && me == 0) {
+      char str[128];
+      sprintf(str,"System is not charge neutral, net charge = %g",qsum);
+      error->warning(FLERR,str);
+    }
+  }
+
+  // if kspace is TIP4P, extract TIP4P params from pair style
+  // bond/angle are not yet init(), so insure equilibrium request is valid
+
+  qdist = 0.0;
+ 
+  if (tip4pflag) {
+    int itmp;
+    double *p_qdist = (double *) force->pair->extract("qdist",itmp);
+    int *p_typeO = (int *) force->pair->extract("typeO",itmp);
+    int *p_typeH = (int *) force->pair->extract("typeH",itmp);
+    int *p_typeA = (int *) force->pair->extract("typeA",itmp);
+    int *p_typeB = (int *) force->pair->extract("typeB",itmp);
+    if (!p_qdist || !p_typeO || !p_typeH || !p_typeA || !p_typeB)
+      error->all(FLERR,"KSpace style is incompatible with Pair style");
+    qdist = *p_qdist;
+    typeO = *p_typeO;
+    typeH = *p_typeH;
+    int typeA = *p_typeA;
+    int typeB = *p_typeB;
+
+    if (force->angle == NULL || force->bond == NULL)
+      error->all(FLERR,"Bond and angle potentials must be defined for TIP4P");
+    if (typeA < 1 || typeA > atom->nangletypes || 
+	force->angle->setflag[typeA] == 0)
+      error->all(FLERR,"Bad TIP4P angle type for PPPMDisp/TIP4P");
+    if (typeB < 1 || typeB > atom->nbondtypes || 
+	force->bond->setflag[typeB] == 0)
+      error->all(FLERR,"Bad TIP4P bond type for PPPMDisp/TIP4P");
+    double theta = force->angle->equilibrium_angle(typeA);
+    double blen = force->bond->equilibrium_distance(typeB);
+    alpha = qdist / (cos(0.5*theta) * blen);
+  }
+
+
+  // initialize the pair style to get the coefficients
+  neighrequest_flag = 0;
+  pair->init();
+  neighrequest_flag = 1;
+  init_coeffs();
+
+  //if g_ewald and g_ewald_6 have not been specified, set some initial value
+  //  to avoid problems when calculating the energies!
+
+  if (!gewaldflag) g_ewald = 1;
+  if (!gewaldflag_6) g_ewald_6 = 1;
+
+  // set accuracy (force units) from accuracy_relative or accuracy_absolute
+  
+  if (accuracy_absolute >= 0.0) accuracy = accuracy_absolute;
+  else accuracy = accuracy_relative * two_charge_force;
+
+  int (*procneigh)[2] = comm->procneigh;
+
+  int iteration = 0;
+  if (function[0]) {
+    CommGrid *cgtmp = NULL;
+    while (order >= minorder) {
+
+      if (iteration && me == 0)
+          error->warning(FLERR,"Reducing PPPMDisp Coulomb order "
+                         "b/c stencil extends beyond neighbor processor");
+      iteration++;
+
+      // set grid for dispersion interaction and coulomb interactions
+ 
+      set_grid();
+
+      if (nx_pppm >= OFFSET || ny_pppm >= OFFSET || nz_pppm >= OFFSET)
+      error->all(FLERR,"PPPMDisp Coulomb grid is too large");
+
+      set_fft_parameters(nx_pppm, ny_pppm, nz_pppm,
+                         nxlo_fft, nylo_fft, nzlo_fft,
+                         nxhi_fft, nyhi_fft, nzhi_fft,
+                         nxlo_in, nylo_in, nzlo_in,
+                         nxhi_in, nyhi_in, nzhi_in,
+                         nxlo_out, nylo_out, nzlo_out,
+                         nxhi_out, nyhi_out, nzhi_out,
+                         nlower, nupper,
+                         ngrid, nfft, nfft_both,
+                         shift, shiftone, order);
+
+      if (overlap_allowed) break;
+
+      cgtmp = new CommGrid(lmp, world,1,1,
+                           nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
+                           nxlo_out,nxhi_out,nylo_out,nyhi_out,
+                           nzlo_out,nzhi_out,
+                           procneigh[0][0],procneigh[0][1],procneigh[1][0],
+                           procneigh[1][1],procneigh[2][0],procneigh[2][1]);
+      cgtmp->ghost_notify();
+      if (!cgtmp->ghost_overlap()) break;
+      delete cgtmp;
+
+      order--;
+    }
+
+    if (order < minorder)
+      error->all(FLERR,
+                 "Coulomb PPPMDisp order has been reduced below minorder");
+    if (cgtmp) delete cgtmp;
+
+    // adjust g_ewald
+  
+    if (!gewaldflag) adjust_gewald();
+
+    // calculate the final accuracy
+  
+    double acc = final_accuracy();
+  
+    // print stats
+
+    int ngrid_max,nfft_both_max,nbuf_max;
+    MPI_Allreduce(&ngrid,&ngrid_max,1,MPI_INT,MPI_MAX,world);
+    MPI_Allreduce(&nfft_both,&nfft_both_max,1,MPI_INT,MPI_MAX,world);
+
+    if (me == 0) {
+    #ifdef FFT_SINGLE
+      const char fft_prec[] = "single";
+    #else
+      const char fft_prec[] = "double";
+    #endif
+  
+      if (screen) {
+        fprintf(screen,"  Coulomb G vector (1/distance)= %g\n",g_ewald);
+        fprintf(screen,"  Coulomb grid = %d %d %d\n",nx_pppm,ny_pppm,nz_pppm);
+        fprintf(screen,"  Coulomb stencil order = %d\n",order);
+        fprintf(screen,"  Coulomb estimated absolute RMS force accuracy = %g\n",
+                acc);
+        fprintf(screen,"  Coulomb estimated relative force accuracy = %g\n",
+                acc/two_charge_force);
+        fprintf(screen,"  using %s precision FFTs\n",fft_prec);
+        fprintf(screen,"  3d grid and FFT values/proc = %d %d\n",
+		ngrid_max, nfft_both_max);
+      }
+      if (logfile) {
+        fprintf(logfile,"  Coulomb G vector (1/distance) = %g\n",g_ewald);
+        fprintf(logfile,"  Coulomb grid = %d %d %d\n",nx_pppm,ny_pppm,nz_pppm);
+        fprintf(logfile,"  Coulomb stencil order = %d\n",order);
+        fprintf(logfile,
+                "  Coulomb estimated absolute RMS force accuracy = %g\n",
+                acc);
+        fprintf(logfile,"  Coulomb estimated relative force accuracy = %g\n",
+                acc/two_charge_force);
+        fprintf(logfile,"  using %s precision FFTs\n",fft_prec);
+        fprintf(logfile,"  3d grid and FFT values/proc = %d %d\n",
+		ngrid_max, nfft_both_max);
+      }
+    }
+  }
+
+  iteration = 0;
+  if (function[1] + function[2] + function[3]) {
+    CommGrid *cgtmp = NULL;
+    while (order_6 >= minorder) {
+
+      if (iteration && me == 0)
+          error->warning(FLERR,"Reducing PPPMDisp dispersion order "
+                         "b/c stencil extends beyond neighbor processor");
+      iteration++;
+
+      set_grid_6();
+   
+      if (nx_pppm_6 >= OFFSET || ny_pppm_6 >= OFFSET || nz_pppm_6 >= OFFSET)
+      error->all(FLERR,"PPPMDisp Dispersion grid is too large");
+
+      set_fft_parameters(nx_pppm_6, ny_pppm_6, nz_pppm_6,
+                         nxlo_fft_6, nylo_fft_6, nzlo_fft_6,
+                         nxhi_fft_6, nyhi_fft_6, nzhi_fft_6,
+                         nxlo_in_6, nylo_in_6, nzlo_in_6,
+                         nxhi_in_6, nyhi_in_6, nzhi_in_6,
+                         nxlo_out_6, nylo_out_6, nzlo_out_6,
+                         nxhi_out_6, nyhi_out_6, nzhi_out_6,
+                         nlower_6, nupper_6,
+                         ngrid_6, nfft_6, nfft_both_6,
+                         shift_6, shiftone_6, order_6);
+
+      if (overlap_allowed) break;
+
+      cgtmp = new CommGrid(lmp,world,1,1,
+                           nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,
+                           nzlo_in_6,nzhi_in_6,
+                           nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,
+                           nzlo_out_6,nzhi_out_6,
+                           procneigh[0][0],procneigh[0][1],procneigh[1][0],
+                           procneigh[1][1],procneigh[2][0],procneigh[2][1]);
+      cgtmp->ghost_notify();
+      if (!cgtmp->ghost_overlap()) break;
+      delete cgtmp;
+      order_6--;
+    }
+
+    if (order_6 < minorder) 
+      error->all(FLERR,"Dispersion PPPMDisp order has been "
+                 "reduced below minorder");
+    if (cgtmp) delete cgtmp;
+
+    // adjust g_ewald_6
+
+    if (!gewaldflag_6 && accuracy_kspace_6 == accuracy_real_6) 
+      adjust_gewald_6();
+
+    // calculate the final accuracy
+
+    double acc, acc_real, acc_kspace;
+    final_accuracy_6(acc, acc_real, acc_kspace);
+
+
+    // print stats
+
+    int ngrid_max,nfft_both_max,nbuf_max;
+    MPI_Allreduce(&ngrid_6,&ngrid_max,1,MPI_INT,MPI_MAX,world);
+    MPI_Allreduce(&nfft_both_6,&nfft_both_max,1,MPI_INT,MPI_MAX,world);
+
+    if (me == 0) {
+    #ifdef FFT_SINGLE
+      const char fft_prec[] = "single";
+    #else
+      const char fft_prec[] = "double";
+    #endif
+  
+      if (screen) {
+        fprintf(screen,"  Dispersion G vector (1/distance)= %g\n",g_ewald_6);
+        fprintf(screen,"  Dispersion grid = %d %d %d\n",
+                nx_pppm_6,ny_pppm_6,nz_pppm_6);
+        fprintf(screen,"  Dispersion stencil order = %d\n",order_6);
+        fprintf(screen,"  Dispersion estimated absolute "
+                "RMS force accuracy = %g\n",acc);
+        fprintf(screen,"  Dispersion estimated absolute "
+                "real space RMS force accuracy = %g\n",acc_real);
+        fprintf(screen,"  Dispersion estimated absolute "
+                "kspace RMS force accuracy = %g\n",acc_kspace);
+        fprintf(screen,"  Dispersion estimated relative force accuracy = %g\n",
+                acc/two_charge_force);
+        fprintf(screen,"  using %s precision FFTs\n",fft_prec);
+        fprintf(screen,"  3d grid and FFT values/proc dispersion = %d %d\n",
+                          ngrid_max,nfft_both_max);
+      }
+      if (logfile) {
+        fprintf(logfile,"  Dispersion G vector (1/distance) = %g\n",g_ewald_6);
+        fprintf(logfile,"  Dispersion grid = %d %d %d\n",
+                nx_pppm_6,ny_pppm_6,nz_pppm_6);
+        fprintf(logfile,"  Dispersion stencil order = %d\n",order_6);
+        fprintf(logfile,"  Dispersion estimated absolute "
+                "RMS force accuracy = %g\n",acc);
+        fprintf(logfile,"  Dispersion estimated absolute "
+                "real space RMS force accuracy = %g\n",acc_real);
+        fprintf(logfile,"  Dispersion estimated absolute "
+                "kspace RMS force accuracy = %g\n",acc_kspace);
+        fprintf(logfile,"  Disperion estimated relative force accuracy = %g\n",
+                acc/two_charge_force);
+        fprintf(logfile,"  using %s precision FFTs\n",fft_prec);
+        fprintf(logfile,"  3d grid and FFT values/proc dispersion = %d %d\n",
+                           ngrid_max,nfft_both_max);
+      }
+    }
+  }
+
+  // allocate K-space dependent memory
+
+  allocate();
+
+  // pre-compute Green's function denominator expansion
+  // pre-compute 1d charge distribution coefficients
+
+  if (function[0]) {
+    compute_gf_denom(gf_b, order);
+    compute_rho_coeff(rho_coeff, drho_coeff, order);
+    cg->ghost_notify();
+    cg->setup();
+    if (differentiation_flag == 1)
+      compute_sf_precoeff(nx_pppm, ny_pppm, nz_pppm, order,
+                          nxlo_fft, nylo_fft, nzlo_fft, 
+                          nxhi_fft, nyhi_fft, nzhi_fft,
+                          sf_precoeff1, sf_precoeff2, sf_precoeff3,
+                          sf_precoeff4, sf_precoeff5, sf_precoeff6);
+  }
+  if (function[1] + function[2] + function[3]) {
+    compute_gf_denom(gf_b_6, order_6);
+    compute_rho_coeff(rho_coeff_6, drho_coeff_6, order_6);
+    cg_6->ghost_notify();
+    cg_6->setup();
+    if (differentiation_flag == 1)
+      compute_sf_precoeff(nx_pppm_6, ny_pppm_6, nz_pppm_6, order_6,
+                          nxlo_fft_6, nylo_fft_6, nzlo_fft_6, 
+                          nxhi_fft_6, nyhi_fft_6, nzhi_fft_6,
+                          sf_precoeff1_6, sf_precoeff2_6, sf_precoeff3_6,
+                          sf_precoeff4_6, sf_precoeff5_6, sf_precoeff6_6);
+  }
+
+}
+
+/* ----------------------------------------------------------------------
+   adjust PPPM coeffs, called initially and whenever volume has changed
+   for each active solver: grid spacings, fk* wavevectors, vg/vg2 tables
+------------------------------------------------------------------------- */
+
+void PPPMDisp::setup()
+{
+  double *prd;
+
+  // volume-dependent factors
+  // adjust z dimension for 2d slab PPPM
+  // z dimension for 3d PPPM is zprd since slab_volfactor = 1.0
+
+  if (triclinic == 0) prd = domain->prd;
+  else prd = domain->prd_lamda;
+
+  double xprd = prd[0];
+  double yprd = prd[1];
+  double zprd = prd[2];
+  double zprd_slab = zprd*slab_volfactor;
+  volume = xprd * yprd * zprd_slab;
+
+  // wavevector increment per grid index in x,y,z (z uses the slab length)
+
+  double unitkx = (2.0*MY_PI/xprd);
+  double unitky = (2.0*MY_PI/yprd);
+  double unitkz = (2.0*MY_PI/zprd_slab);
+
+  // Coulomb solver: grid spacings, wavevectors, virial coeffs, green function
+  if (function[0]){
+    delxinv = nx_pppm/xprd;
+    delyinv = ny_pppm/yprd;
+    delzinv = nz_pppm/zprd_slab;
+
+    delvolinv = delxinv*delyinv*delzinv;
+
+    double per;
+    int i, j, k, n;
+    // per = i while 2*i < N, else i - N (integer division, for 0 <= i < N)
+    for (i = nxlo_fft; i <= nxhi_fft; i++) {
+      per = i - nx_pppm*(2*i/nx_pppm);
+      fkx[i] = unitkx*per;
+      j = (nx_pppm - i) % nx_pppm;      // mirrored grid index, feeds fkx2
+      per = j - nx_pppm*(2*j/nx_pppm);
+      fkx2[i] = unitkx*per;
+    }
+
+    for (i = nylo_fft; i <= nyhi_fft; i++) {
+      per = i - ny_pppm*(2*i/ny_pppm);
+      fky[i] = unitky*per;
+      j = (ny_pppm - i) % ny_pppm;      // mirrored grid index, feeds fky2
+      per = j - ny_pppm*(2*j/ny_pppm);
+      fky2[i] = unitky*per;
+    }
+
+    for (i = nzlo_fft; i <= nzhi_fft; i++) {
+      per = i - nz_pppm*(2*i/nz_pppm);
+      fkz[i] = unitkz*per;
+      j = (nz_pppm - i) % nz_pppm;      // mirrored grid index, feeds fkz2
+      per = j - nz_pppm*(2*j/nz_pppm);
+      fkz2[i] = unitkz*per;
+    }
+
+    double sqk,vterm;
+    double gew2inv = 1/(g_ewald*g_ewald);
+    n = 0;                              // running index over my FFT grid pts
+    for (k = nzlo_fft; k <= nzhi_fft; k++) {
+      for (j = nylo_fft; j <= nyhi_fft; j++) {
+        for (i = nxlo_fft; i <= nxhi_fft; i++) {
+          sqk = fkx[i]*fkx[i] + fky[j]*fky[j] + fkz[k]*fkz[k];
+          if (sqk == 0.0) {
+            // zero wavevector: 1/sqk is undefined, so store zeros instead
+            // vg2 is cleared as well so that no table entry is left unset
+            // (it is otherwise assigned only in the else branch below)
+            vg[n][0] = vg[n][1] = vg[n][2] = 0.0;
+            vg[n][3] = vg[n][4] = vg[n][5] = 0.0;
+            vg2[n][0] = vg2[n][1] = vg2[n][2] = 0.0;
+          } else {
+            vterm = -2.0 * (1.0/sqk + 0.25*gew2inv);
+            vg[n][0] = 1.0 + vterm*fkx[i]*fkx[i];
+            vg[n][1] = 1.0 + vterm*fky[j]*fky[j];
+            vg[n][2] = 1.0 + vterm*fkz[k]*fkz[k];
+            vg[n][3] = vterm*fkx[i]*fky[j];
+            vg[n][4] = vterm*fkx[i]*fkz[k];
+            vg[n][5] = vterm*fky[j]*fkz[k];
+            vg2[n][0] = vterm*0.5*(fkx[i]*fky[j] + fkx2[i]*fky2[j]);
+            vg2[n][1] = vterm*0.5*(fkx[i]*fkz[k] + fkx2[i]*fkz2[k]);
+            vg2[n][2] = vterm*0.5*(fky[j]*fkz[k] + fky2[j]*fkz2[k]);
+          }
+          n++;
+        }
+      }
+    }
+    compute_gf();
+    if (differentiation_flag == 1) compute_sf_coeff();
+  }
+
+  if (function[1] + function[2] + function[3]) {   // dispersion solver
+    delxinv_6 = nx_pppm_6/xprd;
+    delyinv_6 = ny_pppm_6/yprd;
+    delzinv_6 = nz_pppm_6/zprd_slab;
+    delvolinv_6 = delxinv_6*delyinv_6*delzinv_6;
+
+    double per;
+    int i, j, k, n;
+    for (i = nxlo_fft_6; i <= nxhi_fft_6; i++) {
+      per = i - nx_pppm_6*(2*i/nx_pppm_6);
+      fkx_6[i] = unitkx*per;
+      j = (nx_pppm_6 - i) % nx_pppm_6;  // mirrored grid index, feeds fkx2_6
+      per = j - nx_pppm_6*(2*j/nx_pppm_6);
+      fkx2_6[i] = unitkx*per;
+    }
+    for (i = nylo_fft_6; i <= nyhi_fft_6; i++) {
+      per = i - ny_pppm_6*(2*i/ny_pppm_6);
+      fky_6[i] = unitky*per;
+      j = (ny_pppm_6 - i) % ny_pppm_6;  // mirrored grid index, feeds fky2_6
+      per = j - ny_pppm_6*(2*j/ny_pppm_6);
+      fky2_6[i] = unitky*per;
+    }
+    for (i = nzlo_fft_6; i <= nzhi_fft_6; i++) {
+      per = i - nz_pppm_6*(2*i/nz_pppm_6);
+      fkz_6[i] = unitkz*per;
+      j = (nz_pppm_6 - i) % nz_pppm_6;  // mirrored grid index, feeds fkz2_6
+      per = j - nz_pppm_6*(2*j/nz_pppm_6);
+      fkz2_6[i] = unitkz*per;
+    }
+    double sqk,vterm;
+    long double erft, expt,nom, denom;
+    long double b, bs, bt;
+    double rtpi = sqrt(MY_PI);
+    double gewinv = 1/g_ewald_6;
+    n = 0;                              // running index over my FFT grid pts
+    for (k = nzlo_fft_6; k <= nzhi_fft_6; k++) {
+      for (j = nylo_fft_6; j <= nyhi_fft_6; j++) {
+        for (i = nxlo_fft_6; i <= nxhi_fft_6; i++) {
+          sqk = fkx_6[i]*fkx_6[i] + fky_6[j]*fky_6[j] + fkz_6[k]*fkz_6[k];
+          if (sqk == 0.0) {
+            // zero wavevector: vterm divides by sqk, so store zeros instead
+            // vg2_6 is cleared as well so that no table entry is left unset
+            // (it is otherwise assigned only in the else branch below)
+            vg_6[n][0] = vg_6[n][1] = vg_6[n][2] = 0.0;
+            vg_6[n][3] = vg_6[n][4] = vg_6[n][5] = 0.0;
+            vg2_6[n][0] = vg2_6[n][1] = vg2_6[n][2] = 0.0;
+          } else {
+            b = 0.5*sqrt(sqk)*gewinv;   // b = |k| / (2*g_ewald_6)
+            bs = b*b;
+            bt = bs*b;
+            erft = 2*bt*rtpi*erfc(b);
+            expt = exp(-bs);
+            nom = erft - 2*bs*expt;
+            denom = nom + expt;
+            if (denom == 0) vterm = 3.0/sqk;   // avoid dividing by zero below
+            else vterm = 3.0*nom/(sqk*denom);
+            vg_6[n][0] = 1.0 + vterm*fkx_6[i]*fkx_6[i];
+            vg_6[n][1] = 1.0 + vterm*fky_6[j]*fky_6[j];
+            vg_6[n][2] = 1.0 + vterm*fkz_6[k]*fkz_6[k];
+            vg_6[n][3] = vterm*fkx_6[i]*fky_6[j];
+            vg_6[n][4] = vterm*fkx_6[i]*fkz_6[k];
+            vg_6[n][5] = vterm*fky_6[j]*fkz_6[k];
+            vg2_6[n][0] = vterm*0.5*(fkx_6[i]*fky_6[j] + fkx2_6[i]*fky2_6[j]);
+            vg2_6[n][1] = vterm*0.5*(fkx_6[i]*fkz_6[k] + fkx2_6[i]*fkz2_6[k]);
+            vg2_6[n][2] = vterm*0.5*(fky_6[j]*fkz_6[k] + fky2_6[j]*fkz2_6[k]);
+          }
+          n++;
+        }
+      }
+    }
+    compute_gf_6();
+    if (differentiation_flag == 1) compute_sf_coeff_6();
+  }
+}
+
+/* ----------------------------------------------------------------------
+   reset local grid arrays and communication stencils
+   called by fix balance b/c it changed sizes of processor sub-domains
+------------------------------------------------------------------------- */
+
+void PPPMDisp::setup_grid()
+{
+  // free all arrays previously allocated (grid and per-atom ones)
+
+  deallocate();
+  deallocate_peratom();
+
+  // reset portion of global grid that each proc owns
+  // (Coulomb grid if function[0], dispersion grid if any of function[1..3])
+  if (function[0])
+    set_fft_parameters(nx_pppm, ny_pppm, nz_pppm,
+                       nxlo_fft, nylo_fft, nzlo_fft,
+                       nxhi_fft, nyhi_fft, nzhi_fft,
+                       nxlo_in, nylo_in, nzlo_in,
+                       nxhi_in, nyhi_in, nzhi_in,
+                       nxlo_out, nylo_out, nzlo_out,
+                       nxhi_out, nyhi_out, nzhi_out,
+                       nlower, nupper,
+                       ngrid, nfft, nfft_both,
+                       shift, shiftone, order);
+
+  if (function[1] + function[2] + function[3])
+    set_fft_parameters(nx_pppm_6, ny_pppm_6, nz_pppm_6,
+                       nxlo_fft_6, nylo_fft_6, nzlo_fft_6,
+                       nxhi_fft_6, nyhi_fft_6, nzhi_fft_6,
+                       nxlo_in_6, nylo_in_6, nzlo_in_6,
+                       nxhi_in_6, nyhi_in_6, nzhi_in_6,
+                       nxlo_out_6, nylo_out_6, nzlo_out_6,
+                       nxhi_out_6, nyhi_out_6, nzhi_out_6,
+                       nlower_6, nupper_6,
+                       ngrid_6, nfft_6, nfft_both_6,
+                       shift_6, shiftone_6, order_6);
+
+  // reallocate K-space dependent memory
+  // check if grid communication is now overlapping if not allowed
+  // don't invoke allocate_peratom(), compute() will allocate when needed
+
+  allocate();
+
+  if (function[0]) {
+    cg->ghost_notify();
+    if (overlap_allowed == 0 && cg->ghost_overlap())
+      error->all(FLERR,"PPPM grid stencil extends "
+                 "beyond nearest neighbor processor");
+    cg->setup();
+  }
+  if (function[1] + function[2] + function[3]) {
+    cg_6->ghost_notify();
+    if (overlap_allowed == 0 && cg_6->ghost_overlap())
+      error->all(FLERR,"PPPM grid stencil extends "
+                 "beyond nearest neighbor processor");
+    cg_6->setup();
+  }
+
+  // pre-compute Green's function denominator expansion
+  // pre-compute 1d charge distribution coefficients
+  // pre-compute ad self-force pre-coefficients if differentiation_flag == 1
+  if (function[0]) {
+    compute_gf_denom(gf_b, order);
+    compute_rho_coeff(rho_coeff, drho_coeff, order);
+    if (differentiation_flag == 1) 
+      compute_sf_precoeff(nx_pppm, ny_pppm, nz_pppm, order,
+                          nxlo_fft, nylo_fft, nzlo_fft, 
+                          nxhi_fft, nyhi_fft, nzhi_fft,
+                          sf_precoeff1, sf_precoeff2, sf_precoeff3,
+                          sf_precoeff4, sf_precoeff5, sf_precoeff6);
+  }
+  if (function[1] + function[2] + function[3]) {
+    compute_gf_denom(gf_b_6, order_6);
+    compute_rho_coeff(rho_coeff_6, drho_coeff_6, order_6);
+    if (differentiation_flag == 1)
+      compute_sf_precoeff(nx_pppm_6, ny_pppm_6, nz_pppm_6, order_6,
+                          nxlo_fft_6, nylo_fft_6, nzlo_fft_6, 
+                          nxhi_fft_6, nyhi_fft_6, nzhi_fft_6,
+                          sf_precoeff1_6, sf_precoeff2_6, sf_precoeff3_6,
+                          sf_precoeff4_6, sf_precoeff5_6, sf_precoeff6_6);
+  }
+
+  // pre-compute volume-dependent coeffs
+
+  setup();
+}
+
+/* ----------------------------------------------------------------------
+   compute the PPPM long-range force, energy, virial
+------------------------------------------------------------------------- */
+
+void PPPMDisp::compute(int eflag, int vflag)
+{
+  // eflag, vflag: energy/virial request flags, forwarded to ev_setup()
+  int i;
+  // set energy/virial flags; clear them all when nothing is requested
+
+  if (eflag || vflag) ev_setup(eflag,vflag);
+  else evflag = evflag_atom = eflag_global = vflag_global = 
+	 eflag_atom = vflag_atom = 0;
+  // per-atom grids are allocated only once, the first time they are needed
+  if (evflag_atom && !peratom_allocate_flag) {
+    allocate_peratom();
+    if (function[0]) {
+      cg_peratom->ghost_notify();
+      cg_peratom->setup();
+    }
+    if (function[1] + function[2] + function[3]) {
+      cg_peratom_6->ghost_notify();
+      cg_peratom_6->setup();
+    }
+    peratom_allocate_flag = 1;
+  }
+  // convert atoms from box to lamda coords (triclinic only)
+  if (triclinic == 0) boxlo = domain->boxlo;
+  else {
+    boxlo = domain->boxlo_lamda;
+    domain->x2lamda(atom->nlocal);
+  }
+  // extend size of per-atom arrays if necessary
+
+  if (atom->nlocal > nmax) {
+
+    if (function[0]) memory->destroy(part2grid);
+    if (function[1] + function[2] + function[3]) memory->destroy(part2grid_6);
+    nmax = atom->nmax;
+    if (function[0]) memory->create(part2grid,nmax,3,"pppm/disp:part2grid");
+    if (function[1] + function[2] + function[3]) 
+      memory->create(part2grid_6,nmax,3,"pppm/disp:part2grid_6");
+  }
+
+  // zero the total, Coulomb (_1) and dispersion (_6) accumulators
+  energy = 0.0;
+  energy_1 = 0.0;
+  energy_6 = 0.0;
+  if (vflag) for (i = 0; i < 6; i++) virial_6[i] = virial_1[i] = 0.0;
+
+  // find grid points for all my particles
+  // distribute particles' charges/dispersion coefficients on the grid
+  // communication between processors and remapping to fft
+  // Solution of poissons equation in k-space and backtransformation
+  // communication between processors
+  // calculation of forces
+
+  if (function[0]) {
+
+    //perform calculations for coulomb interactions only
+
+    particle_map_c(delxinv, delyinv, delzinv, shift, part2grid, nupper, nlower,
+                 nxlo_out, nylo_out, nzlo_out, nxhi_out, nyhi_out, nzhi_out);
+
+    make_rho_c();
+
+    cg->reverse_comm(this,REVERSE_RHO);
+ 
+    brick2fft(nxlo_in, nylo_in, nzlo_in, nxhi_in, nyhi_in, nzhi_in,
+	      density_brick, density_fft, work1,remap); 
+ 
+    if (differentiation_flag == 1) {
+
+      poisson_ad(work1, work2, density_fft, fft1, fft2,
+                 nx_pppm, ny_pppm, nz_pppm, nfft,
+                 nxlo_fft, nylo_fft, nzlo_fft, nxhi_fft, nyhi_fft, nzhi_fft,
+                 nxlo_in, nylo_in, nzlo_in, nxhi_in, nyhi_in, nzhi_in,
+                 energy_1, greensfn, 
+                 virial_1, vg,vg2,
+                 u_brick, v0_brick, v1_brick, v2_brick, v3_brick, v4_brick, v5_brick);
+
+      cg->forward_comm(this,FORWARD_AD);
+
+      fieldforce_c_ad(); 
+
+      if (vflag_atom) cg_peratom->forward_comm(this, FORWARD_AD_PERATOM);
+
+    } else {
+      poisson_ik(work1, work2, density_fft, fft1, fft2,
+                 nx_pppm, ny_pppm, nz_pppm, nfft,
+                 nxlo_fft, nylo_fft, nzlo_fft, nxhi_fft, nyhi_fft, nzhi_fft,
+                 nxlo_in, nylo_in, nzlo_in, nxhi_in, nyhi_in, nzhi_in,
+                 energy_1, greensfn, 
+	         fkx, fky, fkz,fkx2, fky2, fkz2,
+                 vdx_brick, vdy_brick, vdz_brick, virial_1, vg,vg2,
+                 u_brick, v0_brick, v1_brick, v2_brick, v3_brick, v4_brick, v5_brick);
+
+      cg->forward_comm(this, FORWARD_IK);
+
+      fieldforce_c_ik(); 
+
+      if (evflag_atom) cg_peratom->forward_comm(this, FORWARD_IK_PERATOM);
+    }
+    if (evflag_atom) fieldforce_c_peratom();
+  }
+
+  if (function[1]) {
+    //perform calculations for geometric mixing
+    particle_map(delxinv_6, delyinv_6, delzinv_6, shift_6, part2grid_6, nupper_6, nlower_6,
+                 nxlo_out_6, nylo_out_6, nzlo_out_6, nxhi_out_6, nyhi_out_6, nzhi_out_6);
+    make_rho_g();
+
+
+    cg_6->reverse_comm(this, REVERSE_RHO_G);
+
+    brick2fft(nxlo_in_6, nylo_in_6, nzlo_in_6, nxhi_in_6, nyhi_in_6, nzhi_in_6,
+	      density_brick_g, density_fft_g, work1_6,remap_6);
+ 
+    if (differentiation_flag == 1) {
+
+      poisson_ad(work1_6, work2_6, density_fft_g, fft1_6, fft2_6,
+                 nx_pppm_6, ny_pppm_6, nz_pppm_6, nfft_6,
+                 nxlo_fft_6, nylo_fft_6, nzlo_fft_6, nxhi_fft_6, nyhi_fft_6, nzhi_fft_6,
+                 nxlo_in_6, nylo_in_6, nzlo_in_6, nxhi_in_6, nyhi_in_6, nzhi_in_6,
+                 energy_6, greensfn_6, 
+                 virial_6, vg_6, vg2_6,
+                 u_brick_g, v0_brick_g, v1_brick_g, v2_brick_g, v3_brick_g, v4_brick_g, v5_brick_g);
+
+      cg_6->forward_comm(this,FORWARD_AD_G);
+
+      fieldforce_g_ad();
+
+      if (vflag_atom) cg_peratom_6->forward_comm(this,FORWARD_AD_PERATOM_G);
+
+    } else {
+      poisson_ik(work1_6, work2_6, density_fft_g, fft1_6, fft2_6,
+                 nx_pppm_6, ny_pppm_6, nz_pppm_6, nfft_6,
+                 nxlo_fft_6, nylo_fft_6, nzlo_fft_6, nxhi_fft_6, nyhi_fft_6, nzhi_fft_6,
+                 nxlo_in_6, nylo_in_6, nzlo_in_6, nxhi_in_6, nyhi_in_6, nzhi_in_6,
+                 energy_6, greensfn_6, 
+	         fkx_6, fky_6, fkz_6,fkx2_6, fky2_6, fkz2_6,
+                 vdx_brick_g, vdy_brick_g, vdz_brick_g, virial_6, vg_6, vg2_6,
+                 u_brick_g, v0_brick_g, v1_brick_g, v2_brick_g, v3_brick_g, v4_brick_g, v5_brick_g);
+ 
+      cg_6->forward_comm(this,FORWARD_IK_G);
+ 
+      fieldforce_g_ik();
+
+
+      if (evflag_atom) cg_peratom_6->forward_comm(this, FORWARD_IK_PERATOM_G);
+    }
+    if (evflag_atom) fieldforce_g_peratom();
+  }
+
+  if (function[2]) {
+    //perform calculations for arithmetic mixing
+    particle_map(delxinv_6, delyinv_6, delzinv_6, shift_6, part2grid_6, nupper_6, nlower_6,
+                 nxlo_out_6, nylo_out_6, nzlo_out_6, nxhi_out_6, nyhi_out_6, nzhi_out_6);
+    make_rho_a();
+
+    cg_6->reverse_comm(this, REVERSE_RHO_A);
+
+    brick2fft_a();
+
+    if ( differentiation_flag == 1) {
+
+      poisson_ad(work1_6, work2_6, density_fft_a3, fft1_6, fft2_6,
+                 nx_pppm_6, ny_pppm_6, nz_pppm_6, nfft_6,
+                 nxlo_fft_6, nylo_fft_6, nzlo_fft_6, nxhi_fft_6, nyhi_fft_6, nzhi_fft_6,
+                 nxlo_in_6, nylo_in_6, nzlo_in_6, nxhi_in_6, nyhi_in_6, nzhi_in_6,
+                 energy_6, greensfn_6, 
+                 virial_6, vg_6, vg2_6,
+                 u_brick_a3, v0_brick_a3, v1_brick_a3, v2_brick_a3, v3_brick_a3, v4_brick_a3, v5_brick_a3);
+      poisson_2s_ad(density_fft_a0, density_fft_a6,
+                    u_brick_a0, v0_brick_a0, v1_brick_a0, v2_brick_a0, v3_brick_a0, v4_brick_a0, v5_brick_a0,
+                    u_brick_a6, v0_brick_a6, v1_brick_a6, v2_brick_a6, v3_brick_a6, v4_brick_a6, v5_brick_a6);
+      poisson_2s_ad(density_fft_a1, density_fft_a5,
+                    u_brick_a1, v0_brick_a1, v1_brick_a1, v2_brick_a1, v3_brick_a1, v4_brick_a1, v5_brick_a1,
+                    u_brick_a5, v0_brick_a5, v1_brick_a5, v2_brick_a5, v3_brick_a5, v4_brick_a5, v5_brick_a5);
+      poisson_2s_ad(density_fft_a2, density_fft_a4,
+                    u_brick_a2, v0_brick_a2, v1_brick_a2, v2_brick_a2, v3_brick_a2, v4_brick_a2, v5_brick_a2,
+                    u_brick_a4, v0_brick_a4, v1_brick_a4, v2_brick_a4, v3_brick_a4, v4_brick_a4, v5_brick_a4);
+
+      cg_6->forward_comm(this, FORWARD_AD_A);
+
+      fieldforce_a_ad();
+
+      if (evflag_atom) cg_peratom_6->forward_comm(this, FORWARD_AD_PERATOM_A); // NOTE(review): the other ad branches test vflag_atom here - confirm which is intended
+
+    }  else {
+    
+      poisson_ik(work1_6, work2_6, density_fft_a3, fft1_6, fft2_6,
+                 nx_pppm_6, ny_pppm_6, nz_pppm_6, nfft_6,
+                 nxlo_fft_6, nylo_fft_6, nzlo_fft_6, nxhi_fft_6, nyhi_fft_6, nzhi_fft_6,
+                 nxlo_in_6, nylo_in_6, nzlo_in_6, nxhi_in_6, nyhi_in_6, nzhi_in_6,
+                 energy_6, greensfn_6, 
+	         fkx_6, fky_6, fkz_6,fkx2_6, fky2_6, fkz2_6,
+                 vdx_brick_a3, vdy_brick_a3, vdz_brick_a3, virial_6, vg_6, vg2_6,
+                 u_brick_a3, v0_brick_a3, v1_brick_a3, v2_brick_a3, v3_brick_a3, v4_brick_a3, v5_brick_a3);
+      poisson_2s_ik(density_fft_a0, density_fft_a6,
+                    vdx_brick_a0, vdy_brick_a0, vdz_brick_a0,
+                    vdx_brick_a6, vdy_brick_a6, vdz_brick_a6,
+                    u_brick_a0, v0_brick_a0, v1_brick_a0, v2_brick_a0, v3_brick_a0, v4_brick_a0, v5_brick_a0,
+                    u_brick_a6, v0_brick_a6, v1_brick_a6, v2_brick_a6, v3_brick_a6, v4_brick_a6, v5_brick_a6);
+      poisson_2s_ik(density_fft_a1, density_fft_a5,
+                    vdx_brick_a1, vdy_brick_a1, vdz_brick_a1,
+                    vdx_brick_a5, vdy_brick_a5, vdz_brick_a5,
+                    u_brick_a1, v0_brick_a1, v1_brick_a1, v2_brick_a1, v3_brick_a1, v4_brick_a1, v5_brick_a1,
+                    u_brick_a5, v0_brick_a5, v1_brick_a5, v2_brick_a5, v3_brick_a5, v4_brick_a5, v5_brick_a5);
+      poisson_2s_ik(density_fft_a2, density_fft_a4,
+                    vdx_brick_a2, vdy_brick_a2, vdz_brick_a2,
+                    vdx_brick_a4, vdy_brick_a4, vdz_brick_a4,
+                    u_brick_a2, v0_brick_a2, v1_brick_a2, v2_brick_a2, v3_brick_a2, v4_brick_a2, v5_brick_a2,
+                    u_brick_a4, v0_brick_a4, v1_brick_a4, v2_brick_a4, v3_brick_a4, v4_brick_a4, v5_brick_a4);
+
+      cg_6->forward_comm(this, FORWARD_IK_A);
+
+      fieldforce_a_ik();
+
+      if (evflag_atom) cg_peratom_6->forward_comm(this, FORWARD_IK_PERATOM_A);
+    }
+    if (evflag_atom) fieldforce_a_peratom();
+  }
+
+  if (function[3]) {
+    //perform calculations if no mixing rule applies
+    particle_map(delxinv_6, delyinv_6, delzinv_6, shift_6, part2grid_6, nupper_6, nlower_6,
+                 nxlo_out_6, nylo_out_6, nzlo_out_6, nxhi_out_6, nyhi_out_6, nzhi_out_6);
+
+    make_rho_none();
+
+    cg_6->reverse_comm(this, REVERSE_RHO_NONE);
+
+    brick2fft_none();
+
+    if (differentiation_flag == 1) {
+
+      int n = 0;
+      for (int k = 0; k<nsplit_alloc/2; k++) {   // grids are handled in pairs (n, n+1)
+        poisson_none_ad(n,n+1,density_fft_none[n],density_fft_none[n+1],
+                        u_brick_none[n],u_brick_none[n+1],
+                        v0_brick_none, v1_brick_none, v2_brick_none,
+                        v3_brick_none, v4_brick_none, v5_brick_none);
+        n += 2;
+      }
+
+      cg_6->forward_comm(this,FORWARD_AD_NONE);
+
+      fieldforce_none_ad();
+
+      if (vflag_atom) cg_peratom_6->forward_comm(this,FORWARD_AD_PERATOM_NONE);
+
+    } else {
+      int n = 0;
+      for (int k = 0; k<nsplit_alloc/2; k++) {   // grids are handled in pairs (n, n+1)
+
+        poisson_none_ik(n,n+1,density_fft_none[n], density_fft_none[n+1],
+                        vdx_brick_none[n], vdy_brick_none[n], vdz_brick_none[n],
+                        vdx_brick_none[n+1], vdy_brick_none[n+1], vdz_brick_none[n+1],
+                        u_brick_none, v0_brick_none, v1_brick_none, v2_brick_none,
+                        v3_brick_none, v4_brick_none, v5_brick_none);
+        n += 2;
+      }
+
+      cg_6->forward_comm(this,FORWARD_IK_NONE);
+
+      fieldforce_none_ik();
+
+
+      if (evflag_atom) cg_peratom_6->forward_comm(this, FORWARD_IK_PERATOM_NONE);
+    }
+    if (evflag_atom) fieldforce_none_peratom();
+  }
+
+  // sum energy across procs and add in volume-dependent term
+
+  const double qscale = force->qqrd2e * scale;
+  if (eflag_global) {
+    double energy_all;
+    MPI_Allreduce(&energy_1,&energy_all,1,MPI_DOUBLE,MPI_SUM,world);
+    energy_1 = energy_all;
+    MPI_Allreduce(&energy_6,&energy_all,1,MPI_DOUBLE,MPI_SUM,world);
+    energy_6 = energy_all;
+    // the summed grid energies are scaled by 0.5*volume
+    energy_1 *= 0.5*volume;
+    energy_6 *= 0.5*volume;
+    // analytic correction terms that do not come from the grid sums
+    energy_1 -= g_ewald*qsqsum/MY_PIS +
+      MY_PI2*qsum*qsum / (g_ewald*g_ewald*volume);
+    energy_6 += - MY_PI*MY_PIS/(6*volume)*pow(g_ewald_6,3)*csumij +
+      1.0/12.0*pow(g_ewald_6,6)*csum;
+    energy_1 *= qscale;
+  }
+
+  // sum virial across procs
+  // Coulomb part is scaled by qscale, dispersion part is added unscaled
+  if (vflag_global) {
+    double virial_all[6];
+    MPI_Allreduce(virial_1,virial_all,6,MPI_DOUBLE,MPI_SUM,world);
+    for (i = 0; i < 6; i++) virial[i] = 0.5*qscale*volume*virial_all[i];
+    MPI_Allreduce(virial_6,virial_all,6,MPI_DOUBLE,MPI_SUM,world);
+    for (i = 0; i < 6; i++) virial[i] += 0.5*volume*virial_all[i];
+    if (function[1]+function[2]+function[3]){
+      double a =  MY_PI*MY_PIS/(6*volume)*pow(g_ewald_6,3)*csumij;
+      virial[0] -= a;
+      virial[1] -= a;
+      virial[2] -= a;
+    }
+  }
+
+  if (eflag_atom) {
+    if (function[0]) {
+      double *q = atom->q;
+      for (i = 0; i < atom->nlocal; i++) {
+        eatom[i] -= qscale*g_ewald*q[i]*q[i]/MY_PIS + qscale*MY_PI2*q[i]*qsum / (g_ewald*g_ewald*volume); //coulomb self energy correction
+      }
+    }
+    if (function[1] + function[2] + function[3]) {
+      int tmp;
+      for (i = 0; i < atom->nlocal; i++) {
+        tmp = atom->type[i];   // atom type indexes the per-type csumi and cii tables
+        eatom[i] += - MY_PI*MY_PIS/(6*volume)*pow(g_ewald_6,3)*csumi[tmp] +
+                      1.0/12.0*pow(g_ewald_6,6)*cii[tmp];
+      }
+    }
+  }
+            
+  if (vflag_atom) {
+    if (function[1] + function[2] + function[3]) {
+      int tmp;
+      for (i = 0; i < atom->nlocal; i++) {
+        tmp = atom->type[i];
+        for (int n = 0; n < 3; n++) vatom[i][n] -= MY_PI*MY_PIS/(6*volume)*pow(g_ewald_6,3)*csumi[tmp]; //dispersion self virial correction
+      }
+    }
+  }
+
+
+  // 2d slab correction, then add the active parts into the total energy
+
+  if (slabflag) slabcorr(eflag);
+  if (function[0]) energy += energy_1;
+  if (function[1] + function[2] + function[3]) energy += energy_6;
+
+  // convert atoms back from lamda to box coords
+  
+  if (triclinic) domain->lamda2x(atom->nlocal);
+}
+
+/* ----------------------------------------------------------------------
+   initialize coefficients needed for the dispersion density on the grids
+------------------------------------------------------------------------- */
+
+void PPPMDisp::init_coeffs()				// local pair coeffs
+{
+  int tmp;
+  int n = atom->ntypes;
+  int converged;
+  delete [] B;
+  if (function[3] + function[2]) {                     // no mixing rule or arithmetic
+    if (function[2] && me == 0) {
+      if (screen) fprintf(screen,"  Optimizing splitting of Dispersion coefficients\n");
+      if (logfile) fprintf(logfile,"  Optimizing splitting of Dispersion coefficients\n");
+    }
+    // get dispersion coefficients
+    double **b = (double **) force->pair->extract("B",tmp);
+    // allocate data for eigenvalue decomposition
+    double **A;
+    double **Q;
+    memory->create(A,n,n,"pppm/disp:A");
+    memory->create(Q,n,n,"pppm/disp:Q");
+    // fill coefficients to matrix a
+    for (int i = 1; i <= n; i++)
+      for (int j = 1; j <= n; j++)
+        A[i-1][j-1] = b[i][j];
+    // transform q to a unity matrix
+    for (int i = 0; i < n; i++)
+      for (int j = 0; j < n; j++)
+        Q[i][j] = 0.0;
+    for (int i = 0; i < n; i++)
+      Q[i][i] = 1.0;
+    // perform eigenvalue decomposition with QR algorithm
+    converged = qr_alg(A,Q,n);
+    if (function[3] && !converged) {
+      error->all(FLERR,"Matrix factorization to split dispersion coefficients failed");
+    }
+    // determine number of used eigenvalues 
+    //   based on maximum allowed number or cutoff criterion
+    //   sort eigenvalues according to their size with bubble sort
+    double t;
+    for (int i = 0; i < n; i++) {
+      for (int j = 0; j < n-1-i; j++) {
+        if (fabs(A[j][j]) < fabs(A[j+1][j+1])) {
+          t = A[j][j];
+          A[j][j] = A[j+1][j+1];
+          A[j+1][j+1] = t;
+          for (int k = 0; k < n; k++) {
+	    t = Q[k][j];
+	    Q[k][j] = Q[k][j+1];
+            Q[k][j+1] = t;
+          }
+        }
+      }
+    }
+
+    //   check which eigenvalue is the first that is smaller
+    //   than a specified tolerance
+    //   check how many are maximum allowed by the user
+    double amax = fabs(A[0][0]);
+    double acrit = amax*splittol;
+    double bmax = 0;
+    double err = 0;
+    nsplit = 0;
+    for (int i = 0; i < n; i++) {
+      if (fabs(A[i][i]) > acrit) nsplit++;
+      else {
+        bmax = fabs(A[i][i]);
+        break;
+      }
+    }
+
+    err =  bmax/amax;
+    if (err > 1.0e-4) {
+      char str[128];
+      sprintf(str,"Error in splitting of dispersion coeffs is estimated %g",err);
+      error->warning(FLERR, str);
+    }
+    // set B
+    B = new double[nsplit*n+nsplit];
+    for (int i = 0; i< nsplit; i++) {
+      B[i] = A[i][i];
+      for (int j = 0; j < n; j++) {
+        B[nsplit*(j+1) + i] = Q[j][i];
+      }
+    }
+
+    nsplit_alloc = nsplit;
+    if (nsplit%2 == 1) nsplit_alloc = nsplit + 1;
+    // check if the function should preferably be [1] or [2] or [3]
+    if (nsplit == 1) {
+      delete [] B;
+      function[3] = 0;
+      function[2] = 0;
+      function[1] = 1;
+      if (me == 0) {
+        if (screen) fprintf(screen,"  Using geometric mixing for reciprocal space\n");
+        if (logfile) fprintf(logfile,"  Using geometric mixing for reciprocal space\n");
+      }
+    }
+    if (function[2] && nsplit <= 6) {
+      if (me == 0) {
+        if (screen) fprintf(screen,"  Using %d instead of 7 structure factors\n",nsplit);
+        if (logfile) fprintf(logfile,"  Using %d instead of 7 structure factors\n",nsplit);
+      }
+      function[3] = 1;
+      function[2] = 0;
+    }
+    if (function[2] && (nsplit > 6)) {
+      if (me == 0) {
+        if (screen) fprintf(screen,"  Using 7 structure factors\n");
+        if (logfile) fprintf(logfile,"  Using 7 structure factors\n");
+      }
+      delete [] B;
+    }
+    if (function[3]) {
+      if (me == 0) {
+        if (screen) fprintf(screen,"  Using %d structure factors\n",nsplit);
+        if (logfile) fprintf(logfile,"  Using %d structure factors\n",nsplit);
+      }
+      if (nsplit > 9) error->warning(FLERR, "Simulations might be very slow because of large number of structure factors");
+    }
+
+    memory->destroy(A);
+    memory->destroy(Q);
+  }
+  if (function[1]) {					// geometric 1/r^6
+    double **b = (double **) force->pair->extract("B",tmp);
+    B = new double[n+1];
+    for (int i=0; i<=n; ++i) B[i] = sqrt(fabs(b[i][i]));
+  }
+  if (function[2]) {					// arithmetic 1/r^6
+    //cannot use epsilon, because this has not been set yet
+    double **epsilon = (double **) force->pair->extract("epsilon",tmp);  
+    //cannot use sigma, because this has not been set yet
+    double **sigma = (double **) force->pair->extract("sigma",tmp);  
+    if (!(epsilon&&sigma))
+      error->all(FLERR,"Epsilon or sigma reference not set by pair style in PPPMDisp");
+    double eps_i, sigma_i, sigma_n, *bi = B = new double[7*n+7];
+    double c[7] = {
+      1.0, sqrt(6.0), sqrt(15.0), sqrt(20.0), sqrt(15.0), sqrt(6.0), 1.0};
+    for (int i=0; i<=n; ++i) {
+      eps_i = sqrt(epsilon[i][i]);
+      sigma_i = sigma[i][i];
+      sigma_n = 1.0;
+      for (int j=0; j<7; ++j) {
+	*(bi++) = sigma_n*eps_i*c[j]*0.25;
+        sigma_n *= sigma_i;
+      }
+    }
+  }
+}
+
+/* ----------------------------------------------------------------------
+   Eigenvalue decomposition of a real, symmetric matrix with the QR
+   method (includes transformation to tridiagonal matrix + Wilkinson
+   shift)
+------------------------------------------------------------------------- */
+
+int PPPMDisp::qr_alg(double **A, double **Q, int n)
+{
+  // on return the diagonal of A holds the eigenvalue estimates and Q has been
+  // multiplied from the right by all applied rotations; returns 1 if
+  // check_convergence() accepted the result, 0 if countmax sweeps ran out
+  // a 1x1 matrix is already diagonal; the Wilkinson shift below would
+  // read A[n-2] outside of the matrix for n < 2
+  if (n < 2) return 1;
+
+  int converged = 0;
+  double an1, an, bn1, d, mue;
+  // work matrices: A0 keeps a copy of A for the convergence check, Qi takes
+  // the rotations of one sweep, C/D/E are scratch for matrix products
+  double **A0,**Qi,**C,**D,**E;
+  memory->create(A0,n,n,"pppm/disp:A0");
+  memory->create(Qi,n,n,"pppm/disp:Qi");
+  memory->create(C,n,n,"pppm/disp:C");
+  memory->create(D,n,n,"pppm/disp:D");
+  memory->create(E,n,n,"pppm/disp:E");
+  for (int i = 0; i < n; i++)
+    for (int j = 0; j < n; j++)
+      A0[i][j] = A[i][j];
+
+  // transform Matrix A to Tridiagonal form
+  hessenberg(A,Q,n);
+
+  // start loop for the matrix factorization
+  int count = 0;
+  int countmax = 100000;
+  while (1) {
+    // make a Wilkinson shift
+    an1 = A[n-2][n-2];
+    an = A[n-1][n-1];
+    bn1 = A[n-2][n-1];
+    d = (an1-an)/2;
+    mue = an + d - copysign(1.,d)*sqrt(d*d + bn1*bn1);
+    for (int i = 0; i < n; i++)
+      A[i][i] -= mue;
+    // perform a QR factorization for a tridiagonal matrix A
+    qr_tri(Qi,A,n);
+    // update the matrices: A = A*Qi, Q = Q*Qi
+    mmult(A,Qi,C,n);
+    mmult(Q,Qi,C,n);
+    // backward Wilkinson shift
+    for (int i = 0; i < n; i++)
+      A[i][i] += mue;
+
+    // check the convergence
+    converged = check_convergence(A,Q,A0,C,D,E,n);
+    if (converged) break;
+    count = count + 1;
+    if (count == countmax) break;
+  }
+
+  // free allocated memory
+  memory->destroy(Qi);
+  memory->destroy(A0);
+  memory->destroy(C);
+  memory->destroy(D);
+  memory->destroy(E);
+
+  return converged;
+}
+
+/* ----------------------------------------------------------------------
+   Transform a Matrix to Hessenberg form (for symmetric Matrices, the 
+   result will be a tridiagonal matrix)
+------------------------------------------------------------------------- */
+
+void PPPMDisp::hessenberg(double **A, double **Q, int n)
+{
+  // Givens rotations zero A[j][i] for j > i+1; Q collects them from the right
+  double x1,x2;
+  for (int i = 0; i < n-1; i++) {
+    for (int j = i+2; j < n; j++) {
+      // compute coeffs for the rotation matrix
+      const double a = A[i+1][i], b = A[j][i];
+      const double r = sqrt(a*a + b*b);
+      if (r == 0.0) continue;       // nothing to eliminate; a/r would be NaN
+      const double c = a/r, s = b/r;
+      // update the entries of A with multiplication from the left
+      for (int k = 0; k < n; k++) {
+        x1 = A[i+1][k];
+        x2 = A[j][k];
+        A[i+1][k] = c*x1 + s*x2;
+        A[j][k] = -s*x1 + c*x2;
+      }
+      // update the entries of A and Q with a multiplication from the right
+      for (int k = 0; k < n; k++) {
+        x1 = A[k][i+1];
+        x2 = A[k][j];
+        A[k][i+1] = c*x1 + s*x2;
+        A[k][j] = -s*x1 + c*x2;
+        x1 = Q[k][i+1];
+        x2 = Q[k][j];
+        Q[k][i+1] = c*x1 + s*x2;
+        Q[k][j] = -s*x1 + c*x2;
+      }
+    }
+  }
+}
+
+/* ----------------------------------------------------------------------
+   QR factorization for a tridiagonal matrix; Result of the factorization
+   is stored in A and Qi
+------------------------------------------------------------------------- */
+
+void PPPMDisp::qr_tri(double** Qi,double** A,int n)
+{
+  // Givens rotations between neighboring rows zero the sub-diagonal of A;
+  // Qi starts as unity matrix and collects the rotations from the right
+  double x1,x2;
+  // make Qi a unity matrix
+  for (int i = 0; i < n; i++)
+    for (int j = 0; j < n; j++)
+      Qi[i][j] = (i == j) ? 1.0 : 0.0;
+  // loop over main diagonal and first off-diagonal of A
+  for (int i = 0; i < n-1; i++) {
+    const int j = i+1;
+    // coefficients of the rotation matrix
+    const double a = A[i][i], b = A[j][i];
+    const double r = sqrt(a*a + b*b);
+    // both entries are zero: nothing to eliminate, and a/r would be NaN
+    if (r == 0.0) continue;
+    const double c = a/r, s = b/r;
+    // rotate rows i and j of A in the columns max(i-1,0) .. min(i+3,n)-1
+    const int k0 = (i-1>0)?i-1:0;
+    const int kmax = (i+3<n)?i+3:n;
+    for (int k = k0; k < kmax; k++) {
+      x1 = A[i][k];
+      x2 = A[j][k];
+      A[i][k] = c*x1 + s*x2;
+      A[j][k] = -s*x1 + c*x2;
+    }
+    // apply the same rotation to the columns i and j of Qi
+    for (int k = 0; k < n; k++) {
+      x1 = Qi[k][i];
+      x2 = Qi[k][j];
+      Qi[k][i] = c*x1 + s*x2;
+      Qi[k][j] = -s*x1 + c*x2;
+    }
+  }
+}
+
+/* ----------------------------------------------------------------------
+   Multiply two matrices A and B, store the result in A; C provides
+   some memory to store intermediate results
+------------------------------------------------------------------------- */
+
+void PPPMDisp::mmult(double** A, double** B, double** C, int n)
+{
+  // form the product in the scratch matrix C, since A is still being read
+  for (int row = 0; row < n; row++) {
+    for (int col = 0; col < n; col++) {
+      double sum = 0.0;
+      for (int m = 0; m < n; m++)
+        sum += A[row][m] * B[m][col];
+      C[row][col] = sum;
+    }
+  }
+  // overwrite A with the finished product
+  for (int row = 0; row < n; row++)
+    for (int col = 0; col < n; col++)
+      A[row][col] = C[row][col];
+}
+
+/* ----------------------------------------------------------------------
+   Check if the factorization has converged by comparing all elements of the
+   original matrix and the new matrix
+------------------------------------------------------------------------- */
+
+int PPPMDisp::check_convergence(double** A,double** Q,double** A0,
+                                double** C,double** D,double** E,int n)
+{
+  // returns 1 if Q*diag(A)*Q^T matches A0 in every element to within eps
+  // times the largest magnitude in A0, else 0; C, D and E are scratch space
+  double eps = 1.0e-8;
+  int converged = 1;
+  double Bmax = 0.0;
+  double diff;
+  // get the largest magnitude of the original matrix; the signed maximum
+  // would give a zero tolerance for a matrix without positive elements
+  for (int i = 0; i < n; i++)
+    for (int j = 0; j < n; j++)
+      Bmax = (Bmax>fabs(A0[i][j]))?Bmax:fabs(A0[i][j]);
+  double epsabs = eps*Bmax;
+
+  // reconstruct the original matrix
+  // store the diagonal elements in D
+  for (int i = 0; i < n; i++)
+    for (int j = 0; j < n; j++)
+      D[i][j] = (i == j) ? A[i][i] : 0.0;
+  // store matrix Q in E
+  for (int i = 0; i < n; i++)
+    for (int j = 0; j < n; j++)
+      E[i][j] = Q[i][j];
+  // E = Q*A
+  mmult(E,D,C,n);
+  // store transpose of Q in D
+  for (int i = 0; i < n; i++)
+    for (int j = 0; j < n; j++)
+      D[i][j] = Q[j][i];
+  // E = Q*A*Q.t
+  mmult(E,D,C,n);
+
+  // compare the original matrix and the final matrix; the test is written
+  // so that a NaN difference counts as a mismatch
+  for (int i = 0; i < n; i++) {
+    for (int j = 0; j < n; j++) {
+      diff = fabs(A0[i][j] - E[i][j]);
+      if (!(diff <= epsabs)) converged = 0;
+    }
+  }
+  return converged;
+}
+
+/* ----------------------------------------------------------------------
+   allocate memory that depends on # of K-vectors and order 
+------------------------------------------------------------------------- */
+
+void PPPMDisp::allocate()
+{
+  // each active function[] gets its own grid arrays, FFT plans and ghost comm
+  int (*procneigh)[2] = comm->procneigh;
+
+  if (function[0]) {
+    memory->create(work1,2*nfft_both,"pppm/disp:work1");
+    memory->create(work2,2*nfft_both,"pppm/disp:work2");
+
+    memory->create1d_offset(fkx,nxlo_fft,nxhi_fft,"pppm/disp:fkx");
+    memory->create1d_offset(fky,nylo_fft,nyhi_fft,"pppm/disp:fky");
+    memory->create1d_offset(fkz,nzlo_fft,nzhi_fft,"pppm/disp:fkz");
+
+    memory->create1d_offset(fkx2,nxlo_fft,nxhi_fft,"pppm/disp:fkx2");
+    memory->create1d_offset(fky2,nylo_fft,nyhi_fft,"pppm/disp:fky2");
+    memory->create1d_offset(fkz2,nzlo_fft,nzhi_fft,"pppm/disp:fkz2");
+
+    // arrays sized by the interpolation order
+    memory->create(gf_b,order,"pppm/disp:gf_b");
+    memory->create2d_offset(rho1d,3,-order/2,order/2,"pppm/disp:rho1d");
+    memory->create2d_offset(rho_coeff,order,(1-order)/2,order/2,"pppm/disp:rho_coeff");
+    memory->create2d_offset(drho1d,3,-order/2,order/2,"pppm/disp:drho1d");
+    memory->create2d_offset(drho_coeff,order,(1-order)/2,order/2,"pppm/disp:drho_coeff");
+
+    memory->create(greensfn,nfft_both,"pppm/disp:greensfn");
+    memory->create(vg,nfft_both,6,"pppm/disp:vg");
+    memory->create(vg2,nfft_both,3,"pppm/disp:vg2");
+
+    memory->create3d_offset(density_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
+                            nxlo_out,nxhi_out,"pppm/disp:density_brick");
+    if ( differentiation_flag == 1) {
+      memory->create3d_offset(u_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
+                              nxlo_out,nxhi_out,"pppm/disp:u_brick");
+      memory->create(sf_precoeff1,nfft_both,"pppm/disp:sf_precoeff1");
+      memory->create(sf_precoeff2,nfft_both,"pppm/disp:sf_precoeff2");
+      memory->create(sf_precoeff3,nfft_both,"pppm/disp:sf_precoeff3");
+      memory->create(sf_precoeff4,nfft_both,"pppm/disp:sf_precoeff4");
+      memory->create(sf_precoeff5,nfft_both,"pppm/disp:sf_precoeff5");
+      memory->create(sf_precoeff6,nfft_both,"pppm/disp:sf_precoeff6");
+
+    } else {
+      memory->create3d_offset(vdx_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
+                              nxlo_out,nxhi_out,"pppm/disp:vdx_brick");
+      memory->create3d_offset(vdy_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
+                              nxlo_out,nxhi_out,"pppm/disp:vdy_brick");
+      memory->create3d_offset(vdz_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
+                              nxlo_out,nxhi_out,"pppm/disp:vdz_brick");
+    }
+    memory->create(density_fft,nfft_both,"pppm/disp:density_fft");
+
+    int tmp;
+
+    fft1 = new FFT3d(lmp,world,nx_pppm,ny_pppm,nz_pppm,
+                     nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft,
+                     nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft,
+                     0,0,&tmp);
+
+    fft2 = new FFT3d(lmp,world,nx_pppm,ny_pppm,nz_pppm,
+                     nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft,
+                     nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
+                     0,0,&tmp);
+
+    remap = new Remap(lmp,world,
+                      nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
+                      nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft,
+                      1,0,0,FFT_PRECISION);
+
+    // create ghost grid object for rho and electric field communication
+
+    if (differentiation_flag == 1)
+      cg = new CommGrid(lmp,world,1,1,
+                        nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
+                        nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out,
+                        procneigh[0][0],procneigh[0][1],procneigh[1][0],
+                        procneigh[1][1],procneigh[2][0],procneigh[2][1]);
+    else
+      cg = new CommGrid(lmp,world,3,1,
+                        nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
+                        nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out,
+                        procneigh[0][0],procneigh[0][1],procneigh[1][0],
+                        procneigh[1][1],procneigh[2][0],procneigh[2][1]);
+  }
+
+  if (function[1]) {                          // geometric mixing: one _g grid set
+    memory->create(work1_6,2*nfft_both_6,"pppm/disp:work1_6");
+    memory->create(work2_6,2*nfft_both_6,"pppm/disp:work2_6");
+
+    memory->create1d_offset(fkx_6,nxlo_fft_6,nxhi_fft_6,"pppm/disp:fkx_6");
+    memory->create1d_offset(fky_6,nylo_fft_6,nyhi_fft_6,"pppm/disp:fky_6");
+    memory->create1d_offset(fkz_6,nzlo_fft_6,nzhi_fft_6,"pppm/disp:fkz_6");
+
+    memory->create1d_offset(fkx2_6,nxlo_fft_6,nxhi_fft_6,"pppm/disp:fkx2_6");
+    memory->create1d_offset(fky2_6,nylo_fft_6,nyhi_fft_6,"pppm/disp:fky2_6");
+    memory->create1d_offset(fkz2_6,nzlo_fft_6,nzhi_fft_6,"pppm/disp:fkz2_6");
+
+    memory->create(gf_b_6,order_6,"pppm/disp:gf_b_6");
+    memory->create2d_offset(rho1d_6,3,-order_6/2,order_6/2,"pppm/disp:rho1d_6");
+    memory->create2d_offset(rho_coeff_6,order_6,(1-order_6)/2,order_6/2,"pppm/disp:rho_coeff_6");
+    memory->create2d_offset(drho1d_6,3,-order_6/2,order_6/2,"pppm/disp:drho1d_6");
+    memory->create2d_offset(drho_coeff_6,order_6,(1-order_6)/2,order_6/2,"pppm/disp:drho_coeff_6");
+
+    memory->create(greensfn_6,nfft_both_6,"pppm/disp:greensfn_6");
+    memory->create(vg_6,nfft_both_6,6,"pppm/disp:vg_6");
+    memory->create(vg2_6,nfft_both_6,3,"pppm/disp:vg2_6");
+
+    memory->create3d_offset(density_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+                            nxlo_out_6,nxhi_out_6,"pppm/disp:density_brick_g");
+    if ( differentiation_flag == 1) {
+      memory->create3d_offset(u_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+                              nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_g");
+
+      memory->create(sf_precoeff1_6,nfft_both_6,"pppm/disp:sf_precoeff1_6");
+      memory->create(sf_precoeff2_6,nfft_both_6,"pppm/disp:sf_precoeff2_6");
+      memory->create(sf_precoeff3_6,nfft_both_6,"pppm/disp:sf_precoeff3_6");
+      memory->create(sf_precoeff4_6,nfft_both_6,"pppm/disp:sf_precoeff4_6");
+      memory->create(sf_precoeff5_6,nfft_both_6,"pppm/disp:sf_precoeff5_6");
+      memory->create(sf_precoeff6_6,nfft_both_6,"pppm/disp:sf_precoeff6_6");
+
+    }  else {
+      memory->create3d_offset(vdx_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+                              nxlo_out_6,nxhi_out_6,"pppm/disp:vdx_brick_g");
+      memory->create3d_offset(vdy_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+                              nxlo_out_6,nxhi_out_6,"pppm/disp:vdy_brick_g");
+      memory->create3d_offset(vdz_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+                              nxlo_out_6,nxhi_out_6,"pppm/disp:vdz_brick_g");
+    }
+    memory->create(density_fft_g,nfft_both_6,"pppm/disp:density_fft_g");
+
+    // FFTs on the _6 grid and remap from the *_in_6 to the *_fft_6 bounds
+    int tmp;
+
+    fft1_6 = new FFT3d(lmp,world,nx_pppm_6,ny_pppm_6,nz_pppm_6,
+                       nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6,
+                       nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6,
+                       0,0,&tmp);
+
+    fft2_6 = new FFT3d(lmp,world,nx_pppm_6,ny_pppm_6,nz_pppm_6,
+                       nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6,
+                       nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6,
+                       0,0,&tmp);
+
+    remap_6 = new Remap(lmp,world,
+                        nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6,
+                        nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6,
+                        1,0,0,FFT_PRECISION);
+
+    // create ghost grid object for rho and electric field communication
+
+    if (differentiation_flag == 1)
+      cg_6 = new CommGrid(lmp,world,1,1,
+                          nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6,
+                          nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6,
+                          procneigh[0][0],procneigh[0][1],procneigh[1][0],
+                          procneigh[1][1],procneigh[2][0],procneigh[2][1]);
+    else
+      cg_6 = new CommGrid(lmp,world,3,1,
+                          nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6,
+                          nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6,
+                          procneigh[0][0],procneigh[0][1],procneigh[1][0],
+                          procneigh[1][1],procneigh[2][0],procneigh[2][1]);
+  }
+
+  if (function[2]) {                          // arithmetic mixing: grid sets a0..a6
+    memory->create(work1_6,2*nfft_both_6,"pppm/disp:work1_6");
+    memory->create(work2_6,2*nfft_both_6,"pppm/disp:work2_6");
+
+    memory->create1d_offset(fkx_6,nxlo_fft_6,nxhi_fft_6,"pppm/disp:fkx_6");
+    memory->create1d_offset(fky_6,nylo_fft_6,nyhi_fft_6,"pppm/disp:fky_6");
+    memory->create1d_offset(fkz_6,nzlo_fft_6,nzhi_fft_6,"pppm/disp:fkz_6");
+
+    memory->create1d_offset(fkx2_6,nxlo_fft_6,nxhi_fft_6,"pppm/disp:fkx2_6");
+    memory->create1d_offset(fky2_6,nylo_fft_6,nyhi_fft_6,"pppm/disp:fky2_6");
+    memory->create1d_offset(fkz2_6,nzlo_fft_6,nzhi_fft_6,"pppm/disp:fkz2_6");
+
+    memory->create(gf_b_6,order_6,"pppm/disp:gf_b_6");
+    memory->create2d_offset(rho1d_6,3,-order_6/2,order_6/2,"pppm/disp:rho1d_6");
+    memory->create2d_offset(rho_coeff_6,order_6,(1-order_6)/2,order_6/2,"pppm/disp:rho_coeff_6");
+    memory->create2d_offset(drho1d_6,3,-order_6/2,order_6/2,"pppm/disp:drho1d_6");
+    memory->create2d_offset(drho_coeff_6,order_6,(1-order_6)/2,order_6/2,"pppm/disp:drho_coeff_6");
+
+    memory->create(greensfn_6,nfft_both_6,"pppm/disp:greensfn_6");
+    memory->create(vg_6,nfft_both_6,6,"pppm/disp:vg_6");
+    memory->create(vg2_6,nfft_both_6,3,"pppm/disp:vg2_6");
+
+    memory->create3d_offset(density_brick_a0,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+                            nxlo_out_6,nxhi_out_6,"pppm/disp:density_brick_a0");
+    memory->create3d_offset(density_brick_a1,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+                            nxlo_out_6,nxhi_out_6,"pppm/disp:density_brick_a1");
+    memory->create3d_offset(density_brick_a2,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+                            nxlo_out_6,nxhi_out_6,"pppm/disp:density_brick_a2");
+    memory->create3d_offset(density_brick_a3,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+                            nxlo_out_6,nxhi_out_6,"pppm/disp:density_brick_a3");
+    memory->create3d_offset(density_brick_a4,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+                            nxlo_out_6,nxhi_out_6,"pppm/disp:density_brick_a4");
+    memory->create3d_offset(density_brick_a5,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+                            nxlo_out_6,nxhi_out_6,"pppm/disp:density_brick_a5");
+    memory->create3d_offset(density_brick_a6,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+                            nxlo_out_6,nxhi_out_6,"pppm/disp:density_brick_a6");
+
+    memory->create(density_fft_a0,nfft_both_6,"pppm/disp:density_fft_a0");
+    memory->create(density_fft_a1,nfft_both_6,"pppm/disp:density_fft_a1");
+    memory->create(density_fft_a2,nfft_both_6,"pppm/disp:density_fft_a2");
+    memory->create(density_fft_a3,nfft_both_6,"pppm/disp:density_fft_a3");
+    memory->create(density_fft_a4,nfft_both_6,"pppm/disp:density_fft_a4");
+    memory->create(density_fft_a5,nfft_both_6,"pppm/disp:density_fft_a5");
+    memory->create(density_fft_a6,nfft_both_6,"pppm/disp:density_fft_a6");
+
+    // seven u_brick grids or seven vdx/vdy/vdz triples, by differentiation_flag
+    if ( differentiation_flag == 1 ) {
+      memory->create3d_offset(u_brick_a0,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+                              nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a0");
+      memory->create3d_offset(u_brick_a1,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+                              nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a1");
+      memory->create3d_offset(u_brick_a2,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+                              nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a2");
+      memory->create3d_offset(u_brick_a3,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+                              nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a3");
+      memory->create3d_offset(u_brick_a4,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+                              nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a4");
+      memory->create3d_offset(u_brick_a5,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+                              nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a5");
+      memory->create3d_offset(u_brick_a6,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+                              nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a6");
+
+      memory->create(sf_precoeff1_6,nfft_both_6,"pppm/disp:sf_precoeff1_6");
+      memory->create(sf_precoeff2_6,nfft_both_6,"pppm/disp:sf_precoeff2_6");
+      memory->create(sf_precoeff3_6,nfft_both_6,"pppm/disp:sf_precoeff3_6");
+      memory->create(sf_precoeff4_6,nfft_both_6,"pppm/disp:sf_precoeff4_6");
+      memory->create(sf_precoeff5_6,nfft_both_6,"pppm/disp:sf_precoeff5_6");
+      memory->create(sf_precoeff6_6,nfft_both_6,"pppm/disp:sf_precoeff6_6");
+
+    } else {
+
+      memory->create3d_offset(vdx_brick_a0,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+                              nxlo_out_6,nxhi_out_6,"pppm/disp:vdx_brick_a0");
+      memory->create3d_offset(vdy_brick_a0,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+                              nxlo_out_6,nxhi_out_6,"pppm/disp:vdy_brick_a0");
+      memory->create3d_offset(vdz_brick_a0,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+                              nxlo_out_6,nxhi_out_6,"pppm/disp:vdz_brick_a0");
+
+      memory->create3d_offset(vdx_brick_a1,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+                              nxlo_out_6,nxhi_out_6,"pppm/disp:vdx_brick_a1");
+      memory->create3d_offset(vdy_brick_a1,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+                              nxlo_out_6,nxhi_out_6,"pppm/disp:vdy_brick_a1");
+      memory->create3d_offset(vdz_brick_a1,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+                              nxlo_out_6,nxhi_out_6,"pppm/disp:vdz_brick_a1");
+
+      memory->create3d_offset(vdx_brick_a2,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+                              nxlo_out_6,nxhi_out_6,"pppm/disp:vdx_brick_a2");
+      memory->create3d_offset(vdy_brick_a2,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+                              nxlo_out_6,nxhi_out_6,"pppm/disp:vdy_brick_a2");
+      memory->create3d_offset(vdz_brick_a2,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+                              nxlo_out_6,nxhi_out_6,"pppm/disp:vdz_brick_a2");
+
+      memory->create3d_offset(vdx_brick_a3,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+                              nxlo_out_6,nxhi_out_6,"pppm/disp:vdx_brick_a3");
+      memory->create3d_offset(vdy_brick_a3,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+                              nxlo_out_6,nxhi_out_6,"pppm/disp:vdy_brick_a3");
+      memory->create3d_offset(vdz_brick_a3,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+                              nxlo_out_6,nxhi_out_6,"pppm/disp:vdz_brick_a3");
+
+      memory->create3d_offset(vdx_brick_a4,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+                              nxlo_out_6,nxhi_out_6,"pppm/disp:vdx_brick_a4");
+      memory->create3d_offset(vdy_brick_a4,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+                              nxlo_out_6,nxhi_out_6,"pppm/disp:vdy_brick_a4");
+      memory->create3d_offset(vdz_brick_a4,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+                              nxlo_out_6,nxhi_out_6,"pppm/disp:vdz_brick_a4");
+
+      memory->create3d_offset(vdx_brick_a5,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+                              nxlo_out_6,nxhi_out_6,"pppm/disp:vdx_brick_a5");
+      memory->create3d_offset(vdy_brick_a5,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+                              nxlo_out_6,nxhi_out_6,"pppm/disp:vdy_brick_a5");
+      memory->create3d_offset(vdz_brick_a5,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+                              nxlo_out_6,nxhi_out_6,"pppm/disp:vdz_brick_a5");
+
+      memory->create3d_offset(vdx_brick_a6,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+                              nxlo_out_6,nxhi_out_6,"pppm/disp:vdx_brick_a6");
+      memory->create3d_offset(vdy_brick_a6,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+                              nxlo_out_6,nxhi_out_6,"pppm/disp:vdy_brick_a6");
+      memory->create3d_offset(vdz_brick_a6,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+                              nxlo_out_6,nxhi_out_6,"pppm/disp:vdz_brick_a6");
+    }
+
+    // FFTs on the _6 grid and remap from the *_in_6 to the *_fft_6 bounds
+
+    int tmp;
+
+    fft1_6 = new FFT3d(lmp,world,nx_pppm_6,ny_pppm_6,nz_pppm_6,
+                       nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6,
+                       nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6,
+                       0,0,&tmp);
+
+    fft2_6 = new FFT3d(lmp,world,nx_pppm_6,ny_pppm_6,nz_pppm_6,
+                       nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6,
+                       nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6,
+                       0,0,&tmp);
+
+    remap_6 = new Remap(lmp,world,
+                        nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6,
+                        nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6,
+                        1,0,0,FFT_PRECISION);
+
+    // create ghost grid object for rho and electric field communication
+
+
+    if (differentiation_flag == 1)
+      cg_6 = new CommGrid(lmp,world,7,7,
+                          nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6,
+                          nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6,
+                          procneigh[0][0],procneigh[0][1],procneigh[1][0],
+                          procneigh[1][1],procneigh[2][0],procneigh[2][1]);
+    else
+      cg_6 = new CommGrid(lmp,world,21,7,
+                          nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6,
+                          nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6,
+                          procneigh[0][0],procneigh[0][1],procneigh[1][0],
+                          procneigh[1][1],procneigh[2][0],procneigh[2][1]);
+  }
+
+  if (function[3]) {                          // no mixing: nsplit_alloc grid sets
+    memory->create(work1_6,2*nfft_both_6,"pppm/disp:work1_6");
+    memory->create(work2_6,2*nfft_both_6,"pppm/disp:work2_6");
+
+    memory->create1d_offset(fkx_6,nxlo_fft_6,nxhi_fft_6,"pppm/disp:fkx_6");
+    memory->create1d_offset(fky_6,nylo_fft_6,nyhi_fft_6,"pppm/disp:fky_6");
+    memory->create1d_offset(fkz_6,nzlo_fft_6,nzhi_fft_6,"pppm/disp:fkz_6");
+
+    memory->create1d_offset(fkx2_6,nxlo_fft_6,nxhi_fft_6,"pppm/disp:fkx2_6");
+    memory->create1d_offset(fky2_6,nylo_fft_6,nyhi_fft_6,"pppm/disp:fky2_6");
+    memory->create1d_offset(fkz2_6,nzlo_fft_6,nzhi_fft_6,"pppm/disp:fkz2_6");
+
+    memory->create(gf_b_6,order_6,"pppm/disp:gf_b_6");
+    memory->create2d_offset(rho1d_6,3,-order_6/2,order_6/2,"pppm/disp:rho1d_6");
+    memory->create2d_offset(rho_coeff_6,order_6,(1-order_6)/2,order_6/2,"pppm/disp:rho_coeff_6");
+    memory->create2d_offset(drho1d_6,3,-order_6/2,order_6/2,"pppm/disp:drho1d_6");
+    memory->create2d_offset(drho_coeff_6,order_6,(1-order_6)/2,order_6/2,"pppm/disp:drho_coeff_6");
+
+    memory->create(greensfn_6,nfft_both_6,"pppm/disp:greensfn_6");
+    memory->create(vg_6,nfft_both_6,6,"pppm/disp:vg_6");
+    memory->create(vg2_6,nfft_both_6,3,"pppm/disp:vg2_6");
+
+    memory->create4d_offset(density_brick_none,nsplit_alloc,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+                            nxlo_out_6,nxhi_out_6,"pppm/disp:density_brick_none");
+    if ( differentiation_flag == 1) {
+      memory->create4d_offset(u_brick_none,nsplit_alloc,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+                              nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_none");
+
+      memory->create(sf_precoeff1_6,nfft_both_6,"pppm/disp:sf_precoeff1_6");
+      memory->create(sf_precoeff2_6,nfft_both_6,"pppm/disp:sf_precoeff2_6");
+      memory->create(sf_precoeff3_6,nfft_both_6,"pppm/disp:sf_precoeff3_6");
+      memory->create(sf_precoeff4_6,nfft_both_6,"pppm/disp:sf_precoeff4_6");
+      memory->create(sf_precoeff5_6,nfft_both_6,"pppm/disp:sf_precoeff5_6");
+      memory->create(sf_precoeff6_6,nfft_both_6,"pppm/disp:sf_precoeff6_6");
+
+    }  else {
+      memory->create4d_offset(vdx_brick_none,nsplit_alloc,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+                              nxlo_out_6,nxhi_out_6,"pppm/disp:vdx_brick_none");
+      memory->create4d_offset(vdy_brick_none,nsplit_alloc,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+                              nxlo_out_6,nxhi_out_6,"pppm/disp:vdy_brick_none");
+      memory->create4d_offset(vdz_brick_none,nsplit_alloc,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+                              nxlo_out_6,nxhi_out_6,"pppm/disp:vdz_brick_none");
+    }
+    memory->create(density_fft_none,nsplit_alloc,nfft_both_6,"pppm/disp:density_fft_none");
+
+    // FFTs on the _6 grid and remap from the *_in_6 to the *_fft_6 bounds
+    int tmp;
+
+    fft1_6 = new FFT3d(lmp,world,nx_pppm_6,ny_pppm_6,nz_pppm_6,
+                       nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6,
+                       nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6,
+                       0,0,&tmp);
+
+    fft2_6 = new FFT3d(lmp,world,nx_pppm_6,ny_pppm_6,nz_pppm_6,
+                       nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6,
+                       nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6,
+                       0,0,&tmp);
+
+    remap_6 = new Remap(lmp,world,
+                        nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6,
+                        nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6,
+                        1,0,0,FFT_PRECISION);
+
+    // create ghost grid object for rho and electric field communication
+
+    if (differentiation_flag == 1)
+      cg_6 = new CommGrid(lmp,world,nsplit_alloc,nsplit_alloc,
+                          nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6,
+                          nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6,
+                          procneigh[0][0],procneigh[0][1],procneigh[1][0],
+                          procneigh[1][1],procneigh[2][0],procneigh[2][1]);
+    else
+      cg_6 = new CommGrid(lmp,world,3*nsplit_alloc,nsplit_alloc,
+                          nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6,
+                          nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6,
+                          procneigh[0][0],procneigh[0][1],procneigh[1][0],
+                          procneigh[1][1],procneigh[2][0],procneigh[2][1]);
+  }
+
+}
+
+/* ----------------------------------------------------------------------
+   allocate memory that depends on # of K-vectors and order for per-atom calculations:
+   the u/v0-v5 grid bricks plus one ghost-grid CommGrid for each enabled function[] entry
+------------------------------------------------------------------------- */
+
+void PPPMDisp::allocate_peratom()
+{
+  // comm->procneigh[dim][0/1] entries are handed to every CommGrid constructor below
+  int (*procneigh)[2] = comm->procneigh;
+
+  if (function[0]) {
+    // bricks on the grid without the _6 suffix (presumably the Coulomb grid -- confirm)
+    if (differentiation_flag != 1)
+      memory->create3d_offset(u_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
+    	                      nxlo_out,nxhi_out,"pppm/disp:u_brick");
+
+    memory->create3d_offset(v0_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
+			    nxlo_out,nxhi_out,"pppm/disp:v0_brick");
+    memory->create3d_offset(v1_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
+  			    nxlo_out,nxhi_out,"pppm/disp:v1_brick");
+    memory->create3d_offset(v2_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
+  			    nxlo_out,nxhi_out,"pppm/disp:v2_brick");
+    memory->create3d_offset(v3_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
+  			    nxlo_out,nxhi_out,"pppm/disp:v3_brick");
+    memory->create3d_offset(v4_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
+  			    nxlo_out,nxhi_out,"pppm/disp:v4_brick");
+    memory->create3d_offset(v5_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
+  			    nxlo_out,nxhi_out,"pppm/disp:v5_brick");
+
+    // ghost grid object for the per-atom bricks: first count is 6 (v0-v5), or 7 when u_brick is also allocated
+    // NOTE(review): the two counts are presumably CommGrid's forward/reverse sizes per grid point -- confirm
+    if (differentiation_flag == 1)
+      cg_peratom =
+        new CommGrid(lmp,world,6,1,
+                     nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
+                     nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out,
+                     procneigh[0][0],procneigh[0][1],procneigh[1][0],
+                     procneigh[1][1],procneigh[2][0],procneigh[2][1]);
+    else
+      cg_peratom =
+        new CommGrid(lmp,world,7,1,
+                     nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
+                     nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out,
+                     procneigh[0][0],procneigh[0][1],procneigh[1][0],
+                     procneigh[1][1],procneigh[2][0],procneigh[2][1]);
+
+  }
+  // NOTE(review): function[1], [2] and [3] each assign cg_peratom_6 -- assumes at most one of them is set; confirm
+  // function[1]: *_brick_g arrays on the *_6 grid (presumably geometric-mixing dispersion -- confirm)
+  if (function[1]) {
+
+    if ( differentiation_flag != 1 )
+      memory->create3d_offset(u_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+  		  	      nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_g");
+
+    memory->create3d_offset(v0_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+  		  	    nxlo_out_6,nxhi_out_6,"pppm/disp:v0_brick_g");
+    memory->create3d_offset(v1_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+  		  	    nxlo_out_6,nxhi_out_6,"pppm/disp:v1_brick_g");
+    memory->create3d_offset(v2_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+  		  	    nxlo_out_6,nxhi_out_6,"pppm/disp:v2_brick_g");
+    memory->create3d_offset(v3_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+  		  	    nxlo_out_6,nxhi_out_6,"pppm/disp:v3_brick_g");
+    memory->create3d_offset(v4_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+  		  	    nxlo_out_6,nxhi_out_6,"pppm/disp:v4_brick_g");
+    memory->create3d_offset(v5_brick_g,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+  		  	    nxlo_out_6,nxhi_out_6,"pppm/disp:v5_brick_g");
+
+    // ghost grid object for the per-atom bricks: first count is 6 (v0-v5), or 7 when u_brick_g is also allocated
+
+    if (differentiation_flag == 1)
+      cg_peratom_6 =
+        new CommGrid(lmp,world,6,1,
+                     nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6,
+                     nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6,
+                     procneigh[0][0],procneigh[0][1],procneigh[1][0],
+                     procneigh[1][1],procneigh[2][0],procneigh[2][1]);
+    else
+      cg_peratom_6 =
+        new CommGrid(lmp,world,7,1,
+                     nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6,
+                     nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6,
+                     procneigh[0][0],procneigh[0][1],procneigh[1][0],
+                     procneigh[1][1],procneigh[2][0],procneigh[2][1]);
+
+  }
+  // function[2]: seven brick sets a0..a6 on the *_6 grid (presumably arithmetic-mixing dispersion -- confirm)
+  if (function[2]) {
+   
+    if ( differentiation_flag != 1 ) {
+      memory->create3d_offset(u_brick_a0,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+  		  	      nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a0");
+      memory->create3d_offset(u_brick_a1,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+  		  	      nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a1");
+      memory->create3d_offset(u_brick_a2,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+  		  	      nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a2");
+      memory->create3d_offset(u_brick_a3,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+  		  	      nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a3");
+      memory->create3d_offset(u_brick_a4,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+  		  	      nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a4");
+      memory->create3d_offset(u_brick_a5,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+  		  	      nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a5");
+      memory->create3d_offset(u_brick_a6,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+  		  	      nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_a6");
+    }
+
+    memory->create3d_offset(v0_brick_a0,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+  		  	        nxlo_out_6,nxhi_out_6,"pppm/disp:v0_brick_a0");
+    memory->create3d_offset(v1_brick_a0,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+    	                        nxlo_out_6,nxhi_out_6,"pppm/disp:v1_brick_a0");
+    memory->create3d_offset(v2_brick_a0,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+  		  	        nxlo_out_6,nxhi_out_6,"pppm/disp:v2_brick_a0");
+    memory->create3d_offset(v3_brick_a0,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+  		  	        nxlo_out_6,nxhi_out_6,"pppm/disp:v3_brick_a0");
+    memory->create3d_offset(v4_brick_a0,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+  		  	        nxlo_out_6,nxhi_out_6,"pppm/disp:v4_brick_a0");
+    memory->create3d_offset(v5_brick_a0,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+  		  	        nxlo_out_6,nxhi_out_6,"pppm/disp:v5_brick_a0");
+
+    memory->create3d_offset(v0_brick_a1,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+  		  	        nxlo_out_6,nxhi_out_6,"pppm/disp:v0_brick_a1");
+    memory->create3d_offset(v1_brick_a1,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+   	                        nxlo_out_6,nxhi_out_6,"pppm/disp:v1_brick_a1");
+    memory->create3d_offset(v2_brick_a1,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+  		  	        nxlo_out_6,nxhi_out_6,"pppm/disp:v2_brick_a1");
+    memory->create3d_offset(v3_brick_a1,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+  		  	        nxlo_out_6,nxhi_out_6,"pppm/disp:v3_brick_a1");
+    memory->create3d_offset(v4_brick_a1,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+  	  	                nxlo_out_6,nxhi_out_6,"pppm/disp:v4_brick_a1");
+    memory->create3d_offset(v5_brick_a1,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+  		  	        nxlo_out_6,nxhi_out_6,"pppm/disp:v5_brick_a1");
+
+    memory->create3d_offset(v0_brick_a2,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+  		  	        nxlo_out_6,nxhi_out_6,"pppm/disp:v0_brick_a2");
+    memory->create3d_offset(v1_brick_a2,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+  		  	        nxlo_out_6,nxhi_out_6,"pppm/disp:v1_brick_a2");
+    memory->create3d_offset(v2_brick_a2,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+  		  	        nxlo_out_6,nxhi_out_6,"pppm/disp:v2_brick_a2");
+    memory->create3d_offset(v3_brick_a2,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+  		  	        nxlo_out_6,nxhi_out_6,"pppm/disp:v3_brick_a2");
+    memory->create3d_offset(v4_brick_a2,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+  		  	        nxlo_out_6,nxhi_out_6,"pppm/disp:v4_brick_a2");
+    memory->create3d_offset(v5_brick_a2,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+  		  	        nxlo_out_6,nxhi_out_6,"pppm/disp:v5_brick_a2");
+
+    memory->create3d_offset(v0_brick_a3,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+  		  	        nxlo_out_6,nxhi_out_6,"pppm/disp:v0_brick_a3");
+    memory->create3d_offset(v1_brick_a3,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+  		  	        nxlo_out_6,nxhi_out_6,"pppm/disp:v1_brick_a3");
+    memory->create3d_offset(v2_brick_a3,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+  		  	        nxlo_out_6,nxhi_out_6,"pppm/disp:v2_brick_a3");
+    memory->create3d_offset(v3_brick_a3,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+  	  	                nxlo_out_6,nxhi_out_6,"pppm/disp:v3_brick_a3");
+    memory->create3d_offset(v4_brick_a3,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+  		  	        nxlo_out_6,nxhi_out_6,"pppm/disp:v4_brick_a3");
+    memory->create3d_offset(v5_brick_a3,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+  		  	        nxlo_out_6,nxhi_out_6,"pppm/disp:v5_brick_a3");
+
+    memory->create3d_offset(v0_brick_a4,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+  		  	        nxlo_out_6,nxhi_out_6,"pppm/disp:v0_brick_a4");
+    memory->create3d_offset(v1_brick_a4,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+  		  	        nxlo_out_6,nxhi_out_6,"pppm/disp:v1_brick_a4");
+    memory->create3d_offset(v2_brick_a4,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+  		  	        nxlo_out_6,nxhi_out_6,"pppm/disp:v2_brick_a4");
+    memory->create3d_offset(v3_brick_a4,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+  		  	        nxlo_out_6,nxhi_out_6,"pppm/disp:v3_brick_a4");
+    memory->create3d_offset(v4_brick_a4,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+  		  	        nxlo_out_6,nxhi_out_6,"pppm/disp:v4_brick_a4");
+    memory->create3d_offset(v5_brick_a4,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+  		  	        nxlo_out_6,nxhi_out_6,"pppm/disp:v5_brick_a4");
+
+    memory->create3d_offset(v0_brick_a5,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+  		  	        nxlo_out_6,nxhi_out_6,"pppm/disp:v0_brick_a5");
+    memory->create3d_offset(v1_brick_a5,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+  		  	        nxlo_out_6,nxhi_out_6,"pppm/disp:v1_brick_a5");
+    memory->create3d_offset(v2_brick_a5,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+  		  	        nxlo_out_6,nxhi_out_6,"pppm/disp:v2_brick_a5");
+    memory->create3d_offset(v3_brick_a5,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+  		  	        nxlo_out_6,nxhi_out_6,"pppm/disp:v3_brick_a5");
+    memory->create3d_offset(v4_brick_a5,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+  		  	        nxlo_out_6,nxhi_out_6,"pppm/disp:v4_brick_a5");
+    memory->create3d_offset(v5_brick_a5,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+  		  	        nxlo_out_6,nxhi_out_6,"pppm/disp:v5_brick_a5");
+
+    memory->create3d_offset(v0_brick_a6,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+  	  	                nxlo_out_6,nxhi_out_6,"pppm/disp:v0_brick_a6");
+    memory->create3d_offset(v1_brick_a6,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+  		  	        nxlo_out_6,nxhi_out_6,"pppm/disp:v1_brick_a6");
+    memory->create3d_offset(v2_brick_a6,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+  		  	        nxlo_out_6,nxhi_out_6,"pppm/disp:v2_brick_a6");
+    memory->create3d_offset(v3_brick_a6,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+  		  	        nxlo_out_6,nxhi_out_6,"pppm/disp:v3_brick_a6");
+    memory->create3d_offset(v4_brick_a6,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+  		  	        nxlo_out_6,nxhi_out_6,"pppm/disp:v4_brick_a6");
+    memory->create3d_offset(v5_brick_a6,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+  		  	        nxlo_out_6,nxhi_out_6,"pppm/disp:v5_brick_a6");
+
+    // ghost grid object: 42 = 6 v-bricks x 7 sets; 49 when the 7 u_brick_a* arrays are allocated as well
+
+    if (differentiation_flag == 1)
+      cg_peratom_6 =
+        new CommGrid(lmp,world,42,1,
+                     nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6,
+                     nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6,
+                     procneigh[0][0],procneigh[0][1],procneigh[1][0],
+                     procneigh[1][1],procneigh[2][0],procneigh[2][1]);
+    else
+      cg_peratom_6 =
+        new CommGrid(lmp,world,49,1,
+                     nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6,
+                     nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6,
+                     procneigh[0][0],procneigh[0][1],procneigh[1][0],
+                     procneigh[1][1],procneigh[2][0],procneigh[2][1]);
+
+  }  
+  // function[3]: 4d bricks with leading dimension nsplit_alloc on the *_6 grid (presumably no mixing rule -- confirm)
+  if (function[3]) {
+
+    if ( differentiation_flag != 1 )
+      memory->create4d_offset(u_brick_none,nsplit_alloc,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+  		  	      nxlo_out_6,nxhi_out_6,"pppm/disp:u_brick_none");
+
+    memory->create4d_offset(v0_brick_none,nsplit_alloc,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+  		  	    nxlo_out_6,nxhi_out_6,"pppm/disp:v0_brick_none");
+    memory->create4d_offset(v1_brick_none,nsplit_alloc,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+  		  	    nxlo_out_6,nxhi_out_6,"pppm/disp:v1_brick_none");
+    memory->create4d_offset(v2_brick_none,nsplit_alloc,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+  		  	    nxlo_out_6,nxhi_out_6,"pppm/disp:v2_brick_none");
+    memory->create4d_offset(v3_brick_none,nsplit_alloc,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+  		  	    nxlo_out_6,nxhi_out_6,"pppm/disp:v3_brick_none");
+    memory->create4d_offset(v4_brick_none,nsplit_alloc,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+  		  	    nxlo_out_6,nxhi_out_6,"pppm/disp:v4_brick_none");
+    memory->create4d_offset(v5_brick_none,nsplit_alloc,nzlo_out_6,nzhi_out_6,nylo_out_6,nyhi_out_6,
+  		  	    nxlo_out_6,nxhi_out_6,"pppm/disp:v5_brick_none");
+
+    // ghost grid object: 6 values per split (v0-v5), or 7 per split when u_brick_none is also allocated
+
+    if (differentiation_flag == 1)
+      cg_peratom_6 =
+        new CommGrid(lmp,world,6*nsplit_alloc,1,
+                     nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6,
+                     nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6,
+                     procneigh[0][0],procneigh[0][1],procneigh[1][0],
+                     procneigh[1][1],procneigh[2][0],procneigh[2][1]);
+    else
+      cg_peratom_6 =
+        new CommGrid(lmp,world,7*nsplit_alloc,1,
+                     nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6,
+                     nxlo_out_6,nxhi_out_6,nylo_out_6,nyhi_out_6,nzlo_out_6,nzhi_out_6,
+                     procneigh[0][0],procneigh[0][1],procneigh[1][0],
+                     procneigh[1][1],procneigh[2][0],procneigh[2][1]);
+
+  }
+}
+
+
+/* ----------------------------------------------------------------------
+   free all grid arrays, FFT/remap objects and ghost-grid objects that depend on # of K-vectors and order; NULL the pointers
+------------------------------------------------------------------------- */
+
+void PPPMDisp::deallocate()
+{
+  memory->destroy3d_offset(density_brick,nzlo_out,nylo_out,nxlo_out);
+  memory->destroy3d_offset(vdx_brick,nzlo_out,nylo_out,nxlo_out);
+  memory->destroy3d_offset(vdy_brick,nzlo_out,nylo_out,nxlo_out);
+  memory->destroy3d_offset(vdz_brick,nzlo_out,nylo_out,nxlo_out);
+  memory->destroy(density_fft);
+  density_brick = vdx_brick = vdy_brick = vdz_brick = NULL;
+  density_fft = NULL;
+  // NOTE(review): every destroy here runs regardless of function[]; presumably memory->destroy*() tolerates NULL -- confirm
+  memory->destroy3d_offset(density_brick_g,nzlo_out_6,nylo_out_6,nxlo_out_6);
+  memory->destroy3d_offset(vdx_brick_g,nzlo_out_6,nylo_out_6,nxlo_out_6);
+  memory->destroy3d_offset(vdy_brick_g,nzlo_out_6,nylo_out_6,nxlo_out_6);
+  memory->destroy3d_offset(vdz_brick_g,nzlo_out_6,nylo_out_6,nxlo_out_6);
+  memory->destroy(density_fft_g);
+  density_brick_g = vdx_brick_g = vdy_brick_g = vdz_brick_g = NULL;
+  density_fft_g = NULL;
+  // the *_g, *_a0..a6 and *_none arrays are all released with the *_out_6 grid offsets
+  memory->destroy3d_offset(density_brick_a0,nzlo_out_6,nylo_out_6,nxlo_out_6);
+  memory->destroy3d_offset(vdx_brick_a0,nzlo_out_6,nylo_out_6,nxlo_out_6);
+  memory->destroy3d_offset(vdy_brick_a0,nzlo_out_6,nylo_out_6,nxlo_out_6);
+  memory->destroy3d_offset(vdz_brick_a0,nzlo_out_6,nylo_out_6,nxlo_out_6);
+  memory->destroy(density_fft_a0);
+  density_brick_a0 = vdx_brick_a0 = vdy_brick_a0 = vdz_brick_a0 = NULL;
+  density_fft_a0 = NULL;
+
+  memory->destroy3d_offset(density_brick_a1,nzlo_out_6,nylo_out_6,nxlo_out_6);
+  memory->destroy3d_offset(vdx_brick_a1,nzlo_out_6,nylo_out_6,nxlo_out_6);
+  memory->destroy3d_offset(vdy_brick_a1,nzlo_out_6,nylo_out_6,nxlo_out_6);
+  memory->destroy3d_offset(vdz_brick_a1,nzlo_out_6,nylo_out_6,nxlo_out_6);
+  memory->destroy(density_fft_a1);
+  density_brick_a1 = vdx_brick_a1 = vdy_brick_a1 = vdz_brick_a1 = NULL;
+  density_fft_a1 = NULL;
+
+  memory->destroy3d_offset(density_brick_a2,nzlo_out_6,nylo_out_6,nxlo_out_6);
+  memory->destroy3d_offset(vdx_brick_a2,nzlo_out_6,nylo_out_6,nxlo_out_6);
+  memory->destroy3d_offset(vdy_brick_a2,nzlo_out_6,nylo_out_6,nxlo_out_6);
+  memory->destroy3d_offset(vdz_brick_a2,nzlo_out_6,nylo_out_6,nxlo_out_6);
+  memory->destroy(density_fft_a2);
+  density_brick_a2 = vdx_brick_a2 = vdy_brick_a2 = vdz_brick_a2 = NULL;
+  density_fft_a2 = NULL;
+
+  memory->destroy3d_offset(density_brick_a3,nzlo_out_6,nylo_out_6,nxlo_out_6);
+  memory->destroy3d_offset(vdx_brick_a3,nzlo_out_6,nylo_out_6,nxlo_out_6);
+  memory->destroy3d_offset(vdy_brick_a3,nzlo_out_6,nylo_out_6,nxlo_out_6);
+  memory->destroy3d_offset(vdz_brick_a3,nzlo_out_6,nylo_out_6,nxlo_out_6);
+  memory->destroy(density_fft_a3);
+  density_brick_a3 = vdx_brick_a3 = vdy_brick_a3 = vdz_brick_a3 = NULL;
+  density_fft_a3 = NULL;
+ 
+  memory->destroy3d_offset(density_brick_a4,nzlo_out_6,nylo_out_6,nxlo_out_6);
+  memory->destroy3d_offset(vdx_brick_a4,nzlo_out_6,nylo_out_6,nxlo_out_6);
+  memory->destroy3d_offset(vdy_brick_a4,nzlo_out_6,nylo_out_6,nxlo_out_6);
+  memory->destroy3d_offset(vdz_brick_a4,nzlo_out_6,nylo_out_6,nxlo_out_6);
+  memory->destroy(density_fft_a4);
+  density_brick_a4 = vdx_brick_a4 = vdy_brick_a4 = vdz_brick_a4 = NULL;
+  density_fft_a4 = NULL;
+
+  memory->destroy3d_offset(density_brick_a5,nzlo_out_6,nylo_out_6,nxlo_out_6);
+  memory->destroy3d_offset(vdx_brick_a5,nzlo_out_6,nylo_out_6,nxlo_out_6);
+  memory->destroy3d_offset(vdy_brick_a5,nzlo_out_6,nylo_out_6,nxlo_out_6);
+  memory->destroy3d_offset(vdz_brick_a5,nzlo_out_6,nylo_out_6,nxlo_out_6);
+  memory->destroy(density_fft_a5);
+  density_brick_a5 = vdx_brick_a5 = vdy_brick_a5 = vdz_brick_a5 = NULL;
+  density_fft_a5 = NULL;
+
+  memory->destroy3d_offset(density_brick_a6,nzlo_out_6,nylo_out_6,nxlo_out_6);
+  memory->destroy3d_offset(vdx_brick_a6,nzlo_out_6,nylo_out_6,nxlo_out_6);
+  memory->destroy3d_offset(vdy_brick_a6,nzlo_out_6,nylo_out_6,nxlo_out_6);
+  memory->destroy3d_offset(vdz_brick_a6,nzlo_out_6,nylo_out_6,nxlo_out_6);
+  memory->destroy(density_fft_a6);
+  density_brick_a6 = vdx_brick_a6 = vdy_brick_a6 = vdz_brick_a6 = NULL;
+  density_fft_a6 = NULL;
+  // the *_none bricks are released with destroy4d_offset rather than destroy3d_offset
+  memory->destroy4d_offset(density_brick_none,nzlo_out_6,nylo_out_6,nxlo_out_6);
+  memory->destroy4d_offset(vdx_brick_none,nzlo_out_6,nylo_out_6,nxlo_out_6);
+  memory->destroy4d_offset(vdy_brick_none,nzlo_out_6,nylo_out_6,nxlo_out_6);
+  memory->destroy4d_offset(vdz_brick_none,nzlo_out_6,nylo_out_6,nxlo_out_6);
+  memory->destroy(density_fft_none);
+  density_brick_none = vdx_brick_none = vdy_brick_none = vdz_brick_none = NULL;
+  density_fft_none = NULL;
+
+  memory->destroy(sf_precoeff1);
+  memory->destroy(sf_precoeff2);
+  memory->destroy(sf_precoeff3);
+  memory->destroy(sf_precoeff4);
+  memory->destroy(sf_precoeff5);
+  memory->destroy(sf_precoeff6);
+  sf_precoeff1 = sf_precoeff2 = sf_precoeff3 = sf_precoeff4 = sf_precoeff5 = sf_precoeff6 = NULL;
+
+  memory->destroy(sf_precoeff1_6);
+  memory->destroy(sf_precoeff2_6);
+  memory->destroy(sf_precoeff3_6);
+  memory->destroy(sf_precoeff4_6);
+  memory->destroy(sf_precoeff5_6);
+  memory->destroy(sf_precoeff6_6);
+  sf_precoeff1_6 = sf_precoeff2_6 = sf_precoeff3_6 = sf_precoeff4_6 = sf_precoeff5_6 = sf_precoeff6_6 = NULL;
+
+  memory->destroy(greensfn);
+  memory->destroy(greensfn_6);
+  memory->destroy(work1);
+  memory->destroy(work2);
+  memory->destroy(work1_6);
+  memory->destroy(work2_6);
+  memory->destroy(vg);
+  memory->destroy(vg2);
+  memory->destroy(vg_6);
+  memory->destroy(vg2_6);
+  greensfn = greensfn_6 = NULL;
+  work1 = work2 = work1_6 = work2_6 = NULL;
+  vg = vg2 = vg_6 = vg2_6 = NULL;
+  // 1d tables: each is released with the lower FFT index of its own dimension as offset
+  memory->destroy1d_offset(fkx,nxlo_fft);
+  memory->destroy1d_offset(fky,nylo_fft);
+  memory->destroy1d_offset(fkz,nzlo_fft);
+  fkx = fky = fkz = NULL;
+
+  memory->destroy1d_offset(fkx2,nxlo_fft);
+  memory->destroy1d_offset(fky2,nylo_fft);
+  memory->destroy1d_offset(fkz2,nzlo_fft);
+  fkx2 = fky2 = fkz2 = NULL;
+
+  memory->destroy1d_offset(fkx_6,nxlo_fft_6);
+  memory->destroy1d_offset(fky_6,nylo_fft_6);
+  memory->destroy1d_offset(fkz_6,nzlo_fft_6);
+  fkx_6 = fky_6 = fkz_6 = NULL;
+
+  memory->destroy1d_offset(fkx2_6,nxlo_fft_6);
+  memory->destroy1d_offset(fky2_6,nylo_fft_6);
+  memory->destroy1d_offset(fkz2_6,nzlo_fft_6);
+  fkx2_6 = fky2_6 = fkz2_6 = NULL;
+
+  // NOTE(review): offsets -order/2 and (1-order)/2 must mirror those used at allocation (not visible here) -- confirm
+  memory->destroy(gf_b);
+  memory->destroy2d_offset(rho1d,-order/2);
+  memory->destroy2d_offset(rho_coeff,(1-order)/2);
+  memory->destroy2d_offset(drho1d,-order/2);
+  memory->destroy2d_offset(drho_coeff, (1-order)/2);
+  gf_b = NULL;
+  rho1d = rho_coeff = drho1d = drho_coeff = NULL;
+
+  memory->destroy(gf_b_6);
+  memory->destroy2d_offset(rho1d_6,-order_6/2);
+  memory->destroy2d_offset(rho_coeff_6,(1-order_6)/2);
+  memory->destroy2d_offset(drho1d_6,-order_6/2); 
+  memory->destroy2d_offset(drho_coeff_6,(1-order_6)/2);
+  gf_b_6 = NULL;
+  rho1d_6 = rho_coeff_6 = drho1d_6 = drho_coeff_6 = NULL;
+  // delete on a null pointer is a no-op, so objects that were never created are safe to delete here
+  delete fft1;
+  delete fft2;
+  delete remap;
+  delete cg;
+  fft1 = fft2 = NULL;
+  remap = NULL;
+  cg = NULL;
+
+  delete fft1_6;
+  delete fft2_6;
+  delete remap_6;
+  delete cg_6;
+  fft1_6 = fft2_6 = NULL;
+  remap_6 = NULL;
+  cg_6 = NULL;
+}
+
+
+/* ----------------------------------------------------------------------
+   deallocate the per-atom u/v0-v5 bricks of every function[] variant and both per-atom
+   ghost-grid objects, NULL the pointers, and reset peratom_allocate_flag to 0
+------------------------------------------------------------------------- */
+
+void PPPMDisp::deallocate_peratom()
+{
+  peratom_allocate_flag = 0;
+  // offsets mirror the lower bounds passed to create3d_offset/create4d_offset in allocate_peratom()
+  memory->destroy3d_offset(u_brick, nzlo_out, nylo_out, nxlo_out);
+  memory->destroy3d_offset(v0_brick, nzlo_out, nylo_out, nxlo_out);
+  memory->destroy3d_offset(v1_brick, nzlo_out, nylo_out, nxlo_out);
+  memory->destroy3d_offset(v2_brick, nzlo_out, nylo_out, nxlo_out);
+  memory->destroy3d_offset(v3_brick, nzlo_out, nylo_out, nxlo_out);
+  memory->destroy3d_offset(v4_brick, nzlo_out, nylo_out, nxlo_out);
+  memory->destroy3d_offset(v5_brick, nzlo_out, nylo_out, nxlo_out);
+  u_brick = v0_brick = v1_brick = v2_brick = v3_brick = v4_brick = v5_brick = NULL;
+  // NOTE(review): u_brick* is only allocated when differentiation_flag != 1; presumably destroy*_offset() accepts NULL -- confirm
+  memory->destroy3d_offset(u_brick_g, nzlo_out_6, nylo_out_6, nxlo_out_6);
+  memory->destroy3d_offset(v0_brick_g, nzlo_out_6, nylo_out_6, nxlo_out_6);
+  memory->destroy3d_offset(v1_brick_g, nzlo_out_6, nylo_out_6, nxlo_out_6);
+  memory->destroy3d_offset(v2_brick_g, nzlo_out_6, nylo_out_6, nxlo_out_6);
+  memory->destroy3d_offset(v3_brick_g, nzlo_out_6, nylo_out_6, nxlo_out_6);
+  memory->destroy3d_offset(v4_brick_g, nzlo_out_6, nylo_out_6, nxlo_out_6);
+  memory->destroy3d_offset(v5_brick_g, nzlo_out_6, nylo_out_6, nxlo_out_6);
+  u_brick_g = v0_brick_g = v1_brick_g = v2_brick_g = v3_brick_g = v4_brick_g = v5_brick_g = NULL;
+
+  memory->destroy3d_offset(u_brick_a0, nzlo_out_6, nylo_out_6, nxlo_out_6);
+  memory->destroy3d_offset(v0_brick_a0, nzlo_out_6, nylo_out_6, nxlo_out_6);
+  memory->destroy3d_offset(v1_brick_a0, nzlo_out_6, nylo_out_6, nxlo_out_6);
+  memory->destroy3d_offset(v2_brick_a0, nzlo_out_6, nylo_out_6, nxlo_out_6);
+  memory->destroy3d_offset(v3_brick_a0, nzlo_out_6, nylo_out_6, nxlo_out_6);
+  memory->destroy3d_offset(v4_brick_a0, nzlo_out_6, nylo_out_6, nxlo_out_6);
+  memory->destroy3d_offset(v5_brick_a0, nzlo_out_6, nylo_out_6, nxlo_out_6);
+  u_brick_a0 = v0_brick_a0 = v1_brick_a0 = v2_brick_a0 = v3_brick_a0 = v4_brick_a0 = v5_brick_a0 = NULL;
+
+  memory->destroy3d_offset(u_brick_a1, nzlo_out_6, nylo_out_6, nxlo_out_6);
+  memory->destroy3d_offset(v0_brick_a1, nzlo_out_6, nylo_out_6, nxlo_out_6);
+  memory->destroy3d_offset(v1_brick_a1, nzlo_out_6, nylo_out_6, nxlo_out_6);
+  memory->destroy3d_offset(v2_brick_a1, nzlo_out_6, nylo_out_6, nxlo_out_6);
+  memory->destroy3d_offset(v3_brick_a1, nzlo_out_6, nylo_out_6, nxlo_out_6);
+  memory->destroy3d_offset(v4_brick_a1, nzlo_out_6, nylo_out_6, nxlo_out_6);
+  memory->destroy3d_offset(v5_brick_a1, nzlo_out_6, nylo_out_6, nxlo_out_6);
+  u_brick_a1 = v0_brick_a1 = v1_brick_a1 = v2_brick_a1 = v3_brick_a1 = v4_brick_a1 = v5_brick_a1 = NULL;
+
+  memory->destroy3d_offset(u_brick_a2, nzlo_out_6, nylo_out_6, nxlo_out_6);
+  memory->destroy3d_offset(v0_brick_a2, nzlo_out_6, nylo_out_6, nxlo_out_6);
+  memory->destroy3d_offset(v1_brick_a2, nzlo_out_6, nylo_out_6, nxlo_out_6);
+  memory->destroy3d_offset(v2_brick_a2, nzlo_out_6, nylo_out_6, nxlo_out_6);
+  memory->destroy3d_offset(v3_brick_a2, nzlo_out_6, nylo_out_6, nxlo_out_6);
+  memory->destroy3d_offset(v4_brick_a2, nzlo_out_6, nylo_out_6, nxlo_out_6);
+  memory->destroy3d_offset(v5_brick_a2, nzlo_out_6, nylo_out_6, nxlo_out_6);
+  u_brick_a2 = v0_brick_a2 = v1_brick_a2 = v2_brick_a2 = v3_brick_a2 = v4_brick_a2 = v5_brick_a2 = NULL;
+
+  memory->destroy3d_offset(u_brick_a3, nzlo_out_6, nylo_out_6, nxlo_out_6);
+  memory->destroy3d_offset(v0_brick_a3, nzlo_out_6, nylo_out_6, nxlo_out_6);
+  memory->destroy3d_offset(v1_brick_a3, nzlo_out_6, nylo_out_6, nxlo_out_6);
+  memory->destroy3d_offset(v2_brick_a3, nzlo_out_6, nylo_out_6, nxlo_out_6);
+  memory->destroy3d_offset(v3_brick_a3, nzlo_out_6, nylo_out_6, nxlo_out_6);
+  memory->destroy3d_offset(v4_brick_a3, nzlo_out_6, nylo_out_6, nxlo_out_6);
+  memory->destroy3d_offset(v5_brick_a3, nzlo_out_6, nylo_out_6, nxlo_out_6);
+  u_brick_a3 = v0_brick_a3 = v1_brick_a3 = v2_brick_a3 = v3_brick_a3 = v4_brick_a3 = v5_brick_a3 = NULL;
+ 
+  memory->destroy3d_offset(u_brick_a4, nzlo_out_6, nylo_out_6, nxlo_out_6);
+  memory->destroy3d_offset(v0_brick_a4, nzlo_out_6, nylo_out_6, nxlo_out_6);
+  memory->destroy3d_offset(v1_brick_a4, nzlo_out_6, nylo_out_6, nxlo_out_6);
+  memory->destroy3d_offset(v2_brick_a4, nzlo_out_6, nylo_out_6, nxlo_out_6);
+  memory->destroy3d_offset(v3_brick_a4, nzlo_out_6, nylo_out_6, nxlo_out_6);
+  memory->destroy3d_offset(v4_brick_a4, nzlo_out_6, nylo_out_6, nxlo_out_6);
+  memory->destroy3d_offset(v5_brick_a4, nzlo_out_6, nylo_out_6, nxlo_out_6);
+  u_brick_a4 = v0_brick_a4 = v1_brick_a4 = v2_brick_a4 = v3_brick_a4 = v4_brick_a4 = v5_brick_a4 = NULL;
+ 
+  memory->destroy3d_offset(u_brick_a5, nzlo_out_6, nylo_out_6, nxlo_out_6);
+  memory->destroy3d_offset(v0_brick_a5, nzlo_out_6, nylo_out_6, nxlo_out_6);
+  memory->destroy3d_offset(v1_brick_a5, nzlo_out_6, nylo_out_6, nxlo_out_6);
+  memory->destroy3d_offset(v2_brick_a5, nzlo_out_6, nylo_out_6, nxlo_out_6);
+  memory->destroy3d_offset(v3_brick_a5, nzlo_out_6, nylo_out_6, nxlo_out_6);
+  memory->destroy3d_offset(v4_brick_a5, nzlo_out_6, nylo_out_6, nxlo_out_6);
+  memory->destroy3d_offset(v5_brick_a5, nzlo_out_6, nylo_out_6, nxlo_out_6);
+  u_brick_a5 = v0_brick_a5 = v1_brick_a5 = v2_brick_a5 = v3_brick_a5 = v4_brick_a5 = v5_brick_a5 = NULL;
+
+  memory->destroy3d_offset(u_brick_a6, nzlo_out_6, nylo_out_6, nxlo_out_6);
+  memory->destroy3d_offset(v0_brick_a6, nzlo_out_6, nylo_out_6, nxlo_out_6);
+  memory->destroy3d_offset(v1_brick_a6, nzlo_out_6, nylo_out_6, nxlo_out_6);
+  memory->destroy3d_offset(v2_brick_a6, nzlo_out_6, nylo_out_6, nxlo_out_6);
+  memory->destroy3d_offset(v3_brick_a6, nzlo_out_6, nylo_out_6, nxlo_out_6);
+  memory->destroy3d_offset(v4_brick_a6, nzlo_out_6, nylo_out_6, nxlo_out_6);
+  memory->destroy3d_offset(v5_brick_a6, nzlo_out_6, nylo_out_6, nxlo_out_6);
+  u_brick_a6 = v0_brick_a6 = v1_brick_a6 = v2_brick_a6 = v3_brick_a6 = v4_brick_a6 = v5_brick_a6 = NULL;
+  // the *_none bricks were created with create4d_offset, hence destroy4d_offset
+  memory->destroy4d_offset(u_brick_none, nzlo_out_6, nylo_out_6, nxlo_out_6);
+  memory->destroy4d_offset(v0_brick_none, nzlo_out_6, nylo_out_6, nxlo_out_6);
+  memory->destroy4d_offset(v1_brick_none, nzlo_out_6, nylo_out_6, nxlo_out_6);
+  memory->destroy4d_offset(v2_brick_none, nzlo_out_6, nylo_out_6, nxlo_out_6);
+  memory->destroy4d_offset(v3_brick_none, nzlo_out_6, nylo_out_6, nxlo_out_6);
+  memory->destroy4d_offset(v4_brick_none, nzlo_out_6, nylo_out_6, nxlo_out_6);
+  memory->destroy4d_offset(v5_brick_none, nzlo_out_6, nylo_out_6, nxlo_out_6);
+  u_brick_none = v0_brick_none = v1_brick_none = v2_brick_none = v3_brick_none = v4_brick_none = v5_brick_none = NULL;
+  // delete on a null pointer is a no-op, so a communicator that was never created is safe to delete
+  delete cg_peratom;
+  delete cg_peratom_6;
+  cg_peratom = cg_peratom_6 = NULL;
+}
+
+/* ----------------------------------------------------------------------
+   set g_ewald (unless gewaldflag) and size of FFT grid nx,ny,nz_pppm (unless gridflag)
+   for Coulomb interactions; grid dimensions are then increased until factorable()
+------------------------------------------------------------------------- */
+
+void PPPMDisp::set_grid()
+{
+  double q2 = qsqsum * force->qqrd2e;
+
+  // use xprd,yprd,zprd even if triclinic so grid size is the same
+  // adjust z dimension for 2d slab PPPM
+  // 3d PPPM just uses zprd since slab_volfactor = 1.0
+
+  double xprd = domain->xprd;
+  double yprd = domain->yprd;
+  double zprd = domain->zprd;
+  double zprd_slab = zprd*slab_volfactor;
+  
+  // make initial g_ewald estimate
+  // based on desired accuracy and real space cutoff
+  // fluid-occupied volume used to estimate real-space error
+  // zprd used rather than zprd_slab
+
+  double h, h_x,h_y,h_z;
+  bigint natoms = atom->natoms;
+  // NOTE(review): q2 == 0 or natoms == 0 would divide by zero in this function; presumably excluded by callers -- confirm
+  if (!gewaldflag) {
+    g_ewald = accuracy*sqrt(natoms*cutoff*xprd*yprd*zprd) / (2.0*q2);
+    if (g_ewald >= 1.0)  // -log(g_ewald) taken next must be positive
+      error->all(FLERR,"KSpace accuracy too large to estimate G vector");
+    g_ewald = sqrt(-log(g_ewald)) / cutoff;
+  } 
+
+  // set optimal nx_pppm,ny_pppm,nz_pppm based on order and accuracy
+  // nz_pppm uses extended zprd_slab instead of zprd
+  // reduce it until accuracy target is met
+
+  if (!gridflag) {
+    h = h_x = h_y = h_z = 4.0/g_ewald;  // initial grid spacing, same in all three dimensions
+    int count = 0;
+    while (1) {
+      
+      // set grid dimension
+      nx_pppm = static_cast<int> (xprd/h_x);
+      ny_pppm = static_cast<int> (yprd/h_y);
+      nz_pppm = static_cast<int> (zprd_slab/h_z);
+      // enforce at least 2 grid points per dimension
+      if (nx_pppm <= 1) nx_pppm = 2;
+      if (ny_pppm <= 1) ny_pppm = 2;
+      if (nz_pppm <= 1) nz_pppm = 2;
+
+      // set local FFT grid bounds (nxlo_fft..nzhi_fft) before calling compute_qopt()
+      int npey_fft,npez_fft;
+      if (nz_pppm >= nprocs) {
+        npey_fft = 1;
+        npez_fft = nprocs;
+      } else procs2grid2d(nprocs,ny_pppm,nz_pppm,&npey_fft,&npez_fft);
+      // this proc's (y,z) position in the npey_fft x npez_fft layout
+      int me_y = me % npey_fft;
+      int me_z = me / npey_fft;
+      // x range is the full 0..nx_pppm-1; y and z ranges are split by integer arithmetic
+      nxlo_fft = 0;
+      nxhi_fft = nx_pppm - 1;
+      nylo_fft = me_y*ny_pppm/npey_fft;
+      nyhi_fft = (me_y+1)*ny_pppm/npey_fft - 1;
+      nzlo_fft = me_z*nz_pppm/npez_fft;
+      nzhi_fft = (me_z+1)*nz_pppm/npez_fft - 1;
+
+      double qopt = compute_qopt();
+      // dfkspace is the estimate compared against the accuracy target below
+      double dfkspace = sqrt(qopt/natoms)*q2/(xprd*yprd*zprd_slab);
+
+      count++;
+
+      // break loop if the accuracy has been reached; abort with an error after more than 500 iterations
+      if (dfkspace <= accuracy) break;
+      if (count > 500) error->all(FLERR, "Could not compute grid size for Coulomb interaction");
+      h *= 0.95;  // shrink the spacing by 5% and retry
+      h_x = h_y = h_z = h;
+    }
+  }
+  
+  // boost grid size until it is factorable
+
+  while (!factorable(nx_pppm)) nx_pppm++;
+  while (!factorable(ny_pppm)) ny_pppm++;
+  while (!factorable(nz_pppm)) nz_pppm++;
+}
+
+/* ----------------------------------------------------------------------
+   set the FFT parameters 
+   all arguments are references; nx_p,ny_p,nz_p and ord are only read,
+   every other argument is overwritten:
+     nx_p,ny_p,nz_p = global grid size in each dimension
+     n*lo_f,n*hi_f  = index limits of my section of the FFT decomposition
+     n*lo_i,n*hi_i  = index limits of my 3d sub-brick without ghost cells
+     n*lo_o,n*hi_o  = index limits of my 3d sub-brick including ghost cells
+     nlow,nupp      = stencil extent below/above a grid point
+     ng             = # of grid points in my brick including ghosts
+     nf             = # of grid points in my FFT section
+     nfb            = larger of nf and the size of my ghost-free brick
+     sft,sftone     = shifts used for the particle <-> grid mapping
+     ord            = stencil order, determines nlow,nupp,sft,sftone
+   also assigns boxlo, which is not a local of this function
+------------------------------------------------------------------------- */
+
+void PPPMDisp::set_fft_parameters(int& nx_p,int& ny_p,int& nz_p,
+                                   int& nxlo_f,int& nylo_f,int& nzlo_f,
+                                   int& nxhi_f,int& nyhi_f,int& nzhi_f,
+                                   int& nxlo_i,int& nylo_i,int& nzlo_i,
+                                   int& nxhi_i,int& nyhi_i,int& nzhi_i,
+                                   int& nxlo_o,int& nylo_o,int& nzlo_o,
+                                   int& nxhi_o,int& nyhi_o,int& nzhi_o,
+		                   int& nlow, int& nupp,
+                                   int& ng, int& nf, int& nfb,
+		                   double& sft,double& sftone, int& ord)
+{
+  // global indices of PPPM grid range from 0 to N-1
+  // nlo_in,nhi_in = lower/upper limits of the 3d sub-brick of
+  //   global PPPM grid that I own without ghost cells
+  // for slab PPPM, assign z grid as if it were not extended
+
+  nxlo_i = static_cast<int> (comm->xsplit[comm->myloc[0]] * nx_p);
+  nxhi_i = static_cast<int> (comm->xsplit[comm->myloc[0]+1] * nx_p) - 1;
+
+  nylo_i = static_cast<int> (comm->ysplit[comm->myloc[1]] * ny_p);
+  nyhi_i = static_cast<int> (comm->ysplit[comm->myloc[1]+1] * ny_p) - 1;
+
+  // z uses nz_p/slab_volfactor, i.e. the grid extent without the slab extension
+
+  nzlo_i = static_cast<int> 
+      (comm->zsplit[comm->myloc[2]] * nz_p/slab_volfactor);
+  nzhi_i = static_cast<int> 
+      (comm->zsplit[comm->myloc[2]+1] * nz_p/slab_volfactor) - 1;
+
+
+  // nlow,nupp = stencil size for mapping particles to PPPM grid
+
+  nlow = -(ord-1)/2;
+  nupp = ord/2;
+
+  // sft values for particle <-> grid mapping
+  // add/subtract OFFSET to avoid int(-0.75) = 0 when want it to be -1
+  // odd ord: sft = OFFSET + 0.5, sftone = 0.0
+  // even ord: sft = OFFSET, sftone = 0.5
+
+  if (ord % 2) sft = OFFSET + 0.5;
+  else sft = OFFSET;
+  if (ord % 2) sftone = 0.0;
+  else sftone = 0.5;
+
+  // nlo_out,nhi_out = lower/upper limits of the 3d sub-brick of
+  //   global PPPM grid that my particles can contribute charge to
+  // effectively nlo_in,nhi_in + ghost cells
+  // nlo,nhi = global coords of grid pt to "lower left" of smallest/largest
+  //           position a particle in my box can be at
+  // dist[3] = particle position bound = subbox + skin/2.0 + qdist
+  //   qdist = offset due to TIP4P fictitious charge
+  //   convert to triclinic if necessary
+  // nlo_out,nhi_out = nlo,nhi + stencil size for particle mapping
+  // for slab PPPM, assign z grid as if it were not extended
+
+  double *prd,*sublo,*subhi;
+
+  // orthogonal box: box coords, triclinic box: lamda coords
+
+  if (triclinic == 0) {
+    prd = domain->prd;
+    boxlo = domain->boxlo;
+    sublo = domain->sublo;
+    subhi = domain->subhi;
+  } else {
+    prd = domain->prd_lamda;
+    boxlo = domain->boxlo_lamda;
+    sublo = domain->sublo_lamda;
+    subhi = domain->subhi_lamda;
+  }
+
+  double xprd = prd[0];
+  double yprd = prd[1];
+  double zprd = prd[2];
+  double zprd_slab = zprd*slab_volfactor;
+
+  // for triclinic, cuthalf is scaled by the box lengths in domain->prd
+
+  double dist[3];
+  double cuthalf = 0.5*neighbor->skin + qdist;
+  if (triclinic == 0) dist[0] = dist[1] = dist[2] = cuthalf;
+  else {
+    dist[0] = cuthalf/domain->prd[0];
+    dist[1] = cuthalf/domain->prd[1];
+    dist[2] = cuthalf/domain->prd[2];
+  }
+    
+  int nlo,nhi;
+    
+  // sft is added before truncation and OFFSET subtracted afterwards
+
+  nlo = static_cast<int> ((sublo[0]-dist[0]-boxlo[0]) * 
+                            nx_p/xprd + sft) - OFFSET;
+  nhi = static_cast<int> ((subhi[0]+dist[0]-boxlo[0]) * 
+                            nx_p/xprd + sft) - OFFSET;
+  nxlo_o = nlo + nlow;
+  nxhi_o = nhi + nupp;
+
+  nlo = static_cast<int> ((sublo[1]-dist[1]-boxlo[1]) * 
+                            ny_p/yprd + sft) - OFFSET;
+  nhi = static_cast<int> ((subhi[1]+dist[1]-boxlo[1]) * 
+                            ny_p/yprd + sft) - OFFSET;
+  nylo_o = nlo + nlow;
+  nyhi_o = nhi + nupp;
+
+  nlo = static_cast<int> ((sublo[2]-dist[2]-boxlo[2]) * 
+                            nz_p/zprd_slab + sft) - OFFSET;
+  nhi = static_cast<int> ((subhi[2]+dist[2]-boxlo[2]) * 
+                            nz_p/zprd_slab + sft) - OFFSET;
+  nzlo_o = nlo + nlow;
+  nzhi_o = nhi + nupp;
+
+  // for slab PPPM, change the grid boundary for processors at +z end
+  //   to include the empty volume between periodically repeating slabs
+  // for slab PPPM, want charge data communicated from -z proc to +z proc,
+  //   but not vice versa, also want field data communicated from +z proc to
+  //   -z proc, but not vice versa
+  // this is accomplished by nzhi_i = nzhi_o on +z end (no ghost cells)
+
+  if (slabflag && (comm->myloc[2] == comm->procgrid[2]-1)) {
+    nzhi_i = nz_p - 1;
+    nzhi_o = nz_p - 1;
+  }
+  
+  // decomposition of FFT mesh
+  // global indices range from 0 to N-1
+  // proc owns entire x-dimension, clump of columns in y,z dimensions
+  // npey_fft,npez_fft = # of procs in y,z dims
+  // if nprocs is small enough, proc can own 1 or more entire xy planes,
+  //   else proc owns 2d sub-blocks of yz plane
+  // me_y,me_z = which proc (0-npe_fft-1) I am in y,z dimensions
+  // nlo_fft,nhi_fft = lower/upper limit of the section
+  //   of the global FFT mesh that I own
+
+  int npey_fft,npez_fft;
+  if (nz_p >= nprocs) {
+    npey_fft = 1;
+    npez_fft = nprocs;
+  } else procs2grid2d(nprocs,ny_p,nz_p,&npey_fft,&npez_fft);
+
+  int me_y = me % npey_fft;
+  int me_z = me / npey_fft;
+
+  nxlo_f = 0;
+  nxhi_f = nx_p - 1;
+  nylo_f = me_y*ny_p/npey_fft;
+  nyhi_f = (me_y+1)*ny_p/npey_fft - 1;
+  nzlo_f = me_z*nz_p/npez_fft;
+  nzhi_f = (me_z+1)*nz_p/npez_fft - 1;
+
+  // PPPM grid for this proc, including ghosts
+
+  ng = (nxhi_o-nxlo_o+1) * (nyhi_o-nylo_o+1) *
+    (nzhi_o-nzlo_o+1);
+
+  // FFT arrays on this proc, without ghosts
+  // nfft = FFT points in FFT decomposition on this proc
+  // nfft_brick = FFT points in 3d brick-decomposition on this proc
+  // nfft_both = greater of 2 values
+
+  nf = (nxhi_f-nxlo_f+1) * (nyhi_f-nylo_f+1) *
+    (nzhi_f-nzlo_f+1);
+  int nfft_brick = (nxhi_i-nxlo_i+1) * (nyhi_i-nylo_i+1) *
+    (nzhi_i-nzlo_i+1);
+  nfb = MAX(nf,nfft_brick);
+
+}
+
+/* ----------------------------------------------------------------------
+   test whether n can be written as a product of entries of factors[]
+   repeatedly strips the first entry of factors[] that divides n
+   return 1 if n is reduced to 1 (or is <= 1 on entry), 0 otherwise
+------------------------------------------------------------------------- */
+
+int PPPMDisp::factorable(int n)
+{
+  int remaining = n;
+
+  while (remaining > 1) {
+    bool reduced = false;
+
+    // divide out the first listed factor that fits, then start over
+    for (int idx = 0; idx < nfactors && !reduced; idx++) {
+      if (remaining % factors[idx] == 0) {
+        remaining /= factors[idx];
+        reduced = true;
+      }
+    }
+
+    // none of the listed factors divides what is left
+    if (!reduced) return 0;
+  }
+
+  return 1;
+}
+
+/* ----------------------------------------------------------------------
+   adjust g_ewald with a Newton iteration so that f() = 0,
+   i.e. so that the real-space and kspace error estimates are equal
+   stops as soon as |f()| < SMALL
+   calls error->all() if this is not reached within LARGE iterations
+------------------------------------------------------------------------- */
+void PPPMDisp::adjust_gewald()
+{
+  // step size of the forward difference, same value as used in derivf()
+
+  const double h = 0.000001;
+
+  // f() requires a sum over the FFT mesh and an MPI_Allreduce,
+  // so the value at the current g_ewald is carried over between
+  // iterations and f() is evaluated only twice per Newton step
+
+  double fcur = f();
+
+  for (int i = 0; i < LARGE; i++) {
+
+    // numerical derivative [f(x + h) - f(x)] / h
+
+    const double g_ewald_old = g_ewald;
+    g_ewald += h;
+    const double fplus = f();
+    g_ewald = g_ewald_old;
+    const double df = (fplus - fcur)/h;
+
+    // Newton update of g_ewald
+
+    g_ewald -= fcur/df;
+
+    fcur = f();
+    if (fabs(fcur) < SMALL) return;
+  }
+
+  // Failed to converge
+
+  error->all(FLERR, "Could not compute g_ewald");
+
+}
+
+/* ----------------------------------------------------------------------
+ Residual used to adjust g_ewald:
+ real-space error estimate minus kspace error estimate
+ ------------------------------------------------------------------------- */
+
+double PPPMDisp::f()
+{
+  const bigint npart = atom->natoms;
+  const double qsq = qsqsum * force->qqrd2e;
+  const double lx = domain->xprd;
+  const double ly = domain->yprd;
+  const double lz = domain->zprd;
+  const double lz_slab = lz*slab_volfactor;
+
+  // real-space part, uses the unextended z length
+  const double err_real = 2.0*qsq*exp(-g_ewald*g_ewald*cutoff*cutoff) /
+    sqrt(npart*cutoff*lx*ly*lz);
+
+  // kspace part, uses the slab-extended z length
+  const double qopt_sum = compute_qopt();
+  const double err_recip = sqrt(qopt_sum/npart)*qsq/(lx*ly*lz_slab);
+
+  return err_real - err_recip;
+}
+
+/* ----------------------------------------------------------------------
+ Forward-difference estimate of df/dg_ewald:
+ [f(g_ewald + step) - f(g_ewald)] / step
+ g_ewald is shifted temporarily and restored before returning
+ ------------------------------------------------------------------------- */
+            
+double PPPMDisp::derivf()
+{
+  const double step = 0.000001;
+
+  const double f_here = f();
+
+  const double g_saved = g_ewald;
+  g_ewald += step;
+  const double f_ahead = f();
+  g_ewald = g_saved;
+
+  return (f_ahead - f_here)/step;
+}
+
+/* ----------------------------------------------------------------------
+   Final accuracy estimate for the Coulomb part:
+   root of the sum of squares of the real-space and kspace estimates
+------------------------------------------------------------------------- */
+
+double PPPMDisp::final_accuracy()
+{
+  const bigint npart = atom->natoms;
+  const double qsq = qsqsum * force->qqrd2e;
+  const double lx = domain->xprd;
+  const double ly = domain->yprd;
+  const double lz = domain->zprd;
+  const double lz_slab = lz*slab_volfactor;
+
+  // real-space contribution
+  const double err_real = 2.0*qsq * exp(-g_ewald*g_ewald*cutoff*cutoff) /
+    sqrt(npart*cutoff*lx*ly*lz);
+
+  // kspace contribution
+  const double qopt_sum = compute_qopt();
+  const double err_recip = sqrt(qopt_sum/npart)*qsq/(lx*ly*lz_slab);
+
+  return sqrt(err_real*err_real + err_recip*err_recip);
+}
+
+/* ----------------------------------------------------------------------
+   Calculate the final estimator for the Dispersion accuracy
+   all three arguments are outputs:
+     acc_real   = real-space estimate from lj_rspace_error()
+     acc_kspace = kspace estimate from compute_qopt_6() and csum
+     acc        = sqrt(acc_real^2 + acc_kspace^2)
+------------------------------------------------------------------------- */
+
+void PPPMDisp::final_accuracy_6(double& acc, double& acc_real, double& acc_kspace)
+{
+  double xprd = domain->xprd;
+  double yprd = domain->yprd;
+  double zprd = domain->zprd;
+  double zprd_slab = zprd*slab_volfactor;
+  bigint natoms = atom->natoms;
+
+  acc_real = lj_rspace_error();
+
+  double qopt = compute_qopt_6();
+
+  acc_kspace = sqrt(qopt/natoms)*csum/(xprd*yprd*zprd_slab);
+
+  acc = sqrt(acc_real*acc_real + acc_kspace*acc_kspace);
+}
+
+/* ----------------------------------------------------------------------
+   qopt for Coulomb interactions, summed over all procs
+   differentiation_flag == 1 selects the ad variant, otherwise ik
+------------------------------------------------------------------------- */
+
+double PPPMDisp::compute_qopt()
+{
+  // contribution of the mesh points of this proc
+  double qopt_local = (differentiation_flag == 1) ?
+    compute_qopt_ad() : compute_qopt_ik();
+
+  // sum the contributions of all procs
+  double qopt_global;
+  MPI_Allreduce(&qopt_local,&qopt_global,1,MPI_DOUBLE,MPI_SUM,world);
+
+  return qopt_global;
+}
+
+/* ----------------------------------------------------------------------
+   qopt for Dispersion interactions, summed over all procs
+   differentiation_flag == 1 selects the ad variant, otherwise ik
+------------------------------------------------------------------------- */
+
+double PPPMDisp::compute_qopt_6()
+{
+  // contribution of the mesh points of this proc
+  double qopt_local = (differentiation_flag == 1) ?
+    compute_qopt_6_ad() : compute_qopt_6_ik();
+
+  // sum the contributions of all procs
+  double qopt_global;
+  MPI_Allreduce(&qopt_local,&qopt_global,1,MPI_DOUBLE,MPI_SUM,world);
+
+  return qopt_global;
+}
+
+/* ----------------------------------------------------------------------
+   Compute qopt for the ik differentiation scheme and Coulomb interaction
+   returns only the contribution of the FFT mesh points
+   nxlo_fft..nxhi_fft, nylo_fft..nyhi_fft, nzlo_fft..nzhi_fft of this proc
+   the sum over procs is done by the caller compute_qopt()
+------------------------------------------------------------------------- */
+
+double PPPMDisp::compute_qopt_ik()
+{
+  double qopt = 0.0;
+  int k,l,m;
+  double *prd;
+
+  // box lengths: box coords if orthogonal, lamda coords if triclinic
+
+  if (triclinic == 0) prd = domain->prd;
+  else prd = domain->prd_lamda;
+
+  double xprd = prd[0];
+  double yprd = prd[1];
+  double zprd = prd[2];
+  double zprd_slab = zprd*slab_volfactor;
+
+  // spacing of the wave vectors in each dimension
+
+  double unitkx = (2.0*MY_PI/xprd);
+  double unitky = (2.0*MY_PI/yprd);
+  double unitkz = (2.0*MY_PI/zprd_slab);
+
+  int nx,ny,nz,kper,lper,mper;
+  double sqk, u2;
+  double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz;
+  double sum1,sum2, sum3,dot1,dot2;
+
+  // shifted images -nb..nb of each wave vector are included per dimension
+
+  int nbx = 2;
+  int nby = 2;
+  int nbz = 2;
+
+  // kper,lper,mper: integer division makes 2*k/N either 0 or 1 for
+  //   0 <= k < N, so indices in the upper half are shifted down by N
+
+  for (m = nzlo_fft; m <= nzhi_fft; m++) {
+    mper = m - nz_pppm*(2*m/nz_pppm);
+
+    for (l = nylo_fft; l <= nyhi_fft; l++) {
+      lper = l - ny_pppm*(2*l/ny_pppm);
+
+      for (k = nxlo_fft; k <= nxhi_fft; k++) {
+        kper = k - nx_pppm*(2*k/nx_pppm);
+      
+        sqk = pow(unitkx*kper,2.0) + pow(unitky*lper,2.0) + 
+          pow(unitkz*mper,2.0);
+
+        // the k = 0 point is skipped
+
+        if (sqk != 0.0) {
+          sum1 = 0.0;
+          sum2 = 0.0;
+          sum3 = 0.0;
+
+          // q* = shifted wave vector component
+          // s* = exp(-q^2/(4 g_ewald^2))
+          // w* = (sin(arg)/arg)^order, set to 1 when arg is zero
+
+          for (nx = -nbx; nx <= nbx; nx++) {
+            qx = unitkx*(kper+nx_pppm*nx);
+            sx = exp(-0.25*pow(qx/g_ewald,2.0));
+            wx = 1.0;
+            argx = 0.5*qx*xprd/nx_pppm;
+            if (argx != 0.0) wx = pow(sin(argx)/argx,order);
+            for (ny = -nby; ny <= nby; ny++) {
+              qy = unitky*(lper+ny_pppm*ny);
+              sy = exp(-0.25*pow(qy/g_ewald,2.0));
+              wy = 1.0;
+              argy = 0.5*qy*yprd/ny_pppm;
+              if (argy != 0.0) wy = pow(sin(argy)/argy,order);
+              for (nz = -nbz; nz <= nbz; nz++) {
+                qz = unitkz*(mper+nz_pppm*nz);
+                sz = exp(-0.25*pow(qz/g_ewald,2.0));
+                wz = 1.0;
+                argz = 0.5*qz*zprd_slab/nz_pppm;
+                if (argz != 0.0) wz = pow(sin(argz)/argz,order);
+
+                // dot1 = unshifted k . shifted q, dot2 = |shifted q|^2
+
+                dot1 = unitkx*kper*qx + unitky*lper*qy + unitkz*mper*qz;
+                dot2 = qx*qx+qy*qy+qz*qz;
+                u2 =  pow(wx*wy*wz,2.0);
+                sum1 += sx*sy*sz*sx*sy*sz/dot2*4.0*4.0*MY_PI*MY_PI;
+                sum2 += u2*sx*sy*sz*4.0*MY_PI/dot2*dot1;
+		sum3 += u2;
+              }
+            }
+          }
+
+          // contribution of this mesh point: sum1 - sum2^2/(sum3^2 * sqk)
+
+	  sum2 *= sum2;
+	  sum3 *= sum3*sqk;
+          qopt += sum1 -sum2/sum3;
+        }
+      }
+    }
+  }
+  return qopt;
+}
+
+/* ----------------------------------------------------------------------
+   Compute qopt for the ad differentiation scheme and Coulomb interaction
+   returns only the contribution of the FFT mesh points
+   nxlo_fft..nxhi_fft, nylo_fft..nyhi_fft, nzlo_fft..nzhi_fft of this proc
+   the sum over procs is done by the caller compute_qopt()
+------------------------------------------------------------------------- */
+
+double PPPMDisp::compute_qopt_ad()
+{
+  double qopt = 0.0;
+  int k,l,m;
+  double *prd;
+
+  // box lengths: box coords if orthogonal, lamda coords if triclinic
+
+  if (triclinic == 0) prd = domain->prd;
+  else prd = domain->prd_lamda;
+
+  double xprd = prd[0];
+  double yprd = prd[1];
+  double zprd = prd[2];
+  double zprd_slab = zprd*slab_volfactor;
+
+  // spacing of the wave vectors in each dimension
+
+  double unitkx = (2.0*MY_PI/xprd);
+  double unitky = (2.0*MY_PI/yprd);
+  double unitkz = (2.0*MY_PI/zprd_slab);
+
+  int nx,ny,nz,kper,lper,mper;
+  double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz;
+  double u2, sqk;
+  double sum1,sum2,sum3,sum4,dot2;
+
+  // shifted images -nb..nb of each wave vector are included per dimension
+
+  int nbx = 2;
+  int nby = 2;
+  int nbz = 2;
+
+  // kper,lper,mper: integer division makes 2*k/N either 0 or 1 for
+  //   0 <= k < N, so indices in the upper half are shifted down by N
+
+  for (m = nzlo_fft; m <= nzhi_fft; m++) {
+    mper = m - nz_pppm*(2*m/nz_pppm);
+
+    for (l = nylo_fft; l <= nyhi_fft; l++) {
+      lper = l - ny_pppm*(2*l/ny_pppm);
+
+      for (k = nxlo_fft; k <= nxhi_fft; k++) {
+        kper = k - nx_pppm*(2*k/nx_pppm);
+      
+        sqk = pow(unitkx*kper,2.0) + pow(unitky*lper,2.0) + 
+          pow(unitkz*mper,2.0);
+
+        // the k = 0 point is skipped
+
+        if (sqk != 0.0) {
+          sum1 = 0.0;
+          sum2 = 0.0;
+          sum3 = 0.0;
+          sum4 = 0.0;
+
+          // q* = shifted wave vector component
+          // s* = exp(-q^2/(4 g_ewald^2))
+          // w* = (sin(arg)/arg)^order, set to 1 when arg is zero
+
+          for (nx = -nbx; nx <= nbx; nx++) {
+            qx = unitkx*(kper+nx_pppm*nx);
+            sx = exp(-0.25*pow(qx/g_ewald,2.0));
+            wx = 1.0;
+            argx = 0.5*qx*xprd/nx_pppm;
+            if (argx != 0.0) wx = pow(sin(argx)/argx,order);
+            for (ny = -nby; ny <= nby; ny++) {
+              qy = unitky*(lper+ny_pppm*ny);
+              sy = exp(-0.25*pow(qy/g_ewald,2.0));
+              wy = 1.0;
+              argy = 0.5*qy*yprd/ny_pppm;
+              if (argy != 0.0) wy = pow(sin(argy)/argy,order);
+              for (nz = -nbz; nz <= nbz; nz++) {
+                qz = unitkz*(mper+nz_pppm*nz);
+                sz = exp(-0.25*pow(qz/g_ewald,2.0));
+                wz = 1.0;
+                argz = 0.5*qz*zprd_slab/nz_pppm;
+                if (argz != 0.0) wz = pow(sin(argz)/argz,order);
+
+                // dot2 = |shifted q|^2
+
+                dot2 = qx*qx+qy*qy+qz*qz;
+                u2 =  pow(wx*wy*wz,2.0);
+                sum1 += sx*sy*sz*sx*sy*sz/dot2*4.0*4.0*MY_PI*MY_PI;
+                sum2 += sx*sy*sz * u2*4.0*MY_PI;
+                sum3 += u2;
+                sum4 += dot2*u2;
+              }
+            }
+          }
+
+          // contribution of this mesh point: sum1 - sum2^2/(sum3*sum4)
+
+          sum2 *= sum2;
+          qopt += sum1 - sum2/(sum3*sum4);
+        }
+      }
+    }
+  }
+  return qopt;
+}
+
+/* ----------------------------------------------------------------------
+   Compute qopt for the ik differentiation scheme and Dispersion interaction
+   returns only the contribution of the FFT mesh points
+   n*lo_fft_6..n*hi_fft_6 of this proc
+   the sum over procs is done by the caller compute_qopt_6()
+------------------------------------------------------------------------- */
+
+double PPPMDisp::compute_qopt_6_ik()
+{
+  double qopt = 0.0;
+  int k,l,m;
+  double *prd;
+
+  // box lengths: box coords if orthogonal, lamda coords if triclinic
+
+  if (triclinic == 0) prd = domain->prd;
+  else prd = domain->prd_lamda;
+
+  double xprd = prd[0];
+  double yprd = prd[1];
+  double zprd = prd[2];
+  double zprd_slab = zprd*slab_volfactor;
+
+  // spacing of the wave vectors in each dimension
+
+  double unitkx = (2.0*MY_PI/xprd);
+  double unitky = (2.0*MY_PI/yprd);
+  double unitkz = (2.0*MY_PI/zprd_slab);
+
+  int nx,ny,nz,kper,lper,mper;
+  double sqk, u2;
+  double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz;
+  double sum1,sum2, sum3;
+  double dot1,dot2, rtdot2, term;
+
+  // inv2ew = 1/(2*g_ewald_6)
+
+  double inv2ew = 2*g_ewald_6;
+  inv2ew = 1.0/inv2ew;
+  double rtpi = sqrt(MY_PI);
+
+  // shifted images -nb..nb of each wave vector are included per dimension
+
+  int nbx = 2;
+  int nby = 2;
+  int nbz = 2;
+
+  // kper,lper,mper: integer division makes 2*k/N either 0 or 1 for
+  //   0 <= k < N, so indices in the upper half are shifted down by N
+
+  for (m = nzlo_fft_6; m <= nzhi_fft_6; m++) {
+    mper = m - nz_pppm_6*(2*m/nz_pppm_6);
+
+    for (l = nylo_fft_6; l <= nyhi_fft_6; l++) {
+      lper = l - ny_pppm_6*(2*l/ny_pppm_6);
+
+      for (k = nxlo_fft_6; k <= nxhi_fft_6; k++) {
+        kper = k - nx_pppm_6*(2*k/nx_pppm_6);
+      
+        sqk = pow(unitkx*kper,2.0) + pow(unitky*lper,2.0) + 
+          pow(unitkz*mper,2.0);
+
+        // the k = 0 point is skipped
+
+        if (sqk != 0.0) {
+          sum1 = 0.0;
+          sum2 = 0.0;
+          sum3 = 0.0;
+
+          // q* = shifted wave vector component
+          // s* = exp(-q^2/(4 g_ewald_6^2))
+          // w* = (sin(arg)/arg)^order_6, set to 1 when arg is zero
+
+          for (nx = -nbx; nx <= nbx; nx++) {
+            qx = unitkx*(kper+nx_pppm_6*nx);
+            sx = exp(-qx*qx*inv2ew*inv2ew);
+            wx = 1.0;
+            argx = 0.5*qx*xprd/nx_pppm_6;
+            if (argx != 0.0) wx = pow(sin(argx)/argx,order_6);
+            for (ny = -nby; ny <= nby; ny++) {
+              qy = unitky*(lper+ny_pppm_6*ny);
+              sy = exp(-qy*qy*inv2ew*inv2ew);
+              wy = 1.0;
+              argy = 0.5*qy*yprd/ny_pppm_6;
+              if (argy != 0.0) wy = pow(sin(argy)/argy,order_6);
+              for (nz = -nbz; nz <= nbz; nz++) {
+                qz = unitkz*(mper+nz_pppm_6*nz);
+                sz = exp(-qz*qz*inv2ew*inv2ew);
+                wz = 1.0;
+                argz = 0.5*qz*zprd_slab/nz_pppm_6;
+                if (argz != 0.0) wz = pow(sin(argz)/argz,order_6);
+
+                // dot1 = unshifted k . shifted q, dot2 = |shifted q|^2
+
+                dot1 = unitkx*kper*qx + unitky*lper*qy + unitkz*mper*qz;
+                dot2 = qx*qx+qy*qy+qz*qz;
+                rtdot2 = sqrt(dot2);
+                term = (1-2*dot2*inv2ew*inv2ew)*sx*sy*sz +
+		       2*dot2*rtdot2*inv2ew*inv2ew*inv2ew*rtpi*erfc(rtdot2*inv2ew);
+                term *= g_ewald_6*g_ewald_6*g_ewald_6;
+                u2 =  pow(wx*wy*wz,2.0);
+                sum1 += term*term*MY_PI*MY_PI*MY_PI/9.0 * dot2;
+                sum2 += -u2*term*MY_PI*rtpi/3.0*dot1;
+		sum3 += u2;
+              }
+            }
+          }
+
+          // contribution of this mesh point: sum1 - sum2^2/(sum3^2 * sqk)
+
+	  sum2 *= sum2;
+	  sum3 *= sum3*sqk;
+          qopt += sum1 -sum2/sum3;
+        }
+      }
+    }
+  }
+  return qopt;
+}
+
+/* ----------------------------------------------------------------------
+   Compute qopt for the ad differentiation scheme and Dispersion interaction
+   returns only the contribution of the FFT mesh points
+   n*lo_fft_6..n*hi_fft_6 of this proc
+   the sum over procs is done by the caller compute_qopt_6()
+------------------------------------------------------------------------- */
+
+double PPPMDisp::compute_qopt_6_ad()
+{
+  double qopt = 0.0;
+  int k,l,m;
+  double *prd;
+
+  // box lengths: box coords if orthogonal, lamda coords if triclinic
+
+  if (triclinic == 0) prd = domain->prd;
+  else prd = domain->prd_lamda;
+
+  double xprd = prd[0];
+  double yprd = prd[1];
+  double zprd = prd[2];
+  double zprd_slab = zprd*slab_volfactor;
+
+  // spacing of the wave vectors in each dimension
+
+  double unitkx = (2.0*MY_PI/xprd);
+  double unitky = (2.0*MY_PI/yprd);
+  double unitkz = (2.0*MY_PI/zprd_slab);
+
+  int nx,ny,nz,kper,lper,mper;
+  double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz;
+  double u2, sqk;
+  double sum1,sum2,sum3,sum4;
+  double dot2, rtdot2, term;
+
+  // inv2ew = 1/(2*g_ewald_6)
+
+  double inv2ew = 2*g_ewald_6;
+  inv2ew = 1/inv2ew;
+  double rtpi = sqrt(MY_PI);
+
+  // shifted images -nb..nb of each wave vector are included per dimension
+
+  int nbx = 2;
+  int nby = 2;
+  int nbz = 2;
+
+  // kper,lper,mper: integer division makes 2*k/N either 0 or 1 for
+  //   0 <= k < N, so indices in the upper half are shifted down by N
+
+  for (m = nzlo_fft_6; m <= nzhi_fft_6; m++) {
+    mper = m - nz_pppm_6*(2*m/nz_pppm_6);
+
+    for (l = nylo_fft_6; l <= nyhi_fft_6; l++) {
+      lper = l - ny_pppm_6*(2*l/ny_pppm_6);
+
+      for (k = nxlo_fft_6; k <= nxhi_fft_6; k++) {
+        kper = k - nx_pppm_6*(2*k/nx_pppm_6);
+      
+        sqk = pow(unitkx*kper,2.0) + pow(unitky*lper,2.0) + 
+          pow(unitkz*mper,2.0);
+
+        // the k = 0 point is skipped
+
+        if (sqk != 0.0) {
+    
+          sum1 = 0.0;
+          sum2 = 0.0;
+          sum3 = 0.0;
+          sum4 = 0.0;
+
+          // q* = shifted wave vector component
+          // s* = exp(-q^2/(4 g_ewald_6^2))
+          // w* = (sin(arg)/arg)^order_6, set to 1 when arg is zero
+
+          for (nx = -nbx; nx <= nbx; nx++) {
+            qx = unitkx*(kper+nx_pppm_6*nx);
+            sx = exp(-qx*qx*inv2ew*inv2ew);
+            wx = 1.0;
+            argx = 0.5*qx*xprd/nx_pppm_6;
+            if (argx != 0.0) wx = pow(sin(argx)/argx,order_6);
+            for (ny = -nby; ny <= nby; ny++) {
+              qy = unitky*(lper+ny_pppm_6*ny);
+              sy = exp(-qy*qy*inv2ew*inv2ew);
+              wy = 1.0;
+              argy = 0.5*qy*yprd/ny_pppm_6;
+              if (argy != 0.0) wy = pow(sin(argy)/argy,order_6);
+              for (nz = -nbz; nz <= nbz; nz++) {
+                qz = unitkz*(mper+nz_pppm_6*nz);
+                sz = exp(-qz*qz*inv2ew*inv2ew);
+                wz = 1.0;
+                argz = 0.5*qz*zprd_slab/nz_pppm_6;
+                if (argz != 0.0) wz = pow(sin(argz)/argz,order_6);
+
+                // dot2 = |shifted q|^2
+
+                dot2 = qx*qx+qy*qy+qz*qz;
+                rtdot2 = sqrt(dot2);
+                term = (1-2*dot2*inv2ew*inv2ew)*sx*sy*sz +
+		       2*dot2*rtdot2*inv2ew*inv2ew*inv2ew*rtpi*erfc(rtdot2*inv2ew);
+                term *= g_ewald_6*g_ewald_6*g_ewald_6;
+                u2 =  pow(wx*wy*wz,2.0);
+                sum1 += term*term*MY_PI*MY_PI*MY_PI/9.0 * dot2;
+                sum2 += -term*MY_PI*rtpi/3.0 * u2 * dot2;
+                sum3 += u2;
+                sum4 += dot2*u2;
+              }
+            }
+          }
+
+          // contribution of this mesh point: sum1 - sum2^2/(sum3*sum4)
+
+          sum2 *= sum2;
+          qopt += sum1 - sum2/(sum3*sum4);
+        }
+      }
+    }
+  }
+  return qopt;
+}
+
+/* ----------------------------------------------------------------------
+   set g_ewald_6 and the size of the FFT grid for Dispersion interactions
+   each step is skipped if its flag is already set
+------------------------------------------------------------------------- */
+
+void PPPMDisp::set_grid_6()
+{
+  // sums of dispersion coefficients
+  if (csumflag == 0) calc_csum();
+
+  // initial g_ewald_6
+  if (gewaldflag_6 == 0) set_init_g6();
+
+  // grid size
+  if (gridflag_6 == 0) set_n_pppm_6();
+
+  // boost grid size until it is factorable
+  for (; !factorable(nx_pppm_6); nx_pppm_6++) {}
+  for (; !factorable(ny_pppm_6); ny_pppm_6++) {}
+  for (; !factorable(nz_pppm_6); nz_pppm_6++) {}
+}
+
+/* ----------------------------------------------------------------------
+   Calculate the sum of the squared dispersion coefficients and other 
+   related quantities required for the calculations
+   sets csum, csumij, cii[], csumi[] and csumflag
+   cii and csumi are reallocated with ntypes+1 entries
+   function[1], function[2], function[3] select which layout of the
+     coefficient array B is used (presumably geometric mixing,
+     arithmetic mixing and no mixing - confirm against the code
+     that fills B)
+------------------------------------------------------------------------- */
+
+void PPPMDisp::calc_csum()
+{
+  csumij = 0.0;
+  csum = 0.0;
+
+  int ntypes = atom->ntypes;   
+  int i,j,k;
+
+  // per-type arrays, entry 0 is allocated and zeroed but not filled below
+
+  delete [] cii;
+  cii = new double[ntypes +1];
+  for (i = 0; i<=ntypes; i++) cii[i] = 0.0;
+  delete [] csumi; 
+  csumi = new double[ntypes +1];
+  for (i = 0; i<=ntypes; i++) csumi[i] = 0.0; 
+
+  // neach[t] = # of atoms of type t on this proc
+
+  int *neach = new int[ntypes+1];
+  for (i = 0; i<=ntypes; i++) neach[i] = 0; 
+
+  // cii = per-type squared coefficient
+  // csum = sum of the squared coefficients over my atoms
+
+  if (function[1]) {
+    for (i = 1; i <= ntypes; i++)
+      cii[i] = B[i]*B[i];
+    int tmp;
+    for (i = 0; i < atom->nlocal; i++) {
+      tmp = atom->type[i];
+      neach[tmp]++;
+      csum += B[tmp]*B[tmp];
+    }
+  }
+  if (function[2]) {
+    for (i = 1; i <= ntypes; i++)
+      cii[i] = 64.0/20.0*B[7*i+3]*B[7*i+3];
+    int tmp;
+    for (i = 0; i < atom->nlocal; i++) {
+      tmp = atom->type[i];
+      neach[tmp]++;
+      csum += 64.0/20.0*B[7*tmp+3]*B[7*tmp+3];
+    }
+  }
+  if (function[3]) {
+    for (i = 1; i <= ntypes; i++)
+      for (j = 0; j < nsplit; j++)
+        cii[i] += B[j]*B[nsplit*i + j]*B[nsplit*i + j];
+    int tmp;
+    for (i = 0; i < atom->nlocal; i++) {
+      tmp = atom->type[i];
+      neach[tmp]++;
+      for (j = 0; j < nsplit; j++)
+        csum += B[j]*B[nsplit*tmp + j]*B[nsplit*tmp + j];
+    }
+  }
+
+  // sum csum and the per-type atom counts over all procs
+
+  double tmp2;
+  MPI_Allreduce(&csum,&tmp2,1,MPI_DOUBLE,MPI_SUM,world);
+  csum = tmp2;
+  csumflag = 1;
+
+  int *neach_all = new int[ntypes+1];
+  MPI_Allreduce(neach,neach_all,ntypes+1,MPI_INT,MPI_SUM,world);
+
+  // compute csumij and csumi
+  // d1,d2 are doubles so the product of the two atom counts
+  //   is not formed in integer arithmetic
+
+  double d1, d2;
+  if (function[1]){
+    for (i=1; i<=ntypes; i++) {
+      for (j=1; j<=ntypes; j++) {
+        csumi[i] += neach_all[j]*B[i]*B[j];
+        d1 = neach_all[i]*B[i];
+        d2 = neach_all[j]*B[j];
+        csumij += d1*d2;
+        //csumij += neach_all[i]*neach_all[j]*B[i]*B[j]; 
+      }
+    }
+  }
+  if (function[2]) {
+    for (i=1; i<=ntypes; i++) {
+      for (j=1; j<=ntypes; j++) {
+        for (k=0; k<=6; k++) {
+          csumi[i] += neach_all[j]*B[7*i + k]*B[7*(j+1)-k-1];
+          d1 = neach_all[i]*B[7*i + k];
+          d2 = neach_all[j]*B[7*(j+1)-k-1];
+          csumij += d1*d2;
+          //csumij += neach_all[i]*neach_all[j]*B[7*i + k]*B[7*(j+1)-k-1];
+        }
+      }
+    }
+  }
+  if (function[3]) {
+    for (i=1; i<=ntypes; i++) {
+      for (j=1; j<=ntypes; j++) {
+        for (k=0; k<nsplit; k++) {
+          csumi[i] += neach_all[j]*B[k]*B[nsplit*i+k]*B[nsplit*j+k];
+          d1 = neach_all[i]*B[nsplit*i+k];
+          d2 = neach_all[j]*B[nsplit*j+k];
+          csumij += B[k]*d1*d2;
+        }
+      }
+    }
+  }    
+
+  delete [] neach;
+  delete [] neach_all;
+}
+
+/* ----------------------------------------------------------------------
+   adjust g_ewald_6 to the new grid size
+   Newton iteration on f_6(), stops as soon as |f_6()| < SMALL
+   calls error->all() if this is not reached within LARGE iterations
+------------------------------------------------------------------------- */
+
+void PPPMDisp::adjust_gewald_6()
+{
+  // step size of the forward difference, same value as used in derivf_6()
+
+  const double h = 0.000001;
+
+  // f_6() requires a sum over the FFT mesh and an MPI_Allreduce,
+  // so the value at the current g_ewald_6 is carried over between
+  // iterations and f_6() is evaluated only twice per Newton step
+
+  double fcur = f_6();
+
+  for (int i = 0; i < LARGE; i++) {
+
+    // numerical derivative [f(x + h) - f(x)] / h
+
+    const double g_ewald_old = g_ewald_6;
+    g_ewald_6 += h;
+    const double fplus = f_6();
+    g_ewald_6 = g_ewald_old;
+    const double df = (fplus - fcur)/h;
+
+    // Newton update of g_ewald_6
+
+    g_ewald_6 -= fcur/df;
+
+    fcur = f_6();
+    if (fabs(fcur) < SMALL) return;
+  }
+
+  // Failed to converge
+
+  error->all(FLERR, "Could not adjust g_ewald_6");
+
+}
+
+/* ----------------------------------------------------------------------
+ Residual used to adjust g_ewald_6 (Dispersion):
+ real-space error estimate minus kspace error estimate
+ ------------------------------------------------------------------------- */
+
+double PPPMDisp::f_6()
+{
+  // box lengths: box coords if orthogonal, lamda coords if triclinic
+  double *cell = (triclinic == 0) ? domain->prd : domain->prd_lamda;
+
+  const double lx = cell[0];
+  const double ly = cell[1];
+  const double lz = cell[2];
+  const double lz_slab = lz*slab_volfactor;
+  const bigint npart = atom->natoms;
+
+  // real-space part
+  const double err_real = lj_rspace_error();
+
+  // kspace part
+  const double qopt_sum = compute_qopt_6();
+  const double err_recip = sqrt(qopt_sum/npart)*csum/(lx*ly*lz_slab);
+
+  return err_real - err_recip;
+}
+
+/* ----------------------------------------------------------------------
+ Forward-difference estimate of df_6/dg_ewald_6:
+ [f_6(g_ewald_6 + step) - f_6(g_ewald_6)] / step
+ g_ewald_6 is shifted temporarily and restored before returning
+ ------------------------------------------------------------------------- */
+            
+double PPPMDisp::derivf_6()
+{
+  const double step = 0.000001;
+
+  const double f_here = f_6();
+
+  const double g_saved = g_ewald_6;
+  g_ewald_6 += step;
+  const double f_ahead = f_6();
+  g_ewald_6 = g_saved;
+
+  return (f_ahead - f_here)/step;
+}
+
+
+/* ----------------------------------------------------------------------
+   calculate an initial value for g_ewald_6
+   finds the g_ewald_6 at which lj_rspace_error() equals the target
+   real-space accuracy: bracket the root by doubling/halving, then bisect
+   calls error->all() if either stage uses up its LARGE iterations
+   ---------------------------------------------------------------------- */
+
+void PPPMDisp::set_init_g6()
+{
+  // target accuracy, a user defined real-space accuracy takes precedence
+
+  double acc_rspace = accuracy;
+  if (accuracy_real_6 > 0) acc_rspace = accuracy_real_6;
+
+  // start from g_ewald_6 = 1/cutoff_lj
+  // df_real = real-space error - target
+  // g_ewald_old starts equal to g_ewald_6 so that the bracket below is
+  //   well defined even if df_real is exactly zero here
+
+  g_ewald_6 = 1.0/cutoff_lj;
+  double g_ewald_old = g_ewald_6;
+  double df_real = lj_rspace_error() - acc_rspace;
+  int counter = 0;
+
+  // if df_real > 0, repeat multiply g_ewald_6 by 2 until df_real <= 0
+  // else if df_real < 0, repeat divide g_ewald_6 by 2 until df_real >= 0
+  // g_ewald_old holds the value of the previous step, so that
+  //   g_ewald_old and g_ewald_6 bracket the sign change
+
+  if (df_real > 0) {
+    while (df_real > 0 && counter < LARGE) {
+      counter++;
+      g_ewald_old = g_ewald_6;
+      g_ewald_6 *= 2;
+      df_real = lj_rspace_error() - acc_rspace;
+    }
+  } else if (df_real < 0) {
+    while (df_real < 0 && counter < LARGE) {
+      counter++;
+      g_ewald_old = g_ewald_6;
+      g_ewald_6 *= 0.5;
+      df_real = lj_rspace_error() - acc_rspace;
+    }
+  }
+
+  if (counter >= LARGE-1) error->all(FLERR,"Cannot compute initial g_ewald_disp");
+
+  // perform bisection between the last two values of g_ewald_6
+  // df_real < 0 moves the upper bound, otherwise the lower bound
+
+  double gmin = MIN(g_ewald_6, g_ewald_old);
+  double gmax = MAX(g_ewald_6, g_ewald_old);
+  g_ewald_6 = gmin + 0.5*(gmax-gmin);
+  counter = 0;
+  while (gmax-gmin > SMALL && counter < LARGE) {
+    counter++;
+    df_real = lj_rspace_error() -acc_rspace;
+    if (df_real < 0) gmax = g_ewald_6;
+    else gmin = g_ewald_6;
+    g_ewald_6 = gmin + 0.5*(gmax-gmin);
+  }
+  if (counter >= LARGE-1) error->all(FLERR,"Cannot compute initial g_ewald_disp");
+
+}
+
+/* ----------------------------------------------------------------------
+   determine nx_pppm_6, ny_pppm_6, nz_pppm_6 for dispersion interaction
+   the grid spacing is reduced by 5% per pass until the kspace error
+   estimate meets the requested accuracy
+   also leaves the n*lo_fft_6,n*hi_fft_6 limits of the last pass set
+   ---------------------------------------------------------------------- */
+
+void PPPMDisp::set_n_pppm_6()
+{
+  const bigint npart = atom->natoms;
+
+  // box lengths: box coords if orthogonal, lamda coords if triclinic
+  double *cell = (triclinic == 0) ? domain->prd : domain->prd_lamda;
+
+  const double lx = cell[0];
+  const double ly = cell[1];
+  const double lz = cell[2];
+  const double lz_slab = lz*slab_volfactor;
+
+  // target accuracy, a user defined kspace accuracy takes precedence
+  double target = accuracy;
+  if (accuracy_kspace_6 > 0.0) target = accuracy_kspace_6;
+
+  // initial grid spacing, identical in all three dimensions
+  double spacing = 4.0/g_ewald_6;
+
+  for (int pass = 1; ; pass++) {
+
+    // grid dimensions for this spacing, at least 2 points each
+    nx_pppm_6 = static_cast<int> (lx/spacing);
+    ny_pppm_6 = static_cast<int> (ly/spacing);
+    nz_pppm_6 = static_cast<int> (lz_slab/spacing);
+
+    if (nx_pppm_6 < 2) nx_pppm_6 = 2;
+    if (ny_pppm_6 < 2) ny_pppm_6 = 2;
+    if (nz_pppm_6 < 2) nz_pppm_6 = 2;
+
+    // proc layout of the FFT decomposition in y and z
+    int nprocs_y,nprocs_z;
+    if (nz_pppm_6 < nprocs) {
+      procs2grid2d(nprocs,ny_pppm_6,nz_pppm_6,&nprocs_y,&nprocs_z);
+    } else {
+      nprocs_y = 1;
+      nprocs_z = nprocs;
+    }
+
+    const int my_y = me % nprocs_y;
+    const int my_z = me / nprocs_y;
+
+    // section of the FFT mesh owned by this proc
+    nxlo_fft_6 = 0;
+    nxhi_fft_6 = nx_pppm_6 - 1;
+    nylo_fft_6 = my_y*ny_pppm_6/nprocs_y;
+    nyhi_fft_6 = (my_y+1)*ny_pppm_6/nprocs_y - 1;
+    nzlo_fft_6 = my_z*nz_pppm_6/nprocs_z;
+    nzhi_fft_6 = (my_z+1)*nz_pppm_6/nprocs_z - 1;
+
+    // kspace error estimate for this grid
+    const double qopt_sum = compute_qopt_6();
+    const double err_recip = sqrt(qopt_sum/npart)*csum/(lx*ly*lz_slab);
+
+    // done if accurate enough, give up after more than 500 passes
+    if (err_recip <= target) break;
+    if (pass > 500) error->all(FLERR, "Could not compute grid size for Dispersion");
+
+    spacing *= 0.95;
+  }
+}
+
+/* ----------------------------------------------------------------------
+   real space error estimate for dispersion interactions
+   depends on csum, natoms, the box volume, cutoff_lj and g_ewald_6
+   ---------------------------------------------------------------------- */
+
+double PPPMDisp::lj_rspace_error()
+{
+  const bigint npart = atom->natoms;
+  const double lx = domain->xprd;
+  const double ly = domain->yprd;
+  const double lz = domain->zprd;
+  const double lz_slab = lz*slab_volfactor;
+
+  // rgs = (cutoff_lj*g_ewald_6)^2
+  const double rg = (cutoff_lj*g_ewald_6);
+  const double rgs = rg*rg;
+  const double rgs_inv = 1.0/rgs;
+
+  // polynomial in 1/rgs
+  const double poly = (1+rgs_inv*(3+rgs_inv*(6+rgs_inv*6)));
+
+  return csum/sqrt(npart*lx*ly*lz_slab*cutoff_lj)*sqrt(MY_PI)*pow(g_ewald_6, 5)*
+    exp(-rgs)*poly;
+}
+
+
+/* ----------------------------------------------------------------------
+   Compute the modified (hockney-eastwood) coulomb green function
+   fills greensfn[] for the FFT mesh points owned by this proc,
+   stored with x as the fastest and z as the slowest index
+   also sets volume
+   ---------------------------------------------------------------------- */ 
+
+void PPPMDisp::compute_gf()
+{
+  int k,l,m,n;
+  double *prd;
+
+  // box lengths: box coords if orthogonal, lamda coords if triclinic
+
+  if (triclinic == 0) prd = domain->prd;
+  else prd = domain->prd_lamda;
+
+  double xprd = prd[0];
+  double yprd = prd[1];
+  double zprd = prd[2];
+  double zprd_slab = zprd*slab_volfactor;
+  volume = xprd * yprd * zprd_slab;
+
+  // spacing of the wave vectors in each dimension
+
+  double unitkx = (2.0*MY_PI/xprd);
+  double unitky = (2.0*MY_PI/yprd);
+  double unitkz = (2.0*MY_PI/zprd_slab);
+
+  int kper,lper,mper;
+  double snx,sny,snz,snx2,sny2,snz2;
+  double sqk;
+  double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz;
+  double numerator,denominator;
+
+
+  // per dimension:
+  // *per = integer division makes 2*k/N either 0 or 1 for 0 <= k < N,
+  //        so indices in the upper half are shifted down by N
+  // sn*2 = sin^2 of half the wave vector times the grid spacing
+  // s*   = exp(-q^2/(4 g_ewald^2))
+  // w*   = (sin(arg)/arg)^(2*order), set to 1 when arg is zero
+
+  n = 0;
+  for (m = nzlo_fft; m <= nzhi_fft; m++) {
+    mper = m - nz_pppm*(2*m/nz_pppm);
+    qz = unitkz*mper;
+    snz = sin(0.5*qz*zprd_slab/nz_pppm);
+    snz2 = snz*snz;
+    sz = exp(-0.25*pow(qz/g_ewald,2.0));
+    wz = 1.0;
+    argz = 0.5*qz*zprd_slab/nz_pppm;
+    if (argz != 0.0) wz = pow(sin(argz)/argz,order);
+    wz *= wz;
+
+    for (l = nylo_fft; l <= nyhi_fft; l++) {
+      lper = l - ny_pppm*(2*l/ny_pppm);
+      qy = unitky*lper;
+      sny = sin(0.5*qy*yprd/ny_pppm);
+      sny2 = sny*sny;
+      sy = exp(-0.25*pow(qy/g_ewald,2.0));
+      wy = 1.0;
+      argy = 0.5*qy*yprd/ny_pppm;
+      if (argy != 0.0) wy = pow(sin(argy)/argy,order);
+      wy *= wy;
+
+      for (k = nxlo_fft; k <= nxhi_fft; k++) {
+        kper = k - nx_pppm*(2*k/nx_pppm);
+        qx = unitkx*kper;
+        snx = sin(0.5*qx*xprd/nx_pppm);
+        snx2 = snx*snx;
+        sx = exp(-0.25*pow(qx/g_ewald,2.0));
+        wx = 1.0;
+        argx = 0.5*qx*xprd/nx_pppm;
+        if (argx != 0.0) wx = pow(sin(argx)/argx,order);
+        wx *= wx;
+
+        sqk = pow(qx,2.0) + pow(qy,2.0) + pow(qz,2.0);
+
+        // the k = 0 entry is set to zero
+        // the denominator comes from gf_denom(), defined elsewhere
+
+        if (sqk != 0.0) {
+          numerator = 4.0*MY_PI/sqk;
+          denominator = gf_denom(snx2,sny2,snz2, gf_b, order);  
+          greensfn[n++] = numerator*sx*sy*sz*wx*wy*wz/denominator;
+        } else greensfn[n++] = 0.0;
+      }
+    }
+  }
+}
+
+/* ----------------------------------------------------------------------
+   compute self force coefficients for ad-differentiation scheme
+   this routine computes the part that only depends on the grid: it reads
+   the box lengths and its arguments, nothing else
+
+   nxp,nyp,nzp         = grid size in each dimension
+   ord                 = exponent applied to the sin(arg)/arg weights
+   n?lo_ft .. n?hi_ft  = inclusive index range of grid points to process
+   sf_pre1 .. sf_pre6  = output arrays, one value per processed grid point,
+                         written with x fastest, then y, then z
+
+   for every grid point the weights are evaluated at the wave vector
+   shifted by i grid periods (i = -2..2) and at the same vectors shifted
+   by one and by two further periods
+   each output is the sum over all 5x5x5 shift combinations of
+   (unshifted weight product) * (product with one dimension shifted):
+     sf_pre1,sf_pre2 = x shifted by one,two periods
+     sf_pre3,sf_pre4 = y shifted by one,two periods
+     sf_pre5,sf_pre6 = z shifted by one,two periods
+------------------------------------------------------------------------- */
+
+void PPPMDisp::compute_sf_precoeff(int nxp, int nyp, int nzp, int ord, 
+                                    int nxlo_ft, int nylo_ft, int nzlo_ft,
+                                    int nxhi_ft, int nyhi_ft, int nzhi_ft,
+                                    double *sf_pre1, double *sf_pre2, double *sf_pre3,
+                                    double *sf_pre4, double *sf_pre5, double *sf_pre6)
+{
+
+  int i,k,l,m,n;
+  double *prd;
+
+  // volume-dependent factors
+  // adjust z dimension for 2d slab PPPM
+  // z dimension for 3d PPPM is zprd since slab_volfactor = 1.0
+
+  if (triclinic == 0) prd = domain->prd;
+  else prd = domain->prd_lamda;
+
+  double xprd = prd[0];
+  double yprd = prd[1];
+  double zprd = prd[2];
+  double zprd_slab = zprd*slab_volfactor;
+
+  double unitkx = (2.0*MY_PI/xprd);
+  double unitky = (2.0*MY_PI/yprd);
+  double unitkz = (2.0*MY_PI/zprd_slab);
+
+  int nx,ny,nz,kper,lper,mper;
+  double argx,argy,argz;
+  double wx0[5],wy0[5],wz0[5],wx1[5],wy1[5],wz1[5],wx2[5],wy2[5],wz2[5];
+  double qx0,qy0,qz0,qx1,qy1,qz1,qx2,qy2,qz2;
+  double u0,u1,u2,u3,u4,u5,u6;
+  double sum1,sum2,sum3,sum4,sum5,sum6;
+
+  int nb = 2;  // shifts run over -nb..nb, the weight arrays hold 2*nb+1 = 5 entries
+
+  n = 0;
+  for (m = nzlo_ft; m <= nzhi_ft; m++) {
+    mper = m - nzp*(2*m/nzp);  // integer division: for 0 <= m < nzp the upper half maps to m-nzp
+
+    for (l = nylo_ft; l <= nyhi_ft; l++) {
+      lper = l - nyp*(2*l/nyp);
+
+      for (k = nxlo_ft; k <= nxhi_ft; k++) {
+        kper = k - nxp*(2*k/nxp);
+      
+        sum1 = sum2 = sum3 = sum4 = sum5 = sum6 = 0.0;
+        for (i = -nb; i <= nb; i++) {  // tabulate 1d weights, slot i+2 holds shift i
+
+          qx0 = unitkx*(kper+nxp*i);
+          qx1 = unitkx*(kper+nxp*(i+1));
+          qx2 = unitkx*(kper+nxp*(i+2));
+          wx0[i+2] = 1.0;  // weight stays 1 when the argument is exactly zero
+          wx1[i+2] = 1.0;
+          wx2[i+2] = 1.0;
+          argx = 0.5*qx0*xprd/nxp;
+          if (argx != 0.0) wx0[i+2] = pow(sin(argx)/argx,ord);
+          argx = 0.5*qx1*xprd/nxp;
+          if (argx != 0.0) wx1[i+2] = pow(sin(argx)/argx,ord);
+          argx = 0.5*qx2*xprd/nxp;
+          if (argx != 0.0) wx2[i+2] = pow(sin(argx)/argx,ord);
+
+          qy0 = unitky*(lper+nyp*i);
+          qy1 = unitky*(lper+nyp*(i+1));
+          qy2 = unitky*(lper+nyp*(i+2));
+          wy0[i+2] = 1.0;
+          wy1[i+2] = 1.0;
+          wy2[i+2] = 1.0;
+          argy = 0.5*qy0*yprd/nyp;
+          if (argy != 0.0) wy0[i+2] = pow(sin(argy)/argy,ord);
+          argy = 0.5*qy1*yprd/nyp;
+          if (argy != 0.0) wy1[i+2] = pow(sin(argy)/argy,ord);
+          argy = 0.5*qy2*yprd/nyp;
+          if (argy != 0.0) wy2[i+2] = pow(sin(argy)/argy,ord);
+   
+          qz0 = unitkz*(mper+nzp*i);
+          qz1 = unitkz*(mper+nzp*(i+1));
+          qz2 = unitkz*(mper+nzp*(i+2));
+          wz0[i+2] = 1.0;
+          wz1[i+2] = 1.0;
+          wz2[i+2] = 1.0;
+          argz = 0.5*qz0*zprd_slab/nzp;
+          if (argz != 0.0) wz0[i+2] = pow(sin(argz)/argz,ord);
+          argz = 0.5*qz1*zprd_slab/nzp;
+          if (argz != 0.0) wz1[i+2] = pow(sin(argz)/argz,ord);
+           argz = 0.5*qz2*zprd_slab/nzp;
+          if (argz != 0.0) wz2[i+2] = pow(sin(argz)/argz,ord);
+        }
+    
+        for (nx = 0; nx <= 4; nx++) {  // sum over all 5x5x5 shift combinations
+          for (ny = 0; ny <= 4; ny++) {
+            for (nz = 0; nz <= 4; nz++) {
+              u0 = wx0[nx]*wy0[ny]*wz0[nz];  // unshifted weight product
+              u1 = wx1[nx]*wy0[ny]*wz0[nz];  // x shifted by one period
+              u2 = wx2[nx]*wy0[ny]*wz0[nz];  // x shifted by two periods
+              u3 = wx0[nx]*wy1[ny]*wz0[nz];  // y shifted by one period
+              u4 = wx0[nx]*wy2[ny]*wz0[nz];  // y shifted by two periods
+              u5 = wx0[nx]*wy0[ny]*wz1[nz];  // z shifted by one period
+              u6 = wx0[nx]*wy0[ny]*wz2[nz];  // z shifted by two periods
+
+              sum1 += u0*u1;
+              sum2 += u0*u2;
+              sum3 += u0*u3;
+              sum4 += u0*u4;
+              sum5 += u0*u5;
+              sum6 += u0*u6;
+            }
+          }
+        }
+        
+        // store values, n advances once per grid point
+
+        sf_pre1[n] = sum1;
+        sf_pre2[n] = sum2;
+        sf_pre3[n] = sum3;
+        sf_pre4[n] = sum4;
+        sf_pre5[n] = sum5;
+        sf_pre6[n++] = sum6;
+      }
+    }
+  }
+}
+
+/* ----------------------------------------------------------------------
+   compute the modified (Hockney-Eastwood) dispersion Green's function
+   fills greensfn_6 for every point of this proc's dispersion FFT grid
+   section (x fastest, then y, then z)
+   ---------------------------------------------------------------------- */
+
+void PPPMDisp::compute_gf_6()
+{
+  // volume-dependent factors
+  // adjust z dimension for 2d slab PPPM
+  // z dimension for 3d PPPM is zprd since slab_volfactor = 1.0
+
+  double *prd = (triclinic == 0) ? domain->prd : domain->prd_lamda;
+
+  const double xprd = prd[0];
+  const double yprd = prd[1];
+  const double zprd_slab = prd[2]*slab_volfactor;
+
+  const double unitkx = (2.0*MY_PI/xprd);
+  const double unitky = (2.0*MY_PI/yprd);
+  const double unitkz = (2.0*MY_PI/zprd_slab);
+
+  // inv2ew = 1/(2*g_ewald_6)
+
+  double inv2ew = 2*g_ewald_6;
+  inv2ew = 1/inv2ew;
+  const double rtpi = sqrt(MY_PI);
+
+  // prefactor shared by all grid points
+
+  const double numerator = -MY_PI*rtpi*g_ewald_6*g_ewald_6*g_ewald_6/(3.0);
+
+  int n = 0;
+  for (int m = nzlo_fft_6; m <= nzhi_fft_6; m++) {
+    const int mper = m - nz_pppm_6*(2*m/nz_pppm_6);
+    const double qz = unitkz*mper;
+    const double snz = sin(0.5*unitkz*mper*zprd_slab/nz_pppm_6);
+    const double snz2 = snz*snz;
+    const double sz = exp(-qz*qz*inv2ew*inv2ew);
+    const double argz = 0.5*qz*zprd_slab/nz_pppm_6;
+    double wz = 1.0;
+    if (argz != 0.0) wz = pow(sin(argz)/argz,order_6);
+    wz *= wz;
+
+    for (int l = nylo_fft_6; l <= nyhi_fft_6; l++) {
+      const int lper = l - ny_pppm_6*(2*l/ny_pppm_6);
+      const double qy = unitky*lper;
+      const double sny = sin(0.5*unitky*lper*yprd/ny_pppm_6);
+      const double sny2 = sny*sny;
+      const double sy = exp(-qy*qy*inv2ew*inv2ew);
+      const double argy = 0.5*qy*yprd/ny_pppm_6;
+      double wy = 1.0;
+      if (argy != 0.0) wy = pow(sin(argy)/argy,order_6);
+      wy *= wy;
+
+      for (int k = nxlo_fft_6; k <= nxhi_fft_6; k++) {
+        const int kper = k - nx_pppm_6*(2*k/nx_pppm_6);
+        const double qx = unitkx*kper;
+        const double snx = sin(0.5*unitkx*kper*xprd/nx_pppm_6);
+        const double snx2 = snx*snx;
+        const double sx = exp(-qx*qx*inv2ew*inv2ew);
+        const double argx = 0.5*qx*xprd/nx_pppm_6;
+        double wx = 1.0;
+        if (argx != 0.0) wx = pow(sin(argx)/argx,order_6);
+        wx *= wx;
+
+        const double sqk = pow(qx,2.0) + pow(qy,2.0) + pow(qz,2.0);
+
+        // the k = 0 term is set to zero
+
+        if (sqk == 0.0) {
+          greensfn_6[n++] = 0.0;
+          continue;
+        }
+
+        const double denominator = gf_denom(snx2,sny2,snz2, gf_b_6, order_6);
+        const double rtsqk = sqrt(sqk);
+        const double term = (1-2*sqk*inv2ew*inv2ew)*sx*sy*sz +
+                2*sqk*rtsqk*inv2ew*inv2ew*inv2ew*rtpi*erfc(rtsqk*inv2ew);
+        greensfn_6[n++] = numerator*term*wx*wy*wz/denominator;
+      }
+    }
+  }
+}
+
+/* ----------------------------------------------------------------------
+   compute self force coefficients for ad-differentiation scheme
+   and Coulomb interaction
+   sums sf_precoeff1..6 times greensfn over this proc's FFT grid section,
+   applies the prefactors and sums the six results over all procs
+------------------------------------------------------------------------- */
+void PPPMDisp::compute_sf_coeff()
+{
+  double *prd = (triclinic == 0) ? domain->prd : domain->prd_lamda;
+
+  const double xprd = prd[0];
+  const double yprd = prd[1];
+  const double zprd_slab = prd[2]*slab_volfactor;
+  volume = xprd * yprd * zprd_slab;
+
+  for (int c = 0; c < 6; c++) sf_coeff[c] = 0.0;
+
+  // local sums, n runs over the FFT grid points in storage order
+
+  int n = 0;
+  for (int m = nzlo_fft; m <= nzhi_fft; m++)
+    for (int l = nylo_fft; l <= nyhi_fft; l++)
+      for (int k = nxlo_fft; k <= nxhi_fft; k++, n++) {
+        sf_coeff[0] += sf_precoeff1[n]*greensfn[n];
+        sf_coeff[1] += sf_precoeff2[n]*greensfn[n];
+        sf_coeff[2] += sf_precoeff3[n]*greensfn[n];
+        sf_coeff[3] += sf_precoeff4[n]*greensfn[n];
+        sf_coeff[4] += sf_precoeff5[n]*greensfn[n];
+        sf_coeff[5] += sf_precoeff6[n]*greensfn[n];
+      }
+
+  // Compute the coefficients for the self-force correction
+  // odd entries carry an extra factor of 2
+
+  double prex = MY_PI/volume;
+  double prey = prex;
+  double prez = prex;
+  prex *= nx_pppm/xprd;
+  prey *= ny_pppm/yprd;
+  prez *= nz_pppm/zprd_slab;
+
+  sf_coeff[0] *= prex;
+  sf_coeff[1] *= prex*2;
+  sf_coeff[2] *= prey;
+  sf_coeff[3] *= prey*2;
+  sf_coeff[4] *= prez;
+  sf_coeff[5] *= prez*2;
+
+  // communicate values with other procs
+
+  double total[6];
+  MPI_Allreduce(sf_coeff,total,6,MPI_DOUBLE,MPI_SUM,world);
+  for (int c = 0; c < 6; c++) sf_coeff[c] = total[c];
+}
+
+/* ----------------------------------------------------------------------
+   compute self force coefficients for ad-differentiation scheme
+   and Dispersion interaction
+   sums sf_precoeff1_6..6_6 times greensfn_6 over this proc's dispersion
+   FFT grid section, applies the prefactors and sums over all procs
+------------------------------------------------------------------------- */
+
+void PPPMDisp::compute_sf_coeff_6()
+{
+  double *prd = (triclinic == 0) ? domain->prd : domain->prd_lamda;
+
+  const double xprd = prd[0];
+  const double yprd = prd[1];
+  const double zprd_slab = prd[2]*slab_volfactor;
+  volume = xprd * yprd * zprd_slab;
+
+  for (int c = 0; c < 6; c++) sf_coeff_6[c] = 0.0;
+
+  // local sums, n runs over the FFT grid points in storage order
+
+  int n = 0;
+  for (int m = nzlo_fft_6; m <= nzhi_fft_6; m++)
+    for (int l = nylo_fft_6; l <= nyhi_fft_6; l++)
+      for (int k = nxlo_fft_6; k <= nxhi_fft_6; k++, n++) {
+        sf_coeff_6[0] += sf_precoeff1_6[n]*greensfn_6[n];
+        sf_coeff_6[1] += sf_precoeff2_6[n]*greensfn_6[n];
+        sf_coeff_6[2] += sf_precoeff3_6[n]*greensfn_6[n];
+        sf_coeff_6[3] += sf_precoeff4_6[n]*greensfn_6[n];
+        sf_coeff_6[4] += sf_precoeff5_6[n]*greensfn_6[n];
+        sf_coeff_6[5] += sf_precoeff6_6[n]*greensfn_6[n];
+      }
+
+  // perform multiplication with prefactors
+  // odd entries carry an extra factor of 2
+
+  double prex = MY_PI/volume;
+  double prey = prex;
+  double prez = prex;
+  prex *= nx_pppm_6/xprd;
+  prey *= ny_pppm_6/yprd;
+  prez *= nz_pppm_6/zprd_slab;
+
+  sf_coeff_6[0] *= prex;
+  sf_coeff_6[1] *= prex*2;
+  sf_coeff_6[2] *= prey;
+  sf_coeff_6[3] *= prey*2;
+  sf_coeff_6[4] *= prez;
+  sf_coeff_6[5] *= prez*2;
+
+  // communicate values with other procs
+
+  double total[6];
+  MPI_Allreduce(sf_coeff_6,total,6,MPI_DOUBLE,MPI_SUM,world);
+  for (int c = 0; c < 6; c++) sf_coeff_6[c] = total[c];
+}
+
+/* ----------------------------------------------------------------------
+   denominator for Hockney-Eastwood Green's function
+     of x,y,z = sin(kx*deltax/2), etc
+
+            inf                 n-1
+   S(n,k) = Sum  W(k+pi*j)**2 = Sum b(l)*(z*z)**l
+           j=-inf               l=0
+
+          = -(z*z)**n /(2n-1)! * (d/dx)**(2n-1) cot(x)  at z = sin(x)
+   g_b = denominator expansion coeffs, ord = number of coeffs used
+   the polynomial is evaluated once per argument with Horner's rule
+   returns the square of the product of the three polynomial values
+------------------------------------------------------------------------- */
+
+double PPPMDisp::gf_denom(double x, double y, double z, double *g_b, int ord)
+{
+  double polyx = 0.0;
+  double polyy = 0.0;
+  double polyz = 0.0;
+
+  // highest coefficient first
+
+  int l = ord;
+  while (l-- > 0) {
+    const double coeff = g_b[l];
+    polyx = coeff + polyx*x;
+    polyy = coeff + polyy*y;
+    polyz = coeff + polyz*z;
+  }
+
+  const double prod = polyx*polyy*polyz;
+  return prod*prod;
+}
+
+/* ----------------------------------------------------------------------
+   pre-compute Green's function denominator expansion coeffs, Gamma(2n)
+   gf  = output array, entries 0..ord-1 are written
+   ord = number of coefficients
+------------------------------------------------------------------------- */
+
+void PPPMDisp::compute_gf_denom(double* gf, int ord)
+{
+  // start from gf = (1,0,...,0)
+
+  gf[0] = 1.0;
+  for (int l = 1; l < ord; l++) gf[l] = 0.0;
+
+  // recurrence over m, each pass updates entries m..0 from high to low
+  // so that gf[l-1] still holds its value from the previous pass
+
+  for (int m = 1; m < ord; m++) {
+    for (int l = m; l > 0; l--) {
+      const int d = l-m;
+      gf[l] = 4.0 * (gf[l]*d*(d-0.5)-gf[l-1]*(d-1)*(d-1));
+    }
+    gf[0] = 4.0 * (gf[0]*(-m)*(-m-0.5));
+  }
+
+  // divide all coefficients by (2*ord-1)!
+
+  bigint ifact = 1;
+  int k = 1;
+  while (k < 2*ord) {
+    ifact *= k;
+    k++;
+  }
+  const double gaminv = 1.0/ifact;
+  for (int l = 0; l < ord; l++) gf[l] *= gaminv;
+}
+
+/* ----------------------------------------------------------------------
+   ghost-swap to accumulate full density in brick decomposition
+   remap density from 3d brick decomposition to FFT decomposition
+   for coulomb interaction or dispersion interaction with geometric
+   mixing
+   n?lo_i .. n?hi_i = inclusive index range copied from dbrick
+   dbrick           = 3d brick density, indexed [z][y][x]
+   dfft             = packed copy, remapped in place
+   work, rmp        = work buffer and Remap object handed to perform()
+------------------------------------------------------------------------- */
+
+void PPPMDisp::brick2fft(int nxlo_i, int nylo_i, int nzlo_i,
+                         int nxhi_i, int nyhi_i, int nzhi_i,
+                         FFT_SCALAR*** dbrick, FFT_SCALAR* dfft, FFT_SCALAR* work,
+                         LAMMPS_NS::Remap* rmp)
+{
+  // copy grabs inner portion of density from 3d brick
+  // remap could be done as pre-stage of FFT,
+  //   but this works optimally on only double values, not complex values
+
+  int npack = 0;
+  for (int iz = nzlo_i; iz <= nzhi_i; iz++) {
+    for (int iy = nylo_i; iy <= nyhi_i; iy++) {
+      for (int ix = nxlo_i; ix <= nxhi_i; ix++) {
+        dfft[npack] = dbrick[iz][iy][ix];
+        npack++;
+      }
+    }
+  }
+
+  rmp->perform(dfft,dfft,work);
+}
+
+
+/* ----------------------------------------------------------------------
+   ghost-swap to accumulate full density in brick decomposition
+   remap density from 3d brick decomposition to FFT decomposition
+   for dispersion with arithmetic mixing rule
+   handles the seven grids density_brick_a0 .. density_brick_a6
+------------------------------------------------------------------------- */
+
+void PPPMDisp::brick2fft_a()
+{
+  // copy grabs inner portion of density from 3d brick
+  // remap could be done as pre-stage of FFT,
+  //   but this works optimally on only double values, not complex values
+
+  int npack = 0;
+  for (int iz = nzlo_in_6; iz <= nzhi_in_6; iz++) {
+    for (int iy = nylo_in_6; iy <= nyhi_in_6; iy++) {
+      for (int ix = nxlo_in_6; ix <= nxhi_in_6; ix++) {
+        density_fft_a0[npack] = density_brick_a0[iz][iy][ix];
+        density_fft_a1[npack] = density_brick_a1[iz][iy][ix];
+        density_fft_a2[npack] = density_brick_a2[iz][iy][ix];
+        density_fft_a3[npack] = density_brick_a3[iz][iy][ix];
+        density_fft_a4[npack] = density_brick_a4[iz][iy][ix];
+        density_fft_a5[npack] = density_brick_a5[iz][iy][ix];
+        density_fft_a6[npack] = density_brick_a6[iz][iy][ix];
+        npack++;
+      }
+    }
+  }
+
+  // remap each grid in place, work1_6 is reused as work buffer
+
+  remap_6->perform(density_fft_a0,density_fft_a0,work1_6);
+  remap_6->perform(density_fft_a1,density_fft_a1,work1_6);
+  remap_6->perform(density_fft_a2,density_fft_a2,work1_6);
+  remap_6->perform(density_fft_a3,density_fft_a3,work1_6);
+  remap_6->perform(density_fft_a4,density_fft_a4,work1_6);
+  remap_6->perform(density_fft_a5,density_fft_a5,work1_6);
+  remap_6->perform(density_fft_a6,density_fft_a6,work1_6);
+}
+
+/* ----------------------------------------------------------------------
+   ghost-swap to accumulate full density in brick decomposition
+   remap density from 3d brick decomposition to FFT decomposition
+   for dispersion with special case
+   handles the nsplit_alloc grids density_brick_none[k]
+------------------------------------------------------------------------- */
+
+void PPPMDisp::brick2fft_none()
+{
+  // copy grabs inner portion of density from 3d brick
+  // remap could be done as pre-stage of FFT,
+  //   but this works optimally on only double values, not complex values
+
+  for (int k = 0; k < nsplit_alloc; k++) {
+    int npack = 0;
+    for (int iz = nzlo_in_6; iz <= nzhi_in_6; iz++) {
+      for (int iy = nylo_in_6; iy <= nyhi_in_6; iy++) {
+        for (int ix = nxlo_in_6; ix <= nxhi_in_6; ix++) {
+          density_fft_none[k][npack] = density_brick_none[k][iz][iy][ix];
+          npack++;
+        }
+      }
+    }
+  }
+
+  // all grids are packed before the first remap is started
+
+  for (int k = 0; k < nsplit_alloc; k++) {
+    remap_6->perform(density_fft_none[k],density_fft_none[k],work1_6);
+  }
+}
+
+/* ----------------------------------------------------------------------
+   find center grid pt for each of my particles
+   check that full stencil for the particle will fit in my 3d brick
+   store central grid pt indices in part2grid array
+   delx,dely,delz   = factors converting a box offset to a grid coordinate
+   sft              = shift added before truncation to int
+   p2g              = output, p2g[i][0..2] = grid pt of local atom i
+   nlow,nup         = lowest and highest stencil offset
+   n?lo .. n?hi     = inclusive index range the stencil must stay inside
+------------------------------------------------------------------------- */
+
+void PPPMDisp::particle_map(double delx, double dely, double delz,
+                             double sft, int** p2g, int nup, int nlow,
+                             int nxlo, int nylo, int nzlo,
+                             int nxhi, int nyhi, int nzhi)
+{
+  double **x = atom->x;
+  const int nlocal = atom->nlocal;
+
+  bool outside = false;
+  for (int i = 0; i < nlocal; i++) {
+
+    // (gx,gy,gz) = global coords of grid pt to "lower left" of charge
+    // current particle coord can be outside global and local box
+    // add/subtract OFFSET to avoid int(-0.75) = 0 when want it to be -1
+
+    const int gx = static_cast<int> ((x[i][0]-boxlo[0])*delx+sft) - OFFSET;
+    const int gy = static_cast<int> ((x[i][1]-boxlo[1])*dely+sft) - OFFSET;
+    const int gz = static_cast<int> ((x[i][2]-boxlo[2])*delz+sft) - OFFSET;
+
+    p2g[i][0] = gx;
+    p2g[i][1] = gy;
+    p2g[i][2] = gz;
+
+    // check that entire stencil around gx,gy,gz will fit in my 3d brick
+
+    const bool fitx = (gx+nlow >= nxlo) && (gx+nup <= nxhi);
+    const bool fity = (gy+nlow >= nylo) && (gy+nup <= nyhi);
+    const bool fitz = (gz+nlow >= nzlo) && (gz+nup <= nzhi);
+    if (!(fitx && fity && fitz)) outside = true;
+  }
+
+  // report only after all atoms have been mapped
+
+  if (outside) error->one(FLERR,"Out of range atoms - cannot compute PPPMDisp");
+}
+
+
+void PPPMDisp::particle_map_c(double delx, double dely, double delz,
+                               double sft, int** p2g, int nup, int nlow,
+                               int nxlo, int nylo, int nzlo,
+                               int nxhi, int nyhi, int nzhi)
+{ /* thin wrapper: forwards every argument unchanged to particle_map(),
+     which stores the grid pt of each local atom in p2g and raises an
+     error if a stencil does not fit in the given index range */
+  particle_map(delx, dely, delz, sft, p2g, nup, nlow,
+               nxlo, nylo, nzlo, nxhi, nyhi, nzhi);
+}
+
+/* ----------------------------------------------------------------------
+   create discretized "density" on section of global grid due to my particles
+   density(x,y,z) = charge "density" at grid points of my 3d brick
+   (nxlo:nxhi,nylo:nyhi,nzlo:nzhi) is extent of my brick (including ghosts)
+   in global grid
+   each local atom adds delvolinv*q[i] times its 1d stencil weights to
+   density_brick
+------------------------------------------------------------------------- */
+
+void PPPMDisp::make_rho_c()
+{
+  // clear 3d density array
+
+  memset(&(density_brick[nzlo_out][nylo_out][nxlo_out]),0,
+         ngrid*sizeof(FFT_SCALAR));
+
+  double *q = atom->q;
+  double **x = atom->x;
+  const int nlocal = atom->nlocal;
+
+  // loop over my charges, add their contribution to nearby grid points
+
+  for (int i = 0; i < nlocal; i++) {
+
+    // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
+    // (dx,dy,dz) = distance to "lower left" grid pt
+
+    const int nx = part2grid[i][0];
+    const int ny = part2grid[i][1];
+    const int nz = part2grid[i][2];
+    FFT_SCALAR dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv;
+    FFT_SCALAR dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv;
+    FFT_SCALAR dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv;
+
+    compute_rho1d(dx,dy,dz, order, rho_coeff, rho1d);
+
+    // (mx,my,mz) = global coords of moving stencil pt
+    // the weight is built up one dimension at a time: z, then y, then x
+
+    const FFT_SCALAR z0 = delvolinv * q[i];
+    for (int n = nlower; n <= nupper; n++) {
+      const int mz = n+nz;
+      const FFT_SCALAR y0 = z0*rho1d[2][n];
+      for (int m = nlower; m <= nupper; m++) {
+        const int my = m+ny;
+        const FFT_SCALAR x0 = y0*rho1d[1][m];
+        for (int l = nlower; l <= nupper; l++) {
+          const int mx = l+nx;
+          density_brick[mz][my][mx] += x0*rho1d[0][l];
+        }
+      }
+    }
+  }
+}
+
+/* ----------------------------------------------------------------------
+   create discretized "density" on section of global grid due to my particles
+   density(x,y,z) = dispersion "density" at grid points of my 3d brick
+   (nxlo:nxhi,nylo:nyhi,nzlo:nzhi) is extent of my brick (including ghosts)
+   in global grid --- geometric mixing
+   each local atom adds delvolinv_6*B[type] times its 1d stencil weights
+   to density_brick_g
+------------------------------------------------------------------------- */
+
+void PPPMDisp::make_rho_g()
+{
+  // clear 3d density array
+
+  memset(&(density_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]),0,
+         ngrid_6*sizeof(FFT_SCALAR));
+
+  double **x = atom->x;
+  const int nlocal = atom->nlocal;
+
+  // loop over my particles, add their contribution to nearby grid points
+
+  for (int i = 0; i < nlocal; i++) {
+
+    // (nx,ny,nz) = global coords of grid pt to "lower left" of particle
+    // (dx,dy,dz) = distance to "lower left" grid pt
+
+    const int nx = part2grid_6[i][0];
+    const int ny = part2grid_6[i][1];
+    const int nz = part2grid_6[i][2];
+    FFT_SCALAR dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6;
+    FFT_SCALAR dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6;
+    FFT_SCALAR dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6;
+
+    compute_rho1d(dx,dy,dz, order_6, rho_coeff_6, rho1d_6);
+
+    // (mx,my,mz) = global coords of moving stencil pt
+    // the per-type coefficient B[type] takes the place of the charge
+
+    const int type = atom->type[i];
+    const FFT_SCALAR z0 = delvolinv_6 * B[type];
+    for (int n = nlower_6; n <= nupper_6; n++) {
+      const int mz = n+nz;
+      const FFT_SCALAR y0 = z0*rho1d_6[2][n];
+      for (int m = nlower_6; m <= nupper_6; m++) {
+        const int my = m+ny;
+        const FFT_SCALAR x0 = y0*rho1d_6[1][m];
+        for (int l = nlower_6; l <= nupper_6; l++) {
+          const int mx = l+nx;
+          density_brick_g[mz][my][mx] += x0*rho1d_6[0][l];
+        }
+      }
+    }
+  }
+}
+
+
+/* ----------------------------------------------------------------------
+   create discretized "density" on section of global grid due to my particles
+   density(x,y,z) = dispersion "density" at grid points of my 3d brick
+   (nxlo:nxhi,nylo:nyhi,nzlo:nzhi) is extent of my brick (including ghosts)
+   in global grid --- arithmetic mixing
+   fills the seven grids density_brick_a0 .. density_brick_a6, grid j
+   receives the stencil weight times B[7*type+j]
+------------------------------------------------------------------------- */
+
+void PPPMDisp::make_rho_a()
+{
+  // clear 3d density arrays
+
+  memset(&(density_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]),0,
+         ngrid_6*sizeof(FFT_SCALAR));
+  memset(&(density_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]),0,
+         ngrid_6*sizeof(FFT_SCALAR));
+  memset(&(density_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]),0,
+         ngrid_6*sizeof(FFT_SCALAR));
+  memset(&(density_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]),0,
+         ngrid_6*sizeof(FFT_SCALAR));
+  memset(&(density_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]),0,
+         ngrid_6*sizeof(FFT_SCALAR));
+  memset(&(density_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]),0,
+         ngrid_6*sizeof(FFT_SCALAR));
+  memset(&(density_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]),0,
+         ngrid_6*sizeof(FFT_SCALAR));
+
+  double **x = atom->x;
+  const int nlocal = atom->nlocal;
+
+  // loop over my particles, add their contribution to nearby grid points
+
+  for (int i = 0; i < nlocal; i++) {
+
+    // (nx,ny,nz) = global coords of grid pt to "lower left" of particle
+    // (dx,dy,dz) = distance to "lower left" grid pt
+    // the stencil weights are computed once and shared by all seven grids
+
+    const int nx = part2grid_6[i][0];
+    const int ny = part2grid_6[i][1];
+    const int nz = part2grid_6[i][2];
+    FFT_SCALAR dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6;
+    FFT_SCALAR dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6;
+    FFT_SCALAR dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6;
+    compute_rho1d(dx,dy,dz, order_6, rho_coeff_6, rho1d_6);
+
+    // first of the seven consecutive B entries of this atom's type
+
+    const int type = atom->type[i];
+    const int ib = 7*type;
+
+    // (mx,my,mz) = global coords of moving stencil pt
+
+    const FFT_SCALAR z0 = delvolinv_6;
+    for (int n = nlower_6; n <= nupper_6; n++) {
+      const int mz = n+nz;
+      const FFT_SCALAR y0 = z0*rho1d_6[2][n];
+      for (int m = nlower_6; m <= nupper_6; m++) {
+        const int my = m+ny;
+        const FFT_SCALAR x0 = y0*rho1d_6[1][m];
+        for (int l = nlower_6; l <= nupper_6; l++) {
+          const int mx = l+nx;
+          const FFT_SCALAR w = x0*rho1d_6[0][l];
+          density_brick_a0[mz][my][mx] += w*B[ib];
+          density_brick_a1[mz][my][mx] += w*B[ib+1];
+          density_brick_a2[mz][my][mx] += w*B[ib+2];
+          density_brick_a3[mz][my][mx] += w*B[ib+3];
+          density_brick_a4[mz][my][mx] += w*B[ib+4];
+          density_brick_a5[mz][my][mx] += w*B[ib+5];
+          density_brick_a6[mz][my][mx] += w*B[ib+6];
+        }
+      }
+    }
+  }
+}
+
+/* ----------------------------------------------------------------------
+   create discretized "density" on section of global grid due to my particles
+   density(x,y,z) = dispersion "density" at grid points of my 3d brick
+   (nxlo:nxhi,nylo:nyhi,nzlo:nzhi) is extent of my brick (including ghosts)
+   in global grid --- case when mixing rules don't apply
+   clears nsplit_alloc grids and fills the first nsplit of them, grid k
+   receives the stencil weight times B[nsplit*type + k]
+------------------------------------------------------------------------- */
+
+void PPPMDisp::make_rho_none()
+{
+  // clear 3d density arrays
+
+  for (int k = 0; k < nsplit_alloc; k++) {
+    memset(&(density_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6]),0,
+           ngrid_6*sizeof(FFT_SCALAR));
+  }
+
+  double **x = atom->x;
+  const int nlocal = atom->nlocal;
+
+  // loop over my particles, add their contribution to nearby grid points
+
+  for (int i = 0; i < nlocal; i++) {
+
+    // (nx,ny,nz) = global coords of grid pt to "lower left" of particle
+    // (dx,dy,dz) = distance to "lower left" grid pt
+    // the stencil weights are computed once and shared by all grids
+
+    const int nx = part2grid_6[i][0];
+    const int ny = part2grid_6[i][1];
+    const int nz = part2grid_6[i][2];
+    FFT_SCALAR dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6;
+    FFT_SCALAR dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6;
+    FFT_SCALAR dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6;
+    compute_rho1d(dx,dy,dz, order_6, rho_coeff_6, rho1d_6);
+
+    // first of the nsplit consecutive B entries of this atom's type
+
+    const int type = atom->type[i];
+    const int ib = nsplit*type;
+
+    // (mx,my,mz) = global coords of moving stencil pt
+
+    const FFT_SCALAR z0 = delvolinv_6;
+    for (int n = nlower_6; n <= nupper_6; n++) {
+      const int mz = n+nz;
+      const FFT_SCALAR y0 = z0*rho1d_6[2][n];
+      for (int m = nlower_6; m <= nupper_6; m++) {
+        const int my = m+ny;
+        const FFT_SCALAR x0 = y0*rho1d_6[1][m];
+        for (int l = nlower_6; l <= nupper_6; l++) {
+          const int mx = l+nx;
+          const FFT_SCALAR w = x0*rho1d_6[0][l];
+          for (int k = 0; k < nsplit; k++) {
+            density_brick_none[k][mz][my][mx] += w*B[ib + k];
+          }
+        }
+      }
+    }
+  }
+}
+
+
+/* ----------------------------------------------------------------------
+   FFT-based Poisson solver for ik differentiation
+
+   wk1,wk2            = work arrays holding interleaved (re,im) pairs
+   dfft               = real density on the FFT grid, nft values
+   ft1,ft2            = FFT objects, ft1 is called with flag 1 on wk1,
+                        ft2 with flag -1 on wk2
+   nx_p,ny_p,nz_p     = grid size in each dimension
+   n?lo_ft .. n?hi_ft = inclusive index range of this proc's FFT section
+   n?lo_i .. n?hi_i   = inclusive index range of this proc's brick
+   egy, vir           = accumulated (+=) energy and 6 virial components
+   gfn                = Green's function, one value per FFT grid point
+   kx,ky,kz           = per-index wave vector tables
+   kx2,ky2,kz2        = second set of tables, enter as (k - k2)
+   v?_brick           = output gradient components on the brick
+   vcoeff,vcoeff2     = virial coefficients, vcoeff2 is only passed on
+   u_pa               = output per-atom energy grid, written if eflag_atom
+   v0_pa .. v5_pa     = per-atom virial grids, passed to poisson_peratom
+------------------------------------------------------------------------- */
+
+void PPPMDisp::poisson_ik(FFT_SCALAR* wk1, FFT_SCALAR* wk2,
+                           FFT_SCALAR* dfft, LAMMPS_NS::FFT3d* ft1,LAMMPS_NS::FFT3d* ft2,
+                           int nx_p, int ny_p, int nz_p, int nft,
+                           int nxlo_ft, int nylo_ft, int nzlo_ft,
+                           int nxhi_ft, int nyhi_ft, int nzhi_ft,
+                           int nxlo_i, int nylo_i, int nzlo_i,
+                           int nxhi_i, int nyhi_i, int nzhi_i,
+                           double& egy, double* gfn,
+                           double* kx, double* ky, double* kz,
+                           double* kx2, double* ky2, double* kz2,
+                           FFT_SCALAR*** vx_brick, FFT_SCALAR*** vy_brick, FFT_SCALAR*** vz_brick,
+                           double* vir, double** vcoeff, double** vcoeff2,
+                           FFT_SCALAR*** u_pa, FFT_SCALAR*** v0_pa, FFT_SCALAR*** v1_pa, FFT_SCALAR*** v2_pa,
+                           FFT_SCALAR*** v3_pa, FFT_SCALAR*** v4_pa, FFT_SCALAR*** v5_pa)
+{
+  int i,j,k,n;
+  double eng;
+
+  // transform charge/dispersion density (r -> k)
+  // the real density is packed as complex values with zero imaginary part
+
+  n = 0;
+  for (i = 0; i < nft; i++) {
+    wk1[n++] = dfft[i];
+    wk1[n++] = ZEROF;
+  }
+
+  ft1->compute(wk1,wk1,1);
+
+  // if requested, compute energy and virial contribution
+  // the number of grid points is formed in double precision since the
+  //   int product nx_p*ny_p*nz_p overflows for more than 2^31-1 points
+
+  double scaleinv = 1.0/(static_cast<double>(nx_p)*ny_p*nz_p);
+  double s2 = scaleinv*scaleinv;
+
+  if (eflag_global || vflag_global) {
+    if (vflag_global) {
+      n = 0;
+      for (i = 0; i < nft; i++) {
+        eng = s2 * gfn[i] * (wk1[n]*wk1[n] + wk1[n+1]*wk1[n+1]);
+        for (j = 0; j < 6; j++) vir[j] += eng*vcoeff[i][j];
+        if (eflag_global) egy += eng;
+        n += 2;
+      }
+    } else {
+      n = 0;
+      for (i = 0; i < nft; i++) {
+        egy +=
+          s2 * gfn[i] * (wk1[n]*wk1[n] + wk1[n+1]*wk1[n+1]);
+        n += 2;
+      }
+    }
+  }
+
+  // scale by 1/total-grid-pts to get rho(k)
+  // multiply by Green's function to get V(k)
+
+  n = 0;
+  for (i = 0; i < nft; i++) {
+    wk1[n++] *= scaleinv * gfn[i];
+    wk1[n++] *= scaleinv * gfn[i];
+  }
+
+  // compute gradients of V(r) in each of 3 dims by transforming -ik*V(k)
+  // FFT leaves data in 3d brick decomposition
+  // copy it into inner portion of vdx,vdy,vdz arrays
+
+  // x & y direction gradient
+  // both are packed into one complex FFT: after the transform
+  //   the real part is the x gradient and the imaginary part the y gradient
+
+  n = 0;
+  for (k = nzlo_ft; k <= nzhi_ft; k++)
+    for (j = nylo_ft; j <= nyhi_ft; j++)
+      for (i = nxlo_ft; i <= nxhi_ft; i++) {
+        wk2[n] = 0.5*(kx[i]-kx2[i])*wk1[n+1] + 0.5*(ky[j]-ky2[j])*wk1[n];
+        wk2[n+1] = -0.5*(kx[i]-kx2[i])*wk1[n] + 0.5*(ky[j]-ky2[j])*wk1[n+1];
+        n += 2;
+      }
+
+  ft2->compute(wk2,wk2,-1);
+
+  n = 0;
+  for (k = nzlo_i; k <= nzhi_i; k++)
+    for (j = nylo_i; j <= nyhi_i; j++)
+      for (i = nxlo_i; i <= nxhi_i; i++) {
+        vx_brick[k][j][i] = wk2[n++];
+        vy_brick[k][j][i] = wk2[n++];
+      }
+
+  if (!eflag_atom) {
+    // z direction gradient only
+
+    n = 0;
+    for (k = nzlo_ft; k <= nzhi_ft; k++)
+      for (j = nylo_ft; j <= nyhi_ft; j++)
+        for (i = nxlo_ft; i <= nxhi_ft; i++) {
+          wk2[n] = kz[k]*wk1[n+1];
+          wk2[n+1] = -kz[k]*wk1[n];
+          n += 2;
+        }
+
+    ft2->compute(wk2,wk2,-1);
+
+    // only the real part is used
+
+    n = 0;
+    for (k = nzlo_i; k <= nzhi_i; k++)
+      for (j = nylo_i; j <= nyhi_i; j++)
+        for (i = nxlo_i; i <= nxhi_i; i++) {
+          vz_brick[k][j][i] = wk2[n];
+          n += 2;
+        }
+
+  }
+
+  else {
+    // z direction gradient & per-atom energy
+    // packed into one complex FFT: after the transform the real part is
+    //   the z gradient and the imaginary part the per-atom energy
+
+    n = 0;
+    for (k = nzlo_ft; k <= nzhi_ft; k++)
+      for (j = nylo_ft; j <= nyhi_ft; j++)
+        for (i = nxlo_ft; i <= nxhi_ft; i++) {
+          wk2[n] = 0.5*(kz[k]-kz2[k])*wk1[n+1] - wk1[n+1];
+          wk2[n+1] = -0.5*(kz[k]-kz2[k])*wk1[n] + wk1[n];
+          n += 2;
+        }
+
+    ft2->compute(wk2,wk2,-1);
+
+    n = 0;
+    for (k = nzlo_i; k <= nzhi_i; k++)
+      for (j = nylo_i; j <= nyhi_i; j++)
+        for (i = nxlo_i; i <= nxhi_i; i++) {
+          vz_brick[k][j][i] = wk2[n++];
+          u_pa[k][j][i] = wk2[n++];
+        }
+  }
+
+  // per-atom virial grids are computed from V(k), which is still in wk1
+
+  if (vflag_atom) poisson_peratom(wk1, wk2, ft2, vcoeff, vcoeff2, nft,
+                                  nxlo_i, nylo_i, nzlo_i, nxhi_i, nyhi_i, nzhi_i,
+                                  v0_pa, v1_pa, v2_pa, v3_pa, v4_pa, v5_pa);
+}
+
+/* ----------------------------------------------------------------------
+   FFT-based Poisson solver for ad differentiation (fills the potential grid u_pa)
+------------------------------------------------------------------------- */
+
+void PPPMDisp::poisson_ad(FFT_SCALAR* wk1, FFT_SCALAR* wk2,
+                           FFT_SCALAR* dfft, LAMMPS_NS::FFT3d* ft1,LAMMPS_NS::FFT3d* ft2, 
+                           int nx_p, int ny_p, int nz_p, int nft,
+                           int nxlo_ft, int nylo_ft, int nzlo_ft,
+                           int nxhi_ft, int nyhi_ft, int nzhi_ft,
+                           int nxlo_i, int nylo_i, int nzlo_i,
+                           int nxhi_i, int nyhi_i, int nzhi_i,
+                           double& egy, double* gfn,
+                           double* vir, double** vcoeff, double** vcoeff2,
+                           FFT_SCALAR*** u_pa, FFT_SCALAR*** v0_pa, FFT_SCALAR*** v1_pa, FFT_SCALAR*** v2_pa,
+                           FFT_SCALAR*** v3_pa, FFT_SCALAR*** v4_pa, FFT_SCALAR*** v5_pa)
+// dfft = input density (nft values); wk1,wk2 = work arrays indexed as interleaved pairs
+// egy and vir[0..5] are accumulated into; u_pa is overwritten on the nxlo_i..nzhi_i range
+{
+  int i,j,k,n;
+  double eng;
+
+  // pack density into wk1 (odd entries zeroed) and transform it (r -> k)
+  n = 0;
+  for (i = 0; i < nft; i++) {
+    wk1[n++] = dfft[i];
+    wk1[n++] = ZEROF;
+  }
+
+  ft1->compute(wk1,wk1,1);
+
+  // if requested, compute energy and virial contribution
+  // eng = s2 * gfn[i] * (squared magnitude of the i-th transformed pair)
+  double scaleinv = 1.0/(nx_p*ny_p*nz_p);
+  double s2 = scaleinv*scaleinv;
+
+  if (eflag_global || vflag_global) {
+    if (vflag_global) {
+      n = 0;
+      for (i = 0; i < nft; i++) {
+	eng = s2 * gfn[i] * (wk1[n]*wk1[n] + wk1[n+1]*wk1[n+1]);
+	for (j = 0; j < 6; j++) vir[j] += eng*vcoeff[i][j];
+	if (eflag_global) egy += eng;
+	n += 2;
+      }
+    } else {
+      n = 0;
+      for (i = 0; i < nft; i++) {
+	egy += 
+	  s2 * gfn[i] * (wk1[n]*wk1[n] + wk1[n+1]*wk1[n+1]);
+	n += 2;
+      }
+    }
+  }
+
+  // scale by 1/total-grid-pts to get rho(k)
+  // multiply by Green's function to get V(k)
+
+  n = 0;
+  for (i = 0; i < nft; i++) {
+    wk1[n++] *= scaleinv * gfn[i];
+    wk1[n++] *= scaleinv * gfn[i];
+  }
+
+  // copy V(k) from wk1 into wk2 and transform wk2 with ft2 (direction flag -1)
+  n = 0;
+  for (k = nzlo_ft; k <= nzhi_ft; k++)
+    for (j = nylo_ft; j <= nyhi_ft; j++)
+      for (i = nxlo_ft; i <= nxhi_ft; i++) {
+        wk2[n] = wk1[n];
+	wk2[n+1] = wk1[n+1];
+	n += 2;
+      }
+
+  ft2->compute(wk2,wk2,-1);
+
+  // store the even-indexed entries of wk2 in u_pa; the odd-indexed entries are skipped
+  n = 0;
+  for (k = nzlo_i; k <= nzhi_i; k++)
+    for (j = nylo_i; j <= nyhi_i; j++)
+      for (i = nxlo_i; i <= nxhi_i; i++) {
+	u_pa[k][j][i] = wk2[n++];
+        n++;
+      }
+
+  // v0_pa..v5_pa are only filled (by poisson_peratom) when vflag_atom is set
+  if (vflag_atom) poisson_peratom(wk1, wk2, ft2, vcoeff, vcoeff2, nft,
+                                  nxlo_i, nylo_i, nzlo_i, nxhi_i, nyhi_i, nzhi_i,
+                                  v0_pa, v1_pa, v2_pa, v3_pa, v4_pa, v5_pa);
+
+}
+
+/* ----------------------------------------------------------------------
+   Fourier Transform for per atom virial calculations (two grids per ft2 call)
+------------------------------------------------------------------------- */
+
+void PPPMDisp:: poisson_peratom(FFT_SCALAR* wk1, FFT_SCALAR* wk2, LAMMPS_NS::FFT3d* ft2, 
+                                 double** vcoeff, double** vcoeff2, int nft,
+                                 int nxlo_i, int nylo_i, int nzlo_i,
+                                 int nxhi_i, int nyhi_i, int nzhi_i,
+                                 FFT_SCALAR*** v0_pa, FFT_SCALAR*** v1_pa, FFT_SCALAR*** v2_pa,
+                                 FFT_SCALAR*** v3_pa, FFT_SCALAR*** v4_pa, FFT_SCALAR*** v5_pa)
+{
+ // v0 & v1 term: wk2 pair = wk1 pair times (vcoeff[i][0] + i*vcoeff[i][1]) as a complex product
+  int n, i, j, k;
+  n = 0;
+  for (i = 0; i < nft; i++) {
+    wk2[n] = wk1[n]*vcoeff[i][0] - wk1[n+1]*vcoeff[i][1];
+    wk2[n+1] = wk1[n+1]*vcoeff[i][0] +  wk1[n]*vcoeff[i][1];
+    n += 2;
+  }
+
+  ft2->compute(wk2,wk2,-1); 
+  // even entries of wk2 go to v0_pa, odd entries to v1_pa
+  n = 0;
+  for (k = nzlo_i; k <= nzhi_i; k++)
+    for (j = nylo_i; j <= nyhi_i; j++)
+      for (i = nxlo_i; i <= nxhi_i; i++) {
+        v0_pa[k][j][i] = wk2[n++];
+        v1_pa[k][j][i] = wk2[n++];
+      }
+
+  // v2 & v3 term: same packing with vcoeff[i][2] and vcoeff2[i][0]
+   
+  n = 0;
+  for (i = 0; i < nft; i++) {
+    wk2[n] = wk1[n]*vcoeff[i][2] - wk1[n+1]*vcoeff2[i][0];
+    wk2[n+1] = wk1[n+1]*vcoeff[i][2] + wk1[n]*vcoeff2[i][0];
+    n += 2;
+  }
+
+  ft2->compute(wk2,wk2,-1); 
+  // even entries of wk2 go to v2_pa, odd entries to v3_pa
+  n = 0;
+  for (k = nzlo_i; k <= nzhi_i; k++)
+    for (j = nylo_i; j <= nyhi_i; j++)
+      for (i = nxlo_i; i <= nxhi_i; i++) {
+        v2_pa[k][j][i] = wk2[n++];
+        v3_pa[k][j][i] = wk2[n++];
+      }
+
+  // v4 & v5 term: same packing with vcoeff2[i][1] and vcoeff2[i][2]
+   
+  n = 0;
+  for (i = 0; i < nft; i++) {
+    wk2[n] = wk1[n]*vcoeff2[i][1] - wk1[n+1]*vcoeff2[i][2];
+    wk2[n+1] = wk1[n+1]*vcoeff2[i][1] + wk1[n]*vcoeff2[i][2];
+    n += 2;
+  }
+
+  ft2->compute(wk2,wk2,-1); 
+  // even entries of wk2 go to v4_pa, odd entries to v5_pa
+  n = 0;
+  for (k = nzlo_i; k <= nzhi_i; k++)
+    for (j = nylo_i; j <= nyhi_i; j++)
+      for (i = nxlo_i; i <= nxhi_i; i++) {
+        v4_pa[k][j][i] = wk2[n++];
+        v5_pa[k][j][i] = wk2[n++];
+      }	 
+ 
+}
+
+/* ----------------------------------------------------------------------
+   Poisson solver for one mesh with 2 different dispersion densities
+   for ik scheme (gradient grids plus optional per-atom energy/virial grids)
+------------------------------------------------------------------------- */
+
+void PPPMDisp::poisson_2s_ik(FFT_SCALAR* dfft_1, FFT_SCALAR* dfft_2,
+                              FFT_SCALAR*** vxbrick_1, FFT_SCALAR*** vybrick_1, FFT_SCALAR*** vzbrick_1,
+                              FFT_SCALAR*** vxbrick_2, FFT_SCALAR*** vybrick_2, FFT_SCALAR*** vzbrick_2,
+                              FFT_SCALAR*** u_pa_1, FFT_SCALAR*** v0_pa_1, FFT_SCALAR*** v1_pa_1, FFT_SCALAR*** v2_pa_1,
+                              FFT_SCALAR*** v3_pa_1, FFT_SCALAR*** v4_pa_1, FFT_SCALAR*** v5_pa_1,
+                              FFT_SCALAR*** u_pa_2, FFT_SCALAR*** v0_pa_2, FFT_SCALAR*** v1_pa_2, FFT_SCALAR*** v2_pa_2,
+                              FFT_SCALAR*** v3_pa_2, FFT_SCALAR*** v4_pa_2, FFT_SCALAR*** v5_pa_2)
+// dfft_1,dfft_2 = the two input densities; every *_1 / *_2 grid argument is an output
+{
+  int i,j,k,n;
+  double eng;
+
+  double scaleinv = 1.0/(nx_pppm_6*ny_pppm_6*nz_pppm_6);
+
+  // transform charge/dispersion density (r -> k)
+  // only one transform required when energies and pressures do not
+  //  need to be calculated (dfft_1 -> even entries, dfft_2 -> odd entries)
+  if (eflag_global + vflag_global == 0) {
+    n = 0;
+    for (i = 0; i < nfft_6; i++) {
+      work1_6[n++] = dfft_1[i];
+      work1_6[n++] = dfft_2[i];
+    }
+  
+    fft1_6->compute(work1_6,work1_6,1);
+  }
+  // two transforms are required when energies and pressures are
+  //   calculated
+  else {
+    n = 0;
+    for (i = 0; i < nfft_6; i++) {
+      work1_6[n] = dfft_1[i];
+      work2_6[n++] = ZEROF;
+      work1_6[n] = ZEROF;
+      work2_6[n++] = dfft_2[i];
+    }
+
+    fft1_6->compute(work1_6,work1_6,1);
+    fft1_6->compute(work2_6,work2_6,1);
+
+    double s2 = scaleinv*scaleinv;
+    // energy/virial use the cross term work1[n]*work2[n+1] - work1[n+1]*work2[n]
+    if (vflag_global) {
+      n = 0;
+      for (i = 0; i < nfft_6; i++) {
+	eng = 2 * s2 * greensfn_6[i] * (work1_6[n]*work2_6[n+1] - work1_6[n+1]*work2_6[n]);
+	for (j = 0; j < 6; j++) virial_6[j] += eng*vg_6[i][j];
+	if (eflag_global)energy_6 += eng;
+	n += 2;
+      }
+    } else {
+      n = 0;
+      for (i = 0; i < nfft_6; i++) {
+	energy_6 += 
+	  2 * s2 * greensfn_6[i] * (work1_6[n]*work2_6[n+1] - work1_6[n+1]*work2_6[n]);
+	n += 2;
+      }
+    }
+    // unify the two transformed vectors for efficient calculations later
+    for ( i = 0; i < 2*nfft_6; i++) {
+      work1_6[i] += work2_6[i];
+    }
+  }
+  // scale by 1/total-grid-pts and multiply by the Green's function greensfn_6
+  n = 0;
+  for (i = 0; i < nfft_6; i++) {
+    work1_6[n++] *= scaleinv * greensfn_6[i];
+    work1_6[n++] *= scaleinv * greensfn_6[i];
+  }
+
+  // compute gradients of V(r) in each of 3 dims by transforming -ik*V(k)
+  // FFT leaves data in 3d brick decomposition
+  // copy it into inner portion of vdx,vdy,vdz arrays
+
+  // x direction gradient
+
+  n = 0;
+  for (k = nzlo_fft_6; k <= nzhi_fft_6; k++)
+    for (j = nylo_fft_6; j <= nyhi_fft_6; j++)
+      for (i = nxlo_fft_6; i <= nxhi_fft_6; i++) {
+	work2_6[n] = 0.5*(fkx_6[i]-fkx2_6[i])*work1_6[n+1];
+	work2_6[n+1] = -0.5*(fkx_6[i]-fkx2_6[i])*work1_6[n];
+	n += 2;
+      }
+
+  fft2_6->compute(work2_6,work2_6,-1);
+  // even entries of work2_6 go to the *_1 grid, odd entries to the *_2 grid
+  n = 0;
+  for (k = nzlo_in_6; k <= nzhi_in_6; k++)
+    for (j = nylo_in_6; j <= nyhi_in_6; j++)
+      for (i = nxlo_in_6; i <= nxhi_in_6; i++) {
+	vxbrick_1[k][j][i] = work2_6[n++];
+        vxbrick_2[k][j][i] = work2_6[n++];
+      }
+
+  // y direction gradient
+
+  n = 0;
+  for (k = nzlo_fft_6; k <= nzhi_fft_6; k++)
+    for (j = nylo_fft_6; j <= nyhi_fft_6; j++)
+      for (i = nxlo_fft_6; i <= nxhi_fft_6; i++) {
+	work2_6[n] = 0.5*(fky_6[j]-fky2_6[j])*work1_6[n+1];
+	work2_6[n+1] = -0.5*(fky_6[j]-fky2_6[j])*work1_6[n];
+	n += 2;
+      }
+
+  fft2_6->compute(work2_6,work2_6,-1);
+
+  n = 0;
+  for (k = nzlo_in_6; k <= nzhi_in_6; k++)
+    for (j = nylo_in_6; j <= nyhi_in_6; j++)
+      for (i = nxlo_in_6; i <= nxhi_in_6; i++) {
+	vybrick_1[k][j][i] = work2_6[n++];
+        vybrick_2[k][j][i] = work2_6[n++];
+      }
+
+  // z direction gradient
+
+  n = 0;
+  for (k = nzlo_fft_6; k <= nzhi_fft_6; k++)
+    for (j = nylo_fft_6; j <= nyhi_fft_6; j++)
+      for (i = nxlo_fft_6; i <= nxhi_fft_6; i++) {
+	work2_6[n] = 0.5*(fkz_6[k]-fkz2_6[k])*work1_6[n+1];
+	work2_6[n+1] = -0.5*(fkz_6[k]-fkz2_6[k])*work1_6[n];
+	n += 2;
+      }
+
+  fft2_6->compute(work2_6,work2_6,-1);
+
+  n = 0;
+  for (k = nzlo_in_6; k <= nzhi_in_6; k++)
+    for (j = nylo_in_6; j <= nyhi_in_6; j++)
+      for (i = nxlo_in_6; i <= nxhi_in_6; i++) {
+	vzbrick_1[k][j][i] = work2_6[n++];
+	vzbrick_2[k][j][i] = work2_6[n++];
+      }
+
+  // Per-atom energy: transform an unmodified copy of work1_6
+    
+  if (eflag_atom) {
+    n = 0;
+    for (i = 0; i < nfft_6; i++) {
+      work2_6[n] = work1_6[n];
+      work2_6[n+1] = work1_6[n+1];
+      n += 2;
+    }
+    
+    fft2_6->compute(work2_6,work2_6,-1); 
+    
+    n = 0;
+    for (k = nzlo_in_6; k <= nzhi_in_6; k++)
+      for (j = nylo_in_6; j <= nyhi_in_6; j++)
+        for (i = nxlo_in_6; i <= nxhi_in_6; i++) {
+          u_pa_1[k][j][i] = work2_6[n++];
+          u_pa_2[k][j][i] = work2_6[n++];
+        }
+  } 
+  // per-atom virial grids are only filled when vflag_atom is set
+  if (vflag_atom) poisson_2s_peratom(v0_pa_1, v1_pa_1, v2_pa_1, v3_pa_1, v4_pa_1, v5_pa_1,
+                                     v0_pa_2, v1_pa_2, v2_pa_2, v3_pa_2, v4_pa_2, v5_pa_2);
+}
+
+
+/* ----------------------------------------------------------------------
+   Poisson solver for one mesh with 2 different dispersion densities
+   for ik scheme; outputs for density 1 / 2 are multiplied by B[n1] / B[n2]
+------------------------------------------------------------------------- */
+
+void PPPMDisp::poisson_none_ik(int n1, int n2,FFT_SCALAR* dfft_1, FFT_SCALAR* dfft_2,
+                              FFT_SCALAR*** vxbrick_1, FFT_SCALAR*** vybrick_1, FFT_SCALAR*** vzbrick_1,
+                              FFT_SCALAR*** vxbrick_2, FFT_SCALAR*** vybrick_2, FFT_SCALAR*** vzbrick_2,
+                              FFT_SCALAR**** u_pa, FFT_SCALAR**** v0_pa, FFT_SCALAR**** v1_pa, FFT_SCALAR**** v2_pa,
+                              FFT_SCALAR**** v3_pa, FFT_SCALAR**** v4_pa, FFT_SCALAR**** v5_pa)
+{
+  int i,j,k,n;
+  double eng;
+
+  double scaleinv = 1.0/(nx_pppm_6*ny_pppm_6*nz_pppm_6);
+
+  // transform charge/dispersion density (r -> k)
+  // only one transform required when energies and pressures do not
+  //  need to be calculated (dfft_1 -> even entries, dfft_2 -> odd entries)
+  if (eflag_global + vflag_global == 0) {
+    n = 0;
+    for (i = 0; i < nfft_6; i++) {
+      work1_6[n++] = dfft_1[i];
+      work1_6[n++] = dfft_2[i];
+    }
+  
+    fft1_6->compute(work1_6,work1_6,1);
+  }
+
+
+  // two transforms are required when energies and pressures are
+  //   calculated
+  else {
+    n = 0;
+    for (i = 0; i < nfft_6; i++) {
+      work1_6[n] = dfft_1[i];
+      work2_6[n++] = ZEROF;
+      work1_6[n] = ZEROF;
+      work2_6[n++] = dfft_2[i];
+    }
+   
+
+    fft1_6->compute(work1_6,work1_6,1);
+    fft1_6->compute(work2_6,work2_6,1);
+
+    double s2 = scaleinv*scaleinv;
+    // energy/virial: B-weighted sum of the squared magnitudes of both transforms
+    if (vflag_global) {
+      n = 0;
+      for (i = 0; i < nfft_6; i++) {
+	eng = s2 * greensfn_6[i] * (B[n1]*(work1_6[n]*work1_6[n] + work1_6[n+1]*work1_6[n+1]) + B[n2]*(work2_6[n]*work2_6[n] + work2_6[n+1]*work2_6[n+1]));
+	for (j = 0; j < 6; j++) virial_6[j] += eng*vg_6[i][j];
+	if (eflag_global)energy_6 += eng;
+	n += 2;
+      }
+    } else {
+      n = 0;
+      for (i = 0; i < nfft_6; i++) {
+	energy_6 += 
+	  s2 * greensfn_6[i] * (B[n1]*(work1_6[n]*work1_6[n] + work1_6[n+1]*work1_6[n+1]) + B[n2]*(work2_6[n]*work2_6[n] + work2_6[n+1]*work2_6[n+1]));
+	n += 2;
+      }
+    }
+    // unify the two transformed vectors for efficient calculations later
+    for ( i = 0; i < 2*nfft_6; i++) {
+      work1_6[i] += work2_6[i];
+    }
+  }
+  // scale by 1/total-grid-pts and multiply by the Green's function greensfn_6
+  n = 0;
+  for (i = 0; i < nfft_6; i++) {
+    work1_6[n++] *= scaleinv * greensfn_6[i];
+    work1_6[n++] *= scaleinv * greensfn_6[i];
+  }
+
+  // compute gradients of V(r) in each of 3 dims by transforming -ik*V(k)
+  // FFT leaves data in 3d brick decomposition
+  // copy it into inner portion of vdx,vdy,vdz arrays
+
+  // x direction gradient
+
+  n = 0;
+  for (k = nzlo_fft_6; k <= nzhi_fft_6; k++)
+    for (j = nylo_fft_6; j <= nyhi_fft_6; j++)
+      for (i = nxlo_fft_6; i <= nxhi_fft_6; i++) {
+	work2_6[n] = 0.5*(fkx_6[i]-fkx2_6[i])*work1_6[n+1];
+	work2_6[n+1] = -0.5*(fkx_6[i]-fkx2_6[i])*work1_6[n];
+	n += 2;
+      }
+
+  fft2_6->compute(work2_6,work2_6,-1);
+  // even entries (times B[n1]) go to the *_1 grid, odd entries (times B[n2]) to the *_2 grid
+  n = 0;
+  for (k = nzlo_in_6; k <= nzhi_in_6; k++)
+    for (j = nylo_in_6; j <= nyhi_in_6; j++)
+      for (i = nxlo_in_6; i <= nxhi_in_6; i++) {
+	vxbrick_1[k][j][i] = B[n1]*work2_6[n++];
+        vxbrick_2[k][j][i] = B[n2]*work2_6[n++];
+      }
+
+  // y direction gradient
+
+  n = 0;
+  for (k = nzlo_fft_6; k <= nzhi_fft_6; k++)
+    for (j = nylo_fft_6; j <= nyhi_fft_6; j++)
+      for (i = nxlo_fft_6; i <= nxhi_fft_6; i++) {
+	work2_6[n] = 0.5*(fky_6[j]-fky2_6[j])*work1_6[n+1];
+	work2_6[n+1] = -0.5*(fky_6[j]-fky2_6[j])*work1_6[n];
+	n += 2;
+      }
+
+  fft2_6->compute(work2_6,work2_6,-1);
+
+  n = 0;
+  for (k = nzlo_in_6; k <= nzhi_in_6; k++)
+    for (j = nylo_in_6; j <= nyhi_in_6; j++)
+      for (i = nxlo_in_6; i <= nxhi_in_6; i++) {
+	vybrick_1[k][j][i] = B[n1]*work2_6[n++];
+        vybrick_2[k][j][i] = B[n2]*work2_6[n++];
+      }
+
+  // z direction gradient
+
+  n = 0;
+  for (k = nzlo_fft_6; k <= nzhi_fft_6; k++)
+    for (j = nylo_fft_6; j <= nyhi_fft_6; j++)
+      for (i = nxlo_fft_6; i <= nxhi_fft_6; i++) {
+	work2_6[n] = 0.5*(fkz_6[k]-fkz2_6[k])*work1_6[n+1];
+	work2_6[n+1] = -0.5*(fkz_6[k]-fkz2_6[k])*work1_6[n];
+	n += 2;
+      }
+
+  fft2_6->compute(work2_6,work2_6,-1);
+
+  n = 0;
+  for (k = nzlo_in_6; k <= nzhi_in_6; k++)
+    for (j = nylo_in_6; j <= nyhi_in_6; j++)
+      for (i = nxlo_in_6; i <= nxhi_in_6; i++) {
+	vzbrick_1[k][j][i] = B[n1]*work2_6[n++];
+	vzbrick_2[k][j][i] = B[n2]*work2_6[n++];
+      }
+
+  // Per-atom energy: transform an unmodified copy of work1_6
+    
+  if (eflag_atom) {
+    n = 0;
+    for (i = 0; i < nfft_6; i++) {
+      work2_6[n] = work1_6[n];
+      work2_6[n+1] = work1_6[n+1];
+      n += 2;
+    }
+    
+    fft2_6->compute(work2_6,work2_6,-1); 
+    
+    n = 0;
+    for (k = nzlo_in_6; k <= nzhi_in_6; k++)
+      for (j = nylo_in_6; j <= nyhi_in_6; j++)
+        for (i = nxlo_in_6; i <= nxhi_in_6; i++) {
+          u_pa[n1][k][j][i] = B[n1]*work2_6[n++];
+          u_pa[n2][k][j][i] = B[n2]*work2_6[n++];
+        }
+  } 
+  // per-atom virial grids (slices n1 and n2 of v0_pa..v5_pa) only when vflag_atom is set
+  if (vflag_atom) poisson_none_peratom(n1,n2,
+                                       v0_pa[n1], v1_pa[n1], v2_pa[n1], v3_pa[n1], v4_pa[n1], v5_pa[n1],
+                                       v0_pa[n2], v1_pa[n2], v2_pa[n2], v3_pa[n2], v4_pa[n2], v5_pa[n2]);
+}
+
+/* ----------------------------------------------------------------------
+   Poisson solver for one mesh with 2 different dispersion densities
+   for ad scheme (fills the potential grids u_pa_1 and u_pa_2)
+------------------------------------------------------------------------- */
+
+void PPPMDisp::poisson_2s_ad(FFT_SCALAR* dfft_1, FFT_SCALAR* dfft_2,
+                              FFT_SCALAR*** u_pa_1, FFT_SCALAR*** v0_pa_1, FFT_SCALAR*** v1_pa_1, FFT_SCALAR*** v2_pa_1,
+                              FFT_SCALAR*** v3_pa_1, FFT_SCALAR*** v4_pa_1, FFT_SCALAR*** v5_pa_1,
+                              FFT_SCALAR*** u_pa_2, FFT_SCALAR*** v0_pa_2, FFT_SCALAR*** v1_pa_2, FFT_SCALAR*** v2_pa_2,
+                              FFT_SCALAR*** v3_pa_2, FFT_SCALAR*** v4_pa_2, FFT_SCALAR*** v5_pa_2)
+// dfft_1,dfft_2 = the two input densities; every *_1 / *_2 grid argument is an output
+{
+  int i,j,k,n;
+  double eng;
+
+  double scaleinv = 1.0/(nx_pppm_6*ny_pppm_6*nz_pppm_6);
+
+  // transform charge/dispersion density (r -> k)
+  // only one transform required when energies and pressures do not
+  //  need to be calculated (dfft_1 -> even entries, dfft_2 -> odd entries)
+  if (eflag_global + vflag_global == 0) {
+    n = 0;
+    for (i = 0; i < nfft_6; i++) {
+      work1_6[n++] = dfft_1[i];
+      work1_6[n++] = dfft_2[i];
+    }
+  
+    fft1_6->compute(work1_6,work1_6,1);
+  }
+  // two transforms are required when energies and pressures are
+  //   calculated
+  else {
+    n = 0;
+    for (i = 0; i < nfft_6; i++) {
+      work1_6[n] = dfft_1[i];
+      work2_6[n++] = ZEROF;
+      work1_6[n] = ZEROF;
+      work2_6[n++] = dfft_2[i];
+    }
+
+    fft1_6->compute(work1_6,work1_6,1);
+    fft1_6->compute(work2_6,work2_6,1);
+
+    double s2 = scaleinv*scaleinv;
+    // energy/virial use the cross term work1[n]*work2[n+1] - work1[n+1]*work2[n]
+    if (vflag_global) {
+      n = 0;
+      for (i = 0; i < nfft_6; i++) {
+	eng = 2 * s2 * greensfn_6[i] * (work1_6[n]*work2_6[n+1] - work1_6[n+1]*work2_6[n]);
+	for (j = 0; j < 6; j++) virial_6[j] += eng*vg_6[i][j];
+	if (eflag_global)energy_6 += eng;
+	n += 2;
+      }
+    } else {
+      n = 0;
+      for (i = 0; i < nfft_6; i++) {
+	energy_6 += 
+	  2 * s2 * greensfn_6[i] * (work1_6[n]*work2_6[n+1] - work1_6[n+1]*work2_6[n]);
+	n += 2;
+      }
+    }
+    // unify the two transformed vectors for efficient calculations later
+    for ( i = 0; i < 2*nfft_6; i++) {
+      work1_6[i] += work2_6[i];
+    }
+  }
+
+  // scale by 1/total-grid-pts and multiply by the Green's function greensfn_6
+  n = 0;
+  for (i = 0; i < nfft_6; i++) {
+    work1_6[n++] *= scaleinv * greensfn_6[i];
+    work1_6[n++] *= scaleinv * greensfn_6[i];
+  }
+
+  // copy work1_6 into work2_6 and transform it with fft2_6 (direction flag -1)
+  n = 0;
+  for (i = 0; i < nfft_6; i++) {
+    work2_6[n] = work1_6[n];
+    work2_6[n+1] = work1_6[n+1];
+    n += 2;
+  }
+    
+  fft2_6->compute(work2_6,work2_6,-1); 
+  // even entries of work2_6 go to u_pa_1, odd entries to u_pa_2
+  n = 0;
+  for (k = nzlo_in_6; k <= nzhi_in_6; k++)
+    for (j = nylo_in_6; j <= nyhi_in_6; j++)
+      for (i = nxlo_in_6; i <= nxhi_in_6; i++) {
+        u_pa_1[k][j][i] = work2_6[n++];
+        u_pa_2[k][j][i] = work2_6[n++];
+      } 
+  // per-atom virial grids are only filled when vflag_atom is set
+  if (vflag_atom) poisson_2s_peratom(v0_pa_1, v1_pa_1, v2_pa_1, v3_pa_1, v4_pa_1, v5_pa_1,
+                                     v0_pa_2, v1_pa_2, v2_pa_2, v3_pa_2, v4_pa_2, v5_pa_2);
+}
+
+/* ----------------------------------------------------------------------
+   Poisson solver for one mesh with 2 different dispersion densities
+   for ad scheme; outputs for density 1 / 2 are multiplied by B[n1] / B[n2]
+------------------------------------------------------------------------- */
+
+void PPPMDisp::poisson_none_ad(int n1, int n2, FFT_SCALAR* dfft_1, FFT_SCALAR* dfft_2,
+                               FFT_SCALAR*** u_pa_1, FFT_SCALAR*** u_pa_2,
+                               FFT_SCALAR**** v0_pa, FFT_SCALAR**** v1_pa, FFT_SCALAR**** v2_pa,
+                               FFT_SCALAR**** v3_pa, FFT_SCALAR**** v4_pa, FFT_SCALAR**** v5_pa)
+{
+  int i,j,k,n;
+  double eng;
+
+  double scaleinv = 1.0/(nx_pppm_6*ny_pppm_6*nz_pppm_6);
+
+  // transform charge/dispersion density (r -> k)
+  // only one transform required when energies and pressures do not
+  //  need to be calculated (dfft_1 -> even entries, dfft_2 -> odd entries)
+  if (eflag_global + vflag_global == 0) {
+    n = 0;
+    for (i = 0; i < nfft_6; i++) {
+      work1_6[n++] = dfft_1[i];
+      work1_6[n++] = dfft_2[i];
+    }
+  
+    fft1_6->compute(work1_6,work1_6,1);
+  }
+  // two transforms are required when energies and pressures are
+  //   calculated
+  else {
+    n = 0;
+    for (i = 0; i < nfft_6; i++) {
+      work1_6[n] = dfft_1[i];
+      work2_6[n++] = ZEROF;
+      work1_6[n] = ZEROF;
+      work2_6[n++] = dfft_2[i];
+    }
+
+    fft1_6->compute(work1_6,work1_6,1);
+    fft1_6->compute(work2_6,work2_6,1);
+
+    double s2 = scaleinv*scaleinv;
+    // energy/virial: B-weighted sum of the squared magnitudes of both transforms
+    if (vflag_global) {
+      n = 0;
+      for (i = 0; i < nfft_6; i++) {
+	eng = s2 * greensfn_6[i] * (B[n1]*(work1_6[n]*work1_6[n] + work1_6[n+1]*work1_6[n+1]) + B[n2]*(work2_6[n]*work2_6[n] + work2_6[n+1]*work2_6[n+1]));
+	for (j = 0; j < 6; j++) virial_6[j] += eng*vg_6[i][j];
+	if (eflag_global)energy_6 += eng;
+	n += 2;
+      }
+    } else {
+      n = 0;
+      for (i = 0; i < nfft_6; i++) {
+	energy_6 += 
+	  s2 * greensfn_6[i] * (B[n1]*(work1_6[n]*work1_6[n] + work1_6[n+1]*work1_6[n+1]) + B[n2]*(work2_6[n]*work2_6[n] + work2_6[n+1]*work2_6[n+1]));
+	n += 2;
+      }
+    }
+    // unify the two transformed vectors for efficient calculations later
+    for ( i = 0; i < 2*nfft_6; i++) {
+      work1_6[i] += work2_6[i];
+    }
+  }
+
+  // scale by 1/total-grid-pts and multiply by the Green's function greensfn_6
+  n = 0;
+  for (i = 0; i < nfft_6; i++) {
+    work1_6[n++] *= scaleinv * greensfn_6[i];
+    work1_6[n++] *= scaleinv * greensfn_6[i];
+  }
+
+  // copy work1_6 into work2_6 and transform it with fft2_6 (direction flag -1)
+  n = 0;
+  for (i = 0; i < nfft_6; i++) {
+    work2_6[n] = work1_6[n];
+    work2_6[n+1] = work1_6[n+1];
+    n += 2;
+  }
+  
+  fft2_6->compute(work2_6,work2_6,-1); 
+  // even entries (times B[n1]) go to u_pa_1, odd entries (times B[n2]) to u_pa_2
+  n = 0;
+  for (k = nzlo_in_6; k <= nzhi_in_6; k++)
+    for (j = nylo_in_6; j <= nyhi_in_6; j++)
+      for (i = nxlo_in_6; i <= nxhi_in_6; i++) {
+        u_pa_1[k][j][i] = B[n1]*work2_6[n++];
+        u_pa_2[k][j][i] = B[n2]*work2_6[n++];
+      } 
+  // per-atom virial grids (slices n1 and n2 of v0_pa..v5_pa) only when vflag_atom is set
+  if (vflag_atom) poisson_none_peratom(n1,n2,
+                                       v0_pa[n1], v1_pa[n1], v2_pa[n1], v3_pa[n1], v4_pa[n1], v5_pa[n1],
+                                       v0_pa[n2], v1_pa[n2], v2_pa[n2], v3_pa[n2], v4_pa[n2], v5_pa[n2]);
+}
+
+/* ----------------------------------------------------------------------
+   Fourier Transform for per atom virial calculations (two densities, one fft2_6 call per term)
+------------------------------------------------------------------------- */
+
+void PPPMDisp::poisson_2s_peratom(FFT_SCALAR*** v0_pa_1, FFT_SCALAR*** v1_pa_1, FFT_SCALAR*** v2_pa_1,
+                                   FFT_SCALAR*** v3_pa_1, FFT_SCALAR*** v4_pa_1, FFT_SCALAR*** v5_pa_1,
+                                   FFT_SCALAR*** v0_pa_2, FFT_SCALAR*** v1_pa_2, FFT_SCALAR*** v2_pa_2,
+                                   FFT_SCALAR*** v3_pa_2, FFT_SCALAR*** v4_pa_2, FFT_SCALAR*** v5_pa_2)
+{
+  // Compute first virial term v0: work1_6 scaled by vg_6[i][0]
+  int n, i, j, k;
+
+  n = 0;
+  for (i = 0; i < nfft_6; i++) {
+    work2_6[n] = work1_6[n]*vg_6[i][0];
+    work2_6[n+1] = work1_6[n+1]*vg_6[i][0];
+    n += 2;
+  }
+   
+  fft2_6->compute(work2_6,work2_6,-1); 
+  // for every term: even entries of work2_6 go to the *_1 grid, odd entries to the *_2 grid
+  n = 0;
+  for (k = nzlo_in_6; k <= nzhi_in_6; k++)
+    for (j = nylo_in_6; j <= nyhi_in_6; j++)
+      for (i = nxlo_in_6; i <= nxhi_in_6; i++) {
+        v0_pa_1[k][j][i] = work2_6[n++];
+        v0_pa_2[k][j][i] = work2_6[n++];
+      }
+	 
+  // Compute second virial term v1: work1_6 scaled by vg_6[i][1]
+  
+  n = 0;
+  for (i = 0; i < nfft_6; i++) {
+    work2_6[n] = work1_6[n]*vg_6[i][1];
+    work2_6[n+1] = work1_6[n+1]*vg_6[i][1];
+    n += 2;
+  }
+    
+  fft2_6->compute(work2_6,work2_6,-1); 
+  
+  n = 0;
+  for (k = nzlo_in_6; k <= nzhi_in_6; k++)
+    for (j = nylo_in_6; j <= nyhi_in_6; j++)
+      for (i = nxlo_in_6; i <= nxhi_in_6; i++) {
+        v1_pa_1[k][j][i] = work2_6[n++];
+        v1_pa_2[k][j][i] = work2_6[n++];
+      }
+	  
+  // Compute third virial term v2: work1_6 scaled by vg_6[i][2]
+   
+  n = 0;
+  for (i = 0; i < nfft_6; i++) {
+    work2_6[n] = work1_6[n]*vg_6[i][2];
+    work2_6[n+1] = work1_6[n+1]*vg_6[i][2];
+    n += 2;
+  }
+    
+  fft2_6->compute(work2_6,work2_6,-1); 
+    
+  n = 0;
+  for (k = nzlo_in_6; k <= nzhi_in_6; k++)
+    for (j = nylo_in_6; j <= nyhi_in_6; j++)
+      for (i = nxlo_in_6; i <= nxhi_in_6; i++) {
+        v2_pa_1[k][j][i] = work2_6[n++];
+        v2_pa_2[k][j][i] = work2_6[n++];
+      }
+
+  // Compute fourth virial term v3: work1_6 scaled by vg2_6[i][0]
+   
+  n = 0;
+  for (i = 0; i < nfft_6; i++) {
+    work2_6[n] = work1_6[n]*vg2_6[i][0];
+    work2_6[n+1] = work1_6[n+1]*vg2_6[i][0];
+    n += 2;
+  }
+    
+  fft2_6->compute(work2_6,work2_6,-1); 
+    
+  n = 0;
+  for (k = nzlo_in_6; k <= nzhi_in_6; k++)
+    for (j = nylo_in_6; j <= nyhi_in_6; j++)
+      for (i = nxlo_in_6; i <= nxhi_in_6; i++) {
+        v3_pa_1[k][j][i] = work2_6[n++];
+        v3_pa_2[k][j][i] = work2_6[n++];
+      }
+
+  // Compute fifth virial term v4: work1_6 scaled by vg2_6[i][1]
+   
+  n = 0;
+  for (i = 0; i < nfft_6; i++) {
+    work2_6[n] = work1_6[n]*vg2_6[i][1];
+    work2_6[n+1] = work1_6[n+1]*vg2_6[i][1];
+    n += 2;
+  }
+    
+  fft2_6->compute(work2_6,work2_6,-1); 
+    
+  n = 0;
+  for (k = nzlo_in_6; k <= nzhi_in_6; k++)
+    for (j = nylo_in_6; j <= nyhi_in_6; j++)
+      for (i = nxlo_in_6; i <= nxhi_in_6; i++) {
+        v4_pa_1[k][j][i] = work2_6[n++];
+        v4_pa_2[k][j][i] = work2_6[n++];
+      }
+   
+  // Compute last virial term v5: work1_6 scaled by vg2_6[i][2]
+   
+  n = 0;
+  for (i = 0; i < nfft_6; i++) {
+    work2_6[n] = work1_6[n]*vg2_6[i][2];
+    work2_6[n+1] = work1_6[n+1]*vg2_6[i][2];
+    n += 2;
+  }
+    
+  fft2_6->compute(work2_6,work2_6,-1); 
+    
+  n = 0;
+  for (k = nzlo_in_6; k <= nzhi_in_6; k++)
+    for (j = nylo_in_6; j <= nyhi_in_6; j++)
+      for (i = nxlo_in_6; i <= nxhi_in_6; i++) {
+        v5_pa_1[k][j][i] = work2_6[n++];
+        v5_pa_2[k][j][i] = work2_6[n++];
+      }
+}
+
+/* ----------------------------------------------------------------------
+   Fourier Transform for per atom virial calculations; outputs are multiplied by B[n1] / B[n2]
+------------------------------------------------------------------------- */
+
+void PPPMDisp::poisson_none_peratom(int n1, int n2,                              
+                                 FFT_SCALAR*** v0_pa_1, FFT_SCALAR*** v1_pa_1, FFT_SCALAR*** v2_pa_1,
+                                 FFT_SCALAR*** v3_pa_1, FFT_SCALAR*** v4_pa_1, FFT_SCALAR*** v5_pa_1,
+                                 FFT_SCALAR*** v0_pa_2, FFT_SCALAR*** v1_pa_2, FFT_SCALAR*** v2_pa_2,
+                                 FFT_SCALAR*** v3_pa_2, FFT_SCALAR*** v4_pa_2, FFT_SCALAR*** v5_pa_2)
+{
+  // Compute first virial term v0: work1_6 scaled by vg_6[i][0]
+  int n, i, j, k;
+
+  n = 0;
+  for (i = 0; i < nfft_6; i++) {
+    work2_6[n] = work1_6[n]*vg_6[i][0];
+    work2_6[n+1] = work1_6[n+1]*vg_6[i][0];
+    n += 2;
+  }
+   
+  fft2_6->compute(work2_6,work2_6,-1); 
+  // for every term: even entries times B[n1] go to *_1, odd entries times B[n2] go to *_2
+  n = 0;
+  for (k = nzlo_in_6; k <= nzhi_in_6; k++)
+    for (j = nylo_in_6; j <= nyhi_in_6; j++)
+      for (i = nxlo_in_6; i <= nxhi_in_6; i++) {
+        v0_pa_1[k][j][i] = B[n1]*work2_6[n++];
+        v0_pa_2[k][j][i] = B[n2]*work2_6[n++];
+      }
+	 
+  // Compute second virial term v1: work1_6 scaled by vg_6[i][1]
+  
+  n = 0;
+  for (i = 0; i < nfft_6; i++) {
+    work2_6[n] = work1_6[n]*vg_6[i][1];
+    work2_6[n+1] = work1_6[n+1]*vg_6[i][1];
+    n += 2;
+  }
+    
+  fft2_6->compute(work2_6,work2_6,-1); 
+  
+  n = 0;
+  for (k = nzlo_in_6; k <= nzhi_in_6; k++)
+    for (j = nylo_in_6; j <= nyhi_in_6; j++)
+      for (i = nxlo_in_6; i <= nxhi_in_6; i++) {
+        v1_pa_1[k][j][i] = B[n1]*work2_6[n++];
+        v1_pa_2[k][j][i] = B[n2]*work2_6[n++];
+      }
+	  
+  // Compute third virial term v2: work1_6 scaled by vg_6[i][2]
+   
+  n = 0;
+  for (i = 0; i < nfft_6; i++) {
+    work2_6[n] = work1_6[n]*vg_6[i][2];
+    work2_6[n+1] = work1_6[n+1]*vg_6[i][2];
+    n += 2;
+  }
+    
+  fft2_6->compute(work2_6,work2_6,-1); 
+    
+  n = 0;
+  for (k = nzlo_in_6; k <= nzhi_in_6; k++)
+    for (j = nylo_in_6; j <= nyhi_in_6; j++)
+      for (i = nxlo_in_6; i <= nxhi_in_6; i++) {
+        v2_pa_1[k][j][i] = B[n1]*work2_6[n++];
+        v2_pa_2[k][j][i] = B[n2]*work2_6[n++];
+      }
+
+  // Compute fourth virial term v3: work1_6 scaled by vg2_6[i][0]
+   
+  n = 0;
+  for (i = 0; i < nfft_6; i++) {
+    work2_6[n] = work1_6[n]*vg2_6[i][0];
+    work2_6[n+1] = work1_6[n+1]*vg2_6[i][0];
+    n += 2;
+  }
+    
+  fft2_6->compute(work2_6,work2_6,-1); 
+    
+  n = 0;
+  for (k = nzlo_in_6; k <= nzhi_in_6; k++)
+    for (j = nylo_in_6; j <= nyhi_in_6; j++)
+      for (i = nxlo_in_6; i <= nxhi_in_6; i++) {
+        v3_pa_1[k][j][i] = B[n1]*work2_6[n++];
+        v3_pa_2[k][j][i] = B[n2]*work2_6[n++];
+      }
+
+  // Compute fifth virial term v4: work1_6 scaled by vg2_6[i][1]
+   
+  n = 0;
+  for (i = 0; i < nfft_6; i++) {
+    work2_6[n] = work1_6[n]*vg2_6[i][1];
+    work2_6[n+1] = work1_6[n+1]*vg2_6[i][1];
+    n += 2;
+  }
+    
+  fft2_6->compute(work2_6,work2_6,-1); 
+    
+  n = 0;
+  for (k = nzlo_in_6; k <= nzhi_in_6; k++)
+    for (j = nylo_in_6; j <= nyhi_in_6; j++)
+      for (i = nxlo_in_6; i <= nxhi_in_6; i++) {
+        v4_pa_1[k][j][i] = B[n1]*work2_6[n++];
+        v4_pa_2[k][j][i] = B[n2]*work2_6[n++];
+      }
+   
+  // Compute last virial term v5: work1_6 scaled by vg2_6[i][2]
+   
+  n = 0;
+  for (i = 0; i < nfft_6; i++) {
+    work2_6[n] = work1_6[n]*vg2_6[i][2];
+    work2_6[n+1] = work1_6[n+1]*vg2_6[i][2];
+    n += 2;
+  }
+    
+  fft2_6->compute(work2_6,work2_6,-1); 
+    
+  n = 0;
+  for (k = nzlo_in_6; k <= nzhi_in_6; k++)
+    for (j = nylo_in_6; j <= nyhi_in_6; j++)
+      for (i = nxlo_in_6; i <= nxhi_in_6; i++) {
+        v5_pa_1[k][j][i] = B[n1]*work2_6[n++];
+        v5_pa_2[k][j][i] = B[n2]*work2_6[n++];
+      }
+}
+ 
+/* ----------------------------------------------------------------------
+   interpolate from grid to get electric field & force on my particles
+   for ik scheme (reads vdx_brick/vdy_brick/vdz_brick, adds to atom->f)
+------------------------------------------------------------------------- */
+
+void PPPMDisp::fieldforce_c_ik()
+{
+  int i,l,m,n,nx,ny,nz,mx,my,mz;
+  FFT_SCALAR dx,dy,dz,x0,y0,z0;
+  FFT_SCALAR ekx,eky,ekz;
+
+  // loop over my charges, interpolate electric field from nearby grid points
+  // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
+  // (dx,dy,dz) = distance to "lower left" grid pt
+  // (mx,my,mz) = global coords of moving stencil pt
+  // ek = 3 components of E-field on particle
+
+  double *q = atom->q;
+  double **x = atom->x;
+  double **f = atom->f;
+
+  int nlocal = atom->nlocal;
+
+  for (i = 0; i < nlocal; i++) {
+    nx = part2grid[i][0];
+    ny = part2grid[i][1];
+    nz = part2grid[i][2];
+    dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv;
+    dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv;
+    dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv;
+
+    compute_rho1d(dx,dy,dz, order, rho_coeff, rho1d);
+    // subtract each stencil grid value weighted by x0 = product of the three rho1d entries
+    ekx = eky = ekz = ZEROF;
+    for (n = nlower; n <= nupper; n++) {
+      mz = n+nz;
+      z0 = rho1d[2][n];
+      for (m = nlower; m <= nupper; m++) {
+	my = m+ny;
+	y0 = z0*rho1d[1][m];
+	for (l = nlower; l <= nupper; l++) {
+	  mx = l+nx;
+	  x0 = y0*rho1d[0][l];
+	  ekx -= x0*vdx_brick[mz][my][mx];
+	  eky -= x0*vdy_brick[mz][my][mx];
+	  ekz -= x0*vdz_brick[mz][my][mx];
+	}
+      }
+    }
+
+    // convert E-field to force
+    // the z component is not applied when slabflag == 2
+    const double qfactor = force->qqrd2e * scale * q[i];
+    f[i][0] += qfactor*ekx;
+    f[i][1] += qfactor*eky;
+    if (slabflag != 2) f[i][2] += qfactor*ekz;
+  }
+}
+/* ----------------------------------------------------------------------
+   interpolate from grid to get electric field & force on my particles
+   for ad scheme (reads u_brick, adds to atom->f)
+------------------------------------------------------------------------- */
+
+void PPPMDisp::fieldforce_c_ad()
+{
+  int i,l,m,n,nx,ny,nz,mx,my,mz;
+  FFT_SCALAR dx,dy,dz;
+  FFT_SCALAR ekx,eky,ekz;
+  double s1,s2,s3;
+  double sf = 0.0;
+  // box lengths: domain->prd, or domain->prd_lamda when triclinic is set
+  double *prd;
+
+  if (triclinic == 0) prd = domain->prd;
+  else prd = domain->prd_lamda;
+
+  double xprd = prd[0];
+  double yprd = prd[1];
+  double zprd = prd[2];
+  double zprd_slab = zprd*slab_volfactor;
+  // grid points per unit length in each dimension (z uses the slab-scaled length)
+  double hx_inv = nx_pppm/xprd;
+  double hy_inv = ny_pppm/yprd;
+  double hz_inv = nz_pppm/zprd_slab;
+
+  // loop over my charges, interpolate electric field from nearby grid points
+  // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
+  // (dx,dy,dz) = distance to "lower left" grid pt
+  // (mx,my,mz) = global coords of moving stencil pt
+  // ek = 3 components of E-field on particle
+
+  double *q = atom->q;
+  double **x = atom->x;
+  double **f = atom->f;
+
+  int nlocal = atom->nlocal;
+
+  for (i = 0; i < nlocal; i++) {
+    nx = part2grid[i][0];
+    ny = part2grid[i][1];
+    nz = part2grid[i][2];
+    dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv;
+    dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv;
+    dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv;
+
+    compute_rho1d(dx,dy,dz, order, rho_coeff, rho1d);
+    compute_drho1d(dx,dy,dz, order, drho_coeff, drho1d);
+    // per direction: sum of u_brick times one drho1d entry and two rho1d entries
+    ekx = eky = ekz = ZEROF;
+    for (n = nlower; n <= nupper; n++) {
+      mz = n+nz;
+      for (m = nlower; m <= nupper; m++) {
+        my = m+ny;
+        for (l = nlower; l <= nupper; l++) {
+          mx = l+nx;
+          ekx += drho1d[0][l]*rho1d[1][m]*rho1d[2][n]*u_brick[mz][my][mx];
+          eky += rho1d[0][l]*drho1d[1][m]*rho1d[2][n]*u_brick[mz][my][mx];
+          ekz += rho1d[0][l]*rho1d[1][m]*drho1d[2][n]*u_brick[mz][my][mx];
+        }
+      }
+    }
+    ekx *= hx_inv;
+    eky *= hy_inv;
+    ekz *= hz_inv;
+    // convert E-field to force and subtract self forces
+    const double qfactor = force->qqrd2e * scale;
+    // sf = 2*q^2 * (sf_coeff-weighted sines of 2*pi*s and 4*pi*s), s = x*h_inv per dimension
+    s1 = x[i][0]*hx_inv;
+    s2 = x[i][1]*hy_inv;
+    s3 = x[i][2]*hz_inv;
+    sf = sf_coeff[0]*sin(2*MY_PI*s1);
+    sf += sf_coeff[1]*sin(4*MY_PI*s1);
+    sf *= 2*q[i]*q[i];
+    f[i][0] += qfactor*(ekx*q[i] - sf);
+
+    sf = sf_coeff[2]*sin(2*MY_PI*s2);
+    sf += sf_coeff[3]*sin(4*MY_PI*s2);
+    sf *= 2*q[i]*q[i];
+    f[i][1] += qfactor*(eky*q[i] - sf);
+
+    // the z component is not applied when slabflag == 2
+    sf = sf_coeff[4]*sin(2*MY_PI*s3);
+    sf += sf_coeff[5]*sin(4*MY_PI*s3);
+    sf *= 2*q[i]*q[i];
+    if (slabflag != 2) f[i][2] += qfactor*(ekz*q[i] - sf);
+  }
+}
+
+/* ----------------------------------------------------------------------
+   interpolate from grid to get per-atom energy/virial of my particles
+------------------------------------------------------------------------- */
+
+void PPPMDisp::fieldforce_c_peratom()
+{
+  int gx,gy,gz;
+  FFT_SCALAR delx,dely,delz,wx,wy,wz;
+  FFT_SCALAR epot,vir0,vir1,vir2,vir3,vir4,vir5;
+
+  // for each owned atom, gather per-atom energy/virial sums from the grid
+  // (ox,oy,oz) = part2grid entry of the atom, origin of its stencil
+  // (delx,dely,delz) = offsets passed to compute_rho1d()
+  // (gx,gy,gz) = grid indices of the stencil point being visited
+  // wz, wy, wx = running product of the 1d weights in z, then y, then x
+
+  double *charge = atom->q;
+  double **pos = atom->x;
+
+  const int natoms = atom->nlocal;
+
+  for (int ia = 0; ia < natoms; ++ia) {
+    const int ox = part2grid[ia][0];
+    const int oy = part2grid[ia][1];
+    const int oz = part2grid[ia][2];
+    delx = ox+shiftone - (pos[ia][0]-boxlo[0])*delxinv;
+    dely = oy+shiftone - (pos[ia][1]-boxlo[1])*delyinv;
+    delz = oz+shiftone - (pos[ia][2]-boxlo[2])*delzinv;
+
+    compute_rho1d(delx,dely,delz, order, rho_coeff, rho1d);
+
+    epot = vir0 = vir1 = vir2 = vir3 = vir4 = vir5 = ZEROF;
+    for (int kz = nlower; kz <= nupper; ++kz) {
+      gz = kz+oz;
+      wz = rho1d[2][kz];
+      for (int ky = nlower; ky <= nupper; ++ky) {
+        gy = ky+oy;
+        wy = wz*rho1d[1][ky];
+        for (int kx = nlower; kx <= nupper; ++kx) {
+          gx = kx+ox;
+          wx = wy*rho1d[0][kx];
+          if (eflag_atom) epot += wx*u_brick[gz][gy][gx];
+          if (vflag_atom) {
+            vir0 += wx*v0_brick[gz][gy][gx];
+            vir1 += wx*v1_brick[gz][gy][gx];
+            vir2 += wx*v2_brick[gz][gy][gx];
+            vir3 += wx*v3_brick[gz][gy][gx];
+            vir4 += wx*v4_brick[gz][gy][gx];
+            vir5 += wx*v5_brick[gz][gy][gx];
+          }
+        }
+      }
+    }
+
+    // scale the gathered sums by 0.5*qqrd2e*scale*q and accumulate
+
+    const double pref = 0.5*force->qqrd2e * scale * charge[ia];
+
+    if (eflag_atom) eatom[ia] += epot*pref;
+    if (vflag_atom) {
+      vatom[ia][0] += vir0*pref;
+      vatom[ia][1] += vir1*pref;
+      vatom[ia][2] += vir2*pref;
+      vatom[ia][3] += vir3*pref;
+      vatom[ia][4] += vir4*pref;
+      vatom[ia][5] += vir5*pref;
+    }
+  }
+}
+
+/* ----------------------------------------------------------------------
+   interpolate from grid to get dispersion field & force on my particles
+   for geometric mixing rule 
+------------------------------------------------------------------------- */
+
+void PPPMDisp::fieldforce_g_ik()
+{
+  int gx,gy,gz;
+  FFT_SCALAR delx,dely,delz,wx,wy,wz;
+  FFT_SCALAR fldx,fldy,fldz;
+
+  // for each owned atom, gather the dispersion field from nearby grid points
+  // (ox,oy,oz) = part2grid_6 entry of the atom, origin of its stencil
+  // (delx,dely,delz) = offsets passed to compute_rho1d()
+  // (gx,gy,gz) = grid indices of the stencil point being visited
+  // fld = 3 components of dispersion field on particle
+
+  double **pos = atom->x;
+  double **frc = atom->f;
+  int itype;
+  double bcoef;
+
+  const int natoms = atom->nlocal;
+
+  for (int ia = 0; ia < natoms; ++ia) {
+    const int ox = part2grid_6[ia][0];
+    const int oy = part2grid_6[ia][1];
+    const int oz = part2grid_6[ia][2];
+    delx = ox+shiftone_6 - (pos[ia][0]-boxlo[0])*delxinv_6;
+    dely = oy+shiftone_6 - (pos[ia][1]-boxlo[1])*delyinv_6;
+    delz = oz+shiftone_6 - (pos[ia][2]-boxlo[2])*delzinv_6;
+
+    compute_rho1d(delx,dely,delz, order_6, rho_coeff_6, rho1d_6);
+
+    fldx = fldy = fldz = ZEROF;
+    for (int kz = nlower_6; kz <= nupper_6; ++kz) {
+      gz = kz+oz;
+      wz = rho1d_6[2][kz];
+      for (int ky = nlower_6; ky <= nupper_6; ++ky) {
+        gy = ky+oy;
+        wy = wz*rho1d_6[1][ky];
+        for (int kx = nlower_6; kx <= nupper_6; ++kx) {
+          gx = kx+ox;
+          wx = wy*rho1d_6[0][kx];
+          fldx -= wx*vdx_brick_g[gz][gy][gx];
+          fldy -= wx*vdy_brick_g[gz][gy][gx];
+          fldz -= wx*vdz_brick_g[gz][gy][gx];
+        }
+      }
+    }
+
+    // force = B[type] * gathered field; z is left alone when slabflag == 2
+    itype = atom->type[ia];
+    bcoef = B[itype];
+    frc[ia][0] += bcoef*fldx;
+    frc[ia][1] += bcoef*fldy;
+    if (slabflag != 2) frc[ia][2] += bcoef*fldz;
+  }
+}
+
+/* ----------------------------------------------------------------------
+   interpolate from grid to get dispersion field & force on my particles
+   for geometric mixing rule for ad scheme
+------------------------------------------------------------------------- */
+
+void PPPMDisp::fieldforce_g_ad()
+{
+  int gx,gy,gz;
+  FFT_SCALAR delx,dely,delz;
+  FFT_SCALAR fldx,fldy,fldz;
+  double sx,sy,sz;
+  double selfc = 0.0;
+  double *boxlen;
+
+  if (triclinic != 0) boxlen = domain->prd_lamda;
+  else boxlen = domain->prd;
+
+  const double lenx = boxlen[0];
+  const double leny = boxlen[1];
+  const double lenz = boxlen[2];
+  const double lenz_slab = lenz*slab_volfactor;
+
+  const double gridx_inv = nx_pppm_6/lenx;
+  const double gridy_inv = ny_pppm_6/leny;
+  const double gridz_inv = nz_pppm_6/lenz_slab;
+
+  // for each owned atom, gather the dispersion field from nearby grid points
+  // (ox,oy,oz) = part2grid_6 entry of the atom, origin of its stencil
+  // (delx,dely,delz) = offsets passed to compute_rho1d()/compute_drho1d()
+  // (gx,gy,gz) = grid indices of the stencil point being visited
+  // fld = 3 components of dispersion field on particle
+
+  double **pos = atom->x;
+  double **frc = atom->f;
+  int itype;
+  double bcoef;
+
+  const int natoms = atom->nlocal;
+
+  // ad scheme: u_brick_g is gathered with one differentiated weight per dim
+  for (int ia = 0; ia < natoms; ++ia) {
+    const int ox = part2grid_6[ia][0];
+    const int oy = part2grid_6[ia][1];
+    const int oz = part2grid_6[ia][2];
+    delx = ox+shiftone_6 - (pos[ia][0]-boxlo[0])*delxinv_6;
+    dely = oy+shiftone_6 - (pos[ia][1]-boxlo[1])*delyinv_6;
+    delz = oz+shiftone_6 - (pos[ia][2]-boxlo[2])*delzinv_6;
+
+    compute_rho1d(delx,dely,delz, order_6, rho_coeff_6, rho1d_6);
+    compute_drho1d(delx,dely,delz, order_6, drho_coeff_6, drho1d_6);
+
+    // one pass over the stencil accumulates all three components
+    fldx = fldy = fldz = ZEROF;
+    for (int kz = nlower_6; kz <= nupper_6; ++kz) {
+      gz = kz+oz;
+      for (int ky = nlower_6; ky <= nupper_6; ++ky) {
+        gy = ky+oy;
+        for (int kx = nlower_6; kx <= nupper_6; ++kx) {
+          gx = kx+ox;
+          fldx += drho1d_6[0][kx]*rho1d_6[1][ky]*rho1d_6[2][kz]*u_brick_g[gz][gy][gx];
+          fldy += rho1d_6[0][kx]*drho1d_6[1][ky]*rho1d_6[2][kz]*u_brick_g[gz][gy][gx];
+          fldz += rho1d_6[0][kx]*rho1d_6[1][ky]*drho1d_6[2][kz]*u_brick_g[gz][gy][gx];
+        }
+      }
+    }
+    fldx *= gridx_inv;
+    fldy *= gridy_inv;
+    fldz *= gridz_inv;
+
+    // force = B[type]*field minus a sin-series self term scaled by 2*B*B
+    itype = atom->type[ia];
+    bcoef = B[itype];
+
+    sx = pos[ia][0]*gridx_inv;
+    sy = pos[ia][1]*gridy_inv;
+    sz = pos[ia][2]*gridz_inv;
+
+    selfc = sf_coeff_6[0]*sin(2*MY_PI*sx);
+    selfc += sf_coeff_6[1]*sin(4*MY_PI*sx);
+    selfc *= 2*bcoef*bcoef;
+    frc[ia][0] += fldx*bcoef - selfc;
+
+    selfc = sf_coeff_6[2]*sin(2*MY_PI*sy);
+    selfc += sf_coeff_6[3]*sin(4*MY_PI*sy);
+    selfc *= 2*bcoef*bcoef;
+    frc[ia][1] += fldy*bcoef - selfc;
+
+    // z force is only applied when slabflag != 2
+    selfc = sf_coeff_6[4]*sin(2*MY_PI*sz);
+    selfc += sf_coeff_6[5]*sin(4*MY_PI*sz);
+    selfc *= 2*bcoef*bcoef;
+    if (slabflag != 2) frc[ia][2] += fldz*bcoef - selfc;
+
+  }
+}
+
+/* ----------------------------------------------------------------------
+   interpolate from grid to get dispersion field & force on my particles
+   for geometric mixing rule for per atom quantities
+------------------------------------------------------------------------- */
+
+void PPPMDisp::fieldforce_g_peratom()
+{
+  int gx,gy,gz;
+  FFT_SCALAR delx,dely,delz,wx,wy,wz;
+  FFT_SCALAR epot,vir0,vir1,vir2,vir3,vir4,vir5;
+
+  // for each owned atom, gather per-atom energy/virial sums from the grid
+  // (ox,oy,oz) = part2grid_6 entry of the atom, origin of its stencil
+  // (delx,dely,delz) = offsets passed to compute_rho1d()
+  // (gx,gy,gz) = grid indices of the stencil point being visited
+  // wz, wy, wx = running product of the 1d weights in z, then y, then x
+
+  double **pos = atom->x;
+  int itype;
+  double halfb;
+
+  const int natoms = atom->nlocal;
+
+  for (int ia = 0; ia < natoms; ++ia) {
+    const int ox = part2grid_6[ia][0];
+    const int oy = part2grid_6[ia][1];
+    const int oz = part2grid_6[ia][2];
+    delx = ox+shiftone_6 - (pos[ia][0]-boxlo[0])*delxinv_6;
+    dely = oy+shiftone_6 - (pos[ia][1]-boxlo[1])*delyinv_6;
+    delz = oz+shiftone_6 - (pos[ia][2]-boxlo[2])*delzinv_6;
+
+    compute_rho1d(delx,dely,delz, order_6, rho_coeff_6, rho1d_6);
+
+    epot = vir0 = vir1 = vir2 = vir3 = vir4 = vir5 = ZEROF;
+    for (int kz = nlower_6; kz <= nupper_6; ++kz) {
+      gz = kz+oz;
+      wz = rho1d_6[2][kz];
+      for (int ky = nlower_6; ky <= nupper_6; ++ky) {
+        gy = ky+oy;
+        wy = wz*rho1d_6[1][ky];
+        for (int kx = nlower_6; kx <= nupper_6; ++kx) {
+          gx = kx+ox;
+          wx = wy*rho1d_6[0][kx];
+          if (eflag_atom) epot += wx*u_brick_g[gz][gy][gx];
+          if (vflag_atom) {
+            vir0 += wx*v0_brick_g[gz][gy][gx];
+            vir1 += wx*v1_brick_g[gz][gy][gx];
+            vir2 += wx*v2_brick_g[gz][gy][gx];
+            vir3 += wx*v3_brick_g[gz][gy][gx];
+            vir4 += wx*v4_brick_g[gz][gy][gx];
+            vir5 += wx*v5_brick_g[gz][gy][gx];
+          }
+        }
+      }
+    }
+
+    // scale the gathered sums by 0.5*B[type] and accumulate
+    itype = atom->type[ia];
+    halfb = B[itype]*0.5;
+
+    if (eflag_atom) eatom[ia] += epot*halfb;
+    if (vflag_atom) {
+      vatom[ia][0] += vir0*halfb;
+      vatom[ia][1] += vir1*halfb;
+      vatom[ia][2] += vir2*halfb;
+      vatom[ia][3] += vir3*halfb;
+      vatom[ia][4] += vir4*halfb;
+      vatom[ia][5] += vir5*halfb;
+    }
+  }
+}
+
+/* ----------------------------------------------------------------------
+   interpolate from grid to get dispersion field & force on my particles
+   for arithmetic mixing rule and ik scheme
+------------------------------------------------------------------------- */
+
+void PPPMDisp::fieldforce_a_ik()
+{
+  int gx,gy,gz;
+  FFT_SCALAR delx,dely,delz,wx,wy,wz;
+  FFT_SCALAR ex0, ey0, ez0, ex1, ey1, ez1, ex2, ey2, ez2;
+  FFT_SCALAR ex3, ey3, ez3, ex4, ey4, ez4, ex5, ey5, ez5;
+  FFT_SCALAR ex6, ey6, ez6;
+
+  // for each owned atom, gather seven dispersion fields from the grid
+  // (ox,oy,oz) = part2grid_6 entry of the atom, origin of its stencil
+  // (delx,dely,delz) = offsets passed to compute_rho1d()
+  // (gx,gy,gz) = grid indices of the stencil point being visited
+  // (exK,eyK,ezK) = field gathered from the vd{x,y,z}_brick_aK grids
+
+  double **pos = atom->x;
+  double **frc = atom->f;
+  int itype;
+  double b0, b1, b2, b3, b4, b5, b6;
+
+  const int natoms = atom->nlocal;
+
+  for (int ia = 0; ia < natoms; ++ia) {
+
+    const int ox = part2grid_6[ia][0];
+    const int oy = part2grid_6[ia][1];
+    const int oz = part2grid_6[ia][2];
+    delx = ox+shiftone_6 - (pos[ia][0]-boxlo[0])*delxinv_6;
+    dely = oy+shiftone_6 - (pos[ia][1]-boxlo[1])*delyinv_6;
+    delz = oz+shiftone_6 - (pos[ia][2]-boxlo[2])*delzinv_6;
+    compute_rho1d(delx,dely,delz, order_6, rho_coeff_6, rho1d_6);
+    ex0 = ey0 = ez0 = ZEROF;
+    ex1 = ey1 = ez1 = ZEROF;
+    ex2 = ey2 = ez2 = ZEROF;
+    ex3 = ey3 = ez3 = ZEROF;
+    ex4 = ey4 = ez4 = ZEROF;
+    ex5 = ey5 = ez5 = ZEROF;
+    ex6 = ey6 = ez6 = ZEROF;
+    for (int kz = nlower_6; kz <= nupper_6; ++kz) {
+      gz = kz+oz;
+      wz = rho1d_6[2][kz];
+      for (int ky = nlower_6; ky <= nupper_6; ++ky) {
+        gy = ky+oy;
+        wy = wz*rho1d_6[1][ky];
+        for (int kx = nlower_6; kx <= nupper_6; ++kx) {
+          gx = kx+ox;
+          wx = wy*rho1d_6[0][kx];
+          ex0 -= wx*vdx_brick_a0[gz][gy][gx];
+          ey0 -= wx*vdy_brick_a0[gz][gy][gx];
+          ez0 -= wx*vdz_brick_a0[gz][gy][gx];
+          ex1 -= wx*vdx_brick_a1[gz][gy][gx];
+          ey1 -= wx*vdy_brick_a1[gz][gy][gx];
+          ez1 -= wx*vdz_brick_a1[gz][gy][gx];
+          ex2 -= wx*vdx_brick_a2[gz][gy][gx];
+          ey2 -= wx*vdy_brick_a2[gz][gy][gx];
+          ez2 -= wx*vdz_brick_a2[gz][gy][gx];
+          ex3 -= wx*vdx_brick_a3[gz][gy][gx];
+          ey3 -= wx*vdy_brick_a3[gz][gy][gx];
+          ez3 -= wx*vdz_brick_a3[gz][gy][gx];
+          ex4 -= wx*vdx_brick_a4[gz][gy][gx];
+          ey4 -= wx*vdy_brick_a4[gz][gy][gx];
+          ez4 -= wx*vdz_brick_a4[gz][gy][gx];
+          ex5 -= wx*vdx_brick_a5[gz][gy][gx];
+          ey5 -= wx*vdy_brick_a5[gz][gy][gx];
+          ez5 -= wx*vdz_brick_a5[gz][gy][gx];
+          ex6 -= wx*vdx_brick_a6[gz][gy][gx];
+          ey6 -= wx*vdy_brick_a6[gz][gy][gx];
+          ez6 -= wx*vdz_brick_a6[gz][gy][gx];
+        }
+      }
+    }
+    // field K is paired with coefficient B[7*type + 6-K] when forming the force
+    itype = atom->type[ia];
+    b0 = B[7*itype+6];
+    b1 = B[7*itype+5];
+    b2 = B[7*itype+4];
+    b3 = B[7*itype+3];
+    b4 = B[7*itype+2];
+    b5 = B[7*itype+1];
+    b6 = B[7*itype];
+    frc[ia][0] += b0*ex0 + b1*ex1 + b2*ex2 + b3*ex3 + b4*ex4 + b5*ex5 + b6*ex6;
+    frc[ia][1] += b0*ey0 + b1*ey1 + b2*ey2 + b3*ey3 + b4*ey4 + b5*ey5 + b6*ey6;
+    if (slabflag != 2) frc[ia][2] += b0*ez0 + b1*ez1 + b2*ez2 + b3*ez3 + b4*ez4 + b5*ez5 + b6*ez6;
+  }
+}
+
+/* ----------------------------------------------------------------------
+   interpolate from grid to get dispersion field & force on my particles
+   for arithmetic mixing rule for the ad scheme
+------------------------------------------------------------------------- */
+
+void PPPMDisp::fieldforce_a_ad()
+{
+  int i,l,m,n,nx,ny,nz,mx,my,mz;
+  FFT_SCALAR dx,dy,dz,x0,y0,z0;
+  FFT_SCALAR ekx0, eky0, ekz0, ekx1, eky1, ekz1, ekx2, eky2, ekz2;
+  FFT_SCALAR ekx3, eky3, ekz3, ekx4, eky4, ekz4, ekx5, eky5, ekz5;
+  FFT_SCALAR ekx6, eky6, ekz6;
+
+  double s1,s2,s3;
+  double sf,sfpre;
+  double *prd;
+
+  if (triclinic == 0) prd = domain->prd;
+  else prd = domain->prd_lamda;
+
+  double xprd = prd[0];
+  double yprd = prd[1];
+  double zprd = prd[2];
+  double zprd_slab = zprd*slab_volfactor;
+
+  double hx_inv = nx_pppm_6/xprd;
+  double hy_inv = ny_pppm_6/yprd;
+  double hz_inv = nz_pppm_6/zprd_slab;
+
+  // loop over my atoms, interpolate dispersion field from nearby grid points
+  // (nx,ny,nz) = global coords of grid pt to "lower left" of atom
+  // (dx,dy,dz) = distance to "lower left" grid pt
+  // (mx,my,mz) = global coords of moving stencil pt
+  // ek = 3 components of dispersion field on particle
+
+  double **x = atom->x;
+  double **f = atom->f;
+  int type;
+  double lj0, lj1, lj2, lj3, lj4, lj5, lj6;
+
+  int nlocal = atom->nlocal;
+
+  for (i = 0; i < nlocal; i++) {
+
+    nx = part2grid_6[i][0];
+    ny = part2grid_6[i][1];
+    nz = part2grid_6[i][2];
+    dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6;
+    dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6;
+    dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6;
+
+    compute_rho1d(dx,dy,dz, order_6, rho_coeff_6, rho1d_6);
+    compute_drho1d(dx,dy,dz, order_6, drho_coeff_6, drho1d_6);
+    // x0,y0,z0 below = weight products with the x,y,z factor differentiated
+    ekx0 = eky0 = ekz0 = ZEROF;
+    ekx1 = eky1 = ekz1 = ZEROF;
+    ekx2 = eky2 = ekz2 = ZEROF;
+    ekx3 = eky3 = ekz3 = ZEROF;
+    ekx4 = eky4 = ekz4 = ZEROF;
+    ekx5 = eky5 = ekz5 = ZEROF;
+    ekx6 = eky6 = ekz6 = ZEROF;
+    for (n = nlower_6; n <= nupper_6; n++) {
+      mz = n+nz;
+      for (m = nlower_6; m <= nupper_6; m++) {
+        my = m+ny;
+        for (l = nlower_6; l <= nupper_6; l++) {
+          mx = l+nx;
+          x0 = drho1d_6[0][l]*rho1d_6[1][m]*rho1d_6[2][n];
+          y0 = rho1d_6[0][l]*drho1d_6[1][m]*rho1d_6[2][n];
+          z0 = rho1d_6[0][l]*rho1d_6[1][m]*drho1d_6[2][n];
+
+          ekx0 += x0*u_brick_a0[mz][my][mx];
+          eky0 += y0*u_brick_a0[mz][my][mx];
+          ekz0 += z0*u_brick_a0[mz][my][mx];
+
+          ekx1 += x0*u_brick_a1[mz][my][mx];
+          eky1 += y0*u_brick_a1[mz][my][mx];
+          ekz1 += z0*u_brick_a1[mz][my][mx];
+
+          ekx2 += x0*u_brick_a2[mz][my][mx];
+          eky2 += y0*u_brick_a2[mz][my][mx];
+          ekz2 += z0*u_brick_a2[mz][my][mx];
+
+          ekx3 += x0*u_brick_a3[mz][my][mx];
+          eky3 += y0*u_brick_a3[mz][my][mx];
+          ekz3 += z0*u_brick_a3[mz][my][mx];
+
+          ekx4 += x0*u_brick_a4[mz][my][mx];
+          eky4 += y0*u_brick_a4[mz][my][mx];
+          ekz4 += z0*u_brick_a4[mz][my][mx];
+
+          ekx5 += x0*u_brick_a5[mz][my][mx];
+          eky5 += y0*u_brick_a5[mz][my][mx];
+          ekz5 += z0*u_brick_a5[mz][my][mx];
+
+          ekx6 += x0*u_brick_a6[mz][my][mx];
+          eky6 += y0*u_brick_a6[mz][my][mx];
+          ekz6 += z0*u_brick_a6[mz][my][mx];
+        }
+      }
+    }
+
+    ekx0 *= hx_inv;
+    eky0 *= hy_inv;
+    ekz0 *= hz_inv;
+
+    ekx1 *= hx_inv;
+    eky1 *= hy_inv;
+    ekz1 *= hz_inv;
+
+    ekx2 *= hx_inv;
+    eky2 *= hy_inv;
+    ekz2 *= hz_inv;
+
+    ekx3 *= hx_inv;
+    eky3 *= hy_inv;
+    ekz3 *= hz_inv;
+
+    ekx4 *= hx_inv;
+    eky4 *= hy_inv;
+    ekz4 *= hz_inv;
+
+    ekx5 *= hx_inv;
+    eky5 *= hy_inv;
+    ekz5 *= hz_inv;
+
+    ekx6 *= hx_inv;
+    eky6 *= hy_inv;
+    ekz6 *= hz_inv;
+
+    // convert D-field to force
+    type = atom->type[i];
+    lj0 = B[7*type+6];
+    lj1 = B[7*type+5];
+    lj2 = B[7*type+4];
+    lj3 = B[7*type+3];
+    lj4 = B[7*type+2];
+    lj5 = B[7*type+1];
+    lj6 = B[7*type];
+    sfpre = 4*lj0*lj6 + 4*lj1*lj5 + 4*lj2*lj4 + 2*lj3*lj3;
+    s1 = x[i][0]*hx_inv;
+    s2 = x[i][1]*hy_inv;
+    s3 = x[i][2]*hz_inv;
+    // self force: sfpre is the same in x,y,z, so it is evaluated once above
+    sf = sf_coeff_6[0]*sin(2*MY_PI*s1);
+    sf += sf_coeff_6[1]*sin(4*MY_PI*s1);
+    sf *= sfpre;
+    f[i][0] += lj0*ekx0 + lj1*ekx1 + lj2*ekx2 + lj3*ekx3 + lj4*ekx4 + lj5*ekx5 + lj6*ekx6 - sf;
+
+    sf = sf_coeff_6[2]*sin(2*MY_PI*s2);
+    sf += sf_coeff_6[3]*sin(4*MY_PI*s2);
+    sf *= sfpre;
+    f[i][1] += lj0*eky0 + lj1*eky1 + lj2*eky2 + lj3*eky3 + lj4*eky4 + lj5*eky5 + lj6*eky6 - sf;
+
+    sf = sf_coeff_6[4]*sin(2*MY_PI*s3);
+    sf += sf_coeff_6[5]*sin(4*MY_PI*s3);
+    sf *= sfpre;
+    if (slabflag != 2) f[i][2] += lj0*ekz0 + lj1*ekz1 + lj2*ekz2 + lj3*ekz3 + lj4*ekz4 + lj5*ekz5 + lj6*ekz6 - sf;
+  }
+}
+
+/* ----------------------------------------------------------------------
+   interpolate from grid to get dispersion field & force on my particles
+   for arithmetic mixing rule for per atom quantities
+------------------------------------------------------------------------- */
+
+void PPPMDisp::fieldforce_a_peratom()
+{
+  int gx,gy,gz;
+  FFT_SCALAR delx,dely,delz,wgt,wy,wz;
+  FFT_SCALAR u_pa0,v00,v10,v20,v30,v40,v50;
+  FFT_SCALAR u_pa1,v01,v11,v21,v31,v41,v51;
+  FFT_SCALAR u_pa2,v02,v12,v22,v32,v42,v52;
+  FFT_SCALAR u_pa3,v03,v13,v23,v33,v43,v53;
+  FFT_SCALAR u_pa4,v04,v14,v24,v34,v44,v54;
+  FFT_SCALAR u_pa5,v05,v15,v25,v35,v45,v55;
+  FFT_SCALAR u_pa6,v06,v16,v26,v36,v46,v56;
+
+  // for each owned atom, gather per-atom energy/virial sums from the grid
+  // (ox,oy,oz) = part2grid_6 entry of the atom, origin of its stencil
+  // (delx,dely,delz) = offsets passed to compute_rho1d()
+  // (gx,gy,gz) = grid indices of the stencil point being visited
+  // u_paK / vJK = sums gathered from u_brick_aK / vJ_brick_aK
+
+  double **pos = atom->x;
+  int itype;
+  double hb0, hb1, hb2, hb3, hb4, hb5, hb6;
+
+  const int natoms = atom->nlocal;
+
+  for (int ia = 0; ia < natoms; ++ia) {
+
+    const int ox = part2grid_6[ia][0];
+    const int oy = part2grid_6[ia][1];
+    const int oz = part2grid_6[ia][2];
+    delx = ox+shiftone_6 - (pos[ia][0]-boxlo[0])*delxinv_6;
+    dely = oy+shiftone_6 - (pos[ia][1]-boxlo[1])*delyinv_6;
+    delz = oz+shiftone_6 - (pos[ia][2]-boxlo[2])*delzinv_6;
+    compute_rho1d(delx,dely,delz, order_6, rho_coeff_6, rho1d_6);
+
+    u_pa0 = v00 = v10 = v20 = v30 = v40 = v50 = ZEROF;
+    u_pa1 = v01 = v11 = v21 = v31 = v41 = v51 = ZEROF;
+    u_pa2 = v02 = v12 = v22 = v32 = v42 = v52 = ZEROF;
+    u_pa3 = v03 = v13 = v23 = v33 = v43 = v53 = ZEROF;
+    u_pa4 = v04 = v14 = v24 = v34 = v44 = v54 = ZEROF;
+    u_pa5 = v05 = v15 = v25 = v35 = v45 = v55 = ZEROF;
+    u_pa6 = v06 = v16 = v26 = v36 = v46 = v56 = ZEROF;
+    for (int kz = nlower_6; kz <= nupper_6; ++kz) {
+      gz = kz+oz;
+      wz = rho1d_6[2][kz];
+      for (int ky = nlower_6; ky <= nupper_6; ++ky) {
+        gy = ky+oy;
+        wy = wz*rho1d_6[1][ky];
+        for (int kx = nlower_6; kx <= nupper_6; ++kx) {
+          gx = kx+ox;
+          wgt = wy*rho1d_6[0][kx];
+          if (eflag_atom) {
+            u_pa0 += wgt*u_brick_a0[gz][gy][gx];
+            u_pa1 += wgt*u_brick_a1[gz][gy][gx];
+            u_pa2 += wgt*u_brick_a2[gz][gy][gx];
+            u_pa3 += wgt*u_brick_a3[gz][gy][gx];
+            u_pa4 += wgt*u_brick_a4[gz][gy][gx];
+            u_pa5 += wgt*u_brick_a5[gz][gy][gx];
+            u_pa6 += wgt*u_brick_a6[gz][gy][gx];
+          }
+          if (vflag_atom) {
+            v00 += wgt*v0_brick_a0[gz][gy][gx];
+            v10 += wgt*v1_brick_a0[gz][gy][gx];
+            v20 += wgt*v2_brick_a0[gz][gy][gx];
+            v30 += wgt*v3_brick_a0[gz][gy][gx];
+            v40 += wgt*v4_brick_a0[gz][gy][gx];
+            v50 += wgt*v5_brick_a0[gz][gy][gx];
+            v01 += wgt*v0_brick_a1[gz][gy][gx];
+            v11 += wgt*v1_brick_a1[gz][gy][gx];
+            v21 += wgt*v2_brick_a1[gz][gy][gx];
+            v31 += wgt*v3_brick_a1[gz][gy][gx];
+            v41 += wgt*v4_brick_a1[gz][gy][gx];
+            v51 += wgt*v5_brick_a1[gz][gy][gx];
+            v02 += wgt*v0_brick_a2[gz][gy][gx];
+            v12 += wgt*v1_brick_a2[gz][gy][gx];
+            v22 += wgt*v2_brick_a2[gz][gy][gx];
+            v32 += wgt*v3_brick_a2[gz][gy][gx];
+            v42 += wgt*v4_brick_a2[gz][gy][gx];
+            v52 += wgt*v5_brick_a2[gz][gy][gx];
+            v03 += wgt*v0_brick_a3[gz][gy][gx];
+            v13 += wgt*v1_brick_a3[gz][gy][gx];
+            v23 += wgt*v2_brick_a3[gz][gy][gx];
+            v33 += wgt*v3_brick_a3[gz][gy][gx];
+            v43 += wgt*v4_brick_a3[gz][gy][gx];
+            v53 += wgt*v5_brick_a3[gz][gy][gx];
+            v04 += wgt*v0_brick_a4[gz][gy][gx];
+            v14 += wgt*v1_brick_a4[gz][gy][gx];
+            v24 += wgt*v2_brick_a4[gz][gy][gx];
+            v34 += wgt*v3_brick_a4[gz][gy][gx];
+            v44 += wgt*v4_brick_a4[gz][gy][gx];
+            v54 += wgt*v5_brick_a4[gz][gy][gx];
+            v05 += wgt*v0_brick_a5[gz][gy][gx];
+            v15 += wgt*v1_brick_a5[gz][gy][gx];
+            v25 += wgt*v2_brick_a5[gz][gy][gx];
+            v35 += wgt*v3_brick_a5[gz][gy][gx];
+            v45 += wgt*v4_brick_a5[gz][gy][gx];
+            v55 += wgt*v5_brick_a5[gz][gy][gx];
+            v06 += wgt*v0_brick_a6[gz][gy][gx];
+            v16 += wgt*v1_brick_a6[gz][gy][gx];
+            v26 += wgt*v2_brick_a6[gz][gy][gx];
+            v36 += wgt*v3_brick_a6[gz][gy][gx];
+            v46 += wgt*v4_brick_a6[gz][gy][gx];
+            v56 += wgt*v5_brick_a6[gz][gy][gx];
+          }
+        }
+      }
+    }
+    // hbK = 0.5*B[7*type + 6-K] weights the sums gathered from the aK grids
+    itype = atom->type[ia];
+    hb0 = B[7*itype+6]*0.5;
+    hb1 = B[7*itype+5]*0.5;
+    hb2 = B[7*itype+4]*0.5;
+    hb3 = B[7*itype+3]*0.5;
+    hb4 = B[7*itype+2]*0.5;
+    hb5 = B[7*itype+1]*0.5;
+    hb6 = B[7*itype]*0.5;
+
+    // accumulate into eatom / vatom only for the flags that are set
+    if (eflag_atom)
+      eatom[ia] += u_pa0*hb0 + u_pa1*hb1 + u_pa2*hb2 +
+        u_pa3*hb3 + u_pa4*hb4 + u_pa5*hb5 + u_pa6*hb6;
+    if (vflag_atom) {
+      vatom[ia][0] += v00*hb0 + v01*hb1 + v02*hb2 + v03*hb3 +
+        v04*hb4 + v05*hb5 + v06*hb6;
+      vatom[ia][1] += v10*hb0 + v11*hb1 + v12*hb2 + v13*hb3 +
+        v14*hb4 + v15*hb5 + v16*hb6;
+      vatom[ia][2] += v20*hb0 + v21*hb1 + v22*hb2 + v23*hb3 +
+        v24*hb4 + v25*hb5 + v26*hb6;
+      vatom[ia][3] += v30*hb0 + v31*hb1 + v32*hb2 + v33*hb3 +
+        v34*hb4 + v35*hb5 + v36*hb6;
+      vatom[ia][4] += v40*hb0 + v41*hb1 + v42*hb2 + v43*hb3 +
+        v44*hb4 + v45*hb5 + v46*hb6;
+      vatom[ia][5] += v50*hb0 + v51*hb1 + v52*hb2 + v53*hb3 +
+        v54*hb4 + v55*hb5 + v56*hb6;
+    }
+  }
+}
+
+/* ----------------------------------------------------------------------
+   interpolate from grid to get dispersion field & force on my particles
+   for no mixing rule and ik scheme
+------------------------------------------------------------------------- */
+
+void PPPMDisp::fieldforce_none_ik()
+{
+  int gx,gy,gz;
+  FFT_SCALAR delx,dely,delz,wx,wy,wz;
+  FFT_SCALAR *fldx, *fldy, *fldz;
+
+  fldx = new FFT_SCALAR[nsplit];
+  fldy = new FFT_SCALAR[nsplit];
+  fldz = new FFT_SCALAR[nsplit];
+  // for each owned atom, gather nsplit dispersion fields from the grid
+  // (ox,oy,oz) = part2grid_6 entry of the atom, origin of its stencil
+  // (delx,dely,delz) = offsets passed to compute_rho1d()
+  // (gx,gy,gz) = grid indices of the stencil point being visited
+  // fld*[ks] = field gathered from the ks-th vd*_brick_none grid
+
+  double **pos = atom->x;
+  double **frc = atom->f;
+  int itype;
+  double bcoef;
+
+  const int natoms = atom->nlocal;
+
+  for (int ia = 0; ia < natoms; ++ia) {
+
+    const int ox = part2grid_6[ia][0];
+    const int oy = part2grid_6[ia][1];
+    const int oz = part2grid_6[ia][2];
+    delx = ox+shiftone_6 - (pos[ia][0]-boxlo[0])*delxinv_6;
+    dely = oy+shiftone_6 - (pos[ia][1]-boxlo[1])*delyinv_6;
+    delz = oz+shiftone_6 - (pos[ia][2]-boxlo[2])*delzinv_6;
+    compute_rho1d(delx,dely,delz, order_6, rho_coeff_6, rho1d_6);
+    for (int ks = 0; ks < nsplit; ++ks)
+      fldx[ks] = fldy[ks] = fldz[ks] = ZEROF;
+    for (int kz = nlower_6; kz <= nupper_6; ++kz) {
+      gz = kz+oz;
+      wz = rho1d_6[2][kz];
+      for (int ky = nlower_6; ky <= nupper_6; ++ky) {
+        gy = ky+oy;
+        wy = wz*rho1d_6[1][ky];
+        for (int kx = nlower_6; kx <= nupper_6; ++kx) {
+          gx = kx+ox;
+          wx = wy*rho1d_6[0][kx];
+          for (int ks = 0; ks < nsplit; ++ks) {
+            fldx[ks] -= wx*vdx_brick_none[ks][gz][gy][gx];
+            fldy[ks] -= wx*vdy_brick_none[ks][gz][gy][gx];
+            fldz[ks] -= wx*vdz_brick_none[ks][gz][gy][gx];
+          }
+        }
+      }
+    }
+    // each split term adds B[nsplit*type + ks] times its field to the force
+    itype = atom->type[ia];
+    for (int ks = 0; ks < nsplit; ++ks) {
+      bcoef = B[nsplit*itype + ks];
+      frc[ia][0] += bcoef*fldx[ks];
+      frc[ia][1] += bcoef*fldy[ks];
+      if (slabflag != 2) frc[ia][2] += bcoef*fldz[ks];
+    }
+  }
+
+  delete [] fldx;
+  delete [] fldy;
+  delete [] fldz;
+}
+
+/* ----------------------------------------------------------------------
+   interpolate from grid to get dispersion field & force on my particles
+   for no mixing rule for the ad scheme
+------------------------------------------------------------------------- */
+
+void PPPMDisp::fieldforce_none_ad()
+{
+  int gx,gy,gz;
+  FFT_SCALAR delx,dely,delz,wdx,wdy,wdz;
+  FFT_SCALAR *fldx, *fldy, *fldz;
+
+  fldx = new FFT_SCALAR[nsplit];
+  fldy = new FFT_SCALAR[nsplit];
+  fldz = new FFT_SCALAR[nsplit];
+
+  // selfx/selfy/selfz hold the sin series of the self term per dimension
+  double sx,sy,sz;
+  double selfx,selfy,selfz;
+  double selfc = 0.0;
+  double *boxlen;
+
+  if (triclinic != 0) boxlen = domain->prd_lamda;
+  else boxlen = domain->prd;
+
+  const double lenx = boxlen[0];
+  const double leny = boxlen[1];
+  const double lenz = boxlen[2];
+  const double lenz_slab = lenz*slab_volfactor;
+
+  const double gridx_inv = nx_pppm_6/lenx;
+  const double gridy_inv = ny_pppm_6/leny;
+  const double gridz_inv = nz_pppm_6/lenz_slab;
+
+  // for each owned atom, gather nsplit dispersion fields from the grid
+  // (ox,oy,oz) = part2grid_6 entry of the atom, origin of its stencil
+  // (delx,dely,delz) = offsets passed to compute_rho1d()/compute_drho1d()
+  // (gx,gy,gz) = grid indices of the stencil point being visited
+  // fld*[ks] = field gathered from the ks-th u_brick_none grid
+
+  double **pos = atom->x;
+  double **frc = atom->f;
+  int itype;
+  double bcoef;
+
+  const int natoms = atom->nlocal;
+
+  for (int ia = 0; ia < natoms; ++ia) {
+
+    const int ox = part2grid_6[ia][0];
+    const int oy = part2grid_6[ia][1];
+    const int oz = part2grid_6[ia][2];
+    delx = ox+shiftone_6 - (pos[ia][0]-boxlo[0])*delxinv_6;
+    dely = oy+shiftone_6 - (pos[ia][1]-boxlo[1])*delyinv_6;
+    delz = oz+shiftone_6 - (pos[ia][2]-boxlo[2])*delzinv_6;
+
+    compute_rho1d(delx,dely,delz, order_6, rho_coeff_6, rho1d_6);
+    compute_drho1d(delx,dely,delz, order_6, drho_coeff_6, drho1d_6);
+
+    for (int ks = 0; ks < nsplit; ++ks)
+      fldx[ks] = fldy[ks] = fldz[ks] = ZEROF;
+
+    for (int kz = nlower_6; kz <= nupper_6; ++kz) {
+      gz = kz+oz;
+      for (int ky = nlower_6; ky <= nupper_6; ++ky) {
+        gy = ky+oy;
+        for (int kx = nlower_6; kx <= nupper_6; ++kx) {
+          gx = kx+ox;
+          wdx = drho1d_6[0][kx]*rho1d_6[1][ky]*rho1d_6[2][kz];
+          wdy = rho1d_6[0][kx]*drho1d_6[1][ky]*rho1d_6[2][kz];
+          wdz = rho1d_6[0][kx]*rho1d_6[1][ky]*drho1d_6[2][kz];
+
+          for (int ks = 0; ks < nsplit; ++ks) {
+            fldx[ks] += wdx*u_brick_none[ks][gz][gy][gx];
+            fldy[ks] += wdy*u_brick_none[ks][gz][gy][gx];
+            fldz[ks] += wdz*u_brick_none[ks][gz][gy][gx];
+          }
+        }
+      }
+    }
+
+    for (int ks = 0; ks < nsplit; ++ks) {
+      fldx[ks] *= gridx_inv;
+      fldy[ks] *= gridy_inv;
+      fldz[ks] *= gridz_inv;
+    }
+
+    // per-dimension sin series of the self term, shared by all split terms
+    itype = atom->type[ia];
+
+    sx = pos[ia][0]*gridx_inv;
+    sy = pos[ia][1]*gridy_inv;
+    sz = pos[ia][2]*gridz_inv;
+
+    selfx = sf_coeff_6[0]*sin(2*MY_PI*sx);
+    selfx += sf_coeff_6[1]*sin(4*MY_PI*sx);
+
+    selfy = sf_coeff_6[2]*sin(2*MY_PI*sy);
+    selfy += sf_coeff_6[3]*sin(4*MY_PI*sy);
+
+    selfz = sf_coeff_6[4]*sin(2*MY_PI*sz);
+    selfz += sf_coeff_6[5]*sin(4*MY_PI*sz);
+
+    for (int ks = 0; ks < nsplit; ++ks) {
+      bcoef = B[nsplit*itype + ks];
+      // NOTE(review): B[ks] below has no nsplit*type offset - confirm intended
+      selfc = selfx*B[ks]*2*bcoef*bcoef;
+      frc[ia][0] += bcoef*fldx[ks] - selfc;
+
+
+      selfc = selfy*B[ks]*2*bcoef*bcoef;
+      frc[ia][1] += bcoef*fldy[ks] - selfc;
+
+      selfc = selfz*B[ks]*2*bcoef*bcoef;
+      if (slabflag != 2) frc[ia][2] += bcoef*fldz[ks] - selfc;
+    }
+  }
+
+  delete [] fldx;
+  delete [] fldy;
+  delete [] fldz;
+}
+
+/* ----------------------------------------------------------------------
+   interpolate from grid to get dispersion field & force on my particles
+   for no mixing rule for per atom quantities
+------------------------------------------------------------------------- */
+
+void PPPMDisp::fieldforce_none_peratom()
+{
+  int gx,gy,gz;
+  FFT_SCALAR delx,dely,delz,wx,wy,wz;
+  FFT_SCALAR *epot,*vir0,*vir1,*vir2,*vir3,*vir4,*vir5;
+
+  epot = new FFT_SCALAR[nsplit];
+  vir0 = new FFT_SCALAR[nsplit];
+  vir1 = new FFT_SCALAR[nsplit];
+  vir2 = new FFT_SCALAR[nsplit];
+  vir3 = new FFT_SCALAR[nsplit];
+  vir4 = new FFT_SCALAR[nsplit];
+  vir5 = new FFT_SCALAR[nsplit];
+
+  // for each owned atom, gather per-atom energy/virial sums from the grid
+  // (ox,oy,oz) = part2grid_6 entry of the atom, origin of its stencil
+  // (delx,dely,delz) = offsets passed to compute_rho1d()
+  // (gx,gy,gz) = grid indices of the stencil point being visited
+  // epot[ks] / virJ[ks] = sums from the ks-th u_/vJ_brick_none grid
+
+  double **pos = atom->x;
+  int itype;
+  double halfb;
+
+  const int natoms = atom->nlocal;
+
+  for (int ia = 0; ia < natoms; ++ia) {
+
+    const int ox = part2grid_6[ia][0];
+    const int oy = part2grid_6[ia][1];
+    const int oz = part2grid_6[ia][2];
+    delx = ox+shiftone_6 - (pos[ia][0]-boxlo[0])*delxinv_6;
+    dely = oy+shiftone_6 - (pos[ia][1]-boxlo[1])*delyinv_6;
+    delz = oz+shiftone_6 - (pos[ia][2]-boxlo[2])*delzinv_6;
+    compute_rho1d(delx,dely,delz, order_6, rho_coeff_6, rho1d_6);
+
+    for (int ks = 0; ks < nsplit; ++ks)
+      epot[ks] = vir0[ks] = vir1[ks] = vir2[ks] = vir3[ks] = vir4[ks] = vir5[ks] = ZEROF;
+
+    for (int kz = nlower_6; kz <= nupper_6; ++kz) {
+      gz = kz+oz;
+      wz = rho1d_6[2][kz];
+      for (int ky = nlower_6; ky <= nupper_6; ++ky) {
+        gy = ky+oy;
+        wy = wz*rho1d_6[1][ky];
+        for (int kx = nlower_6; kx <= nupper_6; ++kx) {
+          gx = kx+ox;
+          wx = wy*rho1d_6[0][kx];
+          if (eflag_atom) {
+            for (int ks = 0; ks < nsplit; ++ks)
+              epot[ks] += wx*u_brick_none[ks][gz][gy][gx];
+          }
+          if (vflag_atom) {
+            for (int ks = 0; ks < nsplit; ++ks) {
+              vir0[ks] += wx*v0_brick_none[ks][gz][gy][gx];
+              vir1[ks] += wx*v1_brick_none[ks][gz][gy][gx];
+              vir2[ks] += wx*v2_brick_none[ks][gz][gy][gx];
+              vir3[ks] += wx*v3_brick_none[ks][gz][gy][gx];
+              vir4[ks] += wx*v4_brick_none[ks][gz][gy][gx];
+              vir5[ks] += wx*v5_brick_none[ks][gz][gy][gx];
+            }
+          }
+        }
+      }
+    }
+    // each split term is weighted by 0.5*B[nsplit*type + ks]
+    itype = atom->type[ia];
+    for (int ks = 0; ks < nsplit; ++ks) {
+      halfb = B[nsplit*itype + ks]*0.5;
+
+      if (eflag_atom) {
+        eatom[ia] += epot[ks]*halfb;
+      }
+      if (vflag_atom) {
+        vatom[ia][0] += vir0[ks]*halfb;
+        vatom[ia][1] += vir1[ks]*halfb;
+        vatom[ia][2] += vir2[ks]*halfb;
+        vatom[ia][3] += vir3[ks]*halfb;
+        vatom[ia][4] += vir4[ks]*halfb;
+        vatom[ia][5] += vir5[ks]*halfb;
+      }
+    }
+  }
+
+  delete [] epot;
+  delete [] vir0;
+  delete [] vir1;
+  delete [] vir2;
+  delete [] vir3;
+  delete [] vir4;
+  delete [] vir5;
+}
+
+/* ----------------------------------------------------------------------
+   pack values to buf to send to another proc
+------------------------------------------------------------------------- */
+
+void PPPMDisp::pack_forward(int flag, FFT_SCALAR *buf, int nlist, int *list)
+{
+  int n = 0;  // next write position in buf (FORWARD_AD and FORWARD_AD_G index buf by i instead)
+  // flag picks the grid bricks to pack; list[i] is applied as an offset from each brick's lower corner
+  switch (flag) {
+
+  // Coulomb interactions (bricks addressed from the n*lo_out corner)
+
+  case FORWARD_IK: {  // 3 values per grid point: vdx, vdy, vdz
+    FFT_SCALAR *xsrc = &vdx_brick[nzlo_out][nylo_out][nxlo_out];
+    FFT_SCALAR *ysrc = &vdy_brick[nzlo_out][nylo_out][nxlo_out];
+    FFT_SCALAR *zsrc = &vdz_brick[nzlo_out][nylo_out][nxlo_out];
+    for (int i = 0; i < nlist; i++) {
+      buf[n++] = xsrc[list[i]];
+      buf[n++] = ysrc[list[i]];
+      buf[n++] = zsrc[list[i]];
+    }
+    break;
+  }
+
+  case FORWARD_AD: {  // 1 value per grid point: u
+    FFT_SCALAR *src = &u_brick[nzlo_out][nylo_out][nxlo_out];
+    for (int i = 0; i < nlist; i++)
+      buf[i] = src[list[i]];
+    break;
+  }
+
+  case FORWARD_IK_PERATOM: {  // per point: u if eflag_atom, then v0..v5 if vflag_atom
+    FFT_SCALAR *esrc = &u_brick[nzlo_out][nylo_out][nxlo_out];
+    FFT_SCALAR *v0src = &v0_brick[nzlo_out][nylo_out][nxlo_out];
+    FFT_SCALAR *v1src = &v1_brick[nzlo_out][nylo_out][nxlo_out];
+    FFT_SCALAR *v2src = &v2_brick[nzlo_out][nylo_out][nxlo_out];
+    FFT_SCALAR *v3src = &v3_brick[nzlo_out][nylo_out][nxlo_out];
+    FFT_SCALAR *v4src = &v4_brick[nzlo_out][nylo_out][nxlo_out];
+    FFT_SCALAR *v5src = &v5_brick[nzlo_out][nylo_out][nxlo_out];
+    for (int i = 0; i < nlist; i++) {
+      if (eflag_atom) buf[n++] = esrc[list[i]];
+      if (vflag_atom) {
+        buf[n++] = v0src[list[i]];
+        buf[n++] = v1src[list[i]];
+        buf[n++] = v2src[list[i]];
+        buf[n++] = v3src[list[i]];
+        buf[n++] = v4src[list[i]];
+        buf[n++] = v5src[list[i]];
+      }
+    }
+    break;
+  }
+
+  case FORWARD_AD_PERATOM: {  // 6 values per grid point: v0..v5, not gated by vflag_atom
+    FFT_SCALAR *v0src = &v0_brick[nzlo_out][nylo_out][nxlo_out];
+    FFT_SCALAR *v1src = &v1_brick[nzlo_out][nylo_out][nxlo_out];
+    FFT_SCALAR *v2src = &v2_brick[nzlo_out][nylo_out][nxlo_out];
+    FFT_SCALAR *v3src = &v3_brick[nzlo_out][nylo_out][nxlo_out];
+    FFT_SCALAR *v4src = &v4_brick[nzlo_out][nylo_out][nxlo_out];
+    FFT_SCALAR *v5src = &v5_brick[nzlo_out][nylo_out][nxlo_out];
+    for (int i = 0; i < nlist; i++) {
+      buf[n++] = v0src[list[i]];
+      buf[n++] = v1src[list[i]];
+      buf[n++] = v2src[list[i]];
+      buf[n++] = v3src[list[i]];
+      buf[n++] = v4src[list[i]];
+      buf[n++] = v5src[list[i]];
+    }
+    break;
+  }
+
+  // Dispersion interactions, geometric mixing (bricks addressed from the n*lo_out_6 corner)
+
+  case FORWARD_IK_G: {  // 3 values per grid point: vdx, vdy, vdz
+    FFT_SCALAR *xsrc = &vdx_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *ysrc = &vdy_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *zsrc = &vdz_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    for (int i = 0; i < nlist; i++) {
+      buf[n++] = xsrc[list[i]];
+      buf[n++] = ysrc[list[i]];
+      buf[n++] = zsrc[list[i]];
+    }
+    break;
+  }
+
+  case FORWARD_AD_G: {  // 1 value per grid point: u
+    FFT_SCALAR *src = &u_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    for (int i = 0; i < nlist; i++)
+      buf[i] = src[list[i]];
+    break;
+  }
+
+  case FORWARD_IK_PERATOM_G: {  // per point: u if eflag_atom, then v0..v5 if vflag_atom
+    FFT_SCALAR *esrc = &u_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v0src = &v0_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v1src = &v1_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v2src = &v2_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v3src = &v3_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v4src = &v4_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v5src = &v5_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    for (int i = 0; i < nlist; i++) {
+      if (eflag_atom) buf[n++] = esrc[list[i]];
+      if (vflag_atom) {
+        buf[n++] = v0src[list[i]];
+        buf[n++] = v1src[list[i]];
+        buf[n++] = v2src[list[i]];
+        buf[n++] = v3src[list[i]];
+        buf[n++] = v4src[list[i]];
+        buf[n++] = v5src[list[i]];
+      }
+    }
+    break;
+  }
+
+  case FORWARD_AD_PERATOM_G: {  // 6 values per grid point: v0..v5, not gated by vflag_atom
+    FFT_SCALAR *v0src = &v0_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v1src = &v1_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v2src = &v2_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v3src = &v3_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v4src = &v4_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v5src = &v5_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    for (int i = 0; i < nlist; i++) {
+      buf[n++] = v0src[list[i]];
+      buf[n++] = v1src[list[i]];
+      buf[n++] = v2src[list[i]];
+      buf[n++] = v3src[list[i]];
+      buf[n++] = v4src[list[i]];
+      buf[n++] = v5src[list[i]];
+    }
+    break;
+  }
+
+  // Dispersion interactions, arithmetic mixing (seven bricks a0..a6 per quantity)
+
+  case FORWARD_IK_A: {  // 21 values per grid point: (vdx,vdy,vdz) of a0, then a1, ... a6
+    FFT_SCALAR *xsrc0 = &vdx_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *ysrc0 = &vdy_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *zsrc0 = &vdz_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
+
+    FFT_SCALAR *xsrc1 = &vdx_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *ysrc1 = &vdy_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *zsrc1 = &vdz_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
+
+    FFT_SCALAR *xsrc2 = &vdx_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *ysrc2 = &vdy_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *zsrc2 = &vdz_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
+
+    FFT_SCALAR *xsrc3 = &vdx_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *ysrc3 = &vdy_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *zsrc3 = &vdz_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
+
+    FFT_SCALAR *xsrc4 = &vdx_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *ysrc4 = &vdy_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *zsrc4 = &vdz_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
+
+    FFT_SCALAR *xsrc5 = &vdx_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *ysrc5 = &vdy_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *zsrc5 = &vdz_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
+
+    FFT_SCALAR *xsrc6 = &vdx_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *ysrc6 = &vdy_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *zsrc6 = &vdz_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
+
+    for (int i = 0; i < nlist; i++) {
+      buf[n++] = xsrc0[list[i]];
+      buf[n++] = ysrc0[list[i]];
+      buf[n++] = zsrc0[list[i]];
+
+      buf[n++] = xsrc1[list[i]];
+      buf[n++] = ysrc1[list[i]];
+      buf[n++] = zsrc1[list[i]];
+
+      buf[n++] = xsrc2[list[i]];
+      buf[n++] = ysrc2[list[i]];
+      buf[n++] = zsrc2[list[i]];
+
+      buf[n++] = xsrc3[list[i]];
+      buf[n++] = ysrc3[list[i]];
+      buf[n++] = zsrc3[list[i]];
+
+      buf[n++] = xsrc4[list[i]];
+      buf[n++] = ysrc4[list[i]];
+      buf[n++] = zsrc4[list[i]];
+
+      buf[n++] = xsrc5[list[i]];
+      buf[n++] = ysrc5[list[i]];
+      buf[n++] = zsrc5[list[i]];
+
+      buf[n++] = xsrc6[list[i]];
+      buf[n++] = ysrc6[list[i]];
+      buf[n++] = zsrc6[list[i]];
+    }
+    break;
+  }
+
+  case FORWARD_AD_A: {  // 7 values per grid point: u of a0..a6
+    FFT_SCALAR *src0 = &u_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *src1 = &u_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *src2 = &u_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *src3 = &u_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *src4 = &u_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *src5 = &u_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *src6 = &u_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
+
+    for (int i = 0; i < nlist; i++) {
+      buf[n++] = src0[list[i]];
+      buf[n++] = src1[list[i]];
+      buf[n++] = src2[list[i]];
+      buf[n++] = src3[list[i]];
+      buf[n++] = src4[list[i]];
+      buf[n++] = src5[list[i]];
+      buf[n++] = src6[list[i]];
+    }
+    break;
+  }
+
+  case FORWARD_IK_PERATOM_A: {  // per point: u of a0..a6 if eflag_atom, then v0..v5 of a0..a6 if vflag_atom
+    FFT_SCALAR *esrc0 = &u_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v0src0 = &v0_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v1src0 = &v1_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v2src0 = &v2_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v3src0 = &v3_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v4src0 = &v4_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v5src0 = &v5_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
+
+    FFT_SCALAR *esrc1 = &u_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v0src1 = &v0_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v1src1 = &v1_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v2src1 = &v2_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v3src1 = &v3_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v4src1 = &v4_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v5src1 = &v5_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
+
+    FFT_SCALAR *esrc2 = &u_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v0src2 = &v0_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v1src2 = &v1_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v2src2 = &v2_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v3src2 = &v3_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v4src2 = &v4_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v5src2 = &v5_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
+
+    FFT_SCALAR *esrc3 = &u_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v0src3 = &v0_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v1src3 = &v1_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v2src3 = &v2_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v3src3 = &v3_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v4src3 = &v4_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v5src3 = &v5_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
+
+    FFT_SCALAR *esrc4 = &u_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v0src4 = &v0_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v1src4 = &v1_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v2src4 = &v2_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v3src4 = &v3_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v4src4 = &v4_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v5src4 = &v5_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
+
+    FFT_SCALAR *esrc5 = &u_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v0src5 = &v0_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v1src5 = &v1_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v2src5 = &v2_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v3src5 = &v3_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v4src5 = &v4_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v5src5 = &v5_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
+
+    FFT_SCALAR *esrc6 = &u_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v0src6 = &v0_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v1src6 = &v1_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v2src6 = &v2_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v3src6 = &v3_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v4src6 = &v4_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v5src6 = &v5_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
+
+    for (int i = 0; i < nlist; i++) {
+      if (eflag_atom) {
+        buf[n++] = esrc0[list[i]];
+        buf[n++] = esrc1[list[i]];
+        buf[n++] = esrc2[list[i]];
+        buf[n++] = esrc3[list[i]];
+        buf[n++] = esrc4[list[i]];
+        buf[n++] = esrc5[list[i]];
+        buf[n++] = esrc6[list[i]];
+      }
+      if (vflag_atom) {
+        buf[n++] = v0src0[list[i]];
+        buf[n++] = v1src0[list[i]];
+        buf[n++] = v2src0[list[i]];
+        buf[n++] = v3src0[list[i]];
+        buf[n++] = v4src0[list[i]];
+        buf[n++] = v5src0[list[i]];
+
+        buf[n++] = v0src1[list[i]];
+        buf[n++] = v1src1[list[i]];
+        buf[n++] = v2src1[list[i]];
+        buf[n++] = v3src1[list[i]];
+        buf[n++] = v4src1[list[i]];
+        buf[n++] = v5src1[list[i]];
+
+        buf[n++] = v0src2[list[i]];
+        buf[n++] = v1src2[list[i]];
+        buf[n++] = v2src2[list[i]];
+        buf[n++] = v3src2[list[i]];
+        buf[n++] = v4src2[list[i]];
+        buf[n++] = v5src2[list[i]];
+
+        buf[n++] = v0src3[list[i]];
+        buf[n++] = v1src3[list[i]];
+        buf[n++] = v2src3[list[i]];
+        buf[n++] = v3src3[list[i]];
+        buf[n++] = v4src3[list[i]];
+        buf[n++] = v5src3[list[i]];
+
+        buf[n++] = v0src4[list[i]];
+        buf[n++] = v1src4[list[i]];
+        buf[n++] = v2src4[list[i]];
+        buf[n++] = v3src4[list[i]];
+        buf[n++] = v4src4[list[i]];
+        buf[n++] = v5src4[list[i]];
+
+        buf[n++] = v0src5[list[i]];
+        buf[n++] = v1src5[list[i]];
+        buf[n++] = v2src5[list[i]];
+        buf[n++] = v3src5[list[i]];
+        buf[n++] = v4src5[list[i]];
+        buf[n++] = v5src5[list[i]];
+
+        buf[n++] = v0src6[list[i]];
+        buf[n++] = v1src6[list[i]];
+        buf[n++] = v2src6[list[i]];
+        buf[n++] = v3src6[list[i]];
+        buf[n++] = v4src6[list[i]];
+        buf[n++] = v5src6[list[i]];
+      }
+    }
+    break;
+  }
+
+  case FORWARD_AD_PERATOM_A: {  // 42 values per grid point: v0..v5 of a0, then a1, ... a6
+    FFT_SCALAR *v0src0 = &v0_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v1src0 = &v1_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v2src0 = &v2_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v3src0 = &v3_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v4src0 = &v4_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v5src0 = &v5_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
+
+    FFT_SCALAR *v0src1 = &v0_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v1src1 = &v1_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v2src1 = &v2_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v3src1 = &v3_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v4src1 = &v4_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v5src1 = &v5_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
+
+    FFT_SCALAR *v0src2 = &v0_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v1src2 = &v1_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v2src2 = &v2_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v3src2 = &v3_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v4src2 = &v4_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v5src2 = &v5_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
+
+    FFT_SCALAR *v0src3 = &v0_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v1src3 = &v1_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v2src3 = &v2_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v3src3 = &v3_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v4src3 = &v4_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v5src3 = &v5_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
+
+    FFT_SCALAR *v0src4 = &v0_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v1src4 = &v1_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v2src4 = &v2_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v3src4 = &v3_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v4src4 = &v4_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v5src4 = &v5_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
+
+    FFT_SCALAR *v0src5 = &v0_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v1src5 = &v1_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v2src5 = &v2_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v3src5 = &v3_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v4src5 = &v4_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v5src5 = &v5_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
+
+    FFT_SCALAR *v0src6 = &v0_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v1src6 = &v1_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v2src6 = &v2_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v3src6 = &v3_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v4src6 = &v4_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v5src6 = &v5_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
+
+    for (int i = 0; i < nlist; i++) {
+      buf[n++] = v0src0[list[i]];
+      buf[n++] = v1src0[list[i]];
+      buf[n++] = v2src0[list[i]];
+      buf[n++] = v3src0[list[i]];
+      buf[n++] = v4src0[list[i]];
+      buf[n++] = v5src0[list[i]];
+
+      buf[n++] = v0src1[list[i]];
+      buf[n++] = v1src1[list[i]];
+      buf[n++] = v2src1[list[i]];
+      buf[n++] = v3src1[list[i]];
+      buf[n++] = v4src1[list[i]];
+      buf[n++] = v5src1[list[i]];
+
+      buf[n++] = v0src2[list[i]];
+      buf[n++] = v1src2[list[i]];
+      buf[n++] = v2src2[list[i]];
+      buf[n++] = v3src2[list[i]];
+      buf[n++] = v4src2[list[i]];
+      buf[n++] = v5src2[list[i]];
+
+      buf[n++] = v0src3[list[i]];
+      buf[n++] = v1src3[list[i]];
+      buf[n++] = v2src3[list[i]];
+      buf[n++] = v3src3[list[i]];
+      buf[n++] = v4src3[list[i]];
+      buf[n++] = v5src3[list[i]];
+
+      buf[n++] = v0src4[list[i]];
+      buf[n++] = v1src4[list[i]];
+      buf[n++] = v2src4[list[i]];
+      buf[n++] = v3src4[list[i]];
+      buf[n++] = v4src4[list[i]];
+      buf[n++] = v5src4[list[i]];
+
+      buf[n++] = v0src5[list[i]];
+      buf[n++] = v1src5[list[i]];
+      buf[n++] = v2src5[list[i]];
+      buf[n++] = v3src5[list[i]];
+      buf[n++] = v4src5[list[i]];
+      buf[n++] = v5src5[list[i]];
+
+      buf[n++] = v0src6[list[i]];
+      buf[n++] = v1src6[list[i]];
+      buf[n++] = v2src6[list[i]];
+      buf[n++] = v3src6[list[i]];
+      buf[n++] = v4src6[list[i]];
+      buf[n++] = v5src6[list[i]];
+    }
+    break;
+  }
+
+  // Dispersion interactions, no mixing (nsplit_alloc bricks, packed one whole brick after another)
+
+  case FORWARD_IK_NONE: {  // for each brick k: (vdx,vdy,vdz) for all nlist points
+    for (int k = 0; k < nsplit_alloc; k++) {
+      FFT_SCALAR *xsrc = &vdx_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
+      FFT_SCALAR *ysrc = &vdy_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
+      FFT_SCALAR *zsrc = &vdz_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
+      for (int i = 0; i < nlist; i++) {
+        buf[n++] = xsrc[list[i]];
+        buf[n++] = ysrc[list[i]];
+        buf[n++] = zsrc[list[i]];
+      }
+    }
+    break;
+  }
+
+  case FORWARD_AD_NONE: {  // for each brick k: u for all nlist points
+    for (int k = 0; k < nsplit_alloc; k++) {
+      FFT_SCALAR *src = &u_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
+      for (int i = 0; i < nlist; i++)
+        buf[n++] = src[list[i]];
+    }
+    break;
+  }
+
+  case FORWARD_IK_PERATOM_NONE: {  // for each brick k, per point: u if eflag_atom, v0..v5 if vflag_atom
+    for (int k = 0; k < nsplit_alloc; k++) {
+      FFT_SCALAR *esrc = &u_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
+      FFT_SCALAR *v0src = &v0_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
+      FFT_SCALAR *v1src = &v1_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
+      FFT_SCALAR *v2src = &v2_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
+      FFT_SCALAR *v3src = &v3_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
+      FFT_SCALAR *v4src = &v4_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
+      FFT_SCALAR *v5src = &v5_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
+      for (int i = 0; i < nlist; i++) {
+        if (eflag_atom) buf[n++] = esrc[list[i]];
+        if (vflag_atom) {
+          buf[n++] = v0src[list[i]];
+          buf[n++] = v1src[list[i]];
+          buf[n++] = v2src[list[i]];
+          buf[n++] = v3src[list[i]];
+          buf[n++] = v4src[list[i]];
+          buf[n++] = v5src[list[i]];
+        }
+      }
+    }
+    break;
+  }
+
+  case FORWARD_AD_PERATOM_NONE: {  // for each brick k: v0..v5 for all nlist points
+    for (int k = 0; k < nsplit_alloc; k++) {
+      FFT_SCALAR *v0src = &v0_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
+      FFT_SCALAR *v1src = &v1_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
+      FFT_SCALAR *v2src = &v2_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
+      FFT_SCALAR *v3src = &v3_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
+      FFT_SCALAR *v4src = &v4_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
+      FFT_SCALAR *v5src = &v5_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
+      for (int i = 0; i < nlist; i++) {
+        buf[n++] = v0src[list[i]];
+        buf[n++] = v1src[list[i]];
+        buf[n++] = v2src[list[i]];
+        buf[n++] = v3src[list[i]];
+        buf[n++] = v4src[list[i]];
+        buf[n++] = v5src[list[i]];
+      }
+    }
+    break;
+  }
+
+  }  // no default case: a flag matching none of the above packs nothing
+}
+
+/* ----------------------------------------------------------------------
+   unpack another proc's own values from buf and set own ghost values
+------------------------------------------------------------------------- */
+
+void PPPMDisp::unpack_forward(int flag, FFT_SCALAR *buf, int nlist, int *list)
+{
+  int n = 0;
+
+  switch (flag) {
+
+  // Coulomb interactions
+
+  case FORWARD_IK: {
+    FFT_SCALAR *xdest = &vdx_brick[nzlo_out][nylo_out][nxlo_out];
+    FFT_SCALAR *ydest = &vdy_brick[nzlo_out][nylo_out][nxlo_out];
+    FFT_SCALAR *zdest = &vdz_brick[nzlo_out][nylo_out][nxlo_out];
+    for (int i = 0; i < nlist; i++) {
+      xdest[list[i]] = buf[n++];
+      ydest[list[i]] = buf[n++];
+      zdest[list[i]] = buf[n++];
+    }
+    break;
+  }
+
+  case FORWARD_AD: {
+    FFT_SCALAR *dest = &u_brick[nzlo_out][nylo_out][nxlo_out];
+    for (int i = 0; i < nlist; i++)
+      dest[list[i]] = buf[n++];
+    break;
+  }
+
+  case FORWARD_IK_PERATOM: {
+    FFT_SCALAR *esrc = &u_brick[nzlo_out][nylo_out][nxlo_out];
+    FFT_SCALAR *v0src = &v0_brick[nzlo_out][nylo_out][nxlo_out];
+    FFT_SCALAR *v1src = &v1_brick[nzlo_out][nylo_out][nxlo_out];
+    FFT_SCALAR *v2src = &v2_brick[nzlo_out][nylo_out][nxlo_out];
+    FFT_SCALAR *v3src = &v3_brick[nzlo_out][nylo_out][nxlo_out];
+    FFT_SCALAR *v4src = &v4_brick[nzlo_out][nylo_out][nxlo_out];
+    FFT_SCALAR *v5src = &v5_brick[nzlo_out][nylo_out][nxlo_out];
+    for (int i = 0; i < nlist; i++) {
+      if (eflag_atom) esrc[list[i]] = buf[n++];
+      if (vflag_atom) {
+        v0src[list[i]] = buf[n++];
+        v1src[list[i]] = buf[n++];
+        v2src[list[i]] = buf[n++];
+        v3src[list[i]] = buf[n++];
+        v4src[list[i]] = buf[n++];
+        v5src[list[i]] = buf[n++];
+      }
+    }
+    break;
+  }
+
+  case FORWARD_AD_PERATOM: {
+    FFT_SCALAR *v0src = &v0_brick[nzlo_out][nylo_out][nxlo_out];
+    FFT_SCALAR *v1src = &v1_brick[nzlo_out][nylo_out][nxlo_out];
+    FFT_SCALAR *v2src = &v2_brick[nzlo_out][nylo_out][nxlo_out];
+    FFT_SCALAR *v3src = &v3_brick[nzlo_out][nylo_out][nxlo_out];
+    FFT_SCALAR *v4src = &v4_brick[nzlo_out][nylo_out][nxlo_out];
+    FFT_SCALAR *v5src = &v5_brick[nzlo_out][nylo_out][nxlo_out];
+    for (int i = 0; i < nlist; i++) {
+      v0src[list[i]] = buf[n++];
+      v1src[list[i]] = buf[n++];
+      v2src[list[i]] = buf[n++];
+      v3src[list[i]] = buf[n++];
+      v4src[list[i]] = buf[n++];
+      v5src[list[i]] = buf[n++];
+    }
+    break;
+  }
+
+  // Dispersion interactions, geometric mixing
+
+  case FORWARD_IK_G: {
+    FFT_SCALAR *xdest = &vdx_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *ydest = &vdy_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *zdest = &vdz_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    for (int i = 0; i < nlist; i++) {
+      xdest[list[i]] = buf[n++];
+      ydest[list[i]] = buf[n++];
+      zdest[list[i]] = buf[n++];
+    }
+    break;
+  }
+
+  case FORWARD_AD_G: {
+    FFT_SCALAR *dest = &u_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    for (int i = 0; i < nlist; i++)
+      dest[list[i]] = buf[n++];
+    break;
+  }
+
+  case FORWARD_IK_PERATOM_G: {
+    FFT_SCALAR *esrc = &u_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v0src = &v0_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v1src = &v1_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v2src = &v2_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v3src = &v3_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v4src = &v4_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v5src = &v5_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    for (int i = 0; i < nlist; i++) {
+      if (eflag_atom) esrc[list[i]] = buf[n++];
+      if (vflag_atom) {
+        v0src[list[i]] = buf[n++];
+        v1src[list[i]] = buf[n++];
+        v2src[list[i]] = buf[n++];
+        v3src[list[i]] = buf[n++];
+        v4src[list[i]] = buf[n++];
+        v5src[list[i]] = buf[n++];
+      }
+    }
+    break;
+  }
+
+  case FORWARD_AD_PERATOM_G: {
+    FFT_SCALAR *v0src = &v0_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v1src = &v1_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v2src = &v2_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v3src = &v3_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v4src = &v4_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v5src = &v5_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    for (int i = 0; i < nlist; i++) {
+      v0src[list[i]] = buf[n++];
+      v1src[list[i]] = buf[n++];
+      v2src[list[i]] = buf[n++];
+      v3src[list[i]] = buf[n++];
+      v4src[list[i]] = buf[n++];
+      v5src[list[i]] = buf[n++];
+    }
+    break;
+  }
+
+  // Dispersion interactions, arithmetic mixing
+
+  case FORWARD_IK_A: {
+    FFT_SCALAR *xdest0 = &vdx_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *ydest0 = &vdy_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *zdest0 = &vdz_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
+
+    FFT_SCALAR *xdest1 = &vdx_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *ydest1 = &vdy_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *zdest1 = &vdz_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
+
+    FFT_SCALAR *xdest2 = &vdx_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *ydest2 = &vdy_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *zdest2 = &vdz_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
+
+    FFT_SCALAR *xdest3 = &vdx_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *ydest3 = &vdy_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *zdest3 = &vdz_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
+
+    FFT_SCALAR *xdest4 = &vdx_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *ydest4 = &vdy_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *zdest4 = &vdz_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
+
+    FFT_SCALAR *xdest5 = &vdx_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *ydest5 = &vdy_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *zdest5 = &vdz_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
+
+    FFT_SCALAR *xdest6 = &vdx_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *ydest6 = &vdy_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *zdest6 = &vdz_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
+
+    for (int i = 0; i < nlist; i++) {
+      xdest0[list[i]] = buf[n++];
+      ydest0[list[i]] = buf[n++];
+      zdest0[list[i]] = buf[n++];
+
+      xdest1[list[i]] = buf[n++];
+      ydest1[list[i]] = buf[n++];
+      zdest1[list[i]] = buf[n++];
+
+      xdest2[list[i]] = buf[n++];
+      ydest2[list[i]] = buf[n++];
+      zdest2[list[i]] = buf[n++];
+
+      xdest3[list[i]] = buf[n++];
+      ydest3[list[i]] = buf[n++];
+      zdest3[list[i]] = buf[n++];
+
+      xdest4[list[i]] = buf[n++];
+      ydest4[list[i]] = buf[n++];
+      zdest4[list[i]] = buf[n++];
+
+      xdest5[list[i]] = buf[n++];
+      ydest5[list[i]] = buf[n++];
+      zdest5[list[i]] = buf[n++];
+
+      xdest6[list[i]] = buf[n++];
+      ydest6[list[i]] = buf[n++];
+      zdest6[list[i]] = buf[n++];
+    }
+    break;
+  }
+
+  case FORWARD_AD_A: {
+    FFT_SCALAR *dest0 = &u_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *dest1 = &u_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *dest2 = &u_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *dest3 = &u_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *dest4 = &u_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *dest5 = &u_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *dest6 = &u_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
+
+    for (int i = 0; i < nlist; i++) {
+      dest0[list[i]] = buf[n++];
+      dest1[list[i]] = buf[n++];
+      dest2[list[i]] = buf[n++];
+      dest3[list[i]] = buf[n++];
+      dest4[list[i]] = buf[n++];
+      dest5[list[i]] = buf[n++];
+      dest6[list[i]] = buf[n++];
+    }
+    break;
+  }
+
+  case FORWARD_IK_PERATOM_A: {
+    FFT_SCALAR *esrc0 = &u_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v0src0 = &v0_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v1src0 = &v1_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v2src0 = &v2_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v3src0 = &v3_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v4src0 = &v4_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v5src0 = &v5_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
+
+    FFT_SCALAR *esrc1 = &u_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v0src1 = &v0_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v1src1 = &v1_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v2src1 = &v2_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v3src1 = &v3_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v4src1 = &v4_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v5src1 = &v5_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
+
+    FFT_SCALAR *esrc2 = &u_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v0src2 = &v0_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v1src2 = &v1_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v2src2 = &v2_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v3src2 = &v3_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v4src2 = &v4_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v5src2 = &v5_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
+
+    FFT_SCALAR *esrc3 = &u_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v0src3 = &v0_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v1src3 = &v1_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v2src3 = &v2_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v3src3 = &v3_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v4src3 = &v4_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v5src3 = &v5_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
+
+    FFT_SCALAR *esrc4 = &u_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v0src4 = &v0_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v1src4 = &v1_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v2src4 = &v2_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v3src4 = &v3_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v4src4 = &v4_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v5src4 = &v5_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
+
+    FFT_SCALAR *esrc5 = &u_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v0src5 = &v0_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v1src5 = &v1_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v2src5 = &v2_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v3src5 = &v3_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v4src5 = &v4_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v5src5 = &v5_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
+
+    FFT_SCALAR *esrc6 = &u_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v0src6 = &v0_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v1src6 = &v1_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v2src6 = &v2_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v3src6 = &v3_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v4src6 = &v4_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v5src6 = &v5_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
+
+    for (int i = 0; i < nlist; i++) {
+      if (eflag_atom) {
+        esrc0[list[i]] = buf[n++];
+        esrc1[list[i]] = buf[n++];
+        esrc2[list[i]] = buf[n++];
+        esrc3[list[i]] = buf[n++];
+        esrc4[list[i]] = buf[n++];
+        esrc5[list[i]] = buf[n++];
+        esrc6[list[i]] = buf[n++];
+      }
+      if (vflag_atom) {
+        v0src0[list[i]] = buf[n++];
+        v1src0[list[i]] = buf[n++];
+        v2src0[list[i]] = buf[n++];
+        v3src0[list[i]] = buf[n++];
+        v4src0[list[i]] = buf[n++];
+        v5src0[list[i]] = buf[n++];
+
+        v0src1[list[i]] = buf[n++];
+        v1src1[list[i]] = buf[n++];
+        v2src1[list[i]] = buf[n++];
+        v3src1[list[i]] = buf[n++];
+        v4src1[list[i]] = buf[n++];
+        v5src1[list[i]] = buf[n++];
+
+        v0src2[list[i]] = buf[n++];
+        v1src2[list[i]] = buf[n++];
+        v2src2[list[i]] = buf[n++];
+        v3src2[list[i]] = buf[n++];
+        v4src2[list[i]] = buf[n++];
+        v5src2[list[i]] = buf[n++];
+
+        v0src3[list[i]] = buf[n++];
+        v1src3[list[i]] = buf[n++];
+        v2src3[list[i]] = buf[n++];
+        v3src3[list[i]] = buf[n++];
+        v4src3[list[i]] = buf[n++];
+        v5src3[list[i]] = buf[n++];
+
+        v0src4[list[i]] = buf[n++];
+        v1src4[list[i]] = buf[n++];
+        v2src4[list[i]] = buf[n++];
+        v3src4[list[i]] = buf[n++];
+        v4src4[list[i]] = buf[n++];
+        v5src4[list[i]] = buf[n++];
+
+        v0src5[list[i]] = buf[n++];
+        v1src5[list[i]] = buf[n++];
+        v2src5[list[i]] = buf[n++];
+        v3src5[list[i]] = buf[n++];
+        v4src5[list[i]] = buf[n++];
+        v5src5[list[i]] = buf[n++];
+
+        v0src6[list[i]] = buf[n++];
+        v1src6[list[i]] = buf[n++];
+        v2src6[list[i]] = buf[n++];
+        v3src6[list[i]] = buf[n++];
+        v4src6[list[i]] = buf[n++];
+        v5src6[list[i]] = buf[n++];
+      }
+    }
+    break;
+  }
+
+  case FORWARD_AD_PERATOM_A: {
+    FFT_SCALAR *v0src0 = &v0_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v1src0 = &v1_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v2src0 = &v2_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v3src0 = &v3_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v4src0 = &v4_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v5src0 = &v5_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
+
+    FFT_SCALAR *v0src1 = &v0_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v1src1 = &v1_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v2src1 = &v2_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v3src1 = &v3_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v4src1 = &v4_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v5src1 = &v5_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
+
+    FFT_SCALAR *v0src2 = &v0_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v1src2 = &v1_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v2src2 = &v2_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v3src2 = &v3_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v4src2 = &v4_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v5src2 = &v5_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
+
+    FFT_SCALAR *v0src3 = &v0_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v1src3 = &v1_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v2src3 = &v2_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v3src3 = &v3_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v4src3 = &v4_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v5src3 = &v5_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
+
+    FFT_SCALAR *v0src4 = &v0_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v1src4 = &v1_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v2src4 = &v2_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v3src4 = &v3_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v4src4 = &v4_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v5src4 = &v5_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
+
+    FFT_SCALAR *v0src5 = &v0_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v1src5 = &v1_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v2src5 = &v2_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v3src5 = &v3_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v4src5 = &v4_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v5src5 = &v5_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
+
+    FFT_SCALAR *v0src6 = &v0_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v1src6 = &v1_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v2src6 = &v2_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v3src6 = &v3_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v4src6 = &v4_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *v5src6 = &v5_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
+
+    for (int i = 0; i < nlist; i++) {
+      v0src0[list[i]] = buf[n++];
+      v1src0[list[i]] = buf[n++];
+      v2src0[list[i]] = buf[n++];
+      v3src0[list[i]] = buf[n++];
+      v4src0[list[i]] = buf[n++];
+      v5src0[list[i]] = buf[n++];
+
+      v0src1[list[i]] = buf[n++];
+      v1src1[list[i]] = buf[n++];
+      v2src1[list[i]] = buf[n++];
+      v3src1[list[i]] = buf[n++];
+      v4src1[list[i]] = buf[n++];
+      v5src1[list[i]] = buf[n++];
+
+      v0src2[list[i]] = buf[n++];
+      v1src2[list[i]] = buf[n++];
+      v2src2[list[i]] = buf[n++];
+      v3src2[list[i]] = buf[n++];
+      v4src2[list[i]] = buf[n++];
+      v5src2[list[i]] = buf[n++];
+
+      v0src3[list[i]] = buf[n++];
+      v1src3[list[i]] = buf[n++];
+      v2src3[list[i]] = buf[n++];
+      v3src3[list[i]] = buf[n++];
+      v4src3[list[i]] = buf[n++];
+      v5src3[list[i]] = buf[n++];
+
+      v0src4[list[i]] = buf[n++];
+      v1src4[list[i]] = buf[n++];
+      v2src4[list[i]] = buf[n++];
+      v3src4[list[i]] = buf[n++];
+      v4src4[list[i]] = buf[n++];
+      v5src4[list[i]] = buf[n++];
+
+      v0src5[list[i]] = buf[n++];
+      v1src5[list[i]] = buf[n++];
+      v2src5[list[i]] = buf[n++];
+      v3src5[list[i]] = buf[n++];
+      v4src5[list[i]] = buf[n++];
+      v5src5[list[i]] = buf[n++];
+
+      v0src6[list[i]] = buf[n++];
+      v1src6[list[i]] = buf[n++];
+      v2src6[list[i]] = buf[n++];
+      v3src6[list[i]] = buf[n++];
+      v4src6[list[i]] = buf[n++];
+      v5src6[list[i]] = buf[n++];
+    }
+    break;
+  }
+
+  // Dispersion interactions, no mixing
+
+  case FORWARD_IK_NONE: {
+    for (int k = 0; k < nsplit_alloc; k++) {
+      FFT_SCALAR *xdest = &vdx_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
+      FFT_SCALAR *ydest = &vdy_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
+      FFT_SCALAR *zdest = &vdz_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
+      for (int i = 0; i < nlist; i++) {
+        xdest[list[i]] = buf[n++];
+        ydest[list[i]] = buf[n++];
+        zdest[list[i]] = buf[n++];
+      }
+    }
+    break;
+  }
+
+  case FORWARD_AD_NONE: {
+    for (int k = 0; k < nsplit_alloc; k++) {
+      FFT_SCALAR *dest = &u_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
+      for (int i = 0; i < nlist; i++)
+        dest[list[i]] = buf[n++];
+    }
+    break;
+  }
+
+  case FORWARD_IK_PERATOM_NONE: {
+    for (int k = 0; k < nsplit_alloc; k++) {
+      FFT_SCALAR *esrc = &u_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
+      FFT_SCALAR *v0src = &v0_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
+      FFT_SCALAR *v1src = &v1_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
+      FFT_SCALAR *v2src = &v2_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
+      FFT_SCALAR *v3src = &v3_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
+      FFT_SCALAR *v4src = &v4_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
+      FFT_SCALAR *v5src = &v5_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
+      for (int i = 0; i < nlist; i++) {
+        if (eflag_atom) esrc[list[i]] = buf[n++];
+        if (vflag_atom) {
+          v0src[list[i]] = buf[n++];
+          v1src[list[i]] = buf[n++];
+          v2src[list[i]] = buf[n++];
+          v3src[list[i]] = buf[n++];
+          v4src[list[i]] = buf[n++];
+          v5src[list[i]] = buf[n++];
+        }
+      }
+    }
+    break;
+  }
+
+  case FORWARD_AD_PERATOM_NONE: {
+    for (int k = 0; k < nsplit_alloc; k++) {
+      FFT_SCALAR *v0src = &v0_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
+      FFT_SCALAR *v1src = &v1_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
+      FFT_SCALAR *v2src = &v2_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
+      FFT_SCALAR *v3src = &v3_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
+      FFT_SCALAR *v4src = &v4_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
+      FFT_SCALAR *v5src = &v5_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
+      for (int i = 0; i < nlist; i++) {
+        v0src[list[i]] = buf[n++];
+        v1src[list[i]] = buf[n++];
+        v2src[list[i]] = buf[n++];
+        v3src[list[i]] = buf[n++];
+        v4src[list[i]] = buf[n++];
+        v5src[list[i]] = buf[n++];
+      }
+    }
+    break;
+  }
+
+  }
+}
+
+/* ----------------------------------------------------------------------
+   pack ghost values of the density brick(s) selected by flag into buf to send to another proc
+------------------------------------------------------------------------- */
+
+void PPPMDisp::pack_reverse(int flag, FFT_SCALAR *buf, int nlist, int *list)
+{
+  int n = 0;  // write cursor into buf, used by the multi-brick branches
+
+  // Coulomb interactions: one value per list entry
+
+  if (flag == REVERSE_RHO) {
+    FFT_SCALAR *src = &density_brick[nzlo_out][nylo_out][nxlo_out];  // list[i] is an offset from this element
+    for (int i = 0; i < nlist; i++)
+      buf[i] = src[list[i]];
+
+  // Dispersion interactions, geometric mixing: one value per list entry
+
+  } else if (flag == REVERSE_RHO_G) {
+    FFT_SCALAR *src = &density_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    for (int i = 0; i < nlist; i++)
+      buf[i] = src[list[i]];
+
+  // Dispersion interactions, arithmetic mixing: bricks a0..a6 interleaved, 7 values per list entry
+
+  } else if (flag == REVERSE_RHO_A) {
+    FFT_SCALAR *src0 = &density_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *src1 = &density_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *src2 = &density_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *src3 = &density_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *src4 = &density_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *src5 = &density_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *src6 = &density_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    for (int i = 0; i < nlist; i++) {
+      buf[n++] = src0[list[i]];
+      buf[n++] = src1[list[i]];
+      buf[n++] = src2[list[i]];
+      buf[n++] = src3[list[i]];
+      buf[n++] = src4[list[i]];
+      buf[n++] = src5[list[i]];
+      buf[n++] = src6[list[i]];
+    }
+
+  // Dispersion interactions, no mixing: brick by brick, nlist values for each k < nsplit_alloc
+
+  } else if (flag == REVERSE_RHO_NONE) {
+    for (int k = 0; k < nsplit_alloc; k++) {
+      FFT_SCALAR *src = &density_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
+      for (int i = 0; i < nlist; i++) {
+        buf[n++] = src[list[i]];
+      }
+    }
+  }  // any other flag value packs nothing
+}
+
+/* ----------------------------------------------------------------------
+   unpack another proc's ghost values from buf and add (+=) them to the density brick(s) selected by flag
+------------------------------------------------------------------------- */
+
+void PPPMDisp::unpack_reverse(int flag, FFT_SCALAR *buf, int nlist, int *list)
+{
+  int n = 0;  // read cursor into buf, used by the multi-brick branches
+
+  // Coulomb interactions: one value per list entry
+
+  if (flag == REVERSE_RHO) {
+    FFT_SCALAR *dest = &density_brick[nzlo_out][nylo_out][nxlo_out];  // list[i] is an offset from this element
+    for (int i = 0; i < nlist; i++)
+      dest[list[i]] += buf[i];
+
+  // Dispersion interactions, geometric mixing: one value per list entry
+
+  } else if (flag == REVERSE_RHO_G) {
+    FFT_SCALAR *dest = &density_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    for (int i = 0; i < nlist; i++)
+      dest[list[i]] += buf[i];
+
+  // Dispersion interactions, arithmetic mixing: 7 interleaved values per list entry, order a0..a6
+
+  } else if (flag == REVERSE_RHO_A) {
+    FFT_SCALAR *dest0 = &density_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *dest1 = &density_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *dest2 = &density_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *dest3 = &density_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *dest4 = &density_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *dest5 = &density_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    FFT_SCALAR *dest6 = &density_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6];
+    for (int i = 0; i < nlist; i++) {
+      dest0[list[i]] += buf[n++];
+      dest1[list[i]] += buf[n++];
+      dest2[list[i]] += buf[n++];
+      dest3[list[i]] += buf[n++];
+      dest4[list[i]] += buf[n++];
+      dest5[list[i]] += buf[n++];
+      dest6[list[i]] += buf[n++];
+    }
+
+  // Dispersion interactions, no mixing: brick by brick, nlist values for each k < nsplit_alloc
+
+  } else if (flag == REVERSE_RHO_NONE) {
+    for (int k = 0; k < nsplit_alloc; k++) {
+      FFT_SCALAR *dest = &density_brick_none[k][nzlo_out_6][nylo_out_6][nxlo_out_6];
+      for (int i = 0; i < nlist; i++)
+        dest[list[i]] += buf[n++];
+    }
+  }  // any other flag value leaves all bricks unchanged
+}
+
+/* ----------------------------------------------------------------------
+   map nprocs to NX by NY grid as PX by PY procs - return optimal px,py (px*py = nprocs) via *px,*py
+------------------------------------------------------------------------- */
+
+void PPPMDisp::procs2grid2d(int nprocs, int nx, int ny, int *px, int *py)
+{
+  // loop thru all possible factorizations of nprocs
+  // surf = boxx + boxy = half the perimeter of the largest proc sub-domain
+  // innermost if test prefers smaller surf; ties go to the larger boxx*boxy
+
+  int bestsurf = 2 * (nx + ny);  // starting bound that a candidate must beat
+  int bestboxx = 0;
+  int bestboxy = 0;
+
+  int boxx,boxy,surf,ipx,ipy;
+
+  ipx = 1;
+  while (ipx <= nprocs) {
+    if (nprocs % ipx == 0) {  // ipx divides nprocs, so ipx by ipy is a candidate layout
+      ipy = nprocs/ipx;
+      boxx = nx/ipx;
+      if (nx % ipx) boxx++;  // round up when nx is not a multiple of ipx
+      boxy = ny/ipy;
+      if (ny % ipy) boxy++;  // round up when ny is not a multiple of ipy
+      surf = boxx + boxy;
+      if (surf < bestsurf || 
+	  (surf == bestsurf && boxx*boxy > bestboxx*bestboxy)) {
+	bestsurf = surf;
+	bestboxx = boxx;
+	bestboxy = boxy;
+	*px = ipx;  // outputs are written only when a candidate wins
+	*py = ipy;
+      }
+    }
+    ipx++;
+  }
+}
+
+/* ----------------------------------------------------------------------
+   charge assignment into rho1d: evaluate the polynomials with coefficients rho_c at dx,dy,dz into r1d
+   dx,dy,dz = distance of particle from "lower left" grid point
+------------------------------------------------------------------------- */
+
+void PPPMDisp::compute_rho1d(const FFT_SCALAR &dx, const FFT_SCALAR &dy,
+			      const FFT_SCALAR &dz, int ord, 
+                             FFT_SCALAR **rho_c, FFT_SCALAR **r1d)
+{
+  int k,l;
+  FFT_SCALAR r1,r2,r3;
+
+  for (k = (1-ord)/2; k <= ord/2; k++) {  // one pass per column k of rho_c and r1d
+    r1 = r2 = r3 = ZEROF;
+
+    for (l = ord-1; l >= 0; l--) {  // Horner's rule: sum over l of rho_c[l][k] * d^l
+      r1 = rho_c[l][k] + r1*dx;
+      r2 = rho_c[l][k] + r2*dy;
+      r3 = rho_c[l][k] + r3*dz;
+    }
+    r1d[0][k] = r1;  // row 0: value at dx
+    r1d[1][k] = r2;  // row 1: value at dy
+    r1d[2][k] = r3;  // row 2: value at dz
+  }
+}
+
+/* ----------------------------------------------------------------------
+   charge assignment into drho1d: evaluate the polynomials with coefficients drho_c at dx,dy,dz into dr1d
+   dx,dy,dz = distance of particle from "lower left" grid point
+------------------------------------------------------------------------- */
+
+void PPPMDisp::compute_drho1d(const FFT_SCALAR &dx, const FFT_SCALAR &dy,
+                          const FFT_SCALAR &dz, int ord, 
+                              FFT_SCALAR **drho_c, FFT_SCALAR **dr1d)
+{
+  int k,l;
+  FFT_SCALAR r1,r2,r3;
+
+  for (k = (1-ord)/2; k <= ord/2; k++) {  // one pass per column k of drho_c and dr1d
+    r1 = r2 = r3 = ZEROF;
+
+    for (l = ord-2; l >= 0; l--) {  // Horner's rule over rows 0..ord-2 (one row fewer than compute_rho1d reads)
+      r1 = drho_c[l][k] + r1*dx;
+      r2 = drho_c[l][k] + r2*dy;
+      r3 = drho_c[l][k] + r3*dz;
+    }
+    dr1d[0][k] = r1;  // row 0: value at dx
+    dr1d[1][k] = r2;  // row 1: value at dy
+    dr1d[2][k] = r3;  // row 2: value at dz
+  }
+}
+
+/* ----------------------------------------------------------------------
+   generate coefficients for the weight function of order n (n = ord)
+   coeff receives the packed a(l,k); dcoeff receives l*a(l,k) in row l-1
+              (n-1)
+  Wn(x) =     Sum    wn(k,x) , Sum is over every other integer
+           k=-(n-1)
+  For k=-(n-1),-(n-1)+2, ....., (n-1)-2,n-1
+      k is odd integers if n is even and even integers if n is odd
+              ---
+             | n-1
+             | Sum a(l,k)*(x-k/2)**l   if abs(x-k/2) < 1/2
+  wn(k,x) = <  l=0
+             |
+             |  0                       otherwise
+              ---
+  a coefficients are packed into the array rho_coeff to eliminate zeros
+  rho_coeff(l,(k+mod(n+1,2))/2) = a(l,k)
+------------------------------------------------------------------------- */
+
+void PPPMDisp::compute_rho_coeff(FFT_SCALAR **coeff , FFT_SCALAR **dcoeff, 
+                                 int ord)
+{
+  int j,k,l,m;
+  FFT_SCALAR s;
+
+  FFT_SCALAR **a;  // scratch table, indexed below as a[l][k] with l = 0..ord-1, k = -ord..ord
+  memory->create2d_offset(a,ord,-ord,ord,"pppm/disp:a");
+
+  for (k = -ord; k <= ord; k++) 
+    for (l = 0; l < ord; l++)
+      a[l][k] = 0.0;
+        
+  a[0][0] = 1.0;  // seed for the recursion over j below
+  for (j = 1; j < ord; j++) {
+    for (k = -j; k <= j; k += 2) {  // k advances by 2, so columns k-1 and k+1 are not written in this j pass
+      s = 0.0;
+      for (l = 0; l < j; l++) {
+	a[l+1][k] = (a[l][k+1]-a[l][k-1]) / (l+1);
+#ifdef FFT_SINGLE
+	s += powf(0.5,(float) l+1) *
+	  (a[l][k-1] + powf(-1.0,(float) l) * a[l][k+1]) / (l+1);
+#else
+	s += pow(0.5,(double) l+1) * 
+	  (a[l][k-1] + pow(-1.0,(double) l) * a[l][k+1]) / (l+1);
+#endif
+      }
+      a[0][k] = s;  // row 0 of column k is the sum accumulated over l
+    }
+  }
+
+  m = (1-ord)/2;  // first output column
+  for (k = -(ord-1); k < ord; k += 2) {  // copy every other column of a into consecutive columns m
+    for (l = 0; l < ord; l++)
+      coeff[l][m] = a[l][k];
+    for (l = 1; l < ord; l++)
+      dcoeff[l-1][m] = l*a[l][k];  // coefficient of x^(l-1) in d/dx of a[l][k]*x^l
+    m++;
+  }
+
+  memory->destroy2d_offset(a,-ord);  // release the scratch table
+}
+
+/* ----------------------------------------------------------------------
+   Slab-geometry correction term to dampen inter-slab interactions between
+   periodically repeating slabs.  Yields good approximation to 2D Ewald if
+   adequate empty space is left between repeating slabs (J. Chem. Phys.
+   111, 3155).  Slabs defined here to be parallel to the xy plane. Also
+   extended to non-neutral systems (J. Chem. Phys. 131, 094107).
+------------------------------------------------------------------------- */
+
+void PPPMDisp::slabcorr(int eflag)  // NOTE: eflag is not read below; eflag_global and eflag_atom are used
+{
+  // compute local contribution to global dipole moment
+
+  double *q = atom->q;
+  double **x = atom->x;
+  double zprd = domain->zprd;
+  int nlocal = atom->nlocal;
+
+  double dipole = 0.0;
+  for (int i = 0; i < nlocal; i++) dipole += q[i]*x[i][2];  // sum of q*z over local atoms
+
+  // sum local contributions to get global dipole moment
+
+  double dipole_all;
+  MPI_Allreduce(&dipole,&dipole_all,1,MPI_DOUBLE,MPI_SUM,world);
+
+  // need to make non-neutral systems and/or
+  //  per-atom energy translationally invariant
+
+  double dipole_r2 = 0.0;  // stays 0.0 unless the branch below runs
+  if (eflag_atom || fabs(qsum) > SMALL) {
+    for (int i = 0; i < nlocal; i++)
+      dipole_r2 += q[i]*x[i][2]*x[i][2];  // sum of q*z*z over local atoms
+
+    // sum local contributions
+
+    double tmp;
+    MPI_Allreduce(&dipole_r2,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
+    dipole_r2 = tmp;
+  }
+
+  // compute corrections
+
+  const double e_slabcorr = MY_2PI*(dipole_all*dipole_all -
+    qsum*dipole_r2 - qsum*qsum*zprd*zprd/12.0)/volume;
+  const double qscale = force->qqrd2e * scale;
+
+  if (eflag_global) energy_1 += qscale * e_slabcorr;  // global energy is accumulated in energy_1
+
+  // per-atom energy
+
+  if (eflag_atom) {
+    double efact = qscale * MY_2PI/volume;
+    for (int i = 0; i < nlocal; i++)
+      eatom[i] += efact * q[i]*(x[i][2]*dipole_all - 0.5*(dipole_r2 +
+        qsum*x[i][2]*x[i][2]) - qsum*zprd*zprd/12.0);
+  }
+
+  // add on force corrections
+
+  double ffact = qscale * (-4.0*MY_PI/volume);
+  double **f = atom->f;
+
+  for (int i = 0; i < nlocal; i++) f[i][2] += ffact * q[i]*(dipole_all - qsum*x[i][2]);  // only the z force component is changed
+}
+
+/* ----------------------------------------------------------------------
+   perform and time the 1d FFTs required for N timesteps; elapsed wall time goes to time1d, returns 2 or 4
+------------------------------------------------------------------------- */
+
+int PPPMDisp::timing_1d(int n, double &time1d)
+{
+  double time1,time2;
+  int mixing = 1;  // multiplier applied to the time measured on the _6 grid
+  if (function[2]) mixing = 4;
+  if (function[3]) mixing = nsplit_alloc/2;
+
+  if (function[0]) for (int i = 0; i < 2*nfft_both; i++) work1[i] = ZEROF;  // zero the FFT input first
+  if (function[1] + function[2] + function[3])
+    for (int i = 0; i < 2*nfft_both_6; i++) work1_6[i] = ZEROF;
+
+  MPI_Barrier(world);  // start the clock only after all procs arrive
+  time1 = MPI_Wtime();
+
+  if (function[0]) {
+    for (int i = 0; i < n; i++) {
+      fft1->timing1d(work1,nfft_both,1);
+      fft2->timing1d(work1,nfft_both,-1);
+      if (differentiation_flag != 1){  // NOTE(review): tests != 1 here but nonzero at the return; same only for 0/1 - confirm
+        fft2->timing1d(work1,nfft_both,-1);
+        fft2->timing1d(work1,nfft_both,-1);
+      }
+    }
+  }
+
+  MPI_Barrier(world);
+  time2 = MPI_Wtime();
+  time1d = time2 - time1;  // time of the function[0] section, even when it was skipped
+
+  MPI_Barrier(world);
+  time1 = MPI_Wtime();
+
+  if (function[1] + function[2] + function[3]) {  // same FFT sequence on the _6 grid
+    for (int i = 0; i < n; i++) {
+      fft1_6->timing1d(work1_6,nfft_both_6,1);
+      fft2_6->timing1d(work1_6,nfft_both_6,-1);
+      if (differentiation_flag != 1){
+        fft2_6->timing1d(work1_6,nfft_both_6,-1);
+        fft2_6->timing1d(work1_6,nfft_both_6,-1);
+      }
+    }
+  }
+
+  MPI_Barrier(world);
+  time2 = MPI_Wtime();
+  time1d += (time2 - time1)*mixing;  // _6 grid time is scaled by mixing before being added
+
+  if (differentiation_flag) return 2;  // 2 when differentiation_flag is nonzero
+  return 4;
+}
+
+/* ----------------------------------------------------------------------
+   perform and time the 3d FFTs required for N timesteps; elapsed wall time goes to time3d, returns 2 or 4
+------------------------------------------------------------------------- */
+
+int PPPMDisp::timing_3d(int n, double &time3d)
+{
+  double time1,time2;
+  int mixing = 1;  // multiplier applied to the time measured on the _6 grid
+  if (function[2]) mixing = 4;
+  if (function[3]) mixing = nsplit_alloc/2;
+
+  if (function[0]) for (int i = 0; i < 2*nfft_both; i++) work1[i] = ZEROF;  // zero the FFT input first
+  if (function[1] + function[2] + function[3]) 
+    for (int i = 0; i < 2*nfft_both_6; i++) work1_6[i] = ZEROF;
+
+
+
+  MPI_Barrier(world);  // start the clock only after all procs arrive
+  time1 = MPI_Wtime();
+
+  if (function[0]) {
+    for (int i = 0; i < n; i++) {
+      fft1->compute(work1,work1,1);  // in place: work1 is both input and output
+      fft2->compute(work1,work1,-1);
+      if (differentiation_flag != 1) {  // NOTE(review): tests != 1 here but nonzero at the return; same only for 0/1 - confirm
+        fft2->compute(work1,work1,-1);
+        fft2->compute(work1,work1,-1);
+      }
+    }
+  }
+
+  MPI_Barrier(world);
+  time2 = MPI_Wtime();
+  time3d = time2 - time1;  // time of the function[0] section, even when it was skipped
+
+  MPI_Barrier(world);
+  time1 = MPI_Wtime();
+  
+  if (function[1] + function[2] + function[3]) {  // same FFT sequence on the _6 grid
+    for (int i = 0; i < n; i++) {
+      fft1_6->compute(work1_6,work1_6,1);
+      fft2_6->compute(work1_6,work1_6,-1);
+      if (differentiation_flag != 1) {
+        fft2_6->compute(work1_6,work1_6,-1);
+        fft2_6->compute(work1_6,work1_6,-1);
+      }
+    }
+  }
+
+  MPI_Barrier(world);
+  time2 = MPI_Wtime();
+  time3d += (time2 - time1) * mixing;  // _6 grid time is scaled by mixing before being added
+
+  if (differentiation_flag) return 2;  // 2 when differentiation_flag is nonzero
+  return 4;
+}
+
+/* ----------------------------------------------------------------------
+   memory usage of local arrays: returns an estimate in bytes, summed over the enabled function[] grids
+------------------------------------------------------------------------- */
+
+double PPPMDisp::memory_usage()
+{
+  double bytes = nmax*3 * sizeof(double);
+  int mixing = 1;   // number of brick sets counted for the _6 grid
+  int diff = 3;     //depends on differentiation
+  int per = 7;      //depends on per atom calculations
+  if (differentiation_flag) {
+    diff = 1;
+    per = 6;
+  }
+  if (!evflag_atom) per = 0;  // no per-atom bricks are counted in that case
+  if (function[2]) mixing = 7;  // seven sets, cf. the *_a0..*_a6 bricks used elsewhere in this file
+  if (function[3]) mixing = nsplit_alloc;
+
+  if (function[0]) {
+    int nbrick = (nxhi_out-nxlo_out+1) * (nyhi_out-nylo_out+1) * 
+      (nzhi_out-nzlo_out+1);
+    bytes += (1 + diff +  per) * nbrick * sizeof(FFT_SCALAR);     //brick memory
+    bytes += 6 * nfft_both * sizeof(double);      // vg
+    bytes += nfft_both * sizeof(double);          // greensfn
+    bytes += nfft_both * 3 * sizeof(FFT_SCALAR);    // density_FFT, work1, work2
+    bytes += cg->memory_usage();
+  }
+
+  if (function[1] + function[2] + function[3]) {  // same accounting for the _6 grid, bricks scaled by mixing
+    int nbrick = (nxhi_out_6-nxlo_out_6+1) * (nyhi_out_6-nylo_out_6+1) * 
+      (nzhi_out_6-nzlo_out_6+1);
+    bytes += (1 + diff + per ) * nbrick * sizeof(FFT_SCALAR) * mixing;     // density_brick + vd_brick + per atom bricks
+    bytes += 6 * nfft_both_6 * sizeof(double);      // vg
+    bytes += nfft_both_6 * sizeof(double);          // greensfn
+    bytes += nfft_both_6 * (mixing + 2) * sizeof(FFT_SCALAR);    // density_FFT, work1, work2
+    bytes += cg_6->memory_usage();
+  }
+  return bytes;
+}
diff --git a/src/KSPACE/pppm_old.cpp b/src/KSPACE/pppm_old.cpp
index a368b5d5b0..22c7471b18 100644
--- a/src/KSPACE/pppm_old.cpp
+++ b/src/KSPACE/pppm_old.cpp
@@ -1,2863 +1,2863 @@
-/* ----------------------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   http://lammps.sandia.gov, Sandia National Laboratories
-   Steve Plimpton, sjplimp@sandia.gov
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-/* ----------------------------------------------------------------------
-   Contributing authors: Roy Pollock (LLNL), Paul Crozier (SNL)
-     per-atom energy/virial & group/group energy/force added by Stan Moore (BYU)
-------------------------------------------------------------------------- */
-
-#include "lmptype.h"
-#include "mpi.h"
-#include "string.h"
-#include "stdio.h"
-#include "stdlib.h"
-#include "math.h"
-#include "pppm_old.h"
-#include "math_const.h"
-#include "atom.h"
-#include "comm.h"
-#include "neighbor.h"
-#include "force.h"
-#include "pair.h"
-#include "bond.h"
-#include "angle.h"
-#include "domain.h"
-#include "fft3d_wrap.h"
-#include "remap_wrap.h"
-#include "memory.h"
-#include "error.h"
-
-using namespace LAMMPS_NS;
-using namespace MathConst;
-
-#define MAXORDER 7
-#define OFFSET 16384
-#define SMALL 0.00001
-#define LARGE 10000.0
-#define EPS_HOC 1.0e-7
-
-#ifdef FFT_SINGLE
-#define ZEROF 0.0f
-#define ONEF  1.0f
-#else
-#define ZEROF 0.0
-#define ONEF  1.0
-#endif
-
-/* ---------------------------------------------------------------------- */
-
-PPPMOld::PPPMOld(LAMMPS *lmp, int narg, char **arg) : KSpace(lmp, narg, arg)
-{
-  if (narg < 1) error->all(FLERR,"Illegal kspace_style pppm command");
-
-  triclinic_support = 0;
-  pppmflag = 1;
-  group_group_enable = 0;
-
-  accuracy_relative = fabs(force->numeric(FLERR,arg[0]));
-
-  nfactors = 3;
-  factors = new int[nfactors];
-  factors[0] = 2;
-  factors[1] = 3;
-  factors[2] = 5;
-
-  MPI_Comm_rank(world,&me);
-  MPI_Comm_size(world,&nprocs);
-
-  density_brick = vdx_brick = vdy_brick = vdz_brick = NULL;
-  density_fft = NULL;
-  u_brick = NULL;
-  v0_brick = v1_brick = v2_brick = v3_brick = v4_brick = v5_brick = NULL;
-  greensfn = NULL;
-  work1 = work2 = NULL;
-  vg = NULL;
-  fkx = fky = fkz = NULL;
-  buf1 = buf2 = buf3 = buf4 = NULL;
-
-  density_A_brick = density_B_brick = NULL;
-  density_A_fft = density_B_fft = NULL;
-
-  gf_b = NULL;
-  rho1d = rho_coeff = NULL;
-
-  fft1 = fft2 = NULL;
-  remap = NULL;
-
-  nmax = 0;
-  part2grid = NULL;
-}
-
-/* ----------------------------------------------------------------------
-   free all memory
-------------------------------------------------------------------------- */
-
-PPPMOld::~PPPMOld()
-{
-  delete [] factors;
-  deallocate();
-  deallocate_peratom();
-  deallocate_groups();
-  memory->destroy(part2grid);
-}
-
-/* ----------------------------------------------------------------------
-   called once before run
-------------------------------------------------------------------------- */
-
-void PPPMOld::init()
-{
-  if (me == 0) {
-    if (screen) fprintf(screen,"PPPM initialization ...\n");
-    if (logfile) fprintf(logfile,"PPPM initialization ...\n");
-  }
-
-  // error check
-
-  triclinic_check();
-  if (domain->dimension == 2) error->all(FLERR,
-                                         "Cannot use PPPM with 2d simulation");
-
-  if (!atom->q_flag) error->all(FLERR,"Kspace style requires atom attribute q");
-
-  if (slabflag == 0 && domain->nonperiodic > 0)
-    error->all(FLERR,"Cannot use nonperiodic boundaries with PPPM");
-  if (slabflag) {
-    if (domain->xperiodic != 1 || domain->yperiodic != 1 ||
-        domain->boundary[2][0] != 1 || domain->boundary[2][1] != 1)
-      error->all(FLERR,"Incorrect boundaries with slab PPPM");
-  }
-
-  if (order < 2 || order > MAXORDER) {
-    char str[128];
-    sprintf(str,"PPPM order cannot be < 2 or > than %d",MAXORDER);
-    error->all(FLERR,str);
-  }
-
-  // free all arrays previously allocated
-
-  deallocate();
-  deallocate_peratom();
-  peratom_allocate_flag = 0;
-  deallocate_groups();
-  group_allocate_flag = 0;
-
-  // extract short-range Coulombic cutoff from pair style
-
-  scale = 1.0;
-
-  pair_check();
-
-  int itmp=0;
-  double *p_cutoff = (double *) force->pair->extract("cut_coul",itmp);
-  if (p_cutoff == NULL)
-    error->all(FLERR,"KSpace style is incompatible with Pair style");
-  cutoff = *p_cutoff;
-
-  // if kspace is TIP4P, extract TIP4P params from pair style
-  // bond/angle are not yet init(), so insure equilibrium request is valid
-
-  qdist = 0.0;
-
-  if (tip4pflag) {
-    double *p_qdist = (double *) force->pair->extract("qdist",itmp);
-    int *p_typeO = (int *) force->pair->extract("typeO",itmp);
-    int *p_typeH = (int *) force->pair->extract("typeH",itmp);
-    int *p_typeA = (int *) force->pair->extract("typeA",itmp);
-    int *p_typeB = (int *) force->pair->extract("typeB",itmp);
-    if (!p_qdist || !p_typeO || !p_typeH || !p_typeA || !p_typeB)
-      error->all(FLERR,"KSpace style is incompatible with Pair style");
-    qdist = *p_qdist;
-    typeO = *p_typeO;
-    typeH = *p_typeH;
-    int typeA = *p_typeA;
-    int typeB = *p_typeB;
-
-    if (force->angle == NULL || force->bond == NULL)
-      error->all(FLERR,"Bond and angle potentials must be defined for TIP4P");
-    if (typeA < 1 || typeA > atom->nangletypes ||
-        force->angle->setflag[typeA] == 0)
-      error->all(FLERR,"Bad TIP4P angle type for PPPM/TIP4P");
-    if (typeB < 1 || typeB > atom->nbondtypes ||
-        force->bond->setflag[typeB] == 0)
-      error->all(FLERR,"Bad TIP4P bond type for PPPM/TIP4P");
-    double theta = force->angle->equilibrium_angle(typeA);
-    double blen = force->bond->equilibrium_distance(typeB);
-    alpha = qdist / (cos(0.5*theta) * blen);
-  }
-
-  // compute qsum & qsqsum and warn if not charge-neutral
-
-  qsum = qsqsum = 0.0;
-  for (int i = 0; i < atom->nlocal; i++) {
-    qsum += atom->q[i];
-    qsqsum += atom->q[i]*atom->q[i];
-  }
-
-  double tmp;
-  MPI_Allreduce(&qsum,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
-  qsum = tmp;
-  MPI_Allreduce(&qsqsum,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
-  qsqsum = tmp;
-
-  if (qsqsum == 0.0)
-    error->all(FLERR,"Cannot use kspace solver on system with no charge");
-  if (fabs(qsum) > SMALL && me == 0) {
-    char str[128];
-    sprintf(str,"System is not charge neutral, net charge = %g",qsum);
-    error->warning(FLERR,str);
-  }
-
-  // set accuracy (force units) from accuracy_relative or accuracy_absolute
-
-  if (accuracy_absolute >= 0.0) accuracy = accuracy_absolute;
-  else accuracy = accuracy_relative * two_charge_force;
-
-  // setup FFT grid resolution and g_ewald
-  // normally one iteration thru while loop is all that is required
-  // if grid stencil extends beyond neighbor proc, reduce order and try again
-
-  int iteration = 0;
-
-  while (order > 1) {
-    if (iteration && me == 0)
-      error->warning(FLERR,"Reducing PPPM order b/c stencil extends "
-                     "beyond neighbor processor");
-    iteration++;
-
-    set_grid();
-
-    if (nx_pppm >= OFFSET || ny_pppm >= OFFSET || nz_pppm >= OFFSET)
-      error->all(FLERR,"PPPM grid is too large");
-
-    // global indices of PPPM grid range from 0 to N-1
-    // nlo_in,nhi_in = lower/upper limits of the 3d sub-brick of
-    //   global PPPM grid that I own without ghost cells
-    // for slab PPPM, assign z grid as if it were not extended
-
-    nxlo_in = static_cast<int> (comm->xsplit[comm->myloc[0]] * nx_pppm);
-    nxhi_in = static_cast<int> (comm->xsplit[comm->myloc[0]+1] * nx_pppm) - 1;
-
-    nylo_in = static_cast<int> (comm->ysplit[comm->myloc[1]] * ny_pppm);
-    nyhi_in = static_cast<int> (comm->ysplit[comm->myloc[1]+1] * ny_pppm) - 1;
-
-    nzlo_in = static_cast<int>
-      (comm->zsplit[comm->myloc[2]] * nz_pppm/slab_volfactor);
-    nzhi_in = static_cast<int>
-      (comm->zsplit[comm->myloc[2]+1] * nz_pppm/slab_volfactor) - 1;
-
-    // nlower,nupper = stencil size for mapping particles to PPPM grid
-
-    nlower = -(order-1)/2;
-    nupper = order/2;
-
-    // shift values for particle <-> grid mapping
-    // add/subtract OFFSET to avoid int(-0.75) = 0 when want it to be -1
-
-    if (order % 2) shift = OFFSET + 0.5;
-    else shift = OFFSET;
-    if (order % 2) shiftone = 0.0;
-    else shiftone = 0.5;
-
-    // nlo_out,nhi_out = lower/upper limits of the 3d sub-brick of
-    //   global PPPM grid that my particles can contribute charge to
-    // effectively nlo_in,nhi_in + ghost cells
-    // nlo,nhi = global coords of grid pt to "lower left" of smallest/largest
-    //           position a particle in my box can be at
-    // dist[3] = particle position bound = subbox + skin/2.0 + qdist
-    //   qdist = offset due to TIP4P fictitious charge
-    //   convert to triclinic if necessary
-    // nlo_out,nhi_out = nlo,nhi + stencil size for particle mapping
-    // for slab PPPM, assign z grid as if it were not extended
-
-    triclinic = domain->triclinic;
-    double *prd,*sublo,*subhi;
-
-    if (triclinic == 0) {
-      prd = domain->prd;
-      boxlo = domain->boxlo;
-      sublo = domain->sublo;
-      subhi = domain->subhi;
-    } else {
-      prd = domain->prd_lamda;
-      boxlo = domain->boxlo_lamda;
-      sublo = domain->sublo_lamda;
-      subhi = domain->subhi_lamda;
-    }
-
-    double xprd = prd[0];
-    double yprd = prd[1];
-    double zprd = prd[2];
-    double zprd_slab = zprd*slab_volfactor;
-
-    double dist[3];
-    double cuthalf = 0.5*neighbor->skin + qdist;
-    if (triclinic == 0) dist[0] = dist[1] = dist[2] = cuthalf;
-    else {
-      dist[0] = cuthalf/domain->prd[0];
-      dist[1] = cuthalf/domain->prd[1];
-      dist[2] = cuthalf/domain->prd[2];
-    }
-
-    int nlo,nhi;
-
-    nlo = static_cast<int> ((sublo[0]-dist[0]-boxlo[0]) *
-                            nx_pppm/xprd + shift) - OFFSET;
-    nhi = static_cast<int> ((subhi[0]+dist[0]-boxlo[0]) *
-                            nx_pppm/xprd + shift) - OFFSET;
-    nxlo_out = nlo + nlower;
-    nxhi_out = nhi + nupper;
-
-    nlo = static_cast<int> ((sublo[1]-dist[1]-boxlo[1]) *
-                            ny_pppm/yprd + shift) - OFFSET;
-    nhi = static_cast<int> ((subhi[1]+dist[1]-boxlo[1]) *
-                            ny_pppm/yprd + shift) - OFFSET;
-    nylo_out = nlo + nlower;
-    nyhi_out = nhi + nupper;
-
-    nlo = static_cast<int> ((sublo[2]-dist[2]-boxlo[2]) *
-                            nz_pppm/zprd_slab + shift) - OFFSET;
-    nhi = static_cast<int> ((subhi[2]+dist[2]-boxlo[2]) *
-                            nz_pppm/zprd_slab + shift) - OFFSET;
-    nzlo_out = nlo + nlower;
-    nzhi_out = nhi + nupper;
-
-    // for slab PPPM, change the grid boundary for processors at +z end
-    //   to include the empty volume between periodically repeating slabs
-    // for slab PPPM, want charge data communicated from -z proc to +z proc,
-    //   but not vice versa, also want field data communicated from +z proc to
-    //   -z proc, but not vice versa
-    // this is accomplished by nzhi_in = nzhi_out on +z end (no ghost cells)
-
-    if (slabflag == 1 && (comm->myloc[2] == comm->procgrid[2]-1)) {
-      nzhi_in = nz_pppm - 1;
-      nzhi_out = nz_pppm - 1;
-    }
-
-    // nlo_ghost,nhi_ghost = # of planes I will recv from 6 directions
-    //   that overlay domain I own
-    // proc in that direction tells me via sendrecv()
-    // if no neighbor proc, value is from self since I have ghosts regardless
-
-    int nplanes;
-    MPI_Status status;
-
-    nplanes = nxlo_in - nxlo_out;
-    if (comm->procneigh[0][0] != me)
-      MPI_Sendrecv(&nplanes,1,MPI_INT,comm->procneigh[0][0],0,
-                   &nxhi_ghost,1,MPI_INT,comm->procneigh[0][1],0,
-                   world,&status);
-    else nxhi_ghost = nplanes;
-
-    nplanes = nxhi_out - nxhi_in;
-    if (comm->procneigh[0][1] != me)
-      MPI_Sendrecv(&nplanes,1,MPI_INT,comm->procneigh[0][1],0,
-                   &nxlo_ghost,1,MPI_INT,comm->procneigh[0][0],
-                   0,world,&status);
-    else nxlo_ghost = nplanes;
-
-    nplanes = nylo_in - nylo_out;
-    if (comm->procneigh[1][0] != me)
-      MPI_Sendrecv(&nplanes,1,MPI_INT,comm->procneigh[1][0],0,
-                   &nyhi_ghost,1,MPI_INT,comm->procneigh[1][1],0,
-                   world,&status);
-    else nyhi_ghost = nplanes;
-
-    nplanes = nyhi_out - nyhi_in;
-    if (comm->procneigh[1][1] != me)
-      MPI_Sendrecv(&nplanes,1,MPI_INT,comm->procneigh[1][1],0,
-                   &nylo_ghost,1,MPI_INT,comm->procneigh[1][0],0,
-                   world,&status);
-    else nylo_ghost = nplanes;
-
-    nplanes = nzlo_in - nzlo_out;
-    if (comm->procneigh[2][0] != me)
-      MPI_Sendrecv(&nplanes,1,MPI_INT,comm->procneigh[2][0],0,
-                   &nzhi_ghost,1,MPI_INT,comm->procneigh[2][1],0,
-                   world,&status);
-    else nzhi_ghost = nplanes;
-
-    nplanes = nzhi_out - nzhi_in;
-    if (comm->procneigh[2][1] != me)
-      MPI_Sendrecv(&nplanes,1,MPI_INT,comm->procneigh[2][1],0,
-                   &nzlo_ghost,1,MPI_INT,comm->procneigh[2][0],0,
-                   world,&status);
-    else nzlo_ghost = nplanes;
-
-    // test that ghost overlap is not bigger than my sub-domain
-
-    int flag = 0;
-    if (nxlo_ghost > nxhi_in-nxlo_in+1) flag = 1;
-    if (nxhi_ghost > nxhi_in-nxlo_in+1) flag = 1;
-    if (nylo_ghost > nyhi_in-nylo_in+1) flag = 1;
-    if (nyhi_ghost > nyhi_in-nylo_in+1) flag = 1;
-    if (nzlo_ghost > nzhi_in-nzlo_in+1) flag = 1;
-    if (nzhi_ghost > nzhi_in-nzlo_in+1) flag = 1;
-
-    int flag_all;
-    MPI_Allreduce(&flag,&flag_all,1,MPI_INT,MPI_SUM,world);
-
-    if (flag_all == 0) break;
-    order--;
-  }
-
-  if (order == 0) error->all(FLERR,"PPPM order has been reduced to 0");
-
-  // decomposition of FFT mesh
-  // global indices range from 0 to N-1
-  // proc owns entire x-dimension, clump of columns in y,z dimensions
-  // npey_fft,npez_fft = # of procs in y,z dims
-  // if nprocs is small enough, proc can own 1 or more entire xy planes,
-  //   else proc owns 2d sub-blocks of yz plane
-  // me_y,me_z = which proc (0-npe_fft-1) I am in y,z dimensions
-  // nlo_fft,nhi_fft = lower/upper limit of the section
-  //   of the global FFT mesh that I own
-
-  int npey_fft,npez_fft;
-  if (nz_pppm >= nprocs) {
-    npey_fft = 1;
-    npez_fft = nprocs;
-  } else procs2grid2d(nprocs,ny_pppm,nz_pppm,&npey_fft,&npez_fft);
-
-  int me_y = me % npey_fft;
-  int me_z = me / npey_fft;
-
-  nxlo_fft = 0;
-  nxhi_fft = nx_pppm - 1;
-  nylo_fft = me_y*ny_pppm/npey_fft;
-  nyhi_fft = (me_y+1)*ny_pppm/npey_fft - 1;
-  nzlo_fft = me_z*nz_pppm/npez_fft;
-  nzhi_fft = (me_z+1)*nz_pppm/npez_fft - 1;
-
-  // PPPM grid for this proc, including ghosts
-
-  ngrid = (nxhi_out-nxlo_out+1) * (nyhi_out-nylo_out+1) *
-    (nzhi_out-nzlo_out+1);
-
-  // FFT arrays on this proc, without ghosts
-  // nfft = FFT points in FFT decomposition on this proc
-  // nfft_brick = FFT points in 3d brick-decomposition on this proc
-  // nfft_both = greater of 2 values
-
-  nfft = (nxhi_fft-nxlo_fft+1) * (nyhi_fft-nylo_fft+1) *
-    (nzhi_fft-nzlo_fft+1);
-  int nfft_brick = (nxhi_in-nxlo_in+1) * (nyhi_in-nylo_in+1) *
-    (nzhi_in-nzlo_in+1);
-  nfft_both = MAX(nfft,nfft_brick);
-
-  // buffer space for use in brick2fft and fillbrick
-  // idel = max # of ghost planes to send or recv in +/- dir of each dim
-  // nx,ny,nz = owned planes (including ghosts) in each dim
-  // nxx,nyy,nzz = max # of grid cells to send in each dim
-  // nbuf = max in any dim, augment by 3x for components of vd_xyz in fillbrick
-
-  int idelx,idely,idelz,nx,ny,nz,nxx,nyy,nzz;
-
-  idelx = MAX(nxlo_ghost,nxhi_ghost);
-  idelx = MAX(idelx,nxhi_out-nxhi_in);
-  idelx = MAX(idelx,nxlo_in-nxlo_out);
-
-  idely = MAX(nylo_ghost,nyhi_ghost);
-  idely = MAX(idely,nyhi_out-nyhi_in);
-  idely = MAX(idely,nylo_in-nylo_out);
-
-  idelz = MAX(nzlo_ghost,nzhi_ghost);
-  idelz = MAX(idelz,nzhi_out-nzhi_in);
-  idelz = MAX(idelz,nzlo_in-nzlo_out);
-
-  nx = nxhi_out - nxlo_out + 1;
-  ny = nyhi_out - nylo_out + 1;
-  nz = nzhi_out - nzlo_out + 1;
-
-  nxx = idelx * ny * nz;
-  nyy = idely * nx * nz;
-  nzz = idelz * nx * ny;
-
-  nbuf = MAX(nxx,nyy);
-  nbuf = MAX(nbuf,nzz);
-
-  nbuf_peratom = 7*nbuf;
-  nbuf *= 3;
-
-  // print stats
-
-  int ngrid_max,nfft_both_max,nbuf_max;
-  MPI_Allreduce(&ngrid,&ngrid_max,1,MPI_INT,MPI_MAX,world);
-  MPI_Allreduce(&nfft_both,&nfft_both_max,1,MPI_INT,MPI_MAX,world);
-  MPI_Allreduce(&nbuf,&nbuf_max,1,MPI_INT,MPI_MAX,world);
-
-  if (me == 0) {
-    if (screen) fprintf(screen,"  brick FFT buffer size/proc = %d %d %d\n",
-                        ngrid_max,nfft_both_max,nbuf_max);
-    if (logfile) fprintf(logfile,"  brick FFT buffer size/proc = %d %d %d\n",
-                         ngrid_max,nfft_both_max,nbuf_max);
-  }
-
-  // allocate K-space dependent memory
-  // don't invoke allocate_peratom() here, wait to see if needed
-
-  allocate();
-
-  // pre-compute Green's function denominator expansion
-  // pre-compute 1d charge distribution coefficients
-
-  compute_gf_denom();
-  compute_rho_coeff();
-}
-
-/* ----------------------------------------------------------------------
-   adjust PPPM coeffs, called initially and whenever volume has changed
-------------------------------------------------------------------------- */
-
-void PPPMOld::setup()
-{
-  // Recomputes everything here that depends on the box lengths:
-  //   volume, inverse grid spacings (delxinv,delyinv,delzinv,delvolinv),
-  //   wave vectors fkx/fky/fkz, virial coefficients vg and the Green's
-  //   function greensfn, each over this proc's FFT range n*lo_fft..n*hi_fft.
-  // Reads g_ewald, order, slab_volfactor and the grid sizes n*_pppm.
-
-  int i,j,k,l,m,n;
-  double *prd;
-
-  // volume-dependent factors
-  // adjust z dimension for 2d slab PPPM
-  // z dimension for 3d PPPM is zprd since slab_volfactor = 1.0
-
-  if (triclinic == 0) prd = domain->prd;
-  else prd = domain->prd_lamda;
-
-  double xprd = prd[0];
-  double yprd = prd[1];
-  double zprd = prd[2];
-  double zprd_slab = zprd*slab_volfactor;
-  volume = xprd * yprd * zprd_slab;
-
-  // grid points per unit length in each dimension, and their product
-
-  delxinv = nx_pppm/xprd;
-  delyinv = ny_pppm/yprd;
-  delzinv = nz_pppm/zprd_slab;
-
-  delvolinv = delxinv*delyinv*delzinv;
-
-  // smallest non-zero wave vector component in each dimension
-
-  double unitkx = (2.0*MY_PI/xprd);
-  double unitky = (2.0*MY_PI/yprd);
-  double unitkz = (2.0*MY_PI/zprd_slab);
-
-  // fkx,fky,fkz for my FFT grid pts
-  // integer division 2*i/N is 0 for the lower half of the indices and 1 for
-  //   the upper half, so per = i or i-N (signed frequency index)
-
-  double per;
-
-  for (i = nxlo_fft; i <= nxhi_fft; i++) {
-    per = i - nx_pppm*(2*i/nx_pppm);
-    fkx[i] = unitkx*per;
-  }
-
-  for (i = nylo_fft; i <= nyhi_fft; i++) {
-    per = i - ny_pppm*(2*i/ny_pppm);
-    fky[i] = unitky*per;
-  }
-
-  for (i = nzlo_fft; i <= nzhi_fft; i++) {
-    per = i - nz_pppm*(2*i/nz_pppm);
-    fkz[i] = unitkz*per;
-  }
-
-  // virial coefficients
-  // n is a flat index over my FFT points with x varying fastest, then y, z
-  // vg[n][0..5] hold the xx,yy,zz,xy,xz,yz components
-  // the k = 0 point gets all zeroes, which also avoids dividing by sqk = 0
-
-  double sqk,vterm;
-
-  n = 0;
-  for (k = nzlo_fft; k <= nzhi_fft; k++) {
-    for (j = nylo_fft; j <= nyhi_fft; j++) {
-      for (i = nxlo_fft; i <= nxhi_fft; i++) {
-        sqk = fkx[i]*fkx[i] + fky[j]*fky[j] + fkz[k]*fkz[k];
-        if (sqk == 0.0) {
-          vg[n][0] = 0.0;
-          vg[n][1] = 0.0;
-          vg[n][2] = 0.0;
-          vg[n][3] = 0.0;
-          vg[n][4] = 0.0;
-          vg[n][5] = 0.0;
-        } else {
-          vterm = -2.0 * (1.0/sqk + 0.25/(g_ewald*g_ewald));
-          vg[n][0] = 1.0 + vterm*fkx[i]*fkx[i];
-          vg[n][1] = 1.0 + vterm*fky[j]*fky[j];
-          vg[n][2] = 1.0 + vterm*fkz[k]*fkz[k];
-          vg[n][3] = vterm*fkx[i]*fky[j];
-          vg[n][4] = vterm*fkx[i]*fkz[k];
-          vg[n][5] = vterm*fky[j]*fkz[k];
-        }
-        n++;
-      }
-    }
-  }
-
-  // modified (Hockney-Eastwood) Coulomb Green's function
-
-  int nx,ny,nz,kper,lper,mper;
-  double snx,sny,snz,snx2,sny2,snz2;
-  double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz;
-  double sum1,dot1,dot2;
-  double numerator,denominator;
-
-  // nbx,nby,nbz = half-width of the image sum below, per dimension
-  // the image index runs from -nb to +nb, so nb = 0 keeps only the
-  //   unshifted wave vector
-
-  int nbx = static_cast<int> ((g_ewald*xprd/(MY_PI*nx_pppm)) *
-                              pow(-log(EPS_HOC),0.25));
-  int nby = static_cast<int> ((g_ewald*yprd/(MY_PI*ny_pppm)) *
-                              pow(-log(EPS_HOC),0.25));
-  int nbz = static_cast<int> ((g_ewald*zprd_slab/(MY_PI*nz_pppm)) *
-                              pow(-log(EPS_HOC),0.25));
-
-  // constant prefactor of the numerator; fixed at 1.0 here
-
-  double form = 1.0;
-
-  // same flat ordering of n as for vg above: x fastest, then y, then z
-  // kper,lper,mper = signed frequency indices, as for fkx,fky,fkz
-  // snx2,sny2,snz2 = squared sines handed to gf_denom()
-
-  n = 0;
-  for (m = nzlo_fft; m <= nzhi_fft; m++) {
-    mper = m - nz_pppm*(2*m/nz_pppm);
-    snz = sin(0.5*unitkz*mper*zprd_slab/nz_pppm);
-    snz2 = snz*snz;
-
-    for (l = nylo_fft; l <= nyhi_fft; l++) {
-      lper = l - ny_pppm*(2*l/ny_pppm);
-      sny = sin(0.5*unitky*lper*yprd/ny_pppm);
-      sny2 = sny*sny;
-
-      for (k = nxlo_fft; k <= nxhi_fft; k++) {
-        kper = k - nx_pppm*(2*k/nx_pppm);
-        snx = sin(0.5*unitkx*kper*xprd/nx_pppm);
-        snx2 = snx*snx;
-
-        sqk = pow(unitkx*kper,2.0) + pow(unitky*lper,2.0) +
-          pow(unitkz*mper,2.0);
-
-        if (sqk != 0.0) {
-          // 12.5663706 is 4*pi written out to 9 significant digits
-          numerator = form*12.5663706/sqk;
-          denominator = gf_denom(snx2,sny2,snz2);
-          sum1 = 0.0;
-          const double dorder = static_cast<double>(order);
-
-          // sum over wave vectors shifted by whole multiples of the grid
-          //   size: q = unitk*(per + N*image)
-          // s = exp(-q^2/(4 g_ewald^2)) factor for one dimension
-          // w = (sin(arg)/arg)^order, taken as 1 when arg = 0
-
-          for (nx = -nbx; nx <= nbx; nx++) {
-            qx = unitkx*(kper+nx_pppm*nx);
-            sx = exp(-0.25*pow(qx/g_ewald,2.0));
-            wx = 1.0;
-            argx = 0.5*qx*xprd/nx_pppm;
-            if (argx != 0.0) wx = pow(sin(argx)/argx,dorder);
-            for (ny = -nby; ny <= nby; ny++) {
-              qy = unitky*(lper+ny_pppm*ny);
-              sy = exp(-0.25*pow(qy/g_ewald,2.0));
-              wy = 1.0;
-              argy = 0.5*qy*yprd/ny_pppm;
-              if (argy != 0.0) wy = pow(sin(argy)/argy,dorder);
-              for (nz = -nbz; nz <= nbz; nz++) {
-                qz = unitkz*(mper+nz_pppm*nz);
-                sz = exp(-0.25*pow(qz/g_ewald,2.0));
-                wz = 1.0;
-                argz = 0.5*qz*zprd_slab/nz_pppm;
-                if (argz != 0.0) wz = pow(sin(argz)/argz,dorder);
-
-                // dot1 = (unshifted k) . q, dot2 = |q|^2
-                // dot2 cannot be 0 here: the all-zero image index gives
-                //   dot2 = sqk, which is non-zero in this branch
-
-                dot1 = unitkx*kper*qx + unitky*lper*qy + unitkz*mper*qz;
-                dot2 = qx*qx+qy*qy+qz*qz;
-                sum1 += (dot1/dot2) * sx*sy*sz * pow(wx*wy*wz,2.0);
-              }
-            }
-          }
-          greensfn[n++] = numerator*sum1/denominator;
-        } else greensfn[n++] = 0.0;
-      }
-    }
-  }
-}
-
-/* ----------------------------------------------------------------------
-   compute the PPPM long-range force, energy, virial
-------------------------------------------------------------------------- */
-
-void PPPMOld::compute(int eflag, int vflag)
-{
-  // One PPPM evaluation: map charges to the grid, solve on the FFT mesh,
-  // interpolate back to the atoms, then finish the global and per-atom
-  // energy/virial tallies selected by eflag/vflag.
-
-  // energy/virial bookkeeping; with nothing requested, clear every flag
-
-  if (eflag || vflag) {
-    ev_setup(eflag,vflag);
-  } else {
-    evflag = 0;
-    evflag_atom = 0;
-    eflag_global = 0;
-    vflag_global = 0;
-    eflag_atom = 0;
-    vflag_atom = 0;
-  }
-
-  // per-atom grids are only allocated the first time they are needed
-
-  if (evflag_atom && !peratom_allocate_flag) {
-    allocate_peratom();
-    peratom_allocate_flag = 1;
-  }
-
-  // triclinic: switch atoms from box to lamda coords for this call
-
-  if (triclinic) {
-    boxlo = domain->boxlo_lamda;
-    domain->x2lamda(atom->nlocal);
-  } else {
-    boxlo = domain->boxlo;
-  }
-
-  // regrow part2grid once the local atom count exceeds its capacity
-
-  if (nmax < atom->nlocal) {
-    memory->destroy(part2grid);
-    nmax = atom->nmax;
-    memory->create(part2grid,nmax,3,"pppm:part2grid");
-  }
-
-  // locate the grid point of each of my particles,
-  // then spread their charge onto my local 3d density grid
-
-  particle_map();
-  make_rho();
-
-  // sum ghost-cell density contributions into the owning 3d bricks
-  // and remap from the 3d brick decomposition to the FFT decomposition
-
-  brick2fft();
-
-  // solve on my part of the FFT grid: potential gradient and my share
-  //   of e_long; gradients come back in the 3d brick decomposition
-  // per-atom quantities are handled inside via poisson_peratom()
-
-  poisson();
-
-  // exchange E-field values to fill the ghost cells around my 3d brick
-
-  fillbrick();
-
-  // same exchange for the extra per-atom energy/virial grids
-
-  if (evflag_atom) fillbrick_peratom();
-
-  // forces on my particles
-
-  fieldforce();
-
-  // per-atom energy/virial on my particles
-
-  if (evflag_atom) fieldforce_peratom();
-
-  // global energy: sum over procs, then add the volume-dependent term
-
-  const double qscale = force->qqrd2e * scale;
-
-  if (eflag_global) {
-    double energy_sum;
-    MPI_Allreduce(&energy,&energy_sum,1,MPI_DOUBLE,MPI_SUM,world);
-    energy = energy_sum;
-
-    energy *= 0.5*volume;
-    energy -= g_ewald*qsqsum/MY_PIS +
-      MY_PI2*qsum*qsum / (g_ewald*g_ewald*volume);
-    energy *= qscale;
-  }
-
-  // global virial: sum over procs and rescale
-
-  if (vflag_global) {
-    double virial_sum[6];
-    MPI_Allreduce(virial,virial_sum,6,MPI_DOUBLE,MPI_SUM,world);
-    const double vscale = 0.5*qscale*volume;
-    for (int n = 0; n < 6; n++) virial[n] = vscale*virial_sum[n];
-  }
-
-  // per-atom energy/virial
-  // the energy picks up the self-energy correction
-
-  if (evflag_atom) {
-    double *charge = atom->q;
-    const int nown = atom->nlocal;
-
-    if (eflag_atom) {
-      for (int a = 0; a < nown; a++) {
-        eatom[a] *= 0.5;
-        eatom[a] -= g_ewald*charge[a]*charge[a]/MY_PIS +
-          MY_PI2*charge[a]*qsum / (g_ewald*g_ewald*volume);
-        eatom[a] *= qscale;
-      }
-    }
-
-    if (vflag_atom) {
-      for (int a = 0; a < nown; a++) {
-        const double vpre = 0.5*charge[a]*qscale;
-        for (int c = 0; c < 6; c++) vatom[a][c] *= vpre;
-      }
-    }
-  }
-
-  // 2d slab correction
-
-  if (slabflag == 1) slabcorr();
-
-  // triclinic: switch atoms back from lamda to box coords
-
-  if (triclinic) domain->lamda2x(atom->nlocal);
-}
-
-/* ----------------------------------------------------------------------
-   allocate memory that depends on # of K-vectors and order
-------------------------------------------------------------------------- */
-
-void PPPMOld::allocate()
-{
-  // Creates all grid, FFT and stencil storage plus the two FFT objects and
-  // the Remap. Sizes come from members that must already be set: the brick
-  // bounds n*lo_out..n*hi_out and n*lo_in..n*hi_in, the FFT bounds
-  // n*lo_fft..n*hi_fft, nfft_both, nbuf and order.
-
-  // density and the three field components on my 3d brick incl. ghosts
-  // index order is [z][y][x], each index starting at its n*lo_out value
-
-  memory->create3d_offset(density_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
-                          nxlo_out,nxhi_out,"pppm:density_brick");
-  memory->create3d_offset(vdx_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
-                          nxlo_out,nxhi_out,"pppm:vdx_brick");
-  memory->create3d_offset(vdy_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
-                          nxlo_out,nxhi_out,"pppm:vdy_brick");
-  memory->create3d_offset(vdz_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
-                          nxlo_out,nxhi_out,"pppm:vdz_brick");
-
-  // flat arrays of nfft_both entries; work1/work2 get twice that
-  //   (presumably interleaved real/imaginary parts - confirm in poisson())
-
-  memory->create(density_fft,nfft_both,"pppm:density_fft");
-  memory->create(greensfn,nfft_both,"pppm:greensfn");
-  memory->create(work1,2*nfft_both,"pppm:work1");
-  memory->create(work2,2*nfft_both,"pppm:work2");
-  memory->create(vg,nfft_both,6,"pppm:vg");
-
-  // wave vectors, indexed directly by global FFT grid index
-
-  memory->create1d_offset(fkx,nxlo_fft,nxhi_fft,"pppm:fkx");
-  memory->create1d_offset(fky,nylo_fft,nyhi_fft,"pppm:fky");
-  memory->create1d_offset(fkz,nzlo_fft,nzhi_fft,"pppm:fkz");
-
-  // two buffers of nbuf entries each
-
-  memory->create(buf1,nbuf,"pppm:buf1");
-  memory->create(buf2,nbuf,"pppm:buf2");
-
-  // summation coeffs
-  // rho1d and rho_coeff use negative lower bounds derived from order;
-  //   deallocate() passes the same lower bounds, so order must not change
-  //   between allocate() and deallocate()
-
-  memory->create(gf_b,order,"pppm:gf_b");
-  memory->create2d_offset(rho1d,3,-order/2,order/2,"pppm:rho1d");
-  memory->create2d_offset(rho_coeff,order,(1-order)/2,order/2,"pppm:rho_coeff");
-
-  // create 2 FFTs and a Remap
-  // 1st FFT keeps data in FFT decomposition
-  // 2nd FFT returns data in 3d brick decomposition
-  // remap takes data from 3d brick to FFT decomposition
-
-  // tmp is handed to both FFT3d constructors by address and not read here
-
-  int tmp;
-
-  fft1 = new FFT3d(lmp,world,nx_pppm,ny_pppm,nz_pppm,
-                   nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft,
-                   nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft,
-                   0,0,&tmp);
-
-  fft2 = new FFT3d(lmp,world,nx_pppm,ny_pppm,nz_pppm,
-                   nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft,
-                   nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
-                   0,0,&tmp);
-
-  remap = new Remap(lmp,world,
-                    nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
-                    nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft,
-                    1,0,0,FFT_PRECISION);
-}
-
-/* ----------------------------------------------------------------------
-   allocate per-atom memory that depends on # of K-vectors and order
-------------------------------------------------------------------------- */
-
-void PPPMOld::allocate_peratom()
-{
-  // Creates the seven extra 3d bricks (u_brick and v0_brick..v5_brick) and
-  // the two buffers used only for per-atom energy/virial.
-  // All bricks share the bounds n*lo_out..n*hi_out, indexed [z][y][x].
-
-  memory->create3d_offset(u_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
-                          nxlo_out,nxhi_out,"pppm:u_brick");
-
-  memory->create3d_offset(v0_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
-                          nxlo_out,nxhi_out,"pppm:v0_brick");
-  memory->create3d_offset(v1_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
-                          nxlo_out,nxhi_out,"pppm:v1_brick");
-  memory->create3d_offset(v2_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
-                          nxlo_out,nxhi_out,"pppm:v2_brick");
-  memory->create3d_offset(v3_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
-                          nxlo_out,nxhi_out,"pppm:v3_brick");
-  memory->create3d_offset(v4_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
-                          nxlo_out,nxhi_out,"pppm:v4_brick");
-  memory->create3d_offset(v5_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
-                          nxlo_out,nxhi_out,"pppm:v5_brick");
-
-  // buffers of nbuf_peratom entries
-  //   (presumably one slot per per-atom brick above - confirm where
-  //    nbuf_peratom is set)
-
-  memory->create(buf3,nbuf_peratom,"pppm:buf3");
-  memory->create(buf4,nbuf_peratom,"pppm:buf4");
-}
-
-/* ----------------------------------------------------------------------
-   deallocate memory that depends on # of K-vectors and order
-------------------------------------------------------------------------- */
-
-void PPPMOld::deallocate()
-{
-  // Releases everything created in allocate().
-  // The *_offset destroy calls are given the lower bounds that were used at
-  // creation, so n*lo_out, n*lo_fft and order must still hold those values.
-
-  // 3d bricks
-
-  memory->destroy3d_offset(density_brick,nzlo_out,nylo_out,nxlo_out);
-  memory->destroy3d_offset(vdx_brick,nzlo_out,nylo_out,nxlo_out);
-  memory->destroy3d_offset(vdy_brick,nzlo_out,nylo_out,nxlo_out);
-  memory->destroy3d_offset(vdz_brick,nzlo_out,nylo_out,nxlo_out);
-
-  // FFT-sized arrays
-
-  memory->destroy(density_fft);
-  memory->destroy(greensfn);
-  memory->destroy(work1);
-  memory->destroy(work2);
-  memory->destroy(vg);
-
-  // wave vectors
-
-  memory->destroy1d_offset(fkx,nxlo_fft);
-  memory->destroy1d_offset(fky,nylo_fft);
-  memory->destroy1d_offset(fkz,nzlo_fft);
-
-  // buffers
-
-  memory->destroy(buf1);
-  memory->destroy(buf2);
-
-  // summation coeffs
-
-  memory->destroy(gf_b);
-  memory->destroy2d_offset(rho1d,-order/2);
-  memory->destroy2d_offset(rho_coeff,(1-order)/2);
-
-  // FFT and Remap objects; the pointers are not reset here
-
-  delete fft1;
-  delete fft2;
-  delete remap;
-}
-
-/* ----------------------------------------------------------------------
-   deallocate per-atom memory that depends on # of K-vectors and order
-------------------------------------------------------------------------- */
-
-void PPPMOld::deallocate_peratom()
-{
-  // Releases the bricks and buffers created in allocate_peratom().
-  // The lower bounds n*lo_out passed here are the ones used at creation,
-  // so they must not have changed since then.
-
-  memory->destroy3d_offset(u_brick,nzlo_out,nylo_out,nxlo_out);
-
-  memory->destroy3d_offset(v0_brick,nzlo_out,nylo_out,nxlo_out);
-  memory->destroy3d_offset(v1_brick,nzlo_out,nylo_out,nxlo_out);
-  memory->destroy3d_offset(v2_brick,nzlo_out,nylo_out,nxlo_out);
-  memory->destroy3d_offset(v3_brick,nzlo_out,nylo_out,nxlo_out);
-  memory->destroy3d_offset(v4_brick,nzlo_out,nylo_out,nxlo_out);
-  memory->destroy3d_offset(v5_brick,nzlo_out,nylo_out,nxlo_out);
-
-  memory->destroy(buf3);
-  memory->destroy(buf4);
-}
-
-/* ----------------------------------------------------------------------
-   set size of FFT grid (nx,ny,nz_pppm) and g_ewald
-------------------------------------------------------------------------- */
-
-void PPPMOld::set_grid()
-{
-  // Chooses the FFT grid (nx_pppm,ny_pppm,nz_pppm) and g_ewald:
-  //   1. unless gewaldflag is set, estimate g_ewald from the target
-  //      accuracy and the real-space cutoff
-  //   2. unless gridflag is set, refine each grid dimension until the
-  //      per-dimension RMS error estimate is within the target accuracy
-  //   3. bump each dimension up to the next factorable size
-  //   4. unless gewaldflag is set, re-solve for g_ewald by bisection on
-  //      diffpr() with the final grid
-  //   5. report the resulting error estimate on proc 0
-  // Reads the members accuracy, cutoff, order, qsqsum, slab_volfactor,
-  // two_charge_force; writes nx_pppm, ny_pppm, nz_pppm and g_ewald.
-
-  // see JCP 109, pg 7698 for derivation of coefficients
-  // higher order coefficients may be computed if needed
-  // only acons[p][0..p-1] for p = 1..7 is filled in; rms() reads row
-  //   acons[order], so order must be in 1..7 here
-
-  double **acons;
-  memory->create(acons,8,7,"pppm:acons");
-
-  acons[1][0] = 2.0 / 3.0;
-  acons[2][0] = 1.0 / 50.0;
-  acons[2][1] = 5.0 / 294.0;
-  acons[3][0] = 1.0 / 588.0;
-  acons[3][1] = 7.0 / 1440.0;
-  acons[3][2] = 21.0 / 3872.0;
-  acons[4][0] = 1.0 / 4320.0;
-  acons[4][1] = 3.0 / 1936.0;
-  acons[4][2] = 7601.0 / 2271360.0;
-  acons[4][3] = 143.0 / 28800.0;
-  acons[5][0] = 1.0 / 23232.0;
-  acons[5][1] = 7601.0 / 13628160.0;
-  acons[5][2] = 143.0 / 69120.0;
-  acons[5][3] = 517231.0 / 106536960.0;
-  acons[5][4] = 106640677.0 / 11737571328.0;
-  acons[6][0] = 691.0 / 68140800.0;
-  acons[6][1] = 13.0 / 57600.0;
-  acons[6][2] = 47021.0 / 35512320.0;
-  acons[6][3] = 9694607.0 / 2095994880.0;
-  acons[6][4] = 733191589.0 / 59609088000.0;
-  acons[6][5] = 326190917.0 / 11700633600.0;
-  acons[7][0] = 1.0 / 345600.0;
-  acons[7][1] = 3617.0 / 35512320.0;
-  acons[7][2] = 745739.0 / 838397952.0;
-  acons[7][3] = 56399353.0 / 12773376000.0;
-  acons[7][4] = 25091609.0 / 1560084480.0;
-  acons[7][5] = 1755948832039.0 / 36229939200000.0;
-  acons[7][6] = 4887769399.0 / 37838389248.0;
-
-  double q2 = qsqsum * force->qqrd2e;
-
-  // use xprd,yprd,zprd even if triclinic so grid size is the same
-  // adjust z dimension for 2d slab PPPM
-  // 3d PPPM just uses zprd since slab_volfactor = 1.0
-
-  double xprd = domain->xprd;
-  double yprd = domain->yprd;
-  double zprd = domain->zprd;
-  double zprd_slab = zprd*slab_volfactor;
-
-  // make initial g_ewald estimate
-  // based on desired accuracy and real space cutoff
-  // fluid-occupied volume used to estimate real-space error
-  // zprd used rather than zprd_slab
-
-  double h_x,h_y,h_z;
-  bigint natoms = atom->natoms;
-
-  if (!gewaldflag) {
-    if (accuracy <= 0.0)
-      error->all(FLERR,"KSpace accuracy must be > 0");
-    g_ewald = accuracy*sqrt(natoms*cutoff*xprd*yprd*zprd) / (2.0*q2);
-    if (g_ewald >= 1.0) g_ewald = (1.35 - 0.15*log(accuracy))/cutoff;
-    else g_ewald = sqrt(-log(g_ewald)) / cutoff;
-  }
-
-  // set optimal nx_pppm,ny_pppm,nz_pppm based on order and accuracy
-  // nz_pppm uses extended zprd_slab instead of zprd
-  // h = 1/g_ewald is upper bound on h such that h*g_ewald <= 1
-  // reduce it until accuracy target is met
-  // NOTE: inside each loop err is evaluated for the spacing in effect
-  //   before the increment, so a loop that runs at all ends one grid point
-  //   past the first size whose error meets the target; this is left
-  //   untouched so that the resulting grid sizes do not change
-
-  if (!gridflag) {
-    double err;
-    h_x = h_y = h_z = 1.0/g_ewald;
-
-    nx_pppm = static_cast<int> (xprd/h_x) + 1;
-    ny_pppm = static_cast<int> (yprd/h_y) + 1;
-    nz_pppm = static_cast<int> (zprd_slab/h_z) + 1;
-
-    err = rms(h_x,xprd,natoms,q2,acons);
-    while (err > accuracy) {
-      err = rms(h_x,xprd,natoms,q2,acons);
-      nx_pppm++;
-      h_x = xprd/nx_pppm;
-    }
-
-    err = rms(h_y,yprd,natoms,q2,acons);
-    while (err > accuracy) {
-      err = rms(h_y,yprd,natoms,q2,acons);
-      ny_pppm++;
-      h_y = yprd/ny_pppm;
-    }
-
-    err = rms(h_z,zprd_slab,natoms,q2,acons);
-    while (err > accuracy) {
-      err = rms(h_z,zprd_slab,natoms,q2,acons);
-      nz_pppm++;
-      h_z = zprd_slab/nz_pppm;
-    }
-  }
-
-  // boost grid size until it is factorable
-
-  while (!factorable(nx_pppm)) nx_pppm++;
-  while (!factorable(ny_pppm)) ny_pppm++;
-  while (!factorable(nz_pppm)) nz_pppm++;
-
-  // adjust g_ewald for new grid size
-
-  h_x = xprd/static_cast<double>(nx_pppm);
-  h_y = yprd/static_cast<double>(ny_pppm);
-  h_z = zprd_slab/static_cast<double>(nz_pppm);
-
-  if (!gewaldflag) {
-    double gew1,gew2,dgew,f,fmid,hmin,rtb;
-    int ncount;
-
-    // diffpr() reads the member g_ewald, so g_ewald is set to each trial
-    //   value before the call
-    // bracket: f at g_ewald = 0, fmid at g_ewald = 10/hmin
-
-    gew1 = 0.0;
-    g_ewald = gew1;
-    f = diffpr(h_x,h_y,h_z,q2,acons);
-
-    hmin = MIN(h_x,MIN(h_y,h_z));
-    gew2 = 10.0/hmin;
-    g_ewald = gew2;
-    fmid = diffpr(h_x,h_y,h_z,q2,acons);
-
-    // a root is only bracketed if the two ends differ in sign
-
-    if (f*fmid >= 0.0) error->all(FLERR,"Cannot compute PPPM G");
-
-    // rtb = end of the bracket where diffpr() is negative,
-    // dgew = signed distance from rtb to the other end
-
-    if (f < 0.0) {
-      dgew = gew2 - gew1;
-      rtb = gew1;
-    } else {
-      dgew = gew1 - gew2;
-      rtb = gew2;
-    }
-
-    // bisection: halve the step, keep rtb on the non-positive side
-
-    ncount = 0;
-    while (fabs(dgew) > SMALL && fmid != 0.0) {
-      dgew *= 0.5;
-      g_ewald = rtb + dgew;
-      fmid = diffpr(h_x,h_y,h_z,q2,acons);
-      if (fmid <= 0.0) rtb = g_ewald;
-      ncount++;
-      if (ncount > LARGE) error->all(FLERR,"Cannot compute PPPM G");
-    }
-  }
-
-  // final RMS accuracy
-  // kept in its own local so it cannot be confused with the member
-  //   accuracy (the target) that is used above
-
-  double lprx = rms(h_x,xprd,natoms,q2,acons);
-  double lpry = rms(h_y,yprd,natoms,q2,acons);
-  double lprz = rms(h_z,zprd_slab,natoms,q2,acons);
-  double lpr = sqrt(lprx*lprx + lpry*lpry + lprz*lprz) / sqrt(3.0);
-  double q2_over_sqrt = q2 / sqrt(natoms*cutoff*xprd*yprd*zprd_slab);
-  double spr = 2.0 *q2_over_sqrt * exp(-g_ewald*g_ewald*cutoff*cutoff);
-  double tpr = estimate_table_accuracy(q2_over_sqrt,spr);
-  double estimated_accuracy = sqrt(lpr*lpr + spr*spr + tpr*tpr);
-
-  // free local memory
-
-  memory->destroy(acons);
-
-  // print info
-  // identical report to screen and logfile, whichever of them is open
-
-  if (me == 0) {
-#ifdef FFT_SINGLE
-    const char fft_prec[] = "single";
-#else
-    const char fft_prec[] = "double";
-#endif
-    FILE *outputs[2] = {screen,logfile};
-    for (int iout = 0; iout < 2; iout++) {
-      FILE *fp = outputs[iout];
-      if (!fp) continue;
-      fprintf(fp,"  G vector (1/distance) = %g\n",g_ewald);
-      fprintf(fp,"  grid = %d %d %d\n",nx_pppm,ny_pppm,nz_pppm);
-      fprintf(fp,"  stencil order = %d\n",order);
-      fprintf(fp,"  estimated absolute RMS force accuracy = %g\n",
-              estimated_accuracy);
-      fprintf(fp,"  estimated relative force accuracy = %g\n",
-              estimated_accuracy/two_charge_force);
-      fprintf(fp,"  using %s precision FFTs\n",fft_prec);
-    }
-  }
-}
-
-/* ----------------------------------------------------------------------
-   check if all factors of n are in list of factors
-   return 1 if yes, 0 if no
-------------------------------------------------------------------------- */
-
-int PPPMOld::factorable(int n)
-{
-  int i;
-
-  while (n > 1) {
-    for (i = 0; i < nfactors; i++) {
-      if (n % factors[i] == 0) {
-        n /= factors[i];
-        break;
-      }
-    }
-    if (i == nfactors) return 0;
-  }
-
-  return 1;
-}
-
-/* ----------------------------------------------------------------------
-   compute RMS accuracy for a dimension
-------------------------------------------------------------------------- */
-
-double PPPMOld::rms(double h, double prd, bigint natoms,
-                 double q2, double **acons)
-{
-  double sum = 0.0;
-  for (int m = 0; m < order; m++)
-    sum += acons[order][m] * pow(h*g_ewald,2.0*m);
-  double value = q2 * pow(h*g_ewald,(double)order) *
-    sqrt(g_ewald*prd*sqrt(2.0*MY_PI)*sum/natoms) / (prd*prd);
-  return value;
-}
-
-/* ----------------------------------------------------------------------
-   compute difference in real-space and KSpace RMS accuracy
-------------------------------------------------------------------------- */
-
-double PPPMOld::diffpr(double h_x, double h_y, double h_z, double q2,
-                    double **acons)
-{
-  double lprx,lpry,lprz,kspace_prec,real_prec;
-  double xprd = domain->xprd;
-  double yprd = domain->yprd;
-  double zprd = domain->zprd;
-  bigint natoms = atom->natoms;
-
-  lprx = rms(h_x,xprd,natoms,q2,acons);
-  lpry = rms(h_y,yprd,natoms,q2,acons);
-  lprz = rms(h_z,zprd*slab_volfactor,natoms,q2,acons);
-  kspace_prec = sqrt(lprx*lprx + lpry*lpry + lprz*lprz) / sqrt(3.0);
-  real_prec = 2.0*q2 * exp(-g_ewald*g_ewald*cutoff*cutoff) /
-   sqrt(static_cast<double>(natoms)*cutoff*xprd*yprd*zprd);
-  double value = kspace_prec - real_prec;
-  return value;
-}
-
-/* ----------------------------------------------------------------------
-   pre-compute Green's function denominator expansion coeffs, Gamma(2n)
-------------------------------------------------------------------------- */
-
-void PPPMOld::compute_gf_denom()
-{
-  int k,l,m;
-
-  for (l = 1; l < order; l++) gf_b[l] = 0.0;
-  gf_b[0] = 1.0;
-
-  for (m = 1; m < order; m++) {
-    for (l = m; l > 0; l--)
-      gf_b[l] = 4.0 * (gf_b[l]*(l-m)*(l-m-0.5)-gf_b[l-1]*(l-m-1)*(l-m-1));
-    gf_b[0] = 4.0 * (gf_b[0]*(l-m)*(l-m-0.5));
-  }
-
-  bigint ifact = 1;
-  for (k = 1; k < 2*order; k++) ifact *= k;
-  double gaminv = 1.0/ifact;
-  for (l = 0; l < order; l++) gf_b[l] *= gaminv;
-}
-
-/* ----------------------------------------------------------------------
-   ghost-swap to accumulate full density in brick decomposition
-   remap density from 3d brick decomposition to FFT decomposition
-------------------------------------------------------------------------- */
-
-void PPPMOld::brick2fft()
-{
-  int i,n,ix,iy,iz;
-  MPI_Request request;
-  MPI_Status status;
-
-  // pack my ghosts for +x processor
-  // pass data to self or +x processor
-  // unpack and sum recv data into my real cells
-
-  n = 0;
-  for (iz = nzlo_out; iz <= nzhi_out; iz++)
-    for (iy = nylo_out; iy <= nyhi_out; iy++)
-      for (ix = nxhi_in+1; ix <= nxhi_out; ix++)
-        buf1[n++] = density_brick[iz][iy][ix];
-
-  if (comm->procneigh[0][1] == me)
-    for (i = 0; i < n; i++) buf2[i] = buf1[i];
-  else {
-    MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[0][0],0,world,&request);
-    MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[0][1],0,world);
-    MPI_Wait(&request,&status);
-  }
-
-  n = 0;
-  for (iz = nzlo_out; iz <= nzhi_out; iz++)
-    for (iy = nylo_out; iy <= nyhi_out; iy++)
-      for (ix = nxlo_in; ix < nxlo_in+nxlo_ghost; ix++)
-        density_brick[iz][iy][ix] += buf2[n++];
-
-  // pack my ghosts for -x processor
-  // pass data to self or -x processor
-  // unpack and sum recv data into my real cells
-
-  n = 0;
-  for (iz = nzlo_out; iz <= nzhi_out; iz++)
-    for (iy = nylo_out; iy <= nyhi_out; iy++)
-      for (ix = nxlo_out; ix < nxlo_in; ix++)
-        buf1[n++] = density_brick[iz][iy][ix];
-
-  if (comm->procneigh[0][0] == me)
-    for (i = 0; i < n; i++) buf2[i] = buf1[i];
-  else {
-    MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[0][1],0,world,&request);
-    MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[0][0],0,world);
-    MPI_Wait(&request,&status);
-  }
-
-  n = 0;
-  for (iz = nzlo_out; iz <= nzhi_out; iz++)
-    for (iy = nylo_out; iy <= nyhi_out; iy++)
-      for (ix = nxhi_in-nxhi_ghost+1; ix <= nxhi_in; ix++)
-        density_brick[iz][iy][ix] += buf2[n++];
-
-  // pack my ghosts for +y processor
-  // pass data to self or +y processor
-  // unpack and sum recv data into my real cells
-
-  n = 0;
-  for (iz = nzlo_out; iz <= nzhi_out; iz++)
-    for (iy = nyhi_in+1; iy <= nyhi_out; iy++)
-      for (ix = nxlo_in; ix <= nxhi_in; ix++)
-        buf1[n++] = density_brick[iz][iy][ix];
-
-  if (comm->procneigh[1][1] == me)
-    for (i = 0; i < n; i++) buf2[i] = buf1[i];
-  else {
-    MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[1][0],0,world,&request);
-    MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[1][1],0,world);
-    MPI_Wait(&request,&status);
-  }
-
-  n = 0;
-  for (iz = nzlo_out; iz <= nzhi_out; iz++)
-    for (iy = nylo_in; iy < nylo_in+nylo_ghost; iy++)
-      for (ix = nxlo_in; ix <= nxhi_in; ix++)
-        density_brick[iz][iy][ix] += buf2[n++];
-
-  // pack my ghosts for -y processor
-  // pass data to self or -y processor
-  // unpack and sum recv data into my real cells
-
-  n = 0;
-  for (iz = nzlo_out; iz <= nzhi_out; iz++)
-    for (iy = nylo_out; iy < nylo_in; iy++)
-      for (ix = nxlo_in; ix <= nxhi_in; ix++)
-        buf1[n++] = density_brick[iz][iy][ix];
-
-  if (comm->procneigh[1][0] == me)
-    for (i = 0; i < n; i++) buf2[i] = buf1[i];
-  else {
-    MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[1][1],0,world,&request);
-    MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[1][0],0,world);
-    MPI_Wait(&request,&status);
-  }
-
-  n = 0;
-  for (iz = nzlo_out; iz <= nzhi_out; iz++)
-    for (iy = nyhi_in-nyhi_ghost+1; iy <= nyhi_in; iy++)
-      for (ix = nxlo_in; ix <= nxhi_in; ix++)
-        density_brick[iz][iy][ix] += buf2[n++];
-
-  // pack my ghosts for +z processor
-  // pass data to self or +z processor
-  // unpack and sum recv data into my real cells
-
-  n = 0;
-  for (iz = nzhi_in+1; iz <= nzhi_out; iz++)
-    for (iy = nylo_in; iy <= nyhi_in; iy++)
-      for (ix = nxlo_in; ix <= nxhi_in; ix++)
-        buf1[n++] = density_brick[iz][iy][ix];
-
-  if (comm->procneigh[2][1] == me)
-    for (i = 0; i < n; i++) buf2[i] = buf1[i];
-  else {
-    MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[2][0],0,world,&request);
-    MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[2][1],0,world);
-    MPI_Wait(&request,&status);
-  }
-
-  n = 0;
-  for (iz = nzlo_in; iz < nzlo_in+nzlo_ghost; iz++)
-    for (iy = nylo_in; iy <= nyhi_in; iy++)
-      for (ix = nxlo_in; ix <= nxhi_in; ix++)
-        density_brick[iz][iy][ix] += buf2[n++];
-
-  // pack my ghosts for -z processor
-  // pass data to self or -z processor
-  // unpack and sum recv data into my real cells
-
-  n = 0;
-  for (iz = nzlo_out; iz < nzlo_in; iz++)
-    for (iy = nylo_in; iy <= nyhi_in; iy++)
-      for (ix = nxlo_in; ix <= nxhi_in; ix++)
-        buf1[n++] = density_brick[iz][iy][ix];
-
-  if (comm->procneigh[2][0] == me)
-    for (i = 0; i < n; i++) buf2[i] = buf1[i];
-  else {
-    MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[2][1],0,world,&request);
-    MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[2][0],0,world);
-    MPI_Wait(&request,&status);
-  }
-
-  n = 0;
-  for (iz = nzhi_in-nzhi_ghost+1; iz <= nzhi_in; iz++)
-    for (iy = nylo_in; iy <= nyhi_in; iy++)
-      for (ix = nxlo_in; ix <= nxhi_in; ix++)
-        density_brick[iz][iy][ix] += buf2[n++];
-
-  // remap from 3d brick decomposition to FFT decomposition
-  // copy grabs inner portion of density from 3d brick
-  // remap could be done as pre-stage of FFT,
-  //   but this works optimally on only double values, not complex values
-
-  n = 0;
-  for (iz = nzlo_in; iz <= nzhi_in; iz++)
-    for (iy = nylo_in; iy <= nyhi_in; iy++)
-      for (ix = nxlo_in; ix <= nxhi_in; ix++)
-        density_fft[n++] = density_brick[iz][iy][ix];
-
-  remap->perform(density_fft,density_fft,work1);
-}
-
-/* ----------------------------------------------------------------------
-   ghost-swap to fill ghost cells of my brick with field values
-------------------------------------------------------------------------- */
-
-void PPPMOld::fillbrick()
-{
-  int i,n,ix,iy,iz;
-  MPI_Request request;
-  MPI_Status status;
-
-  // pack my real cells for +z processor
-  // pass data to self or +z processor
-  // unpack and sum recv data into my ghost cells
-
-  n = 0;
-  for (iz = nzhi_in-nzhi_ghost+1; iz <= nzhi_in; iz++)
-    for (iy = nylo_in; iy <= nyhi_in; iy++)
-      for (ix = nxlo_in; ix <= nxhi_in; ix++) {
-        buf1[n++] = vdx_brick[iz][iy][ix];
-        buf1[n++] = vdy_brick[iz][iy][ix];
-        buf1[n++] = vdz_brick[iz][iy][ix];
-      }
-
-  if (comm->procneigh[2][1] == me)
-    for (i = 0; i < n; i++) buf2[i] = buf1[i];
-  else {
-    MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[2][0],0,world,&request);
-    MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[2][1],0,world);
-    MPI_Wait(&request,&status);
-  }
-
-  n = 0;
-  for (iz = nzlo_out; iz < nzlo_in; iz++)
-    for (iy = nylo_in; iy <= nyhi_in; iy++)
-      for (ix = nxlo_in; ix <= nxhi_in; ix++) {
-        vdx_brick[iz][iy][ix] = buf2[n++];
-        vdy_brick[iz][iy][ix] = buf2[n++];
-        vdz_brick[iz][iy][ix] = buf2[n++];
-      }
-
-  // pack my real cells for -z processor
-  // pass data to self or -z processor
-  // unpack and sum recv data into my ghost cells
-
-  n = 0;
-  for (iz = nzlo_in; iz < nzlo_in+nzlo_ghost; iz++)
-    for (iy = nylo_in; iy <= nyhi_in; iy++)
-      for (ix = nxlo_in; ix <= nxhi_in; ix++) {
-        buf1[n++] = vdx_brick[iz][iy][ix];
-        buf1[n++] = vdy_brick[iz][iy][ix];
-        buf1[n++] = vdz_brick[iz][iy][ix];
-      }
-
-  if (comm->procneigh[2][0] == me)
-    for (i = 0; i < n; i++) buf2[i] = buf1[i];
-  else {
-    MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[2][1],0,world,&request);
-    MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[2][0],0,world);
-    MPI_Wait(&request,&status);
-  }
-
-  n = 0;
-  for (iz = nzhi_in+1; iz <= nzhi_out; iz++)
-    for (iy = nylo_in; iy <= nyhi_in; iy++)
-      for (ix = nxlo_in; ix <= nxhi_in; ix++) {
-        vdx_brick[iz][iy][ix] = buf2[n++];
-        vdy_brick[iz][iy][ix] = buf2[n++];
-        vdz_brick[iz][iy][ix] = buf2[n++];
-      }
-
-  // pack my real cells for +y processor
-  // pass data to self or +y processor
-  // unpack and sum recv data into my ghost cells
-
-  n = 0;
-  for (iz = nzlo_out; iz <= nzhi_out; iz++)
-    for (iy = nyhi_in-nyhi_ghost+1; iy <= nyhi_in; iy++)
-      for (ix = nxlo_in; ix <= nxhi_in; ix++) {
-        buf1[n++] = vdx_brick[iz][iy][ix];
-        buf1[n++] = vdy_brick[iz][iy][ix];
-        buf1[n++] = vdz_brick[iz][iy][ix];
-      }
-
-  if (comm->procneigh[1][1] == me)
-    for (i = 0; i < n; i++) buf2[i] = buf1[i];
-  else {
-    MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[1][0],0,world,&request);
-    MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[1][1],0,world);
-    MPI_Wait(&request,&status);
-  }
-
-  n = 0;
-  for (iz = nzlo_out; iz <= nzhi_out; iz++)
-    for (iy = nylo_out; iy < nylo_in; iy++)
-      for (ix = nxlo_in; ix <= nxhi_in; ix++) {
-        vdx_brick[iz][iy][ix] = buf2[n++];
-        vdy_brick[iz][iy][ix] = buf2[n++];
-        vdz_brick[iz][iy][ix] = buf2[n++];
-      }
-
-  // pack my real cells for -y processor
-  // pass data to self or -y processor
-  // unpack and sum recv data into my ghost cells
-
-  n = 0;
-  for (iz = nzlo_out; iz <= nzhi_out; iz++)
-    for (iy = nylo_in; iy < nylo_in+nylo_ghost; iy++)
-      for (ix = nxlo_in; ix <= nxhi_in; ix++) {
-        buf1[n++] = vdx_brick[iz][iy][ix];
-        buf1[n++] = vdy_brick[iz][iy][ix];
-        buf1[n++] = vdz_brick[iz][iy][ix];
-      }
-
-  if (comm->procneigh[1][0] == me)
-    for (i = 0; i < n; i++) buf2[i] = buf1[i];
-  else {
-    MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[1][1],0,world,&request);
-    MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[1][0],0,world);
-    MPI_Wait(&request,&status);
-  }
-
-  n = 0;
-  for (iz = nzlo_out; iz <= nzhi_out; iz++)
-    for (iy = nyhi_in+1; iy <= nyhi_out; iy++)
-      for (ix = nxlo_in; ix <= nxhi_in; ix++) {
-        vdx_brick[iz][iy][ix] = buf2[n++];
-        vdy_brick[iz][iy][ix] = buf2[n++];
-        vdz_brick[iz][iy][ix] = buf2[n++];
-      }
-
-  // pack my real cells for +x processor
-  // pass data to self or +x processor
-  // unpack and sum recv data into my ghost cells
-
-  n = 0;
-  for (iz = nzlo_out; iz <= nzhi_out; iz++)
-    for (iy = nylo_out; iy <= nyhi_out; iy++)
-      for (ix = nxhi_in-nxhi_ghost+1; ix <= nxhi_in; ix++) {
-        buf1[n++] = vdx_brick[iz][iy][ix];
-        buf1[n++] = vdy_brick[iz][iy][ix];
-        buf1[n++] = vdz_brick[iz][iy][ix];
-      }
-
-  if (comm->procneigh[0][1] == me)
-    for (i = 0; i < n; i++) buf2[i] = buf1[i];
-  else {
-    MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[0][0],0,world,&request);
-    MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[0][1],0,world);
-    MPI_Wait(&request,&status);
-  }
-
-  n = 0;
-  for (iz = nzlo_out; iz <= nzhi_out; iz++)
-    for (iy = nylo_out; iy <= nyhi_out; iy++)
-      for (ix = nxlo_out; ix < nxlo_in; ix++) {
-        vdx_brick[iz][iy][ix] = buf2[n++];
-        vdy_brick[iz][iy][ix] = buf2[n++];
-        vdz_brick[iz][iy][ix] = buf2[n++];
-      }
-
-  // pack my real cells for -x processor
-  // pass data to self or -x processor
-  // unpack and sum recv data into my ghost cells
-
-  n = 0;
-  for (iz = nzlo_out; iz <= nzhi_out; iz++)
-    for (iy = nylo_out; iy <= nyhi_out; iy++)
-      for (ix = nxlo_in; ix < nxlo_in+nxlo_ghost; ix++) {
-        buf1[n++] = vdx_brick[iz][iy][ix];
-        buf1[n++] = vdy_brick[iz][iy][ix];
-        buf1[n++] = vdz_brick[iz][iy][ix];
-      }
-
-  if (comm->procneigh[0][0] == me)
-    for (i = 0; i < n; i++) buf2[i] = buf1[i];
-  else {
-    MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[0][1],0,world,&request);
-    MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[0][0],0,world);
-    MPI_Wait(&request,&status);
-  }
-
-  n = 0;
-  for (iz = nzlo_out; iz <= nzhi_out; iz++)
-    for (iy = nylo_out; iy <= nyhi_out; iy++)
-      for (ix = nxhi_in+1; ix <= nxhi_out; ix++) {
-        vdx_brick[iz][iy][ix] = buf2[n++];
-        vdy_brick[iz][iy][ix] = buf2[n++];
-        vdz_brick[iz][iy][ix] = buf2[n++];
-      }
-}
-
-/* ----------------------------------------------------------------------
-   ghost-swap to fill ghost cells of my brick with per-atom field values
-------------------------------------------------------------------------- */
-
-void PPPMOld::fillbrick_peratom()
-{
-  int i,n,ix,iy,iz;
-  MPI_Request request;
-  MPI_Status status;
-
-  // pack my real cells for +z processor
-  // pass data to self or +z processor
-  // unpack and sum recv data into my ghost cells
-
-  n = 0;
-  for (iz = nzhi_in-nzhi_ghost+1; iz <= nzhi_in; iz++)
-    for (iy = nylo_in; iy <= nyhi_in; iy++)
-      for (ix = nxlo_in; ix <= nxhi_in; ix++) {
-        if (eflag_atom) buf3[n++] = u_brick[iz][iy][ix];
-        if (vflag_atom) {
-          buf3[n++] = v0_brick[iz][iy][ix];
-          buf3[n++] = v1_brick[iz][iy][ix];
-          buf3[n++] = v2_brick[iz][iy][ix];
-          buf3[n++] = v3_brick[iz][iy][ix];
-          buf3[n++] = v4_brick[iz][iy][ix];
-          buf3[n++] = v5_brick[iz][iy][ix];
-        }
-      }
-
-  if (comm->procneigh[2][1] == me)
-    for (i = 0; i < n; i++) buf4[i] = buf3[i];
-  else {
-    MPI_Irecv(buf4,nbuf_peratom,MPI_FFT_SCALAR,
-              comm->procneigh[2][0],0,world,&request);
-    MPI_Send(buf3,n,MPI_FFT_SCALAR,comm->procneigh[2][1],0,world);
-    MPI_Wait(&request,&status);
-  }
-
-  n = 0;
-  for (iz = nzlo_out; iz < nzlo_in; iz++)
-    for (iy = nylo_in; iy <= nyhi_in; iy++)
-      for (ix = nxlo_in; ix <= nxhi_in; ix++) {
-        if (eflag_atom) u_brick[iz][iy][ix] = buf4[n++];
-        if (vflag_atom) {
-          v0_brick[iz][iy][ix] = buf4[n++];
-          v1_brick[iz][iy][ix] = buf4[n++];
-          v2_brick[iz][iy][ix] = buf4[n++];
-          v3_brick[iz][iy][ix] = buf4[n++];
-          v4_brick[iz][iy][ix] = buf4[n++];
-          v5_brick[iz][iy][ix] = buf4[n++];
-        }
-      }
-
-  // pack my real cells for -z processor
-  // pass data to self or -z processor
-  // unpack and sum recv data into my ghost cells
-
-  n = 0;
-  for (iz = nzlo_in; iz < nzlo_in+nzlo_ghost; iz++)
-    for (iy = nylo_in; iy <= nyhi_in; iy++)
-      for (ix = nxlo_in; ix <= nxhi_in; ix++) {
-        if (eflag_atom) buf3[n++] = u_brick[iz][iy][ix];
-        if (vflag_atom) {
-          buf3[n++] = v0_brick[iz][iy][ix];
-          buf3[n++] = v1_brick[iz][iy][ix];
-          buf3[n++] = v2_brick[iz][iy][ix];
-          buf3[n++] = v3_brick[iz][iy][ix];
-          buf3[n++] = v4_brick[iz][iy][ix];
-          buf3[n++] = v5_brick[iz][iy][ix];
-        }
-      }
-
-  if (comm->procneigh[2][0] == me)
-    for (i = 0; i < n; i++) buf4[i] = buf3[i];
-  else {
-    MPI_Irecv(buf4,nbuf_peratom,MPI_FFT_SCALAR,
-              comm->procneigh[2][1],0,world,&request);
-    MPI_Send(buf3,n,MPI_FFT_SCALAR,comm->procneigh[2][0],0,world);
-    MPI_Wait(&request,&status);
-  }
-
-  n = 0;
-  for (iz = nzhi_in+1; iz <= nzhi_out; iz++)
-    for (iy = nylo_in; iy <= nyhi_in; iy++)
-      for (ix = nxlo_in; ix <= nxhi_in; ix++) {
-        if (eflag_atom) u_brick[iz][iy][ix] = buf4[n++];
-        if (vflag_atom) {
-          v0_brick[iz][iy][ix] = buf4[n++];
-          v1_brick[iz][iy][ix] = buf4[n++];
-          v2_brick[iz][iy][ix] = buf4[n++];
-          v3_brick[iz][iy][ix] = buf4[n++];
-          v4_brick[iz][iy][ix] = buf4[n++];
-          v5_brick[iz][iy][ix] = buf4[n++];
-        }
-      }
-
-  // pack my real cells for +y processor
-  // pass data to self or +y processor
-  // unpack and sum recv data into my ghost cells
-
-  n = 0;
-  for (iz = nzlo_out; iz <= nzhi_out; iz++)
-    for (iy = nyhi_in-nyhi_ghost+1; iy <= nyhi_in; iy++)
-      for (ix = nxlo_in; ix <= nxhi_in; ix++) {
-        if (eflag_atom) buf3[n++] = u_brick[iz][iy][ix];
-        if (vflag_atom) {
-          buf3[n++] = v0_brick[iz][iy][ix];
-          buf3[n++] = v1_brick[iz][iy][ix];
-          buf3[n++] = v2_brick[iz][iy][ix];
-          buf3[n++] = v3_brick[iz][iy][ix];
-          buf3[n++] = v4_brick[iz][iy][ix];
-          buf3[n++] = v5_brick[iz][iy][ix];
-        }
-      }
-
-  if (comm->procneigh[1][1] == me)
-    for (i = 0; i < n; i++) buf4[i] = buf3[i];
-  else {
-    MPI_Irecv(buf4,nbuf_peratom,MPI_FFT_SCALAR,
-              comm->procneigh[1][0],0,world,&request);
-    MPI_Send(buf3,n,MPI_FFT_SCALAR,comm->procneigh[1][1],0,world);
-    MPI_Wait(&request,&status);
-  }
-
-  n = 0;
-  for (iz = nzlo_out; iz <= nzhi_out; iz++)
-    for (iy = nylo_out; iy < nylo_in; iy++)
-      for (ix = nxlo_in; ix <= nxhi_in; ix++) {
-        if (eflag_atom) u_brick[iz][iy][ix] = buf4[n++];
-        if (vflag_atom) {
-          v0_brick[iz][iy][ix] = buf4[n++];
-          v1_brick[iz][iy][ix] = buf4[n++];
-          v2_brick[iz][iy][ix] = buf4[n++];
-          v3_brick[iz][iy][ix] = buf4[n++];
-          v4_brick[iz][iy][ix] = buf4[n++];
-          v5_brick[iz][iy][ix] = buf4[n++];
-        }
-      }
-
-  // pack my real cells for -y processor
-  // pass data to self or -y processor
-  // unpack and sum recv data into my ghost cells
-
-  n = 0;
-  for (iz = nzlo_out; iz <= nzhi_out; iz++)
-    for (iy = nylo_in; iy < nylo_in+nylo_ghost; iy++)
-      for (ix = nxlo_in; ix <= nxhi_in; ix++) {
-        if (eflag_atom) buf3[n++] = u_brick[iz][iy][ix];
-        if (vflag_atom) {
-          buf3[n++] = v0_brick[iz][iy][ix];
-          buf3[n++] = v1_brick[iz][iy][ix];
-          buf3[n++] = v2_brick[iz][iy][ix];
-          buf3[n++] = v3_brick[iz][iy][ix];
-          buf3[n++] = v4_brick[iz][iy][ix];
-          buf3[n++] = v5_brick[iz][iy][ix];
-        }
-      }
-
-  if (comm->procneigh[1][0] == me)
-    for (i = 0; i < n; i++) buf4[i] = buf3[i];
-  else {
-    MPI_Irecv(buf4,nbuf_peratom,MPI_FFT_SCALAR,
-              comm->procneigh[1][1],0,world,&request);
-    MPI_Send(buf3,n,MPI_FFT_SCALAR,comm->procneigh[1][0],0,world);
-    MPI_Wait(&request,&status);
-  }
-
-  n = 0;
-  for (iz = nzlo_out; iz <= nzhi_out; iz++)
-    for (iy = nyhi_in+1; iy <= nyhi_out; iy++)
-      for (ix = nxlo_in; ix <= nxhi_in; ix++) {
-        if (eflag_atom) u_brick[iz][iy][ix] = buf4[n++];
-        if (vflag_atom) {
-          v0_brick[iz][iy][ix] = buf4[n++];
-          v1_brick[iz][iy][ix] = buf4[n++];
-          v2_brick[iz][iy][ix] = buf4[n++];
-          v3_brick[iz][iy][ix] = buf4[n++];
-          v4_brick[iz][iy][ix] = buf4[n++];
-          v5_brick[iz][iy][ix] = buf4[n++];
-        }
-      }
-
-  // pack my real cells for +x processor
-  // pass data to self or +x processor
-  // unpack and sum recv data into my ghost cells
-
-  n = 0;
-  for (iz = nzlo_out; iz <= nzhi_out; iz++)
-    for (iy = nylo_out; iy <= nyhi_out; iy++)
-      for (ix = nxhi_in-nxhi_ghost+1; ix <= nxhi_in; ix++) {
-        if (eflag_atom) buf3[n++] = u_brick[iz][iy][ix];
-        if (vflag_atom) {
-          buf3[n++] = v0_brick[iz][iy][ix];
-          buf3[n++] = v1_brick[iz][iy][ix];
-          buf3[n++] = v2_brick[iz][iy][ix];
-          buf3[n++] = v3_brick[iz][iy][ix];
-          buf3[n++] = v4_brick[iz][iy][ix];
-          buf3[n++] = v5_brick[iz][iy][ix];
-        }
-      }
-
-  if (comm->procneigh[0][1] == me)
-    for (i = 0; i < n; i++) buf4[i] = buf3[i];
-  else {
-    MPI_Irecv(buf4,nbuf_peratom,MPI_FFT_SCALAR,
-              comm->procneigh[0][0],0,world,&request);
-    MPI_Send(buf3,n,MPI_FFT_SCALAR,comm->procneigh[0][1],0,world);
-    MPI_Wait(&request,&status);
-  }
-
-  n = 0;
-  for (iz = nzlo_out; iz <= nzhi_out; iz++)
-    for (iy = nylo_out; iy <= nyhi_out; iy++)
-      for (ix = nxlo_out; ix < nxlo_in; ix++) {
-        if (eflag_atom) u_brick[iz][iy][ix] = buf4[n++];
-        if (vflag_atom) {
-          v0_brick[iz][iy][ix] = buf4[n++];
-          v1_brick[iz][iy][ix] = buf4[n++];
-          v2_brick[iz][iy][ix] = buf4[n++];
-          v3_brick[iz][iy][ix] = buf4[n++];
-          v4_brick[iz][iy][ix] = buf4[n++];
-          v5_brick[iz][iy][ix] = buf4[n++];
-        }
-      }
-
-  // pack my real cells for -x processor
-  // pass data to self or -x processor
-  // unpack and sum recv data into my ghost cells
-
-  n = 0;
-  for (iz = nzlo_out; iz <= nzhi_out; iz++)
-    for (iy = nylo_out; iy <= nyhi_out; iy++)
-      for (ix = nxlo_in; ix < nxlo_in+nxlo_ghost; ix++) {
-        if (eflag_atom) buf3[n++] = u_brick[iz][iy][ix];
-        if (vflag_atom) {
-          buf3[n++] = v0_brick[iz][iy][ix];
-          buf3[n++] = v1_brick[iz][iy][ix];
-          buf3[n++] = v2_brick[iz][iy][ix];
-          buf3[n++] = v3_brick[iz][iy][ix];
-          buf3[n++] = v4_brick[iz][iy][ix];
-          buf3[n++] = v5_brick[iz][iy][ix];
-        }
-      }
-
-  if (comm->procneigh[0][0] == me)
-    for (i = 0; i < n; i++) buf4[i] = buf3[i];
-  else {
-    MPI_Irecv(buf4,nbuf_peratom,MPI_FFT_SCALAR,
-              comm->procneigh[0][1],0,world,&request);
-    MPI_Send(buf3,n,MPI_FFT_SCALAR,comm->procneigh[0][0],0,world);
-    MPI_Wait(&request,&status);
-  }
-
-  n = 0;
-  for (iz = nzlo_out; iz <= nzhi_out; iz++)
-    for (iy = nylo_out; iy <= nyhi_out; iy++)
-      for (ix = nxhi_in+1; ix <= nxhi_out; ix++) {
-        if (eflag_atom) u_brick[iz][iy][ix] = buf4[n++];
-        if (vflag_atom) {
-          v0_brick[iz][iy][ix] = buf4[n++];
-          v1_brick[iz][iy][ix] = buf4[n++];
-          v2_brick[iz][iy][ix] = buf4[n++];
-          v3_brick[iz][iy][ix] = buf4[n++];
-          v4_brick[iz][iy][ix] = buf4[n++];
-          v5_brick[iz][iy][ix] = buf4[n++];
-        }
-      }
-}
-
-/* ----------------------------------------------------------------------
-   find center grid pt for each of my particles
-   check that full stencil for the particle will fit in my 3d brick
-   store central grid pt indices in part2grid array
-------------------------------------------------------------------------- */
-
-void PPPMOld::particle_map()
-{
-  int nx,ny,nz;
-
-  double **x = atom->x;
-  int nlocal = atom->nlocal;
-
-  int flag = 0;
-  for (int i = 0; i < nlocal; i++) {
-
-    // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
-    // current particle coord can be outside global and local box
-    // add/subtract OFFSET to avoid int(-0.75) = 0 when want it to be -1
-
-    nx = static_cast<int> ((x[i][0]-boxlo[0])*delxinv+shift) - OFFSET;
-    ny = static_cast<int> ((x[i][1]-boxlo[1])*delyinv+shift) - OFFSET;
-    nz = static_cast<int> ((x[i][2]-boxlo[2])*delzinv+shift) - OFFSET;
-
-    part2grid[i][0] = nx;
-    part2grid[i][1] = ny;
-    part2grid[i][2] = nz;
-
-    // check that entire stencil around nx,ny,nz will fit in my 3d brick
-
-    if (nx+nlower < nxlo_out || nx+nupper > nxhi_out ||
-        ny+nlower < nylo_out || ny+nupper > nyhi_out ||
-        nz+nlower < nzlo_out || nz+nupper > nzhi_out)
-      flag = 1;
-  }
-
-  if (flag) error->one(FLERR,"Out of range atoms - cannot compute PPPM");
-}
-
-/* ----------------------------------------------------------------------
-   create discretized "density" on section of global grid due to my particles
-   density(x,y,z) = charge "density" at grid points of my 3d brick
-   (nxlo:nxhi,nylo:nyhi,nzlo:nzhi) is extent of my brick (including ghosts)
-   in global grid
-------------------------------------------------------------------------- */
-
-void PPPMOld::make_rho()
-{
-  int l,m,n,nx,ny,nz,mx,my,mz;
-  FFT_SCALAR dx,dy,dz,x0,y0,z0;
-
-  // clear 3d density array
-
-  memset(&(density_brick[nzlo_out][nylo_out][nxlo_out]),0,
-         ngrid*sizeof(FFT_SCALAR));
-
-  // loop over my charges, add their contribution to nearby grid points
-  // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
-  // (dx,dy,dz) = distance to "lower left" grid pt
-  // (mx,my,mz) = global coords of moving stencil pt
-
-  double *q = atom->q;
-  double **x = atom->x;
-  int nlocal = atom->nlocal;
-
-  for (int i = 0; i < nlocal; i++) {
-
-    nx = part2grid[i][0];
-    ny = part2grid[i][1];
-    nz = part2grid[i][2];
-    dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv;
-    dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv;
-    dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv;
-
-    compute_rho1d(dx,dy,dz);
-
-    z0 = delvolinv * q[i];
-    for (n = nlower; n <= nupper; n++) {
-      mz = n+nz;
-      y0 = z0*rho1d[2][n];
-      for (m = nlower; m <= nupper; m++) {
-        my = m+ny;
-        x0 = y0*rho1d[1][m];
-        for (l = nlower; l <= nupper; l++) {
-          mx = l+nx;
-          density_brick[mz][my][mx] += x0*rho1d[0][l];
-        }
-      }
-    }
-  }
-}
-
-/* ----------------------------------------------------------------------
-   FFT-based Poisson solver
-------------------------------------------------------------------------- */
-
-void PPPMOld::poisson()
-{
-  int i,j,k,n;
-  double eng;
-
-  // transform charge density (r -> k)
-
-  n = 0;
-  for (i = 0; i < nfft; i++) {
-    work1[n++] = density_fft[i];
-    work1[n++] = ZEROF;
-  }
-
-  fft1->compute(work1,work1,1);
-
-  // global energy and virial contribution
-
-  double scaleinv = 1.0/(nx_pppm*ny_pppm*nz_pppm);
-  double s2 = scaleinv*scaleinv;
-
-  if (eflag_global || vflag_global) {
-    if (vflag_global) {
-      n = 0;
-      for (i = 0; i < nfft; i++) {
-        eng = s2 * greensfn[i] * (work1[n]*work1[n] + work1[n+1]*work1[n+1]);
-        for (j = 0; j < 6; j++) virial[j] += eng*vg[i][j];
-        if (eflag_global) energy += eng;
-        n += 2;
-      }
-    } else {
-      n = 0;
-      for (i = 0; i < nfft; i++) {
-        energy +=
-          s2 * greensfn[i] * (work1[n]*work1[n] + work1[n+1]*work1[n+1]);
-        n += 2;
-      }
-    }
-  }
-
-  // scale by 1/total-grid-pts to get rho(k)
-  // multiply by Green's function to get V(k)
-
-  n = 0;
-  for (i = 0; i < nfft; i++) {
-    work1[n++] *= scaleinv * greensfn[i];
-    work1[n++] *= scaleinv * greensfn[i];
-  }
-
-  // extra FFTs for per-atom energy/virial
-
-  if (evflag_atom) poisson_peratom();
-
-  // compute gradients of V(r) in each of 3 dims by transformimg -ik*V(k)
-  // FFT leaves data in 3d brick decomposition
-  // copy it into inner portion of vdx,vdy,vdz arrays
-
-  // x direction gradient
-
-  n = 0;
-  for (k = nzlo_fft; k <= nzhi_fft; k++)
-    for (j = nylo_fft; j <= nyhi_fft; j++)
-      for (i = nxlo_fft; i <= nxhi_fft; i++) {
-        work2[n] = fkx[i]*work1[n+1];
-        work2[n+1] = -fkx[i]*work1[n];
-        n += 2;
-      }
-
-  fft2->compute(work2,work2,-1);
-
-  n = 0;
-  for (k = nzlo_in; k <= nzhi_in; k++)
-    for (j = nylo_in; j <= nyhi_in; j++)
-      for (i = nxlo_in; i <= nxhi_in; i++) {
-        vdx_brick[k][j][i] = work2[n];
-        n += 2;
-      }
-
-  // y direction gradient
-
-  n = 0;
-  for (k = nzlo_fft; k <= nzhi_fft; k++)
-    for (j = nylo_fft; j <= nyhi_fft; j++)
-      for (i = nxlo_fft; i <= nxhi_fft; i++) {
-        work2[n] = fky[j]*work1[n+1];
-        work2[n+1] = -fky[j]*work1[n];
-        n += 2;
-      }
-
-  fft2->compute(work2,work2,-1);
-
-  n = 0;
-  for (k = nzlo_in; k <= nzhi_in; k++)
-    for (j = nylo_in; j <= nyhi_in; j++)
-      for (i = nxlo_in; i <= nxhi_in; i++) {
-        vdy_brick[k][j][i] = work2[n];
-        n += 2;
-      }
-
-  // z direction gradient
-
-  n = 0;
-  for (k = nzlo_fft; k <= nzhi_fft; k++)
-    for (j = nylo_fft; j <= nyhi_fft; j++)
-      for (i = nxlo_fft; i <= nxhi_fft; i++) {
-        work2[n] = fkz[k]*work1[n+1];
-        work2[n+1] = -fkz[k]*work1[n];
-        n += 2;
-      }
-
-  fft2->compute(work2,work2,-1);
-
-  n = 0;
-  for (k = nzlo_in; k <= nzhi_in; k++)
-    for (j = nylo_in; j <= nyhi_in; j++)
-      for (i = nxlo_in; i <= nxhi_in; i++) {
-        vdz_brick[k][j][i] = work2[n];
-        n += 2;
-      }
-}
-
-/* ----------------------------------------------------------------------
-   FFT-based Poisson solver for per-atom energy/virial
-------------------------------------------------------------------------- */
-
-void PPPMOld::poisson_peratom()
-{
-  int i,j,k,n;
-
-  // energy
-
-  if (eflag_atom) {
-    n = 0;
-    for (i = 0; i < nfft; i++) {
-      work2[n] = work1[n];
-      work2[n+1] = work1[n+1];
-      n += 2;
-    }
-
-    fft2->compute(work2,work2,-1);
-
-    n = 0;
-    for (k = nzlo_in; k <= nzhi_in; k++)
-      for (j = nylo_in; j <= nyhi_in; j++)
-        for (i = nxlo_in; i <= nxhi_in; i++) {
-          u_brick[k][j][i] = work2[n];
-          n += 2;
-        }
-  }
-
-  // 6 components of virial in v0 thru v5
-
-  if (!vflag_atom) return;
-
-  n = 0;
-  for (i = 0; i < nfft; i++) {
-    work2[n] = work1[n]*vg[i][0];
-    work2[n+1] = work1[n+1]*vg[i][0];
-    n += 2;
-  }
-
-  fft2->compute(work2,work2,-1);
-
-  n = 0;
-  for (k = nzlo_in; k <= nzhi_in; k++)
-    for (j = nylo_in; j <= nyhi_in; j++)
-      for (i = nxlo_in; i <= nxhi_in; i++) {
-        v0_brick[k][j][i] = work2[n];
-        n += 2;
-      }
-
-  n = 0;
-  for (i = 0; i < nfft; i++) {
-    work2[n] = work1[n]*vg[i][1];
-    work2[n+1] = work1[n+1]*vg[i][1];
-    n += 2;
-  }
-
-  fft2->compute(work2,work2,-1);
-
-  n = 0;
-  for (k = nzlo_in; k <= nzhi_in; k++)
-    for (j = nylo_in; j <= nyhi_in; j++)
-      for (i = nxlo_in; i <= nxhi_in; i++) {
-        v1_brick[k][j][i] = work2[n];
-        n += 2;
-      }
-
-  n = 0;
-  for (i = 0; i < nfft; i++) {
-    work2[n] = work1[n]*vg[i][2];
-    work2[n+1] = work1[n+1]*vg[i][2];
-    n += 2;
-  }
-
-  fft2->compute(work2,work2,-1);
-
-  n = 0;
-  for (k = nzlo_in; k <= nzhi_in; k++)
-    for (j = nylo_in; j <= nyhi_in; j++)
-      for (i = nxlo_in; i <= nxhi_in; i++) {
-        v2_brick[k][j][i] = work2[n];
-        n += 2;
-      }
-
-  n = 0;
-  for (i = 0; i < nfft; i++) {
-    work2[n] = work1[n]*vg[i][3];
-    work2[n+1] = work1[n+1]*vg[i][3];
-    n += 2;
-  }
-
-  fft2->compute(work2,work2,-1);
-
-  n = 0;
-  for (k = nzlo_in; k <= nzhi_in; k++)
-    for (j = nylo_in; j <= nyhi_in; j++)
-      for (i = nxlo_in; i <= nxhi_in; i++) {
-        v3_brick[k][j][i] = work2[n];
-        n += 2;
-      }
-
-  n = 0;
-  for (i = 0; i < nfft; i++) {
-    work2[n] = work1[n]*vg[i][4];
-    work2[n+1] = work1[n+1]*vg[i][4];
-    n += 2;
-  }
-
-  fft2->compute(work2,work2,-1);
-
-  n = 0;
-  for (k = nzlo_in; k <= nzhi_in; k++)
-    for (j = nylo_in; j <= nyhi_in; j++)
-      for (i = nxlo_in; i <= nxhi_in; i++) {
-        v4_brick[k][j][i] = work2[n];
-        n += 2;
-      }
-
-  n = 0;
-  for (i = 0; i < nfft; i++) {
-    work2[n] = work1[n]*vg[i][5];
-    work2[n+1] = work1[n+1]*vg[i][5];
-    n += 2;
-  }
-
-  fft2->compute(work2,work2,-1);
-
-  n = 0;
-  for (k = nzlo_in; k <= nzhi_in; k++)
-    for (j = nylo_in; j <= nyhi_in; j++)
-      for (i = nxlo_in; i <= nxhi_in; i++) {
-        v5_brick[k][j][i] = work2[n];
-        n += 2;
-      }
-}
-
-/* ----------------------------------------------------------------------
-   interpolate from grid to get electric field & force on my particles
-------------------------------------------------------------------------- */
-
-void PPPMOld::fieldforce()
-{
-  int i,l,m,n,nx,ny,nz,mx,my,mz;
-  FFT_SCALAR dx,dy,dz,x0,y0,z0;
-  FFT_SCALAR ekx,eky,ekz;
-
-  // loop over my charges, interpolate electric field from nearby grid points
-  // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
-  // (dx,dy,dz) = distance to "lower left" grid pt
-  // (mx,my,mz) = global coords of moving stencil pt
-  // ek = 3 components of E-field on particle
-
-  double *q = atom->q;
-  double **x = atom->x;
-  double **f = atom->f;
-
-  int nlocal = atom->nlocal;
-
-  for (i = 0; i < nlocal; i++) {
-    nx = part2grid[i][0];
-    ny = part2grid[i][1];
-    nz = part2grid[i][2];
-    dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv;
-    dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv;
-    dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv;
-
-    compute_rho1d(dx,dy,dz);
-
-    ekx = eky = ekz = ZEROF;
-    for (n = nlower; n <= nupper; n++) {
-      mz = n+nz;
-      z0 = rho1d[2][n];
-      for (m = nlower; m <= nupper; m++) {
-        my = m+ny;
-        y0 = z0*rho1d[1][m];
-        for (l = nlower; l <= nupper; l++) {
-          mx = l+nx;
-          x0 = y0*rho1d[0][l];
-          ekx -= x0*vdx_brick[mz][my][mx];
-          eky -= x0*vdy_brick[mz][my][mx];
-          ekz -= x0*vdz_brick[mz][my][mx];
-        }
-      }
-    }
-
-    // convert E-field to force
-
-    const double qfactor = force->qqrd2e * scale * q[i];
-    f[i][0] += qfactor*ekx;
-    f[i][1] += qfactor*eky;
-    if (slabflag != 2) f[i][2] += qfactor*ekz;
-  }
-}
-
-/* ----------------------------------------------------------------------
-   interpolate from grid to get per-atom energy/virial
-------------------------------------------------------------------------- */
-
-void PPPMOld::fieldforce_peratom()
-{
-  int i,l,m,n,nx,ny,nz,mx,my,mz;
-  FFT_SCALAR dx,dy,dz,x0,y0,z0;
-  FFT_SCALAR u,v0,v1,v2,v3,v4,v5;
-
-  // loop over my charges, interpolate from nearby grid points
-  // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
-  // (dx,dy,dz) = distance to "lower left" grid pt
-  // (mx,my,mz) = global coords of moving stencil pt
-
-  double *q = atom->q;
-  double **x = atom->x;
-  double **f = atom->f;
-
-  int nlocal = atom->nlocal;
-
-  for (i = 0; i < nlocal; i++) {
-    nx = part2grid[i][0];
-    ny = part2grid[i][1];
-    nz = part2grid[i][2];
-    dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv;
-    dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv;
-    dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv;
-
-    compute_rho1d(dx,dy,dz);
-
-    u = v0 = v1 = v2 = v3 = v4 = v5 = ZEROF;
-    for (n = nlower; n <= nupper; n++) {
-      mz = n+nz;
-      z0 = rho1d[2][n];
-      for (m = nlower; m <= nupper; m++) {
-        my = m+ny;
-        y0 = z0*rho1d[1][m];
-        for (l = nlower; l <= nupper; l++) {
-          mx = l+nx;
-          x0 = y0*rho1d[0][l];
-          if (eflag_atom) u += x0*u_brick[mz][my][mx];
-          if (vflag_atom) {
-            v0 += x0*v0_brick[mz][my][mx];
-            v1 += x0*v1_brick[mz][my][mx];
-            v2 += x0*v2_brick[mz][my][mx];
-            v3 += x0*v3_brick[mz][my][mx];
-            v4 += x0*v4_brick[mz][my][mx];
-            v5 += x0*v5_brick[mz][my][mx];
-          }
-        }
-      }
-    }
-
-    if (eflag_atom) eatom[i] += q[i]*u;
-    if (vflag_atom) {
-      vatom[i][0] += v0;
-      vatom[i][1] += v1;
-      vatom[i][2] += v2;
-      vatom[i][3] += v3;
-      vatom[i][4] += v4;
-      vatom[i][5] += v5;
-    }
-  }
-}
-
-/* ----------------------------------------------------------------------
-   map nprocs to NX by NY grid as PX by PY procs - return optimal px,py
-------------------------------------------------------------------------- */
-
-void PPPMOld::procs2grid2d(int nprocs, int nx, int ny, int *px, int *py)
-{
-  // loop thru all possible factorizations of nprocs
-  // surf = surface area of largest proc sub-domain
-  // innermost if test minimizes surface area and surface/volume ratio
-
-  int bestsurf = 2 * (nx + ny);
-  int bestboxx = 0;
-  int bestboxy = 0;
-
-  int boxx,boxy,surf,ipx,ipy;
-
-  ipx = 1;
-  while (ipx <= nprocs) {
-    if (nprocs % ipx == 0) {
-      ipy = nprocs/ipx;
-      boxx = nx/ipx;
-      if (nx % ipx) boxx++;
-      boxy = ny/ipy;
-      if (ny % ipy) boxy++;
-      surf = boxx + boxy;
-      if (surf < bestsurf ||
-          (surf == bestsurf && boxx*boxy > bestboxx*bestboxy)) {
-        bestsurf = surf;
-        bestboxx = boxx;
-        bestboxy = boxy;
-        *px = ipx;
-        *py = ipy;
-      }
-    }
-    ipx++;
-  }
-}
-
-/* ----------------------------------------------------------------------
-   charge assignment into rho1d
-   dx,dy,dz = distance of particle from "lower left" grid point
-------------------------------------------------------------------------- */
-
-void PPPMOld::compute_rho1d(const FFT_SCALAR &dx, const FFT_SCALAR &dy,
-                         const FFT_SCALAR &dz)
-{
-  int k,l;
-  FFT_SCALAR r1,r2,r3;
-
-  for (k = (1-order)/2; k <= order/2; k++) {
-    r1 = r2 = r3 = ZEROF;
-
-    for (l = order-1; l >= 0; l--) {
-      r1 = rho_coeff[l][k] + r1*dx;
-      r2 = rho_coeff[l][k] + r2*dy;
-      r3 = rho_coeff[l][k] + r3*dz;
-    }
-    rho1d[0][k] = r1;
-    rho1d[1][k] = r2;
-    rho1d[2][k] = r3;
-  }
-}
-
-/* ----------------------------------------------------------------------
-   generate coeffients for the weight function of order n
-
-              (n-1)
-  Wn(x) =     Sum    wn(k,x) , Sum is over every other integer
-           k=-(n-1)
-  For k=-(n-1),-(n-1)+2, ....., (n-1)-2,n-1
-      k is odd integers if n is even and even integers if n is odd
-              ---
-             | n-1
-             | Sum a(l,j)*(x-k/2)**l   if abs(x-k/2) < 1/2
-  wn(k,x) = <  l=0
-             |
-             |  0                       otherwise
-              ---
-  a coeffients are packed into the array rho_coeff to eliminate zeros
-  rho_coeff(l,((k+mod(n+1,2))/2) = a(l,k)
-------------------------------------------------------------------------- */
-
-void PPPMOld::compute_rho_coeff()
-{
-  int j,k,l,m;
-  FFT_SCALAR s;
-
-  FFT_SCALAR **a;
-  memory->create2d_offset(a,order,-order,order,"pppm:a");
-
-  for (k = -order; k <= order; k++)
-    for (l = 0; l < order; l++)
-      a[l][k] = 0.0;
-
-  a[0][0] = 1.0;
-  for (j = 1; j < order; j++) {
-    for (k = -j; k <= j; k += 2) {
-      s = 0.0;
-      for (l = 0; l < j; l++) {
-        a[l+1][k] = (a[l][k+1]-a[l][k-1]) / (l+1);
-#ifdef FFT_SINGLE
-        s += powf(0.5,(float) l+1) *
-          (a[l][k-1] + powf(-1.0,(float) l) * a[l][k+1]) / (l+1);
-#else
-        s += pow(0.5,(double) l+1) *
-          (a[l][k-1] + pow(-1.0,(double) l) * a[l][k+1]) / (l+1);
-#endif
-      }
-      a[0][k] = s;
-    }
-  }
-
-  m = (1-order)/2;
-  for (k = -(order-1); k < order; k += 2) {
-    for (l = 0; l < order; l++)
-      rho_coeff[l][m] = a[l][k];
-    m++;
-  }
-
-  memory->destroy2d_offset(a,-order);
-}
-
-/* ----------------------------------------------------------------------
-   Slab-geometry correction term to dampen inter-slab interactions between
-   periodically repeating slabs.  Yields good approximation to 2D Ewald if
-   adequate empty space is left between repeating slabs (J. Chem. Phys.
-   111, 3155).  Slabs defined here to be parallel to the xy plane. Also
-   extended to non-neutral systems (J. Chem. Phys. 131, 094107).
-------------------------------------------------------------------------- */
-
-void PPPMOld::slabcorr()
-{
-  // compute local contribution to global dipole moment
-
-  double *q = atom->q;
-  double **x = atom->x;
-  double zprd = domain->zprd;
-  int nlocal = atom->nlocal;
-
-  double dipole = 0.0;
-  for (int i = 0; i < nlocal; i++) dipole += q[i]*x[i][2];
-
-  // sum local contributions to get global dipole moment
-
-  double dipole_all;
-  MPI_Allreduce(&dipole,&dipole_all,1,MPI_DOUBLE,MPI_SUM,world);
-
-  // need to make non-neutral systems and/or
-  //  per-atom energy translationally invariant
-
-  double dipole_r2 = 0.0;
-  if (eflag_atom || fabs(qsum) > SMALL) {
-    for (int i = 0; i < nlocal; i++)
-      dipole_r2 += q[i]*x[i][2]*x[i][2];
-
-    // sum local contributions
-
-    double tmp;
-    MPI_Allreduce(&dipole_r2,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
-    dipole_r2 = tmp;
-  }
-
-  // compute corrections
-
-  const double e_slabcorr = MY_2PI*(dipole_all*dipole_all -
-    qsum*dipole_r2 - qsum*qsum*zprd*zprd/12.0)/volume;
-  const double qscale = force->qqrd2e * scale;
-
-  if (eflag_global) energy += qscale * e_slabcorr;
-
-  // per-atom energy
-
-  if (eflag_atom) {
-    double efact = qscale * MY_2PI/volume;
-    for (int i = 0; i < nlocal; i++)
-      eatom[i] += efact * q[i]*(x[i][2]*dipole_all - 0.5*(dipole_r2 +
-        qsum*x[i][2]*x[i][2]) - qsum*zprd*zprd/12.0);
-  }
-
-  // add on force corrections
-
-  double ffact = qscale * (-4.0*MY_PI/volume);
-  double **f = atom->f;
-
-  for (int i = 0; i < nlocal; i++) f[i][2] += ffact * q[i]*(dipole_all - qsum*x[i][2]);
-}
-
-
-/* ----------------------------------------------------------------------
-   perform and time the 1d FFTs required for N timesteps
-------------------------------------------------------------------------- */
-
-int PPPMOld::timing_1d(int n, double &time1d)
-{
-  double time1,time2;
-
-  for (int i = 0; i < 2*nfft_both; i++) work1[i] = ZEROF;
-
-  MPI_Barrier(world);
-  time1 = MPI_Wtime();
-
-  for (int i = 0; i < n; i++) {
-    fft1->timing1d(work1,nfft_both,1);
-    fft2->timing1d(work1,nfft_both,-1);
-    fft2->timing1d(work1,nfft_both,-1);
-    fft2->timing1d(work1,nfft_both,-1);
-  }
-
-  MPI_Barrier(world);
-  time2 = MPI_Wtime();
-  time1d = time2 - time1;
-
-  return 4;
-}
-
-/* ----------------------------------------------------------------------
-   perform and time the 3d FFTs required for N timesteps
-------------------------------------------------------------------------- */
-
-int PPPMOld::timing_3d(int n, double &time3d)
-{
-  double time1,time2;
-
-  for (int i = 0; i < 2*nfft_both; i++) work1[i] = ZEROF;
-
-  MPI_Barrier(world);
-  time1 = MPI_Wtime();
-
-  for (int i = 0; i < n; i++) {
-    fft1->compute(work1,work1,1);
-    fft2->compute(work1,work1,-1);
-    fft2->compute(work1,work1,-1);
-    fft2->compute(work1,work1,-1);
-  }
-
-  MPI_Barrier(world);
-  time2 = MPI_Wtime();
-  time3d = time2 - time1;
-
-  return 4;
-}
-
-/* ----------------------------------------------------------------------
-   memory usage of local arrays
-------------------------------------------------------------------------- */
-
-double PPPMOld::memory_usage()
-{
-  double bytes = nmax*3 * sizeof(double);
-  int nbrick = (nxhi_out-nxlo_out+1) * (nyhi_out-nylo_out+1) *
-    (nzhi_out-nzlo_out+1);
-  bytes += 4 * nbrick * sizeof(FFT_SCALAR);
-  bytes += 6 * nfft_both * sizeof(double);
-  bytes += nfft_both * sizeof(double);
-  bytes += nfft_both*5 * sizeof(FFT_SCALAR);
-  bytes += 2 * nbuf * sizeof(FFT_SCALAR);
-
-  if (peratom_allocate_flag) {
-    bytes += 7 * nbrick * sizeof(FFT_SCALAR);
-    bytes += 2 * nbuf_peratom * sizeof(FFT_SCALAR);
-  }
-
-  if (group_allocate_flag) {
-    bytes += 2 * nbrick * sizeof(FFT_SCALAR);
-    bytes += 2 * nfft_both * sizeof(FFT_SCALAR);;
-  }
-
-  return bytes;
-}
-
-/* ----------------------------------------------------------------------
-   group-group interactions
- ------------------------------------------------------------------------- */
-
-/* ----------------------------------------------------------------------
-   compute the PPPM total long-range force and energy for groups A and B
- ------------------------------------------------------------------------- */
-
-void PPPMOld::compute_group_group(int groupbit_A, int groupbit_B, int BA_flag)
-{
-  if (slabflag)
-    error->all(FLERR,"Cannot (yet) use K-space slab "
-               "correction with compute group/group");
-
-  int i,j;
-
-  if (!group_allocate_flag) {
-    allocate_groups();
-    group_allocate_flag = 1;
-  }
-
-  e2group = 0; //energy
-  f2group[0] = 0; //force in x-direction
-  f2group[1] = 0; //force in y-direction
-  f2group[2] = 0; //force in z-direction
-
-  double *q = atom->q;
-  int nlocal = atom->nlocal;
-  int *mask = atom->mask;
-
-
-  // map my particle charge onto my local 3d density grid
-
-  make_rho_groups(groupbit_A,groupbit_B,BA_flag);
-
-  // all procs communicate density values from their ghost cells
-  //   to fully sum contribution in their 3d bricks
-  // remap from 3d decomposition to FFT decomposition
-
-  // temporarily store and switch pointers so we can
-  //  use brick2fft() for groups A and B (without
-  //  writing an additional function)
-
-  FFT_SCALAR ***density_brick_real = density_brick;
-  FFT_SCALAR *density_fft_real = density_fft;
-
-  // group A
-
-  density_brick = density_A_brick;
-  density_fft = density_A_fft;
-
-  brick2fft();
-
-  // group B
-
-  density_brick = density_B_brick;
-  density_fft = density_B_fft;
-
-  brick2fft();
-
-  // switch back pointers
-
-  density_brick = density_brick_real;
-  density_fft = density_fft_real;
-
-  // compute potential gradient on my FFT grid and
-  //   portion of group-group energy/force on this proc's FFT grid
-
-  poisson_groups(BA_flag);
-
-  const double qscale = force->qqrd2e * scale;
-
-  // total group A <--> group B energy
-  // self and boundary correction terms are in compute_group_group.cpp
-
-  double e2group_all;
-  MPI_Allreduce(&e2group,&e2group_all,1,MPI_DOUBLE,MPI_SUM,world);
-  e2group = e2group_all;
-
-  e2group *= qscale*0.5*volume;
-
-  // total group A <--> group B force
-
-  double f2group_all[3];
-  MPI_Allreduce(f2group,f2group_all,3,MPI_DOUBLE,MPI_SUM,world);
-
-  for (i = 0; i < 3; i++) f2group[i] = qscale*volume*f2group_all[i];
-}
-
-/* ----------------------------------------------------------------------
- allocate group-group memory that depends on # of K-vectors and order
- ------------------------------------------------------------------------- */
-
-void PPPMOld::allocate_groups()
-{
-  memory->create3d_offset(density_A_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
-                          nxlo_out,nxhi_out,"pppm:density_A_brick");
-  memory->create3d_offset(density_B_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
-                          nxlo_out,nxhi_out,"pppm:density_B_brick");
-  memory->create(density_A_fft,nfft_both,"pppm:density_A_fft");
-  memory->create(density_B_fft,nfft_both,"pppm:density_B_fft");
-}
-
-/* ----------------------------------------------------------------------
- deallocate group-group memory that depends on # of K-vectors and order
- ------------------------------------------------------------------------- */
-
-void PPPMOld::deallocate_groups()
-{
-  memory->destroy3d_offset(density_A_brick,nzlo_out,nylo_out,nxlo_out);
-  memory->destroy3d_offset(density_B_brick,nzlo_out,nylo_out,nxlo_out);
-  memory->destroy(density_A_fft);
-  memory->destroy(density_B_fft);
-}
-
-/* ----------------------------------------------------------------------
- create discretized "density" on section of global grid due to my particles
- density(x,y,z) = charge "density" at grid points of my 3d brick
- (nxlo:nxhi,nylo:nyhi,nzlo:nzhi) is extent of my brick (including ghosts)
- in global grid for group-group interactions
- ------------------------------------------------------------------------- */
-
-void PPPMOld::make_rho_groups(int groupbit_A, int groupbit_B, int BA_flag)
-{
-  int l,m,n,nx,ny,nz,mx,my,mz;
-  FFT_SCALAR dx,dy,dz,x0,y0,z0;
-
-  // clear 3d density arrays
-
-  memset(&(density_A_brick[nzlo_out][nylo_out][nxlo_out]),0,
-         ngrid*sizeof(FFT_SCALAR));
-
-  memset(&(density_B_brick[nzlo_out][nylo_out][nxlo_out]),0,
-         ngrid*sizeof(FFT_SCALAR));
-
-  // loop over my charges, add their contribution to nearby grid points
-  // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
-  // (dx,dy,dz) = distance to "lower left" grid pt
-  // (mx,my,mz) = global coords of moving stencil pt
-
-  double *q = atom->q;
-  double **x = atom->x;
-  int nlocal = atom->nlocal;
-  int *mask = atom->mask;
-
-  for (int i = 0; i < nlocal; i++) {
-
-    if ((mask[i] & groupbit_A) && (mask[i] & groupbit_B))
-      if (BA_flag) continue;
-
-    if ((mask[i] & groupbit_A) || (mask[i] & groupbit_B)) {
-
-      nx = part2grid[i][0];
-      ny = part2grid[i][1];
-      nz = part2grid[i][2];
-      dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv;
-      dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv;
-      dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv;
-
-      compute_rho1d(dx,dy,dz);
-
-      z0 = delvolinv * q[i];
-      for (n = nlower; n <= nupper; n++) {
-        mz = n+nz;
-        y0 = z0*rho1d[2][n];
-        for (m = nlower; m <= nupper; m++) {
-          my = m+ny;
-          x0 = y0*rho1d[1][m];
-          for (l = nlower; l <= nupper; l++) {
-            mx = l+nx;
-
-            // group A
-
-            if (mask[i] & groupbit_A)
-              density_A_brick[mz][my][mx] += x0*rho1d[0][l];
-
-            // group B
-
-            if (mask[i] & groupbit_B)
-              density_B_brick[mz][my][mx] += x0*rho1d[0][l];
-          }
-        }
-      }
-    }
-  }
-}
-
-/* ----------------------------------------------------------------------
-   FFT-based Poisson solver for group-group interactions
- ------------------------------------------------------------------------- */
-
-void PPPMOld::poisson_groups(int BA_flag)
-{
-  int i,j,k,n;
-  double eng;
-
-  // reuse memory (already declared)
-
-  FFT_SCALAR *work_A = work1;
-  FFT_SCALAR *work_B = work2;
-
-  // transform charge density (r -> k)
-
-  // group A
-
-  n = 0;
-  for (i = 0; i < nfft; i++) {
-    work_A[n++] = density_A_fft[i];
-    work_A[n++] = ZEROF;
-  }
-
-  fft1->compute(work_A,work_A,1);
-
-  // group B
-
-  n = 0;
-  for (i = 0; i < nfft; i++) {
-    work_B[n++] = density_B_fft[i];
-    work_B[n++] = ZEROF;
-  }
-
-  fft1->compute(work_B,work_B,1);
-
-  // group-group energy and force contribution,
-  //  keep everything in reciprocal space so
-  //  no inverse FFTs needed
-
-  double scaleinv = 1.0/(nx_pppm*ny_pppm*nz_pppm);
-  double s2 = scaleinv*scaleinv;
-
-  // energy
-
-  n = 0;
-  for (i = 0; i < nfft; i++) {
-    e2group += s2 * greensfn[i] *
-      (work_A[n]*work_B[n] + work_A[n+1]*work_B[n+1]);
-    n += 2;
-  }
-
-  if (BA_flag) return;
-
-
-  // multiply by Green's function and s2
-  //  (only for work_A so it is not squared below)
-
-  n = 0;
-  for (i = 0; i < nfft; i++) {
-    work_A[n++] *= s2 * greensfn[i];
-    work_A[n++] *= s2 * greensfn[i];
-  }
-
-  double partial_group;
-
-  // force, x direction
-
-  n = 0;
-  for (k = nzlo_fft; k <= nzhi_fft; k++)
-    for (j = nylo_fft; j <= nyhi_fft; j++)
-      for (i = nxlo_fft; i <= nxhi_fft; i++) {
-        partial_group = work_A[n+1]*work_B[n] - work_A[n]*work_B[n+1];
-        f2group[0] += fkx[i] * partial_group;
-        n += 2;
-      }
-
-  // force, y direction
-
-  n = 0;
-  for (k = nzlo_fft; k <= nzhi_fft; k++)
-    for (j = nylo_fft; j <= nyhi_fft; j++)
-      for (i = nxlo_fft; i <= nxhi_fft; i++) {
-        partial_group = work_A[n+1]*work_B[n] - work_A[n]*work_B[n+1];
-        f2group[1] += fky[j] * partial_group;
-        n += 2;
-      }
-
-  // force, z direction
-
-  n = 0;
-  for (k = nzlo_fft; k <= nzhi_fft; k++)
-    for (j = nylo_fft; j <= nyhi_fft; j++)
-      for (i = nxlo_fft; i <= nxhi_fft; i++) {
-        partial_group = work_A[n+1]*work_B[n] - work_A[n]*work_B[n+1];
-        f2group[2] += fkz[k] * partial_group;
-        n += 2;
-      }
-}
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing authors: Roy Pollock (LLNL), Paul Crozier (SNL)
+     per-atom energy/virial & group/group energy/force added by Stan Moore (BYU)
+------------------------------------------------------------------------- */
+
+#include "lmptype.h"
+#include "mpi.h"
+#include "string.h"
+#include "stdio.h"
+#include "stdlib.h"
+#include "math.h"
+#include "pppm_old.h"
+#include "math_const.h"
+#include "atom.h"
+#include "comm.h"
+#include "neighbor.h"
+#include "force.h"
+#include "pair.h"
+#include "bond.h"
+#include "angle.h"
+#include "domain.h"
+#include "fft3d_wrap.h"
+#include "remap_wrap.h"
+#include "memory.h"
+#include "error.h"
+
+using namespace LAMMPS_NS;
+using namespace MathConst;
+
+#define MAXORDER 7
+#define OFFSET 16384
+#define SMALL 0.00001
+#define LARGE 10000.0
+#define EPS_HOC 1.0e-7
+
+#ifdef FFT_SINGLE
+#define ZEROF 0.0f
+#define ONEF  1.0f
+#else
+#define ZEROF 0.0
+#define ONEF  1.0
+#endif
+
+/* ---------------------------------------------------------------------- */
+
+PPPMOld::PPPMOld(LAMMPS *lmp, int narg, char **arg) : KSpace(lmp, narg, arg)
+{
+  if (narg < 1) error->all(FLERR,"Illegal kspace_style pppm command");
+
+  triclinic_support = 0;
+  pppmflag = 1;
+  group_group_enable = 0;
+
+  accuracy_relative = fabs(force->numeric(FLERR,arg[0])); // sign dropped
+
+  nfactors = 3;                     // factors[] = {2,3,5}
+  factors = new int[nfactors];      // freed in the destructor
+  factors[0] = 2;
+  factors[1] = 3;
+  factors[2] = 5;
+
+  MPI_Comm_rank(world,&me);         // me = my rank in world
+  MPI_Comm_size(world,&nprocs);     // nprocs = # of procs in world
+
+  density_brick = vdx_brick = vdy_brick = vdz_brick = NULL;
+  density_fft = NULL;               // all grid/FFT/buffer ptrs start NULL
+  u_brick = NULL;
+  v0_brick = v1_brick = v2_brick = v3_brick = v4_brick = v5_brick = NULL;
+  greensfn = NULL;
+  work1 = work2 = NULL;
+  vg = NULL;
+  fkx = fky = fkz = NULL;
+  buf1 = buf2 = buf3 = buf4 = NULL;
+
+  density_A_brick = density_B_brick = NULL;
+  density_A_fft = density_B_fft = NULL;
+
+  gf_b = NULL;
+  rho1d = rho_coeff = NULL;
+
+  fft1 = fft2 = NULL;               // no FFT or remap objects exist yet
+  remap = NULL;
+
+  nmax = 0;
+  part2grid = NULL;                 // destroyed in the destructor
+}
+
+/* ----------------------------------------------------------------------
+   free all memory
+------------------------------------------------------------------------- */
+
+PPPMOld::~PPPMOld()
+{
+  delete [] factors;            // allocated with new[] in the constructor
+  deallocate();                 // same three calls init() makes to reset
+  deallocate_peratom();
+  deallocate_groups();
+  memory->destroy(part2grid);   // set to NULL in the constructor
+}
+
+/* ----------------------------------------------------------------------
+   called once before run
+------------------------------------------------------------------------- */
+
+void PPPMOld::init()
+{
+  if (me == 0) {
+    if (screen) fprintf(screen,"PPPM initialization ...\n");
+    if (logfile) fprintf(logfile,"PPPM initialization ...\n");
+  }
+
+  // error check
+
+  triclinic_check();
+  if (domain->dimension == 2) error->all(FLERR,
+                                         "Cannot use PPPM with 2d simulation");
+
+  if (!atom->q_flag) error->all(FLERR,"Kspace style requires atom attribute q");
+
+  if (slabflag == 0 && domain->nonperiodic > 0)
+    error->all(FLERR,"Cannot use nonperiodic boundaries with PPPM");
+  if (slabflag) {
+    if (domain->xperiodic != 1 || domain->yperiodic != 1 ||
+        domain->boundary[2][0] != 1 || domain->boundary[2][1] != 1)
+      error->all(FLERR,"Incorrect boundaries with slab PPPM");
+  }
+
+  if (order < 2 || order > MAXORDER) {
+    char str[128];
+    sprintf(str,"PPPM order cannot be < 2 or > than %d",MAXORDER);
+    error->all(FLERR,str);
+  }
+
+  // free all arrays previously allocated
+
+  deallocate();
+  deallocate_peratom();
+  peratom_allocate_flag = 0;
+  deallocate_groups();
+  group_allocate_flag = 0;
+
+  // extract short-range Coulombic cutoff from pair style
+
+  scale = 1.0;
+
+  pair_check();
+
+  int itmp=0;
+  double *p_cutoff = (double *) force->pair->extract("cut_coul",itmp);
+  if (p_cutoff == NULL)
+    error->all(FLERR,"KSpace style is incompatible with Pair style");
+  cutoff = *p_cutoff;
+
+  // if kspace is TIP4P, extract TIP4P params from pair style
+  // bond/angle are not yet init(), so insure equilibrium request is valid
+
+  qdist = 0.0;
+
+  if (tip4pflag) {
+    double *p_qdist = (double *) force->pair->extract("qdist",itmp);
+    int *p_typeO = (int *) force->pair->extract("typeO",itmp);
+    int *p_typeH = (int *) force->pair->extract("typeH",itmp);
+    int *p_typeA = (int *) force->pair->extract("typeA",itmp);
+    int *p_typeB = (int *) force->pair->extract("typeB",itmp);
+    if (!p_qdist || !p_typeO || !p_typeH || !p_typeA || !p_typeB)
+      error->all(FLERR,"KSpace style is incompatible with Pair style");
+    qdist = *p_qdist;
+    typeO = *p_typeO;
+    typeH = *p_typeH;
+    int typeA = *p_typeA;
+    int typeB = *p_typeB;
+
+    if (force->angle == NULL || force->bond == NULL)
+      error->all(FLERR,"Bond and angle potentials must be defined for TIP4P");
+    if (typeA < 1 || typeA > atom->nangletypes ||
+        force->angle->setflag[typeA] == 0)
+      error->all(FLERR,"Bad TIP4P angle type for PPPM/TIP4P");
+    if (typeB < 1 || typeB > atom->nbondtypes ||
+        force->bond->setflag[typeB] == 0)
+      error->all(FLERR,"Bad TIP4P bond type for PPPM/TIP4P");
+    double theta = force->angle->equilibrium_angle(typeA);
+    double blen = force->bond->equilibrium_distance(typeB);
+    alpha = qdist / (cos(0.5*theta) * blen);
+  }
+
+  // compute qsum & qsqsum and warn if not charge-neutral
+
+  qsum = qsqsum = 0.0;
+  for (int i = 0; i < atom->nlocal; i++) {
+    qsum += atom->q[i];
+    qsqsum += atom->q[i]*atom->q[i];
+  }
+
+  double tmp;
+  MPI_Allreduce(&qsum,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
+  qsum = tmp;
+  MPI_Allreduce(&qsqsum,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
+  qsqsum = tmp;
+
+  if (qsqsum == 0.0)
+    error->all(FLERR,"Cannot use kspace solver on system with no charge");
+  if (fabs(qsum) > SMALL && me == 0) {
+    char str[128];
+    sprintf(str,"System is not charge neutral, net charge = %g",qsum);
+    error->warning(FLERR,str);
+  }
+
+  // set accuracy (force units) from accuracy_relative or accuracy_absolute
+
+  if (accuracy_absolute >= 0.0) accuracy = accuracy_absolute;
+  else accuracy = accuracy_relative * two_charge_force;
+
+  // setup FFT grid resolution and g_ewald
+  // normally one iteration thru while loop is all that is required
+  // if grid stencil extends beyond neighbor proc, reduce order and try again
+
+  int iteration = 0;
+
+  while (order > 1) {
+    if (iteration && me == 0)
+      error->warning(FLERR,"Reducing PPPM order b/c stencil extends "
+                     "beyond neighbor processor");
+    iteration++;
+
+    set_grid();
+
+    if (nx_pppm >= OFFSET || ny_pppm >= OFFSET || nz_pppm >= OFFSET)
+      error->all(FLERR,"PPPM grid is too large");
+
+    // global indices of PPPM grid range from 0 to N-1
+    // nlo_in,nhi_in = lower/upper limits of the 3d sub-brick of
+    //   global PPPM grid that I own without ghost cells
+    // for slab PPPM, assign z grid as if it were not extended
+
+    nxlo_in = static_cast<int> (comm->xsplit[comm->myloc[0]] * nx_pppm);
+    nxhi_in = static_cast<int> (comm->xsplit[comm->myloc[0]+1] * nx_pppm) - 1;
+
+    nylo_in = static_cast<int> (comm->ysplit[comm->myloc[1]] * ny_pppm);
+    nyhi_in = static_cast<int> (comm->ysplit[comm->myloc[1]+1] * ny_pppm) - 1;
+
+    nzlo_in = static_cast<int>
+      (comm->zsplit[comm->myloc[2]] * nz_pppm/slab_volfactor);
+    nzhi_in = static_cast<int>
+      (comm->zsplit[comm->myloc[2]+1] * nz_pppm/slab_volfactor) - 1;
+
+    // nlower,nupper = stencil size for mapping particles to PPPM grid
+
+    nlower = -(order-1)/2;
+    nupper = order/2;
+
+    // shift values for particle <-> grid mapping
+    // add/subtract OFFSET to avoid int(-0.75) = 0 when want it to be -1
+
+    if (order % 2) shift = OFFSET + 0.5;
+    else shift = OFFSET;
+    if (order % 2) shiftone = 0.0;
+    else shiftone = 0.5;
+
+    // nlo_out,nhi_out = lower/upper limits of the 3d sub-brick of
+    //   global PPPM grid that my particles can contribute charge to
+    // effectively nlo_in,nhi_in + ghost cells
+    // nlo,nhi = global coords of grid pt to "lower left" of smallest/largest
+    //           position a particle in my box can be at
+    // dist[3] = particle position bound = subbox + skin/2.0 + qdist
+    //   qdist = offset due to TIP4P fictitious charge
+    //   convert to triclinic if necessary
+    // nlo_out,nhi_out = nlo,nhi + stencil size for particle mapping
+    // for slab PPPM, assign z grid as if it were not extended
+
+    triclinic = domain->triclinic;
+    double *prd,*sublo,*subhi;
+
+    if (triclinic == 0) {
+      prd = domain->prd;
+      boxlo = domain->boxlo;
+      sublo = domain->sublo;
+      subhi = domain->subhi;
+    } else {
+      prd = domain->prd_lamda;
+      boxlo = domain->boxlo_lamda;
+      sublo = domain->sublo_lamda;
+      subhi = domain->subhi_lamda;
+    }
+
+    double xprd = prd[0];
+    double yprd = prd[1];
+    double zprd = prd[2];
+    double zprd_slab = zprd*slab_volfactor;
+
+    double dist[3];
+    double cuthalf = 0.5*neighbor->skin + qdist;
+    if (triclinic == 0) dist[0] = dist[1] = dist[2] = cuthalf;
+    else {
+      dist[0] = cuthalf/domain->prd[0];
+      dist[1] = cuthalf/domain->prd[1];
+      dist[2] = cuthalf/domain->prd[2];
+    }
+
+    int nlo,nhi;
+
+    nlo = static_cast<int> ((sublo[0]-dist[0]-boxlo[0]) *
+                            nx_pppm/xprd + shift) - OFFSET;
+    nhi = static_cast<int> ((subhi[0]+dist[0]-boxlo[0]) *
+                            nx_pppm/xprd + shift) - OFFSET;
+    nxlo_out = nlo + nlower;
+    nxhi_out = nhi + nupper;
+
+    nlo = static_cast<int> ((sublo[1]-dist[1]-boxlo[1]) *
+                            ny_pppm/yprd + shift) - OFFSET;
+    nhi = static_cast<int> ((subhi[1]+dist[1]-boxlo[1]) *
+                            ny_pppm/yprd + shift) - OFFSET;
+    nylo_out = nlo + nlower;
+    nyhi_out = nhi + nupper;
+
+    nlo = static_cast<int> ((sublo[2]-dist[2]-boxlo[2]) *
+                            nz_pppm/zprd_slab + shift) - OFFSET;
+    nhi = static_cast<int> ((subhi[2]+dist[2]-boxlo[2]) *
+                            nz_pppm/zprd_slab + shift) - OFFSET;
+    nzlo_out = nlo + nlower;
+    nzhi_out = nhi + nupper;
+
+    // for slab PPPM, change the grid boundary for processors at +z end
+    //   to include the empty volume between periodically repeating slabs
+    // for slab PPPM, want charge data communicated from -z proc to +z proc,
+    //   but not vice versa, also want field data communicated from +z proc to
+    //   -z proc, but not vice versa
+    // this is accomplished by nzhi_in = nzhi_out on +z end (no ghost cells)
+
+    if (slabflag == 1 && (comm->myloc[2] == comm->procgrid[2]-1)) {
+      nzhi_in = nz_pppm - 1;
+      nzhi_out = nz_pppm - 1;
+    }
+
+    // nlo_ghost,nhi_ghost = # of planes I will recv from 6 directions
+    //   that overlay domain I own
+    // proc in that direction tells me via sendrecv()
+    // if no neighbor proc, value is from self since I have ghosts regardless
+
+    int nplanes;
+    MPI_Status status;
+
+    nplanes = nxlo_in - nxlo_out;
+    if (comm->procneigh[0][0] != me)
+      MPI_Sendrecv(&nplanes,1,MPI_INT,comm->procneigh[0][0],0,
+                   &nxhi_ghost,1,MPI_INT,comm->procneigh[0][1],0,
+                   world,&status);
+    else nxhi_ghost = nplanes;
+
+    nplanes = nxhi_out - nxhi_in;
+    if (comm->procneigh[0][1] != me)
+      MPI_Sendrecv(&nplanes,1,MPI_INT,comm->procneigh[0][1],0,
+                   &nxlo_ghost,1,MPI_INT,comm->procneigh[0][0],
+                   0,world,&status);
+    else nxlo_ghost = nplanes;
+
+    nplanes = nylo_in - nylo_out;
+    if (comm->procneigh[1][0] != me)
+      MPI_Sendrecv(&nplanes,1,MPI_INT,comm->procneigh[1][0],0,
+                   &nyhi_ghost,1,MPI_INT,comm->procneigh[1][1],0,
+                   world,&status);
+    else nyhi_ghost = nplanes;
+
+    nplanes = nyhi_out - nyhi_in;
+    if (comm->procneigh[1][1] != me)
+      MPI_Sendrecv(&nplanes,1,MPI_INT,comm->procneigh[1][1],0,
+                   &nylo_ghost,1,MPI_INT,comm->procneigh[1][0],0,
+                   world,&status);
+    else nylo_ghost = nplanes;
+
+    nplanes = nzlo_in - nzlo_out;
+    if (comm->procneigh[2][0] != me)
+      MPI_Sendrecv(&nplanes,1,MPI_INT,comm->procneigh[2][0],0,
+                   &nzhi_ghost,1,MPI_INT,comm->procneigh[2][1],0,
+                   world,&status);
+    else nzhi_ghost = nplanes;
+
+    nplanes = nzhi_out - nzhi_in;
+    if (comm->procneigh[2][1] != me)
+      MPI_Sendrecv(&nplanes,1,MPI_INT,comm->procneigh[2][1],0,
+                   &nzlo_ghost,1,MPI_INT,comm->procneigh[2][0],0,
+                   world,&status);
+    else nzlo_ghost = nplanes;
+
+    // test that ghost overlap is not bigger than my sub-domain
+
+    int flag = 0;
+    if (nxlo_ghost > nxhi_in-nxlo_in+1) flag = 1;
+    if (nxhi_ghost > nxhi_in-nxlo_in+1) flag = 1;
+    if (nylo_ghost > nyhi_in-nylo_in+1) flag = 1;
+    if (nyhi_ghost > nyhi_in-nylo_in+1) flag = 1;
+    if (nzlo_ghost > nzhi_in-nzlo_in+1) flag = 1;
+    if (nzhi_ghost > nzhi_in-nzlo_in+1) flag = 1;
+
+    int flag_all;
+    MPI_Allreduce(&flag,&flag_all,1,MPI_INT,MPI_SUM,world);
+
+    if (flag_all == 0) break;
+    order--;
+  }
+
+  if (order == 0) error->all(FLERR,"PPPM order has been reduced to 0");
+
+  // decomposition of FFT mesh
+  // global indices range from 0 to N-1
+  // proc owns entire x-dimension, clump of columns in y,z dimensions
+  // npey_fft,npez_fft = # of procs in y,z dims
+  // if nprocs is small enough, proc can own 1 or more entire xy planes,
+  //   else proc owns 2d sub-blocks of yz plane
+  // me_y,me_z = which proc (0-npe_fft-1) I am in y,z dimensions
+  // nlo_fft,nhi_fft = lower/upper limit of the section
+  //   of the global FFT mesh that I own
+
+  int npey_fft,npez_fft;
+  if (nz_pppm >= nprocs) {
+    npey_fft = 1;
+    npez_fft = nprocs;
+  } else procs2grid2d(nprocs,ny_pppm,nz_pppm,&npey_fft,&npez_fft);
+
+  int me_y = me % npey_fft;
+  int me_z = me / npey_fft;
+
+  nxlo_fft = 0;
+  nxhi_fft = nx_pppm - 1;
+  nylo_fft = me_y*ny_pppm/npey_fft;
+  nyhi_fft = (me_y+1)*ny_pppm/npey_fft - 1;
+  nzlo_fft = me_z*nz_pppm/npez_fft;
+  nzhi_fft = (me_z+1)*nz_pppm/npez_fft - 1;
+
+  // PPPM grid for this proc, including ghosts
+
+  ngrid = (nxhi_out-nxlo_out+1) * (nyhi_out-nylo_out+1) *
+    (nzhi_out-nzlo_out+1);
+
+  // FFT arrays on this proc, without ghosts
+  // nfft = FFT points in FFT decomposition on this proc
+  // nfft_brick = FFT points in 3d brick-decomposition on this proc
+  // nfft_both = greater of 2 values
+
+  nfft = (nxhi_fft-nxlo_fft+1) * (nyhi_fft-nylo_fft+1) *
+    (nzhi_fft-nzlo_fft+1);
+  int nfft_brick = (nxhi_in-nxlo_in+1) * (nyhi_in-nylo_in+1) *
+    (nzhi_in-nzlo_in+1);
+  nfft_both = MAX(nfft,nfft_brick);
+
+  // buffer space for use in brick2fft and fillbrick
+  // idel = max # of ghost planes to send or recv in +/- dir of each dim
+  // nx,ny,nz = owned planes (including ghosts) in each dim
+  // nxx,nyy,nzz = max # of grid cells to send in each dim
+  // nbuf = max in any dim, augment by 3x for components of vd_xyz in fillbrick
+
+  int idelx,idely,idelz,nx,ny,nz,nxx,nyy,nzz;
+
+  idelx = MAX(nxlo_ghost,nxhi_ghost);
+  idelx = MAX(idelx,nxhi_out-nxhi_in);
+  idelx = MAX(idelx,nxlo_in-nxlo_out);
+
+  idely = MAX(nylo_ghost,nyhi_ghost);
+  idely = MAX(idely,nyhi_out-nyhi_in);
+  idely = MAX(idely,nylo_in-nylo_out);
+
+  idelz = MAX(nzlo_ghost,nzhi_ghost);
+  idelz = MAX(idelz,nzhi_out-nzhi_in);
+  idelz = MAX(idelz,nzlo_in-nzlo_out);
+
+  nx = nxhi_out - nxlo_out + 1;
+  ny = nyhi_out - nylo_out + 1;
+  nz = nzhi_out - nzlo_out + 1;
+
+  nxx = idelx * ny * nz;
+  nyy = idely * nx * nz;
+  nzz = idelz * nx * ny;
+
+  nbuf = MAX(nxx,nyy);
+  nbuf = MAX(nbuf,nzz);
+
+  nbuf_peratom = 7*nbuf;
+  nbuf *= 3;
+
+  // print stats
+
+  int ngrid_max,nfft_both_max,nbuf_max;
+  MPI_Allreduce(&ngrid,&ngrid_max,1,MPI_INT,MPI_MAX,world);
+  MPI_Allreduce(&nfft_both,&nfft_both_max,1,MPI_INT,MPI_MAX,world);
+  MPI_Allreduce(&nbuf,&nbuf_max,1,MPI_INT,MPI_MAX,world);
+
+  if (me == 0) {
+    if (screen) fprintf(screen,"  brick FFT buffer size/proc = %d %d %d\n",
+                        ngrid_max,nfft_both_max,nbuf_max);
+    if (logfile) fprintf(logfile,"  brick FFT buffer size/proc = %d %d %d\n",
+                         ngrid_max,nfft_both_max,nbuf_max);
+  }
+
+  // allocate K-space dependent memory
+  // don't invoke allocate_peratom() here, wait to see if needed
+
+  allocate();
+
+  // pre-compute Green's function denominator expansion
+  // pre-compute 1d charge distribution coefficients
+
+  compute_gf_denom();
+  compute_rho_coeff();
+}
+
+/* ----------------------------------------------------------------------
+   adjust PPPM coeffs (volume, del*inv, fk*, vg, greensfn); called initially and whenever volume has changed
+------------------------------------------------------------------------- */
+
+void PPPMOld::setup()
+{
+  int i,j,k,l,m,n;
+  double *prd;
+
+  // volume-dependent factors (prd is the lamda-space box when triclinic)
+  // adjust z dimension for 2d slab PPPM
+  // z dimension for 3d PPPM is zprd since slab_volfactor = 1.0
+
+  if (triclinic == 0) prd = domain->prd;
+  else prd = domain->prd_lamda;
+
+  double xprd = prd[0];
+  double yprd = prd[1];
+  double zprd = prd[2];
+  double zprd_slab = zprd*slab_volfactor;
+  volume = xprd * yprd * zprd_slab;
+
+  delxinv = nx_pppm/xprd;
+  delyinv = ny_pppm/yprd;
+  delzinv = nz_pppm/zprd_slab;
+
+  delvolinv = delxinv*delyinv*delzinv;
+
+  double unitkx = (2.0*MY_PI/xprd);
+  double unitky = (2.0*MY_PI/yprd);
+  double unitkz = (2.0*MY_PI/zprd_slab);
+
+  // fkx,fky,fkz for my FFT grid pts; per = i if 2*i < N, else i-N (integer division)
+
+  double per;
+
+  for (i = nxlo_fft; i <= nxhi_fft; i++) {
+    per = i - nx_pppm*(2*i/nx_pppm);
+    fkx[i] = unitkx*per;
+  }
+
+  for (i = nylo_fft; i <= nyhi_fft; i++) {
+    per = i - ny_pppm*(2*i/ny_pppm);
+    fky[i] = unitky*per;
+  }
+
+  for (i = nzlo_fft; i <= nzhi_fft; i++) {
+    per = i - nz_pppm*(2*i/nz_pppm);
+    fkz[i] = unitkz*per;
+  }
+
+  // virial coefficients per FFT point n, columns ordered xx,yy,zz,xy,xz,yz; all zero where sqk == 0
+
+  double sqk,vterm;
+
+  n = 0;
+  for (k = nzlo_fft; k <= nzhi_fft; k++) {
+    for (j = nylo_fft; j <= nyhi_fft; j++) {
+      for (i = nxlo_fft; i <= nxhi_fft; i++) {
+        sqk = fkx[i]*fkx[i] + fky[j]*fky[j] + fkz[k]*fkz[k];
+        if (sqk == 0.0) {
+          vg[n][0] = 0.0;
+          vg[n][1] = 0.0;
+          vg[n][2] = 0.0;
+          vg[n][3] = 0.0;
+          vg[n][4] = 0.0;
+          vg[n][5] = 0.0;
+        } else {
+          vterm = -2.0 * (1.0/sqk + 0.25/(g_ewald*g_ewald));
+          vg[n][0] = 1.0 + vterm*fkx[i]*fkx[i];
+          vg[n][1] = 1.0 + vterm*fky[j]*fky[j];
+          vg[n][2] = 1.0 + vterm*fkz[k]*fkz[k];
+          vg[n][3] = vterm*fkx[i]*fky[j];
+          vg[n][4] = vterm*fkx[i]*fkz[k];
+          vg[n][5] = vterm*fky[j]*fkz[k];
+        }
+        n++;
+      }
+    }
+  }
+
+  // modified (Hockney-Eastwood) Coulomb Green's function, stored in greensfn[n] with x fastest, then y, then z
+
+  int nx,ny,nz,kper,lper,mper;
+  double snx,sny,snz,snx2,sny2,snz2;
+  double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz;
+  double sum1,dot1,dot2;
+  double numerator,denominator;
+
+  int nbx = static_cast<int> ((g_ewald*xprd/(MY_PI*nx_pppm)) *
+                              pow(-log(EPS_HOC),0.25));
+  int nby = static_cast<int> ((g_ewald*yprd/(MY_PI*ny_pppm)) *
+                              pow(-log(EPS_HOC),0.25));
+  int nbz = static_cast<int> ((g_ewald*zprd_slab/(MY_PI*nz_pppm)) *
+                              pow(-log(EPS_HOC),0.25));  // nbx,nby,nbz bound the image sums below
+
+  double form = 1.0;  // constant prefactor applied to numerator
+
+  n = 0;
+  for (m = nzlo_fft; m <= nzhi_fft; m++) {
+    mper = m - nz_pppm*(2*m/nz_pppm);
+    snz = sin(0.5*unitkz*mper*zprd_slab/nz_pppm);
+    snz2 = snz*snz;
+
+    for (l = nylo_fft; l <= nyhi_fft; l++) {
+      lper = l - ny_pppm*(2*l/ny_pppm);
+      sny = sin(0.5*unitky*lper*yprd/ny_pppm);
+      sny2 = sny*sny;
+
+      for (k = nxlo_fft; k <= nxhi_fft; k++) {
+        kper = k - nx_pppm*(2*k/nx_pppm);
+        snx = sin(0.5*unitkx*kper*xprd/nx_pppm);
+        snx2 = snx*snx;
+
+        sqk = pow(unitkx*kper,2.0) + pow(unitky*lper,2.0) +
+          pow(unitkz*mper,2.0);
+
+        if (sqk != 0.0) {
+          numerator = form*12.5663706/sqk;  // 12.5663706 ~= 4*pi
+          denominator = gf_denom(snx2,sny2,snz2);
+          sum1 = 0.0;
+          const double dorder = static_cast<double>(order);
+          for (nx = -nbx; nx <= nbx; nx++) {
+            qx = unitkx*(kper+nx_pppm*nx);
+            sx = exp(-0.25*pow(qx/g_ewald,2.0));
+            wx = 1.0;
+            argx = 0.5*qx*xprd/nx_pppm;
+            if (argx != 0.0) wx = pow(sin(argx)/argx,dorder);
+            for (ny = -nby; ny <= nby; ny++) {
+              qy = unitky*(lper+ny_pppm*ny);
+              sy = exp(-0.25*pow(qy/g_ewald,2.0));
+              wy = 1.0;
+              argy = 0.5*qy*yprd/ny_pppm;
+              if (argy != 0.0) wy = pow(sin(argy)/argy,dorder);
+              for (nz = -nbz; nz <= nbz; nz++) {
+                qz = unitkz*(mper+nz_pppm*nz);
+                sz = exp(-0.25*pow(qz/g_ewald,2.0));
+                wz = 1.0;
+                argz = 0.5*qz*zprd_slab/nz_pppm;
+                if (argz != 0.0) wz = pow(sin(argz)/argz,dorder);
+
+                dot1 = unitkx*kper*qx + unitky*lper*qy + unitkz*mper*qz;
+                dot2 = qx*qx+qy*qy+qz*qz;
+                sum1 += (dot1/dot2) * sx*sy*sz * pow(wx*wy*wz,2.0);
+              }
+            }
+          }
+          greensfn[n++] = numerator*sum1/denominator;
+        } else greensfn[n++] = 0.0;  // sqk == 0 point is set to zero
+      }
+    }
+  }
+}
+
+/* ----------------------------------------------------------------------
+   compute the PPPM long-range force, energy, virial (eflag,vflag select which energy/virial tallies are set up)
+------------------------------------------------------------------------- */
+
+void PPPMOld::compute(int eflag, int vflag)
+{
+  int i,j;
+
+  // set energy/virial flags (all cleared when neither eflag nor vflag is set)
+  // invoke allocate_peratom() if needed for first time
+
+  if (eflag || vflag) ev_setup(eflag,vflag);
+  else evflag = evflag_atom = eflag_global = vflag_global =
+         eflag_atom = vflag_atom = 0;
+
+  if (evflag_atom && !peratom_allocate_flag) {
+    allocate_peratom();
+    peratom_allocate_flag = 1;
+  }
+
+  // for triclinic boxes, convert atoms from box to lamda coords (converted back at end of this function)
+
+  if (triclinic == 0) boxlo = domain->boxlo;
+  else {
+    boxlo = domain->boxlo_lamda;
+    domain->x2lamda(atom->nlocal);
+  }
+
+  // extend size of per-atom arrays if necessary (part2grid is destroyed and re-created with atom->nmax rows)
+
+  if (atom->nlocal > nmax) {
+    memory->destroy(part2grid);
+    nmax = atom->nmax;
+    memory->create(part2grid,nmax,3,"pppm:part2grid");
+  }
+
+  // find grid points for all my particles
+  // map my particle charge onto my local 3d density grid
+
+  particle_map();
+  make_rho();
+
+  // all procs communicate density values from their ghost cells
+  //   to fully sum contribution in their 3d bricks
+  // remap from 3d decomposition to FFT decomposition
+
+  brick2fft();
+
+  // compute potential gradient on my FFT grid and
+  //   portion of e_long on this proc's FFT grid
+  // return gradients (electric fields) in 3d brick decomposition
+  // also performs per-atom calculations via poisson_peratom()
+
+  poisson();
+
+  // all procs communicate E-field values
+  // to fill ghost cells surrounding their 3d bricks
+
+  fillbrick();
+
+  // extra per-atom energy/virial communication
+
+  if (evflag_atom) fillbrick_peratom();
+
+  // calculate the force on my particles
+
+  fieldforce();
+
+  // per-atom energy/virial counterpart of fieldforce() (NOTE(review): comment here previously repeated "communication" - confirm)
+
+  if (evflag_atom) fieldforce_peratom();
+
+  // sum global energy across procs and add in volume-dependent term; qscale is applied last
+
+  const double qscale = force->qqrd2e * scale;
+
+  if (eflag_global) {
+    double energy_all;
+    MPI_Allreduce(&energy,&energy_all,1,MPI_DOUBLE,MPI_SUM,world);
+    energy = energy_all;
+
+    energy *= 0.5*volume;
+    energy -= g_ewald*qsqsum/MY_PIS +
+      MY_PI2*qsum*qsum / (g_ewald*g_ewald*volume);
+    energy *= qscale;
+  }
+
+  // sum global virial across procs, then scale each of the 6 components by 0.5*qscale*volume
+
+  if (vflag_global) {
+    double virial_all[6];
+    MPI_Allreduce(virial,virial_all,6,MPI_DOUBLE,MPI_SUM,world);
+    for (i = 0; i < 6; i++) virial[i] = 0.5*qscale*volume*virial_all[i];
+  }
+
+  // per-atom energy/virial (local atoms only, no reduction across procs here)
+  // energy includes self-energy correction
+
+  if (evflag_atom) {
+    double *q = atom->q;
+    int nlocal = atom->nlocal;
+
+    if (eflag_atom) {
+      for (i = 0; i < nlocal; i++) {
+        eatom[i] *= 0.5;
+        eatom[i] -= g_ewald*q[i]*q[i]/MY_PIS + MY_PI2*q[i]*qsum /
+          (g_ewald*g_ewald*volume);
+        eatom[i] *= qscale;
+      }
+    }
+
+    if (vflag_atom) {
+      for (i = 0; i < nlocal; i++)
+        for (j = 0; j < 6; j++) vatom[i][j] *= 0.5*q[i]*qscale;
+    }
+  }
+
+  // 2d slab correction (only when slabflag == 1)
+
+  if (slabflag == 1) slabcorr();
+
+  // convert atoms back from lamda to box coords
+
+  if (triclinic) domain->lamda2x(atom->nlocal);
+}
+
+/* ----------------------------------------------------------------------
+   allocate memory that depends on # of K-vectors and order; also creates fft1, fft2, remap (freed in deallocate())
+------------------------------------------------------------------------- */
+
+void PPPMOld::allocate()
+{
+  memory->create3d_offset(density_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
+                          nxlo_out,nxhi_out,"pppm:density_brick");
+  memory->create3d_offset(vdx_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
+                          nxlo_out,nxhi_out,"pppm:vdx_brick");
+  memory->create3d_offset(vdy_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
+                          nxlo_out,nxhi_out,"pppm:vdy_brick");
+  memory->create3d_offset(vdz_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
+                          nxlo_out,nxhi_out,"pppm:vdz_brick");
+
+  memory->create(density_fft,nfft_both,"pppm:density_fft");
+  memory->create(greensfn,nfft_both,"pppm:greensfn");
+  memory->create(work1,2*nfft_both,"pppm:work1");
+  memory->create(work2,2*nfft_both,"pppm:work2");
+  memory->create(vg,nfft_both,6,"pppm:vg");
+
+  memory->create1d_offset(fkx,nxlo_fft,nxhi_fft,"pppm:fkx");
+  memory->create1d_offset(fky,nylo_fft,nyhi_fft,"pppm:fky");
+  memory->create1d_offset(fkz,nzlo_fft,nzhi_fft,"pppm:fkz");
+
+  memory->create(buf1,nbuf,"pppm:buf1");
+  memory->create(buf2,nbuf,"pppm:buf2");
+
+  // summation coeffs, all sized from the current value of order
+
+  memory->create(gf_b,order,"pppm:gf_b");
+  memory->create2d_offset(rho1d,3,-order/2,order/2,"pppm:rho1d");
+  memory->create2d_offset(rho_coeff,order,(1-order)/2,order/2,"pppm:rho_coeff");
+
+  // create 2 FFTs and a Remap
+  // 1st FFT keeps data in FFT decomposition
+  // 2nd FFT returns data in 3d brick decomposition
+  // remap takes data from 3d brick to FFT decomposition
+
+  int tmp;  // passed as last FFT3d constructor argument; never read in this function
+
+  fft1 = new FFT3d(lmp,world,nx_pppm,ny_pppm,nz_pppm,
+                   nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft,
+                   nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft,
+                   0,0,&tmp);
+
+  fft2 = new FFT3d(lmp,world,nx_pppm,ny_pppm,nz_pppm,
+                   nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft,
+                   nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
+                   0,0,&tmp);
+
+  remap = new Remap(lmp,world,
+                    nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
+                    nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft,
+                    1,0,0,FFT_PRECISION);
+}
+
+/* ----------------------------------------------------------------------
+   allocate per-atom memory that depends on # of K-vectors and order: u_brick, v0..v5_brick, buf3, buf4
+------------------------------------------------------------------------- */
+
+void PPPMOld::allocate_peratom()
+{
+  memory->create3d_offset(u_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
+                          nxlo_out,nxhi_out,"pppm:u_brick");
+
+  memory->create3d_offset(v0_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
+                          nxlo_out,nxhi_out,"pppm:v0_brick");
+  memory->create3d_offset(v1_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
+                          nxlo_out,nxhi_out,"pppm:v1_brick");
+  memory->create3d_offset(v2_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
+                          nxlo_out,nxhi_out,"pppm:v2_brick");
+  memory->create3d_offset(v3_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
+                          nxlo_out,nxhi_out,"pppm:v3_brick");
+  memory->create3d_offset(v4_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
+                          nxlo_out,nxhi_out,"pppm:v4_brick");
+  memory->create3d_offset(v5_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
+                          nxlo_out,nxhi_out,"pppm:v5_brick");
+
+  memory->create(buf3,nbuf_peratom,"pppm:buf3");  // buf3,buf4 sized by nbuf_peratom, not nbuf
+  memory->create(buf4,nbuf_peratom,"pppm:buf4");
+}
+
+/* ----------------------------------------------------------------------
+   deallocate memory that depends on # of K-vectors and order; passes the same low offsets that allocate() used
+------------------------------------------------------------------------- */
+
+void PPPMOld::deallocate()
+{
+  memory->destroy3d_offset(density_brick,nzlo_out,nylo_out,nxlo_out);
+  memory->destroy3d_offset(vdx_brick,nzlo_out,nylo_out,nxlo_out);
+  memory->destroy3d_offset(vdy_brick,nzlo_out,nylo_out,nxlo_out);
+  memory->destroy3d_offset(vdz_brick,nzlo_out,nylo_out,nxlo_out);
+
+  memory->destroy(density_fft);
+  memory->destroy(greensfn);
+  memory->destroy(work1);
+  memory->destroy(work2);
+  memory->destroy(vg);
+
+  memory->destroy1d_offset(fkx,nxlo_fft);
+  memory->destroy1d_offset(fky,nylo_fft);
+  memory->destroy1d_offset(fkz,nzlo_fft);
+
+  memory->destroy(buf1);
+  memory->destroy(buf2);
+
+  memory->destroy(gf_b);
+  memory->destroy2d_offset(rho1d,-order/2);  // offsets depend on order; presumably order must be unchanged since allocate() - confirm
+  memory->destroy2d_offset(rho_coeff,(1-order)/2);
+
+  delete fft1;
+  delete fft2;
+  delete remap;
+}
+
+/* ----------------------------------------------------------------------
+   deallocate per-atom memory that depends on # of K-vectors and order (the arrays made by allocate_peratom())
+------------------------------------------------------------------------- */
+
+void PPPMOld::deallocate_peratom()
+{
+  memory->destroy3d_offset(u_brick,nzlo_out,nylo_out,nxlo_out);
+
+  memory->destroy3d_offset(v0_brick,nzlo_out,nylo_out,nxlo_out);
+  memory->destroy3d_offset(v1_brick,nzlo_out,nylo_out,nxlo_out);
+  memory->destroy3d_offset(v2_brick,nzlo_out,nylo_out,nxlo_out);
+  memory->destroy3d_offset(v3_brick,nzlo_out,nylo_out,nxlo_out);
+  memory->destroy3d_offset(v4_brick,nzlo_out,nylo_out,nxlo_out);
+  memory->destroy3d_offset(v5_brick,nzlo_out,nylo_out,nxlo_out);
+
+  memory->destroy(buf3);
+  memory->destroy(buf4);
+}
+
+/* ----------------------------------------------------------------------
+   set size of FFT grid (nx,ny,nz_pppm) and g_ewald, then print the chosen values and estimated accuracy on proc 0
+------------------------------------------------------------------------- */
+
+void PPPMOld::set_grid()
+{
+  // see JCP 109, pg 7698 for derivation of coefficients
+  // higher order coefficients may be computed if needed
+
+  double **acons;
+  memory->create(acons,8,7,"pppm:acons");  // read as acons[order][m] with m < order in rms()
+
+  acons[1][0] = 2.0 / 3.0;
+  acons[2][0] = 1.0 / 50.0;
+  acons[2][1] = 5.0 / 294.0;
+  acons[3][0] = 1.0 / 588.0;
+  acons[3][1] = 7.0 / 1440.0;
+  acons[3][2] = 21.0 / 3872.0;
+  acons[4][0] = 1.0 / 4320.0;
+  acons[4][1] = 3.0 / 1936.0;
+  acons[4][2] = 7601.0 / 2271360.0;
+  acons[4][3] = 143.0 / 28800.0;
+  acons[5][0] = 1.0 / 23232.0;
+  acons[5][1] = 7601.0 / 13628160.0;
+  acons[5][2] = 143.0 / 69120.0;
+  acons[5][3] = 517231.0 / 106536960.0;
+  acons[5][4] = 106640677.0 / 11737571328.0;
+  acons[6][0] = 691.0 / 68140800.0;
+  acons[6][1] = 13.0 / 57600.0;
+  acons[6][2] = 47021.0 / 35512320.0;
+  acons[6][3] = 9694607.0 / 2095994880.0;
+  acons[6][4] = 733191589.0 / 59609088000.0;
+  acons[6][5] = 326190917.0 / 11700633600.0;
+  acons[7][0] = 1.0 / 345600.0;
+  acons[7][1] = 3617.0 / 35512320.0;
+  acons[7][2] = 745739.0 / 838397952.0;
+  acons[7][3] = 56399353.0 / 12773376000.0;
+  acons[7][4] = 25091609.0 / 1560084480.0;
+  acons[7][5] = 1755948832039.0 / 36229939200000.0;
+  acons[7][6] = 4887769399.0 / 37838389248.0;
+
+  double q2 = qsqsum * force->qqrd2e;
+
+  // use xprd,yprd,zprd even if triclinic so grid size is the same
+  // adjust z dimension for 2d slab PPPM
+  // 3d PPPM just uses zprd since slab_volfactor = 1.0
+
+  double xprd = domain->xprd;
+  double yprd = domain->yprd;
+  double zprd = domain->zprd;
+  double zprd_slab = zprd*slab_volfactor;
+
+  // make initial g_ewald estimate (skipped when gewaldflag is set)
+  // based on desired accuracy and real space cutoff
+  // fluid-occupied volume used to estimate real-space error
+  // zprd used rather than zprd_slab
+
+  double h_x,h_y,h_z;
+  bigint natoms = atom->natoms;
+
+  if (!gewaldflag) {
+    if (accuracy <= 0.0)
+      error->all(FLERR,"KSpace accuracy must be > 0");
+    g_ewald = accuracy*sqrt(natoms*cutoff*xprd*yprd*zprd) / (2.0*q2);
+    if (g_ewald >= 1.0) g_ewald = (1.35 - 0.15*log(accuracy))/cutoff;
+    else g_ewald = sqrt(-log(g_ewald)) / cutoff;
+  }
+
+  // set optimal nx_pppm,ny_pppm,nz_pppm based on order and accuracy (skipped when gridflag is set)
+  // nz_pppm uses extended zprd_slab instead of zprd
+  // h = 1/g_ewald is upper bound on h such that h*g_ewald <= 1
+  // reduce it until accuracy target is met; err is evaluated before h is updated, so a loop that runs ends one increment later
+
+  if (!gridflag) {
+    double err;
+    h_x = h_y = h_z = 1.0/g_ewald;
+
+    nx_pppm = static_cast<int> (xprd/h_x) + 1;
+    ny_pppm = static_cast<int> (yprd/h_y) + 1;
+    nz_pppm = static_cast<int> (zprd_slab/h_z) + 1;
+
+    err = rms(h_x,xprd,natoms,q2,acons);
+    while (err > accuracy) {
+      err = rms(h_x,xprd,natoms,q2,acons);
+      nx_pppm++;
+      h_x = xprd/nx_pppm;
+    }
+
+    err = rms(h_y,yprd,natoms,q2,acons);
+    while (err > accuracy) {
+      err = rms(h_y,yprd,natoms,q2,acons);
+      ny_pppm++;
+      h_y = yprd/ny_pppm;
+    }
+
+    err = rms(h_z,zprd_slab,natoms,q2,acons);
+    while (err > accuracy) {
+      err = rms(h_z,zprd_slab,natoms,q2,acons);
+      nz_pppm++;
+      h_z = zprd_slab/nz_pppm;
+    }
+  }
+
+  // boost grid size until it is factorable (applied to user-specified grids as well)
+
+  while (!factorable(nx_pppm)) nx_pppm++;
+  while (!factorable(ny_pppm)) ny_pppm++;
+  while (!factorable(nz_pppm)) nz_pppm++;
+
+  // adjust g_ewald for new grid size: bisection on diffpr() over [0, 10/hmin], at most LARGE iterations
+
+  h_x = xprd/static_cast<double>(nx_pppm);
+  h_y = yprd/static_cast<double>(ny_pppm);
+  h_z = zprd_slab/static_cast<double>(nz_pppm);
+
+  if (!gewaldflag) {
+    double gew1,gew2,dgew,f,fmid,hmin,rtb;
+    int ncount;
+
+    gew1 = 0.0;
+    g_ewald = gew1;
+    f = diffpr(h_x,h_y,h_z,q2,acons);
+
+    hmin = MIN(h_x,MIN(h_y,h_z));
+    gew2 = 10.0/hmin;
+    g_ewald = gew2;
+    fmid = diffpr(h_x,h_y,h_z,q2,acons);
+
+    if (f*fmid >= 0.0) error->all(FLERR,"Cannot compute PPPM G");  // endpoints must bracket a sign change
+    rtb = f < 0.0 ? (dgew=gew2-gew1,gew1) : (dgew=gew1-gew2,gew2);
+    ncount = 0;
+    while (fabs(dgew) > SMALL && fmid != 0.0) {
+      dgew *= 0.5;
+      g_ewald = rtb + dgew;
+      fmid = diffpr(h_x,h_y,h_z,q2,acons);
+      if (fmid <= 0.0) rtb = g_ewald;
+      ncount++;
+      if (ncount > LARGE) error->all(FLERR,"Cannot compute PPPM G");
+    }
+  }
+
+  // final RMS accuracy estimate; kept in a local that is only printed below, the accuracy target is not modified
+
+  double lprx = rms(h_x,xprd,natoms,q2,acons);
+  double lpry = rms(h_y,yprd,natoms,q2,acons);
+  double lprz = rms(h_z,zprd_slab,natoms,q2,acons);
+  double lpr = sqrt(lprx*lprx + lpry*lpry + lprz*lprz) / sqrt(3.0);
+  double q2_over_sqrt = q2 / sqrt(natoms*cutoff*xprd*yprd*zprd_slab);  // NOTE(review): zprd_slab here, zprd in diffpr() - confirm
+  double spr = 2.0 *q2_over_sqrt * exp(-g_ewald*g_ewald*cutoff*cutoff);
+  double tpr = estimate_table_accuracy(q2_over_sqrt,spr);
+  double est_accuracy = sqrt(lpr*lpr + spr*spr + tpr*tpr);
+
+  // free local memory
+
+  memory->destroy(acons);
+
+  // print info (proc 0 only); screen and logfile messages are kept identical
+
+  if (me == 0) {
+#ifdef FFT_SINGLE
+    const char fft_prec[] = "single";
+#else
+    const char fft_prec[] = "double";
+#endif
+    if (screen) {
+      fprintf(screen,"  G vector (1/distance) = %g\n",g_ewald);
+      fprintf(screen,"  grid = %d %d %d\n",nx_pppm,ny_pppm,nz_pppm);
+      fprintf(screen,"  stencil order = %d\n",order);
+      fprintf(screen,"  estimated absolute RMS force accuracy = %g\n",
+              est_accuracy);
+      fprintf(screen,"  estimated relative force accuracy = %g\n",
+              est_accuracy/two_charge_force);
+      fprintf(screen,"  using %s precision FFTs\n",fft_prec);
+    }
+    if (logfile) {
+      fprintf(logfile,"  G vector (1/distance) = %g\n",g_ewald);
+      fprintf(logfile,"  grid = %d %d %d\n",nx_pppm,ny_pppm,nz_pppm);
+      fprintf(logfile,"  stencil order = %d\n",order);
+      fprintf(logfile,"  estimated absolute RMS force accuracy = %g\n",
+              est_accuracy);
+      fprintf(logfile,"  estimated relative force accuracy = %g\n",
+              est_accuracy/two_charge_force);
+      fprintf(logfile,"  using %s precision FFTs\n",fft_prec);
+    }
+  }
+}
+
+/* ----------------------------------------------------------------------
+   check if n factors completely into entries of factors[0..nfactors-1]
+   return 1 if yes (including any n <= 1, where the loop never runs), 0 if no
+------------------------------------------------------------------------- */
+
+int PPPMOld::factorable(int n)
+{
+  int i;
+
+  while (n > 1) {
+    for (i = 0; i < nfactors; i++) {
+      if (n % factors[i] == 0) {
+        n /= factors[i];  // strip one factor, then restart the scan from factors[0]
+        break;
+      }
+    }
+    if (i == nfactors) return 0;  // inner loop finished without a divisor
+  }
+
+  return 1;
+}
+
+/* ----------------------------------------------------------------------
+   compute RMS accuracy for a dimension: h = grid spacing, prd = box length; uses current order and g_ewald
+------------------------------------------------------------------------- */
+
+double PPPMOld::rms(double h, double prd, bigint natoms,
+                 double q2, double **acons)
+{
+  double sum = 0.0;
+  for (int m = 0; m < order; m++)
+    sum += acons[order][m] * pow(h*g_ewald,2.0*m);  // series in even powers of h*g_ewald
+  double value = q2 * pow(h*g_ewald,(double)order) *
+    sqrt(g_ewald*prd*sqrt(2.0*MY_PI)*sum/natoms) / (prd*prd);
+  return value;
+}
+
+/* ----------------------------------------------------------------------
+   compute difference in real-space and KSpace RMS accuracy (kspace minus real) at the current g_ewald
+------------------------------------------------------------------------- */
+
+double PPPMOld::diffpr(double h_x, double h_y, double h_z, double q2,
+                    double **acons)
+{
+  double lprx,lpry,lprz,kspace_prec,real_prec;
+  double xprd = domain->xprd;
+  double yprd = domain->yprd;
+  double zprd = domain->zprd;
+  bigint natoms = atom->natoms;
+
+  lprx = rms(h_x,xprd,natoms,q2,acons);
+  lpry = rms(h_y,yprd,natoms,q2,acons);
+  lprz = rms(h_z,zprd*slab_volfactor,natoms,q2,acons);  // z k-space term uses the slab-extended length
+  kspace_prec = sqrt(lprx*lprx + lpry*lpry + lprz*lprz) / sqrt(3.0);
+  real_prec = 2.0*q2 * exp(-g_ewald*g_ewald*cutoff*cutoff) /
+   sqrt(static_cast<double>(natoms)*cutoff*xprd*yprd*zprd);  // real-space term uses zprd, not the slab length
+  double value = kspace_prec - real_prec;
+  return value;
+}
+
+/* ----------------------------------------------------------------------
+   pre-compute Green's function denominator expansion coeffs, Gamma(2n); fills gf_b[0..order-1]
+------------------------------------------------------------------------- */
+
+void PPPMOld::compute_gf_denom()
+{
+  int k,l,m;
+
+  for (l = 1; l < order; l++) gf_b[l] = 0.0;
+  gf_b[0] = 1.0;
+
+  for (m = 1; m < order; m++) {
+    for (l = m; l > 0; l--)
+      gf_b[l] = 4.0 * (gf_b[l]*(l-m)*(l-m-0.5)-gf_b[l-1]*(l-m-1)*(l-m-1));
+    gf_b[0] = 4.0 * (gf_b[0]*(l-m)*(l-m-0.5));  // l == 0 here, left over from the loop above
+  }
+
+  bigint ifact = 1;
+  for (k = 1; k < 2*order; k++) ifact *= k;  // ifact = (2*order-1)!
+  double gaminv = 1.0/ifact;
+  for (l = 0; l < order; l++) gf_b[l] *= gaminv;
+}
+
+/* ----------------------------------------------------------------------
+   ghost-swap to accumulate full density in brick decomposition
+   remap density from 3d brick decomposition to FFT decomposition
+------------------------------------------------------------------------- */
+
+void PPPMOld::brick2fft()
+{
+  int i,n,ix,iy,iz;
+  MPI_Request request;
+  MPI_Status status;
+
+  // +x swap: pack ghost planes ix = nxhi_in+1..nxhi_out (full y,z extent) in buf1
+  // if my +x neighbor is me copy buf1 to buf2, else send to +x, recv from -x
+  // unpack by summing buf2 into my nxlo_ghost lowest real x planes
+
+  n = 0;
+  for (iz = nzlo_out; iz <= nzhi_out; iz++)
+    for (iy = nylo_out; iy <= nyhi_out; iy++)
+      for (ix = nxhi_in+1; ix <= nxhi_out; ix++)
+        buf1[n++] = density_brick[iz][iy][ix];
+
+  if (comm->procneigh[0][1] == me)
+    for (i = 0; i < n; i++) buf2[i] = buf1[i];
+  else {
+    MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[0][0],0,world,&request);
+    MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[0][1],0,world);
+    MPI_Wait(&request,&status);
+  }
+
+  n = 0;
+  for (iz = nzlo_out; iz <= nzhi_out; iz++)
+    for (iy = nylo_out; iy <= nyhi_out; iy++)
+      for (ix = nxlo_in; ix < nxlo_in+nxlo_ghost; ix++)
+        density_brick[iz][iy][ix] += buf2[n++];
+
+  // -x swap: pack ghost planes ix = nxlo_out..nxlo_in-1 (full y,z extent) in buf1
+  // if my -x neighbor is me copy buf1 to buf2, else send to -x, recv from +x
+  // unpack by summing buf2 into my nxhi_ghost highest real x planes
+
+  n = 0;
+  for (iz = nzlo_out; iz <= nzhi_out; iz++)
+    for (iy = nylo_out; iy <= nyhi_out; iy++)
+      for (ix = nxlo_out; ix < nxlo_in; ix++)
+        buf1[n++] = density_brick[iz][iy][ix];
+
+  if (comm->procneigh[0][0] == me)
+    for (i = 0; i < n; i++) buf2[i] = buf1[i];
+  else {
+    MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[0][1],0,world,&request);
+    MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[0][0],0,world);
+    MPI_Wait(&request,&status);
+  }
+
+  n = 0;
+  for (iz = nzlo_out; iz <= nzhi_out; iz++)
+    for (iy = nylo_out; iy <= nyhi_out; iy++)
+      for (ix = nxhi_in-nxhi_ghost+1; ix <= nxhi_in; ix++)
+        density_brick[iz][iy][ix] += buf2[n++];
+
+  // +y swap: pack ghost planes iy = nyhi_in+1..nyhi_out (real x, full z extent)
+  // if my +y neighbor is me copy buf1 to buf2, else send to +y, recv from -y
+  // unpack by summing buf2 into my nylo_ghost lowest real y planes
+
+  n = 0;
+  for (iz = nzlo_out; iz <= nzhi_out; iz++)
+    for (iy = nyhi_in+1; iy <= nyhi_out; iy++)
+      for (ix = nxlo_in; ix <= nxhi_in; ix++)
+        buf1[n++] = density_brick[iz][iy][ix];
+
+  if (comm->procneigh[1][1] == me)
+    for (i = 0; i < n; i++) buf2[i] = buf1[i];
+  else {
+    MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[1][0],0,world,&request);
+    MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[1][1],0,world);
+    MPI_Wait(&request,&status);
+  }
+
+  n = 0;
+  for (iz = nzlo_out; iz <= nzhi_out; iz++)
+    for (iy = nylo_in; iy < nylo_in+nylo_ghost; iy++)
+      for (ix = nxlo_in; ix <= nxhi_in; ix++)
+        density_brick[iz][iy][ix] += buf2[n++];
+
+  // -y swap: pack ghost planes iy = nylo_out..nylo_in-1 (real x, full z extent)
+  // if my -y neighbor is me copy buf1 to buf2, else send to -y, recv from +y
+  // unpack by summing buf2 into my nyhi_ghost highest real y planes
+
+  n = 0;
+  for (iz = nzlo_out; iz <= nzhi_out; iz++)
+    for (iy = nylo_out; iy < nylo_in; iy++)
+      for (ix = nxlo_in; ix <= nxhi_in; ix++)
+        buf1[n++] = density_brick[iz][iy][ix];
+
+  if (comm->procneigh[1][0] == me)
+    for (i = 0; i < n; i++) buf2[i] = buf1[i];
+  else {
+    MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[1][1],0,world,&request);
+    MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[1][0],0,world);
+    MPI_Wait(&request,&status);
+  }
+
+  n = 0;
+  for (iz = nzlo_out; iz <= nzhi_out; iz++)
+    for (iy = nyhi_in-nyhi_ghost+1; iy <= nyhi_in; iy++)
+      for (ix = nxlo_in; ix <= nxhi_in; ix++)
+        density_brick[iz][iy][ix] += buf2[n++];
+
+  // +z swap: pack ghost planes iz = nzhi_in+1..nzhi_out (real x,y only) in buf1
+  // if my +z neighbor is me copy buf1 to buf2, else send to +z, recv from -z
+  // unpack by summing buf2 into my nzlo_ghost lowest real z planes
+
+  n = 0;
+  for (iz = nzhi_in+1; iz <= nzhi_out; iz++)
+    for (iy = nylo_in; iy <= nyhi_in; iy++)
+      for (ix = nxlo_in; ix <= nxhi_in; ix++)
+        buf1[n++] = density_brick[iz][iy][ix];
+
+  if (comm->procneigh[2][1] == me)
+    for (i = 0; i < n; i++) buf2[i] = buf1[i];
+  else {
+    MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[2][0],0,world,&request);
+    MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[2][1],0,world);
+    MPI_Wait(&request,&status);
+  }
+
+  n = 0;
+  for (iz = nzlo_in; iz < nzlo_in+nzlo_ghost; iz++)
+    for (iy = nylo_in; iy <= nyhi_in; iy++)
+      for (ix = nxlo_in; ix <= nxhi_in; ix++)
+        density_brick[iz][iy][ix] += buf2[n++];
+
+  // -z swap: pack ghost planes iz = nzlo_out..nzlo_in-1 (real x,y only) in buf1
+  // if my -z neighbor is me copy buf1 to buf2, else send to -z, recv from +z
+  // unpack by summing buf2 into my nzhi_ghost highest real z planes
+
+  n = 0;
+  for (iz = nzlo_out; iz < nzlo_in; iz++)
+    for (iy = nylo_in; iy <= nyhi_in; iy++)
+      for (ix = nxlo_in; ix <= nxhi_in; ix++)
+        buf1[n++] = density_brick[iz][iy][ix];
+
+  if (comm->procneigh[2][0] == me)
+    for (i = 0; i < n; i++) buf2[i] = buf1[i];
+  else {
+    MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[2][1],0,world,&request);
+    MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[2][0],0,world);
+    MPI_Wait(&request,&status);
+  }
+
+  n = 0;
+  for (iz = nzhi_in-nzhi_ghost+1; iz <= nzhi_in; iz++)
+    for (iy = nylo_in; iy <= nyhi_in; iy++)
+      for (ix = nxlo_in; ix <= nxhi_in; ix++)
+        density_brick[iz][iy][ix] += buf2[n++];
+
+  // remap from 3d brick decomposition to FFT decomposition
+  // copy packs the real (non-ghost) cells of density_brick into density_fft
+  // remap could be done as pre-stage of FFT,
+  //   but this works optimally on only double values, not complex values
+
+  n = 0;
+  for (iz = nzlo_in; iz <= nzhi_in; iz++)
+    for (iy = nylo_in; iy <= nyhi_in; iy++)
+      for (ix = nxlo_in; ix <= nxhi_in; ix++)
+        density_fft[n++] = density_brick[iz][iy][ix];
+
+  remap->perform(density_fft,density_fft,work1);
+}
+
+/* ----------------------------------------------------------------------
+   ghost-swap to fill ghost cells of my brick with field values
+------------------------------------------------------------------------- */
+
+void PPPMOld::fillbrick()
+{
+  int i,n,ix,iy,iz;
+  MPI_Request request;
+  MPI_Status status;
+
+  // +z swap: pack vdx,vdy,vdz per cell from my nzhi_ghost highest real z planes
+  // if my +z neighbor is me copy buf1 to buf2, else send to +z, recv from -z
+  // unpack by overwriting (not summing) ghost planes iz = nzlo_out..nzlo_in-1
+
+  n = 0;
+  for (iz = nzhi_in-nzhi_ghost+1; iz <= nzhi_in; iz++)
+    for (iy = nylo_in; iy <= nyhi_in; iy++)
+      for (ix = nxlo_in; ix <= nxhi_in; ix++) {
+        buf1[n++] = vdx_brick[iz][iy][ix];
+        buf1[n++] = vdy_brick[iz][iy][ix];
+        buf1[n++] = vdz_brick[iz][iy][ix];
+      }
+
+  if (comm->procneigh[2][1] == me)
+    for (i = 0; i < n; i++) buf2[i] = buf1[i];
+  else {
+    MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[2][0],0,world,&request);
+    MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[2][1],0,world);
+    MPI_Wait(&request,&status);
+  }
+
+  n = 0;
+  for (iz = nzlo_out; iz < nzlo_in; iz++)
+    for (iy = nylo_in; iy <= nyhi_in; iy++)
+      for (ix = nxlo_in; ix <= nxhi_in; ix++) {
+        vdx_brick[iz][iy][ix] = buf2[n++];
+        vdy_brick[iz][iy][ix] = buf2[n++];
+        vdz_brick[iz][iy][ix] = buf2[n++];
+      }
+
+  // -z swap: pack vdx,vdy,vdz per cell from my nzlo_ghost lowest real z planes
+  // if my -z neighbor is me copy buf1 to buf2, else send to -z, recv from +z
+  // unpack by overwriting (not summing) ghost planes iz = nzhi_in+1..nzhi_out
+
+  n = 0;
+  for (iz = nzlo_in; iz < nzlo_in+nzlo_ghost; iz++)
+    for (iy = nylo_in; iy <= nyhi_in; iy++)
+      for (ix = nxlo_in; ix <= nxhi_in; ix++) {
+        buf1[n++] = vdx_brick[iz][iy][ix];
+        buf1[n++] = vdy_brick[iz][iy][ix];
+        buf1[n++] = vdz_brick[iz][iy][ix];
+      }
+
+  if (comm->procneigh[2][0] == me)
+    for (i = 0; i < n; i++) buf2[i] = buf1[i];
+  else {
+    MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[2][1],0,world,&request);
+    MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[2][0],0,world);
+    MPI_Wait(&request,&status);
+  }
+
+  n = 0;
+  for (iz = nzhi_in+1; iz <= nzhi_out; iz++)
+    for (iy = nylo_in; iy <= nyhi_in; iy++)
+      for (ix = nxlo_in; ix <= nxhi_in; ix++) {
+        vdx_brick[iz][iy][ix] = buf2[n++];
+        vdy_brick[iz][iy][ix] = buf2[n++];
+        vdz_brick[iz][iy][ix] = buf2[n++];
+      }
+
+  // +y swap: pack my nyhi_ghost highest real y planes (real x, full z extent)
+  // if my +y neighbor is me copy buf1 to buf2, else send to +y, recv from -y
+  // unpack by overwriting (not summing) ghost planes iy = nylo_out..nylo_in-1
+
+  n = 0;
+  for (iz = nzlo_out; iz <= nzhi_out; iz++)
+    for (iy = nyhi_in-nyhi_ghost+1; iy <= nyhi_in; iy++)
+      for (ix = nxlo_in; ix <= nxhi_in; ix++) {
+        buf1[n++] = vdx_brick[iz][iy][ix];
+        buf1[n++] = vdy_brick[iz][iy][ix];
+        buf1[n++] = vdz_brick[iz][iy][ix];
+      }
+
+  if (comm->procneigh[1][1] == me)
+    for (i = 0; i < n; i++) buf2[i] = buf1[i];
+  else {
+    MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[1][0],0,world,&request);
+    MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[1][1],0,world);
+    MPI_Wait(&request,&status);
+  }
+
+  n = 0;
+  for (iz = nzlo_out; iz <= nzhi_out; iz++)
+    for (iy = nylo_out; iy < nylo_in; iy++)
+      for (ix = nxlo_in; ix <= nxhi_in; ix++) {
+        vdx_brick[iz][iy][ix] = buf2[n++];
+        vdy_brick[iz][iy][ix] = buf2[n++];
+        vdz_brick[iz][iy][ix] = buf2[n++];
+      }
+
+  // -y swap: pack my nylo_ghost lowest real y planes (real x, full z extent)
+  // if my -y neighbor is me copy buf1 to buf2, else send to -y, recv from +y
+  // unpack by overwriting (not summing) ghost planes iy = nyhi_in+1..nyhi_out
+
+  n = 0;
+  for (iz = nzlo_out; iz <= nzhi_out; iz++)
+    for (iy = nylo_in; iy < nylo_in+nylo_ghost; iy++)
+      for (ix = nxlo_in; ix <= nxhi_in; ix++) {
+        buf1[n++] = vdx_brick[iz][iy][ix];
+        buf1[n++] = vdy_brick[iz][iy][ix];
+        buf1[n++] = vdz_brick[iz][iy][ix];
+      }
+
+  if (comm->procneigh[1][0] == me)
+    for (i = 0; i < n; i++) buf2[i] = buf1[i];
+  else {
+    MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[1][1],0,world,&request);
+    MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[1][0],0,world);
+    MPI_Wait(&request,&status);
+  }
+
+  n = 0;
+  for (iz = nzlo_out; iz <= nzhi_out; iz++)
+    for (iy = nyhi_in+1; iy <= nyhi_out; iy++)
+      for (ix = nxlo_in; ix <= nxhi_in; ix++) {
+        vdx_brick[iz][iy][ix] = buf2[n++];
+        vdy_brick[iz][iy][ix] = buf2[n++];
+        vdz_brick[iz][iy][ix] = buf2[n++];
+      }
+
+  // +x swap: pack my nxhi_ghost highest real x planes (full y,z extent)
+  // if my +x neighbor is me copy buf1 to buf2, else send to +x, recv from -x
+  // unpack by overwriting (not summing) ghost planes ix = nxlo_out..nxlo_in-1
+
+  n = 0;
+  for (iz = nzlo_out; iz <= nzhi_out; iz++)
+    for (iy = nylo_out; iy <= nyhi_out; iy++)
+      for (ix = nxhi_in-nxhi_ghost+1; ix <= nxhi_in; ix++) {
+        buf1[n++] = vdx_brick[iz][iy][ix];
+        buf1[n++] = vdy_brick[iz][iy][ix];
+        buf1[n++] = vdz_brick[iz][iy][ix];
+      }
+
+  if (comm->procneigh[0][1] == me)
+    for (i = 0; i < n; i++) buf2[i] = buf1[i];
+  else {
+    MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[0][0],0,world,&request);
+    MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[0][1],0,world);
+    MPI_Wait(&request,&status);
+  }
+
+  n = 0;
+  for (iz = nzlo_out; iz <= nzhi_out; iz++)
+    for (iy = nylo_out; iy <= nyhi_out; iy++)
+      for (ix = nxlo_out; ix < nxlo_in; ix++) {
+        vdx_brick[iz][iy][ix] = buf2[n++];
+        vdy_brick[iz][iy][ix] = buf2[n++];
+        vdz_brick[iz][iy][ix] = buf2[n++];
+      }
+
+  // -x swap: pack my nxlo_ghost lowest real x planes (full y,z extent)
+  // if my -x neighbor is me copy buf1 to buf2, else send to -x, recv from +x
+  // unpack by overwriting (not summing) ghost planes ix = nxhi_in+1..nxhi_out
+
+  n = 0;
+  for (iz = nzlo_out; iz <= nzhi_out; iz++)
+    for (iy = nylo_out; iy <= nyhi_out; iy++)
+      for (ix = nxlo_in; ix < nxlo_in+nxlo_ghost; ix++) {
+        buf1[n++] = vdx_brick[iz][iy][ix];
+        buf1[n++] = vdy_brick[iz][iy][ix];
+        buf1[n++] = vdz_brick[iz][iy][ix];
+      }
+
+  if (comm->procneigh[0][0] == me)
+    for (i = 0; i < n; i++) buf2[i] = buf1[i];
+  else {
+    MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[0][1],0,world,&request);
+    MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[0][0],0,world);
+    MPI_Wait(&request,&status);
+  }
+
+  n = 0;
+  for (iz = nzlo_out; iz <= nzhi_out; iz++)
+    for (iy = nylo_out; iy <= nyhi_out; iy++)
+      for (ix = nxhi_in+1; ix <= nxhi_out; ix++) {
+        vdx_brick[iz][iy][ix] = buf2[n++];
+        vdy_brick[iz][iy][ix] = buf2[n++];
+        vdz_brick[iz][iy][ix] = buf2[n++];
+      }
+}
+
+/* ----------------------------------------------------------------------
+   ghost-swap to fill ghost cells of my brick with per-atom field values
+------------------------------------------------------------------------- */
+
+void PPPMOld::fillbrick_peratom()
+{
+  int i,n,ix,iy,iz;
+  MPI_Request request;
+  MPI_Status status;
+
+  // +z swap: pack u (if eflag_atom) and v0-v5 (if vflag_atom) per cell
+  //   from my nzhi_ghost highest real z planes; send to +z or copy to self
+  // unpack by overwriting (not summing) ghost planes iz = nzlo_out..nzlo_in-1
+
+  n = 0;
+  for (iz = nzhi_in-nzhi_ghost+1; iz <= nzhi_in; iz++)
+    for (iy = nylo_in; iy <= nyhi_in; iy++)
+      for (ix = nxlo_in; ix <= nxhi_in; ix++) {
+        if (eflag_atom) buf3[n++] = u_brick[iz][iy][ix];
+        if (vflag_atom) {
+          buf3[n++] = v0_brick[iz][iy][ix];
+          buf3[n++] = v1_brick[iz][iy][ix];
+          buf3[n++] = v2_brick[iz][iy][ix];
+          buf3[n++] = v3_brick[iz][iy][ix];
+          buf3[n++] = v4_brick[iz][iy][ix];
+          buf3[n++] = v5_brick[iz][iy][ix];
+        }
+      }
+
+  if (comm->procneigh[2][1] == me)
+    for (i = 0; i < n; i++) buf4[i] = buf3[i];
+  else {
+    MPI_Irecv(buf4,nbuf_peratom,MPI_FFT_SCALAR,
+              comm->procneigh[2][0],0,world,&request);
+    MPI_Send(buf3,n,MPI_FFT_SCALAR,comm->procneigh[2][1],0,world);
+    MPI_Wait(&request,&status);
+  }
+
+  n = 0;
+  for (iz = nzlo_out; iz < nzlo_in; iz++)
+    for (iy = nylo_in; iy <= nyhi_in; iy++)
+      for (ix = nxlo_in; ix <= nxhi_in; ix++) {
+        if (eflag_atom) u_brick[iz][iy][ix] = buf4[n++];
+        if (vflag_atom) {
+          v0_brick[iz][iy][ix] = buf4[n++];
+          v1_brick[iz][iy][ix] = buf4[n++];
+          v2_brick[iz][iy][ix] = buf4[n++];
+          v3_brick[iz][iy][ix] = buf4[n++];
+          v4_brick[iz][iy][ix] = buf4[n++];
+          v5_brick[iz][iy][ix] = buf4[n++];
+        }
+      }
+
+  // -z swap: pack u (if eflag_atom) and v0-v5 (if vflag_atom) per cell
+  //   from my nzlo_ghost lowest real z planes; send to -z or copy to self
+  // unpack by overwriting (not summing) ghost planes iz = nzhi_in+1..nzhi_out
+
+  n = 0;
+  for (iz = nzlo_in; iz < nzlo_in+nzlo_ghost; iz++)
+    for (iy = nylo_in; iy <= nyhi_in; iy++)
+      for (ix = nxlo_in; ix <= nxhi_in; ix++) {
+        if (eflag_atom) buf3[n++] = u_brick[iz][iy][ix];
+        if (vflag_atom) {
+          buf3[n++] = v0_brick[iz][iy][ix];
+          buf3[n++] = v1_brick[iz][iy][ix];
+          buf3[n++] = v2_brick[iz][iy][ix];
+          buf3[n++] = v3_brick[iz][iy][ix];
+          buf3[n++] = v4_brick[iz][iy][ix];
+          buf3[n++] = v5_brick[iz][iy][ix];
+        }
+      }
+
+  if (comm->procneigh[2][0] == me)
+    for (i = 0; i < n; i++) buf4[i] = buf3[i];
+  else {
+    MPI_Irecv(buf4,nbuf_peratom,MPI_FFT_SCALAR,
+              comm->procneigh[2][1],0,world,&request);
+    MPI_Send(buf3,n,MPI_FFT_SCALAR,comm->procneigh[2][0],0,world);
+    MPI_Wait(&request,&status);
+  }
+
+  n = 0;
+  for (iz = nzhi_in+1; iz <= nzhi_out; iz++)
+    for (iy = nylo_in; iy <= nyhi_in; iy++)
+      for (ix = nxlo_in; ix <= nxhi_in; ix++) {
+        if (eflag_atom) u_brick[iz][iy][ix] = buf4[n++];
+        if (vflag_atom) {
+          v0_brick[iz][iy][ix] = buf4[n++];
+          v1_brick[iz][iy][ix] = buf4[n++];
+          v2_brick[iz][iy][ix] = buf4[n++];
+          v3_brick[iz][iy][ix] = buf4[n++];
+          v4_brick[iz][iy][ix] = buf4[n++];
+          v5_brick[iz][iy][ix] = buf4[n++];
+        }
+      }
+
+  // +y swap: pack my nyhi_ghost highest real y planes (real x, full z extent)
+  //   same per-cell layout as above; send to +y or copy to self
+  // unpack by overwriting (not summing) ghost planes iy = nylo_out..nylo_in-1
+
+  n = 0;
+  for (iz = nzlo_out; iz <= nzhi_out; iz++)
+    for (iy = nyhi_in-nyhi_ghost+1; iy <= nyhi_in; iy++)
+      for (ix = nxlo_in; ix <= nxhi_in; ix++) {
+        if (eflag_atom) buf3[n++] = u_brick[iz][iy][ix];
+        if (vflag_atom) {
+          buf3[n++] = v0_brick[iz][iy][ix];
+          buf3[n++] = v1_brick[iz][iy][ix];
+          buf3[n++] = v2_brick[iz][iy][ix];
+          buf3[n++] = v3_brick[iz][iy][ix];
+          buf3[n++] = v4_brick[iz][iy][ix];
+          buf3[n++] = v5_brick[iz][iy][ix];
+        }
+      }
+
+  if (comm->procneigh[1][1] == me)
+    for (i = 0; i < n; i++) buf4[i] = buf3[i];
+  else {
+    MPI_Irecv(buf4,nbuf_peratom,MPI_FFT_SCALAR,
+              comm->procneigh[1][0],0,world,&request);
+    MPI_Send(buf3,n,MPI_FFT_SCALAR,comm->procneigh[1][1],0,world);
+    MPI_Wait(&request,&status);
+  }
+
+  n = 0;
+  for (iz = nzlo_out; iz <= nzhi_out; iz++)
+    for (iy = nylo_out; iy < nylo_in; iy++)
+      for (ix = nxlo_in; ix <= nxhi_in; ix++) {
+        if (eflag_atom) u_brick[iz][iy][ix] = buf4[n++];
+        if (vflag_atom) {
+          v0_brick[iz][iy][ix] = buf4[n++];
+          v1_brick[iz][iy][ix] = buf4[n++];
+          v2_brick[iz][iy][ix] = buf4[n++];
+          v3_brick[iz][iy][ix] = buf4[n++];
+          v4_brick[iz][iy][ix] = buf4[n++];
+          v5_brick[iz][iy][ix] = buf4[n++];
+        }
+      }
+
+  // -y swap: pack my nylo_ghost lowest real y planes (real x, full z extent)
+  //   same per-cell layout as above; send to -y or copy to self
+  // unpack by overwriting (not summing) ghost planes iy = nyhi_in+1..nyhi_out
+
+  n = 0;
+  for (iz = nzlo_out; iz <= nzhi_out; iz++)
+    for (iy = nylo_in; iy < nylo_in+nylo_ghost; iy++)
+      for (ix = nxlo_in; ix <= nxhi_in; ix++) {
+        if (eflag_atom) buf3[n++] = u_brick[iz][iy][ix];
+        if (vflag_atom) {
+          buf3[n++] = v0_brick[iz][iy][ix];
+          buf3[n++] = v1_brick[iz][iy][ix];
+          buf3[n++] = v2_brick[iz][iy][ix];
+          buf3[n++] = v3_brick[iz][iy][ix];
+          buf3[n++] = v4_brick[iz][iy][ix];
+          buf3[n++] = v5_brick[iz][iy][ix];
+        }
+      }
+
+  if (comm->procneigh[1][0] == me)
+    for (i = 0; i < n; i++) buf4[i] = buf3[i];
+  else {
+    MPI_Irecv(buf4,nbuf_peratom,MPI_FFT_SCALAR,
+              comm->procneigh[1][1],0,world,&request);
+    MPI_Send(buf3,n,MPI_FFT_SCALAR,comm->procneigh[1][0],0,world);
+    MPI_Wait(&request,&status);
+  }
+
+  n = 0;
+  for (iz = nzlo_out; iz <= nzhi_out; iz++)
+    for (iy = nyhi_in+1; iy <= nyhi_out; iy++)
+      for (ix = nxlo_in; ix <= nxhi_in; ix++) {
+        if (eflag_atom) u_brick[iz][iy][ix] = buf4[n++];
+        if (vflag_atom) {
+          v0_brick[iz][iy][ix] = buf4[n++];
+          v1_brick[iz][iy][ix] = buf4[n++];
+          v2_brick[iz][iy][ix] = buf4[n++];
+          v3_brick[iz][iy][ix] = buf4[n++];
+          v4_brick[iz][iy][ix] = buf4[n++];
+          v5_brick[iz][iy][ix] = buf4[n++];
+        }
+      }
+
+  // +x swap: pack my nxhi_ghost highest real x planes (full y,z extent)
+  //   same per-cell layout as above; send to +x or copy to self
+  // unpack by overwriting (not summing) ghost planes ix = nxlo_out..nxlo_in-1
+
+  n = 0;
+  for (iz = nzlo_out; iz <= nzhi_out; iz++)
+    for (iy = nylo_out; iy <= nyhi_out; iy++)
+      for (ix = nxhi_in-nxhi_ghost+1; ix <= nxhi_in; ix++) {
+        if (eflag_atom) buf3[n++] = u_brick[iz][iy][ix];
+        if (vflag_atom) {
+          buf3[n++] = v0_brick[iz][iy][ix];
+          buf3[n++] = v1_brick[iz][iy][ix];
+          buf3[n++] = v2_brick[iz][iy][ix];
+          buf3[n++] = v3_brick[iz][iy][ix];
+          buf3[n++] = v4_brick[iz][iy][ix];
+          buf3[n++] = v5_brick[iz][iy][ix];
+        }
+      }
+
+  if (comm->procneigh[0][1] == me)
+    for (i = 0; i < n; i++) buf4[i] = buf3[i];
+  else {
+    MPI_Irecv(buf4,nbuf_peratom,MPI_FFT_SCALAR,
+              comm->procneigh[0][0],0,world,&request);
+    MPI_Send(buf3,n,MPI_FFT_SCALAR,comm->procneigh[0][1],0,world);
+    MPI_Wait(&request,&status);
+  }
+
+  n = 0;
+  for (iz = nzlo_out; iz <= nzhi_out; iz++)
+    for (iy = nylo_out; iy <= nyhi_out; iy++)
+      for (ix = nxlo_out; ix < nxlo_in; ix++) {
+        if (eflag_atom) u_brick[iz][iy][ix] = buf4[n++];
+        if (vflag_atom) {
+          v0_brick[iz][iy][ix] = buf4[n++];
+          v1_brick[iz][iy][ix] = buf4[n++];
+          v2_brick[iz][iy][ix] = buf4[n++];
+          v3_brick[iz][iy][ix] = buf4[n++];
+          v4_brick[iz][iy][ix] = buf4[n++];
+          v5_brick[iz][iy][ix] = buf4[n++];
+        }
+      }
+
+  // -x swap: pack my nxlo_ghost lowest real x planes (full y,z extent)
+  //   same per-cell layout as above; send to -x or copy to self
+  // unpack by overwriting (not summing) ghost planes ix = nxhi_in+1..nxhi_out
+
+  n = 0;
+  for (iz = nzlo_out; iz <= nzhi_out; iz++)
+    for (iy = nylo_out; iy <= nyhi_out; iy++)
+      for (ix = nxlo_in; ix < nxlo_in+nxlo_ghost; ix++) {
+        if (eflag_atom) buf3[n++] = u_brick[iz][iy][ix];
+        if (vflag_atom) {
+          buf3[n++] = v0_brick[iz][iy][ix];
+          buf3[n++] = v1_brick[iz][iy][ix];
+          buf3[n++] = v2_brick[iz][iy][ix];
+          buf3[n++] = v3_brick[iz][iy][ix];
+          buf3[n++] = v4_brick[iz][iy][ix];
+          buf3[n++] = v5_brick[iz][iy][ix];
+        }
+      }
+
+  if (comm->procneigh[0][0] == me)
+    for (i = 0; i < n; i++) buf4[i] = buf3[i];
+  else {
+    MPI_Irecv(buf4,nbuf_peratom,MPI_FFT_SCALAR,
+              comm->procneigh[0][1],0,world,&request);
+    MPI_Send(buf3,n,MPI_FFT_SCALAR,comm->procneigh[0][0],0,world);
+    MPI_Wait(&request,&status);
+  }
+
+  n = 0;
+  for (iz = nzlo_out; iz <= nzhi_out; iz++)
+    for (iy = nylo_out; iy <= nyhi_out; iy++)
+      for (ix = nxhi_in+1; ix <= nxhi_out; ix++) {
+        if (eflag_atom) u_brick[iz][iy][ix] = buf4[n++];
+        if (vflag_atom) {
+          v0_brick[iz][iy][ix] = buf4[n++];
+          v1_brick[iz][iy][ix] = buf4[n++];
+          v2_brick[iz][iy][ix] = buf4[n++];
+          v3_brick[iz][iy][ix] = buf4[n++];
+          v4_brick[iz][iy][ix] = buf4[n++];
+          v5_brick[iz][iy][ix] = buf4[n++];
+        }
+      }
+}
+
+/* ----------------------------------------------------------------------
+   find center grid pt for each of my particles
+   check that full stencil for the particle will fit in my 3d brick
+   store central grid pt indices in part2grid array
+------------------------------------------------------------------------- */
+
+void PPPMOld::particle_map()
+{
+  double **pos = atom->x;
+  const int natoms = atom->nlocal;
+
+  // set once any atom's stencil does not fit; reported after the loop
+  bool outside = false;
+  for (int ia = 0; ia < natoms; ia++) {
+
+    // (gx,gy,gz) = global indices of grid pt to "lower left" of charge
+    // the atom coord may lie outside both the global and the local box
+    // OFFSET is added/subtracted so that int(-0.75) gives -1 instead of 0
+
+    const int gx = static_cast<int> ((pos[ia][0]-boxlo[0])*delxinv+shift) - OFFSET;
+    const int gy = static_cast<int> ((pos[ia][1]-boxlo[1])*delyinv+shift) - OFFSET;
+    const int gz = static_cast<int> ((pos[ia][2]-boxlo[2])*delzinv+shift) - OFFSET;
+
+    part2grid[ia][0] = gx;
+    part2grid[ia][1] = gy;
+    part2grid[ia][2] = gz;
+
+    // the whole stencil around (gx,gy,gz) must lie inside my 3d brick
+
+    const bool fits =
+      gx+nlower >= nxlo_out && gx+nupper <= nxhi_out &&
+      gy+nlower >= nylo_out && gy+nupper <= nyhi_out &&
+      gz+nlower >= nzlo_out && gz+nupper <= nzhi_out;
+    if (!fits) outside = true;
+  }
+
+  if (outside) error->one(FLERR,"Out of range atoms - cannot compute PPPM");
+}
+
+/* ----------------------------------------------------------------------
+   create discretized "density" on section of global grid due to my particles
+   density(x,y,z) = charge "density" at grid points of my 3d brick
+   (nxlo:nxhi,nylo:nyhi,nzlo:nzhi) is extent of my brick (including ghosts)
+   in global grid
+------------------------------------------------------------------------- */
+
+void PPPMOld::make_rho()
+{
+  // clear the 3d density array before accumulating:
+  //   ngrid values starting at the lowest (_out) corner of the brick
+
+  memset(&(density_brick[nzlo_out][nylo_out][nxlo_out]),0,
+         ngrid*sizeof(FFT_SCALAR));
+
+  // spread each local charge onto the grid points of its stencil
+  // (cx,cy,cz) = global indices of grid pt to "lower left" of charge
+  // (fx,fy,fz) = offsets handed to compute_rho1d() for the 1d weights
+  // (gx,gy,gz) = global indices of the stencil pt being updated
+
+  double *charge = atom->q;
+  double **pos = atom->x;
+  const int natoms = atom->nlocal;
+
+  for (int ia = 0; ia < natoms; ia++) {
+    const int cx = part2grid[ia][0];
+    const int cy = part2grid[ia][1];
+    const int cz = part2grid[ia][2];
+
+    FFT_SCALAR fx = cx+shiftone - (pos[ia][0]-boxlo[0])*delxinv;
+    FFT_SCALAR fy = cy+shiftone - (pos[ia][1]-boxlo[1])*delyinv;
+    FFT_SCALAR fz = cz+shiftone - (pos[ia][2]-boxlo[2])*delzinv;
+
+    compute_rho1d(fx,fy,fz);
+
+    // prefactor delvolinv*q is multiplied by one 1d weight per dimension
+
+    const FFT_SCALAR qw = delvolinv * charge[ia];
+    for (int kz = nlower; kz <= nupper; kz++) {
+      const int gz = kz+cz;
+      const FFT_SCALAR qwz = qw*rho1d[2][kz];
+      for (int ky = nlower; ky <= nupper; ky++) {
+        const int gy = ky+cy;
+        const FFT_SCALAR qwzy = qwz*rho1d[1][ky];
+        for (int kx = nlower; kx <= nupper; kx++) {
+          const int gx = kx+cx;
+          density_brick[gz][gy][gx] += qwzy*rho1d[0][kx];
+        }
+      }
+    }
+  }
+}
+
+/* ----------------------------------------------------------------------
+   FFT-based Poisson solver
+------------------------------------------------------------------------- */
+
+void PPPMOld::poisson()
+{
+  int i,j,k,n;
+  double eng;
+
+  // transform charge density (r -> k)
+  // work1 holds interleaved (re,im) pairs, imaginary parts start as ZEROF
+  n = 0;
+  for (i = 0; i < nfft; i++) {
+    work1[n++] = density_fft[i];
+    work1[n++] = ZEROF;
+  }
+
+  fft1->compute(work1,work1,1);
+
+  // global energy and virial contribution
+  // form the grid-point count in double so the product cannot overflow int
+  double scaleinv = 1.0/(static_cast<double>(nx_pppm)*ny_pppm*nz_pppm);
+  double s2 = scaleinv*scaleinv;
+
+  if (eflag_global || vflag_global) {
+    if (vflag_global) {
+      n = 0;
+      for (i = 0; i < nfft; i++) {
+        eng = s2 * greensfn[i] * (work1[n]*work1[n] + work1[n+1]*work1[n+1]);
+        for (j = 0; j < 6; j++) virial[j] += eng*vg[i][j];
+        if (eflag_global) energy += eng;
+        n += 2;
+      }
+    } else {
+      n = 0;
+      for (i = 0; i < nfft; i++) {
+        energy +=
+          s2 * greensfn[i] * (work1[n]*work1[n] + work1[n+1]*work1[n+1]);
+        n += 2;
+      }
+    }
+  }
+
+  // scale by 1/total-grid-pts to get rho(k)
+  // multiply by Green's function to get V(k)
+
+  n = 0;
+  for (i = 0; i < nfft; i++) {
+    work1[n++] *= scaleinv * greensfn[i];
+    work1[n++] *= scaleinv * greensfn[i];
+  }
+
+  // extra FFTs for per-atom energy/virial
+
+  if (evflag_atom) poisson_peratom();
+
+  // compute gradients of V(r) in each of 3 dims by transforming -ik*V(k)
+  // FFT leaves data in 3d brick decomposition
+  // copy it into inner portion of vdx,vdy,vdz arrays
+  // (a+ib)*(-ik) = k*b - i*k*a, hence the swapped re/im terms below
+  // x direction gradient
+
+  n = 0;
+  for (k = nzlo_fft; k <= nzhi_fft; k++)
+    for (j = nylo_fft; j <= nyhi_fft; j++)
+      for (i = nxlo_fft; i <= nxhi_fft; i++) {
+        work2[n] = fkx[i]*work1[n+1];
+        work2[n+1] = -fkx[i]*work1[n];
+        n += 2;
+      }
+
+  fft2->compute(work2,work2,-1);
+
+  n = 0;
+  for (k = nzlo_in; k <= nzhi_in; k++)
+    for (j = nylo_in; j <= nyhi_in; j++)
+      for (i = nxlo_in; i <= nxhi_in; i++) {
+        vdx_brick[k][j][i] = work2[n];
+        n += 2;
+      }
+
+  // y direction gradient
+
+  n = 0;
+  for (k = nzlo_fft; k <= nzhi_fft; k++)
+    for (j = nylo_fft; j <= nyhi_fft; j++)
+      for (i = nxlo_fft; i <= nxhi_fft; i++) {
+        work2[n] = fky[j]*work1[n+1];
+        work2[n+1] = -fky[j]*work1[n];
+        n += 2;
+      }
+
+  fft2->compute(work2,work2,-1);
+
+  n = 0;
+  for (k = nzlo_in; k <= nzhi_in; k++)
+    for (j = nylo_in; j <= nyhi_in; j++)
+      for (i = nxlo_in; i <= nxhi_in; i++) {
+        vdy_brick[k][j][i] = work2[n];
+        n += 2;
+      }
+
+  // z direction gradient
+
+  n = 0;
+  for (k = nzlo_fft; k <= nzhi_fft; k++)
+    for (j = nylo_fft; j <= nyhi_fft; j++)
+      for (i = nxlo_fft; i <= nxhi_fft; i++) {
+        work2[n] = fkz[k]*work1[n+1];
+        work2[n+1] = -fkz[k]*work1[n];
+        n += 2;
+      }
+
+  fft2->compute(work2,work2,-1);
+
+  n = 0;
+  for (k = nzlo_in; k <= nzhi_in; k++)
+    for (j = nylo_in; j <= nyhi_in; j++)
+      for (i = nxlo_in; i <= nxhi_in; i++) {
+        vdz_brick[k][j][i] = work2[n];
+        n += 2;
+      }
+}
+
+/* ----------------------------------------------------------------------
+   FFT-based Poisson solver for per-atom energy/virial
+------------------------------------------------------------------------- */
+
+void PPPMOld::poisson_peratom()
+{
+  int i,j,k,n;
+
+  // energy: copy work1 to work2, FFT with flag -1,
+  //   then store the even (real) entries of work2 in u_brick
+  if (eflag_atom) {
+    n = 0;
+    for (i = 0; i < nfft; i++) {
+      work2[n] = work1[n];
+      work2[n+1] = work1[n+1];
+      n += 2;
+    }
+
+    fft2->compute(work2,work2,-1);
+
+    n = 0;
+    for (k = nzlo_in; k <= nzhi_in; k++)
+      for (j = nylo_in; j <= nyhi_in; j++)
+        for (i = nxlo_in; i <= nxhi_in; i++) {
+          u_brick[k][j][i] = work2[n];
+          n += 2;
+        }
+  }
+
+  // 6 components of virial in v0 thru v5
+  // for each c: work2 = work1*vg[i][c], FFT with flag -1, keep even entries
+  if (!vflag_atom) return;
+
+  n = 0;  // component 0: vg[i][0] -> v0_brick
+  for (i = 0; i < nfft; i++) {
+    work2[n] = work1[n]*vg[i][0];
+    work2[n+1] = work1[n+1]*vg[i][0];
+    n += 2;
+  }
+
+  fft2->compute(work2,work2,-1);
+
+  n = 0;
+  for (k = nzlo_in; k <= nzhi_in; k++)
+    for (j = nylo_in; j <= nyhi_in; j++)
+      for (i = nxlo_in; i <= nxhi_in; i++) {
+        v0_brick[k][j][i] = work2[n];
+        n += 2;
+      }
+
+  n = 0;  // component 1: vg[i][1] -> v1_brick
+  for (i = 0; i < nfft; i++) {
+    work2[n] = work1[n]*vg[i][1];
+    work2[n+1] = work1[n+1]*vg[i][1];
+    n += 2;
+  }
+
+  fft2->compute(work2,work2,-1);
+
+  n = 0;
+  for (k = nzlo_in; k <= nzhi_in; k++)
+    for (j = nylo_in; j <= nyhi_in; j++)
+      for (i = nxlo_in; i <= nxhi_in; i++) {
+        v1_brick[k][j][i] = work2[n];
+        n += 2;
+      }
+
+  n = 0;  // component 2: vg[i][2] -> v2_brick
+  for (i = 0; i < nfft; i++) {
+    work2[n] = work1[n]*vg[i][2];
+    work2[n+1] = work1[n+1]*vg[i][2];
+    n += 2;
+  }
+
+  fft2->compute(work2,work2,-1);
+
+  n = 0;
+  for (k = nzlo_in; k <= nzhi_in; k++)
+    for (j = nylo_in; j <= nyhi_in; j++)
+      for (i = nxlo_in; i <= nxhi_in; i++) {
+        v2_brick[k][j][i] = work2[n];
+        n += 2;
+      }
+
+  n = 0;  // component 3: vg[i][3] -> v3_brick
+  for (i = 0; i < nfft; i++) {
+    work2[n] = work1[n]*vg[i][3];
+    work2[n+1] = work1[n+1]*vg[i][3];
+    n += 2;
+  }
+
+  fft2->compute(work2,work2,-1);
+
+  n = 0;
+  for (k = nzlo_in; k <= nzhi_in; k++)
+    for (j = nylo_in; j <= nyhi_in; j++)
+      for (i = nxlo_in; i <= nxhi_in; i++) {
+        v3_brick[k][j][i] = work2[n];
+        n += 2;
+      }
+
+  n = 0;  // component 4: vg[i][4] -> v4_brick
+  for (i = 0; i < nfft; i++) {
+    work2[n] = work1[n]*vg[i][4];
+    work2[n+1] = work1[n+1]*vg[i][4];
+    n += 2;
+  }
+
+  fft2->compute(work2,work2,-1);
+
+  n = 0;
+  for (k = nzlo_in; k <= nzhi_in; k++)
+    for (j = nylo_in; j <= nyhi_in; j++)
+      for (i = nxlo_in; i <= nxhi_in; i++) {
+        v4_brick[k][j][i] = work2[n];
+        n += 2;
+      }
+
+  n = 0;  // component 5: vg[i][5] -> v5_brick
+  for (i = 0; i < nfft; i++) {
+    work2[n] = work1[n]*vg[i][5];
+    work2[n+1] = work1[n+1]*vg[i][5];
+    n += 2;
+  }
+
+  fft2->compute(work2,work2,-1);
+
+  n = 0;
+  for (k = nzlo_in; k <= nzhi_in; k++)
+    for (j = nylo_in; j <= nyhi_in; j++)
+      for (i = nxlo_in; i <= nxhi_in; i++) {
+        v5_brick[k][j][i] = work2[n];
+        n += 2;
+      }
+}
+
+/* ----------------------------------------------------------------------
+   interpolate from grid to get electric field & force on my particles
+------------------------------------------------------------------------- */
+
+void PPPMOld::fieldforce()
+{
+  // for each local charge, gather the three gradient bricks over its stencil
+  // (cx,cy,cz) = global indices of grid pt to "lower left" of charge
+  // (fx,fy,fz) = offsets handed to compute_rho1d() for the 1d weights
+  // (gx,gy,gz) = global indices of the stencil pt being read
+  // (ex,ey,ez) = negated weighted sums of vdx/vdy/vdz = E-field on particle
+
+  double *charge = atom->q;
+  double **pos = atom->x;
+  double **frc = atom->f;
+  const int natoms = atom->nlocal;
+
+  for (int ia = 0; ia < natoms; ia++) {
+    const int cx = part2grid[ia][0];
+    const int cy = part2grid[ia][1];
+    const int cz = part2grid[ia][2];
+
+    FFT_SCALAR fx = cx+shiftone - (pos[ia][0]-boxlo[0])*delxinv;
+    FFT_SCALAR fy = cy+shiftone - (pos[ia][1]-boxlo[1])*delyinv;
+    FFT_SCALAR fz = cz+shiftone - (pos[ia][2]-boxlo[2])*delzinv;
+
+    compute_rho1d(fx,fy,fz);
+
+    FFT_SCALAR ex = ZEROF;
+    FFT_SCALAR ey = ZEROF;
+    FFT_SCALAR ez = ZEROF;
+
+    for (int kz = nlower; kz <= nupper; kz++) {
+      const int gz = kz+cz;
+      const FFT_SCALAR wz = rho1d[2][kz];
+      for (int ky = nlower; ky <= nupper; ky++) {
+        const int gy = ky+cy;
+        const FFT_SCALAR wzy = wz*rho1d[1][ky];
+        for (int kx = nlower; kx <= nupper; kx++) {
+          const int gx = kx+cx;
+          const FFT_SCALAR w = wzy*rho1d[0][kx];
+          ex -= w*vdx_brick[gz][gy][gx];
+          ey -= w*vdy_brick[gz][gy][gx];
+          ez -= w*vdz_brick[gz][gy][gx];
+        }
+      }
+    }
+
+    // scale the field by qqrd2e*scale*q to get the force on this atom
+    // the z component is left untouched when slabflag == 2
+
+    const double qfactor = force->qqrd2e * scale * charge[ia];
+    frc[ia][0] += qfactor*ex;
+    frc[ia][1] += qfactor*ey;
+    if (slabflag != 2) frc[ia][2] += qfactor*ez;
+  }
+}
+
+/* ----------------------------------------------------------------------
+   interpolate from grid to get per-atom energy/virial
+------------------------------------------------------------------------- */
+
+void PPPMOld::fieldforce_peratom()
+{
+  int i,l,m,n,nx,ny,nz,mx,my,mz;
+  FFT_SCALAR dx,dy,dz,x0,y0,z0;
+  FFT_SCALAR u,v0,v1,v2,v3,v4,v5;
+
+  // loop over my charges, interpolate u/v0-v5 bricks from nearby grid points
+  // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
+  // (dx,dy,dz) = distance to "lower left" grid pt
+  // (mx,my,mz) = global coords of moving stencil pt
+
+  double *q = atom->q;
+  double **x = atom->x;
+  int nlocal = atom->nlocal;
+
+  for (i = 0; i < nlocal; i++) {
+    nx = part2grid[i][0];
+    ny = part2grid[i][1];
+    nz = part2grid[i][2];
+    dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv;
+    dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv;
+    dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv;
+
+    compute_rho1d(dx,dy,dz);
+
+    u = v0 = v1 = v2 = v3 = v4 = v5 = ZEROF;
+    for (n = nlower; n <= nupper; n++) {
+      mz = n+nz;
+      z0 = rho1d[2][n];
+      for (m = nlower; m <= nupper; m++) {
+        my = m+ny;
+        y0 = z0*rho1d[1][m];
+        for (l = nlower; l <= nupper; l++) {
+          mx = l+nx;
+          x0 = y0*rho1d[0][l];
+          if (eflag_atom) u += x0*u_brick[mz][my][mx];
+          if (vflag_atom) {
+            v0 += x0*v0_brick[mz][my][mx];
+            v1 += x0*v1_brick[mz][my][mx];
+            v2 += x0*v2_brick[mz][my][mx];
+            v3 += x0*v3_brick[mz][my][mx];
+            v4 += x0*v4_brick[mz][my][mx];
+            v5 += x0*v5_brick[mz][my][mx];
+          }
+        }
+      }
+    }
+
+    // eatom is weighted by q[i] here; vatom terms are accumulated unweighted
+    // NOTE(review): any charge scaling of vatom presumably happens in the caller
+    if (eflag_atom) eatom[i] += q[i]*u;
+    if (vflag_atom) {
+      vatom[i][0] += v0;
+      vatom[i][1] += v1;
+      vatom[i][2] += v2;
+      vatom[i][3] += v3;
+      vatom[i][4] += v4;
+      vatom[i][5] += v5;
+    }
+  }
+}
+
+/* ----------------------------------------------------------------------
+   map nprocs to NX by NY grid as PX by PY procs - return optimal px,py
+------------------------------------------------------------------------- */
+
+void PPPMOld::procs2grid2d(int nprocs, int nx, int ny, int *px, int *py)
+{
+  // loop thru all possible factorizations of nprocs
+  // surf = surface area of largest proc sub-domain
+  // innermost if test minimizes surface area and surface/volume ratio
+  // *px,*py are written only when a candidate wins (never if nprocs < 1)
+  int bestsurf = 2 * (nx + ny);  // starting bound each candidate is tested against
+  int bestboxx = 0;
+  int bestboxy = 0;
+
+  int boxx,boxy,surf,ipx,ipy;
+
+  ipx = 1;
+  while (ipx <= nprocs) {
+    if (nprocs % ipx == 0) {  // ipx divides nprocs, so ipx*ipy == nprocs
+      ipy = nprocs/ipx;
+      boxx = nx/ipx;
+      if (nx % ipx) boxx++;  // round up: x extent of the largest sub-domain
+      boxy = ny/ipy;
+      if (ny % ipy) boxy++;  // round up: y extent of the largest sub-domain
+      surf = boxx + boxy;
+      if (surf < bestsurf ||  // on equal surf, prefer the larger boxx*boxy
+          (surf == bestsurf && boxx*boxy > bestboxx*bestboxy)) {
+        bestsurf = surf;
+        bestboxx = boxx;
+        bestboxy = boxy;
+        *px = ipx;
+        *py = ipy;
+      }
+    }
+    ipx++;
+  }
+}
+
+/* ----------------------------------------------------------------------
+   charge assignment into rho1d
+   dx,dy,dz = distance of particle from "lower left" grid point
+------------------------------------------------------------------------- */
+
+void PPPMOld::compute_rho1d(const FFT_SCALAR &dx, const FFT_SCALAR &dy,
+                         const FFT_SCALAR &dz)
+{
+  int k,l;
+  FFT_SCALAR r1,r2,r3;
+  // for each stencil index k, evaluate one polynomial at dx, dy and dz
+  for (k = (1-order)/2; k <= order/2; k++) {
+    r1 = r2 = r3 = ZEROF;
+    // Horner evaluation: start from rho_coeff[order-1][k], end at rho_coeff[0][k]
+    for (l = order-1; l >= 0; l--) {
+      r1 = rho_coeff[l][k] + r1*dx;
+      r2 = rho_coeff[l][k] + r2*dy;
+      r3 = rho_coeff[l][k] + r3*dz;
+    }
+    rho1d[0][k] = r1;  // weight along x
+    rho1d[1][k] = r2;  // weight along y
+    rho1d[2][k] = r3;  // weight along z
+  }
+}
+
+/* ----------------------------------------------------------------------
+   generate coefficients for the weight function of order n
+
+              (n-1)
+  Wn(x) =     Sum    wn(k,x) , Sum is over every other integer
+           k=-(n-1)
+  For k=-(n-1),-(n-1)+2, ....., (n-1)-2,n-1
+      k is odd integers if n is even and even integers if n is odd
+              ---
+             | n-1
+             | Sum a(l,j)*(x-k/2)**l   if abs(x-k/2) < 1/2
+  wn(k,x) = <  l=0
+             |
+             |  0                       otherwise
+              ---
+  a coefficients are packed into the array rho_coeff to eliminate zeros
+  rho_coeff(l,(k+mod(n+1,2))/2) = a(l,k)
+------------------------------------------------------------------------- */
+
+void PPPMOld::compute_rho_coeff()
+{
+  int j,k,l,m;
+  FFT_SCALAR s;
+  // a = scratch table, used below as a[l][k] with 0 <= l < order, -order <= k <= order
+  FFT_SCALAR **a;
+  memory->create2d_offset(a,order,-order,order,"pppm:a");
+
+  for (k = -order; k <= order; k++)
+    for (l = 0; l < order; l++)
+      a[l][k] = 0.0;
+  // build the table in place for j = 1..order-1 from the columns at k-1 and k+1
+  a[0][0] = 1.0;
+  for (j = 1; j < order; j++) {
+    for (k = -j; k <= j; k += 2) {
+      s = 0.0;  // accumulates the new a[0][k], stored after the l loop
+      for (l = 0; l < j; l++) {
+        a[l+1][k] = (a[l][k+1]-a[l][k-1]) / (l+1);
+#ifdef FFT_SINGLE
+        s += powf(0.5,(float) l+1) *
+          (a[l][k-1] + powf(-1.0,(float) l) * a[l][k+1]) / (l+1);
+#else
+        s += pow(0.5,(double) l+1) *
+          (a[l][k-1] + pow(-1.0,(double) l) * a[l][k+1]) / (l+1);
+#endif
+      }
+      a[0][k] = s;
+    }
+  }
+  // copy every other column of a into rho_coeff, first target column is (1-order)/2
+  m = (1-order)/2;
+  for (k = -(order-1); k < order; k += 2) {
+    for (l = 0; l < order; l++)
+      rho_coeff[l][m] = a[l][k];
+    m++;
+  }
+
+  memory->destroy2d_offset(a,-order);  // release the scratch table
+}
+
+/* ----------------------------------------------------------------------
+   Slab-geometry correction term to dampen inter-slab interactions between
+   periodically repeating slabs.  Yields good approximation to 2D Ewald if
+   adequate empty space is left between repeating slabs (J. Chem. Phys.
+   111, 3155).  Slabs defined here to be parallel to the xy plane. Also
+   extended to non-neutral systems (J. Chem. Phys. 131, 094107).
+------------------------------------------------------------------------- */
+
+void PPPMOld::slabcorr()
+{
+  // adds to energy (if eflag_global), eatom[] (if eflag_atom) and f[i][2]
+  // compute local contribution to global dipole moment
+  double *q = atom->q;
+  double **x = atom->x;
+  double zprd = domain->zprd;
+  int nlocal = atom->nlocal;
+
+  double dipole = 0.0;  // sum of q*z over this proc's local atoms
+  for (int i = 0; i < nlocal; i++) dipole += q[i]*x[i][2];
+
+  // sum local contributions to get global dipole moment
+
+  double dipole_all;
+  MPI_Allreduce(&dipole,&dipole_all,1,MPI_DOUBLE,MPI_SUM,world);
+
+  // need to make non-neutral systems and/or
+  //  per-atom energy translationally invariant
+
+  double dipole_r2 = 0.0;  // sum of q*z*z; stays 0.0 when the branch below is skipped
+  if (eflag_atom || fabs(qsum) > SMALL) {
+    for (int i = 0; i < nlocal; i++)
+      dipole_r2 += q[i]*x[i][2]*x[i][2];
+
+    // sum local contributions
+
+    double tmp;
+    MPI_Allreduce(&dipole_r2,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
+    dipole_r2 = tmp;
+  }
+
+  // compute corrections
+
+  const double e_slabcorr = MY_2PI*(dipole_all*dipole_all -
+    qsum*dipole_r2 - qsum*qsum*zprd*zprd/12.0)/volume;
+  const double qscale = force->qqrd2e * scale;
+
+  if (eflag_global) energy += qscale * e_slabcorr;
+
+  // per-atom energy
+
+  if (eflag_atom) {
+    double efact = qscale * MY_2PI/volume;
+    for (int i = 0; i < nlocal; i++)
+      eatom[i] += efact * q[i]*(x[i][2]*dipole_all - 0.5*(dipole_r2 +
+        qsum*x[i][2]*x[i][2]) - qsum*zprd*zprd/12.0);
+  }
+
+  // add on force corrections
+
+  double ffact = qscale * (-4.0*MY_PI/volume);
+  double **f = atom->f;
+  // only the z component of the force is modified
+  for (int i = 0; i < nlocal; i++) f[i][2] += ffact * q[i]*(dipole_all - qsum*x[i][2]);
+}
+
+
+/* ----------------------------------------------------------------------
+   perform and time the 1d FFTs required for N timesteps
+------------------------------------------------------------------------- */
+
+int PPPMOld::timing_1d(int n, double &time1d)
+{
+  double time1,time2;
+  // clear the first 2*nfft_both entries of work1 before timing
+  for (int i = 0; i < 2*nfft_both; i++) work1[i] = ZEROF;
+
+  MPI_Barrier(world);  // all procs reach this point before the clock starts
+  time1 = MPI_Wtime();
+  // each pass makes one timing1d call on fft1 (flag 1) and three on fft2 (flag -1)
+  for (int i = 0; i < n; i++) {
+    fft1->timing1d(work1,nfft_both,1);
+    fft2->timing1d(work1,nfft_both,-1);
+    fft2->timing1d(work1,nfft_both,-1);
+    fft2->timing1d(work1,nfft_both,-1);
+  }
+
+  MPI_Barrier(world);  // all procs finish before the clock stops
+  time2 = MPI_Wtime();
+  time1d = time2 - time1;  // elapsed wall time for all n passes, returned by reference
+
+  return 4;  // number of timing1d calls made per pass of the loop above
+}
+
+/* ----------------------------------------------------------------------
+   perform and time the 3d FFTs required for N timesteps
+------------------------------------------------------------------------- */
+
+int PPPMOld::timing_3d(int n, double &time3d)
+{
+  double time1,time2;
+  // clear the first 2*nfft_both entries of work1 before timing
+  for (int i = 0; i < 2*nfft_both; i++) work1[i] = ZEROF;
+
+  MPI_Barrier(world);  // all procs reach this point before the clock starts
+  time1 = MPI_Wtime();
+  // each pass makes one in-place compute call on fft1 (flag 1), three on fft2 (flag -1)
+  for (int i = 0; i < n; i++) {
+    fft1->compute(work1,work1,1);
+    fft2->compute(work1,work1,-1);
+    fft2->compute(work1,work1,-1);
+    fft2->compute(work1,work1,-1);
+  }
+
+  MPI_Barrier(world);  // all procs finish before the clock stops
+  time2 = MPI_Wtime();
+  time3d = time2 - time1;  // elapsed wall time for all n passes, returned by reference
+
+  return 4;  // number of compute calls made per pass of the loop above
+}
+
+/* ----------------------------------------------------------------------
+   memory usage of local arrays
+------------------------------------------------------------------------- */
+
+double PPPMOld::memory_usage()
+{
+  double bytes = nmax*3 * sizeof(double);  // running byte total, returned as a double
+  int nbrick = (nxhi_out-nxlo_out+1) * (nyhi_out-nylo_out+1) *
+    (nzhi_out-nzlo_out+1);  // grid points in the *_out brick; NOTE(review): int product
+  bytes += 4 * nbrick * sizeof(FFT_SCALAR);
+  bytes += 6 * nfft_both * sizeof(double);
+  bytes += nfft_both * sizeof(double);
+  bytes += nfft_both*5 * sizeof(FFT_SCALAR);
+  bytes += 2 * nbuf * sizeof(FFT_SCALAR);
+  // the two optional groups of arrays are counted only when their flag is set
+  if (peratom_allocate_flag) {
+    bytes += 7 * nbrick * sizeof(FFT_SCALAR);
+    bytes += 2 * nbuf_peratom * sizeof(FFT_SCALAR);
+  }
+
+  if (group_allocate_flag) {
+    bytes += 2 * nbrick * sizeof(FFT_SCALAR);  // two bricks, as in allocate_groups()
+    bytes += 2 * nfft_both * sizeof(FFT_SCALAR);;  // NOTE(review): second ';' is an empty statement
+  }
+
+  return bytes;
+}
+
+/* ----------------------------------------------------------------------
+   group-group interactions
+ ------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   compute the PPPM total long-range force and energy for groups A and B
+ ------------------------------------------------------------------------- */
+
+void PPPMOld::compute_group_group(int groupbit_A, int groupbit_B, int BA_flag)
+{
+  if (slabflag)
+    error->all(FLERR,"Cannot (yet) use K-space slab "
+               "correction with compute group/group");
+  // purpose: fill e2group and f2group[0..2] with the summed group A <--> B values
+  int i,j;  // NOTE(review): j is not used in this function
+
+  if (!group_allocate_flag) {
+    allocate_groups();
+    group_allocate_flag = 1;
+  }
+
+  e2group = 0; //energy
+  f2group[0] = 0; //force in x-direction
+  f2group[1] = 0; //force in y-direction
+  f2group[2] = 0; //force in z-direction
+
+  double *q = atom->q;
+  int nlocal = atom->nlocal;
+  int *mask = atom->mask;
+  // NOTE(review): q, nlocal and mask above are not referenced again in this function
+
+  // map my particle charge onto my local 3d density grid
+
+  make_rho_groups(groupbit_A,groupbit_B,BA_flag);
+
+  // all procs communicate density values from their ghost cells
+  //   to fully sum contribution in their 3d bricks
+  // remap from 3d decomposition to FFT decomposition
+
+  // temporarily store and switch pointers so we can
+  //  use brick2fft() for groups A and B (without
+  //  writing an additional function)
+
+  FFT_SCALAR ***density_brick_real = density_brick;
+  FFT_SCALAR *density_fft_real = density_fft;
+
+  // group A
+
+  density_brick = density_A_brick;
+  density_fft = density_A_fft;
+
+  brick2fft();
+
+  // group B
+
+  density_brick = density_B_brick;
+  density_fft = density_B_fft;
+
+  brick2fft();
+
+  // switch back pointers
+
+  density_brick = density_brick_real;
+  density_fft = density_fft_real;
+
+  // compute potential gradient on my FFT grid and
+  //   portion of group-group energy/force on this proc's FFT grid
+
+  poisson_groups(BA_flag);
+
+  const double qscale = force->qqrd2e * scale;
+
+  // total group A <--> group B energy
+  // self and boundary correction terms are in compute_group_group.cpp
+
+  double e2group_all;
+  MPI_Allreduce(&e2group,&e2group_all,1,MPI_DOUBLE,MPI_SUM,world);
+  e2group = e2group_all;
+
+  e2group *= qscale*0.5*volume;
+
+  // total group A <--> group B force
+
+  double f2group_all[3];
+  MPI_Allreduce(f2group,f2group_all,3,MPI_DOUBLE,MPI_SUM,world);
+  // store the reduced sums back into f2group, scaled by qscale*volume
+  for (i = 0; i < 3; i++) f2group[i] = qscale*volume*f2group_all[i];
+}
+
+/* ----------------------------------------------------------------------
+ allocate group-group memory that depends on # of K-vectors and order
+ ------------------------------------------------------------------------- */
+
+void PPPMOld::allocate_groups()
+{
+  memory->create3d_offset(density_A_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
+                          nxlo_out,nxhi_out,"pppm:density_A_brick");  // bounds passed in z,y,x order
+  memory->create3d_offset(density_B_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
+                          nxlo_out,nxhi_out,"pppm:density_B_brick");  // same bounds as the A brick
+  memory->create(density_A_fft,nfft_both,"pppm:density_A_fft");  // nfft_both entries
+  memory->create(density_B_fft,nfft_both,"pppm:density_B_fft");  // nfft_both entries
+}
+
+/* ----------------------------------------------------------------------
+ deallocate group-group memory that depends on # of K-vectors and order
+ ------------------------------------------------------------------------- */
+
+void PPPMOld::deallocate_groups()
+{
+  memory->destroy3d_offset(density_A_brick,nzlo_out,nylo_out,nxlo_out);  // same lower bounds as at creation
+  memory->destroy3d_offset(density_B_brick,nzlo_out,nylo_out,nxlo_out);  // same lower bounds as at creation
+  memory->destroy(density_A_fft);  // releases the arrays made in allocate_groups()
+  memory->destroy(density_B_fft);
+}
+
+/* ----------------------------------------------------------------------
+ create discretized "density" on section of global grid due to my particles
+ density(x,y,z) = charge "density" at grid points of my 3d brick
+ (nxlo:nxhi,nylo:nyhi,nzlo:nzhi) is extent of my brick (including ghosts)
+ in global grid for group-group interactions
+ ------------------------------------------------------------------------- */
+
+void PPPMOld::make_rho_groups(int groupbit_A, int groupbit_B, int BA_flag)
+{
+  int l,m,n,nx,ny,nz,mx,my,mz;
+  FFT_SCALAR dx,dy,dz,x0,y0,z0;
+  // purpose: spread local charges onto density_A_brick and/or density_B_brick
+  // clear 3d density arrays
+  // (each memset zeroes ngrid FFT_SCALAR values starting at the lowest-index element)
+  memset(&(density_A_brick[nzlo_out][nylo_out][nxlo_out]),0,
+         ngrid*sizeof(FFT_SCALAR));
+
+  memset(&(density_B_brick[nzlo_out][nylo_out][nxlo_out]),0,
+         ngrid*sizeof(FFT_SCALAR));
+
+  // loop over my charges, add their contribution to nearby grid points
+  // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
+  // (dx,dy,dz) = distance to "lower left" grid pt
+  // (mx,my,mz) = global coords of moving stencil pt
+
+  double *q = atom->q;
+  double **x = atom->x;
+  int nlocal = atom->nlocal;
+  int *mask = atom->mask;
+
+  for (int i = 0; i < nlocal; i++) {
+    // when BA_flag is set, atoms that belong to both groups are skipped
+    if ((mask[i] & groupbit_A) && (mask[i] & groupbit_B))
+      if (BA_flag) continue;
+    // atoms in neither group contribute nothing
+    if ((mask[i] & groupbit_A) || (mask[i] & groupbit_B)) {
+
+      nx = part2grid[i][0];
+      ny = part2grid[i][1];
+      nz = part2grid[i][2];
+      dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv;
+      dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv;
+      dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv;
+
+      compute_rho1d(dx,dy,dz);
+      // z0, y0, x0 build up the product charge * delvolinv * z weight * y weight
+      z0 = delvolinv * q[i];
+      for (n = nlower; n <= nupper; n++) {
+        mz = n+nz;
+        y0 = z0*rho1d[2][n];
+        for (m = nlower; m <= nupper; m++) {
+          my = m+ny;
+          x0 = y0*rho1d[1][m];
+          for (l = nlower; l <= nupper; l++) {
+            mx = l+nx;
+
+            // group A
+
+            if (mask[i] & groupbit_A)
+              density_A_brick[mz][my][mx] += x0*rho1d[0][l];
+
+            // group B
+
+            if (mask[i] & groupbit_B)
+              density_B_brick[mz][my][mx] += x0*rho1d[0][l];
+          }
+        }
+      }
+    }
+  }
+}
+
+/* ----------------------------------------------------------------------
+   FFT-based Poisson solver for group-group interactions
+ ------------------------------------------------------------------------- */
+
+void PPPMOld::poisson_groups(int BA_flag)
+{
+  int i,j,k,n;
+  double eng;  // NOTE(review): eng is not used in this function
+  // purpose: accumulate this proc's part of e2group and, unless BA_flag, f2group[0..2]
+  // reuse memory (already declared)
+
+  FFT_SCALAR *work_A = work1;
+  FFT_SCALAR *work_B = work2;
+
+  // transform charge density (r -> k)
+
+  // group A
+  // each density value becomes a pair (value, ZEROF) - presumably (real, imag); confirm
+  n = 0;
+  for (i = 0; i < nfft; i++) {
+    work_A[n++] = density_A_fft[i];
+    work_A[n++] = ZEROF;
+  }
+
+  fft1->compute(work_A,work_A,1);
+
+  // group B
+
+  n = 0;
+  for (i = 0; i < nfft; i++) {
+    work_B[n++] = density_B_fft[i];
+    work_B[n++] = ZEROF;
+  }
+
+  fft1->compute(work_B,work_B,1);
+
+  // group-group energy and force contribution,
+  //  keep everything in reciprocal space so
+  //  no inverse FFTs needed
+  // NOTE(review): the grid-size product is formed before the division; if int, may overflow - confirm
+  double scaleinv = 1.0/(nx_pppm*ny_pppm*nz_pppm);
+  double s2 = scaleinv*scaleinv;
+
+  // energy
+  // per grid point: s2 * greensfn * (A[n]*B[n] + A[n+1]*B[n+1])
+  n = 0;
+  for (i = 0; i < nfft; i++) {
+    e2group += s2 * greensfn[i] *
+      (work_A[n]*work_B[n] + work_A[n+1]*work_B[n+1]);
+    n += 2;
+  }
+
+  if (BA_flag) return;
+
+  // BA_flag set: only the energy is accumulated, f2group is left untouched
+  // multiply by Green's function and s2
+  //  (only for work_A so it is not squared below)
+
+  n = 0;
+  for (i = 0; i < nfft; i++) {
+    work_A[n++] *= s2 * greensfn[i];
+    work_A[n++] *= s2 * greensfn[i];
+  }
+
+  double partial_group;
+
+  // force, x direction
+  // the three loops below share one partial_group formula and differ only in fkx/fky/fkz
+  n = 0;
+  for (k = nzlo_fft; k <= nzhi_fft; k++)
+    for (j = nylo_fft; j <= nyhi_fft; j++)
+      for (i = nxlo_fft; i <= nxhi_fft; i++) {
+        partial_group = work_A[n+1]*work_B[n] - work_A[n]*work_B[n+1];
+        f2group[0] += fkx[i] * partial_group;
+        n += 2;
+      }
+
+  // force, y direction
+
+  n = 0;
+  for (k = nzlo_fft; k <= nzhi_fft; k++)
+    for (j = nylo_fft; j <= nyhi_fft; j++)
+      for (i = nxlo_fft; i <= nxhi_fft; i++) {
+        partial_group = work_A[n+1]*work_B[n] - work_A[n]*work_B[n+1];
+        f2group[1] += fky[j] * partial_group;
+        n += 2;
+      }
+
+  // force, z direction
+
+  n = 0;
+  for (k = nzlo_fft; k <= nzhi_fft; k++)
+    for (j = nylo_fft; j <= nyhi_fft; j++)
+      for (i = nxlo_fft; i <= nxhi_fft; i++) {
+        partial_group = work_A[n+1]*work_B[n] - work_A[n]*work_B[n+1];
+        f2group[2] += fkz[k] * partial_group;
+        n += 2;
+      }
+}