git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@7883 f3b2605a-c512-4ea7-a41b-209d697bcdaa

2012-03-02 15:42:48 +00:00 · 2012-03-02 15:42:48 +00:00 · b2ccacbdc0
parent 8705054819
commit b2ccacbdc0
6 changed files with 1047 additions and 0 deletions
--- a/src/USER-OMP/pair_beck_omp.cpp
+++ b/src/USER-OMP/pair_beck_omp.cpp
@ -0,0 +1,173 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   This software is distributed under the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#include "math.h"
+#include "pair_beck_omp.h"
+#include "atom.h"
+#include "comm.h"
+#include "force.h"
+#include "neighbor.h"
+#include "neigh_list.h"
+
+#include "suffix.h"
+using namespace LAMMPS_NS;
+
+/* ---------------------------------------------------------------------- */
+
+PairBeckOMP::PairBeckOMP(LAMMPS *lmp) :
+  PairBeck(lmp), ThrOMP(lmp, THR_PAIR)
+{
+  suffix_flag |= Suffix::OMP;
+  respa_enable = 0;
+}
+
+/* ---------------------------------------------------------------------- */
+
+void PairBeckOMP::compute(int eflag, int vflag)
+{
+  if (eflag || vflag) {
+    ev_setup(eflag,vflag);
+  } else evflag = vflag_fdotr = 0;
+
+  const int nall = atom->nlocal + atom->nghost;
+  const int nthreads = comm->nthreads;
+  const int inum = list->inum;
+
+#if defined(_OPENMP)
+#pragma omp parallel default(none) shared(eflag,vflag)
+#endif
+  {
+    int ifrom, ito, tid;
+
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
+
+    if (evflag) {
+      if (eflag) {
+	if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
+      } else {
+	if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
+      }
+    } else {
+      if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
+    }
+
+    reduce_thr(this, eflag, vflag, thr);
+  } // end of omp parallel region
+}
+
+template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
+void PairBeckOMP::eval(int iifrom, int iito, ThrData * const thr)
+{
+  int i,j,ii,jj,jnum,itype,jtype;
+  double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
+  double rsq,r5,force_beck,factor_lj;
+  double r,rinv;
+  double aaij,alphaij,betaij;
+  double term1,term1inv,term2,term3,term4,term5,term6;
+  int *ilist,*jlist,*numneigh,**firstneigh;
+
+  evdwl = 0.0;
+
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  int *type = atom->type;
+  int nlocal = atom->nlocal;
+  double *special_lj = force->special_lj;
+  double fxtmp,fytmp,fztmp;
+
+  ilist = list->ilist;
+  numneigh = list->numneigh;
+  firstneigh = list->firstneigh;
+
+  // loop over neighbors of my atoms
+
+  for (ii = iifrom; ii < iito; ++ii) {
+
+    i = ilist[ii];
+    xtmp = x[i][0];
+    ytmp = x[i][1];
+    ztmp = x[i][2];
+    itype = type[i];
+    jlist = firstneigh[i];
+    jnum = numneigh[i];
+    fxtmp=fytmp=fztmp=0.0;
+
+    for (jj = 0; jj < jnum; jj++) {
+      j = jlist[jj];
+      factor_lj = special_lj[sbmask(j)];
+      j &= NEIGHMASK;
+
+      delx = xtmp - x[j][0];
+      dely = ytmp - x[j][1];
+      delz = ztmp - x[j][2];
+      rsq = delx*delx + dely*dely + delz*delz;
+      jtype = type[j];
+
+      if (rsq < cutsq[itype][jtype]) {
+        r = sqrt(rsq);
+        r5 = rsq*rsq*r; 
+        aaij = aa[itype][jtype];
+        alphaij = alpha[itype][jtype];
+        betaij = beta[itype][jtype];
+        term1 = aaij*aaij + rsq;
+        term2 = 1.0/pow(term1,5);
+        term3 = 21.672 + 30.0*aaij*aaij + 6.0*rsq;
+        term4 = alphaij + r5*betaij;
+        term5 = alphaij + 6.0*r5*betaij; 
+        rinv  = 1.0/r;
+	force_beck = AA[itype][jtype]*exp(-1.0*r*term4)*term5;
+        force_beck -= BB[itype][jtype]*r*term2*term3; 
+ 
+	fpair = factor_lj*force_beck*rinv;
+        
+	f[i][0] += delx*fpair;
+	f[i][1] += dely*fpair;
+	f[i][2] += delz*fpair;
+	if (NEWTON_PAIR || j < nlocal) {
+	  f[j][0] -= delx*fpair;
+	  f[j][1] -= dely*fpair;
+	  f[j][2] -= delz*fpair;
+	}
+
+	if (EFLAG) {
+          term6 = 1.0/pow(term1,3);
+          term1inv = 1.0/term1;
+          evdwl = AA[itype][jtype]*exp(-1.0*r*term4);
+          evdwl -= BB[itype][jtype]*term6*(1.0+(2.709+3.0*aaij*aaij)*term1inv);
+	}
+
+	if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR,
+				 evdwl,0.0,fpair,delx,dely,delz,thr);
+      }
+    }
+    f[i][0] += fxtmp;
+    f[i][1] += fytmp;
+    f[i][2] += fztmp;
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+double PairBeckOMP::memory_usage()
+{
+  double bytes = memory_usage_thr();
+  bytes += PairBeck::memory_usage();
+
+  return bytes;
+}
--- a/src/USER-OMP/pair_beck_omp.h
+++ b/src/USER-OMP/pair_beck_omp.h
@ -0,0 +1,48 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+
+PairStyle(beck/omp,PairBeckOMP)
+
+#else
+
+#ifndef LMP_PAIR_BECK_OMP_H
+#define LMP_PAIR_BECK_OMP_H
+
+#include "pair_beck.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+class PairBeckOMP : public PairBeck, public ThrOMP {
+
+ public:
+  PairBeckOMP(class LAMMPS *);
+
+  virtual void compute(int, int);
+  virtual double memory_usage();
+
+ private:
+  template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
+  void eval(int ifrom, int ito, ThrData * const thr);
+};
+
+}
+
+#endif
+#endif
--- a/src/USER-OMP/pair_meam_spline_omp.cpp
+++ b/src/USER-OMP/pair_meam_spline_omp.cpp
@ -0,0 +1,324 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   This software is distributed under the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#include "math.h"
+#include "string.h"
+
+#include "pair_meam_spline_omp.h"
+#include "atom.h"
+#include "comm.h"
+#include "force.h"
+#include "memory.h"
+#include "neighbor.h"
+#include "neigh_list.h"
+
+#include "suffix.h"
+using namespace LAMMPS_NS;
+
+/* ---------------------------------------------------------------------- */
+
+PairMEAMSplineOMP::PairMEAMSplineOMP(LAMMPS *lmp) :
+  PairMEAMSpline(lmp), ThrOMP(lmp, THR_PAIR)
+{
+  suffix_flag |= Suffix::OMP;
+  respa_enable = 0;
+}
+
+/* ---------------------------------------------------------------------- */
+
+void PairMEAMSplineOMP::compute(int eflag, int vflag)
+{
+  if (eflag || vflag) {
+    ev_setup(eflag,vflag);
+  } else evflag = vflag_fdotr = eflag_global = vflag_global = eflag_atom = vflag_atom = 0;
+
+  const int nall = atom->nlocal + atom->nghost;
+  const int nthreads = comm->nthreads;
+  const int inum = listfull->inum;
+
+  if (listhalf->inum != inum)
+    error->warning(FLERR,"inconsistent half and full neighborlist");
+
+  // Grow per-atom array if necessary.
+
+  if (atom->nmax > nmax) {
+    memory->destroy(Uprime_values);
+    nmax = atom->nmax;
+    memory->create(Uprime_values,nmax*nthreads,"pair:Uprime");
+  }
+
+#if defined(_OPENMP)
+#pragma omp parallel default(none) shared(eflag,vflag)
+#endif
+  {
+    int ifrom, ito, tid;
+
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
+    
+    thr->init_eam(nall,Uprime_values);
+
+    if (evflag) {
+      if (eflag) {
+	eval<1,1>(ifrom, ito, thr);
+      } else {
+	eval<1,0>(ifrom, ito, thr);
+      }
+    } else {
+      eval<0,0>(ifrom, ito, thr);
+    }
+
+    reduce_thr(this, eflag, vflag, thr);
+  } // end of omp parallel region
+}
+
+template <int EVFLAG, int EFLAG>
+void PairMEAMSplineOMP::eval(int iifrom, int iito, ThrData * const thr)
+{
+  const int* const ilist_full = listfull->ilist;
+  const int* const numneigh_full = listfull->numneigh;
+  const int* const * const firstneigh_full = listfull->firstneigh;
+  
+  // Determine the maximum number of neighbors a single atom has.
+  int myMaxNeighbors = 0;
+  for(int ii = iifrom; ii < iito; ii++) {
+    int jnum = numneigh_full[ilist_full[ii]];
+    if(jnum > myMaxNeighbors) myMaxNeighbors = jnum;
+  }
+
+  // Allocate array for temporary bond info.
+  MEAM2Body *myTwoBodyInfo = new MEAM2Body[myMaxNeighbors];
+
+  const double * const * const x = atom->x;
+  double * const * const forces = thr->get_f();
+  double * const Uprime_thr = thr->get_rho();
+  const int tid = thr->get_tid();
+  const int nthreads = comm->nthreads;
+  const int nlocal = atom->nlocal;
+  const int nall = nlocal + atom->nghost;
+
+  const double cutforcesq = cutoff*cutoff;
+
+  // Sum three-body contributions to charge density and compute embedding energies.
+  for(int ii = iifrom; ii < iito; ii++) {
+
+    const int i = ilist_full[ii];
+    const double xtmp = x[i][0];
+    const double ytmp = x[i][1];
+    const double ztmp = x[i][2];
+    const int* const jlist = firstneigh_full[i];
+    const int jnum = numneigh_full[i];
+    double rho_value = 0;
+    int numBonds = 0;
+    MEAM2Body* nextTwoBodyInfo = myTwoBodyInfo;
+
+    for(int jj = 0; jj < jnum; jj++) {
+      const int j = jlist[jj] & NEIGHMASK;
+
+      const double jdelx = x[j][0] - xtmp;
+      const double jdely = x[j][1] - ytmp;
+      const double jdelz = x[j][2] - ztmp;
+      const double rij_sq = jdelx*jdelx + jdely*jdely + jdelz*jdelz;
+
+      if (rij_sq < cutforcesq) {
+	const double rij = sqrt(rij_sq);
+	double partial_sum = 0;
+
+	nextTwoBodyInfo->tag = j;
+	nextTwoBodyInfo->r = rij;
+	nextTwoBodyInfo->f = f.eval(rij, nextTwoBodyInfo->fprime);
+	nextTwoBodyInfo->del[0] = jdelx / rij;
+	nextTwoBodyInfo->del[1] = jdely / rij;
+	nextTwoBodyInfo->del[2] = jdelz / rij;
+
+	for(int kk = 0; kk < numBonds; kk++) {
+	  const MEAM2Body& bondk = myTwoBodyInfo[kk];
+	  double cos_theta = (nextTwoBodyInfo->del[0]*bondk.del[0] + nextTwoBodyInfo->del[1]*bondk.del[1] + nextTwoBodyInfo->del[2]*bondk.del[2]);
+	  partial_sum += bondk.f * g.eval(cos_theta);
+	}
+
+	rho_value += nextTwoBodyInfo->f * partial_sum;
+	rho_value += rho.eval(rij);
+
+	numBonds++;
+	nextTwoBodyInfo++;
+      }
+    }
+
+    // Compute embedding energy and its derivative.
+    double Uprime_i;
+    double embeddingEnergy = U.eval(rho_value, Uprime_i) - zero_atom_energy;
+    Uprime_thr[i] = Uprime_i;
+    if (EFLAG) {
+      if (eflag_global) eng_vdwl += embeddingEnergy;
+      if (eflag_atom) eatom[i] += embeddingEnergy;
+    }
+
+    double forces_i[3] = {0.0, 0.0, 0.0};
+
+    // Compute three-body contributions to force.
+    for(int jj = 0; jj < numBonds; jj++) {
+      const MEAM2Body bondj = myTwoBodyInfo[jj];
+      const double rij = bondj.r;
+      const int j = bondj.tag;
+
+      const double f_rij_prime = bondj.fprime;
+      const double f_rij = bondj.f;
+
+      double forces_j[3] = {0.0, 0.0, 0.0};
+
+      MEAM2Body const* bondk = myTwoBodyInfo;
+      for(int kk = 0; kk < jj; kk++, ++bondk) {
+	const double rik = bondk->r;
+
+	const double cos_theta = (bondj.del[0]*bondk->del[0] + bondj.del[1]*bondk->del[1] + bondj.del[2]*bondk->del[2]);
+	double g_prime;
+	double g_value = g.eval(cos_theta, g_prime);
+	const double f_rik_prime = bondk->fprime;
+	const double f_rik = bondk->f;
+
+	double fij = -Uprime_i * g_value * f_rik * f_rij_prime;
+	double fik = -Uprime_i * g_value * f_rij * f_rik_prime;
+
+	const double prefactor = Uprime_i * f_rij * f_rik * g_prime;
+	const double prefactor_ij = prefactor / rij;
+	const double prefactor_ik = prefactor / rik;
+	fij += prefactor_ij * cos_theta;
+	fik += prefactor_ik * cos_theta;
+
+	double fj[3], fk[3];
+
+	fj[0] = bondj.del[0] * fij - bondk->del[0] * prefactor_ij;
+	fj[1] = bondj.del[1] * fij - bondk->del[1] * prefactor_ij;
+	fj[2] = bondj.del[2] * fij - bondk->del[2] * prefactor_ij;
+	forces_j[0] += fj[0];
+	forces_j[1] += fj[1];
+	forces_j[2] += fj[2];
+
+	fk[0] = bondk->del[0] * fik - bondj.del[0] * prefactor_ik;
+	fk[1] = bondk->del[1] * fik - bondj.del[1] * prefactor_ik;
+	fk[2] = bondk->del[2] * fik - bondj.del[2] * prefactor_ik;
+	forces_i[0] -= fk[0];
+	forces_i[1] -= fk[1];
+	forces_i[2] -= fk[2];
+
+	const int k = bondk->tag;
+	forces[k][0] += fk[0];
+	forces[k][1] += fk[1];
+	forces[k][2] += fk[2];
+
+	if(EVFLAG) {
+	  double delta_ij[3];
+	  double delta_ik[3];
+	  delta_ij[0] = bondj.del[0] * rij;
+	  delta_ij[1] = bondj.del[1] * rij;
+	  delta_ij[2] = bondj.del[2] * rij;
+	  delta_ik[0] = bondk->del[0] * rik;
+	  delta_ik[1] = bondk->del[1] * rik;
+	  delta_ik[2] = bondk->del[2] * rik;
+	  ev_tally3_thr(this,i,j,k,0.0,0.0,fj,fk,delta_ij,delta_ik,thr);
+	}
+      }
+
+      forces[i][0] -= forces_j[0];
+      forces[i][1] -= forces_j[1];
+      forces[i][2] -= forces_j[2];
+      forces[j][0] += forces_j[0];
+      forces[j][1] += forces_j[1];
+      forces[j][2] += forces_j[2];
+    }
+
+    forces[i][0] += forces_i[0];
+    forces[i][1] += forces_i[1];
+    forces[i][2] += forces_i[2];
+  }
+
+  delete[] myTwoBodyInfo;
+
+  sync_threads();  
+
+  // reduce per thread density
+  data_reduce_thr(Uprime_values, nall, nthreads, 1, tid);
+
+  // wait until reduction is complete so that master thread
+  // can communicate U'(rho) values.
+  sync_threads();
+
+#if defined(_OPENMP)
+#pragma omp master
+#endif
+  { comm->forward_comm_pair(this); }
+
+  // wait until master thread is done with communication
+  sync_threads();
+
+  const int* const ilist_half = listhalf->ilist;
+  const int* const numneigh_half = listhalf->numneigh;
+  const int* const * const firstneigh_half = listhalf->firstneigh;
+
+  // Compute two-body pair interactions.
+  for(int ii = iifrom; ii < iito; ii++) {
+    const int i = ilist_half[ii];
+    const double xtmp = x[i][0];
+    const double ytmp = x[i][1];
+    const double ztmp = x[i][2];
+    const int* const jlist = firstneigh_half[i];
+    const int jnum = numneigh_half[i];
+
+    for(int jj = 0; jj < jnum; jj++) {
+      const int j = jlist[jj] & NEIGHMASK;
+
+      double jdel[3];
+      jdel[0] = x[j][0] - xtmp;
+      jdel[1] = x[j][1] - ytmp;
+      jdel[2] = x[j][2] - ztmp;
+      double rij_sq = jdel[0]*jdel[0] + jdel[1]*jdel[1] + jdel[2]*jdel[2];
+
+      if(rij_sq < cutforcesq) {
+	double rij = sqrt(rij_sq);
+
+	double rho_prime;
+	rho.eval(rij, rho_prime);
+	double fpair = rho_prime * (Uprime_values[i] + Uprime_values[j]);
+
+	double pair_pot_deriv;
+	double pair_pot = phi.eval(rij, pair_pot_deriv);
+	fpair += pair_pot_deriv;
+
+	// Divide by r_ij to get forces from gradient.
+	fpair /= rij;
+
+	forces[i][0] += jdel[0]*fpair;
+	forces[i][1] += jdel[1]*fpair;
+	forces[i][2] += jdel[2]*fpair;
+	forces[j][0] -= jdel[0]*fpair;
+	forces[j][1] -= jdel[1]*fpair;
+	forces[j][2] -= jdel[2]*fpair;
+	if (EVFLAG) ev_tally_thr(this,i,j,nlocal, 1 /* newton_pair */, 
+				 pair_pot,0.0,-fpair,jdel[0],jdel[1],jdel[2],thr);
+      }
+    }
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+double PairMEAMSplineOMP::memory_usage()
+{
+  double bytes = memory_usage_thr();
+  bytes += PairMEAMSpline::memory_usage();
+
+  return bytes;
+}
--- a/src/USER-OMP/pair_meam_spline_omp.h
+++ b/src/USER-OMP/pair_meam_spline_omp.h
@ -0,0 +1,48 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+
+PairStyle(meam/spline/omp,PairMEAMSplineOMP)
+
+#else
+
+#ifndef LMP_PAIR_MEAM_SPLINE_OMP_H
+#define LMP_PAIR_MEAM_SPLINE_OMP_H
+
+#include "pair_meam_spline.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+class PairMEAMSplineOMP : public PairMEAMSpline, public ThrOMP {
+
+ public:
+  PairMEAMSplineOMP(class LAMMPS *);
+
+  virtual void compute(int, int);
+  virtual double memory_usage();
+
+ private:
+  template <int EVFLAG, int EFLAG>
+  void eval(int iifrom, int iito, ThrData * const thr);
+};
+
+}
+
+#endif
+#endif
--- a/src/USER-OMP/pppm_tip4p_omp.cpp
+++ b/src/USER-OMP/pppm_tip4p_omp.cpp
@ -0,0 +1,398 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#include "pppm_tip4p_omp.h"
+#include "atom.h"
+#include "comm.h"
+#include "domain.h"
+#include "force.h"
+#include "memory.h"
+#include "math_const.h"
+
+#include <string.h>
+#include <math.h>
+
+using namespace LAMMPS_NS;
+using namespace MathConst;
+
+#define OFFSET 16384
+
+#ifdef FFT_SINGLE
+#define ZEROF 0.0f
+#define ONEF  1.0f
+#else
+#define ZEROF 0.0
+#define ONEF  1.0
+#endif
+
+/* ---------------------------------------------------------------------- */
+
+PPPMTIP4POMP::PPPMTIP4POMP(LAMMPS *lmp, int narg, char **arg) :
+  PPPMOMP(lmp, narg, arg) {}
+
+/* ---------------------------------------------------------------------- */
+
+// NOTE: special version of reduce_data for FFT_SCALAR data type.
+// reduce per thread data into the first part of the data
+// array that is used for the non-threaded parts and reset
+// the temporary storage to 0.0. this routine depends on
+// multi-dimensional arrays like force stored in this order
+// x1,y1,z1,x2,y2,z2,...
+// we need to post a barrier to wait until all threads are done
+// writing to the array.
+static void data_reduce_fft(FFT_SCALAR *dall, int nall, int nthreads, int ndim, int tid)
+{
+#if defined(_OPENMP)
+  // NOOP in non-threaded execution.
+  if (nthreads == 1) return;
+#pragma omp barrier
+  {
+    const int nvals = ndim*nall;
+    const int idelta = nvals/nthreads + 1;
+    const int ifrom = tid*idelta;
+    const int ito   = ((ifrom + idelta) > nvals) ? nvals : (ifrom + idelta);
+
+    // this if protects against having more threads than atoms
+    if (ifrom < nall) { 
+      for (int m = ifrom; m < ito; ++m) {
+	for (int n = 1; n < nthreads; ++n) {
+	  dall[m] += dall[n*nvals + m];
+	  dall[n*nvals + m] = 0.0;
+	}
+      }
+    }
+  }
+#else
+  // NOOP in non-threaded execution.
+  return;
+#endif
+}
+
+/* ---------------------------------------------------------------------- */
+
+void PPPMTIP4POMP::init()
+{
+  // TIP4P PPPM requires newton on, b/c it computes forces on ghost atoms
+
+  if (force->newton == 0)
+    error->all(FLERR,"Kspace style pppm/tip4p/omp requires newton on");
+
+  PPPMOMP::init();
+}
+
+/* ----------------------------------------------------------------------
+   find center grid pt for each of my particles
+   check that full stencil for the particle will fit in my 3d brick
+   store central grid pt indices in part2grid array 
+------------------------------------------------------------------------- */
+
+void PPPMTIP4POMP::particle_map()
+{
+  const int * const type = atom->type;
+  const double * const * const x = atom->x;
+  const int nlocal = atom->nlocal;
+
+  int i, flag = 0;
+#if defined(_OPENMP)
+#pragma omp parallel for private(i) default(none) reduction(+:flag) schedule(static)
+#endif
+  for (i = 0; i < nlocal; i++) {
+    int nx,ny,nz,iH1,iH2;
+    double xM[3];
+
+    if (type[i] == typeO) {
+      find_M(i,iH1,iH2,xM);      
+    } else {
+      xM[0] = x[i][0];
+      xM[1] = x[i][1];
+      xM[2] = x[i][2];
+    }
+
+    // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
+    // current particle coord can be outside global and local box
+    // add/subtract OFFSET to avoid int(-0.75) = 0 when want it to be -1
+
+    nx = static_cast<int> ((xM[0]-boxlo[0])*delxinv+shift) - OFFSET;
+    ny = static_cast<int> ((xM[1]-boxlo[1])*delyinv+shift) - OFFSET;
+    nz = static_cast<int> ((xM[2]-boxlo[2])*delzinv+shift) - OFFSET;
+
+    part2grid[i][0] = nx;
+    part2grid[i][1] = ny;
+    part2grid[i][2] = nz;
+
+    // check that entire stencil around nx,ny,nz will fit in my 3d brick
+
+    if (nx+nlower < nxlo_out || nx+nupper > nxhi_out ||
+	ny+nlower < nylo_out || ny+nupper > nyhi_out ||
+	nz+nlower < nzlo_out || nz+nupper > nzhi_out) flag++;
+  }
+
+  int flag_all;
+  MPI_Allreduce(&flag,&flag_all,1,MPI_INT,MPI_SUM,world);
+  if (flag_all) error->all(FLERR,"Out of range atoms - cannot compute PPPM");
+}
+
+/* ----------------------------------------------------------------------
+   create discretized "density" on section of global grid due to my particles
+   density(x,y,z) = charge "density" at grid points of my 3d brick
+   (nxlo:nxhi,nylo:nyhi,nzlo:nzhi) is extent of my brick (including ghosts)
+   in global grid 
+------------------------------------------------------------------------- */
+
+void PPPMTIP4POMP::make_rho()
+{
+  const double * const q = atom->q;
+  const double * const * const x = atom->x;
+  const int * const type = atom->type;
+  const int nthreads = comm->nthreads;
+  const int nlocal = atom->nlocal;
+
+#if defined(_OPENMP)
+#pragma omp parallel default(none)
+#endif
+  {  
+#if defined(_OPENMP)
+    // each thread works on a fixed chunk of atoms.
+    const int tid = omp_get_thread_num();
+    const int inum = nlocal;
+    const int idelta = 1 + inum/nthreads;
+    const int ifrom = tid*idelta;
+    const int ito = ((ifrom + idelta) > inum) ? inum : ifrom + idelta;
+#else
+    const int tid = 0;
+    const int ifrom = 0;
+    const int ito = nlocal;
+#endif
+
+    int l,m,n,nx,ny,nz,mx,my,mz,iH1,iH2;
+    FFT_SCALAR dx,dy,dz,x0,y0,z0;
+    double xM[3];
+
+    // set up clear 3d density array
+    const int nzoffs = (nzhi_out-nzlo_out+1)*tid;
+    FFT_SCALAR * const * const * const db = &(density_brick[nzoffs]);
+    memset(&(db[nzlo_out][nylo_out][nxlo_out]),0,ngrid*sizeof(FFT_SCALAR));
+
+    ThrData *thr = fix->get_thr(tid);
+    FFT_SCALAR * const * const r1d = static_cast<FFT_SCALAR **>(thr->get_rho1d());
+
+    // loop over my charges, add their contribution to nearby grid points
+    // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
+    // (dx,dy,dz) = distance to "lower left" grid pt
+    // (mx,my,mz) = global coords of moving stencil pt
+    
+    // this if protects against having more threads than local atoms
+    if (ifrom < nlocal) { 
+      for (int i = ifrom; i < ito; i++) {
+
+	if (type[i] == typeO) {
+	  find_M(i,iH1,iH2,xM);      
+	} else {
+	  xM[0] = x[i][0];
+	  xM[1] = x[i][1];
+	  xM[2] = x[i][2];
+	}
+
+	nx = part2grid[i][0];
+	ny = part2grid[i][1];
+	nz = part2grid[i][2];
+	dx = nx+shiftone - (xM[0]-boxlo[0])*delxinv;
+	dy = ny+shiftone - (xM[1]-boxlo[1])*delyinv;
+	dz = nz+shiftone - (xM[2]-boxlo[2])*delzinv;
+
+	compute_rho1d_thr(r1d,dx,dy,dz);
+
+	z0 = delvolinv * q[i];
+	for (n = nlower; n <= nupper; n++) {
+	  mz = n+nz;
+	  y0 = z0*r1d[2][n];
+	  for (m = nlower; m <= nupper; m++) {
+	    my = m+ny;
+	    x0 = y0*r1d[1][m];
+	    for (l = nlower; l <= nupper; l++) {
+	      mx = l+nx;
+	      db[mz][my][mx] += x0*r1d[0][l];
+	    }
+	  }
+	}
+      }
+    }
+#if defined(_OPENMP)
+    // reduce 3d density array
+    if (nthreads > 1) {
+      data_reduce_fft(&(density_brick[nzlo_out][nylo_out][nxlo_out]),ngrid,nthreads,1,tid);
+    }
+#endif
+  }
+}
+
+/* ----------------------------------------------------------------------
+   interpolate from grid to get electric field & force on my particles 
+------------------------------------------------------------------------- */
+
+void PPPMTIP4POMP::fieldforce()
+{
+  // loop over my charges, interpolate electric field from nearby grid points
+  // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
+  // (dx,dy,dz) = distance to "lower left" grid pt
+  // (mx,my,mz) = global coords of moving stencil pt
+  // ek = 3 components of E-field on particle
+
+  const double * const q = atom->q;
+  const double * const * const x = atom->x;
+  const int * const type = atom->type;
+  const int nthreads = comm->nthreads;
+  const int nlocal = atom->nlocal;
+  const double qqrd2e = force->qqrd2e;
+
+#if defined(_OPENMP)
+#pragma omp parallel default(none)
+#endif
+  {  
+#if defined(_OPENMP)
+    // each thread works on a fixed chunk of atoms.
+    const int tid = omp_get_thread_num();
+    const int inum = nlocal;
+    const int idelta = 1 + inum/nthreads;
+    const int ifrom = tid*idelta;
+    const int ito = ((ifrom + idelta) > inum) ? inum : ifrom + idelta;
+#else
+    const int ifrom = 0;
+    const int ito = nlocal;
+    const int tid = 0;
+#endif
+    ThrData *thr = fix->get_thr(tid);
+    double * const * const f = thr->get_f();
+    FFT_SCALAR * const * const r1d =  static_cast<FFT_SCALAR **>(thr->get_rho1d());
+    
+    int l,m,n,nx,ny,nz,mx,my,mz;
+    FFT_SCALAR dx,dy,dz,x0,y0,z0;
+    FFT_SCALAR ekx,eky,ekz;
+    int iH1,iH2;
+    double xM[3], fx,fy,fz;
+    double ddotf, rOMx, rOMy, rOMz, f1x, f1y, f1z;
+
+    // this if protects against having more threads than local atoms
+    if (ifrom < nlocal) { 
+      for (int i = ifrom; i < ito; i++) {
+
+	if (type[i] == typeO) {
+	  find_M(i,iH1,iH2,xM);      
+	} else {
+	  xM[0] = x[i][0];
+	  xM[1] = x[i][1];
+	  xM[2] = x[i][2];
+	}
+
+	nx = part2grid[i][0];
+	ny = part2grid[i][1];
+	nz = part2grid[i][2];
+	dx = nx+shiftone - (xM[0]-boxlo[0])*delxinv;
+	dy = ny+shiftone - (xM[1]-boxlo[1])*delyinv;
+	dz = nz+shiftone - (xM[2]-boxlo[2])*delzinv;
+
+	compute_rho1d_thr(r1d,dx,dy,dz);
+
+	ekx = eky = ekz = ZEROF;
+	for (n = nlower; n <= nupper; n++) {
+	  mz = n+nz;
+	  z0 = r1d[2][n];
+	  for (m = nlower; m <= nupper; m++) {
+	    my = m+ny;
+	    y0 = z0*r1d[1][m];
+	    for (l = nlower; l <= nupper; l++) {
+	      mx = l+nx;
+	      x0 = y0*r1d[0][l];
+	      ekx -= x0*vdx_brick[mz][my][mx];
+	      eky -= x0*vdy_brick[mz][my][mx];
+	      ekz -= x0*vdz_brick[mz][my][mx];
+	    }
+	  }
+	}
+
+	// convert E-field to force
+	const double qfactor = qqrd2e*scale*q[i];
+
+	if (type[i] != typeO) {
+	  f[i][0] += qfactor*ekx;
+	  f[i][1] += qfactor*eky;
+	  f[i][2] += qfactor*ekz;
+
+	} else {
+	  fx = qfactor * ekx;
+	  fy = qfactor * eky;
+	  fz = qfactor * ekz;
+	  find_M(i,iH1,iH2,xM);
+
+	  rOMx = xM[0] - x[i][0];
+	  rOMy = xM[1] - x[i][1];
+	  rOMz = xM[2] - x[i][2];
+  
+	  ddotf = (rOMx * fx + rOMy * fy + rOMz * fz) / (qdist * qdist);
+
+	  f1x = ddotf * rOMx;
+	  f1y = ddotf * rOMy;
+	  f1z = ddotf * rOMz;
+
+	  f[i][0] += fx - alpha * (fx - f1x);
+	  f[i][1] += fy - alpha * (fy - f1y);
+	  f[i][2] += fz - alpha * (fz - f1z);
+
+	  f[iH1][0] += 0.5*alpha*(fx - f1x);
+	  f[iH1][1] += 0.5*alpha*(fy - f1y);
+	  f[iH1][2] += 0.5*alpha*(fz - f1z);
+
+	  f[iH2][0] += 0.5*alpha*(fx - f1x);
+	  f[iH2][1] += 0.5*alpha*(fy - f1y);
+	  f[iH2][2] += 0.5*alpha*(fz - f1z);
+	}
+      }
+    }
+  }
+}
+
+/* ----------------------------------------------------------------------
+  find 2 H atoms bonded to O atom i
+  compute position xM of fictitious charge site for O atom
+  also return local indices iH1,iH2 of H atoms
+------------------------------------------------------------------------- */
+
+void PPPMTIP4POMP::find_M(int i, int &iH1, int &iH2, double *xM)
+{
+  iH1 = atom->map(atom->tag[i] + 1);
+  iH2 = atom->map(atom->tag[i] + 2);
+
+  if (iH1 == -1 || iH2 == -1) error->one(FLERR,"TIP4P hydrogen is missing");
+  if (atom->type[iH1] != typeH || atom->type[iH2] != typeH)
+    error->one(FLERR,"TIP4P hydrogen has incorrect atom type");
+
+  const double * const * const x = atom->x; 
+
+  double delx1 = x[iH1][0] - x[i][0];
+  double dely1 = x[iH1][1] - x[i][1];
+  double delz1 = x[iH1][2] - x[i][2];
+  domain->minimum_image(delx1,dely1,delz1);
+
+  double delx2 = x[iH2][0] - x[i][0];
+  double dely2 = x[iH2][1] - x[i][1];
+  double delz2 = x[iH2][2] - x[i][2];
+  domain->minimum_image(delx2,dely2,delz2);
+
+  xM[0] = x[i][0] + alpha * 0.5 * (delx1 + delx2);
+  xM[1] = x[i][1] + alpha * 0.5 * (dely1 + dely2);
+  xM[2] = x[i][2] + alpha * 0.5 * (delz1 + delz2);
+}
--- a/src/USER-OMP/pppm_tip4p_omp.h
+++ b/src/USER-OMP/pppm_tip4p_omp.h
@ -0,0 +1,56 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifdef KSPACE_CLASS
+
+KSpaceStyle(pppm/tip4p/omp,PPPMTIP4POMP)
+
+#else
+
+#ifndef LMP_PPPM_TIP4P_OMP_H
+#define LMP_PPPM_TIP4P_OMP_H
+
+#include "pppm_omp.h"
+
+namespace LAMMPS_NS {
+
+class PPPMTIP4POMP : public PPPMOMP {
+ public:
+  PPPMTIP4POMP(class LAMMPS *, int, char **);
+  virtual ~PPPMTIP4POMP () {};
+  virtual void init();
+    
+ protected:
+  virtual void particle_map();
+  virtual void fieldforce();
+  virtual void make_rho();
+
+ private:
+  void find_M(int, int &, int &, double *); 
+
+//  void slabcorr(int);
+
+};
+
+}
+
+#endif
+#endif
+
+/* ERROR/WARNING messages:
+
+E: Kspace style pppm/tip4p/omp requires newton on
+
+Self-explanatory.
+
+*/