git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@7466 f3b2605a-c512-4ea7-a41b-209d697bcdaa

2012-01-09 15:22:33 +00:00 · 2012-01-09 15:22:33 +00:00 · 9e4fd659e9
parent 619f3e50e2
commit 9e4fd659e9
16 changed files with 3035 additions and 0 deletions
--- a/src/USER-OMP/pair_born_coul_wolf_omp.cpp
+++ b/src/USER-OMP/pair_born_coul_wolf_omp.cpp
@ -0,0 +1,202 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   This software is distributed under the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#include "math.h"
+#include "pair_born_coul_wolf_omp.h"
+#include "atom.h"
+#include "comm.h"
+#include "force.h"
+#include "neighbor.h"
+#include "neigh_list.h"
+#include "math_const.h"
+
+using namespace LAMMPS_NS;
+using namespace MathConst;
+
+/* ---------------------------------------------------------------------- */
+
+PairBornCoulWolfOMP::PairBornCoulWolfOMP(LAMMPS *lmp) :
+  PairBornCoulWolf(lmp), ThrOMP(lmp, THR_PAIR)
+{
+  respa_enable = 0;
+}
+
+/* ---------------------------------------------------------------------- */
+
+void PairBornCoulWolfOMP::compute(int eflag, int vflag)
+{
+  if (eflag || vflag) {
+    ev_setup(eflag,vflag);
+  } else evflag = vflag_fdotr = 0;
+
+  const int nall = atom->nlocal + atom->nghost;
+  const int nthreads = comm->nthreads;
+  const int inum = list->inum;
+
+#if defined(_OPENMP)
+#pragma omp parallel default(none) shared(eflag,vflag)
+#endif
+  {
+    int ifrom, ito, tid;
+
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
+
+    if (evflag) {
+      if (eflag) {
+	if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
+      } else {
+	if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
+      }
+    } else {
+      if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
+    }
+
+    reduce_thr(this, eflag, vflag, thr);
+  } // end of omp parallel region
+}
+
+/* ---------------------------------------------------------------------- */
+
+template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
+void PairBornCoulWolfOMP::eval(int iifrom, int iito, ThrData * const thr)
+{
+  int i,j,ii,jj,jnum,itype,jtype;
+  double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair;
+  double rsq,r2inv,r6inv,forcecoul,forceborn,factor_coul,factor_lj;
+  double prefactor;
+  double r,rexp;
+  int *ilist,*jlist,*numneigh,**firstneigh;
+  double erfcc,erfcd,v_sh,dvdrr,e_self,e_shift,f_shift,qisq;
+
+  evdwl = ecoul = 0.0;
+
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  const double * const q = atom->q;
+  const int * const type = atom->type;
+  int nlocal = atom->nlocal;
+  double *special_coul = force->special_coul;
+  double *special_lj = force->special_lj;
+  double qqrd2e = force->qqrd2e;
+  double fxtmp,fytmp,fztmp;
+
+  // self and shifted coulombic energy
+
+  e_self = v_sh = 0.0; 
+  e_shift = erfc(alf*cut_coul)/cut_coul;
+  f_shift = -(e_shift+ 2.0*alf/MY_PIS * exp(-alf*alf*cut_coul*cut_coul)) / 
+    cut_coul; 
+
+  ilist = list->ilist;
+  numneigh = list->numneigh;
+  firstneigh = list->firstneigh;
+
+  // loop over neighbors of my atoms
+
+  for (ii = iifrom; ii < iito; ++ii) {
+
+    i = ilist[ii];
+    qtmp = q[i];
+    xtmp = x[i][0];
+    ytmp = x[i][1];
+    ztmp = x[i][2];
+    itype = type[i];
+    jlist = firstneigh[i];
+    jnum = numneigh[i];
+    fxtmp=fytmp=fztmp=0.0;
+
+    qisq = qtmp*qtmp;
+    e_self = -(e_shift/2.0 + alf/MY_PIS) * qisq*qqrd2e;
+    if (EVFLAG) ev_tally_thr(this,i,i,nlocal,0,0.0,e_self,0.0,0.0,0.0,0.0,thr);
+
+    for (jj = 0; jj < jnum; jj++) {
+      j = jlist[jj];
+      factor_lj = special_lj[sbmask(j)];
+      factor_coul = special_coul[sbmask(j)];
+      j &= NEIGHMASK;
+
+      delx = xtmp - x[j][0];
+      dely = ytmp - x[j][1];
+      delz = ztmp - x[j][2];
+      rsq = delx*delx + dely*dely + delz*delz;
+      jtype = type[j];
+
+      if (rsq < cutsq[itype][jtype]) {
+	r2inv = 1.0/rsq;
+	r = sqrt(rsq);
+
+	if (rsq < cut_coulsq) {
+          prefactor = qqrd2e*qtmp*q[j]/r;
+          erfcc = erfc(alf*r); 
+          erfcd = exp(-alf*alf*r*r);
+          v_sh = (erfcc - e_shift*r) * prefactor; 
+          dvdrr = (erfcc/rsq + 2.0*alf/MY_PIS * erfcd/r) + f_shift;
+          forcecoul = dvdrr*rsq*prefactor;
+	  if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*prefactor;
+	} else forcecoul = 0.0;
+
+	if (rsq < cut_ljsq[itype][jtype]) {
+	  r6inv = r2inv*r2inv*r2inv;
+	  rexp = exp((sigma[itype][jtype]-r)*rhoinv[itype][jtype]);
+	  forceborn = born1[itype][jtype]*r*rexp - born2[itype][jtype]*r6inv
+	    + born3[itype][jtype]*r2inv*r6inv;
+	} else forceborn = 0.0;
+
+	fpair = (forcecoul + factor_lj*forceborn)*r2inv;
+
+	fxtmp += delx*fpair;
+	fytmp += dely*fpair;
+	fztmp += delz*fpair;
+	if (NEWTON_PAIR || j < nlocal) {
+	  f[j][0] -= delx*fpair;
+	  f[j][1] -= dely*fpair;
+	  f[j][2] -= delz*fpair;
+	}
+
+	if (EFLAG) {
+	  if (rsq < cut_coulsq) {
+	    ecoul = v_sh;
+	    if (factor_coul < 1.0) ecoul -= (1.0-factor_coul)*prefactor;
+	  } else ecoul = 0.0;
+	  if (rsq < cut_ljsq[itype][jtype]) {
+	    evdwl = a[itype][jtype]*rexp - c[itype][jtype]*r6inv +
+	      d[itype][jtype]*r6inv*r2inv - offset[itype][jtype];
+	    evdwl *= factor_lj;
+	  } else evdwl = 0.0;
+	}
+
+	if (EVFLAG) ev_tally_thr(this,i,j,nlocal,NEWTON_PAIR,
+				 evdwl,ecoul,fpair,delx,dely,delz,thr);
+      }
+    }
+    f[i][0] += fxtmp;
+    f[i][1] += fytmp;
+    f[i][2] += fztmp;
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+double PairBornCoulWolfOMP::memory_usage()
+{
+  double bytes = memory_usage_thr();
+  bytes += PairBornCoulWolf::memory_usage();
+
+  return bytes;
+}
--- a/src/USER-OMP/pair_born_coul_wolf_omp.h
+++ b/src/USER-OMP/pair_born_coul_wolf_omp.h
@ -0,0 +1,48 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+
+PairStyle(born/coul/wolf/omp,PairBornCoulWolfOMP)
+
+#else
+
+#ifndef LMP_PAIR_BORN_COUL_WOLF_OMP_H
+#define LMP_PAIR_BORN_COUL_WOLF_OMP_H
+
+#include "pair_born_coul_wolf.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+class PairBornCoulWolfOMP : public PairBornCoulWolf, public ThrOMP {
+
+ public:
+  PairBornCoulWolfOMP(class LAMMPS *);
+
+  virtual void compute(int, int);
+  virtual double memory_usage();
+
+ private:
+  template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
+  void eval(int ifrom, int ito, ThrData * const thr);
+};
+
+}
+
+#endif
+#endif
--- a/src/USER-OMP/pair_brownian_omp.cpp
+++ b/src/USER-OMP/pair_brownian_omp.cpp
@ -0,0 +1,343 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   This software is distributed under the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#include "math.h"
+#include "pair_brownian_omp.h"
+#include "atom.h"
+#include "comm.h"
+#include "force.h"
+#include "neighbor.h"
+#include "neigh_list.h"
+#include "update.h"
+#include "random_mars.h"
+#include "math_const.h"
+
+using namespace LAMMPS_NS;
+using namespace MathConst;
+
+#define EPSILON 1.0e-10
+
+/* ---------------------------------------------------------------------- */
+
+PairBrownianOMP::PairBrownianOMP(LAMMPS *lmp) :
+  PairBrownian(lmp), ThrOMP(lmp, THR_PAIR)
+{
+  respa_enable = 0;
+  random_thr = NULL;
+}
+
+/* ---------------------------------------------------------------------- */
+
+PairBrownianOMP::~PairBrownianOMP() 
+{
+  if (random_thr) {
+    for (int i=1; i < comm->nthreads; ++i)
+      delete random_thr[i];
+
+    delete[] random_thr;
+    random_thr = NULL;
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+void PairBrownianOMP::compute(int eflag, int vflag)
+{
+  if (eflag || vflag) {
+    ev_setup(eflag,vflag);
+  } else evflag = vflag_fdotr = 0;
+
+  const int nall = atom->nlocal + atom->nghost;
+  const int nthreads = comm->nthreads;
+  const int inum = list->inum;
+
+  if (!random_thr)
+    random_thr = new RanMars*[nthreads];
+
+  // to ensure full compatibility with the serial Brownian style
+  // we use is random number generator instance for thread 0
+  random_thr[0] = random;
+
+#if defined(_OPENMP)
+#pragma omp parallel default(none) shared(eflag,vflag)
+#endif
+  {
+    int ifrom, ito, tid;
+
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
+
+    // generate a random number generator instance for
+    // all threads != 0. make sure we use unique seeds.
+    if (random_thr && tid > 0)
+      random_thr[tid] = new RanMars(Pair::lmp, seed + comm->me 
+				    + comm->nprocs*tid);
+    if (flaglog) {
+      if (evflag) {
+	if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
+      } else {
+	if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
+      }
+    } else { 
+      if (evflag) {
+	if (force->newton_pair) eval<0,1,1>(ifrom, ito, thr);
+	else eval<0,1,0>(ifrom, ito, thr);
+      } else {
+	if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr);
+	else eval<0,0,0>(ifrom, ito, thr);
+      }
+    }
+
+    reduce_thr(this, eflag, vflag, thr);
+  } // end of omp parallel region
+}
+
+template <int FLAGLOG, int EVFLAG, int NEWTON_PAIR>
+void PairBrownianOMP::eval(int iifrom, int iito, ThrData * const thr)
+{
+  int i,j,ii,jj,jnum,itype,jtype;
+  double xtmp,ytmp,ztmp,delx,dely,delz,fx,fy,fz,tx,ty,tz;
+  double rsq,r,h_sep,radi;
+  int *ilist,*jlist,*numneigh,**firstneigh;
+
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  double * const * const torque = thr->get_torque();
+  const double * const radius = atom->radius;
+  const int * const type = atom->type;
+  const int nlocal = atom->nlocal;
+
+  RanMars &rng = *random_thr[thr->get_tid()];
+
+  double vxmu2f = force->vxmu2f;
+  double randr;
+  double prethermostat;
+  double xl[3],a_sq,a_sh,a_pu,Fbmag;
+  double p1[3],p2[3],p3[3];
+  int overlaps = 0;
+
+  // scale factor for Brownian moments
+
+  prethermostat = sqrt(24.0*force->boltz*t_target/update->dt);
+  prethermostat *= sqrt(force->vxmu2f/force->ftm2v/force->mvv2e);
+
+  ilist = list->ilist;
+  numneigh = list->numneigh;
+  firstneigh = list->firstneigh;
+
+  // loop over neighbors of my atoms
+
+  for (ii = iifrom; ii < iito; ++ii) {
+    i = ilist[ii];
+    xtmp = x[i][0];
+    ytmp = x[i][1];
+    ztmp = x[i][2];
+    itype = type[i];
+    radi = radius[i];
+    jlist = firstneigh[i];
+    jnum = numneigh[i];       
+    
+    // FLD contribution to force and torque due to isotropic terms
+
+    if (flagfld) {
+      f[i][0] += prethermostat*sqrt(R0)*(rng.uniform()-0.5);
+      f[i][1] += prethermostat*sqrt(R0)*(rng.uniform()-0.5);
+      f[i][2] += prethermostat*sqrt(R0)*(rng.uniform()-0.5);
+      if (FLAGLOG) {
+	torque[i][0] += prethermostat*sqrt(RT0)*(rng.uniform()-0.5);
+	torque[i][1] += prethermostat*sqrt(RT0)*(rng.uniform()-0.5);
+	torque[i][2] += prethermostat*sqrt(RT0)*(rng.uniform()-0.5);
+      }
+    }
+    
+    for (jj = 0; jj < jnum; jj++) {
+      j = jlist[jj];
+      j &= NEIGHMASK;
+
+      delx = xtmp - x[j][0];
+      dely = ytmp - x[j][1];
+      delz = ztmp - x[j][2];
+      rsq = delx*delx + dely*dely + delz*delz;
+      jtype = type[j];
+
+      if (rsq < cutsq[itype][jtype]) {
+        r = sqrt(rsq);      
+          
+        // scalar resistances a_sq and a_sh
+
+        h_sep = r - 2.0*radi;
+        
+        // check for overlaps
+
+        if (h_sep < 0.0) overlaps++;
+        
+        // if less than minimum gap, use minimum gap instead
+
+        if (r < cut_inner[itype][jtype])
+          h_sep = cut_inner[itype][jtype] - 2.0*radi;          
+        
+        // scale h_sep by radi
+
+        h_sep = h_sep/radi;
+  
+        // scalar resistances
+
+        if (FLAGLOG) {
+          a_sq = 6.0*MY_PI*mu*radi*(1.0/4.0/h_sep + 9.0/40.0*log(1.0/h_sep));
+          a_sh = 6.0*MY_PI*mu*radi*(1.0/6.0*log(1.0/h_sep));
+          a_pu = 8.0*MY_PI*mu*pow(radi,3)*(3.0/160.0*log(1.0/h_sep));
+        } else
+          a_sq = 6.0*MY_PI*mu*radi*(1.0/4.0/h_sep);
+         
+        // generate the Pairwise Brownian Force: a_sq
+
+        Fbmag = prethermostat*sqrt(a_sq);
+        
+        // generate a random number
+
+        randr = rng.uniform()-0.5;
+          
+        // contribution due to Brownian motion
+
+        fx = Fbmag*randr*delx/r;
+        fy = Fbmag*randr*dely/r;
+        fz = Fbmag*randr*delz/r;
+        
+        // add terms due to a_sh
+
+        if (FLAGLOG) {
+
+          // generate two orthogonal vectors to the line of centers
+
+          p1[0] = delx/r; p1[1] = dely/r; p1[2] = delz/r;                    
+          set_3_orthogonal_vectors(p1,p2,p3);
+          
+          // magnitude
+
+          Fbmag = prethermostat*sqrt(a_sh);          
+          
+          // force in each of the two directions
+
+          randr = rng.uniform()-0.5;
+          fx += Fbmag*randr*p2[0];
+          fy += Fbmag*randr*p2[1];
+          fz += Fbmag*randr*p2[2];
+          
+          randr = rng.uniform()-0.5;
+          fx += Fbmag*randr*p3[0];
+          fy += Fbmag*randr*p3[1];
+          fz += Fbmag*randr*p3[2];          
+        }       
+        
+        // scale forces to appropriate units
+
+        fx = vxmu2f*fx;
+        fy = vxmu2f*fy;
+        fz = vxmu2f*fz;
+        
+        // sum to total force
+
+        f[i][0] -= fx;
+        f[i][1] -= fy;
+        f[i][2] -= fz;   
+
+	if (NEWTON_PAIR || j < nlocal) {
+	  //randr = rng.uniform()-0.5;
+	  //fx = Fbmag*randr*delx/r;
+	  //fy = Fbmag*randr*dely/r;
+	  //fz = Fbmag*randr*delz/r;
+
+	  f[j][0] += fx;
+	  f[j][1] += fy;
+	  f[j][2] += fz;  
+	}
+        
+        // torque due to the Brownian Force
+
+        if (FLAGLOG) {
+
+          // location of the point of closest approach on I from its center
+
+          xl[0] = -delx/r*radi;
+          xl[1] = -dely/r*radi;
+          xl[2] = -delz/r*radi;
+          
+          // torque = xl_cross_F
+
+          tx = xl[1]*fz - xl[2]*fy;
+          ty = xl[2]*fx - xl[0]*fz;
+          tz = xl[0]*fy - xl[1]*fx;              
+        
+          // torque is same on both particles
+
+          torque[i][0] -= tx;
+          torque[i][1] -= ty;
+          torque[i][2] -= tz;          
+        
+	  if (NEWTON_PAIR || j < nlocal) {
+	    torque[j][0] -= tx;
+	    torque[j][1] -= ty;
+	    torque[j][2] -= tz;       
+	  }
+
+          // torque due to a_pu
+
+          Fbmag = prethermostat*sqrt(a_pu);         
+          
+          // force in each direction
+
+          randr = rng.uniform()-0.5;
+          tx = Fbmag*randr*p2[0];
+          ty = Fbmag*randr*p2[1];
+          tz = Fbmag*randr*p2[2];
+          
+          randr = rng.uniform()-0.5;
+          tx += Fbmag*randr*p3[0];
+          ty += Fbmag*randr*p3[1];
+          tz += Fbmag*randr*p3[2];    
+          
+          // torque has opposite sign on two particles
+
+          torque[i][0] -= tx;
+          torque[i][1] -= ty;
+          torque[i][2] -= tz;          
+        
+	  if (NEWTON_PAIR || j < nlocal) {
+	    torque[j][0] += tx;
+	    torque[j][1] += ty;
+	    torque[j][2] += tz;  
+	  }     
+        }
+
+        if (EVFLAG) ev_tally_xyz(i,j,nlocal,NEWTON_PAIR,
+				 0.0,0.0,-fx,-fy,-fz,delx,dely,delz);
+      }
+    }
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+double PairBrownianOMP::memory_usage()
+{
+  double bytes = memory_usage_thr();
+  bytes += PairBrownian::memory_usage();
+  bytes += comm->nthreads * sizeof(RanMars*);
+  bytes += comm->nthreads * sizeof(RanMars);
+
+  return bytes;
+}
--- a/src/USER-OMP/pair_brownian_omp.h
+++ b/src/USER-OMP/pair_brownian_omp.h
@ -0,0 +1,52 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+
+PairStyle(brownian/omp,PairBrownianOMP)
+
+#else
+
+#ifndef LMP_PAIR_BROWNIAN_OMP_H
+#define LMP_PAIR_BROWNIAN_OMP_H
+
+#include "pair_brownian.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+class PairBrownianOMP : public PairBrownian, public ThrOMP {
+
+ public:
+  PairBrownianOMP(class LAMMPS *);
+  virtual ~PairBrownianOMP();
+
+  virtual void compute(int, int);
+  virtual double memory_usage();
+
+ protected:
+  class RanMars **random_thr;
+
+ private:
+  template <int LOGFLAG, int EVFLAG, int NEWTON_PAIR>
+  void eval(int ifrom, int ito, ThrData * const thr);
+};
+
+}
+
+#endif
+#endif
--- a/src/USER-OMP/pair_brownian_poly_omp.cpp
+++ b/src/USER-OMP/pair_brownian_poly_omp.cpp
@ -0,0 +1,330 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   This software is distributed under the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#include "math.h"
+#include "pair_brownian_poly_omp.h"
+#include "atom.h"
+#include "comm.h"
+#include "force.h"
+#include "neighbor.h"
+#include "neigh_list.h"
+#include "update.h"
+#include "random_mars.h"
+#include "math_const.h"
+
+using namespace LAMMPS_NS;
+using namespace MathConst;
+
+#define EPSILON 1.0e-10
+
+/* ---------------------------------------------------------------------- */
+
+PairBrownianPolyOMP::PairBrownianPolyOMP(LAMMPS *lmp) :
+  PairBrownianPoly(lmp), ThrOMP(lmp, THR_PAIR)
+{
+  respa_enable = 0;
+  random_thr = NULL;
+}
+
+/* ---------------------------------------------------------------------- */
+
+PairBrownianPolyOMP::~PairBrownianPolyOMP() 
+{
+  if (random_thr) {
+    for (int i=1; i < comm->nthreads; ++i)
+      delete random_thr[i];
+
+    delete[] random_thr;
+    random_thr = NULL;
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+void PairBrownianPolyOMP::compute(int eflag, int vflag)
+{
+  if (eflag || vflag) {
+    ev_setup(eflag,vflag);
+  } else evflag = vflag_fdotr = 0;
+
+  const int nall = atom->nlocal + atom->nghost;
+  const int nthreads = comm->nthreads;
+  const int inum = list->inum;
+
+  if (!random_thr)
+    random_thr = new RanMars*[nthreads];
+
+  // to ensure full compatibility with the serial BrownianPoly style
+  // we use is random number generator instance for thread 0
+  random_thr[0] = random;
+
+#if defined(_OPENMP)
+#pragma omp parallel default(none) shared(eflag,vflag)
+#endif
+  {
+    int ifrom, ito, tid;
+
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
+
+    // generate a random number generator instance for
+    // all threads != 0. make sure we use unique seeds.
+    if (random_thr && tid > 0)
+      random_thr[tid] = new RanMars(Pair::lmp, seed + comm->me 
+				    + comm->nprocs*tid);
+    if (flaglog) {
+      if (evflag)
+	eval<1,1>(ifrom, ito, thr);
+      else
+	eval<1,0>(ifrom, ito, thr);
+    } else { 
+      if (evflag)
+	eval<0,1>(ifrom, ito, thr);
+      else eval<0,0>(ifrom, ito, thr);
+    }
+
+    reduce_thr(this, eflag, vflag, thr);
+  } // end of omp parallel region
+}
+
+template <int FLAGLOG, int EVFLAG>
+void PairBrownianPolyOMP::eval(int iifrom, int iito, ThrData * const thr)
+{
+  int i,j,ii,jj,jnum,itype,jtype;
+  double xtmp,ytmp,ztmp,delx,dely,delz,fx,fy,fz,tx,ty,tz;
+  double rsq,r,h_sep,beta0,beta1,radi,radj;
+  int *ilist,*jlist,*numneigh,**firstneigh;
+
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  double * const * const torque = thr->get_torque();
+  const double * const radius = atom->radius;
+  const int * const type = atom->type;
+  const int nlocal = atom->nlocal;
+
+  RanMars &rng = *random_thr[thr->get_tid()];
+
+  double vxmu2f = force->vxmu2f;
+  int overlaps = 0;
+  double randr;
+  double prethermostat;
+  double xl[3],a_sq,a_sh,a_pu,Fbmag;
+  double p1[3],p2[3],p3[3];
+  
+  // scale factor for Brownian moments
+
+  prethermostat = sqrt(24.0*force->boltz*t_target/update->dt);
+  prethermostat *= sqrt(force->vxmu2f/force->ftm2v/force->mvv2e);
+
+  ilist = list->ilist;
+  numneigh = list->numneigh;
+  firstneigh = list->firstneigh;
+
+  // loop over neighbors of my atoms
+
+  for (ii = iifrom; ii < iito; ++ii) {
+    i = ilist[ii];
+    xtmp = x[i][0];
+    ytmp = x[i][1];
+    ztmp = x[i][2];
+    itype = type[i];
+    radi = radius[i];
+    jlist = firstneigh[i];
+    jnum = numneigh[i];       
+    
+    // FLD contribution to force and torque due to isotropic terms
+
+    if (flagfld) {
+      f[i][0] += prethermostat*sqrt(R0)*(rng.uniform()-0.5);
+      f[i][1] += prethermostat*sqrt(R0)*(rng.uniform()-0.5);
+      f[i][2] += prethermostat*sqrt(R0)*(rng.uniform()-0.5);
+      if (FLAGLOG) {
+	const double rad3 = radi*radi*radi;
+	torque[i][0] += prethermostat*sqrt(RT0*rad3)*(rng.uniform()-0.5);
+	torque[i][1] += prethermostat*sqrt(RT0*rad3)*(rng.uniform()-0.5);
+	torque[i][2] += prethermostat*sqrt(RT0*rad3)*(rng.uniform()-0.5);
+      }
+    }
+    
+    for (jj = 0; jj < jnum; jj++) {
+      j = jlist[jj];
+      j &= NEIGHMASK;
+
+      delx = xtmp - x[j][0];
+      dely = ytmp - x[j][1];
+      delz = ztmp - x[j][2];
+      rsq = delx*delx + dely*dely + delz*delz;
+      jtype = type[j];
+      radj = radius[j];
+
+      if (rsq < cutsq[itype][jtype]) {
+        r = sqrt(rsq);      
+          
+        // scalar resistances a_sq and a_sh
+
+        h_sep = r - radi-radj;
+        
+        // check for overlaps
+
+        if (h_sep < 0.0) overlaps++;
+        
+        // if less than minimum gap, use minimum gap instead
+
+        if (r < cut_inner[itype][jtype])
+          h_sep = cut_inner[itype][jtype] - radi-radj;
+        
+        // scale h_sep by radi
+
+        h_sep = h_sep/radi;
+        beta0 = radj/radi;
+        beta1 = 1.0 + beta0;
+  
+        // scalar resistances
+
+        if (FLAGLOG) {
+          a_sq = beta0*beta0/beta1/beta1/h_sep + 
+	    (1.0+7.0*beta0+beta0*beta0)/5.0/pow(beta1,3)*log(1.0/h_sep);
+          a_sq += (1.0+18.0*beta0-29.0*beta0*beta0+18.0*pow(beta0,3) + 
+		   pow(beta0,4))/21.0/pow(beta1,4)*h_sep*log(1.0/h_sep);
+          a_sq *= 6.0*MY_PI*mu*radi;
+          a_sh = 4.0*beta0*(2.0+beta0+2.0*beta0*beta0)/15.0/pow(beta1,3) * 
+	    log(1.0/h_sep);
+          a_sh += 4.0*(16.0-45.0*beta0+58.0*beta0*beta0-45.0*pow(beta0,3) + 
+		       16.0*pow(beta0,4))/375.0/pow(beta1,4) * 
+	    h_sep*log(1.0/h_sep);
+          a_sh *= 6.0*MY_PI*mu*radi;
+          a_pu = beta0*(4.0+beta0)/10.0/beta1/beta1*log(1.0/h_sep);
+          a_pu += (32.0-33.0*beta0+83.0*beta0*beta0+43.0 * 
+		   pow(beta0,3))/250.0/pow(beta1,3)*h_sep*log(1.0/h_sep);
+          a_pu *= 8.0*MY_PI*mu*pow(radi,3);
+
+        } else a_sq = 6.0*MY_PI*mu*radi*(beta0*beta0/beta1/beta1/h_sep);
+         
+        // generate the Pairwise Brownian Force: a_sq
+
+        Fbmag = prethermostat*sqrt(a_sq);
+        
+        // generate a random number
+
+        randr = rng.uniform()-0.5;
+          
+        // contribution due to Brownian motion
+
+        fx = Fbmag*randr*delx/r;
+        fy = Fbmag*randr*dely/r;
+        fz = Fbmag*randr*delz/r;
+        
+        // add terms due to a_sh
+
+        if (FLAGLOG) {
+
+          // generate two orthogonal vectors to the line of centers
+
+          p1[0] = delx/r; p1[1] = dely/r; p1[2] = delz/r;                    
+          set_3_orthogonal_vectors(p1,p2,p3);
+          
+          // magnitude
+
+          Fbmag = prethermostat*sqrt(a_sh);          
+          
+          // force in each of the two directions
+
+          randr = rng.uniform()-0.5;
+          fx += Fbmag*randr*p2[0];
+          fy += Fbmag*randr*p2[1];
+          fz += Fbmag*randr*p2[2];
+          
+          randr = rng.uniform()-0.5;
+          fx += Fbmag*randr*p3[0];
+          fy += Fbmag*randr*p3[1];
+          fz += Fbmag*randr*p3[2];          
+        }       
+        
+        // scale forces to appropriate units
+
+        fx = vxmu2f*fx;
+        fy = vxmu2f*fy;
+        fz = vxmu2f*fz;
+        
+        // sum to total force
+
+        f[i][0] -= fx;
+        f[i][1] -= fy;
+        f[i][2] -= fz;   
+
+        // torque due to the Brownian Force
+
+        if (FLAGLOG) {
+
+          // location of the point of closest approach on I from its center
+
+          xl[0] = -delx/r*radi;
+          xl[1] = -dely/r*radi;
+          xl[2] = -delz/r*radi;
+          
+          // torque = xl_cross_F
+
+          tx = xl[1]*fz - xl[2]*fy;
+          ty = xl[2]*fx - xl[0]*fz;
+          tz = xl[0]*fy - xl[1]*fx;              
+        
+          // torque is same on both particles
+
+          torque[i][0] -= tx;
+          torque[i][1] -= ty;
+          torque[i][2] -= tz;          
+        
+          // torque due to a_pu
+
+          Fbmag = prethermostat*sqrt(a_pu);         
+          
+          // force in each direction
+
+          randr = rng.uniform()-0.5;
+          tx = Fbmag*randr*p2[0];
+          ty = Fbmag*randr*p2[1];
+          tz = Fbmag*randr*p2[2];
+          
+          randr = rng.uniform()-0.5;
+          tx += Fbmag*randr*p3[0];
+          ty += Fbmag*randr*p3[1];
+          tz += Fbmag*randr*p3[2];    
+          
+          // torque has opposite sign on two particles
+
+          torque[i][0] -= tx;
+          torque[i][1] -= ty;
+          torque[i][2] -= tz;          
+        
+        }
+
+        if (EVFLAG) ev_tally_xyz(i,j,nlocal,/* newton_pair */ 0,
+				 0.0,0.0,-fx,-fy,-fz,delx,dely,delz);
+      }
+    }
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+double PairBrownianPolyOMP::memory_usage()
+{
+  double bytes = memory_usage_thr();
+  bytes += PairBrownianPoly::memory_usage();
+  bytes += comm->nthreads * sizeof(RanMars*);
+  bytes += comm->nthreads * sizeof(RanMars);
+
+  return bytes;
+}
--- a/src/USER-OMP/pair_brownian_poly_omp.h
+++ b/src/USER-OMP/pair_brownian_poly_omp.h
@ -0,0 +1,52 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+
+PairStyle(brownian/poly/omp,PairBrownianPolyOMP)
+
+#else
+
+#ifndef LMP_PAIR_BROWNIAN_POLY_OMP_H
+#define LMP_PAIR_BROWNIAN_POLY_OMP_H
+
+#include "pair_brownian_poly.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+class PairBrownianPolyOMP : public PairBrownianPoly, public ThrOMP {
+
+ public:
+  PairBrownianPolyOMP(class LAMMPS *);
+  virtual ~PairBrownianPolyOMP();
+
+  virtual void compute(int, int);
+  virtual double memory_usage();
+
+ protected:
+  class RanMars **random_thr;
+
+ private:
+  template <int LOGFLAG, int EVFLAG>
+  void eval(int ifrom, int ito, ThrData * const thr);
+};
+
+}
+
+#endif
+#endif
--- a/src/USER-OMP/pair_coul_wolf_omp.cpp
+++ b/src/USER-OMP/pair_coul_wolf_omp.cpp
@ -0,0 +1,183 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   This software is distributed under the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#include "math.h"
+#include "pair_coul_wolf_omp.h"
+#include "atom.h"
+#include "comm.h"
+#include "force.h"
+#include "neighbor.h"
+#include "neigh_list.h"
+#include "math_const.h"
+
+using namespace LAMMPS_NS;
+using namespace MathConst;
+
+/* ---------------------------------------------------------------------- */
+
+PairCoulWolfOMP::PairCoulWolfOMP(LAMMPS *lmp) :
+  PairCoulWolf(lmp), ThrOMP(lmp, THR_PAIR)
+{
+  respa_enable = 0;
+}
+
+/* ---------------------------------------------------------------------- */
+
+void PairCoulWolfOMP::compute(int eflag, int vflag)
+{
+  if (eflag || vflag) {
+    ev_setup(eflag,vflag);
+  } else evflag = vflag_fdotr = 0;
+
+  const int nall = atom->nlocal + atom->nghost;
+  const int nthreads = comm->nthreads;
+  const int inum = list->inum;
+
+#if defined(_OPENMP)
+#pragma omp parallel default(none) shared(eflag,vflag)
+#endif
+  {
+    int ifrom, ito, tid;
+
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
+
+    if (evflag) {
+      if (eflag) {
+	if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
+      } else {
+	if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
+      }
+    } else {
+      if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
+    }
+
+    reduce_thr(this, eflag, vflag, thr);
+  } // end of omp parallel region
+}
+
+/* ---------------------------------------------------------------------- */
+
+template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
+void PairCoulWolfOMP::eval(int iifrom, int iito, ThrData * const thr)
+{
+  int i,j,ii,jj,jnum,itype,jtype;
+  double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,ecoul,fpair;
+  double rsq,forcecoul,factor_coul;
+  double prefactor;
+  double r,rexp;
+  int *ilist,*jlist,*numneigh,**firstneigh;
+  double erfcc,erfcd,v_sh,dvdrr,e_self,e_shift,f_shift,qisq;
+
+  ecoul = 0.0;
+
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  const double * const q = atom->q;
+  const int * const type = atom->type;
+  const int nlocal = atom->nlocal;
+  const double * const special_coul = force->special_coul;
+  const double qqrd2e = force->qqrd2e;
+  double fxtmp,fytmp,fztmp;
+
+  // self and shifted coulombic energy
+
+  e_self = v_sh = 0.0; 
+  e_shift = erfc(alf*cut_coul)/cut_coul;
+  f_shift = -(e_shift+ 2.0*alf/MY_PIS * exp(-alf*alf*cut_coul*cut_coul)) / 
+    cut_coul; 
+
+  ilist = list->ilist;
+  numneigh = list->numneigh;
+  firstneigh = list->firstneigh;
+
+  // loop over neighbors of my atoms
+
+  for (ii = iifrom; ii < iito; ++ii) {
+
+    i = ilist[ii];
+    qtmp = q[i];
+    xtmp = x[i][0];
+    ytmp = x[i][1];
+    ztmp = x[i][2];
+    itype = type[i];
+    jlist = firstneigh[i];
+    jnum = numneigh[i];
+    fxtmp=fytmp=fztmp=0.0;
+
+    qisq = qtmp*qtmp;
+    e_self = -(e_shift/2.0 + alf/MY_PIS) * qisq*qqrd2e;
+    if (EVFLAG) ev_tally_thr(this,i,i,nlocal,0,0.0,e_self,0.0,0.0,0.0,0.0,thr);
+
+    for (jj = 0; jj < jnum; jj++) {
+      j = jlist[jj];
+      factor_coul = special_coul[sbmask(j)];
+      j &= NEIGHMASK;
+
+      delx = xtmp - x[j][0];
+      dely = ytmp - x[j][1];
+      delz = ztmp - x[j][2];
+      rsq = delx*delx + dely*dely + delz*delz;
+      jtype = type[j];
+
+      if (rsq < cut_coulsq) {
+	r = sqrt(rsq);
+	prefactor = qqrd2e*qtmp*q[j]/r;
+	erfcc = erfc(alf*r); 
+	erfcd = exp(-alf*alf*r*r);
+	v_sh = (erfcc - e_shift*r) * prefactor; 
+	dvdrr = (erfcc/rsq + 2.0*alf/MY_PIS * erfcd/r) + f_shift;
+	forcecoul = dvdrr*rsq*prefactor;
+	if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*prefactor;
+	fpair = forcecoul / rsq;
+
+	fxtmp += delx*fpair;
+	fytmp += dely*fpair;
+	fztmp += delz*fpair;
+	if (NEWTON_PAIR || j < nlocal) {
+	  f[j][0] -= delx*fpair;
+	  f[j][1] -= dely*fpair;
+	  f[j][2] -= delz*fpair;
+	}
+
+	if (EFLAG) {
+	  if (rsq < cut_coulsq) {
+	    ecoul = v_sh;
+	    if (factor_coul < 1.0) ecoul -= (1.0-factor_coul)*prefactor;
+	  } else ecoul = 0.0;
+	}
+
+	if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR,
+				 0.0,ecoul,fpair,delx,dely,delz,thr);
+      }
+    }
+    f[i][0] += fxtmp;
+    f[i][1] += fytmp;
+    f[i][2] += fztmp;
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+double PairCoulWolfOMP::memory_usage()
+{
+  double bytes = memory_usage_thr();
+  bytes += PairCoulWolf::memory_usage();
+
+  return bytes;
+}
--- a/src/USER-OMP/pair_coul_wolf_omp.h
+++ b/src/USER-OMP/pair_coul_wolf_omp.h
@ -0,0 +1,48 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+
+PairStyle(coul/wolf/omp,PairCoulWolfOMP)
+
+#else
+
+#ifndef LMP_PAIR_COUL_WOLF_OMP_H
+#define LMP_PAIR_COUL_WOLF_OMP_H
+
+#include "pair_coul_wolf.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+class PairCoulWolfOMP : public PairCoulWolf, public ThrOMP {
+
+ public:
+  PairCoulWolfOMP(class LAMMPS *);
+
+  virtual void compute(int, int);
+  virtual double memory_usage();
+
+ private:
+  template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
+  void eval(int ifrom, int ito, ThrData * const thr);
+};
+
+}
+
+#endif
+#endif
--- a/src/USER-OMP/pair_lj_class2_coul_pppm_omp.cpp
+++ b/src/USER-OMP/pair_lj_class2_coul_pppm_omp.cpp
@ -0,0 +1,226 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   This software is distributed under the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#include "math.h"
+#include "pair_lj_class2_coul_pppm_omp.h"
+#include "pppm_proxy.h"
+#include "atom.h"
+#include "comm.h"
+#include "error.h"
+#include "force.h"
+#include "neighbor.h"
+#include "neigh_list.h"
+#include "update.h"
+
+#include <string.h>
+
+using namespace LAMMPS_NS;
+
+#define EWALD_F   1.12837917
+#define EWALD_P   0.3275911
+#define A1        0.254829592
+#define A2       -0.284496736
+#define A3        1.421413741
+#define A4       -1.453152027
+#define A5        1.061405429
+
+/* ---------------------------------------------------------------------- */
+
+PairLJClass2CoulPPPMOMP::PairLJClass2CoulPPPMOMP(LAMMPS *lmp) :
+  PairLJClass2CoulLong(lmp), ThrOMP(lmp, THR_PAIR|THR_PROXY)
+{
+  respa_enable = 0;
+  nproxy=1;
+  
+  kspace = NULL;
+}
+
+/* ---------------------------------------------------------------------- */
+
+void PairLJClass2CoulPPPMOMP::init_style()
+{
+  if (comm->nthreads < 2)
+    error->all(FLERR,"need at least two threads per MPI task for this pair style");
+
+  if (strcmp(force->kspace_style,"pppm/proxy") != 0)
+    error->all(FLERR,"kspace style pppm/proxy is required with this pair style");
+
+  kspace = static_cast<PPPMProxy *>(force->kspace);
+
+  PairLJClass2CoulLong::init_style();
+}
+
+/* ---------------------------------------------------------------------- */
+
+void PairLJClass2CoulPPPMOMP::compute(int eflag, int vflag)
+{
+  if (eflag || vflag) {
+    ev_setup(eflag,vflag);
+  } else evflag = vflag_fdotr = 0;
+
+  const int nall = atom->nlocal + atom->nghost;
+  const int nthreads = comm->nthreads;
+  const int inum = list->inum;
+
+#if defined(_OPENMP)
+#pragma omp parallel default(none) shared(eflag,vflag)
+#endif
+  {
+    int ifrom, ito, tid;
+
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads, nproxy);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
+
+    // thread id 0 runs pppm, the rest the pair style
+    if (tid < nproxy) {
+      kspace->compute_proxy(eflag,vflag);
+    } else {
+      if (evflag) {
+	if (eflag) {
+	  if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr);
+	  else eval<1,1,0>(ifrom, ito, thr);
+	} else {
+	  if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr);
+	  else eval<1,0,0>(ifrom, ito, thr);
+	}
+      } else {
+	if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr);
+	else eval<0,0,0>(ifrom, ito, thr);
+      }
+    }
+    
+    sync_threads();
+    reduce_thr(this, eflag, vflag, thr, nproxy);
+  } // end of omp parallel region
+}
+
+/* ---------------------------------------------------------------------- */
+
+template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
+void PairLJClass2CoulPPPMOMP::eval(int iifrom, int iito, ThrData * const thr)
+{
+  int i,j,ii,jj,jnum,itype,jtype;
+  double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair;
+  double r,rsq,rinv,r2inv,r3inv,r6inv,forcecoul,forcelj,factor_coul,factor_lj;
+  double grij,expm2,prefactor,t,erfc;
+  int *ilist,*jlist,*numneigh,**firstneigh;
+
+  evdwl = ecoul = 0.0;
+
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  const double * const q = atom->q;
+  const int * const type = atom->type;
+  const int nlocal = atom->nlocal;
+  const double * const special_coul = force->special_coul;
+  const double * const special_lj = force->special_lj;
+  const double qqrd2e = force->qqrd2e;
+  double fxtmp,fytmp,fztmp;
+
+  ilist = list->ilist;
+  numneigh = list->numneigh;
+  firstneigh = list->firstneigh;
+
+  // loop over neighbors of my atoms
+
+  for (ii = iifrom; ii < iito; ++ii) {
+
+    i = ilist[ii];
+    qtmp = q[i];
+    xtmp = x[i][0];
+    ytmp = x[i][1];
+    ztmp = x[i][2];
+    itype = type[i];
+    jlist = firstneigh[i];
+    jnum = numneigh[i];
+    fxtmp=fytmp=fztmp=0.0;
+
+    for (jj = 0; jj < jnum; jj++) {
+      j = jlist[jj];
+      factor_lj = special_lj[sbmask(j)];
+      factor_coul = special_coul[sbmask(j)];
+      j &= NEIGHMASK;
+
+      delx = xtmp - x[j][0];
+      dely = ytmp - x[j][1];
+      delz = ztmp - x[j][2];
+      rsq = delx*delx + dely*dely + delz*delz;
+      jtype = type[j];
+
+      if (rsq < cutsq[itype][jtype]) {
+	r2inv = 1.0/rsq;
+
+	if (rsq < cut_coulsq) {
+	  r = sqrt(rsq);
+	  grij = g_ewald * r;
+	  expm2 = exp(-grij*grij);
+	  t = 1.0 / (1.0 + EWALD_P*grij);
+	  erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2;
+	  prefactor = qqrd2e * qtmp*q[j]/r;
+	  forcecoul = prefactor * (erfc + EWALD_F*grij*expm2);
+	  if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*prefactor;
+	} else forcecoul = 0.0;
+
+	if (rsq < cut_ljsq[itype][jtype]) {
+	  rinv = sqrt(r2inv);
+	  r3inv = r2inv*rinv;
+	  r6inv = r3inv*r3inv;
+	  forcelj = r6inv * (lj1[itype][jtype]*r3inv - lj2[itype][jtype]);
+	  forcelj *= factor_lj;
+	} else forcelj = 0.0;
+
+	fpair = (forcecoul + forcelj) * r2inv;
+
+	fxtmp += delx*fpair;
+	fytmp += dely*fpair;
+	fztmp += delz*fpair;
+	if (NEWTON_PAIR || j < nlocal) {
+	  f[j][0] -= delx*fpair;
+	  f[j][1] -= dely*fpair;
+	  f[j][2] -= delz*fpair;
+	}
+
+	if (EFLAG) {
+	  if (rsq < cut_coulsq) {
+	    ecoul = prefactor*erfc;
+	    if (factor_coul < 1.0) ecoul -= (1.0-factor_coul)*prefactor;
+	  } else ecoul = 0.0;
+
+	  if (rsq < cut_ljsq[itype][jtype]) {
+	    evdwl = r6inv*(lj3[itype][jtype]*r3inv-lj4[itype][jtype]) -
+	      offset[itype][jtype];
+	    evdwl *= factor_lj;
+	  } else evdwl = 0.0;
+	}
+	
+	if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR,
+				 evdwl,ecoul,fpair,delx,dely,delz,thr);
+      }
+    }
+    f[i][0] += fxtmp;
+    f[i][1] += fytmp;
+    f[i][2] += fztmp;
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+double PairLJClass2CoulPPPMOMP::memory_usage()
+{
+  double bytes = memory_usage_thr();
+  bytes += PairLJClass2CoulLong::memory_usage();
+
+  return bytes;
+}
--- a/src/USER-OMP/pair_lj_class2_coul_pppm_omp.h
+++ b/src/USER-OMP/pair_lj_class2_coul_pppm_omp.h
@ -0,0 +1,54 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+
+PairStyle(lj/class2/coul/pppm/omp,PairLJClass2CoulPPPMOMP)
+
+#else
+
+#ifndef LMP_PAIR_LJ_CLASS2_COUL_PPPM_OMP_H
+#define LMP_PAIR_LJ_CLASS2_COUL_PPPM_OMP_H
+
+#include "pair_lj_class2_coul_long.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+class PairLJClass2CoulPPPMOMP : public PairLJClass2CoulLong, public ThrOMP {
+
+ public:
+  PairLJClass2CoulPPPMOMP(class LAMMPS *);
+  virtual ~PairLJClass2CoulPPPMOMP() {};
+
+  virtual void init_style();
+
+  virtual void compute(int, int);
+  virtual double memory_usage();
+
+ private:
+  template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
+  void eval(int ifrom, int ito, ThrData * const thr);
+
+  class PPPMProxy *kspace;
+  int nproxy;
+};
+
+}
+
+#endif
+#endif
--- a/src/USER-OMP/pair_lubricate_omp.cpp
+++ b/src/USER-OMP/pair_lubricate_omp.cpp
@ -0,0 +1,413 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   This software is distributed under the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#include "math.h"
+#include "pair_lubricate_omp.h"
+#include "atom.h"
+#include "comm.h"
+#include "domain.h"
+#include "force.h"
+#include "neighbor.h"
+#include "neigh_list.h"
+#include "update.h"
+#include "random_mars.h"
+#include "math_const.h"
+
+using namespace LAMMPS_NS;
+using namespace MathConst;
+
+/* ---------------------------------------------------------------------- */
+
+PairLubricateOMP::PairLubricateOMP(LAMMPS *lmp) :
+  PairLubricate(lmp), ThrOMP(lmp, THR_PAIR)
+{
+  respa_enable = 0;
+}
+
+/* ---------------------------------------------------------------------- */
+
+PairLubricateOMP::~PairLubricateOMP() 
+{}
+
+/* ---------------------------------------------------------------------- */
+
+void PairLubricateOMP::compute(int eflag, int vflag)
+{
+  if (eflag || vflag) {
+    ev_setup(eflag,vflag);
+  } else evflag = vflag_fdotr = 0;
+
+  const int nall = atom->nlocal + atom->nghost;
+  const int nthreads = comm->nthreads;
+  const int inum = list->inum;
+
+#if defined(_OPENMP)
+#pragma omp parallel default(none) shared(eflag,vflag)
+#endif
+  {
+    int ifrom, ito, tid;
+
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
+
+    if (flaglog) {
+      if (evflag) {
+	if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
+      } else {
+	if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
+      }
+    } else { 
+      if (evflag) {
+	if (force->newton_pair) eval<0,1,1>(ifrom, ito, thr);
+	else eval<0,1,0>(ifrom, ito, thr);
+      } else {
+	if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr);
+	else eval<0,0,0>(ifrom, ito, thr);
+      }
+    }
+
+    reduce_thr(this, eflag, vflag, thr);
+  } // end of omp parallel region
+}
+
+template <int FLAGLOG, int EVFLAG, int NEWTON_PAIR>
+void PairLubricateOMP::eval(int iifrom, int iito, ThrData * const thr)
+{
+  int i,j,ii,jj,jnum,itype,jtype;
+  double xtmp,ytmp,ztmp,delx,dely,delz,fx,fy,fz,tx,ty,tz;
+  double rsq,r,h_sep,radi;
+  double vr1,vr2,vr3,vnnr,vn1,vn2,vn3;
+  double vt1,vt2,vt3,wt1,wt2,wt3,wdotn;  
+  double vRS0;
+  double vi[3],vj[3],wi[3],wj[3],xl[3];
+  double a_sq,a_sh,a_pu;
+  int *ilist,*jlist,*numneigh,**firstneigh;
+  double lamda[3],vstream[3];
+
+  double vxmu2f = force->vxmu2f;
+
+  double * const * const x = atom->x;
+  double * const * const v = atom->v;
+  double * const * const f = thr->get_f();
+  double * const * const omega = atom->omega;
+  double * const * const torque = thr->get_torque();
+  const double * const radius = atom->radius;
+  const int * const type = atom->type;
+  const int nlocal = atom->nlocal;
+
+  int overlaps = 0;
+
+  ilist = list->ilist;
+  numneigh = list->numneigh;
+  firstneigh = list->firstneigh;
+
+  // subtract streaming component of velocity, omega, angmom
+  // assume fluid streaming velocity = box deformation rate
+  // vstream = (ux,uy,uz)
+  // ux = h_rate[0]*x + h_rate[5]*y + h_rate[4]*z
+  // uy = h_rate[1]*y + h_rate[3]*z
+  // uz = h_rate[2]*z
+  // omega_new = omega - curl(vstream)/2
+  // angmom_new = angmom - I*curl(vstream)/2
+  // Ef = (grad(vstream) + (grad(vstream))^T) / 2
+
+  if (shearing) {
+    double *h_rate = domain->h_rate;
+    double *h_ratelo = domain->h_ratelo;
+
+    for (ii = iifrom; ii < iito; ii++) {
+      i = ilist[ii];
+      itype = type[i];
+      radi = radius[i];
+      
+      domain->x2lamda(x[i],lamda);
+      vstream[0] = h_rate[0]*lamda[0] + h_rate[5]*lamda[1] + 
+	h_rate[4]*lamda[2] + h_ratelo[0];
+      vstream[1] = h_rate[1]*lamda[1] + h_rate[3]*lamda[2] + h_ratelo[1];
+      vstream[2] = h_rate[2]*lamda[2] + h_ratelo[2];
+      v[i][0] -= vstream[0];
+      v[i][1] -= vstream[1];
+      v[i][2] -= vstream[2];
+
+      omega[i][0] += 0.5*h_rate[3];
+      omega[i][1] -= 0.5*h_rate[4];
+      omega[i][2] += 0.5*h_rate[5];
+    }
+
+    // set Ef from h_rate in strain units
+
+    Ef[0][0] = h_rate[0]/domain->xprd;
+    Ef[1][1] = h_rate[1]/domain->yprd;
+    Ef[2][2] = h_rate[2]/domain->zprd; 
+    Ef[0][1] = Ef[1][0] = 0.5 * h_rate[5]/domain->yprd;
+    Ef[0][2] = Ef[2][0] = 0.5 * h_rate[4]/domain->zprd;
+    Ef[1][2] = Ef[2][1] = 0.5 * h_rate[3]/domain->zprd;
+
+    // copy updated velocity/omega/angmom to the ghost particles
+    // no need to do this if not shearing since comm->ghost_velocity is set
+
+    sync_threads();
+    comm->forward_comm_pair(this);
+  }
+
+  // loop over neighbors of my atoms
+
+  for (ii = iifrom; ii < iito; ++ii) {
+    i = ilist[ii];
+    xtmp = x[i][0];
+    ytmp = x[i][1];
+    ztmp = x[i][2];
+    itype = type[i];
+    radi = radius[i];
+    jlist = firstneigh[i];
+    jnum = numneigh[i];
+    
+    // angular velocity
+
+    wi[0] = omega[i][0];
+    wi[1] = omega[i][1];
+    wi[2] = omega[i][2];          
+     
+    // FLD contribution to force and torque due to isotropic terms
+    // FLD contribution to stress from isotropic RS0
+
+    if (flagfld) {
+      f[i][0] -= vxmu2f*R0*v[i][0];
+      f[i][1] -= vxmu2f*R0*v[i][1];
+      f[i][2] -= vxmu2f*R0*v[i][2];    
+      torque[i][0] -= vxmu2f*RT0*wi[0];
+      torque[i][1] -= vxmu2f*RT0*wi[1];
+      torque[i][2] -= vxmu2f*RT0*wi[2];   
+
+      if (shearing && vflag_either) {
+	vRS0 = -vxmu2f * RS0;
+	v_tally_tensor(i,i,nlocal,NEWTON_PAIR,
+		       vRS0*Ef[0][0],vRS0*Ef[1][1],vRS0*Ef[2][2],
+		       vRS0*Ef[0][1],vRS0*Ef[0][2],vRS0*Ef[1][2]);
+      }
+    }
+
+    for (jj = 0; jj < jnum; jj++) {
+      j = jlist[jj];
+      j &= NEIGHMASK;
+
+      delx = xtmp - x[j][0];
+      dely = ytmp - x[j][1];
+      delz = ztmp - x[j][2];
+      rsq = delx*delx + dely*dely + delz*delz;
+      jtype = type[j];
+
+      if (rsq < cutsq[itype][jtype]) {
+        r = sqrt(rsq);  
+  
+        // angular momentum = I*omega = 2/5 * M*R^2 * omega
+	
+	wj[0] = omega[j][0];
+	wj[1] = omega[j][1];
+	wj[2] = omega[j][2];              
+
+        // xl = point of closest approach on particle i from its center
+
+        xl[0] = -delx/r*radi;
+        xl[1] = -dely/r*radi;
+        xl[2] = -delz/r*radi;
+  
+        // velocity at the point of closest approach on both particles
+        // v = v + omega_cross_xl - Ef.xl
+
+        // particle i
+
+        vi[0] = v[i][0] + (wi[1]*xl[2] - wi[2]*xl[1])
+                        - (Ef[0][0]*xl[0] + Ef[0][1]*xl[1] + Ef[0][2]*xl[2]);
+        
+        vi[1] = v[i][1] + (wi[2]*xl[0] - wi[0]*xl[2])
+                        - (Ef[1][0]*xl[0] + Ef[1][1]*xl[1] + Ef[1][2]*xl[2]);
+        
+        vi[2] = v[i][2] + (wi[0]*xl[1] - wi[1]*xl[0])
+                        - (Ef[2][0]*xl[0] + Ef[2][1]*xl[1] + Ef[2][2]*xl[2]);
+  
+        // particle j
+
+        vj[0] = v[j][0] - (wj[1]*xl[2] - wj[2]*xl[1])
+                        + (Ef[0][0]*xl[0] + Ef[0][1]*xl[1] + Ef[0][2]*xl[2]);
+        
+        vj[1] = v[j][1] - (wj[2]*xl[0] - wj[0]*xl[2])
+                        + (Ef[1][0]*xl[0] + Ef[1][1]*xl[1] + Ef[1][2]*xl[2]);
+        
+        vj[2] = v[j][2] - (wj[0]*xl[1] - wj[1]*xl[0])
+                        + (Ef[2][0]*xl[0] + Ef[2][1]*xl[1] + Ef[2][2]*xl[2]);
+        
+        // scalar resistances XA and YA
+
+        h_sep = r - 2.0*radi;
+        
+        // check for overlaps
+
+        if (h_sep < 0.0) overlaps++;
+        
+        // if less than the minimum gap use the minimum gap instead
+
+        if (r < cut_inner[itype][jtype])
+          h_sep = cut_inner[itype][jtype] - 2.0*radi;          
+        
+        // scale h_sep by radi
+
+        h_sep = h_sep/radi;
+  
+        // scalar resistances
+
+        if (FLAGLOG) {
+          a_sq = 6.0*MY_PI*mu*radi*(1.0/4.0/h_sep + 9.0/40.0*log(1.0/h_sep));
+          a_sh = 6.0*MY_PI*mu*radi*(1.0/6.0*log(1.0/h_sep));
+          a_pu = 8.0*MY_PI*mu*pow(radi,3)*(3.0/160.0*log(1.0/h_sep));
+        } else
+          a_sq = 6.0*MY_PI*mu*radi*(1.0/4.0/h_sep);
+  
+        // relative velocity at the point of closest approach
+	// includes fluid velocity
+
+        vr1 = vi[0] - vj[0];
+        vr2 = vi[1] - vj[1];
+        vr3 = vi[2] - vj[2];
+        
+        // normal component (vr.n)n
+
+        vnnr = (vr1*delx + vr2*dely + vr3*delz)/r;
+        vn1 = vnnr*delx/r;
+        vn2 = vnnr*dely/r;
+        vn3 = vnnr*delz/r;
+
+        // tangential component vr - (vr.n)n
+
+        vt1 = vr1 - vn1;
+        vt2 = vr2 - vn2;
+        vt3 = vr3 - vn3;
+
+        // force due to squeeze type motion
+
+        fx  = a_sq*vn1;
+        fy  = a_sq*vn2;
+        fz  = a_sq*vn3;
+
+        // force due to all shear kind of motions
+
+        if (FLAGLOG) {
+          fx = fx + a_sh*vt1;
+          fy = fy + a_sh*vt2;
+          fz = fz + a_sh*vt3;                  
+        }
+
+        // scale forces for appropriate units
+
+        fx *= vxmu2f;
+        fy *= vxmu2f;
+        fz *= vxmu2f;
+        
+        // add to total force
+
+        f[i][0] -= fx;
+        f[i][1] -= fy;
+        f[i][2] -= fz;    
+        
+        if (NEWTON_PAIR || j < nlocal) {
+          f[j][0] += fx;
+          f[j][1] += fy;
+          f[j][2] += fz;    
+        }
+  
+        // torque due to this force
+
+        if (FLAGLOG) {
+          tx = xl[1]*fz - xl[2]*fy;
+          ty = xl[2]*fx - xl[0]*fz;
+          tz = xl[0]*fy - xl[1]*fx;                  
+    
+          torque[i][0] -= vxmu2f*tx;
+          torque[i][1] -= vxmu2f*ty;
+          torque[i][2] -= vxmu2f*tz;        
+            
+          if (NEWTON_PAIR || j < nlocal) {
+            torque[j][0] -= vxmu2f*tx;
+            torque[j][1] -= vxmu2f*ty;
+            torque[j][2] -= vxmu2f*tz;
+          }
+          
+          // torque due to a_pu
+
+          wdotn = ((wi[0]-wj[0])*delx + (wi[1]-wj[1])*dely + 
+		   (wi[2]-wj[2])*delz)/r;
+          wt1 = (wi[0]-wj[0]) - wdotn*delx/r;
+          wt2 = (wi[1]-wj[1]) - wdotn*dely/r;
+          wt3 = (wi[2]-wj[2]) - wdotn*delz/r;
+          
+          tx = a_pu*wt1;
+          ty = a_pu*wt2;
+          tz = a_pu*wt3;
+          
+          torque[i][0] -= vxmu2f*tx;
+          torque[i][1] -= vxmu2f*ty;
+          torque[i][2] -= vxmu2f*tz;        
+            
+          if (NEWTON_PAIR || j < nlocal) {
+            torque[j][0] += vxmu2f*tx;
+            torque[j][1] += vxmu2f*ty;
+            torque[j][2] += vxmu2f*tz;
+          }
+        }        
+
+        if (EVFLAG) ev_tally_xyz(i,j,nlocal,NEWTON_PAIR,
+				 0.0,0.0,-fx,-fy,-fz,delx,dely,delz);
+      }
+    }
+  }
+  
+  // restore streaming component of velocity, omega, angmom
+
+  if (shearing) {
+    double *h_rate = domain->h_rate;
+    double *h_ratelo = domain->h_ratelo;
+
+    for (ii = iifrom; ii < iito; ii++) {
+      i = ilist[ii];
+      itype = type[i];
+      radi = radius[i];
+      
+      domain->x2lamda(x[i],lamda);
+      vstream[0] = h_rate[0]*lamda[0] + h_rate[5]*lamda[1] + 
+	h_rate[4]*lamda[2] + h_ratelo[0];
+      vstream[1] = h_rate[1]*lamda[1] + h_rate[3]*lamda[2] + h_ratelo[1];
+      vstream[2] = h_rate[2]*lamda[2] + h_ratelo[2];
+      v[i][0] += vstream[0];
+      v[i][1] += vstream[1];
+      v[i][2] += vstream[2];
+
+      omega[i][0] -= 0.5*h_rate[3];
+      omega[i][1] += 0.5*h_rate[4];
+      omega[i][2] -= 0.5*h_rate[5];
+    }
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+double PairLubricateOMP::memory_usage()
+{
+  double bytes = memory_usage_thr();
+  bytes += PairLubricate::memory_usage();
+
+  return bytes;
+}
--- a/src/USER-OMP/pair_lubricate_omp.h
+++ b/src/USER-OMP/pair_lubricate_omp.h
@ -0,0 +1,49 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+
+PairStyle(lubricate/omp,PairLubricateOMP)
+
+#else
+
+#ifndef LMP_PAIR_LUBRICATE_OMP_H
+#define LMP_PAIR_LUBRICATE_OMP_H
+
+#include "pair_lubricate.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+class PairLubricateOMP : public PairLubricate, public ThrOMP {
+
+ public:
+  PairLubricateOMP(class LAMMPS *);
+  virtual ~PairLubricateOMP();
+
+  virtual void compute(int, int);
+  virtual double memory_usage();
+
+ private:
+  template <int LOGFLAG, int EVFLAG, int NEWTON_PAIR>
+  void eval(int ifrom, int ito, ThrData * const thr);
+};
+
+}
+
+#endif
+#endif
--- a/src/USER-OMP/pair_lubricate_poly_omp.cpp
+++ b/src/USER-OMP/pair_lubricate_poly_omp.cpp
@ -0,0 +1,418 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   This software is distributed under the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#include "math.h"
+#include "pair_lubricate_poly_omp.h"
+#include "atom.h"
+#include "comm.h"
+#include "domain.h"
+#include "force.h"
+#include "neighbor.h"
+#include "neigh_list.h"
+#include "update.h"
+#include "random_mars.h"
+#include "math_const.h"
+
+using namespace LAMMPS_NS;
+using namespace MathConst;
+
+/* ---------------------------------------------------------------------- */
+
+PairLubricatePolyOMP::PairLubricatePolyOMP(LAMMPS *lmp) :
+  PairLubricatePoly(lmp), ThrOMP(lmp, THR_PAIR)
+{
+  respa_enable = 0;
+}
+
+/* ---------------------------------------------------------------------- */
+
+PairLubricatePolyOMP::~PairLubricatePolyOMP() 
+{}
+
+/* ---------------------------------------------------------------------- */
+
+void PairLubricatePolyOMP::compute(int eflag, int vflag)
+{
+  if (eflag || vflag) {
+    ev_setup(eflag,vflag);
+  } else evflag = vflag_fdotr = 0;
+
+  const int nall = atom->nlocal + atom->nghost;
+  const int nthreads = comm->nthreads;
+  const int inum = list->inum;
+
+#if defined(_OPENMP)
+#pragma omp parallel default(none) shared(eflag,vflag)
+#endif
+  {
+    int ifrom, ito, tid;
+
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
+
+    if (flaglog) {
+      if (shearing) {
+	if (evflag)
+	  eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
+      } else {
+	if (evflag)
+	  eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
+      }
+    } else { 
+      if (shearing) {
+	if (evflag)
+	  eval<0,1,1>(ifrom, ito, thr);
+	else eval<0,1,0>(ifrom, ito, thr);
+      } else {
+	if (evflag)
+	  eval<0,0,1>(ifrom, ito, thr);
+	else eval<0,0,0>(ifrom, ito, thr);
+      }
+    }
+
+    reduce_thr(this, eflag, vflag, thr);
+  } // end of omp parallel region
+}
+
+template <int FLAGLOG, int SHEARING, int EVFLAG>
+void PairLubricatePolyOMP::eval(int iifrom, int iito, ThrData * const thr)
+{
+  int i,j,ii,jj,jnum,itype,jtype;
+  double xtmp,ytmp,ztmp,delx,dely,delz,fx,fy,fz,tx,ty,tz;
+  double rsq,r,h_sep,beta0,beta1,radi,radj;
+  double vr1,vr2,vr3,vnnr,vn1,vn2,vn3;
+  double vt1,vt2,vt3,wt1,wt2,wt3,wdotn;  
+  double vRS0;
+  double vi[3],vj[3],wi[3],wj[3],xl[3],jl[3];
+  double a_sq,a_sh,a_pu;
+  int *ilist,*jlist,*numneigh,**firstneigh;
+  double lamda[3],vstream[3];
+
+  double vxmu2f = force->vxmu2f;
+
+  double * const * const x = atom->x;
+  double * const * const v = atom->v;
+  double * const * const f = thr->get_f();
+  double * const * const omega = atom->omega;
+  double * const * const torque = thr->get_torque();
+  const double * const radius = atom->radius;
+  const int * const type = atom->type;
+  const int nlocal = atom->nlocal;
+
+  int overlaps = 0;
+
+  ilist = list->ilist;
+  numneigh = list->numneigh;
+  firstneigh = list->firstneigh;
+
+  // subtract streaming component of velocity, omega, angmom
+  // assume fluid streaming velocity = box deformation rate
+  // vstream = (ux,uy,uz)
+  // ux = h_rate[0]*x + h_rate[5]*y + h_rate[4]*z
+  // uy = h_rate[1]*y + h_rate[3]*z
+  // uz = h_rate[2]*z
+  // omega_new = omega - curl(vstream)/2
+  // angmom_new = angmom - I*curl(vstream)/2
+  // Ef = (grad(vstream) + (grad(vstream))^T) / 2
+
+  if (shearing) {
+    double *h_rate = domain->h_rate;
+    double *h_ratelo = domain->h_ratelo;
+
+    for (ii = iifrom; ii < iito; ii++) {
+      i = ilist[ii];
+      itype = type[i];
+      radi = radius[i];
+      domain->x2lamda(x[i],lamda);
+      vstream[0] = h_rate[0]*lamda[0] + h_rate[5]*lamda[1] + 
+	h_rate[4]*lamda[2] + h_ratelo[0];
+      vstream[1] = h_rate[1]*lamda[1] + h_rate[3]*lamda[2] + h_ratelo[1];
+      vstream[2] = h_rate[2]*lamda[2] + h_ratelo[2];
+      v[i][0] -= vstream[0];
+      v[i][1] -= vstream[1];
+      v[i][2] -= vstream[2];
+
+      omega[i][0] += 0.5*h_rate[3];
+      omega[i][1] -= 0.5*h_rate[4];
+      omega[i][2] += 0.5*h_rate[5];
+    }
+
+    // set Ef from h_rate in strain units
+
+    Ef[0][0] = h_rate[0]/domain->xprd;
+    Ef[1][1] = h_rate[1]/domain->yprd;
+    Ef[2][2] = h_rate[2]/domain->zprd; 
+    Ef[0][1] = Ef[1][0] = 0.5 * h_rate[5]/domain->yprd;
+    Ef[0][2] = Ef[2][0] = 0.5 * h_rate[4]/domain->zprd;
+    Ef[1][2] = Ef[2][1] = 0.5 * h_rate[3]/domain->zprd;
+
+    // copy updated omega to the ghost particles
+    // no need to do this if not shearing since comm->ghost_velocity is set
+
+    sync_threads();
+    comm->forward_comm_pair(this);
+  }
+
+  // loop over neighbors of my atoms
+
+  for (ii = iifrom; ii < iito; ++ii) {
+    i = ilist[ii];
+    xtmp = x[i][0];
+    ytmp = x[i][1];
+    ztmp = x[i][2];
+    itype = type[i];
+    radi = radius[i];
+    jlist = firstneigh[i];
+    jnum = numneigh[i];
+    
+    // angular velocity
+
+    wi[0] = omega[i][0];
+    wi[1] = omega[i][1];
+    wi[2] = omega[i][2];          
+     
+    // FLD contribution to force and torque due to isotropic terms
+    // FLD contribution to stress from isotropic RS0
+
+    if (flagfld) {
+      f[i][0] -= vxmu2f*R0*radi*v[i][0];
+      f[i][1] -= vxmu2f*R0*radi*v[i][1];
+      f[i][2] -= vxmu2f*R0*radi*v[i][2];
+      const double rad3 = radi*radi*radi;
+      torque[i][0] -= vxmu2f*RT0*rad3*wi[0];
+      torque[i][1] -= vxmu2f*RT0*rad3*wi[1];
+      torque[i][2] -= vxmu2f*RT0*rad3*wi[2];   
+    
+      if (SHEARING && vflag_either) {
+	vRS0 = -vxmu2f * RS0*rad3;
+	v_tally_tensor(i,i,nlocal,/* newton_pair */ 0,
+		       vRS0*Ef[0][0],vRS0*Ef[1][1],vRS0*Ef[2][2],
+		       vRS0*Ef[0][1],vRS0*Ef[0][2],vRS0*Ef[1][2]);
+      }
+    }
+
+    for (jj = 0; jj < jnum; jj++) {
+      j = jlist[jj];
+      j &= NEIGHMASK;
+
+      delx = xtmp - x[j][0];
+      dely = ytmp - x[j][1];
+      delz = ztmp - x[j][2];
+      rsq = delx*delx + dely*dely + delz*delz;
+      jtype = type[j];
+      radj = atom->radius[j];
+
+      if (rsq < cutsq[itype][jtype]) {
+        r = sqrt(rsq);  
+  
+        // angular momentum = I*omega = 2/5 * M*R^2 * omega
+	
+	wj[0] = omega[j][0];
+	wj[1] = omega[j][1];
+	wj[2] = omega[j][2];              
+
+        // xl = point of closest approach on particle i from its center
+
+        xl[0] = -delx/r*radi;
+        xl[1] = -dely/r*radi;
+        xl[2] = -delz/r*radi;
+        jl[0] = -delx/r*radj;
+        jl[1] = -dely/r*radj;
+        jl[2] = -delz/r*radj;
+  
+        // velocity at the point of closest approach on both particles
+        // v = v + omega_cross_xl - Ef.xl
+
+        // particle i
+
+        vi[0] = v[i][0] + (wi[1]*xl[2] - wi[2]*xl[1])
+                        - (Ef[0][0]*xl[0] + Ef[0][1]*xl[1] + Ef[0][2]*xl[2]);
+        
+        vi[1] = v[i][1] + (wi[2]*xl[0] - wi[0]*xl[2])
+                        - (Ef[1][0]*xl[0] + Ef[1][1]*xl[1] + Ef[1][2]*xl[2]);
+        
+        vi[2] = v[i][2] + (wi[0]*xl[1] - wi[1]*xl[0])
+                        - (Ef[2][0]*xl[0] + Ef[2][1]*xl[1] + Ef[2][2]*xl[2]);
+  
+        // particle j
+
+        vj[0] = v[j][0] - (wj[1]*jl[2] - wj[2]*jl[1])
+                        + (Ef[0][0]*jl[0] + Ef[0][1]*jl[1] + Ef[0][2]*jl[2]);
+        
+        vj[1] = v[j][1] - (wj[2]*jl[0] - wj[0]*jl[2])
+                        + (Ef[1][0]*jl[0] + Ef[1][1]*jl[1] + Ef[1][2]*jl[2]);
+        
+        vj[2] = v[j][2] - (wj[0]*jl[1] - wj[1]*jl[0])
+                        + (Ef[2][0]*jl[0] + Ef[2][1]*jl[1] + Ef[2][2]*jl[2]);
+        
+        // scalar resistances XA and YA
+
+        h_sep = r - radi-radj;
+        
+        // check for overlaps
+
+        if (h_sep < 0.0) overlaps++;
+        
+        // if less than the minimum gap use the minimum gap instead
+
+        if (r < cut_inner[itype][jtype])
+          h_sep = cut_inner[itype][jtype] - radi-radj;          
+        
+        // scale h_sep by radi
+
+        h_sep = h_sep/radi;
+        beta0 = radj/radi;
+        beta1 = 1.0 + beta0;
+  
+        // scalar resistances
+
+        if (FLAGLOG) {
+          a_sq = beta0*beta0/beta1/beta1/h_sep + 
+	    (1.0+7.0*beta0+beta0*beta0)/5.0/pow(beta1,3)*log(1.0/h_sep);
+          a_sq += (1.0+18.0*beta0-29.0*beta0*beta0+18.0 * 
+		   pow(beta0,3)+pow(beta0,4))/21.0/pow(beta1,4) * 
+	    h_sep*log(1.0/h_sep);
+          a_sq *= 6.0*MY_PI*mu*radi;
+          a_sh = 4.0*beta0*(2.0+beta0+2.0*beta0*beta0)/15.0/pow(beta1,3) * 
+	    log(1.0/h_sep);
+          a_sh += 4.0*(16.0-45.0*beta0+58.0*beta0*beta0-45.0*pow(beta0,3) + 
+		       16.0*pow(beta0,4))/375.0/pow(beta1,4) * 
+	    h_sep*log(1.0/h_sep);
+          a_sh *= 6.0*MY_PI*mu*radi;
+          a_pu = beta0*(4.0+beta0)/10.0/beta1/beta1*log(1.0/h_sep);
+          a_pu += (32.0-33.0*beta0+83.0*beta0*beta0+43.0 * 
+		   pow(beta0,3))/250.0/pow(beta1,3)*h_sep*log(1.0/h_sep);
+          a_pu *= 8.0*MY_PI*mu*pow(radi,3);
+        } else a_sq = 6.0*MY_PI*mu*radi*(beta0*beta0/beta1/beta1/h_sep);
+  
+        // relative velocity at the point of closest approach
+	// includes fluid velocity
+
+        vr1 = vi[0] - vj[0];
+        vr2 = vi[1] - vj[1];
+        vr3 = vi[2] - vj[2];
+        
+        // normal component (vr.n)n
+
+        vnnr = (vr1*delx + vr2*dely + vr3*delz)/r;
+        vn1 = vnnr*delx/r;
+        vn2 = vnnr*dely/r;
+        vn3 = vnnr*delz/r;
+
+        // tangential component vr - (vr.n)n
+
+        vt1 = vr1 - vn1;
+        vt2 = vr2 - vn2;
+        vt3 = vr3 - vn3;
+
+        // force due to squeeze type motion
+
+        fx  = a_sq*vn1;
+        fy  = a_sq*vn2;
+        fz  = a_sq*vn3;
+
+        // force due to all shear kind of motions
+
+        if (FLAGLOG) {
+          fx = fx + a_sh*vt1;
+          fy = fy + a_sh*vt2;
+          fz = fz + a_sh*vt3;                  
+        }
+
+        // scale forces for appropriate units
+
+        fx *= vxmu2f;
+        fy *= vxmu2f;
+        fz *= vxmu2f;
+        
+        // add to total force
+
+        f[i][0] -= fx;
+        f[i][1] -= fy;
+        f[i][2] -= fz;    
+        
+        // torque due to this force
+
+        if (FLAGLOG) {
+          tx = xl[1]*fz - xl[2]*fy;
+          ty = xl[2]*fx - xl[0]*fz;
+          tz = xl[0]*fy - xl[1]*fx;                  
+    
+          torque[i][0] -= vxmu2f*tx;
+          torque[i][1] -= vxmu2f*ty;
+          torque[i][2] -= vxmu2f*tz;        
+
+          // torque due to a_pu
+
+          wdotn = ((wi[0]-wj[0])*delx + (wi[1]-wj[1])*dely + 
+		   (wi[2]-wj[2])*delz)/r;
+          wt1 = (wi[0]-wj[0]) - wdotn*delx/r;
+          wt2 = (wi[1]-wj[1]) - wdotn*dely/r;
+          wt3 = (wi[2]-wj[2]) - wdotn*delz/r;
+          
+          tx = a_pu*wt1;
+          ty = a_pu*wt2;
+          tz = a_pu*wt3;
+          
+          torque[i][0] -= vxmu2f*tx;
+          torque[i][1] -= vxmu2f*ty;
+          torque[i][2] -= vxmu2f*tz;        
+
+        }        
+
+        if (EVFLAG) ev_tally_xyz(i,nlocal,nlocal, /* newton_pair */ 0,
+				 0.0,0.0,-fx,-fy,-fz,delx,dely,delz);
+      }
+    }
+  }
+  
+  // restore streaming component of velocity, omega, angmom
+
+  if (SHEARING) {
+    double *h_rate = domain->h_rate;
+    double *h_ratelo = domain->h_ratelo;
+
+    for (ii = iifrom; ii < iito; ii++) {
+      i = ilist[ii];
+      itype = type[i];
+      radi = radius[i];
+      
+      domain->x2lamda(x[i],lamda);
+      vstream[0] = h_rate[0]*lamda[0] + h_rate[5]*lamda[1] + 
+	h_rate[4]*lamda[2] + h_ratelo[0];
+      vstream[1] = h_rate[1]*lamda[1] + h_rate[3]*lamda[2] + h_ratelo[1];
+      vstream[2] = h_rate[2]*lamda[2] + h_ratelo[2];
+      v[i][0] += vstream[0];
+      v[i][1] += vstream[1];
+      v[i][2] += vstream[2];
+
+      omega[i][0] -= 0.5*h_rate[3];
+      omega[i][1] += 0.5*h_rate[4];
+      omega[i][2] -= 0.5*h_rate[5];
+    }
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+double PairLubricatePolyOMP::memory_usage()
+{
+  double bytes = memory_usage_thr();
+  bytes += PairLubricatePoly::memory_usage();
+
+  return bytes;
+}
--- a/src/USER-OMP/pair_lubricate_poly_omp.h
+++ b/src/USER-OMP/pair_lubricate_poly_omp.h
@ -0,0 +1,49 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+
+PairStyle(lubricate/poly/omp,PairLubricateOMP)
+
+#else
+
+#ifndef LMP_PAIR_LUBRICATE_POLY_OMP_H
+#define LMP_PAIR_LUBRICATE_POLY_OMP_H
+
+#include "pair_lubricate_poly.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+class PairLubricatePolyOMP : public PairLubricatePoly, public ThrOMP {
+
+ public:
+  PairLubricatePolyOMP(class LAMMPS *);
+  virtual ~PairLubricatePolyOMP();
+
+  virtual void compute(int, int);
+  virtual double memory_usage();
+
+ private:
+  template <int LOGFLAG, int EVFLAG, int NEWTON_PAIR>
+  void eval(int ifrom, int ito, ThrData * const thr);
+};
+
+}
+
+#endif
+#endif
--- a/src/USER-OMP/pppm_cg_omp.cpp
+++ b/src/USER-OMP/pppm_cg_omp.cpp
@ -0,0 +1,517 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#include "pppm_cg_omp.h"
+#include "atom.h"
+#include "comm.h"
+#include "domain.h"
+#include "force.h"
+#include "memory.h"
+#include "math_const.h"
+
+#include <string.h>
+#include <math.h>
+
+using namespace LAMMPS_NS;
+using namespace MathConst;
+
+#define SMALLQ 0.00001
+#if defined(FFT_SINGLE)
+#define ZEROF 0.0f
+#else
+#define ZEROF 0.0
+#endif
+
+#define EPS_HOC 1.0e-7
+
+/* ---------------------------------------------------------------------- */
+
+PPPMCGOMP::PPPMCGOMP(LAMMPS *lmp, int narg, char **arg) :
+  PPPMCG(lmp, narg, arg), ThrOMP(lmp, THR_KSPACE)
+{
+}
+
+/* ----------------------------------------------------------------------
+   allocate memory that depends on # of K-vectors and order 
+------------------------------------------------------------------------- */
+
+void PPPMCGOMP::allocate()
+{
+  PPPMCG::allocate();
+
+  const int nthreads = comm->nthreads;
+
+#if defined(_OPENMP)
+#pragma omp parallel default(none)
+#endif
+  {
+#if defined(_OPENMP)
+    const int tid = omp_get_thread_num();
+#else
+    const int tid = 0;
+#endif
+
+    FFT_SCALAR **rho1d_thr;
+    memory->create2d_offset(rho1d_thr,3,-order/2,order/2,"pppm:rho1d_thr");
+    ThrData *thr = fix->get_thr(tid);
+    thr->init_pppm(static_cast<void *>(rho1d_thr));
+  }
+
+  const int nzend = (nzhi_out-nzlo_out+1)*nthreads + nzlo_out -1;
+
+  // reallocate density brick, so it fits our needs
+  memory->destroy3d_offset(density_brick,nzlo_out,nylo_out,nxlo_out);
+  memory->create3d_offset(density_brick,nzlo_out,nzend,nylo_out,nyhi_out,
+			  nxlo_out,nxhi_out,"pppm:density_brick");
+}
+
+// NOTE: special version of reduce_data for FFT_SCALAR data type.
+// reduce per thread data into the first part of the data
+// array that is used for the non-threaded parts and reset
+// the temporary storage to 0.0. this routine depends on
+// multi-dimensional arrays like force stored in this order
+// x1,y1,z1,x2,y2,z2,...
+// we need to post a barrier to wait until all threads are done
+// writing to the array.
+static void data_reduce_fft(FFT_SCALAR *dall, int nall, int nthreads, int ndim, int tid)
+{
+#if defined(_OPENMP)
+  // NOOP in non-threaded execution.
+  if (nthreads == 1) return;
+#pragma omp barrier
+  {
+    const int nvals = ndim*nall;
+    const int idelta = nvals/nthreads + 1;
+    const int ifrom = tid*idelta;
+    const int ito   = ((ifrom + idelta) > nvals) ? nvals : (ifrom + idelta);
+
+    // this if protects against having more threads than atoms
+    if (ifrom < nall) { 
+      for (int m = ifrom; m < ito; ++m) {
+	for (int n = 1; n < nthreads; ++n) {
+	  dall[m] += dall[n*nvals + m];
+	  dall[n*nvals + m] = 0.0;
+	}
+      }
+    }
+  }
+#else
+  // NOOP in non-threaded execution.
+  return;
+#endif
+}
+
+/* ----------------------------------------------------------------------
+   free memory that depends on # of K-vectors and order 
+------------------------------------------------------------------------- */
+
+void PPPMCGOMP::deallocate()
+{
+  PPPMCG::deallocate();
+  for (int i=0; i < comm->nthreads; ++i) {
+    ThrData * thr = fix->get_thr(i);
+    FFT_SCALAR ** rho1d_thr = static_cast<FFT_SCALAR **>(thr->get_rho1d());
+    memory->destroy2d_offset(rho1d_thr,-order/2);
+  }
+}
+
+/* ----------------------------------------------------------------------
+   adjust PPPMCG coeffs, called initially and whenever volume has changed 
+------------------------------------------------------------------------- */
+
+void PPPMCGOMP::setup()
+{
+  int i,j,k,n;
+  double *prd;
+
+  // volume-dependent factors
+  // adjust z dimension for 2d slab PPPM
+  // z dimension for 3d PPPM is zprd since slab_volfactor = 1.0
+
+  if (triclinic == 0) prd = domain->prd;
+  else prd = domain->prd_lamda;
+
+  const double xprd = prd[0];
+  const double yprd = prd[1];
+  const double zprd = prd[2];
+  const double zprd_slab = zprd*slab_volfactor;
+  volume = xprd * yprd * zprd_slab;
+    
+  delxinv = nx_pppm/xprd;
+  delyinv = ny_pppm/yprd;
+  delzinv = nz_pppm/zprd_slab;
+
+  delvolinv = delxinv*delyinv*delzinv;
+
+  const double unitkx = (2.0*MY_PI/xprd);
+  const double unitky = (2.0*MY_PI/yprd);
+  const double unitkz = (2.0*MY_PI/zprd_slab);
+
+  // fkx,fky,fkz for my FFT grid pts
+
+  double per;
+
+  for (i = nxlo_fft; i <= nxhi_fft; i++) {
+    per = i - nx_pppm*(2*i/nx_pppm);
+    fkx[i] = unitkx*per;
+  }
+
+  for (i = nylo_fft; i <= nyhi_fft; i++) {
+    per = i - ny_pppm*(2*i/ny_pppm);
+    fky[i] = unitky*per;
+  }
+
+  for (i = nzlo_fft; i <= nzhi_fft; i++) {
+    per = i - nz_pppm*(2*i/nz_pppm);
+    fkz[i] = unitkz*per;
+  }
+
+  // virial coefficients
+
+  double sqk,vterm;
+
+  n = 0;
+  for (k = nzlo_fft; k <= nzhi_fft; k++) {
+    for (j = nylo_fft; j <= nyhi_fft; j++) {
+      for (i = nxlo_fft; i <= nxhi_fft; i++) {
+	sqk = fkx[i]*fkx[i] + fky[j]*fky[j] + fkz[k]*fkz[k];
+	if (sqk == 0.0) {
+	  vg[n][0] = 0.0;
+	  vg[n][1] = 0.0;
+	  vg[n][2] = 0.0;
+	  vg[n][3] = 0.0;
+	  vg[n][4] = 0.0;
+	  vg[n][5] = 0.0;
+	} else {
+	  vterm = -2.0 * (1.0/sqk + 0.25/(g_ewald*g_ewald));
+	  vg[n][0] = 1.0 + vterm*fkx[i]*fkx[i];
+	  vg[n][1] = 1.0 + vterm*fky[j]*fky[j];
+	  vg[n][2] = 1.0 + vterm*fkz[k]*fkz[k];
+	  vg[n][3] = vterm*fkx[i]*fky[j];
+	  vg[n][4] = vterm*fkx[i]*fkz[k];
+	  vg[n][5] = vterm*fky[j]*fkz[k];
+	}
+	n++;
+      }
+    }
+  }
+
+  // modified (Hockney-Eastwood) Coulomb Green's function
+
+  const int nbx = static_cast<int> ((g_ewald*xprd/(MY_PI*nx_pppm)) * 
+				    pow(-log(EPS_HOC),0.25));
+  const int nby = static_cast<int> ((g_ewald*yprd/(MY_PI*ny_pppm)) * 
+				    pow(-log(EPS_HOC),0.25));
+  const int nbz = static_cast<int> ((g_ewald*zprd_slab/(MY_PI*nz_pppm)) * 
+				    pow(-log(EPS_HOC),0.25));
+
+  const double form = 1.0;
+
+#if defined(_OPENMP)
+#pragma omp parallel default(none)
+#endif
+  {
+    int tid,nn,nnfrom,nnto,nx,ny,nz,k,l,m;
+    double snx,sny,snz,sqk;
+    double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz;
+    double sum1,dot1,dot2;
+    double numerator,denominator;
+    const double gew2 = -4.0*g_ewald*g_ewald;
+    
+    const int nnx = nxhi_fft-nxlo_fft+1;
+    const int nny = nyhi_fft-nylo_fft+1;
+    
+    loop_setup_thr(nnfrom, nnto, tid, nfft, comm->nthreads);
+    
+    for (m = nzlo_fft; m <= nzhi_fft; m++) {
+      
+      const double fkzm = fkz[m];
+      snz = sin(0.5*fkzm*zprd_slab/nz_pppm);
+      snz *= snz;
+
+      for (l = nylo_fft; l <= nyhi_fft; l++) {
+	const double fkyl = fky[l];
+	sny = sin(0.5*fkyl*yprd/ny_pppm);
+	sny *= sny;
+
+	for (k = nxlo_fft; k <= nxhi_fft; k++) {
+
+	  /* only compute the part designated to this thread */
+	  nn = k-nxlo_fft + nnx*(l-nylo_fft + nny*(m-nzlo_fft));
+	  if ((nn < nnfrom) || (nn >=nnto)) continue;
+
+	  const double fkxk = fkx[k];
+	  snx = sin(0.5*fkxk*xprd/nx_pppm);
+	  snx *= snx;
+      
+	  sqk = fkxk*fkxk + fkyl*fkyl + fkzm*fkzm;
+
+	  if (sqk != 0.0) {
+	    numerator = form*MY_4PI/sqk;
+	    denominator = gf_denom(snx,sny,snz);  
+	    sum1 = 0.0;
+	    for (nx = -nbx; nx <= nbx; nx++) {
+	      qx = fkxk + unitkx*nx_pppm*nx;
+	      sx = exp(qx*qx/gew2);
+	      wx = 1.0;
+	      argx = 0.5*qx*xprd/nx_pppm;
+	      if (argx != 0.0) wx = pow(sin(argx)/argx,order);
+	      wx *=wx;
+
+	      for (ny = -nby; ny <= nby; ny++) {
+		qy = fkyl + unitky*ny_pppm*ny;
+		sy = exp(qy*qy/gew2);
+		wy = 1.0;
+		argy = 0.5*qy*yprd/ny_pppm;
+		if (argy != 0.0) wy = pow(sin(argy)/argy,order);
+		wy *= wy;
+
+		for (nz = -nbz; nz <= nbz; nz++) {
+		  qz = fkzm + unitkz*nz_pppm*nz;
+		  sz = exp(qz*qz/gew2);
+		  wz = 1.0;
+		  argz = 0.5*qz*zprd_slab/nz_pppm;
+		  if (argz != 0.0) wz = pow(sin(argz)/argz,order);
+		  wz *= wz;
+
+		  dot1 = fkxk*qx + fkyl*qy + fkzm*qz;
+		  dot2 = qx*qx+qy*qy+qz*qz;
+		  sum1 += (dot1/dot2) * sx*sy*sz * wx*wy*wz;
+		}
+	      }
+	    }
+	    greensfn[nn] = numerator*sum1/denominator;
+	  } else greensfn[nn] = 0.0;
+	}
+      }
+    }
+  }
+}
+
+/* ----------------------------------------------------------------------
+   run the regular toplevel compute method from plain PPPPMCG 
+   which will have individual methods replaced by our threaded
+   versions and then call the obligatory force reduction.
+------------------------------------------------------------------------- */
+
+void PPPMCGOMP::compute(int eflag, int vflag)
+{
+
+  PPPMCG::compute(eflag,vflag);
+
+#if defined(_OPENMP)
+#pragma omp parallel default(none) shared(eflag,vflag)
+#endif
+  {
+#if defined(_OPENMP)
+    const int tid = omp_get_thread_num();
+#else
+    const int tid = 0;
+#endif
+    ThrData *thr = fix->get_thr(tid);
+    reduce_thr(this, eflag, vflag, thr);
+  } // end of omp parallel region
+}
+
+/* ----------------------------------------------------------------------
+   create discretized "density" on section of global grid due to my particles
+   density(x,y,z) = charge "density" at grid points of my 3d brick
+   (nxlo:nxhi,nylo:nyhi,nzlo:nzhi) is extent of my brick (including ghosts)
+   in global grid 
+------------------------------------------------------------------------- */
+
+void PPPMCGOMP::make_rho()
+{
+  const double * const q = atom->q;
+  const double * const * const x = atom->x;
+  const int nthreads = comm->nthreads;
+
+#if defined(_OPENMP)
+#pragma omp parallel default(none)
+  {  
+    // each thread works on a fixed chunk of atoms.
+    const int tid = omp_get_thread_num();
+    const int inum = num_charged;
+    const int idelta = 1 + inum/nthreads;
+    const int ifrom = tid*idelta;
+    const int ito = ((ifrom + idelta) > inum) ? inum : ifrom + idelta;
+#else
+    const int tid = 0;
+    const int ifrom = 0;
+    const int ito = num_charged;
+#endif
+
+    int i,j,l,m,n,nx,ny,nz,mx,my,mz;
+    FFT_SCALAR dx,dy,dz,x0,y0,z0;
+
+    // set up clear 3d density array
+    const int nzoffs = (nzhi_out-nzlo_out+1)*tid;
+    FFT_SCALAR * const * const * const db = &(density_brick[nzoffs]);
+    memset(&(db[nzlo_out][nylo_out][nxlo_out]),0,ngrid*sizeof(FFT_SCALAR));
+
+    ThrData *thr = fix->get_thr(tid);
+    FFT_SCALAR * const * const r1d = static_cast<FFT_SCALAR **>(thr->get_rho1d());
+
+    // loop over my charges, add their contribution to nearby grid points
+    // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
+    // (dx,dy,dz) = distance to "lower left" grid pt
+    // (mx,my,mz) = global coords of moving stencil pt
+
+    // this if protects against having more threads than charged local atoms
+    if (ifrom < num_charged) {
+      for (j = ifrom; j < ito; j++) {
+	i = is_charged[j];
+
+	nx = part2grid[i][0];
+	ny = part2grid[i][1];
+	nz = part2grid[i][2];
+	dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv;
+	dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv;
+	dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv;
+
+	compute_rho1d_thr(r1d,dx,dy,dz);
+
+	z0 = delvolinv * q[i];
+	for (n = nlower; n <= nupper; n++) {
+	  mz = n+nz;
+	  y0 = z0*r1d[2][n];
+	  for (m = nlower; m <= nupper; m++) {
+	    my = m+ny;
+	    x0 = y0*r1d[1][m];
+	    for (l = nlower; l <= nupper; l++) {
+	      mx = l+nx;
+	      db[mz][my][mx] += x0*r1d[0][l];
+	    }
+	  }
+	}
+      }
+    }
+#if defined(_OPENMP)
+    // reduce 3d density array
+    if (nthreads > 1) {
+      data_reduce_fft(&(density_brick[nzlo_out][nylo_out][nxlo_out]),ngrid,nthreads,1,tid);
+    }
+  }
+#endif
+}
+
+/* ----------------------------------------------------------------------
+   interpolate from grid to get electric field & force on my particles 
+------------------------------------------------------------------------- */
+
+void PPPMCGOMP::fieldforce()
+{
+  // loop over my charges, interpolate electric field from nearby grid points
+  // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
+  // (dx,dy,dz) = distance to "lower left" grid pt
+  // (mx,my,mz) = global coords of moving stencil pt
+  // ek = 3 components of E-field on particle
+
+  const double * const q = atom->q;
+  const double * const * const x = atom->x;
+  const int nthreads = comm->nthreads;
+  const double qqrd2e = force->qqrd2e;
+
+#if defined(_OPENMP)
+#pragma omp parallel default(none)
+  {  
+    // each thread works on a fixed chunk of atoms.
+    const int tid = omp_get_thread_num();
+    const int inum = num_charged;
+    const int idelta = 1 + inum/nthreads;
+    const int ifrom = tid*idelta;
+    const int ito = ((ifrom + idelta) > inum) ? inum : ifrom + idelta;
+#else
+    const int ifrom = 0;
+    const int ito = num_charged;
+    const int tid = 0;
+#endif
+    ThrData *thr = fix->get_thr(tid);
+    double * const * const f = thr->get_f();
+    FFT_SCALAR * const * const r1d =  static_cast<FFT_SCALAR **>(thr->get_rho1d());
+    
+    int i,j,l,m,n,nx,ny,nz,mx,my,mz;
+    FFT_SCALAR dx,dy,dz,x0,y0,z0;
+    FFT_SCALAR ekx,eky,ekz;
+
+    // this if protects against having more threads than charged local atoms
+    if (ifrom < num_charged) { 
+      for (j = ifrom; j < ito; j++) {
+	i = is_charged[j];
+
+	nx = part2grid[i][0];
+	ny = part2grid[i][1];
+	nz = part2grid[i][2];
+	dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv;
+	dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv;
+	dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv;
+
+	compute_rho1d_thr(r1d,dx,dy,dz);
+
+	ekx = eky = ekz = ZEROF;
+	for (n = nlower; n <= nupper; n++) {
+	  mz = n+nz;
+	  z0 = r1d[2][n];
+	  for (m = nlower; m <= nupper; m++) {
+	    my = m+ny;
+	    y0 = z0*r1d[1][m];
+	    for (l = nlower; l <= nupper; l++) {
+	      mx = l+nx;
+	      x0 = y0*r1d[0][l];
+	      ekx -= x0*vdx_brick[mz][my][mx];
+	      eky -= x0*vdy_brick[mz][my][mx];
+	      ekz -= x0*vdz_brick[mz][my][mx];
+	    }
+	  }
+	}
+
+	// convert E-field to force
+	const double qfactor = qqrd2e*scale*q[i];
+	f[i][0] += qfactor*ekx;
+	f[i][1] += qfactor*eky;
+	f[i][2] += qfactor*ekz;
+      }
+    }
+#if defined(_OPENMP)
+  }
+#endif
+}
+
+/* ----------------------------------------------------------------------
+   charge assignment into rho1d
+   dx,dy,dz = distance of particle from "lower left" grid point 
+------------------------------------------------------------------------- */
+void PPPMCGOMP::compute_rho1d_thr(FFT_SCALAR * const * const r1d, const FFT_SCALAR &dx,
+				  const FFT_SCALAR &dy, const FFT_SCALAR &dz)
+{
+  int k,l;
+  FFT_SCALAR r1,r2,r3;
+
+  for (k = (1-order)/2; k <= order/2; k++) {
+    r1 = r2 = r3 = ZEROF;
+
+    for (l = order-1; l >= 0; l--) {
+      r1 = rho_coeff[l][k] + r1*dx;
+      r2 = rho_coeff[l][k] + r2*dy;
+      r3 = rho_coeff[l][k] + r3*dz;
+    }
+    r1d[0][k] = r1;
+    r1d[1][k] = r2;
+    r1d[2][k] = r3;
+  }
+}
+
--- a/src/USER-OMP/pppm_cg_omp.h
+++ b/src/USER-OMP/pppm_cg_omp.h
@ -0,0 +1,51 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifdef KSPACE_CLASS
+
+KSpaceStyle(pppm/cg/omp,PPPMCGOMP)
+
+#else
+
+#ifndef LMP_PPPM_CG_OMP_H
+#define LMP_PPPM_CG_OMP_H
+
+#include "pppm_cg.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+  class PPPMCGOMP : public PPPMCG, public ThrOMP {
+ public:
+  PPPMCGOMP(class LAMMPS *, int, char **);
+  virtual void compute(int, int);
+  virtual void setup();
+  virtual ~PPPMCGOMP () {};
+
+ protected:
+  virtual void allocate();
+  virtual void deallocate();
+  virtual void fieldforce();
+  virtual void make_rho();
+
+  void compute_rho1d_thr(FFT_SCALAR * const * const, const FFT_SCALAR &,
+			 const FFT_SCALAR &, const FFT_SCALAR &);
+//  void compute_rho_coeff();
+//  void slabcorr(int);
+
+};
+
+}
+
+#endif
+#endif