git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@7265 f3b2605a-c512-4ea7-a41b-209d697bcdaa

2011-12-01 17:05:34 +00:00 · 2011-12-01 17:05:34 +00:00 · 57a20c2706
parent 3895849986
commit 57a20c2706
94 changed files with 15319 additions and 0 deletions
--- a/src/USER-OMP/angle_charmm_omp.cpp
+++ b/src/USER-OMP/angle_charmm_omp.cpp
@ -0,0 +1,191 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#include "lmptype.h"
+#include "angle_charmm_omp.h"
+#include "atom.h"
+#include "comm.h"
+#include "force.h"
+#include "neighbor.h"
+#include "domain.h"
+
+#include "math_const.h"
+
+#include <math.h>
+
+using namespace LAMMPS_NS;
+using namespace MathConst;
+
+#define SMALL 0.001
+
+/* ---------------------------------------------------------------------- */
+
+void AngleCharmmOMP::compute(int eflag, int vflag)
+{
+
+  if (eflag || vflag) {
+    ev_setup(eflag,vflag);
+  } else evflag = 0;
+
+  const int nall = atom->nlocal + atom->nghost;
+  const int nthreads = comm->nthreads;
+  const int inum = neighbor->nanglelist;
+
+#if defined(_OPENMP)
+#pragma omp parallel default(none) shared(eflag,vflag)
+#endif
+  {
+    int ifrom, ito, tid;
+
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
+
+    if (evflag) {
+      if (eflag) {
+	if (force->newton_bond) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
+      } else {
+	if (force->newton_bond) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
+      }
+    } else {
+      if (force->newton_bond) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
+    }
+
+    reduce_thr(this, eflag, vflag, thr);
+  } // end of omp parallel region
+}
+
+template <int EVFLAG, int EFLAG, int NEWTON_BOND>
+void AngleCharmmOMP::eval(int nfrom, int nto, ThrData * const thr)
+{
+  int i1,i2,i3,n,type;
+  double delx1,dely1,delz1,delx2,dely2,delz2;
+  double eangle,f1[3],f3[3];
+  double dtheta,tk;
+  double rsq1,rsq2,r1,r2,c,s,a,a11,a12,a22;
+  double delxUB,delyUB,delzUB,rsqUB,rUB,dr,rk,forceUB;
+
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  const int * const * const anglelist = neighbor->anglelist;
+  const int nlocal = atom->nlocal;
+
+  for (n = nfrom; n < nto; n++) {
+    i1 = anglelist[n][0];
+    i2 = anglelist[n][1];
+    i3 = anglelist[n][2];
+    type = anglelist[n][3];
+
+    // 1st bond
+
+    delx1 = x[i1][0] - x[i2][0];
+    dely1 = x[i1][1] - x[i2][1];
+    delz1 = x[i1][2] - x[i2][2];
+    domain->minimum_image(delx1,dely1,delz1);
+
+    rsq1 = delx1*delx1 + dely1*dely1 + delz1*delz1;
+    r1 = sqrt(rsq1);
+
+    // 2nd bond
+
+    delx2 = x[i3][0] - x[i2][0];
+    dely2 = x[i3][1] - x[i2][1];
+    delz2 = x[i3][2] - x[i2][2];
+    domain->minimum_image(delx2,dely2,delz2);
+
+    rsq2 = delx2*delx2 + dely2*dely2 + delz2*delz2;
+    r2 = sqrt(rsq2);
+
+    // Urey-Bradley bond
+
+    delxUB = x[i3][0] - x[i1][0];
+    delyUB = x[i3][1] - x[i1][1];
+    delzUB = x[i3][2] - x[i1][2];
+    domain->minimum_image(delxUB,delyUB,delzUB);
+
+    rsqUB = delxUB*delxUB + delyUB*delyUB + delzUB*delzUB;
+    rUB = sqrt(rsqUB);
+
+    // Urey-Bradley force & energy
+
+    dr = rUB - r_ub[type];
+    rk = k_ub[type] * dr;
+
+    if (rUB > 0.0) forceUB = -2.0*rk/rUB;
+    else forceUB = 0.0;
+
+    if (EFLAG) eangle = rk*dr;
+
+    // angle (cos and sin)
+
+    c = delx1*delx2 + dely1*dely2 + delz1*delz2;
+    c /= r1*r2;
+        
+    if (c > 1.0) c = 1.0;
+    if (c < -1.0) c = -1.0;
+        
+    s = sqrt(1.0 - c*c);
+    if (s < SMALL) s = SMALL;
+    s = 1.0/s;
+
+    // harmonic force & energy
+
+    dtheta = acos(c) - theta0[type];
+    tk = k[type] * dtheta;
+
+    if (EFLAG) eangle += tk*dtheta;
+
+    a = -2.0 * tk * s;
+    a11 = a*c / rsq1;
+    a12 = -a / (r1*r2);
+    a22 = a*c / rsq2;
+
+    f1[0] = a11*delx1 + a12*delx2 - delxUB*forceUB;
+    f1[1] = a11*dely1 + a12*dely2 - delyUB*forceUB;
+    f1[2] = a11*delz1 + a12*delz2 - delzUB*forceUB;
+
+    f3[0] = a22*delx2 + a12*delx1 + delxUB*forceUB;
+    f3[1] = a22*dely2 + a12*dely1 + delyUB*forceUB;
+    f3[2] = a22*delz2 + a12*delz1 + delzUB*forceUB;
+
+    // apply force to each of 3 atoms
+
+    if (NEWTON_BOND || i1 < nlocal) {
+      f[i1][0] += f1[0];
+      f[i1][1] += f1[1];
+      f[i1][2] += f1[2];
+    }
+
+    if (NEWTON_BOND || i2 < nlocal) {
+      f[i2][0] -= f1[0] + f3[0];
+      f[i2][1] -= f1[1] + f3[1];
+      f[i2][2] -= f1[2] + f3[2];
+    }
+
+    if (NEWTON_BOND || i3 < nlocal) {
+      f[i3][0] += f3[0];
+      f[i3][1] += f3[1];
+      f[i3][2] += f3[2];
+    }
+
+    if (EVFLAG) ev_tally_thr(this,i1,i2,i3,nlocal,NEWTON_BOND,eangle,f1,f3,
+			     delx1,dely1,delz1,delx2,dely2,delz2,thr);
+  }
+}
--- a/src/USER-OMP/angle_charmm_omp.h
+++ b/src/USER-OMP/angle_charmm_omp.h
@ -0,0 +1,48 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifdef ANGLE_CLASS
+
+AngleStyle(charmm/omp,AngleCharmmOMP)
+
+#else
+
+#ifndef LMP_ANGLE_CHARMM_OMP_H
+#define LMP_ANGLE_CHARMM_OMP_H
+
+#include "angle_charmm.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+class AngleCharmmOMP : public AngleCharmm, public ThrOMP {
+
+ public:
+    AngleCharmmOMP(class LAMMPS *lmp) : 
+      AngleCharmm(lmp), ThrOMP(lmp,THR_ANGLE) {};
+
+  virtual void compute(int, int);
+
+ private:
+  template <int EVFLAG, int EFLAG, int NEWTON_BOND>
+  void eval(int ifrom, int ito, ThrData * const thr);
+};
+
+}
+
+#endif
+#endif
--- a/src/USER-OMP/angle_class2_omp.cpp
+++ b/src/USER-OMP/angle_class2_omp.cpp
@ -0,0 +1,234 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#include "lmptype.h"
+#include "angle_class2_omp.h"
+#include "atom.h"
+#include "comm.h"
+#include "force.h"
+#include "neighbor.h"
+#include "domain.h"
+
+#include "math_const.h"
+
+#include <math.h>
+
+using namespace LAMMPS_NS;
+using namespace MathConst;
+
+#define SMALL 0.001
+
+/* ---------------------------------------------------------------------- */
+
+void AngleClass2OMP::compute(int eflag, int vflag)
+{
+
+  if (eflag || vflag) {
+    ev_setup(eflag,vflag);
+  } else evflag = 0;
+
+  const int nall = atom->nlocal + atom->nghost;
+  const int nthreads = comm->nthreads;
+  const int inum = neighbor->nanglelist;
+
+#if defined(_OPENMP)
+#pragma omp parallel default(none) shared(eflag,vflag)
+#endif
+  {
+    int ifrom, ito, tid;
+
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
+
+    if (evflag) {
+      if (eflag) {
+	if (force->newton_bond) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
+      } else {
+	if (force->newton_bond) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
+      }
+    } else {
+      if (force->newton_bond) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
+    }
+
+    reduce_thr(this, eflag, vflag, thr);
+  } // end of omp parallel region
+}
+
+template <int EVFLAG, int EFLAG, int NEWTON_BOND>
+void AngleClass2OMP::eval(int nfrom, int nto, ThrData * const thr)
+{
+  int i1,i2,i3,n,type;
+  double delx1,dely1,delz1,delx2,dely2,delz2;
+  double eangle,f1[3],f3[3];
+  double dtheta,dtheta2,dtheta3,dtheta4,de_angle;
+  double dr1,dr2,tk1,tk2,aa1,aa2,aa11,aa12,aa21,aa22;
+  double rsq1,rsq2,r1,r2,c,s,a,a11,a12,a22,b1,b2;
+  double vx11,vx12,vy11,vy12,vz11,vz12,vx21,vx22,vy21,vy22,vz21,vz22;
+
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  const int * const * const anglelist = neighbor->anglelist;
+  const int nlocal = atom->nlocal;
+
+  for (n = nfrom; n < nto; n++) {
+    i1 = anglelist[n][0];
+    i2 = anglelist[n][1];
+    i3 = anglelist[n][2];
+    type = anglelist[n][3];
+
+    // 1st bond
+
+    delx1 = x[i1][0] - x[i2][0];
+    dely1 = x[i1][1] - x[i2][1];
+    delz1 = x[i1][2] - x[i2][2];
+    domain->minimum_image(delx1,dely1,delz1);
+
+    rsq1 = delx1*delx1 + dely1*dely1 + delz1*delz1;
+    r1 = sqrt(rsq1);
+
+    // 2nd bond
+
+    delx2 = x[i3][0] - x[i2][0];
+    dely2 = x[i3][1] - x[i2][1];
+    delz2 = x[i3][2] - x[i2][2];
+    domain->minimum_image(delx2,dely2,delz2);
+
+    rsq2 = delx2*delx2 + dely2*dely2 + delz2*delz2;
+    r2 = sqrt(rsq2);
+
+    // angle (cos and sin)
+
+    c = delx1*delx2 + dely1*dely2 + delz1*delz2;
+    c /= r1*r2;
+        
+    if (c > 1.0) c = 1.0;
+    if (c < -1.0) c = -1.0;
+        
+    s = sqrt(1.0 - c*c);
+    if (s < SMALL) s = SMALL;
+    s = 1.0/s;
+
+    // force & energy for angle term
+
+    dtheta = acos(c) - theta0[type];
+    dtheta2 = dtheta*dtheta;
+    dtheta3 = dtheta2*dtheta;
+    dtheta4 = dtheta3*dtheta;
+
+    de_angle = 2.0*k2[type]*dtheta + 3.0*k3[type]*dtheta2 + 
+      4.0*k4[type]*dtheta3;
+
+    a = -de_angle*s;
+    a11 = a*c / rsq1;
+    a12 = -a / (r1*r2);
+    a22 = a*c / rsq2;
+
+    f1[0] = a11*delx1 + a12*delx2;
+    f1[1] = a11*dely1 + a12*dely2;
+    f1[2] = a11*delz1 + a12*delz2;
+
+    f3[0] = a22*delx2 + a12*delx1;
+    f3[1] = a22*dely2 + a12*dely1;
+    f3[2] = a22*delz2 + a12*delz1;
+
+    if (EFLAG) eangle = k2[type]*dtheta2 + k3[type]*dtheta3 + k4[type]*dtheta4;
+
+    // force & energy for bond-bond term
+
+    dr1 = r1 - bb_r1[type];
+    dr2 = r2 - bb_r2[type];
+    tk1 = bb_k[type] * dr1;
+    tk2 = bb_k[type] * dr2;
+
+    f1[0] -= delx1*tk2/r1;
+    f1[1] -= dely1*tk2/r1;
+    f1[2] -= delz1*tk2/r1;
+
+    f3[0] -= delx2*tk1/r2;
+    f3[1] -= dely2*tk1/r2;
+    f3[2] -= delz2*tk1/r2;
+
+    if (EFLAG) eangle += bb_k[type]*dr1*dr2;
+
+    // force & energy for bond-angle term
+
+    aa1 = s * dr1 * ba_k1[type];
+    aa2 = s * dr2 * ba_k2[type];
+
+    aa11 = aa1 * c / rsq1;
+    aa12 = -aa1 / (r1 * r2);
+    aa21 = aa2 * c / rsq1;
+    aa22 = -aa2 / (r1 * r2);
+
+    vx11 = (aa11 * delx1) + (aa12 * delx2);
+    vx12 = (aa21 * delx1) + (aa22 * delx2);
+    vy11 = (aa11 * dely1) + (aa12 * dely2);
+    vy12 = (aa21 * dely1) + (aa22 * dely2);
+    vz11 = (aa11 * delz1) + (aa12 * delz2);
+    vz12 = (aa21 * delz1) + (aa22 * delz2);
+
+    aa11 = aa1 * c / rsq2;
+    aa21 = aa2 * c / rsq2;
+
+    vx21 = (aa11 * delx2) + (aa12 * delx1);
+    vx22 = (aa21 * delx2) + (aa22 * delx1);
+    vy21 = (aa11 * dely2) + (aa12 * dely1);
+    vy22 = (aa21 * dely2) + (aa22 * dely1);
+    vz21 = (aa11 * delz2) + (aa12 * delz1);
+    vz22 = (aa21 * delz2) + (aa22 * delz1);
+
+    b1 = ba_k1[type] * dtheta / r1;
+    b2 = ba_k2[type] * dtheta / r2;
+
+    f1[0] -= vx11 + b1*delx1 + vx12;
+    f1[1] -= vy11 + b1*dely1 + vy12;
+    f1[2] -= vz11 + b1*delz1 + vz12;
+
+    f3[0] -= vx21 + b2*delx2 + vx22;
+    f3[1] -= vy21 + b2*dely2 + vy22;
+    f3[2] -= vz21 + b2*delz2 + vz22;
+
+    if (EFLAG) eangle += ba_k1[type]*dr1*dtheta + ba_k2[type]*dr2*dtheta;
+
+    // apply force to each of 3 atoms
+
+    if (NEWTON_BOND || i1 < nlocal) {
+      f[i1][0] += f1[0];
+      f[i1][1] += f1[1];
+      f[i1][2] += f1[2];
+    }
+
+    if (NEWTON_BOND || i2 < nlocal) {
+      f[i2][0] -= f1[0] + f3[0];
+      f[i2][1] -= f1[1] + f3[1];
+      f[i2][2] -= f1[2] + f3[2];
+    }
+
+    if (NEWTON_BOND || i3 < nlocal) {
+      f[i3][0] += f3[0];
+      f[i3][1] += f3[1];
+      f[i3][2] += f3[2];
+    }
+
+    if (EVFLAG) ev_tally_thr(this,i1,i2,i3,nlocal,NEWTON_BOND,eangle,f1,f3,
+			     delx1,dely1,delz1,delx2,dely2,delz2,thr);
+  }
+}
--- a/src/USER-OMP/angle_class2_omp.h
+++ b/src/USER-OMP/angle_class2_omp.h
@ -0,0 +1,48 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifdef ANGLE_CLASS
+
+AngleStyle(class2/omp,AngleClass2OMP)
+
+#else
+
+#ifndef LMP_ANGLE_CLASS2_OMP_H
+#define LMP_ANGLE_CLASS2_OMP_H
+
+#include "angle_class2.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+class AngleClass2OMP : public AngleClass2, public ThrOMP {
+
+ public:
+    AngleClass2OMP(class LAMMPS *lmp) : 
+      AngleClass2(lmp), ThrOMP(lmp,THR_ANGLE) {};
+
+  virtual void compute(int, int);
+
+ private:
+  template <int EVFLAG, int EFLAG, int NEWTON_BOND>
+  void eval(int ifrom, int ito, ThrData * const thr);
+};
+
+}
+
+#endif
+#endif
--- a/src/USER-OMP/angle_cosine_delta_omp.cpp
+++ b/src/USER-OMP/angle_cosine_delta_omp.cpp
@ -0,0 +1,183 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#include "lmptype.h"
+#include "angle_cosine_delta_omp.h"
+#include "atom.h"
+#include "comm.h"
+#include "force.h"
+#include "neighbor.h"
+#include "domain.h"
+
+#include "math_const.h"
+
+#include <math.h>
+
+using namespace LAMMPS_NS;
+using namespace MathConst;
+
+#define SMALL 0.001
+
+/* ---------------------------------------------------------------------- */
+
+void AngleCosineDeltaOMP::compute(int eflag, int vflag)
+{
+
+  if (eflag || vflag) {
+    ev_setup(eflag,vflag);
+  } else evflag = 0;
+
+  const int nall = atom->nlocal + atom->nghost;
+  const int nthreads = comm->nthreads;
+  const int inum = neighbor->nanglelist;
+
+#if defined(_OPENMP)
+#pragma omp parallel default(none) shared(eflag,vflag)
+#endif
+  {
+    int ifrom, ito, tid;
+
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
+
+    if (evflag) {
+      if (eflag) {
+	if (force->newton_bond) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
+      } else {
+	if (force->newton_bond) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
+      }
+    } else {
+      if (force->newton_bond) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
+    }
+
+    reduce_thr(this, eflag, vflag, thr);
+  } // end of omp parallel region
+}
+
+template <int EVFLAG, int EFLAG, int NEWTON_BOND>
+void AngleCosineDeltaOMP::eval(int nfrom, int nto, ThrData * const thr)
+{
+  int i1,i2,i3,n,type;
+  double delx1,dely1,delz1,delx2,dely2,delz2,theta,dtheta,dcostheta,tk;
+  double eangle,f1[3],f3[3];
+  double rsq1,rsq2,r1,r2,c,a,cot,a11,a12,a22,b11,b12,b22,c0,s0,s;
+
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  const int * const * const anglelist = neighbor->anglelist;
+  const int nlocal = atom->nlocal;
+
+  for (n = nfrom; n < nto; n++) {
+    i1 = anglelist[n][0];
+    i2 = anglelist[n][1];
+    i3 = anglelist[n][2];
+    type = anglelist[n][3];
+
+    // 1st bond
+
+    delx1 = x[i1][0] - x[i2][0];
+    dely1 = x[i1][1] - x[i2][1];
+    delz1 = x[i1][2] - x[i2][2];
+    domain->minimum_image(delx1,dely1,delz1);
+
+    rsq1 = delx1*delx1 + dely1*dely1 + delz1*delz1;
+    r1 = sqrt(rsq1);
+
+    // 2nd bond
+
+    delx2 = x[i3][0] - x[i2][0];
+    dely2 = x[i3][1] - x[i2][1];
+    delz2 = x[i3][2] - x[i2][2];
+    domain->minimum_image(delx2,dely2,delz2);
+
+    rsq2 = delx2*delx2 + dely2*dely2 + delz2*delz2;
+    r2 = sqrt(rsq2);
+
+    // angle (cos and sin)
+
+    c = delx1*delx2 + dely1*dely2 + delz1*delz2;
+    c /= r1*r2;
+        
+    if (c > 1.0) c = 1.0;
+    if (c < -1.0) c = -1.0;
+        
+    theta = acos(c);
+ 
+    s = sqrt(1.0 - c*c);
+    if (s < SMALL) s = SMALL;
+    s = 1.0/s;
+   
+    cot = c/s;
+ 
+    // force & energy
+	
+    dtheta = theta - theta0[type]; 
+    dcostheta = cos(dtheta);
+    tk = k[type] * (1.0-dcostheta);
+
+    if (EFLAG) eangle = tk;
+
+    a = -k[type];
+
+    // expand dtheta for cos and sin contribution to force
+    
+    a11 = a*c / rsq1;
+    a12 = -a / (r1*r2);
+    a22 = a*c / rsq2;
+    
+    b11 = -a*c*cot / rsq1;
+    b12 = a*cot / (r1*r2);
+    b22 = -a*c*cot / rsq2;
+        
+    c0 = cos(theta0[type]);
+    s0 = sin(theta0[type]);
+
+    f1[0] = (a11*delx1 + a12*delx2)*c0 + (b11*delx1 + b12*delx2)*s0;
+    f1[1] = (a11*dely1 + a12*dely2)*c0 + (b11*dely1 + b12*dely2)*s0;
+    f1[2] = (a11*delz1 + a12*delz2)*c0 + (b11*delz1 + b12*delz2)*s0;
+    f3[0] = (a22*delx2 + a12*delx1)*c0 + (b22*delx2 + b12*delx1)*s0;
+    f3[1] = (a22*dely2 + a12*dely1)*c0 + (b22*dely2 + b12*dely1)*s0;
+    f3[2] = (a22*delz2 + a12*delz1)*c0 + (b22*delz2 + b12*delz1)*s0;
+
+    // apply force to each of 3 atoms
+
+    if (NEWTON_BOND || i1 < nlocal) {
+      f[i1][0] += f1[0];
+      f[i1][1] += f1[1];
+      f[i1][2] += f1[2];
+    }
+
+    if (NEWTON_BOND || i2 < nlocal) {
+      f[i2][0] -= f1[0] + f3[0];
+      f[i2][1] -= f1[1] + f3[1];
+      f[i2][2] -= f1[2] + f3[2];
+    }
+
+    if (NEWTON_BOND || i3 < nlocal) {
+      f[i3][0] += f3[0];
+      f[i3][1] += f3[1];
+      f[i3][2] += f3[2];
+    }
+
+    if (EVFLAG) ev_tally_thr(this,i1,i2,i3,nlocal,NEWTON_BOND,eangle,f1,f3,
+			     delx1,dely1,delz1,delx2,dely2,delz2,thr);
+  }
+}
--- a/src/USER-OMP/angle_cosine_delta_omp.h
+++ b/src/USER-OMP/angle_cosine_delta_omp.h
@ -0,0 +1,48 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifdef ANGLE_CLASS
+
+AngleStyle(cosine/delta/omp,AngleCosineDeltaOMP)
+
+#else
+
+#ifndef LMP_ANGLE_COSINE_DELTA_OMP_H
+#define LMP_ANGLE_COSINE_DELTA_OMP_H
+
+#include "angle_cosine_delta.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+class AngleCosineDeltaOMP : public AngleCosineDelta, public ThrOMP {
+
+ public:
+    AngleCosineDeltaOMP(class LAMMPS *lmp) : 
+      AngleCosineDelta(lmp), ThrOMP(lmp,THR_ANGLE) {};
+
+  virtual void compute(int, int);
+
+ private:
+  template <int EVFLAG, int EFLAG, int NEWTON_BOND>
+  void eval(int ifrom, int ito, ThrData * const thr);
+};
+
+}
+
+#endif
+#endif
--- a/src/USER-OMP/angle_cosine_omp.cpp
+++ b/src/USER-OMP/angle_cosine_omp.cpp
@ -0,0 +1,160 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#include "lmptype.h"
+#include "angle_cosine_omp.h"
+#include "atom.h"
+#include "comm.h"
+#include "force.h"
+#include "neighbor.h"
+#include "domain.h"
+
+#include "math_const.h"
+
+#include <math.h>
+
+using namespace LAMMPS_NS;
+using namespace MathConst;
+
+#define SMALL 0.001
+
+/* ---------------------------------------------------------------------- */
+
+void AngleCosineOMP::compute(int eflag, int vflag)
+{
+
+  if (eflag || vflag) {
+    ev_setup(eflag,vflag);
+  } else evflag = 0;
+
+  const int nall = atom->nlocal + atom->nghost;
+  const int nthreads = comm->nthreads;
+  const int inum = neighbor->nanglelist;
+
+#if defined(_OPENMP)
+#pragma omp parallel default(none) shared(eflag,vflag)
+#endif
+  {
+    int ifrom, ito, tid;
+
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
+
+    if (evflag) {
+      if (eflag) {
+	if (force->newton_bond) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
+      } else {
+	if (force->newton_bond) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
+      }
+    } else {
+      if (force->newton_bond) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
+    }
+
+    reduce_thr(this, eflag, vflag, thr);
+  } // end of omp parallel region
+}
+
+template <int EVFLAG, int EFLAG, int NEWTON_BOND>
+void AngleCosineOMP::eval(int nfrom, int nto, ThrData * const thr)
+{
+  int i1,i2,i3,n,type;
+  double delx1,dely1,delz1,delx2,dely2,delz2;
+  double eangle,f1[3],f3[3];
+  double rsq1,rsq2,r1,r2,c,a,a11,a12,a22;
+
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  const int * const * const anglelist = neighbor->anglelist;
+  const int nlocal = atom->nlocal;
+
+  for (n = nfrom; n < nto; n++) {
+    i1 = anglelist[n][0];
+    i2 = anglelist[n][1];
+    i3 = anglelist[n][2];
+    type = anglelist[n][3];
+
+    // 1st bond
+
+    delx1 = x[i1][0] - x[i2][0];
+    dely1 = x[i1][1] - x[i2][1];
+    delz1 = x[i1][2] - x[i2][2];
+    domain->minimum_image(delx1,dely1,delz1);
+
+    rsq1 = delx1*delx1 + dely1*dely1 + delz1*delz1;
+    r1 = sqrt(rsq1);
+
+    // 2nd bond
+
+    delx2 = x[i3][0] - x[i2][0];
+    dely2 = x[i3][1] - x[i2][1];
+    delz2 = x[i3][2] - x[i2][2];
+    domain->minimum_image(delx2,dely2,delz2);
+
+    rsq2 = delx2*delx2 + dely2*dely2 + delz2*delz2;
+    r2 = sqrt(rsq2);
+
+    // c = cosine of angle
+
+    c = delx1*delx2 + dely1*dely2 + delz1*delz2;
+    c /= r1*r2;
+    if (c > 1.0) c = 1.0;
+    if (c < -1.0) c = -1.0;
+
+    // force & energy
+
+    if (EFLAG) eangle = k[type]*(1.0+c);
+
+    a = k[type];
+    a11 = a*c / rsq1;
+    a12 = -a / (r1*r2);
+    a22 = a*c / rsq2;
+
+    f1[0] = a11*delx1 + a12*delx2;
+    f1[1] = a11*dely1 + a12*dely2;
+    f1[2] = a11*delz1 + a12*delz2;
+    f3[0] = a22*delx2 + a12*delx1;
+    f3[1] = a22*dely2 + a12*dely1;
+    f3[2] = a22*delz2 + a12*delz1;
+
+    // apply force to each of 3 atoms
+
+    if (NEWTON_BOND || i1 < nlocal) {
+      f[i1][0] += f1[0];
+      f[i1][1] += f1[1];
+      f[i1][2] += f1[2];
+    }
+
+    if (NEWTON_BOND || i2 < nlocal) {
+      f[i2][0] -= f1[0] + f3[0];
+      f[i2][1] -= f1[1] + f3[1];
+      f[i2][2] -= f1[2] + f3[2];
+    }
+
+    if (NEWTON_BOND || i3 < nlocal) {
+      f[i3][0] += f3[0];
+      f[i3][1] += f3[1];
+      f[i3][2] += f3[2];
+    }
+
+    if (EVFLAG) ev_tally_thr(this,i1,i2,i3,nlocal,NEWTON_BOND,eangle,f1,f3,
+			     delx1,dely1,delz1,delx2,dely2,delz2,thr);
+  }
+}
--- a/src/USER-OMP/angle_cosine_omp.h
+++ b/src/USER-OMP/angle_cosine_omp.h
@ -0,0 +1,48 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifdef ANGLE_CLASS
+
+AngleStyle(cosine/omp,AngleCosineOMP)
+
+#else
+
+#ifndef LMP_ANGLE_COSINE_OMP_H
+#define LMP_ANGLE_COSINE_OMP_H
+
+#include "angle_cosine.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+class AngleCosineOMP : public AngleCosine, public ThrOMP {
+
+ public:
+    AngleCosineOMP(class LAMMPS *lmp) : 
+      AngleCosine(lmp), ThrOMP(lmp,THR_ANGLE) {};
+
+  virtual void compute(int, int);
+
+ private:
+  template <int EVFLAG, int EFLAG, int NEWTON_BOND>
+  void eval(int ifrom, int ito, ThrData * const thr);
+};
+
+}
+
+#endif
+#endif
--- a/src/USER-OMP/angle_cosine_periodic_omp.cpp
+++ b/src/USER-OMP/angle_cosine_periodic_omp.cpp
@ -0,0 +1,198 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#include "lmptype.h"
+#include "angle_cosine_periodic_omp.h"
+#include "atom.h"
+#include "comm.h"
+#include "force.h"
+#include "neighbor.h"
+#include "domain.h"
+
+#include "math_const.h"
+
+#include <math.h>
+
+using namespace LAMMPS_NS;
+using namespace MathConst;
+
+#define SMALL 0.001
+
+/* ---------------------------------------------------------------------- */
+
+void AngleCosinePeriodicOMP::compute(int eflag, int vflag)
+{
+
+  if (eflag || vflag) {
+    ev_setup(eflag,vflag);
+  } else evflag = 0;
+
+  const int nall = atom->nlocal + atom->nghost;
+  const int nthreads = comm->nthreads;
+  const int inum = neighbor->nanglelist;
+
+#if defined(_OPENMP)
+#pragma omp parallel default(none) shared(eflag,vflag)
+#endif
+  {
+    int ifrom, ito, tid;
+
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
+
+    if (evflag) {
+      if (eflag) {
+	if (force->newton_bond) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
+      } else {
+	if (force->newton_bond) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
+      }
+    } else {
+      if (force->newton_bond) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
+    }
+
+    reduce_thr(this, eflag, vflag, thr);
+  } // end of omp parallel region
+}
+
+template <int EVFLAG, int EFLAG, int NEWTON_BOND>
+void AngleCosinePeriodicOMP::eval(int nfrom, int nto, ThrData * const thr)
+{
+  int i,i1,i2,i3,n,m,type,b_factor;
+  double delx1,dely1,delz1,delx2,dely2,delz2;
+  double eangle,f1[3],f3[3];
+  double rsq1,rsq2,r1,r2,c,s,a,a11,a12,a22;
+  double tn,tn_1,tn_2,un,un_1,un_2;
+
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  const int * const * const anglelist = neighbor->anglelist;
+  const int nlocal = atom->nlocal;
+
+  for (n = nfrom; n < nto; n++) {
+    i1 = anglelist[n][0];
+    i2 = anglelist[n][1];
+    i3 = anglelist[n][2];
+    type = anglelist[n][3];
+
+    // 1st bond
+
+    delx1 = x[i1][0] - x[i2][0];
+    dely1 = x[i1][1] - x[i2][1];
+    delz1 = x[i1][2] - x[i2][2];
+    domain->minimum_image(delx1,dely1,delz1);
+
+    rsq1 = delx1*delx1 + dely1*dely1 + delz1*delz1;
+    r1 = sqrt(rsq1);
+
+    // 2nd bond
+
+    delx2 = x[i3][0] - x[i2][0];
+    dely2 = x[i3][1] - x[i2][1];
+    delz2 = x[i3][2] - x[i2][2];
+    domain->minimum_image(delx2,dely2,delz2);
+
+    rsq2 = delx2*delx2 + dely2*dely2 + delz2*delz2;
+    r2 = sqrt(rsq2);
+
+    // c = cosine of angle
+
+    c = delx1*delx2 + dely1*dely2 + delz1*delz2;
+    c /= r1*r2;
+    if (c > 1.0) c = 1.0;
+    if (c < -1.0) c = -1.0;
+
+    m = multiplicity[type];
+    b_factor = b[type];
+
+    // cos(n*x) = Tn(cos(x))
+    // Tn(x) = Chebyshev polynomials of the first kind: T_0 = 1, T_1 = x, ...
+    // recurrence relationship:
+    // Tn(x) = 2*x*T[n-1](x) - T[n-2](x) where T[-1](x) = 0
+    // also, dTn(x)/dx = n*U[n-1](x)
+    // where Un(x) = 2*x*U[n-1](x) - U[n-2](x) and U[-1](x) = 0
+    // finally need to handle special case for n = 1
+
+    tn = 1.0;
+    tn_1 = 1.0;
+    tn_2 = 0.0;
+    un = 1.0; 
+    un_1 = 2.0;
+    un_2 = 0.0;
+
+    s = sqrt(1.0 - c*c);
+    if (s < SMALL) s = SMALL;
+    s = 1.0/s;
+
+    // force & energy
+
+    tn_2 = c;
+    for (i = 1; i <= m; i++) {
+      tn = 2*c*tn_1 - tn_2;
+      tn_2 = tn_1;
+      tn_1 = tn;
+    }
+
+    for (i = 2; i <= m; i++) {
+      un = 2*c*un_1 - un_2;
+      un_2 = un_1;
+      un_1 = un;
+    }
+    tn = b_factor*pow(-1.0,m)*tn;
+    un = b_factor*pow(-1.0,m)*m*un;
+
+    if (EFLAG) eangle = 2*k[type]*(1.0 - tn);
+
+    a = -k[type]*un;
+    a11 = a*c / rsq1;
+    a12 = -a / (r1*r2);
+    a22 = a*c / rsq2;
+        
+    f1[0] = a11*delx1 + a12*delx2;
+    f1[1] = a11*dely1 + a12*dely2;
+    f1[2] = a11*delz1 + a12*delz2;
+    f3[0] = a22*delx2 + a12*delx1;
+    f3[1] = a22*dely2 + a12*dely1;
+    f3[2] = a22*delz2 + a12*delz1;
+
+    // apply force to each of 3 atoms
+
+    if (NEWTON_BOND || i1 < nlocal) {
+      f[i1][0] += f1[0];
+      f[i1][1] += f1[1];
+      f[i1][2] += f1[2];
+    }
+
+    if (NEWTON_BOND || i2 < nlocal) {
+      f[i2][0] -= f1[0] + f3[0];
+      f[i2][1] -= f1[1] + f3[1];
+      f[i2][2] -= f1[2] + f3[2];
+    }
+
+    if (NEWTON_BOND || i3 < nlocal) {
+      f[i3][0] += f3[0];
+      f[i3][1] += f3[1];
+      f[i3][2] += f3[2];
+    }
+
+    if (EVFLAG) ev_tally_thr(this,i1,i2,i3,nlocal,NEWTON_BOND,eangle,f1,f3,
+			     delx1,dely1,delz1,delx2,dely2,delz2,thr);
+  }
+}
--- a/src/USER-OMP/angle_cosine_periodic_omp.h
+++ b/src/USER-OMP/angle_cosine_periodic_omp.h
@ -0,0 +1,48 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifdef ANGLE_CLASS
+
+AngleStyle(cosine/periodic/omp,AngleCosinePeriodicOMP)
+
+#else
+
+#ifndef LMP_ANGLE_COSINE_PERIODIC_OMP_H
+#define LMP_ANGLE_COSINE_PERIODIC_OMP_H
+
+#include "angle_cosine_periodic.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+class AngleCosinePeriodicOMP : public AngleCosinePeriodic, public ThrOMP {
+
+ public:
+    AngleCosinePeriodicOMP(class LAMMPS *lmp) : 
+      AngleCosinePeriodic(lmp), ThrOMP(lmp,THR_ANGLE) {};
+
+  virtual void compute(int, int);
+
+ private:
+  template <int EVFLAG, int EFLAG, int NEWTON_BOND>
+  void eval(int ifrom, int ito, ThrData * const thr);
+};
+
+}
+
+#endif
+#endif
--- a/src/USER-OMP/angle_cosine_shift_exp_omp.cpp
+++ b/src/USER-OMP/angle_cosine_shift_exp_omp.cpp
@ -0,0 +1,181 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#include "lmptype.h"
+#include "angle_cosine_shift_exp_omp.h"
+#include "atom.h"
+#include "comm.h"
+#include "force.h"
+#include "neighbor.h"
+#include "domain.h"
+
+#include "math_const.h"
+
+#include <math.h>
+
+using namespace LAMMPS_NS;
+using namespace MathConst;
+
+#define SMALL 0.001
+
+/* ---------------------------------------------------------------------- */
+
+void AngleCosineShiftExpOMP::compute(int eflag, int vflag)
+{
+
+  if (eflag || vflag) {
+    ev_setup(eflag,vflag);
+  } else evflag = 0;
+
+  const int nall = atom->nlocal + atom->nghost;
+  const int nthreads = comm->nthreads;
+  const int inum = neighbor->nanglelist;
+
+#if defined(_OPENMP)
+#pragma omp parallel default(none) shared(eflag,vflag)
+#endif
+  {
+    int ifrom, ito, tid;
+
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
+
+    if (evflag) {
+      if (eflag) {
+	if (force->newton_bond) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
+      } else {
+	if (force->newton_bond) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
+      }
+    } else {
+      if (force->newton_bond) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
+    }
+
+    reduce_thr(this, eflag, vflag, thr);
+  } // end of omp parallel region
+}
+
+template <int EVFLAG, int EFLAG, int NEWTON_BOND>
+void AngleCosineShiftExpOMP::eval(int nfrom, int nto, ThrData * const thr)
+{
+  int i1,i2,i3,n,type;
+  double delx1,dely1,delz1,delx2,dely2,delz2;
+  double eangle,f1[3],f3[3],ff;
+  double rsq1,rsq2,r1,r2,c,s,a11,a12,a22;
+  double exp2,aa,uumin,cccpsss,cssmscc;            
+
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  const int * const * const anglelist = neighbor->anglelist;
+  const int nlocal = atom->nlocal;
+
+  for (n = nfrom; n < nto; n++) {
+    i1 = anglelist[n][0];
+    i2 = anglelist[n][1];
+    i3 = anglelist[n][2];
+    type = anglelist[n][3];
+
+    // 1st bond
+
+    delx1 = x[i1][0] - x[i2][0];
+    dely1 = x[i1][1] - x[i2][1];
+    delz1 = x[i1][2] - x[i2][2];
+    domain->minimum_image(delx1,dely1,delz1);
+
+    rsq1 = delx1*delx1 + dely1*dely1 + delz1*delz1;
+    r1 = sqrt(rsq1);
+
+    // 2nd bond
+
+    delx2 = x[i3][0] - x[i2][0];
+    dely2 = x[i3][1] - x[i2][1];
+    delz2 = x[i3][2] - x[i2][2];
+    domain->minimum_image(delx2,dely2,delz2);
+
+    rsq2 = delx2*delx2 + dely2*dely2 + delz2*delz2;
+    r2 = sqrt(rsq2);
+
+    // c = cosine of angle
+    c = delx1*delx2 + dely1*dely2 + delz1*delz2;
+    c /= r1*r2;
+    if (c > 1.0) c = 1.0;
+    if (c < -1.0) c = -1.0;
+
+    // C= sine of angle
+    s = sqrt(1.0 - c*c);
+    if (s < SMALL) s = SMALL;
+    
+    // force & energy
+        
+    aa=a[type];
+    uumin=umin[type];
+
+    cccpsss = c*cost[type]+s*sint[type];
+    cssmscc = c*sint[type]-s*cost[type];
+
+    if (doExpansion[type])
+       {  //  |a|<0.01 so use expansions relative precision <1e-5
+//         std::cout << "Using expansion\n";
+            if (EFLAG) eangle = -0.125*(1+cccpsss)*(4+aa*(cccpsss-1))*uumin;
+            ff=0.25*uumin*cssmscc*(2+aa*cccpsss)/s;   
+       }
+     else
+       {
+//   std::cout << "Not using expansion\n";            
+            exp2=exp(0.5*aa*(1+cccpsss));
+            if (EFLAG) eangle = opt1[type]*(1-exp2);
+            ff=0.5*a[type]*opt1[type]*exp2*cssmscc/s;       
+       }
+
+    a11 =   ff*c/ rsq1;
+    a12 =  -ff  / (r1*r2);
+    a22 =   ff*c/ rsq2;      
+
+    f1[0] = a11*delx1 + a12*delx2;
+    f1[1] = a11*dely1 + a12*dely2;
+    f1[2] = a11*delz1 + a12*delz2;
+    f3[0] = a22*delx2 + a12*delx1;
+    f3[1] = a22*dely2 + a12*dely1;
+    f3[2] = a22*delz2 + a12*delz1;
+
+    // apply force to each of 3 atoms
+
+    if (NEWTON_BOND || i1 < nlocal) {
+      f[i1][0] += f1[0];
+      f[i1][1] += f1[1];
+      f[i1][2] += f1[2];
+    }
+
+    if (NEWTON_BOND || i2 < nlocal) {
+      f[i2][0] -= f1[0] + f3[0];
+      f[i2][1] -= f1[1] + f3[1];
+      f[i2][2] -= f1[2] + f3[2];
+    }
+
+    if (NEWTON_BOND || i3 < nlocal) {
+      f[i3][0] += f3[0];
+      f[i3][1] += f3[1];
+      f[i3][2] += f3[2];
+    }
+
+    if (EVFLAG) ev_tally_thr(this,i1,i2,i3,nlocal,NEWTON_BOND,eangle,f1,f3,
+			     delx1,dely1,delz1,delx2,dely2,delz2,thr);
+  }
+}
--- a/src/USER-OMP/angle_cosine_shift_exp_omp.h
+++ b/src/USER-OMP/angle_cosine_shift_exp_omp.h
@ -0,0 +1,48 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifdef ANGLE_CLASS
+
+AngleStyle(cosine/shift/exp/omp,AngleCosineShiftExpOMP)
+
+#else
+
+#ifndef LMP_ANGLE_COSINE_SHIFT_EXP_OMP_H
+#define LMP_ANGLE_COSINE_SHIFT_EXP_OMP_H
+
+#include "angle_cosine_shift_exp.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+class AngleCosineShiftExpOMP : public AngleCosineShiftExp, public ThrOMP {
+
+ public:
+    AngleCosineShiftExpOMP(class LAMMPS *lmp) : 
+      AngleCosineShiftExp(lmp), ThrOMP(lmp,THR_ANGLE) {};
+
+  virtual void compute(int, int);
+
+ private:
+  template <int EVFLAG, int EFLAG, int NEWTON_BOND>
+  void eval(int ifrom, int ito, ThrData * const thr);
+};
+
+}
+
+#endif
+#endif
--- a/src/USER-OMP/angle_cosine_shift_omp.cpp
+++ b/src/USER-OMP/angle_cosine_shift_omp.cpp
@ -0,0 +1,165 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#include "lmptype.h"
+#include "angle_cosine_shift_omp.h"
+#include "atom.h"
+#include "comm.h"
+#include "force.h"
+#include "neighbor.h"
+#include "domain.h"
+
+#include "math_const.h"
+
+#include <math.h>
+
+using namespace LAMMPS_NS;
+using namespace MathConst;
+
+#define SMALL 0.001
+
+/* ---------------------------------------------------------------------- */
+
+void AngleCosineShiftOMP::compute(int eflag, int vflag)
+{
+
+  if (eflag || vflag) {
+    ev_setup(eflag,vflag);
+  } else evflag = 0;
+
+  const int nall = atom->nlocal + atom->nghost;
+  const int nthreads = comm->nthreads;
+  const int inum = neighbor->nanglelist;
+
+#if defined(_OPENMP)
+#pragma omp parallel default(none) shared(eflag,vflag)
+#endif
+  {
+    int ifrom, ito, tid;
+
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
+
+    if (evflag) {
+      if (eflag) {
+	if (force->newton_bond) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
+      } else {
+	if (force->newton_bond) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
+      }
+    } else {
+      if (force->newton_bond) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
+    }
+
+    reduce_thr(this, eflag, vflag, thr);
+  } // end of omp parallel region
+}
+
+template <int EVFLAG, int EFLAG, int NEWTON_BOND>
+void AngleCosineShiftOMP::eval(int nfrom, int nto, ThrData * const thr)
+{
+  int i1,i2,i3,n,type;
+  double delx1,dely1,delz1,delx2,dely2,delz2;
+  double eangle,f1[3],f3[3];
+  double rsq1,rsq2,r1,r2,c,s,cps,kcos,ksin,a11,a12,a22;
+
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  const int * const * const anglelist = neighbor->anglelist;
+  const int nlocal = atom->nlocal;
+
+  for (n = nfrom; n < nto; n++) {
+    i1 = anglelist[n][0];
+    i2 = anglelist[n][1];
+    i3 = anglelist[n][2];
+    type = anglelist[n][3];
+
+    // 1st bond
+
+    delx1 = x[i1][0] - x[i2][0];
+    dely1 = x[i1][1] - x[i2][1];
+    delz1 = x[i1][2] - x[i2][2];
+    domain->minimum_image(delx1,dely1,delz1);
+
+    rsq1 = delx1*delx1 + dely1*dely1 + delz1*delz1;
+    r1 = sqrt(rsq1);
+
+    // 2nd bond
+
+    delx2 = x[i3][0] - x[i2][0];
+    dely2 = x[i3][1] - x[i2][1];
+    delz2 = x[i3][2] - x[i2][2];
+    domain->minimum_image(delx2,dely2,delz2);
+
+    rsq2 = delx2*delx2 + dely2*dely2 + delz2*delz2;
+    r2 = sqrt(rsq2);
+
+    // c = cosine of angle
+    c = delx1*delx2 + dely1*dely2 + delz1*delz2;
+    c /= r1*r2;
+    if (c > 1.0) c = 1.0;
+    if (c < -1.0) c = -1.0;
+
+    // C= sine of angle
+    s = sqrt(1.0 - c*c);
+    if (s < SMALL) s = SMALL;
+    
+    // force & energy
+    if (EFLAG) eangle = -k[type]-kcos*c-ksin*s;  
+
+    kcos=kcost[type];
+    ksin=ksint[type];
+    cps = c/s;          // NOTE absorbed one c
+
+    a11 = (-kcos +ksin*cps )*c/ rsq1;
+    a12 = ( kcos -ksin*cps )  / (r1*r2);
+    a22 = (-kcos +ksin*cps )*c/ rsq2;
+
+    f1[0] = a11*delx1 + a12*delx2;
+    f1[1] = a11*dely1 + a12*dely2;
+    f1[2] = a11*delz1 + a12*delz2;
+    f3[0] = a22*delx2 + a12*delx1;
+    f3[1] = a22*dely2 + a12*dely1;
+    f3[2] = a22*delz2 + a12*delz1;
+
+    // apply force to each of 3 atoms
+
+    if (NEWTON_BOND || i1 < nlocal) {
+      f[i1][0] += f1[0];
+      f[i1][1] += f1[1];
+      f[i1][2] += f1[2];
+    }
+
+    if (NEWTON_BOND || i2 < nlocal) {
+      f[i2][0] -= f1[0] + f3[0];
+      f[i2][1] -= f1[1] + f3[1];
+      f[i2][2] -= f1[2] + f3[2];
+    }
+
+    if (NEWTON_BOND || i3 < nlocal) {
+      f[i3][0] += f3[0];
+      f[i3][1] += f3[1];
+      f[i3][2] += f3[2];
+    }
+
+    if (EVFLAG) ev_tally_thr(this,i1,i2,i3,nlocal,NEWTON_BOND,eangle,f1,f3,
+			     delx1,dely1,delz1,delx2,dely2,delz2,thr);
+  }
+}
--- a/src/USER-OMP/angle_cosine_shift_omp.h
+++ b/src/USER-OMP/angle_cosine_shift_omp.h
@ -0,0 +1,48 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifdef ANGLE_CLASS
+
+AngleStyle(cosine/shift/omp,AngleCosineShiftOMP)
+
+#else
+
+#ifndef LMP_ANGLE_COSINE_SHIFT_OMP_H
+#define LMP_ANGLE_COSINE_SHIFT_OMP_H
+
+#include "angle_cosine_shift.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+class AngleCosineShiftOMP : public AngleCosineShift, public ThrOMP {
+
+ public:
+    AngleCosineShiftOMP(class LAMMPS *lmp) : 
+      AngleCosineShift(lmp), ThrOMP(lmp,THR_ANGLE) {};
+
+  virtual void compute(int, int);
+
+ private:
+  template <int EVFLAG, int EFLAG, int NEWTON_BOND>
+  void eval(int ifrom, int ito, ThrData * const thr);
+};
+
+}
+
+#endif
+#endif
--- a/src/USER-OMP/angle_cosine_squared_omp.cpp
+++ b/src/USER-OMP/angle_cosine_squared_omp.cpp
@ -0,0 +1,165 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#include "lmptype.h"
+#include "angle_cosine_squared_omp.h"
+#include "atom.h"
+#include "comm.h"
+#include "force.h"
+#include "neighbor.h"
+#include "domain.h"
+
+#include "math_const.h"
+
+#include <math.h>
+
+using namespace LAMMPS_NS;
+using namespace MathConst;
+
+#define SMALL 0.001
+
+/* ---------------------------------------------------------------------- */
+
+void AngleCosineSquaredOMP::compute(int eflag, int vflag)
+{
+
+  if (eflag || vflag) {
+    ev_setup(eflag,vflag);
+  } else evflag = 0;
+
+  const int nall = atom->nlocal + atom->nghost;
+  const int nthreads = comm->nthreads;
+  const int inum = neighbor->nanglelist;
+
+#if defined(_OPENMP)
+#pragma omp parallel default(none) shared(eflag,vflag)
+#endif
+  {
+    int ifrom, ito, tid;
+
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
+
+    if (evflag) {
+      if (eflag) {
+	if (force->newton_bond) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
+      } else {
+	if (force->newton_bond) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
+      }
+    } else {
+      if (force->newton_bond) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
+    }
+
+    reduce_thr(this, eflag, vflag, thr);
+  } // end of omp parallel region
+}
+
+template <int EVFLAG, int EFLAG, int NEWTON_BOND>
+void AngleCosineSquaredOMP::eval(int nfrom, int nto, ThrData * const thr)
+{
+  int i1,i2,i3,n,type;
+  double delx1,dely1,delz1,delx2,dely2,delz2;
+  double eangle,f1[3],f3[3];
+  double dcostheta,tk;
+  double rsq1,rsq2,r1,r2,c,a,a11,a12,a22;
+
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  const int * const * const anglelist = neighbor->anglelist;
+  const int nlocal = atom->nlocal;
+
+  for (n = nfrom; n < nto; n++) {
+    i1 = anglelist[n][0];
+    i2 = anglelist[n][1];
+    i3 = anglelist[n][2];
+    type = anglelist[n][3];
+
+    // 1st bond
+
+    delx1 = x[i1][0] - x[i2][0];
+    dely1 = x[i1][1] - x[i2][1];
+    delz1 = x[i1][2] - x[i2][2];
+    domain->minimum_image(delx1,dely1,delz1);
+
+    rsq1 = delx1*delx1 + dely1*dely1 + delz1*delz1;
+    r1 = sqrt(rsq1);
+
+    // 2nd bond
+
+    delx2 = x[i3][0] - x[i2][0];
+    dely2 = x[i3][1] - x[i2][1];
+    delz2 = x[i3][2] - x[i2][2];
+    domain->minimum_image(delx2,dely2,delz2);
+
+    rsq2 = delx2*delx2 + dely2*dely2 + delz2*delz2;
+    r2 = sqrt(rsq2);
+
+    // angle (cos and sin)
+
+    c = delx1*delx2 + dely1*dely2 + delz1*delz2;
+    c /= r1*r2;
+        
+    if (c > 1.0) c = 1.0;
+    if (c < -1.0) c = -1.0;
+        
+    // force & energy
+	
+    dcostheta = c - cos(theta0[type]);
+    tk = k[type] * dcostheta;
+
+    if (EFLAG) eangle = tk*dcostheta;
+
+    a = 2.0 * tk;
+    a11 = a*c / rsq1;
+    a12 = -a / (r1*r2);
+    a22 = a*c / rsq2;
+
+    f1[0] = a11*delx1 + a12*delx2;
+    f1[1] = a11*dely1 + a12*dely2;
+    f1[2] = a11*delz1 + a12*delz2;
+    f3[0] = a22*delx2 + a12*delx1;
+    f3[1] = a22*dely2 + a12*dely1;
+    f3[2] = a22*delz2 + a12*delz1;
+
+    // apply force to each of 3 atoms
+
+    if (NEWTON_BOND || i1 < nlocal) {
+      f[i1][0] += f1[0];
+      f[i1][1] += f1[1];
+      f[i1][2] += f1[2];
+    }
+
+    if (NEWTON_BOND || i2 < nlocal) {
+      f[i2][0] -= f1[0] + f3[0];
+      f[i2][1] -= f1[1] + f3[1];
+      f[i2][2] -= f1[2] + f3[2];
+    }
+
+    if (NEWTON_BOND || i3 < nlocal) {
+      f[i3][0] += f3[0];
+      f[i3][1] += f3[1];
+      f[i3][2] += f3[2];
+    }
+
+    if (EVFLAG) ev_tally_thr(this,i1,i2,i3,nlocal,NEWTON_BOND,eangle,f1,f3,
+			     delx1,dely1,delz1,delx2,dely2,delz2,thr);
+  }
+}
--- a/src/USER-OMP/angle_cosine_squared_omp.h
+++ b/src/USER-OMP/angle_cosine_squared_omp.h
@ -0,0 +1,48 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifdef ANGLE_CLASS
+
+AngleStyle(cosine/squared/omp,AngleCosineSquaredOMP)
+
+#else
+
+#ifndef LMP_ANGLE_COSINE_SQUARED_OMP_H
+#define LMP_ANGLE_COSINE_SQUARED_OMP_H
+
+#include "angle_cosine_squared.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+class AngleCosineSquaredOMP : public AngleCosineSquared, public ThrOMP {
+
+ public:
+    AngleCosineSquaredOMP(class LAMMPS *lmp) : 
+      AngleCosineSquared(lmp), ThrOMP(lmp,THR_ANGLE) {};
+
+  virtual void compute(int, int);
+
+ private:
+  template <int EVFLAG, int EFLAG, int NEWTON_BOND>
+  void eval(int ifrom, int ito, ThrData * const thr);
+};
+
+}
+
+#endif
+#endif
--- a/src/USER-OMP/angle_harmonic_omp.cpp
+++ b/src/USER-OMP/angle_harmonic_omp.cpp
@ -0,0 +1,169 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#include "lmptype.h"
+#include "angle_harmonic_omp.h"
+#include "atom.h"
+#include "comm.h"
+#include "force.h"
+#include "neighbor.h"
+#include "domain.h"
+
+#include "math_const.h"
+
+#include <math.h>
+
+using namespace LAMMPS_NS;
+using namespace MathConst;
+
+#define SMALL 0.001
+
+/* ---------------------------------------------------------------------- */
+
+void AngleHarmonicOMP::compute(int eflag, int vflag)
+{
+
+  if (eflag || vflag) {
+    ev_setup(eflag,vflag);
+  } else evflag = 0;
+
+  const int nall = atom->nlocal + atom->nghost;
+  const int nthreads = comm->nthreads;
+  const int inum = neighbor->nanglelist;
+
+#if defined(_OPENMP)
+#pragma omp parallel default(none) shared(eflag,vflag)
+#endif
+  {
+    int ifrom, ito, tid;
+
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
+
+    if (evflag) {
+      if (eflag) {
+	if (force->newton_bond) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
+      } else {
+	if (force->newton_bond) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
+      }
+    } else {
+      if (force->newton_bond) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
+    }
+
+    reduce_thr(this, eflag, vflag, thr);
+  } // end of omp parallel region
+}
+
+template <int EVFLAG, int EFLAG, int NEWTON_BOND>
+void AngleHarmonicOMP::eval(int nfrom, int nto, ThrData * const thr)
+{
+  int i1,i2,i3,n,type;
+  double delx1,dely1,delz1,delx2,dely2,delz2;
+  double eangle,f1[3],f3[3];
+  double dtheta,tk;
+  double rsq1,rsq2,r1,r2,c,s,a,a11,a12,a22;
+
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  const int * const * const anglelist = neighbor->anglelist;
+  const int nlocal = atom->nlocal;
+
+  for (n = nfrom; n < nto; n++) {
+    i1 = anglelist[n][0];
+    i2 = anglelist[n][1];
+    i3 = anglelist[n][2];
+    type = anglelist[n][3];
+
+    // 1st bond
+
+    delx1 = x[i1][0] - x[i2][0];
+    dely1 = x[i1][1] - x[i2][1];
+    delz1 = x[i1][2] - x[i2][2];
+    domain->minimum_image(delx1,dely1,delz1);
+
+    rsq1 = delx1*delx1 + dely1*dely1 + delz1*delz1;
+    r1 = sqrt(rsq1);
+
+    // 2nd bond
+
+    delx2 = x[i3][0] - x[i2][0];
+    dely2 = x[i3][1] - x[i2][1];
+    delz2 = x[i3][2] - x[i2][2];
+    domain->minimum_image(delx2,dely2,delz2);
+
+    rsq2 = delx2*delx2 + dely2*dely2 + delz2*delz2;
+    r2 = sqrt(rsq2);
+
+    // angle (cos and sin)
+
+    c = delx1*delx2 + dely1*dely2 + delz1*delz2;
+    c /= r1*r2;
+        
+    if (c > 1.0) c = 1.0;
+    if (c < -1.0) c = -1.0;
+        
+    s = sqrt(1.0 - c*c);
+    if (s < SMALL) s = SMALL;
+    s = 1.0/s;
+
+    // force & energy
+
+    dtheta = acos(c) - theta0[type];
+    tk = k[type] * dtheta;
+
+    if (EFLAG) eangle = tk*dtheta;
+
+    a = -2.0 * tk * s;
+    a11 = a*c / rsq1;
+    a12 = -a / (r1*r2);
+    a22 = a*c / rsq2;
+
+    f1[0] = a11*delx1 + a12*delx2;
+    f1[1] = a11*dely1 + a12*dely2;
+    f1[2] = a11*delz1 + a12*delz2;
+    f3[0] = a22*delx2 + a12*delx1;
+    f3[1] = a22*dely2 + a12*dely1;
+    f3[2] = a22*delz2 + a12*delz1;
+
+    // apply force to each of 3 atoms
+
+    if (NEWTON_BOND || i1 < nlocal) {
+      f[i1][0] += f1[0];
+      f[i1][1] += f1[1];
+      f[i1][2] += f1[2];
+    }
+
+    if (NEWTON_BOND || i2 < nlocal) {
+      f[i2][0] -= f1[0] + f3[0];
+      f[i2][1] -= f1[1] + f3[1];
+      f[i2][2] -= f1[2] + f3[2];
+    }
+
+    if (NEWTON_BOND || i3 < nlocal) {
+      f[i3][0] += f3[0];
+      f[i3][1] += f3[1];
+      f[i3][2] += f3[2];
+    }
+
+    if (EVFLAG) ev_tally_thr(this,i1,i2,i3,nlocal,NEWTON_BOND,eangle,f1,f3,
+			     delx1,dely1,delz1,delx2,dely2,delz2,thr);
+  }
+}
--- a/src/USER-OMP/angle_harmonic_omp.h
+++ b/src/USER-OMP/angle_harmonic_omp.h
@ -0,0 +1,48 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifdef ANGLE_CLASS
+
+AngleStyle(harmonic/omp,AngleHarmonicOMP)
+
+#else
+
+#ifndef LMP_ANGLE_HARMONIC_OMP_H
+#define LMP_ANGLE_HARMONIC_OMP_H
+
+#include "angle_harmonic.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+class AngleHarmonicOMP : public AngleHarmonic, public ThrOMP {
+
+ public:
+    AngleHarmonicOMP(class LAMMPS *lmp) : 
+      AngleHarmonic(lmp), ThrOMP(lmp,THR_ANGLE) {};
+
+  virtual void compute(int, int);
+
+ private:
+  template <int EVFLAG, int EFLAG, int NEWTON_BOND>
+  void eval(int ifrom, int ito, ThrData * const thr);
+};
+
+}
+
+#endif
+#endif
--- a/src/USER-OMP/angle_sdk_omp.cpp
+++ b/src/USER-OMP/angle_sdk_omp.cpp
@ -0,0 +1,225 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#include "lmptype.h"
+#include "angle_sdk_omp.h"
+#include "atom.h"
+#include "neighbor.h"
+#include "domain.h"
+#include "comm.h"
+#include "force.h"
+#include "math_const.h"
+
+#include <math.h>
+
+#include "lj_sdk_common.h"
+
+using namespace LAMMPS_NS;
+using namespace MathConst;
+using namespace LJSDKParms;
+
+#define SMALL 0.001
+
+/* ---------------------------------------------------------------------- */
+
+void AngleSDKOMP::compute(int eflag, int vflag)
+{
+
+  if (eflag || vflag) {
+    ev_setup(eflag,vflag);
+  } else evflag = 0;
+
+  const int nall = atom->nlocal + atom->nghost;
+  const int nthreads = comm->nthreads;
+  const int inum = neighbor->nanglelist;
+
+#if defined(_OPENMP)
+#pragma omp parallel default(none) shared(eflag,vflag)
+#endif
+  {
+    int ifrom, ito, tid;
+
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
+
+    if (evflag) {
+      if (eflag) {
+	if (force->newton_bond) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
+      } else {
+	if (force->newton_bond) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
+      }
+    } else {
+      if (force->newton_bond) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
+    }
+
+    reduce_thr(this, eflag, vflag, thr);
+  } // end of omp parallel region
+}
+
+template <int EVFLAG, int EFLAG, int NEWTON_BOND>
+void AngleSDKOMP::eval(int nfrom, int nto, ThrData * const thr)
+{
+  int i1,i2,i3,n,type;
+  double delx1,dely1,delz1,delx2,dely2,delz2,delx3,dely3,delz3;
+  double eangle,f1[3],f3[3],e13,f13;
+  double dtheta,tk;
+  double rsq1,rsq2,rsq3,r1,r2,c,s,a,a11,a12,a22;
+
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  const int * const * const anglelist = neighbor->anglelist;
+  const int nlocal = atom->nlocal;
+
+  for (n = nfrom; n < nto; n++) {
+    i1 = anglelist[n][0];
+    i2 = anglelist[n][1];
+    i3 = anglelist[n][2];
+    type = anglelist[n][3];
+
+    // 1st bond
+
+    delx1 = x[i1][0] - x[i2][0];
+    dely1 = x[i1][1] - x[i2][1];
+    delz1 = x[i1][2] - x[i2][2];
+    domain->minimum_image(delx1,dely1,delz1);
+
+    rsq1 = delx1*delx1 + dely1*dely1 + delz1*delz1;
+    r1 = sqrt(rsq1);
+
+    // 2nd bond
+
+    delx2 = x[i3][0] - x[i2][0];
+    dely2 = x[i3][1] - x[i2][1];
+    delz2 = x[i3][2] - x[i2][2];
+    domain->minimum_image(delx2,dely2,delz2);
+
+    rsq2 = delx2*delx2 + dely2*dely2 + delz2*delz2;
+    r2 = sqrt(rsq2);
+
+    // angle (cos and sin)
+
+    c = delx1*delx2 + dely1*dely2 + delz1*delz2;
+    c /= r1*r2;
+        
+    if (c > 1.0) c = 1.0;
+    if (c < -1.0) c = -1.0;
+        
+    s = sqrt(1.0 - c*c);
+    if (s < SMALL) s = SMALL;
+    s = 1.0/s;
+
+    // 1-3 LJ interaction. 
+    // we only want to use the repulsive part,
+    // and it can be scaled (or off).
+    // so this has to be done here and not in the
+    // general non-bonded code.
+
+    if (repflag) {
+      
+      delx3 = x[i1][0] - x[i3][0];
+      dely3 = x[i1][1] - x[i3][1];
+      delz3 = x[i1][2] - x[i3][2];
+      domain->minimum_image(delx3,dely3,delz3);
+      rsq3 = delx3*delx3 + dely3*dely3 + delz3*delz3;
+
+      const int type1 = atom->type[i1];
+      const int type3 = atom->type[i3];
+      
+      f13=0.0;
+      e13=0.0;
+    
+      if (rsq3 < rminsq[type1][type3]) {
+	const int ljt = lj_type[type1][type3];
+	const double r2inv = 1.0/rsq3;
+
+	if (ljt == LJ12_4) {
+	  const double r4inv=r2inv*r2inv;
+
+	  f13 = r4inv*(lj1[type1][type3]*r4inv*r4inv - lj2[type1][type3]);
+	  if (EFLAG) e13 = r4inv*(lj3[type1][type3]*r4inv*r4inv - lj4[type1][type3]);
+	  
+	} else if (ljt == LJ9_6) {
+	  const double r3inv = r2inv*sqrt(r2inv);
+	  const double r6inv = r3inv*r3inv;
+
+	  f13 = r6inv*(lj1[type1][type3]*r3inv - lj2[type1][type3]);
+	  if (EFLAG) e13 = r6inv*(lj3[type1][type3]*r3inv - lj4[type1][type3]);
+
+	} else if (ljt == LJ12_6) {
+	  const double r6inv = r2inv*r2inv*r2inv;
+
+	  f13 = r6inv*(lj1[type1][type3]*r6inv - lj2[type1][type3]);
+	  if (EFLAG) e13 = r6inv*(lj3[type1][type3]*r6inv - lj4[type1][type3]);
+	}
+
+	// make sure energy is 0.0 at the cutoff.
+	if (EFLAG) e13 -= emin[type1][type3];
+
+	f13 *= r2inv;
+      }
+    }
+
+    // force & energy
+
+    dtheta = acos(c) - theta0[type];
+    tk = k[type] * dtheta;
+
+    if (EFLAG) eangle = tk*dtheta;
+
+    a = -2.0 * tk * s;
+    a11 = a*c / rsq1;
+    a12 = -a / (r1*r2);
+    a22 = a*c / rsq2;
+
+    f1[0] = a11*delx1 + a12*delx2;
+    f1[1] = a11*dely1 + a12*dely2;
+    f1[2] = a11*delz1 + a12*delz2;
+    f3[0] = a22*delx2 + a12*delx1;
+    f3[1] = a22*dely2 + a12*dely1;
+    f3[2] = a22*delz2 + a12*delz1;
+
+    // apply force to each of the 3 atoms
+
+    if (NEWTON_BOND || i1 < nlocal) {
+      f[i1][0] += f1[0] + f13*delx3;
+      f[i1][1] += f1[1] + f13*dely3;
+      f[i1][2] += f1[2] + f13*delz3;
+    }
+
+    if (NEWTON_BOND || i2 < nlocal) {
+      f[i2][0] -= f1[0] + f3[0];
+      f[i2][1] -= f1[1] + f3[1];
+      f[i2][2] -= f1[2] + f3[2];
+    }
+
+    if (NEWTON_BOND || i3 < nlocal) {
+      f[i3][0] += f3[0] - f13*delx3;
+      f[i3][1] += f3[1] - f13*dely3;
+      f[i3][2] += f3[2] - f13*delz3;
+    }
+
+    if (EVFLAG) ev_tally_thr(this,i1,i2,i3,nlocal,NEWTON_BOND,eangle,f1,f3,
+			     delx1,dely1,delz1,delx2,dely2,delz2,thr);
+    if (EVFLAG) ev_tally13_thr(this,i1,i3,nlocal,NEWTON_BOND,
+			       e13,f13,delx3,dely3,delz3,thr);
+
+  }
+}
--- a/src/USER-OMP/angle_sdk_omp.h
+++ b/src/USER-OMP/angle_sdk_omp.h
@ -0,0 +1,49 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifdef ANGLE_CLASS
+
+AngleStyle(sdk/omp,AngleSDKOMP)
+AngleStyle(cg/cmm/omp,AngleSDKOMP)
+
+#else
+
+#ifndef LMP_ANGLE_SDK_OMP_H
+#define LMP_ANGLE_SDK_OMP_H
+
+#include "angle_sdk.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+class AngleSDKOMP : public AngleSDK, public ThrOMP {
+
+ public:
+    AngleSDKOMP(class LAMMPS *lmp) : 
+      AngleSDK(lmp), ThrOMP(lmp,THR_ANGLE) {};
+
+  virtual void compute(int, int);
+
+ private:
+  template <int EVFLAG, int EFLAG, int NEWTON_BOND>
+  void eval(int ifrom, int ito, ThrData * const thr);
+};
+
+}
+
+#endif
+#endif
--- a/src/USER-OMP/angle_table_omp.cpp
+++ b/src/USER-OMP/angle_table_omp.cpp
@ -0,0 +1,169 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#include "lmptype.h"
+#include "angle_table_omp.h"
+#include "atom.h"
+#include "comm.h"
+#include "force.h"
+#include "neighbor.h"
+#include "domain.h"
+
+#include "math_const.h"
+
+#include <math.h>
+
+using namespace LAMMPS_NS;
+using namespace MathConst;
+
+#define SMALL 0.001
+
+/* ---------------------------------------------------------------------- */
+
+void AngleTableOMP::compute(int eflag, int vflag)
+{
+
+  if (eflag || vflag) {
+    ev_setup(eflag,vflag);
+  } else evflag = 0;
+
+  const int nall = atom->nlocal + atom->nghost;
+  const int nthreads = comm->nthreads;
+  const int inum = neighbor->nanglelist;
+
+#if defined(_OPENMP)
+#pragma omp parallel default(none) shared(eflag,vflag)
+#endif
+  {
+    int ifrom, ito, tid;
+
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
+
+    if (evflag) {
+      if (eflag) {
+	if (force->newton_bond) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
+      } else {
+	if (force->newton_bond) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
+      }
+    } else {
+      if (force->newton_bond) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
+    }
+
+    reduce_thr(this, eflag, vflag, thr);
+  } // end of omp parallel region
+}
+
+template <int EVFLAG, int EFLAG, int NEWTON_BOND>
+void AngleTableOMP::eval(int nfrom, int nto, ThrData * const thr)
+{
+  int i1,i2,i3,n,type;
+  double eangle,f1[3],f3[3];
+  double delx1,dely1,delz1,delx2,dely2,delz2;
+  double rsq1,rsq2,r1,r2,c,s,a,a11,a12,a22;
+  double theta,u,mdu; //mdu: minus du, -du/dx=f
+
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  const int * const * const anglelist = neighbor->anglelist;
+  const int nlocal = atom->nlocal;
+
+  for (n = nfrom; n < nto; n++) {
+    i1 = anglelist[n][0];
+    i2 = anglelist[n][1];
+    i3 = anglelist[n][2];
+    type = anglelist[n][3];
+
+    // 1st bond
+
+    delx1 = x[i1][0] - x[i2][0];
+    dely1 = x[i1][1] - x[i2][1];
+    delz1 = x[i1][2] - x[i2][2];
+    domain->minimum_image(delx1,dely1,delz1);
+
+    rsq1 = delx1*delx1 + dely1*dely1 + delz1*delz1;
+    r1 = sqrt(rsq1);
+
+    // 2nd bond
+
+    delx2 = x[i3][0] - x[i2][0];
+    dely2 = x[i3][1] - x[i2][1];
+    delz2 = x[i3][2] - x[i2][2];
+    domain->minimum_image(delx2,dely2,delz2);
+
+    rsq2 = delx2*delx2 + dely2*dely2 + delz2*delz2;
+    r2 = sqrt(rsq2);
+
+    // angle (cos and sin)
+
+    c = delx1*delx2 + dely1*dely2 + delz1*delz2;
+    c /= r1*r2;
+        
+    if (c > 1.0) c = 1.0;
+    if (c < -1.0) c = -1.0;
+        
+    s = sqrt(1.0 - c*c);
+    if (s < SMALL) s = SMALL;
+    s = 1.0/s;
+
+    // tabulated force & energy
+
+    theta = acos(c);
+    uf_lookup(type,theta,u,mdu);
+    
+    if (EFLAG) eangle = u;
+
+    a = mdu * s;               
+    a11 = a*c / rsq1;
+    a12 = -a / (r1*r2);
+    a22 = a*c / rsq2;
+
+    f1[0] = a11*delx1 + a12*delx2;
+    f1[1] = a11*dely1 + a12*dely2;
+    f1[2] = a11*delz1 + a12*delz2;
+    f3[0] = a22*delx2 + a12*delx1;
+    f3[1] = a22*dely2 + a12*dely1;
+    f3[2] = a22*delz2 + a12*delz1;
+
+    // apply force to each of 3 atoms
+
+    if (NEWTON_BOND || i1 < nlocal) {
+      f[i1][0] += f1[0];
+      f[i1][1] += f1[1];
+      f[i1][2] += f1[2];
+    }
+
+    if (NEWTON_BOND || i2 < nlocal) {
+      f[i2][0] -= f1[0] + f3[0];
+      f[i2][1] -= f1[1] + f3[1];
+      f[i2][2] -= f1[2] + f3[2];
+    }
+
+    if (NEWTON_BOND || i3 < nlocal) {
+      f[i3][0] += f3[0];
+      f[i3][1] += f3[1];
+      f[i3][2] += f3[2];
+    }
+
+    if (EVFLAG) ev_tally_thr(this,i1,i2,i3,nlocal,NEWTON_BOND,eangle,f1,f3,
+			     delx1,dely1,delz1,delx2,dely2,delz2,thr);
+  }
+}
--- a/src/USER-OMP/angle_table_omp.h
+++ b/src/USER-OMP/angle_table_omp.h
@ -0,0 +1,48 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifdef ANGLE_CLASS
+
+AngleStyle(table/omp,AngleTableOMP)
+
+#else
+
+#ifndef LMP_ANGLE_TABLE_OMP_H
+#define LMP_ANGLE_TABLE_OMP_H
+
+#include "angle_table.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+class AngleTableOMP : public AngleTable, public ThrOMP {
+
+ public:
+    AngleTableOMP(class LAMMPS *lmp) : 
+      AngleTable(lmp), ThrOMP(lmp,THR_ANGLE) {};
+
+  virtual void compute(int, int);
+
+ private:
+  template <int EVFLAG, int EFLAG, int NEWTON_BOND>
+  void eval(int ifrom, int ito, ThrData * const thr);
+};
+
+}
+
+#endif
+#endif
--- a/src/USER-OMP/bond_class2_omp.cpp
+++ b/src/USER-OMP/bond_class2_omp.cpp
@ -0,0 +1,124 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#include "lmptype.h"
+#include "bond_class2_omp.h"
+#include "atom.h"
+#include "comm.h"
+#include "force.h"
+#include "neighbor.h"
+#include "domain.h"
+
+#include <math.h>
+
+using namespace LAMMPS_NS;
+
+/* ---------------------------------------------------------------------- */
+
+void BondClass2OMP::compute(int eflag, int vflag)
+{
+
+  if (eflag || vflag) {
+    ev_setup(eflag,vflag);
+  } else evflag = 0;
+
+  const int nall = atom->nlocal + atom->nghost;
+  const int nthreads = comm->nthreads;
+  const int inum = neighbor->nbondlist;
+
+#if defined(_OPENMP)
+#pragma omp parallel default(none) shared(eflag,vflag)
+#endif
+  {
+    int ifrom, ito, tid;
+
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
+
+    if (evflag) {
+      if (eflag) {
+	if (force->newton_bond) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
+      } else {
+	if (force->newton_bond) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
+      }
+    } else {
+      if (force->newton_bond) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
+    }
+
+    reduce_thr(this, eflag, vflag, thr);
+  } // end of omp parallel region
+}
+
+template <int EVFLAG, int EFLAG, int NEWTON_BOND>
+void BondClass2OMP::eval(int nfrom, int nto, ThrData * const thr)
+{
+  int i1,i2,n,type;
+  double delx,dely,delz,ebond,fbond;
+  double rsq,r,dr,dr2,dr3,dr4,de_bond;
+
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  const int * const * const bondlist = neighbor->bondlist;
+  const int nlocal = atom->nlocal;
+
+  for (n = nfrom; n < nto; n++) {
+    i1 = bondlist[n][0];
+    i2 = bondlist[n][1];
+    type = bondlist[n][2];
+
+    delx = x[i1][0] - x[i2][0];
+    dely = x[i1][1] - x[i2][1];
+    delz = x[i1][2] - x[i2][2];
+    domain->minimum_image(delx,dely,delz);
+
+    rsq = delx*delx + dely*dely + delz*delz;
+    r = sqrt(rsq);
+    dr = r - r0[type];
+    dr2 = dr*dr;
+    dr3 = dr2*dr;
+    dr4 = dr3*dr;
+
+    // force & energy
+
+    de_bond = 2.0*k2[type]*dr + 3.0*k3[type]*dr2 + 4.0*k4[type]*dr3;
+    if (r > 0.0) fbond = -de_bond/r;
+    else fbond = 0.0;
+
+    if (EFLAG) ebond = k2[type]*dr2 + k3[type]*dr3 + k4[type]*dr4;
+
+    // apply force to each of 2 atoms
+
+    if (NEWTON_BOND || i1 < nlocal) {
+      f[i1][0] += delx*fbond;
+      f[i1][1] += dely*fbond;
+      f[i1][2] += delz*fbond;
+    }
+
+    if (NEWTON_BOND || i2 < nlocal) {
+      f[i2][0] -= delx*fbond;
+      f[i2][1] -= dely*fbond;
+      f[i2][2] -= delz*fbond;
+    }
+
+    if (EVFLAG) ev_tally_thr(this,i1,i2,nlocal,NEWTON_BOND,
+			     ebond,fbond,delx,dely,delz,thr);
+  }
+}
--- a/src/USER-OMP/bond_class2_omp.h
+++ b/src/USER-OMP/bond_class2_omp.h
@ -0,0 +1,48 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifdef BOND_CLASS
+
+BondStyle(class2/omp,BondClass2OMP)
+
+#else
+
+#ifndef LMP_BOND_CLASS2_OMP_H
+#define LMP_BOND_CLASS2_OMP_H
+
+#include "bond_class2.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+class BondClass2OMP : public BondClass2, public ThrOMP {
+
+ public:
+    BondClass2OMP(class LAMMPS *lmp) : 
+      BondClass2(lmp), ThrOMP(lmp,THR_BOND) {};
+
+  virtual void compute(int, int);
+
+ private:
+  template <int EVFLAG, int EFLAG, int NEWTON_BOND>
+  void eval(int ifrom, int ito, ThrData * const thr);
+};
+
+}
+
+#endif
+#endif
--- a/src/USER-OMP/bond_fene_expand_omp.cpp
+++ b/src/USER-OMP/bond_fene_expand_omp.cpp
@ -0,0 +1,150 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#include "lmptype.h"
+#include "bond_fene_expand_omp.h"
+#include "atom.h"
+#include "comm.h"
+#include "force.h"
+#include "neighbor.h"
+#include "domain.h"
+#include "error.h"
+#include "update.h"
+
+#include <math.h>
+
+using namespace LAMMPS_NS;
+
+/* ---------------------------------------------------------------------- */
+
+void BondFENEExpandOMP::compute(int eflag, int vflag)
+{
+
+  if (eflag || vflag) {
+    ev_setup(eflag,vflag);
+  } else evflag = 0;
+
+  const int nall = atom->nlocal + atom->nghost;
+  const int nthreads = comm->nthreads;
+  const int inum = neighbor->nbondlist;
+
+#if defined(_OPENMP)
+#pragma omp parallel default(none) shared(eflag,vflag)
+#endif
+  {
+    int ifrom, ito, tid;
+
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
+
+    if (evflag) {
+      if (eflag) {
+	if (force->newton_bond) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
+      } else {
+	if (force->newton_bond) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
+      }
+    } else {
+      if (force->newton_bond) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
+    }
+
+    reduce_thr(this, eflag, vflag, thr);
+  } // end of omp parallel region
+}
+
+template <int EVFLAG, int EFLAG, int NEWTON_BOND>
+void BondFENEExpandOMP::eval(int nfrom, int nto, ThrData * const thr)
+{
+  int i1,i2,n,type;
+  double delx,dely,delz,ebond,fbond;
+  double rsq,r0sq,rlogarg,sr2,sr6;
+  double r,rshift,rshiftsq;
+
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  const int * const * const bondlist = neighbor->bondlist;
+  const int nlocal = atom->nlocal;
+
+  for (n = nfrom; n < nto; n++) {
+    i1 = bondlist[n][0];
+    i2 = bondlist[n][1];
+    type = bondlist[n][2];
+
+    delx = x[i1][0] - x[i2][0];
+    dely = x[i1][1] - x[i2][1];
+    delz = x[i1][2] - x[i2][2];
+    domain->minimum_image(delx,dely,delz);
+
+    rsq = delx*delx + dely*dely + delz*delz;
+    r = sqrt(rsq);
+    rshift = r - shift[type];
+    rshiftsq = rshift*rshift;
+    r0sq = r0[type] * r0[type];
+    rlogarg = 1.0 - rshiftsq/r0sq;
+
+    // if r -> r0, then rlogarg < 0.0 which is an error
+    // issue a warning and reset rlogarg = epsilon
+    // if r > 2*r0 something serious is wrong, abort
+
+    if (rlogarg < 0.1) {
+      char str[128];
+      sprintf(str,"FENE bond too long: " BIGINT_FORMAT " %d %d %g",
+	      update->ntimestep,atom->tag[i1],atom->tag[i2],sqrt(rsq));
+      error->warning(FLERR,str,0);
+      if (rlogarg <= -3.0) error->one(FLERR,"Bad FENE bond");
+      rlogarg = 0.1;
+    }
+
+    fbond = -k[type]*rshift/rlogarg/r;
+
+    // force from LJ term
+
+    if (rshiftsq < TWO_1_3*sigma[type]*sigma[type]) {
+      sr2 = sigma[type]*sigma[type]/rshiftsq;
+      sr6 = sr2*sr2*sr2;
+      fbond += 48.0*epsilon[type]*sr6*(sr6-0.5)/rshift/r;
+    }
+
+    // energy
+
+    if (EFLAG) {
+      ebond = -0.5 * k[type]*r0sq*log(rlogarg);
+      if (rshiftsq < TWO_1_3*sigma[type]*sigma[type])
+	ebond += 4.0*epsilon[type]*sr6*(sr6-1.0) + epsilon[type];
+    }
+
+    // apply force to each of 2 atoms
+
+    if (NEWTON_BOND || i1 < nlocal) {
+      f[i1][0] += delx*fbond;
+      f[i1][1] += dely*fbond;
+      f[i1][2] += delz*fbond;
+    }
+
+    if (NEWTON_BOND || i2 < nlocal) {
+      f[i2][0] -= delx*fbond;
+      f[i2][1] -= dely*fbond;
+      f[i2][2] -= delz*fbond;
+    }
+
+    if (EVFLAG) ev_tally_thr(this,i1,i2,nlocal,NEWTON_BOND,
+			     ebond,fbond,delx,dely,delz,thr);
+  }
+}
--- a/src/USER-OMP/bond_fene_expand_omp.h
+++ b/src/USER-OMP/bond_fene_expand_omp.h
@ -0,0 +1,48 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifdef BOND_CLASS
+
+BondStyle(fene/expand/omp,BondFENEExpandOMP)
+
+#else
+
+#ifndef LMP_BOND_FENE_EXPAND_OMP_H
+#define LMP_BOND_FENE_EXPAND_OMP_H
+
+#include "bond_fene_expand.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+class BondFENEExpandOMP : public BondFENEExpand, public ThrOMP {
+
+ public:
+    BondFENEExpandOMP(class LAMMPS *lmp) : 
+      BondFENEExpand(lmp), ThrOMP(lmp,THR_BOND) {};
+
+  virtual void compute(int, int);
+
+ private:
+  template <int EVFLAG, int EFLAG, int NEWTON_BOND>
+  void eval(int ifrom, int ito, ThrData * const thr);
+};
+
+}
+
+#endif
+#endif
--- a/src/USER-OMP/bond_fene_omp.cpp
+++ b/src/USER-OMP/bond_fene_omp.cpp
@ -0,0 +1,146 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#include "lmptype.h"
+#include "bond_fene_omp.h"
+#include "atom.h"
+#include "comm.h"
+#include "force.h"
+#include "neighbor.h"
+#include "domain.h"
+#include "error.h"
+#include "update.h"
+
+#include <math.h>
+
+using namespace LAMMPS_NS;
+
+/* ---------------------------------------------------------------------- */
+
+void BondFENEOMP::compute(int eflag, int vflag)
+{
+
+  if (eflag || vflag) {
+    ev_setup(eflag,vflag);
+  } else evflag = 0;
+
+  const int nall = atom->nlocal + atom->nghost;
+  const int nthreads = comm->nthreads;
+  const int inum = neighbor->nbondlist;
+
+#if defined(_OPENMP)
+#pragma omp parallel default(none) shared(eflag,vflag)
+#endif
+  {
+    int ifrom, ito, tid;
+
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
+
+    if (evflag) {
+      if (eflag) {
+	if (force->newton_bond) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
+      } else {
+	if (force->newton_bond) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
+      }
+    } else {
+      if (force->newton_bond) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
+    }
+
+    reduce_thr(this, eflag, vflag, thr);
+  } // end of omp parallel region
+}
+
+template <int EVFLAG, int EFLAG, int NEWTON_BOND>
+void BondFENEOMP::eval(int nfrom, int nto, ThrData * const thr)
+{
+  int i1,i2,n,type;
+  double delx,dely,delz,ebond,fbond;
+  double rsq,r0sq,rlogarg,sr2,sr6;
+
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  const int * const * const bondlist = neighbor->bondlist;
+  const int nlocal = atom->nlocal;
+
+  for (n = nfrom; n < nto; n++) {
+    i1 = bondlist[n][0];
+    i2 = bondlist[n][1];
+    type = bondlist[n][2];
+
+    delx = x[i1][0] - x[i2][0];
+    dely = x[i1][1] - x[i2][1];
+    delz = x[i1][2] - x[i2][2];
+    domain->minimum_image(delx,dely,delz);
+
+    rsq = delx*delx + dely*dely + delz*delz;
+    r0sq = r0[type] * r0[type];
+    rlogarg = 1.0 - rsq/r0sq;
+
+    // if r -> r0, then rlogarg < 0.0 which is an error
+    // issue a warning and reset rlogarg = epsilon
+    // if r > 2*r0 something serious is wrong, abort
+
+    if (rlogarg < 0.1) {
+      char str[128];
+      sprintf(str,"FENE bond too long: " BIGINT_FORMAT " %d %d %g",
+	      update->ntimestep,atom->tag[i1],atom->tag[i2],sqrt(rsq));
+      error->warning(FLERR,str,0);
+      if (rlogarg <= -3.0) error->one(FLERR,"Bad FENE bond");
+      rlogarg = 0.1;
+    }
+
+    fbond = -k[type]/rlogarg;
+
+    // force from LJ term
+
+    if (rsq < TWO_1_3*sigma[type]*sigma[type]) {
+      sr2 = sigma[type]*sigma[type]/rsq;
+      sr6 = sr2*sr2*sr2;
+      fbond += 48.0*epsilon[type]*sr6*(sr6-0.5)/rsq;
+    }
+
+    // energy
+
+    if (EFLAG) {
+      ebond = -0.5 * k[type]*r0sq*log(rlogarg);
+      if (rsq < TWO_1_3*sigma[type]*sigma[type])
+	ebond += 4.0*epsilon[type]*sr6*(sr6-1.0) + epsilon[type];
+    }
+
+    // apply force to each of 2 atoms
+
+    if (NEWTON_BOND || i1 < nlocal) {
+      f[i1][0] += delx*fbond;
+      f[i1][1] += dely*fbond;
+      f[i1][2] += delz*fbond;
+    }
+
+    if (NEWTON_BOND || i2 < nlocal) {
+      f[i2][0] -= delx*fbond;
+      f[i2][1] -= dely*fbond;
+      f[i2][2] -= delz*fbond;
+    }
+
+    if (EVFLAG) ev_tally_thr(this,i1,i2,nlocal,NEWTON_BOND,
+			     ebond,fbond,delx,dely,delz,thr);
+  }
+}
--- a/src/USER-OMP/bond_fene_omp.h
+++ b/src/USER-OMP/bond_fene_omp.h
@ -0,0 +1,48 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifdef BOND_CLASS
+
+BondStyle(fene/omp,BondFENEOMP)
+
+#else
+
+#ifndef LMP_BOND_FENE_OMP_H
+#define LMP_BOND_FENE_OMP_H
+
+#include "bond_fene.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+class BondFENEOMP : public BondFENE, public ThrOMP {
+
+ public:
+    BondFENEOMP(class LAMMPS *lmp) : 
+      BondFENE(lmp), ThrOMP(lmp,THR_BOND) {};
+
+  virtual void compute(int, int);
+
+ private:
+  template <int EVFLAG, int EFLAG, int NEWTON_BOND>
+  void eval(int ifrom, int ito, ThrData * const thr);
+};
+
+}
+
+#endif
+#endif
--- a/src/USER-OMP/bond_harmonic_omp.cpp
+++ b/src/USER-OMP/bond_harmonic_omp.cpp
@ -0,0 +1,121 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#include "lmptype.h"
+#include "bond_harmonic_omp.h"
+#include "atom.h"
+#include "comm.h"
+#include "force.h"
+#include "neighbor.h"
+#include "domain.h"
+
+#include <math.h>
+
+using namespace LAMMPS_NS;
+
+/* ---------------------------------------------------------------------- */
+
+void BondHarmonicOMP::compute(int eflag, int vflag)
+{
+
+  if (eflag || vflag) {
+    ev_setup(eflag,vflag);
+  } else evflag = 0;
+
+  const int nall = atom->nlocal + atom->nghost;
+  const int nthreads = comm->nthreads;
+  const int inum = neighbor->nbondlist;
+
+#if defined(_OPENMP)
+#pragma omp parallel default(none) shared(eflag,vflag)
+#endif
+  {
+    int ifrom, ito, tid;
+
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
+
+    if (evflag) {
+      if (eflag) {
+	if (force->newton_bond) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
+      } else {
+	if (force->newton_bond) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
+      }
+    } else {
+      if (force->newton_bond) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
+    }
+
+    reduce_thr(this, eflag, vflag, thr);
+  } // end of omp parallel region
+}
+
+template <int EVFLAG, int EFLAG, int NEWTON_BOND>
+void BondHarmonicOMP::eval(int nfrom, int nto, ThrData * const thr)
+{
+  int i1,i2,n,type;
+  double delx,dely,delz,ebond,fbond;
+  double rsq,r,dr,rk;
+
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  const int * const * const bondlist = neighbor->bondlist;
+  const int nlocal = atom->nlocal;
+
+  for (n = nfrom; n < nto; n++) {
+    i1 = bondlist[n][0];
+    i2 = bondlist[n][1];
+    type = bondlist[n][2];
+
+    delx = x[i1][0] - x[i2][0];
+    dely = x[i1][1] - x[i2][1];
+    delz = x[i1][2] - x[i2][2];
+    domain->minimum_image(delx,dely,delz);
+
+    rsq = delx*delx + dely*dely + delz*delz;
+    r = sqrt(rsq);
+    dr = r - r0[type];
+    rk = k[type] * dr;
+
+    // force & energy
+
+    if (r > 0.0) fbond = -2.0*rk/r;
+    else fbond = 0.0;
+
+    if (EFLAG) ebond = rk*dr;
+
+    // apply force to each of 2 atoms
+
+    if (NEWTON_BOND || i1 < nlocal) {
+      f[i1][0] += delx*fbond;
+      f[i1][1] += dely*fbond;
+      f[i1][2] += delz*fbond;
+    }
+
+    if (NEWTON_BOND || i2 < nlocal) {
+      f[i2][0] -= delx*fbond;
+      f[i2][1] -= dely*fbond;
+      f[i2][2] -= delz*fbond;
+    }
+
+    if (EVFLAG) ev_tally_thr(this,i1,i2,nlocal,NEWTON_BOND,
+			     ebond,fbond,delx,dely,delz,thr);
+  }
+}
--- a/src/USER-OMP/bond_harmonic_omp.h
+++ b/src/USER-OMP/bond_harmonic_omp.h
@ -0,0 +1,48 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifdef BOND_CLASS
+
+BondStyle(harmonic/omp,BondHarmonicOMP)
+
+#else
+
+#ifndef LMP_BOND_HARMONIC_OMP_H
+#define LMP_BOND_HARMONIC_OMP_H
+
+#include "bond_harmonic.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+class BondHarmonicOMP : public BondHarmonic, public ThrOMP {
+
+ public:
+    BondHarmonicOMP(class LAMMPS *lmp) : 
+      BondHarmonic(lmp), ThrOMP(lmp,THR_BOND) {};
+
+  virtual void compute(int, int);
+
+ private:
+  template <int EVFLAG, int EFLAG, int NEWTON_BOND>
+  void eval(int ifrom, int ito, ThrData * const thr);
+};
+
+}
+
+#endif
+#endif
--- a/src/USER-OMP/bond_harmonic_shift_cut_omp.cpp
+++ b/src/USER-OMP/bond_harmonic_shift_cut_omp.cpp
@ -0,0 +1,124 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#include "lmptype.h"
+#include "bond_harmonic_shift_cut_omp.h"
+#include "atom.h"
+#include "comm.h"
+#include "force.h"
+#include "neighbor.h"
+#include "domain.h"
+
+#include <math.h>
+
+using namespace LAMMPS_NS;
+
+/* ---------------------------------------------------------------------- */
+
+void BondHarmonicShiftCutOMP::compute(int eflag, int vflag)
+{
+
+  if (eflag || vflag) {
+    ev_setup(eflag,vflag);
+  } else evflag = 0;
+
+  const int nall = atom->nlocal + atom->nghost;
+  const int nthreads = comm->nthreads;
+  const int inum = neighbor->nbondlist;
+
+#if defined(_OPENMP)
+#pragma omp parallel default(none) shared(eflag,vflag)
+#endif
+  {
+    int ifrom, ito, tid;
+
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
+
+    if (evflag) {
+      if (eflag) {
+	if (force->newton_bond) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
+      } else {
+	if (force->newton_bond) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
+      }
+    } else {
+      if (force->newton_bond) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
+    }
+
+    reduce_thr(this, eflag, vflag, thr);
+  } // end of omp parallel region
+}
+
+template <int EVFLAG, int EFLAG, int NEWTON_BOND>
+void BondHarmonicShiftCutOMP::eval(int nfrom, int nto, ThrData * const thr)
+{
+  int i1,i2,n,type;
+  double delx,dely,delz,ebond,fbond;
+  double rsq,r,dr,rk;
+
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  const int * const * const bondlist = neighbor->bondlist;
+  const int nlocal = atom->nlocal;
+
+  for (n = nfrom; n < nto; n++) {
+    i1 = bondlist[n][0];
+    i2 = bondlist[n][1];
+    type = bondlist[n][2];
+
+    delx = x[i1][0] - x[i2][0];
+    dely = x[i1][1] - x[i2][1];
+    delz = x[i1][2] - x[i2][2];
+    domain->minimum_image(delx,dely,delz);
+
+    rsq = delx*delx + dely*dely + delz*delz;
+    r = sqrt(rsq);
+
+    if (r>r1[type]) continue;
+
+    dr = r - r0[type];
+    rk = k[type] * dr;
+
+    // force & energy
+
+    if (r > 0.0) fbond = -2.0*rk/r;
+    else fbond = 0.0;
+
+    if (EFLAG) ebond = k[type]*(dr*dr -(r0[type]-r1[type])*(r0[type]-r1[type]) );
+
+    // apply force to each of 2 atoms
+
+    if (NEWTON_BOND || i1 < nlocal) {
+      f[i1][0] += delx*fbond;
+      f[i1][1] += dely*fbond;
+      f[i1][2] += delz*fbond;
+    }
+
+    if (NEWTON_BOND || i2 < nlocal) {
+      f[i2][0] -= delx*fbond;
+      f[i2][1] -= dely*fbond;
+      f[i2][2] -= delz*fbond;
+    }
+
+    if (EVFLAG) ev_tally_thr(this,i1,i2,nlocal,NEWTON_BOND,
+			     ebond,fbond,delx,dely,delz,thr);
+  }
+}
--- a/src/USER-OMP/bond_harmonic_shift_cut_omp.h
+++ b/src/USER-OMP/bond_harmonic_shift_cut_omp.h
@ -0,0 +1,48 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifdef BOND_CLASS
+
+BondStyle(harmonic/shift/cut/omp,BondHarmonicShiftCutOMP)
+
+#else
+
+#ifndef LMP_BOND_HARMONIC_SHIFT_CUT_OMP_H
+#define LMP_BOND_HARMONIC_SHIFT_CUT_OMP_H
+
+#include "bond_harmonic_shift_cut.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+class BondHarmonicShiftCutOMP : public BondHarmonicShiftCut, public ThrOMP {
+
+ public:
+    BondHarmonicShiftCutOMP(class LAMMPS *lmp) : 
+      BondHarmonicShiftCut(lmp), ThrOMP(lmp,THR_BOND) {};
+
+  virtual void compute(int, int);
+
+ private:
+  template <int EVFLAG, int EFLAG, int NEWTON_BOND>
+  void eval(int ifrom, int ito, ThrData * const thr);
+};
+
+}
+
+#endif
+#endif
--- a/src/USER-OMP/bond_harmonic_shift_omp.cpp
+++ b/src/USER-OMP/bond_harmonic_shift_omp.cpp
@ -0,0 +1,121 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#include "lmptype.h"
+#include "bond_harmonic_shift_omp.h"
+#include "atom.h"
+#include "comm.h"
+#include "force.h"
+#include "neighbor.h"
+#include "domain.h"
+
+#include <math.h>
+
+using namespace LAMMPS_NS;
+
+/* ---------------------------------------------------------------------- */
+
+void BondHarmonicShiftOMP::compute(int eflag, int vflag)
+{
+
+  if (eflag || vflag) {
+    ev_setup(eflag,vflag);
+  } else evflag = 0;
+
+  const int nall = atom->nlocal + atom->nghost;
+  const int nthreads = comm->nthreads;
+  const int inum = neighbor->nbondlist;
+
+#if defined(_OPENMP)
+#pragma omp parallel default(none) shared(eflag,vflag)
+#endif
+  {
+    int ifrom, ito, tid;
+
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
+
+    if (evflag) {
+      if (eflag) {
+	if (force->newton_bond) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
+      } else {
+	if (force->newton_bond) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
+      }
+    } else {
+      if (force->newton_bond) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
+    }
+
+    reduce_thr(this, eflag, vflag, thr);
+  } // end of omp parallel region
+}
+
+template <int EVFLAG, int EFLAG, int NEWTON_BOND>
+void BondHarmonicShiftOMP::eval(int nfrom, int nto, ThrData * const thr)
+{
+  int i1,i2,n,type;
+  double delx,dely,delz,ebond,fbond;
+  double rsq,r,dr,rk;
+
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  const int * const * const bondlist = neighbor->bondlist;
+  const int nlocal = atom->nlocal;
+
+  for (n = nfrom; n < nto; n++) {
+    i1 = bondlist[n][0];
+    i2 = bondlist[n][1];
+    type = bondlist[n][2];
+
+    delx = x[i1][0] - x[i2][0];
+    dely = x[i1][1] - x[i2][1];
+    delz = x[i1][2] - x[i2][2];
+    domain->minimum_image(delx,dely,delz);
+
+    rsq = delx*delx + dely*dely + delz*delz;
+    r = sqrt(rsq);
+    dr = r - r0[type];
+    rk = k[type] * dr;
+
+    // force & energy
+
+    if (r > 0.0) fbond = -2.0*rk/r;
+    else fbond = 0.0;
+
+    if (EFLAG) ebond = k[type]*(dr*dr -(r0[type]-r1[type])*(r0[type]-r1[type]) );
+
+    // apply force to each of 2 atoms
+
+    if (NEWTON_BOND || i1 < nlocal) {
+      f[i1][0] += delx*fbond;
+      f[i1][1] += dely*fbond;
+      f[i1][2] += delz*fbond;
+    }
+
+    if (NEWTON_BOND || i2 < nlocal) {
+      f[i2][0] -= delx*fbond;
+      f[i2][1] -= dely*fbond;
+      f[i2][2] -= delz*fbond;
+    }
+
+    if (EVFLAG) ev_tally_thr(this,i1,i2,nlocal,NEWTON_BOND,
+			     ebond,fbond,delx,dely,delz,thr);
+  }
+}
--- a/src/USER-OMP/bond_harmonic_shift_omp.h
+++ b/src/USER-OMP/bond_harmonic_shift_omp.h
@ -0,0 +1,48 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifdef BOND_CLASS
+
+BondStyle(harmonic/shift/omp,BondHarmonicShiftOMP)
+
+#else
+
+#ifndef LMP_BOND_HARMONIC_SHIFT_OMP_H
+#define LMP_BOND_HARMONIC_SHIFT_OMP_H
+
+#include "bond_harmonic_shift.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+class BondHarmonicShiftOMP : public BondHarmonicShift, public ThrOMP {
+
+ public:
+    BondHarmonicShiftOMP(class LAMMPS *lmp) : 
+      BondHarmonicShift(lmp), ThrOMP(lmp,THR_BOND) {};
+
+  virtual void compute(int, int);
+
+ private:
+  template <int EVFLAG, int EFLAG, int NEWTON_BOND>
+  void eval(int ifrom, int ito, ThrData * const thr);
+};
+
+}
+
+#endif
+#endif
--- a/src/USER-OMP/bond_morse_omp.cpp
+++ b/src/USER-OMP/bond_morse_omp.cpp
@ -0,0 +1,122 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#include "lmptype.h"
+#include "bond_morse_omp.h"
+#include "atom.h"
+#include "comm.h"
+#include "force.h"
+#include "neighbor.h"
+#include "domain.h"
+
+#include <math.h>
+
+using namespace LAMMPS_NS;
+
+/* ---------------------------------------------------------------------- */
+
+void BondMorseOMP::compute(int eflag, int vflag)
+{
+
+  if (eflag || vflag) {
+    ev_setup(eflag,vflag);
+  } else evflag = 0;
+
+  const int nall = atom->nlocal + atom->nghost;
+  const int nthreads = comm->nthreads;
+  const int inum = neighbor->nbondlist;
+
+#if defined(_OPENMP)
+#pragma omp parallel default(none) shared(eflag,vflag)
+#endif
+  {
+    int ifrom, ito, tid;
+
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
+
+    if (evflag) {
+      if (eflag) {
+	if (force->newton_bond) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
+      } else {
+	if (force->newton_bond) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
+      }
+    } else {
+      if (force->newton_bond) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
+    }
+
+    reduce_thr(this, eflag, vflag, thr);
+  } // end of omp parallel region
+}
+
+template <int EVFLAG, int EFLAG, int NEWTON_BOND>
+void BondMorseOMP::eval(int nfrom, int nto, ThrData * const thr)
+{
+  int i1,i2,n,type;
+  double delx,dely,delz,ebond,fbond;
+  double rsq,r,dr,ralpha;
+
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  const int * const * const bondlist = neighbor->bondlist;
+  const int nlocal = atom->nlocal;
+
+  for (n = nfrom; n < nto; n++) {
+    i1 = bondlist[n][0];
+    i2 = bondlist[n][1];
+    type = bondlist[n][2];
+
+    delx = x[i1][0] - x[i2][0];
+    dely = x[i1][1] - x[i2][1];
+    delz = x[i1][2] - x[i2][2];
+    domain->minimum_image(delx,dely,delz);
+
+    rsq = delx*delx + dely*dely + delz*delz;
+    r = sqrt(rsq);
+
+    dr = r - r0[type];
+    ralpha = exp(-alpha[type]*dr);
+
+    // force & energy
+
+    if (r > 0.0) fbond = -2.0*d0[type]*alpha[type]*(1-ralpha)*ralpha/r;
+    else fbond = 0.0;
+
+    if (EFLAG) ebond = d0[type]*(1-ralpha)*(1-ralpha);
+
+    // apply force to each of 2 atoms
+
+    if (NEWTON_BOND || i1 < nlocal) {
+      f[i1][0] += delx*fbond;
+      f[i1][1] += dely*fbond;
+      f[i1][2] += delz*fbond;
+    }
+
+    if (NEWTON_BOND || i2 < nlocal) {
+      f[i2][0] -= delx*fbond;
+      f[i2][1] -= dely*fbond;
+      f[i2][2] -= delz*fbond;
+    }
+
+    if (EVFLAG) ev_tally_thr(this,i1,i2,nlocal,NEWTON_BOND,
+			     ebond,fbond,delx,dely,delz,thr);
+  }
+}
--- a/src/USER-OMP/bond_morse_omp.h
+++ b/src/USER-OMP/bond_morse_omp.h
@ -0,0 +1,48 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifdef BOND_CLASS
+
+BondStyle(morse/omp,BondMorseOMP)
+
+#else
+
+#ifndef LMP_BOND_MORSE_OMP_H
+#define LMP_BOND_MORSE_OMP_H
+
+#include "bond_morse.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+class BondMorseOMP : public BondMorse, public ThrOMP {
+
+ public:
+    BondMorseOMP(class LAMMPS *lmp) : 
+      BondMorse(lmp), ThrOMP(lmp,THR_BOND) {};
+
+  virtual void compute(int, int);
+
+ private:
+  template <int EVFLAG, int EFLAG, int NEWTON_BOND>
+  void eval(int ifrom, int ito, ThrData * const thr);
+};
+
+}
+
+#endif
+#endif
--- a/src/USER-OMP/bond_nonlinear_omp.cpp
+++ b/src/USER-OMP/bond_nonlinear_omp.cpp
@ -0,0 +1,122 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#include "lmptype.h"
+#include "bond_nonlinear_omp.h"
+#include "atom.h"
+#include "comm.h"
+#include "force.h"
+#include "neighbor.h"
+#include "domain.h"
+
+#include <math.h>
+
+using namespace LAMMPS_NS;
+
+/* ---------------------------------------------------------------------- */
+
+void BondNonlinearOMP::compute(int eflag, int vflag)
+{
+
+  if (eflag || vflag) {
+    ev_setup(eflag,vflag);
+  } else evflag = 0;
+
+  const int nall = atom->nlocal + atom->nghost;
+  const int nthreads = comm->nthreads;
+  const int inum = neighbor->nbondlist;
+
+#if defined(_OPENMP)
+#pragma omp parallel default(none) shared(eflag,vflag)
+#endif
+  {
+    int ifrom, ito, tid;
+
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
+
+    if (evflag) {
+      if (eflag) {
+	if (force->newton_bond) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
+      } else {
+	if (force->newton_bond) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
+      }
+    } else {
+      if (force->newton_bond) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
+    }
+
+    reduce_thr(this, eflag, vflag, thr);
+  } // end of omp parallel region
+}
+
+template <int EVFLAG, int EFLAG, int NEWTON_BOND>
+void BondNonlinearOMP::eval(int nfrom, int nto, ThrData * const thr)
+{
+  int i1,i2,n,type;
+  double delx,dely,delz,ebond,fbond;
+  double rsq,r,dr,drsq,lamdasq,denom,denomsq;
+
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  const int * const * const bondlist = neighbor->bondlist;
+  const int nlocal = atom->nlocal;
+
+  for (n = nfrom; n < nto; n++) {
+    i1 = bondlist[n][0];
+    i2 = bondlist[n][1];
+    type = bondlist[n][2];
+
+    delx = x[i1][0] - x[i2][0];
+    dely = x[i1][1] - x[i2][1];
+    delz = x[i1][2] - x[i2][2];
+    domain->minimum_image(delx,dely,delz);
+
+    rsq = delx*delx + dely*dely + delz*delz;
+    r = sqrt(rsq);
+    dr = r - r0[type];
+    drsq = dr*dr;
+    lamdasq = lamda[type]*lamda[type];
+    denom = lamdasq - drsq;
+    denomsq = denom*denom;
+
+    // force & energy
+
+    fbond = -epsilon[type]/r * 2.0*dr*lamdasq/denomsq;
+    if (EFLAG) ebond = epsilon[type] * drsq / denom;
+
+    // apply force to each of 2 atoms
+
+    if (NEWTON_BOND || i1 < nlocal) {
+      f[i1][0] += delx*fbond;
+      f[i1][1] += dely*fbond;
+      f[i1][2] += delz*fbond;
+    }
+
+    if (NEWTON_BOND || i2 < nlocal) {
+      f[i2][0] -= delx*fbond;
+      f[i2][1] -= dely*fbond;
+      f[i2][2] -= delz*fbond;
+    }
+
+    if (EVFLAG) ev_tally_thr(this,i1,i2,nlocal,NEWTON_BOND,
+			     ebond,fbond,delx,dely,delz,thr);
+  }
+}
--- a/src/USER-OMP/bond_nonlinear_omp.h
+++ b/src/USER-OMP/bond_nonlinear_omp.h
@ -0,0 +1,48 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifdef BOND_CLASS
+
+BondStyle(nonlinear/omp,BondNonlinearOMP)
+
+#else
+
+#ifndef LMP_BOND_NONLINEAR_OMP_H
+#define LMP_BOND_NONLINEAR_OMP_H
+
+#include "bond_nonlinear.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+class BondNonlinearOMP : public BondNonlinear, public ThrOMP {
+
+ public:
+    BondNonlinearOMP(class LAMMPS *lmp) : 
+      BondNonlinear(lmp), ThrOMP(lmp,THR_BOND) {};
+
+  virtual void compute(int, int);
+
+ private:
+  template <int EVFLAG, int EFLAG, int NEWTON_BOND>
+  void eval(int ifrom, int ito, ThrData * const thr);
+};
+
+}
+
+#endif
+#endif
--- a/src/USER-OMP/bond_quartic_omp.cpp
+++ b/src/USER-OMP/bond_quartic_omp.cpp
@ -0,0 +1,190 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#include "lmptype.h"
+#include "bond_quartic_omp.h"
+#include "atom.h"
+#include "comm.h"
+#include "force.h"
+#include "neighbor.h"
+#include "domain.h"
+#include "pair.h"
+
+#include <math.h>
+
+using namespace LAMMPS_NS;
+
+/* ---------------------------------------------------------------------- */
+
+void BondQuarticOMP::compute(int eflag, int vflag)
+{
+
+  if (eflag || vflag) {
+    ev_setup(eflag,vflag);
+  } else evflag = 0;
+
+ // insure pair->ev_tally() will use 1-4 virial contribution
+
+  if (vflag_global == 2)
+    force->pair->vflag_either = force->pair->vflag_global = 1;
+
+  const int nall = atom->nlocal + atom->nghost;
+  const int nthreads = comm->nthreads;
+  const int inum = neighbor->nbondlist;
+
+#if defined(_OPENMP)
+#pragma omp parallel default(none) shared(eflag,vflag)
+#endif
+  {
+    int ifrom, ito, tid;
+
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
+
+    if (evflag) {
+      if (eflag) {
+	if (force->newton_bond) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
+      } else {
+	if (force->newton_bond) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
+      }
+    } else {
+      if (force->newton_bond) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
+    }
+
+    reduce_thr(this, eflag, vflag, thr);
+  } // end of omp parallel region
+}
+
+template <int EVFLAG, int EFLAG, int NEWTON_BOND>
+void BondQuarticOMP::eval(int nfrom, int nto, ThrData * const thr)
+{
+  int i1,i2,n,m,type,itype,jtype;
+  double delx,dely,delz,ebond,fbond,evdwl,fpair;
+  double r,rsq,dr,r2,ra,rb,sr2,sr6;
+
+  ebond = evdwl = 0.0;
+
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  int * const * const bondlist = neighbor->bondlist;
+  const double * const * const cutsq = force->pair->cutsq;
+  const int nlocal = atom->nlocal;
+
+  for (n = nfrom; n < nto; n++) {
+
+    // skip bond if already broken
+
+    if (bondlist[n][2] <= 0) continue;
+
+    i1 = bondlist[n][0];
+    i2 = bondlist[n][1];
+    type = bondlist[n][2];
+
+    delx = x[i1][0] - x[i2][0];
+    dely = x[i1][1] - x[i2][1];
+    delz = x[i1][2] - x[i2][2];
+    domain->minimum_image(delx,dely,delz);
+
+    rsq = delx*delx + dely*dely + delz*delz;
+
+    // if bond breaks, set type to 0
+    //   both in temporary bondlist and permanent bond_type
+    // if this proc owns both atoms,
+    //   negate bond_type twice if other atom stores it
+    // if other proc owns 2nd atom, other proc will also break bond
+
+    if (rsq > rc[type]*rc[type]) {
+      bondlist[n][2] = 0;
+      for (m = 0; m < atom->num_bond[i1]; m++)
+	if (atom->bond_atom[i1][m] == atom->tag[i2])
+	  atom->bond_type[i1][m] = 0;
+      if (i2 < atom->nlocal)
+	for (m = 0; m < atom->num_bond[i2]; m++)
+	  if (atom->bond_atom[i2][m] == atom->tag[i1])
+	    atom->bond_type[i2][m] = 0;
+      continue;
+    }
+
+    // quartic bond
+    // 1st portion is from quartic term
+    // 2nd portion is from LJ term cut at 2^(1/6) with eps = sigma = 1.0
+
+    r = sqrt(rsq);
+    dr = r - rc[type];
+    r2 = dr*dr;
+    ra = dr - b1[type];
+    rb = dr - b2[type];
+    fbond = -k[type]/r * (r2*(ra+rb) + 2.0*dr*ra*rb);
+    
+    if (rsq < TWO_1_3) {
+      sr2 = 1.0/rsq;
+      sr6 = sr2*sr2*sr2;
+      fbond += 48.0*sr6*(sr6-0.5)/rsq;
+    }
+
+    if (EFLAG) {
+      ebond = k[type]*r2*ra*rb + u0[type];
+      if (rsq < TWO_1_3) ebond += 4.0*sr6*(sr6-1.0) + 1.0;
+    }
+
+    // apply force to each of 2 atoms
+
+    if (NEWTON_BOND || i1 < nlocal) {
+      f[i1][0] += delx*fbond;
+      f[i1][1] += dely*fbond;
+      f[i1][2] += delz*fbond;
+    }
+
+    if (NEWTON_BOND || i2 < nlocal) {
+      f[i2][0] -= delx*fbond;
+      f[i2][1] -= dely*fbond;
+      f[i2][2] -= delz*fbond;
+    }
+
+    if (EVFLAG) ev_tally_thr(this,i1,i2,nlocal,NEWTON_BOND,ebond,fbond,delx,dely,delz,thr);
+
+    // subtract out pairwise contribution from 2 atoms via pair->single()
+    // required since special_bond = 1,1,1
+    // tally energy/virial in pair, using newton_bond as newton flag
+
+    itype = atom->type[i1];
+    jtype = atom->type[i2];
+
+    if (rsq < cutsq[itype][jtype]) {
+      evdwl = -force->pair->single(i1,i2,itype,jtype,rsq,1.0,1.0,fpair);
+      fpair = -fpair;
+
+      if (NEWTON_BOND || i1 < nlocal) {
+	f[i1][0] += delx*fpair;
+	f[i1][1] += dely*fpair;
+	f[i1][2] += delz*fpair;
+      }
+      if (NEWTON_BOND || i2 < nlocal) {
+	f[i2][0] -= delx*fpair;
+	f[i2][1] -= dely*fpair;
+	f[i2][2] -= delz*fpair;
+      }
+
+      if (EVFLAG) ev_tally_thr(force->pair,i1,i2,nlocal,NEWTON_BOND,
+			       evdwl,0.0,fpair,delx,dely,delz,thr);
+    }
+  }
+}
--- a/src/USER-OMP/bond_quartic_omp.h
+++ b/src/USER-OMP/bond_quartic_omp.h
@ -0,0 +1,48 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifdef BOND_CLASS
+
+BondStyle(quartic/omp,BondQuarticOMP)
+
+#else
+
+#ifndef LMP_BOND_QUARTIC_OMP_H
+#define LMP_BOND_QUARTIC_OMP_H
+
+#include "bond_quartic.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+class BondQuarticOMP : public BondQuartic, public ThrOMP {
+
+ public:
+    BondQuarticOMP(class LAMMPS *lmp) : 
+      BondQuartic(lmp), ThrOMP(lmp,THR_BOND) {};
+
+  virtual void compute(int, int);
+
+ private:
+  template <int EVFLAG, int EFLAG, int NEWTON_BOND>
+  void eval(int ifrom, int ito, ThrData * const thr);
+};
+
+}
+
+#endif
+#endif
--- a/src/USER-OMP/bond_table_omp.cpp
+++ b/src/USER-OMP/bond_table_omp.cpp
@ -0,0 +1,119 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#include "lmptype.h"
+#include "bond_table_omp.h"
+#include "atom.h"
+#include "comm.h"
+#include "force.h"
+#include "neighbor.h"
+#include "domain.h"
+
+#include <math.h>
+
+using namespace LAMMPS_NS;
+
+/* ---------------------------------------------------------------------- */
+
+void BondTableOMP::compute(int eflag, int vflag)
+{
+
+  if (eflag || vflag) {
+    ev_setup(eflag,vflag);
+  } else evflag = 0;
+
+  const int nall = atom->nlocal + atom->nghost;
+  const int nthreads = comm->nthreads;
+  const int inum = neighbor->nbondlist;
+
+#if defined(_OPENMP)
+#pragma omp parallel default(none) shared(eflag,vflag)
+#endif
+  {
+    int ifrom, ito, tid;
+
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
+
+    if (evflag) {
+      if (eflag) {
+	if (force->newton_bond) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
+      } else {
+	if (force->newton_bond) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
+      }
+    } else {
+      if (force->newton_bond) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
+    }
+
+    reduce_thr(this, eflag, vflag, thr);
+  } // end of omp parallel region
+}
+
+template <int EVFLAG, int EFLAG, int NEWTON_BOND>
+void BondTableOMP::eval(int nfrom, int nto, ThrData * const thr)
+{
+  int i1,i2,n,type;
+  double delx,dely,delz,ebond,fbond;
+  double rsq,r;
+  double u,mdu;
+
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  const int * const * const bondlist = neighbor->bondlist;
+  const int nlocal = atom->nlocal;
+
+  for (n = nfrom; n < nto; n++) {
+    i1 = bondlist[n][0];
+    i2 = bondlist[n][1];
+    type = bondlist[n][2];
+
+    delx = x[i1][0] - x[i2][0];
+    dely = x[i1][1] - x[i2][1];
+    delz = x[i1][2] - x[i2][2];
+    domain->minimum_image(delx,dely,delz);
+
+    rsq = delx*delx + dely*dely + delz*delz;
+    r = sqrt(rsq);
+
+    // force & energy
+
+    uf_lookup(type,r,u,mdu);
+    fbond = mdu/r;
+    ebond = u;
+
+    // apply force to each of 2 atoms
+
+    if (NEWTON_BOND || i1 < nlocal) {
+      f[i1][0] += delx*fbond;
+      f[i1][1] += dely*fbond;
+      f[i1][2] += delz*fbond;
+    }
+
+    if (NEWTON_BOND || i2 < nlocal) {
+      f[i2][0] -= delx*fbond;
+      f[i2][1] -= dely*fbond;
+      f[i2][2] -= delz*fbond;
+    }
+
+    if (EVFLAG) ev_tally_thr(this,i1,i2,nlocal,NEWTON_BOND,
+			     ebond,fbond,delx,dely,delz,thr);
+  }
+}
--- a/src/USER-OMP/bond_table_omp.h
+++ b/src/USER-OMP/bond_table_omp.h
@ -0,0 +1,48 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifdef BOND_CLASS
+
+BondStyle(table/omp,BondTableOMP)
+
+#else
+
+#ifndef LMP_BOND_TABLE_OMP_H
+#define LMP_BOND_TABLE_OMP_H
+
+#include "bond_table.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+class BondTableOMP : public BondTable, public ThrOMP {
+
+ public:
+    BondTableOMP(class LAMMPS *lmp) : 
+      BondTable(lmp), ThrOMP(lmp,THR_BOND) {};
+
+  virtual void compute(int, int);
+
+ private:
+  template <int EVFLAG, int EFLAG, int NEWTON_BOND>
+  void eval(int ifrom, int ito, ThrData * const thr);
+};
+
+}
+
+#endif
+#endif
--- a/src/USER-OMP/ewald_omp.cpp
+++ b/src/USER-OMP/ewald_omp.cpp
@ -0,0 +1,386 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing authors: Roy Pollock (LLNL), Paul Crozier (SNL)
+------------------------------------------------------------------------- */
+
+#include "mpi.h"
+#include "ewald_omp.h"
+#include "atom.h"
+#include "comm.h"
+#include "force.h"
+#include "memory.h"
+
+#include <math.h>
+
+#include "math_const.h"
+
+using namespace LAMMPS_NS;
+using namespace MathConst;
+
+#define SMALL 0.00001
+
+/* ---------------------------------------------------------------------- */
+
+EwaldOMP::EwaldOMP(LAMMPS *lmp, int narg, char **arg) 
+  : Ewald(lmp, narg, arg), ThrOMP(lmp, THR_KSPACE)
+{ }
+
+/* ---------------------------------------------------------------------- */
+void EwaldOMP::allocate()
+{
+  Ewald::allocate();
+
+  // always re-allocate for simplicity.
+  delete[] sfacrl;
+  delete[] sfacim;
+
+  sfacrl = new double[kmax3d*comm->nthreads];
+  sfacim = new double[kmax3d*comm->nthreads];
+}
+
+/* ----------------------------------------------------------------------
+   compute the Ewald long-range force, energy, virial 
+------------------------------------------------------------------------- */
+
+void EwaldOMP::compute(int eflag, int vflag)
+{
+  // clear out global energy/virial
+
+  energy = 0.0;
+  if (vflag) for (int n = 0; n < 6; n++) virial[n] = 0.0;
+
+  // extend size of per-atom arrays if necessary
+
+  if (atom->nlocal > nmax) {
+    memory->destroy(ek);
+    memory->destroy3d_offset(cs,-kmax_created);
+    memory->destroy3d_offset(sn,-kmax_created);
+    nmax = atom->nmax;
+    memory->create(ek,nmax,3,"ewald:ek");
+    memory->create3d_offset(cs,-kmax,kmax,3,nmax,"ewald:cs");
+    memory->create3d_offset(sn,-kmax,kmax,3,nmax,"ewald:sn");
+    kmax_created = kmax;
+  }
+
+  // partial structure factors on each processor
+  // total structure factor by summing over procs
+
+  eik_dot_r(); 
+  MPI_Allreduce(sfacrl,sfacrl_all,kcount,MPI_DOUBLE,MPI_SUM,world);
+  MPI_Allreduce(sfacim,sfacim_all,kcount,MPI_DOUBLE,MPI_SUM,world);
+
+  // K-space portion of electric field
+  // double loop over K-vectors and local atoms
+
+  double * const * const f = atom->f;
+  const double * const q = atom->q;
+  const int nthreads = comm->nthreads;
+  const int nlocal = atom->nlocal;
+  const double qscale = force->qqrd2e * scale;
+
+  double eng_tmp = 0.0;
+  double v0,v1,v2,v3,v4,v5;
+  v0=v1=v2=v3=v4=v5=0.0;
+
+#if defined(_OPENMP)
+#pragma omp parallel default(none) shared(eflag,vflag) reduction(+:eng_tmp,v0,v1,v2,v3,v4,v5)
+#endif
+  {
+
+    int i,k,ifrom,ito,tid;
+    int kx,ky,kz;
+    double cypz,sypz,exprl,expim,partial;
+
+    loop_setup_thr(ifrom, ito, tid, nlocal, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, 0, NULL, NULL, thr);
+
+    for (i = ifrom; i < ito; i++) {
+      ek[i][0] = 0.0;
+      ek[i][1] = 0.0;
+      ek[i][2] = 0.0;
+    }
+
+    for (k = 0; k < kcount; k++) {
+      kx = kxvecs[k];
+      ky = kyvecs[k];
+      kz = kzvecs[k];
+
+      for (i = ifrom; i < ito; i++) {
+	cypz = cs[ky][1][i]*cs[kz][2][i] - sn[ky][1][i]*sn[kz][2][i];
+	sypz = sn[ky][1][i]*cs[kz][2][i] + cs[ky][1][i]*sn[kz][2][i];
+	exprl = cs[kx][0][i]*cypz - sn[kx][0][i]*sypz;
+	expim = sn[kx][0][i]*cypz + cs[kx][0][i]*sypz;
+	partial = expim*sfacrl_all[k] - exprl*sfacim_all[k];
+	ek[i][0] += partial*eg[k][0];
+	ek[i][1] += partial*eg[k][1];
+	ek[i][2] += partial*eg[k][2];
+      }
+    }
+
+    // convert E-field to force
+
+    for (i = ifrom; i < ito; i++) {
+      const double fac = qscale*q[i];
+      f[i][0] += fac*ek[i][0];
+      f[i][1] += fac*ek[i][1];
+      f[i][2] += fac*ek[i][2];
+    }
+ 
+    // energy if requested
+
+    if (eflag) {
+#if defined(_OPENMP)
+#pragma omp for private(k)
+#endif
+      for (k = 0; k < kcount; k++)
+	eng_tmp += ug[k] * (sfacrl_all[k]*sfacrl_all[k] + 
+			    sfacim_all[k]*sfacim_all[k]);
+    }
+
+    // virial if requested
+
+    if (vflag) {
+#if defined(_OPENMP)
+#pragma omp for private(k)
+#endif
+      for (k = 0; k < kcount; k++) {
+	double uk = ug[k] * (sfacrl_all[k]*sfacrl_all[k] + sfacim_all[k]*sfacim_all[k]);
+	v0 += uk*vg[k][0];
+	v1 += uk*vg[k][1];
+	v2 += uk*vg[k][2];
+	v3 += uk*vg[k][3];
+	v4 += uk*vg[k][4];
+	v5 += uk*vg[k][5];
+      }
+    }
+
+    reduce_thr(this, eflag,vflag,thr);
+  } // end of omp parallel region
+
+  if (eflag) {
+    eng_tmp -= g_ewald*qsqsum/MY_PIS +
+      MY_PI2*qsum*qsum / (g_ewald*g_ewald*volume);
+    energy = eng_tmp * qscale;
+  }
+
+  if (vflag) {
+    virial[0] = v0 * qscale;
+    virial[1] = v1 * qscale;
+    virial[2] = v2 * qscale;
+    virial[3] = v3 * qscale;
+    virial[4] = v4 * qscale;
+    virial[5] = v5 * qscale;
+  }
+
+  if (slabflag) slabcorr(eflag);
+}
+
+/* ---------------------------------------------------------------------- */
+
+void EwaldOMP::eik_dot_r()
+{
+  const double * const * const x = atom->x;
+  const double * const q = atom->q;
+  const int nlocal = atom->nlocal;
+  const int nthreads = comm->nthreads;
+
+#if defined(_OPENMP)
+#pragma omp parallel default(none)
+#endif
+  {
+    int i,ifrom,ito,k,l,m,n,ic,tid;
+    double cstr1,sstr1,cstr2,sstr2,cstr3,sstr3,cstr4,sstr4;
+    double sqk,clpm,slpm;
+    
+    loop_setup_thr(ifrom, ito, tid, nlocal, nthreads);
+    
+    double * const sfacrl_thr = sfacrl + tid*kmax3d;
+    double * const sfacim_thr = sfacim + tid*kmax3d;
+
+    n = 0;
+
+    // (k,0,0), (0,l,0), (0,0,m)
+
+    for (ic = 0; ic < 3; ic++) {
+      sqk = unitk[ic]*unitk[ic];
+      if (sqk <= gsqmx) {
+	cstr1 = 0.0;
+	sstr1 = 0.0;
+	for (i = ifrom; i < ito; i++) {
+	  cs[0][ic][i] = 1.0;
+	  sn[0][ic][i] = 0.0;
+	  cs[1][ic][i] = cos(unitk[ic]*x[i][ic]);
+	  sn[1][ic][i] = sin(unitk[ic]*x[i][ic]);
+	  cs[-1][ic][i] = cs[1][ic][i];
+	  sn[-1][ic][i] = -sn[1][ic][i];
+	  cstr1 += q[i]*cs[1][ic][i];
+	  sstr1 += q[i]*sn[1][ic][i];
+	}
+	sfacrl_thr[n] = cstr1;
+	sfacim_thr[n++] = sstr1;
+      }
+    }
+
+    for (m = 2; m <= kmax; m++) {
+      for (ic = 0; ic < 3; ic++) {
+	sqk = m*unitk[ic] * m*unitk[ic];
+	if (sqk <= gsqmx) {
+	  cstr1 = 0.0;
+	  sstr1 = 0.0;
+	  for (i = ifrom; i < ito; i++) {
+	    cs[m][ic][i] = cs[m-1][ic][i]*cs[1][ic][i] - 
+	      sn[m-1][ic][i]*sn[1][ic][i];
+	    sn[m][ic][i] = sn[m-1][ic][i]*cs[1][ic][i] + 
+	      cs[m-1][ic][i]*sn[1][ic][i];
+	    cs[-m][ic][i] = cs[m][ic][i];
+	    sn[-m][ic][i] = -sn[m][ic][i];
+	    cstr1 += q[i]*cs[m][ic][i];
+	    sstr1 += q[i]*sn[m][ic][i];
+	  }
+	  sfacrl_thr[n] = cstr1;
+	  sfacim_thr[n++] = sstr1;
+	}
+      }
+    }
+
+    // 1 = (k,l,0), 2 = (k,-l,0)
+
+    for (k = 1; k <= kmax; k++) {
+      for (l = 1; l <= kmax; l++) {
+	sqk = (k*unitk[0] * k*unitk[0]) + (l*unitk[1] * l*unitk[1]);
+	if (sqk <= gsqmx) {
+	  cstr1 = 0.0;
+	  sstr1 = 0.0;
+	  cstr2 = 0.0;
+	  sstr2 = 0.0;
+	  for (i = ifrom; i < ito; i++) {
+	    cstr1 += q[i]*(cs[k][0][i]*cs[l][1][i] - sn[k][0][i]*sn[l][1][i]);
+	    sstr1 += q[i]*(sn[k][0][i]*cs[l][1][i] + cs[k][0][i]*sn[l][1][i]);
+	    cstr2 += q[i]*(cs[k][0][i]*cs[l][1][i] + sn[k][0][i]*sn[l][1][i]);
+	    sstr2 += q[i]*(sn[k][0][i]*cs[l][1][i] - cs[k][0][i]*sn[l][1][i]);
+	  }
+	  sfacrl_thr[n] = cstr1;
+	  sfacim_thr[n++] = sstr1;
+	  sfacrl_thr[n] = cstr2;
+	  sfacim_thr[n++] = sstr2;
+	}
+      }
+    }
+
+    // 1 = (0,l,m), 2 = (0,l,-m)
+
+    for (l = 1; l <= kmax; l++) {
+      for (m = 1; m <= kmax; m++) {
+	sqk = (l*unitk[1] * l*unitk[1]) + (m*unitk[2] * m*unitk[2]);
+	if (sqk <= gsqmx) {
+	  cstr1 = 0.0;
+	  sstr1 = 0.0;
+	  cstr2 = 0.0;
+	  sstr2 = 0.0;
+	  for (i = ifrom; i < ito; i++) {
+	    cstr1 += q[i]*(cs[l][1][i]*cs[m][2][i] - sn[l][1][i]*sn[m][2][i]);
+	    sstr1 += q[i]*(sn[l][1][i]*cs[m][2][i] + cs[l][1][i]*sn[m][2][i]);
+	    cstr2 += q[i]*(cs[l][1][i]*cs[m][2][i] + sn[l][1][i]*sn[m][2][i]);
+	    sstr2 += q[i]*(sn[l][1][i]*cs[m][2][i] - cs[l][1][i]*sn[m][2][i]);
+	  }
+	  sfacrl_thr[n] = cstr1;
+	  sfacim_thr[n++] = sstr1;
+	  sfacrl_thr[n] = cstr2;
+	  sfacim_thr[n++] = sstr2;
+	}
+      }
+    }
+
+    // 1 = (k,0,m), 2 = (k,0,-m)
+
+    for (k = 1; k <= kmax; k++) {
+      for (m = 1; m <= kmax; m++) {
+	sqk = (k*unitk[0] * k*unitk[0]) + (m*unitk[2] * m*unitk[2]);
+	if (sqk <= gsqmx) {
+	  cstr1 = 0.0;
+	  sstr1 = 0.0;
+	  cstr2 = 0.0;
+	  sstr2 = 0.0;
+	  for (i = ifrom; i < ito; i++) {
+	    cstr1 += q[i]*(cs[k][0][i]*cs[m][2][i] - sn[k][0][i]*sn[m][2][i]);
+	    sstr1 += q[i]*(sn[k][0][i]*cs[m][2][i] + cs[k][0][i]*sn[m][2][i]);
+	    cstr2 += q[i]*(cs[k][0][i]*cs[m][2][i] + sn[k][0][i]*sn[m][2][i]);
+	    sstr2 += q[i]*(sn[k][0][i]*cs[m][2][i] - cs[k][0][i]*sn[m][2][i]);
+	  }
+	  sfacrl_thr[n] = cstr1;
+	  sfacim_thr[n++] = sstr1;
+	  sfacrl_thr[n] = cstr2;
+	  sfacim_thr[n++] = sstr2;
+	}
+      }
+    }
+
+    // 1 = (k,l,m), 2 = (k,-l,m), 3 = (k,l,-m), 4 = (k,-l,-m)
+
+    for (k = 1; k <= kmax; k++) {
+      for (l = 1; l <= kmax; l++) {
+	for (m = 1; m <= kmax; m++) {
+	  sqk = (k*unitk[0] * k*unitk[0]) + (l*unitk[1] * l*unitk[1]) +
+	    (m*unitk[2] * m*unitk[2]);
+	  if (sqk <= gsqmx) {
+	    cstr1 = 0.0;
+	    sstr1 = 0.0;
+	    cstr2 = 0.0;
+	    sstr2 = 0.0;
+	    cstr3 = 0.0;
+	    sstr3 = 0.0;
+	    cstr4 = 0.0;
+	    sstr4 = 0.0;
+	    for (i = ifrom; i < ito; i++) {
+	      clpm = cs[l][1][i]*cs[m][2][i] - sn[l][1][i]*sn[m][2][i];
+	      slpm = sn[l][1][i]*cs[m][2][i] + cs[l][1][i]*sn[m][2][i];
+	      cstr1 += q[i]*(cs[k][0][i]*clpm - sn[k][0][i]*slpm);
+	      sstr1 += q[i]*(sn[k][0][i]*clpm + cs[k][0][i]*slpm);
+	    
+	      clpm = cs[l][1][i]*cs[m][2][i] + sn[l][1][i]*sn[m][2][i];
+	      slpm = -sn[l][1][i]*cs[m][2][i] + cs[l][1][i]*sn[m][2][i];
+	      cstr2 += q[i]*(cs[k][0][i]*clpm - sn[k][0][i]*slpm);
+	      sstr2 += q[i]*(sn[k][0][i]*clpm + cs[k][0][i]*slpm);
+	    
+	      clpm = cs[l][1][i]*cs[m][2][i] + sn[l][1][i]*sn[m][2][i];
+	      slpm = sn[l][1][i]*cs[m][2][i] - cs[l][1][i]*sn[m][2][i];
+	      cstr3 += q[i]*(cs[k][0][i]*clpm - sn[k][0][i]*slpm);
+	      sstr3 += q[i]*(sn[k][0][i]*clpm + cs[k][0][i]*slpm);
+	    
+	      clpm = cs[l][1][i]*cs[m][2][i] - sn[l][1][i]*sn[m][2][i];
+	      slpm = -sn[l][1][i]*cs[m][2][i] - cs[l][1][i]*sn[m][2][i];
+	      cstr4 += q[i]*(cs[k][0][i]*clpm - sn[k][0][i]*slpm);
+	      sstr4 += q[i]*(sn[k][0][i]*clpm + cs[k][0][i]*slpm);
+	    }
+	    sfacrl_thr[n] = cstr1;
+	    sfacim_thr[n++] = sstr1;
+	    sfacrl_thr[n] = cstr2;
+	    sfacim_thr[n++] = sstr2;
+	    sfacrl_thr[n] = cstr3;
+	    sfacim_thr[n++] = sstr3;
+	    sfacrl_thr[n] = cstr4;
+	    sfacim_thr[n++] = sstr4;
+	  }
+	}
+      }
+    }
+
+    sync_threads();
+    data_reduce_thr(sfacrl,kmax3d,nthreads,1,tid);
+    data_reduce_thr(sfacim,kmax3d,nthreads,1,tid);
+
+  } // end of parallel region
+}
--- a/src/USER-OMP/ewald_omp.h
+++ b/src/USER-OMP/ewald_omp.h
@ -0,0 +1,42 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifdef KSPACE_CLASS
+
+KSpaceStyle(ewald/omp,EwaldOMP)
+
+#else
+
+#ifndef LMP_EWALD_OMP_H
+#define LMP_EWALD_OMP_H
+
+#include "ewald.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+  class EwaldOMP : public Ewald, public ThrOMP {
+ public:
+  EwaldOMP(class LAMMPS *, int, char **);
+  virtual ~EwaldOMP() { };
+  virtual void allocate();
+  virtual void compute(int, int);
+
+ protected:
+  virtual void eik_dot_r();
+};
+
+}
+
+#endif
+#endif
--- a/src/USER-OMP/fix_omp.cpp
+++ b/src/USER-OMP/fix_omp.cpp
@ -0,0 +1,279 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   OpenMP based threading support for LAMMPS
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#include "atom.h"
+#include "comm.h"
+#include "error.h"
+#include "force.h"
+#include "neighbor.h"
+#include "neigh_request.h"
+#include "update.h"
+#include "integrate.h"
+#include "min.h"
+
+#include "fix_omp.h"
+#include "thr_data.h"
+#include "thr_omp.h"
+
+#include "pair_hybrid.h"
+#include "bond_hybrid.h"
+#include "angle_hybrid.h"
+#include "dihedral_hybrid.h"
+#include "improper_hybrid.h"
+
+#include <string.h>
+#include <stdlib.h>
+#include <stdio.h>
+
+using namespace LAMMPS_NS;
+
+static int get_tid()
+{
+  int tid = 0;
+#if defined(_OPENMP)
+  tid = omp_get_thread_num();
+#endif
+  return tid;
+}
+
+/* ---------------------------------------------------------------------- */
+
+FixOMP::FixOMP(LAMMPS *lmp, int narg, char **arg) :  Fix(lmp, narg, arg),
+  thr(NULL), last_omp_style(NULL), _nthr(-1), _neighbor(true), _newton(false)
+{
+  if ((narg < 4) || (narg > 6)) error->all(FLERR,"Illegal fix OMP command");
+  if (strcmp(arg[1],"all") != 0) error->all(FLERR,"Illegal fix OMP command");
+
+  int nthreads = 1;
+  if (narg > 3) {
+#if defined(_OPENMP)
+    if (strcmp(arg[3],"*") == 0)
+#pragma omp parallel default(none) shared(nthreads)
+      nthreads = omp_get_num_threads();
+    else
+      nthreads = atoi(arg[3]);
+#endif
+  }
+
+  if (nthreads < 1)
+    error->all(FLERR,"Illegal number of threads requested.");
+
+  if (nthreads != comm->nthreads) {
+#if defined(_OPENMP)
+    omp_set_num_threads(nthreads);
+#endif
+    comm->nthreads = nthreads;
+    if (comm->me == 0) {
+      if (screen)
+	fprintf(screen,"  reset %d OpenMP thread(s) per MPI task\n", nthreads);
+      if (logfile)
+	fprintf(logfile,"  reset %d OpenMP thread(s) per MPI task\n", nthreads);
+    }
+  }
+
+  if (narg > 4) {
+    if (strcmp(arg[4],"force/neigh") == 0)
+      _neighbor = true;
+    else if (strcmp(arg[4],"force") == 0)
+      _neighbor = false;
+    else
+      error->all(FLERR,"Illegal fix omp mode requested.");
+
+    if (comm->me == 0) {
+      const char * const mode = _neighbor ? "OpenMP capable" : "serial";
+      
+      if (screen)
+	fprintf(screen,"  using %s neighbor list subroutines\n", mode);
+      if (logfile)
+	fprintf(logfile,"  using %s neighbor list subroutines\n", mode);
+    }
+  }
+
+#if 0 /* to be enabled when we can switch between half and full neighbor lists */
+  if (narg > 5) {
+    if (strcmp(arg[5],"neigh/half") == 0)
+      _newton = true;
+    else if (strcmp(arg[5],"neigh/full") == 0)
+	_newton = false;
+    else
+      error->all(FLERR,"Illegal fix OMP command");
+
+    if (comm->me == 0) {
+      const char * const mode = _newton ? "half" : "full";
+      
+      if (screen)
+	fprintf(screen,"  using /omp styles with %s neighbor list builds\n", mode);
+      if (logfile)
+	fprintf(logfile,"  using /omp styles with %s neighbor list builds\n", mode);
+    }
+  }
+#endif
+
+  // allocate list for per thread accumulator manager class instances
+  // and then have each thread create an instance of this class to 
+  // encourage the OS to use storage that is "close" to each thread's CPU.
+  thr = new ThrData *[nthreads];
+  _nthr = nthreads;
+#if defined(_OPENMP)
+#pragma omp parallel default(none)
+#endif
+  {
+    const int tid = get_tid();
+    thr[tid] = new ThrData(tid);
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+FixOMP::~FixOMP()
+{
+#if defined(_OPENMP)
+#pragma omp parallel default(none)
+#endif
+  {
+    const int tid = get_tid();
+    delete thr[tid];
+  }
+  delete[] thr;
+}
+
+/* ---------------------------------------------------------------------- */
+
+int FixOMP::setmask()
+{
+  int mask = 0;
+  mask |= PRE_FORCE;
+  mask |= PRE_FORCE_RESPA;
+  mask |= MIN_PRE_FORCE;
+  return mask;
+}
+
+/* ---------------------------------------------------------------------- */
+
+void FixOMP::init()
+{
+  if (strstr(update->integrate_style,"respa") != NULL)
+    error->all(FLERR,"Cannot use r-RESPA with /omp styles");
+
+  int check_hybrid;
+  last_omp_style = NULL;
+  char *last_omp_name = NULL;
+
+// determine which is the last force style with OpenMP
+// support as this is the one that has to reduce the forces
+
+#define CheckStyleForOMP(name)						\
+  check_hybrid = 0;							\
+  if (force->name) {							\
+    if ( (strcmp(force->name ## _style,"hybrid") == 0) ||		\
+         (strcmp(force->name ## _style,"hybrid/overlay") == 0) )	\
+      check_hybrid=1;							\
+    int len = strlen(force->name ## _style);				\
+    char *suffix = force->name ## _style + len - 4;			\
+    if (strcmp(suffix,"/omp") == 0) {					\
+      last_omp_name = force->name ## _style;				\
+      last_omp_style = (void *) force->name;				\
+    }									\
+  }
+
+#define CheckHybridForOMP(name,Class)					\
+  if (check_hybrid) {							\
+    Class ## Hybrid *style = (Class ## Hybrid *) force->name;		\
+    for (int i=0; i < style->nstyles; i++) {				\
+      int len = strlen(style->keywords[i]);				\
+      char *suffix = style->keywords[i] + len - 4;			\
+      if (strcmp(suffix,"/omp") == 0) {					\
+	last_omp_name = force->name ## _style;				\
+	last_omp_style = (void *) force->name;				\
+      }									\
+    }									\
+  }
+
+  CheckStyleForOMP(pair);
+  CheckHybridForOMP(pair,Pair);
+
+  CheckStyleForOMP(bond);
+  CheckHybridForOMP(bond,Bond);
+
+  CheckStyleForOMP(angle);
+  CheckHybridForOMP(angle,Angle);
+
+  CheckStyleForOMP(dihedral);
+  CheckHybridForOMP(dihedral,Dihedral);
+
+  CheckStyleForOMP(improper);
+  CheckHybridForOMP(improper,Improper);
+
+  CheckStyleForOMP(kspace);
+
+#undef CheckStyleForOMP
+#undef CheckHybridForOMP
+
+  set_neighbor_omp();
+}
+
+/* ---------------------------------------------------------------------- */
+
+void FixOMP::set_neighbor_omp()
+{
+  // select or deselect multi-threaded neighbor
+  // list build depending on setting in package omp.
+  // NOTE: since we are at the top of the list of
+  // fixes, we cannot adjust neighbor lists from
+  // other fixes. those have to be re-implemented
+  // as /omp fix styles. :-(
+
+  const int neigh_omp = _neighbor ? 1 : 0;
+  const int nrequest = neighbor->nrequest;
+
+  for (int i = 0; i < nrequest; ++i)
+    neighbor->requests[i]->omp = neigh_omp;
+}
+
+/* ---------------------------------------------------------------------- */
+
+// adjust size and clear out per thread accumulator arrays
+void FixOMP::pre_force(int)
+{
+  const int nall = atom->nlocal + atom->nghost;
+
+  double **f = atom->f;
+  double **torque = atom->torque;
+  double *erforce = atom->erforce;
+  double *de = atom->de;
+  double *drho = atom->drho;
+
+#if defined(_OPENMP)
+#pragma omp parallel default(none) shared(f,torque,erforce,de,drho)
+#endif
+  {
+    const int tid = get_tid();
+    thr[tid]->check_tid(tid);
+    thr[tid]->init_force(nall,f,torque,erforce,de,drho);
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+double FixOMP::memory_usage()
+{
+  double bytes = comm->nthreads * (sizeof(ThrData *) + sizeof(ThrData));
+  bytes += comm->nthreads * thr[0]->memory_usage();
+  
+  return bytes;
+}
--- a/src/USER-OMP/fix_omp.h
+++ b/src/USER-OMP/fix_omp.h
@ -0,0 +1,71 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifdef FIX_CLASS
+
+FixStyle(OMP,FixOMP)
+
+#else
+
+#ifndef LMP_FIX_OMP_H
+#define LMP_FIX_OMP_H
+
+#include "fix.h"
+
+
+namespace LAMMPS_NS {
+
+class ThrData;
+
+class FixOMP : public Fix {
+  friend class ThrOMP;
+
+ public:
+  FixOMP(class LAMMPS *, int, char **);
+  virtual ~FixOMP();
+  virtual int setmask();
+  virtual void init();
+  virtual void pre_force(int);
+
+  virtual void setup_pre_force(int vflag)           { pre_force(vflag); };
+  virtual void min_setup_pre_force(int vflag)       { pre_force(vflag); };
+  virtual void min_pre_force(int vflag)             { pre_force(vflag); };
+  virtual void setup_pre_force_respa(int vflag,int) { pre_force(vflag); };
+  virtual void pre_force_respa(int vflag,int,int)   { pre_force(vflag); };
+
+  virtual double memory_usage();
+
+  ThrData *get_thr(int tid) { return thr[tid]; };
+  int get_nthr() const { return _nthr; }
+
+ protected:
+  ThrData **thr;
+  void *last_omp_style; // pointer to the style that needs
+						// to do the force reduction
+  
+ public:
+  bool get_neighbor() const {return _neighbor;};
+  bool get_newton() const   {return _newton;};
+
+ private:
+  int  _nthr;     // number of currently active ThrData object
+  bool _neighbor; // en/disable threads for neighbor list construction
+  bool _newton;   // en/disable newton's 3rd law for local atoms.
+
+  void set_neighbor_omp();
+};
+
+}
+
+#endif
+#endif
--- a/src/USER-OMP/fix_peri_neigh_omp.cpp
+++ b/src/USER-OMP/fix_peri_neigh_omp.cpp
@ -0,0 +1,50 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Mike Parks (SNL)
+------------------------------------------------------------------------- */
+
+#include "fix_peri_neigh_omp.h"
+#include "fix_omp.h"
+#include "modify.h"
+#include "neighbor.h"
+#include "neigh_request.h"
+
+using namespace LAMMPS_NS;
+
+/* ---------------------------------------------------------------------- */
+
+void FixPeriNeighOMP::init()
+{
+  if (!first) return;
+
+  // determine status of neighbor flag of the omp package command
+  int ifix = modify->find_fix("package_omp");
+  int use_omp = 0;
+  if (ifix >=0) {
+     FixOMP * fix = static_cast<FixOMP *>(lmp->modify->fix[ifix]);
+     if (fix->get_neighbor()) use_omp = 1;
+  }
+
+  // need a full neighbor list once
+
+  int irequest = neighbor->request((void *) this);
+  neighbor->requests[irequest]->pair = 0;
+  neighbor->requests[irequest]->fix  = 1;
+  neighbor->requests[irequest]->half = 0; 
+  neighbor->requests[irequest]->full = 1;
+  neighbor->requests[irequest]->omp = use_omp;
+  neighbor->requests[irequest]->occasional = 1;
+}
+
--- a/src/USER-OMP/fix_peri_neigh_omp.h
+++ b/src/USER-OMP/fix_peri_neigh_omp.h
@ -0,0 +1,37 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifdef FIX_CLASS
+
+FixStyle(PERI_NEIGH_OMP,FixPeriNeighOMP)
+
+#else
+
+#ifndef LMP_FIX_PERI_NEIGH_OMP_H
+#define LMP_FIX_PERI_NEIGH_OMP_H
+
+#include "fix_peri_neigh.h"
+
+namespace LAMMPS_NS {
+
+class FixPeriNeighOMP : public FixPeriNeigh {
+
+ public:
+  FixPeriNeighOMP(class LAMMPS *lmp, int narg, char **argv) : FixPeriNeigh(lmp,narg,argv) {};
+  virtual void init();
+};
+
+}
+
+#endif
+#endif
--- a/src/USER-OMP/fix_qeq_comb_omp.cpp
+++ b/src/USER-OMP/fix_qeq_comb_omp.cpp
@ -0,0 +1,160 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#include "lmptype.h"
+#include "mpi.h"
+#include <math.h>
+#include "fix_qeq_comb_omp.h"
+#include "atom.h"
+#include "force.h"
+#include "group.h"
+#include "memory.h"
+#include "error.h"
+#include "respa.h"
+#include "update.h"
+#include "pair_comb_omp.h"
+
+#include <string.h>
+
+using namespace LAMMPS_NS;
+
+/* ---------------------------------------------------------------------- */
+
+FixQEQCombOMP::FixQEQCombOMP(LAMMPS *lmp, int narg, char **arg) 
+  : FixQEQComb(lmp, narg, arg)
+{
+  if (narg < 5) error->all(FLERR,"Illegal fix qeq/comb/omp command");
+}
+
+/* ---------------------------------------------------------------------- */
+
+void FixQEQCombOMP::init()
+{
+  if (!atom->q_flag)
+    error->all(FLERR,"Fix qeq/comb/omp requires atom attribute q");
+
+  comb = (PairComb *) force->pair_match("comb/omp",1);
+  if (comb == NULL)
+    comb = (PairComb *) force->pair_match("comb",1);
+  if (comb == NULL) error->all(FLERR,"Must use pair_style comb or comb/omp with fix qeq/comb");
+
+  if (strstr(update->integrate_style,"respa"))
+    nlevels_respa = ((Respa *) update->integrate)->nlevels;
+
+  ngroup = group->count(igroup);
+  if (ngroup == 0) error->all(FLERR,"Fix qeq/comb group has no atoms");
+}
+
+/* ---------------------------------------------------------------------- */
+
+void FixQEQCombOMP::post_force(int vflag)
+{
+  int i,iloop,loopmax;
+  double heatpq,qmass,dtq,dtq2;
+  double enegchkall,enegmaxall;
+
+  if (update->ntimestep % nevery) return;
+
+  // reallocate work arrays if necessary
+  // qf = charge force
+  // q1 = charge displacement
+  // q2 = tmp storage of charge force for next iteration
+
+  if (atom->nmax > nmax) {
+    memory->destroy(qf);
+    memory->destroy(q1);
+    memory->destroy(q2);
+    nmax = atom->nmax;
+    memory->create(qf,nmax,"qeq:qf");
+    memory->create(q1,nmax,"qeq:q1");
+    memory->create(q2,nmax,"qeq:q2");
+    vector_atom = qf;
+  }
+
+  // more loops for first-time charge equilibrium
+
+  iloop = 0; 
+  if (firstflag) loopmax = 5000;
+  else loopmax = 2000;
+
+  // charge-equilibration loop
+
+  if (me == 0 && fp)
+    fprintf(fp,"Charge equilibration on step " BIGINT_FORMAT "\n",
+	    update->ntimestep);
+
+  heatpq = 0.05;
+  qmass  = 0.000548580;
+  dtq    = 0.0006;
+  dtq2   = 0.5*dtq*dtq/qmass;
+
+  double enegchk = 0.0;
+  double enegtot = 0.0; 
+  double enegmax = 0.0;
+
+  double *q = atom->q;
+  int *mask = atom->mask;
+  int nlocal = atom->nlocal;
+
+  for (i = 0; i < nlocal; i++)
+    q1[i] = q2[i] = qf[i] = 0.0;
+
+  for (iloop = 0; iloop < loopmax; iloop ++ ) {
+    for (i = 0; i < nlocal; i++)
+      if (mask[i] & groupbit) {
+	q1[i] += qf[i]*dtq2 - heatpq*q1[i];
+	q[i]  += q1[i]; 
+      }
+
+    enegtot = comb->yasu_char(qf,igroup);
+    enegtot /= ngroup;
+    enegchk = enegmax = 0.0;
+
+    for (i = 0; i < nlocal ; i++)
+      if (mask[i] & groupbit) {
+	q2[i] = enegtot-qf[i];
+	enegmax = MAX(enegmax,fabs(q2[i]));
+	enegchk += fabs(q2[i]);
+	qf[i] = q2[i];
+      }
+
+    MPI_Allreduce(&enegchk,&enegchkall,1,MPI_DOUBLE,MPI_SUM,world);
+    enegchk = enegchkall/ngroup;
+    MPI_Allreduce(&enegmax,&enegmaxall,1,MPI_DOUBLE,MPI_MAX,world);
+    enegmax = enegmaxall;
+
+    if (enegchk <= precision && enegmax <= 100.0*precision) break;
+
+    if (me == 0 && fp)
+      fprintf(fp,"  iteration: %d, enegtot %.6g, "
+	      "enegmax %.6g, fq deviation: %.6g\n",
+	      iloop,enegtot,enegmax,enegchk); 
+
+    for (i = 0; i < nlocal; i++)
+      if (mask[i] & groupbit)
+	q1[i] += qf[i]*dtq2 - heatpq*q1[i]; 
+  } 
+
+  if (me == 0 && fp) {
+    if (iloop == loopmax)
+      fprintf(fp,"Charges did not converge in %d iterations\n",iloop);
+    else
+      fprintf(fp,"Charges converged in %d iterations to %.10f tolerance\n",
+	      iloop,enegchk);
+  }
+}
+
--- a/src/USER-OMP/fix_qeq_comb_omp.h
+++ b/src/USER-OMP/fix_qeq_comb_omp.h
@ -0,0 +1,32 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifdef FIX_CLASS
+
+FixStyle(qeq/comb/omp,FixQEQCombOMP)
+
+#else
+
+#ifndef LMP_FIX_QEQ_COMB_OMP_H
+#define LMP_FIX_QEQ_COMB_OMP_H
+
+#include "fix_qeq_comb.h"
+
+namespace LAMMPS_NS {
+
+class FixQEQCombOMP : public FixQEQComb {
+ public:
+  FixQEQCombOMP(class LAMMPS *, int, char **);
+  virtual void init();
+  virtual void post_force(int);
+};
+
+}
+
+#endif
+#endif
--- a/src/USER-OMP/fix_wall_gran_omp.cpp
+++ b/src/USER-OMP/fix_wall_gran_omp.cpp
@ -0,0 +1,152 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#include "math.h"
+#include "fix_wall_gran_omp.h"
+#include "atom.h"
+#include "update.h"
+
+using namespace LAMMPS_NS;
+
+
+enum{XPLANE,YPLANE,ZPLANE,ZCYLINDER};    // XYZ PLANE need to be 0,1,2
+enum{HOOKE,HOOKE_HISTORY,HERTZ_HISTORY};
+
+#define BIG 1.0e20
+
+/* ---------------------------------------------------------------------- */
+
+FixWallGranOMP::FixWallGranOMP(LAMMPS *lmp, int narg, char **arg) :
+  FixWallGran(lmp, narg, arg) { }
+
+/* ---------------------------------------------------------------------- */
+
+void FixWallGranOMP::post_force(int vflag)
+{
+  double vwall[3];
+
+  // set position of wall to initial settings and velocity to 0.0
+  // if wiggle or shear, set wall position and velocity accordingly
+
+  double wlo = lo;
+  double whi = hi;
+  vwall[0] = vwall[1] = vwall[2] = 0.0;
+  if (wiggle) {
+    double arg = omega * (update->ntimestep - time_origin) * dt;
+    if (wallstyle == axis) {
+      wlo = lo + amplitude - amplitude*cos(arg);
+      whi = hi + amplitude - amplitude*cos(arg);
+    }
+    vwall[axis] = amplitude*omega*sin(arg);
+  } else if (wshear) vwall[axis] = vshear;
+
+  // loop over all my atoms
+  // rsq = distance from wall
+  // dx,dy,dz = signed distance from wall
+  // for rotating cylinder, reset vwall based on particle position
+  // skip atom if not close enough to wall
+  //   if wall was set to NULL, it's skipped since lo/hi are infinity
+  // compute force and torque on atom if close enough to wall
+  //   via wall potential matched to pair potential
+  // set shear if pair potential stores history
+
+  double * const * const x = atom->x;
+  double * const * const v = atom->v;
+  double * const * const f = atom->f;
+  double * const * const omega = atom->omega;
+  double * const * const torque = atom->torque;
+  double * const radius = atom->radius;
+  double * const rmass = atom->rmass;
+  const int * const mask = atom->mask;
+  const int nlocal = atom->nlocal;
+
+  if (update->ntimestep > laststep) shearupdate = 1;
+  else shearupdate = 0;
+
+  int i;
+#if defined(_OPENMP)
+#pragma omp parallel for private(i) default(none) firstprivate(vwall,wlo,whi)
+#endif  
+  for (i = 0; i < nlocal; i++) {
+
+    if (mask[i] & groupbit) {
+      double dx,dy,dz,del1,del2,delxy,delr,rsq;
+
+      dx = dy = dz = 0.0;
+
+      if (wallstyle == XPLANE) {
+	del1 = x[i][0] - wlo;
+	del2 = whi - x[i][0];
+	if (del1 < del2) dx = del1;
+	else dx = -del2;
+      } else if (wallstyle == YPLANE) {
+	del1 = x[i][1] - wlo;
+	del2 = whi - x[i][1];
+	if (del1 < del2) dy = del1;
+	else dy = -del2;
+      } else if (wallstyle == ZPLANE) {
+	del1 = x[i][2] - wlo;
+	del2 = whi - x[i][2];
+	if (del1 < del2) dz = del1;
+	else dz = -del2;
+      } else if (wallstyle == ZCYLINDER) {
+        delxy = sqrt(x[i][0]*x[i][0] + x[i][1]*x[i][1]);
+	delr = cylradius - delxy;
+	if (delr > radius[i]) dz = cylradius;
+	else {
+	  dx = -delr/delxy * x[i][0];
+	  dy = -delr/delxy * x[i][1];
+	  if (wshear && axis != 2) {
+	    vwall[0] = vshear * x[i][1]/delxy;
+	    vwall[1] = -vshear * x[i][0]/delxy;
+	    vwall[2] = 0.0;
+	  }
+	}
+      }
+
+      rsq = dx*dx + dy*dy + dz*dz;
+
+      if (rsq > radius[i]*radius[i]) {
+	if (pairstyle != HOOKE) {
+	  shear[i][0] = 0.0;
+	  shear[i][1] = 0.0;
+	  shear[i][2] = 0.0;
+	}
+      } else {
+	if (pairstyle == HOOKE)
+	  hooke(rsq,dx,dy,dz,vwall,v[i],f[i],omega[i],torque[i],
+		radius[i],rmass[i]);
+	else if (pairstyle == HOOKE_HISTORY)
+	  hooke_history(rsq,dx,dy,dz,vwall,v[i],f[i],omega[i],torque[i],
+			radius[i],rmass[i],shear[i]);
+	else if (pairstyle == HERTZ_HISTORY)
+	  hertz_history(rsq,dx,dy,dz,vwall,v[i],f[i],omega[i],torque[i],
+			radius[i],rmass[i],shear[i]);
+      }
+    }
+  }
+
+  laststep = update->ntimestep;
+}
+
+/* ---------------------------------------------------------------------- */
+
+void FixWallGranOMP::post_force_respa(int vflag, int ilevel, int iloop)
+{
+  if (ilevel == nlevels_respa-1) post_force(vflag);
+}
+
--- a/src/USER-OMP/fix_wall_gran_omp.h
+++ b/src/USER-OMP/fix_wall_gran_omp.h
@ -0,0 +1,38 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifdef FIX_CLASS
+
+FixStyle(wall/gran/omp,FixWallGranOMP)
+
+#else
+
+#ifndef LMP_FIX_WALL_GRAN_OMP_H
+#define LMP_FIX_WALL_GRAN_OMP_H
+
+#include "fix_wall_gran.h"
+
+namespace LAMMPS_NS {
+
+class FixWallGranOMP : public FixWallGran {
+
+ public:
+  FixWallGranOMP(class LAMMPS *, int, char **);
+  virtual void post_force(int);
+  virtual void post_force_respa(int, int, int);
+};
+
+}
+
+#endif
+#endif
--- a/src/USER-OMP/improper_class2_omp.cpp
+++ b/src/USER-OMP/improper_class2_omp.cpp
@ -0,0 +1,697 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#include "lmptype.h"
+#include "mpi.h"
+#include "math.h"
+#include "improper_class2_omp.h"
+#include "atom.h"
+#include "comm.h"
+#include "neighbor.h"
+#include "domain.h"
+#include "force.h"
+#include "update.h"
+#include "error.h"
+
+using namespace LAMMPS_NS;
+
+#define TOLERANCE 0.05
+#define SMALL     0.001
+
+/* ---------------------------------------------------------------------- */
+
+void ImproperClass2OMP::compute(int eflag, int vflag)
+{
+
+  if (eflag || vflag) {
+    ev_setup(eflag,vflag);
+  } else evflag = 0;
+
+  const int nall = atom->nlocal + atom->nghost;
+  const int nthreads = comm->nthreads;
+  const int inum = neighbor->nimproperlist;
+
+#if defined(_OPENMP)
+#pragma omp parallel default(none) shared(eflag,vflag)
+#endif
+  {
+    int ifrom, ito, tid;
+
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
+
+    if (evflag) {
+      if (eflag) {
+	if (force->newton_bond) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
+      } else {
+	if (force->newton_bond) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
+      }
+    } else {
+      if (force->newton_bond) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
+    }
+
+    reduce_thr(this, eflag, vflag, thr);
+  } // end of omp parallel region
+}
+
+template <const int EVFLAG, const int EFLAG, const int NEWTON_BOND>
+void ImproperClass2OMP::eval(int nfrom, int nto, ThrData * const thr)
+{
+  int i1,i2,i3,i4,i,j,k,n,type;
+  double eimproper;
+  double delr[3][3],rmag[3],rinvmag[3],rmag2[3];
+  double theta[3],costheta[3],sintheta[3];
+  double cossqtheta[3],sinsqtheta[3],invstheta[3];
+  double rABxrCB[3],rDBxrAB[3],rCBxrDB[3];
+  double ddelr[3][4],dr[3][4][3],dinvr[3][4][3];
+  double dthetadr[3][4][3],dinvsth[3][4][3];
+  double dinv3r[4][3],dinvs3r[3][4][3];
+  double drCBxrDB[3],rCBxdrDB[3],drDBxrAB[3],rDBxdrAB[3];
+  double drABxrCB[3],rABxdrCB[3];
+  double dot1,dot2,dd[3];
+  double fdot[3][4][3],ftmp,invs3r[3],inv3r;
+  double t,tt1,tt3,sc1;
+  double dotCBDBAB,dotDBABCB,dotABCBDB;
+  double chi,deltachi,d2chi,cossin2;
+  double drAB[3][4][3],drCB[3][4][3],drDB[3][4][3];
+  double dchi[3][4][3],dtotalchi[4][3];
+  double schiABCD,chiABCD,schiCBDA,chiCBDA,schiDBAC,chiDBAC;
+  double fabcd[4][3];
+
+  eimproper = 0.0;
+ 
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  const int * const * const improperlist = neighbor->improperlist;
+  const int nlocal = atom->nlocal;
+
+  for (i = 0; i < 3; i++)
+    for (j = 0; j < 4; j++)
+      for (k = 0; k < 3; k++) {
+	dthetadr[i][j][k] = 0.0;
+	drAB[i][j][k] = 0.0;
+	drCB[i][j][k] = 0.0;
+	drDB[i][j][k] = 0.0;
+      }
+
+  for (n = nfrom; n < nto; n++) {
+    i1 = improperlist[n][0];
+    i2 = improperlist[n][1];
+    i3 = improperlist[n][2];
+    i4 = improperlist[n][3];
+    type = improperlist[n][4];
+
+    if (k0[type] == 0.0) continue;
+
+    // difference vectors
+
+    delr[0][0] = x[i1][0] - x[i2][0];
+    delr[0][1] = x[i1][1] - x[i2][1];
+    delr[0][2] = x[i1][2] - x[i2][2];
+    domain->minimum_image(delr[0]);
+
+    delr[1][0] = x[i3][0] - x[i2][0];
+    delr[1][1] = x[i3][1] - x[i2][1];
+    delr[1][2] = x[i3][2] - x[i2][2];
+    domain->minimum_image(delr[1]);
+
+    delr[2][0] = x[i4][0] - x[i2][0];
+    delr[2][1] = x[i4][1] - x[i2][1];
+    delr[2][2] = x[i4][2] - x[i2][2];
+    domain->minimum_image(delr[2]);
+
+    // bond lengths and associated values
+
+    for (i = 0; i < 3; i++) {
+      rmag2[i] = delr[i][0]*delr[i][0] + delr[i][1]*delr[i][1] + 
+	delr[i][2]*delr[i][2];
+      rmag[i] = sqrt(rmag2[i]);
+      rinvmag[i] = 1.0/rmag[i];
+    }
+
+    // angle ABC, CBD, ABD
+
+    costheta[0] = (delr[0][0]*delr[1][0] + delr[0][1]*delr[1][1] +  
+		   delr[0][2]*delr[1][2]) / (rmag[0]*rmag[1]);
+    costheta[1] = (delr[1][0]*delr[2][0] + delr[1][1]*delr[2][1] + 
+		   delr[1][2]*delr[2][2]) / (rmag[1]*rmag[2]);
+    costheta[2] = (delr[0][0]*delr[2][0] + delr[0][1]*delr[2][1] + 
+		   delr[0][2]*delr[2][2]) / (rmag[0]*rmag[2]);
+
+    // angle error check
+
+    for (i = 0; i < 3; i++) {
+      if (costheta[i] == -1.0) {
+	int me = comm->me;
+	if (screen) {
+	  char str[128];
+	  sprintf(str,
+		  "Improper problem: %d/%d " BIGINT_FORMAT " %d %d %d %d",
+		  me, thr->get_tid(),update->ntimestep,
+		  atom->tag[i1],atom->tag[i2],atom->tag[i3],atom->tag[i4]);
+	  error->warning(FLERR,str,0);
+	  fprintf(screen,"  1st atom: %d %g %g %g\n",
+		  me,x[i1][0],x[i1][1],x[i1][2]);
+	  fprintf(screen,"  2nd atom: %d %g %g %g\n",
+		  me,x[i2][0],x[i2][1],x[i2][2]);
+	  fprintf(screen,"  3rd atom: %d %g %g %g\n",
+		  me,x[i3][0],x[i3][1],x[i3][2]);
+	  fprintf(screen,"  4th atom: %d %g %g %g\n",
+		  me,x[i4][0],x[i4][1],x[i4][2]);
+	}
+      }
+    }
+
+    for (i = 0; i < 3; i++) {
+      if (costheta[i] > 1.0)  costheta[i] = 1.0;
+      if (costheta[i] < -1.0) costheta[i] = -1.0;
+      theta[i] = acos(costheta[i]);
+      cossqtheta[i] = costheta[i]*costheta[i];
+      sintheta[i] = sin(theta[i]);
+      invstheta[i] = 1.0/sintheta[i];
+      sinsqtheta[i] = sintheta[i]*sintheta[i];
+    }
+
+    // cross & dot products
+
+    cross(delr[0],delr[1],rABxrCB);
+    cross(delr[2],delr[0],rDBxrAB);
+    cross(delr[1],delr[2],rCBxrDB);
+
+    dotCBDBAB = dot(rCBxrDB,delr[0]);
+    dotDBABCB = dot(rDBxrAB,delr[1]);
+    dotABCBDB = dot(rABxrCB,delr[2]);
+
+    t = rmag[0] * rmag[1] * rmag[2];
+    inv3r = 1.0/t;
+    invs3r[0] = invstheta[1] * inv3r;
+    invs3r[1] = invstheta[2] * inv3r;
+    invs3r[2] = invstheta[0] * inv3r;
+
+    // chi ABCD, CBDA, DBAC
+    // final chi is average of three
+
+    schiABCD = dotCBDBAB * invs3r[0];
+    chiABCD = asin(schiABCD);
+    schiCBDA = dotDBABCB * invs3r[1];
+    chiCBDA = asin(schiCBDA);
+    schiDBAC = dotABCBDB * invs3r[2];
+    chiDBAC = asin(schiDBAC);
+
+    chi = (chiABCD + chiCBDA + chiDBAC) / 3.0;
+    deltachi = chi - chi0[type];
+    d2chi = deltachi * deltachi;
+
+    // energy
+
+    if (EFLAG) eimproper = k0[type]*d2chi;
+
+    // forces
+    // define d(delr)
+    // i = bond AB/CB/DB, j = atom A/B/C/D
+
+    ddelr[0][0] = 1.0;
+    ddelr[0][1] = -1.0;
+    ddelr[0][2] = 0.0;
+    ddelr[0][3] = 0.0;
+    ddelr[1][0] = 0.0;
+    ddelr[1][1] = -1.0;
+    ddelr[1][2] = 1.0;
+    ddelr[1][3] = 0.0;
+    ddelr[2][0] = 0.0;
+    ddelr[2][1] = -1.0;
+    ddelr[2][2] = 0.0;
+    ddelr[2][3] = 1.0;
+
+    // compute d(|r|)/dr and d(1/|r|)/dr for each direction, bond and atom
+    // define d(r) for each r
+    // i = bond AB/CB/DB, j = atom A/B/C/D, k = X/Y/Z
+
+    for (i = 0; i < 3; i++)
+      for (j = 0; j < 4; j++)
+	for (k = 0; k < 3; k++) {
+	  dr[i][j][k] = delr[i][k] * ddelr[i][j] / rmag[i];
+	  dinvr[i][j][k] = -dr[i][j][k] / rmag2[i];
+	}
+
+    // compute d(1 / (|r_AB| * |r_CB| * |r_DB|) / dr
+    // i = atom A/B/C/D, j = X/Y/Z
+
+    for (i = 0; i < 4; i++)
+      for (j = 0; j < 3; j++)
+	dinv3r[i][j] = rinvmag[1] * (rinvmag[2] * dinvr[0][i][j] +
+				     rinvmag[0] * dinvr[2][i][j]) +
+	  rinvmag[2] * rinvmag[0] * dinvr[1][i][j];
+
+    // compute d(theta)/d(r) for 3 angles
+    // angleABC
+
+    tt1 = costheta[0] / rmag2[0];
+    tt3 = costheta[0] / rmag2[1];
+    sc1 = 1.0 / sqrt(1.0 - cossqtheta[0]);
+
+    dthetadr[0][0][0] = sc1 * ((tt1 * delr[0][0]) - 
+			       (delr[1][0] * rinvmag[0] * rinvmag[1]));
+    dthetadr[0][0][1] = sc1 * ((tt1 * delr[0][1]) - 
+			       (delr[1][1] * rinvmag[0] * rinvmag[1]));
+    dthetadr[0][0][2] = sc1 * ((tt1 * delr[0][2]) - 
+			       (delr[1][2] * rinvmag[0] * rinvmag[1]));
+    dthetadr[0][1][0] = -sc1 * ((tt1 * delr[0][0]) - 
+				(delr[1][0] * rinvmag[0] * rinvmag[1]) +
+				(tt3 * delr[1][0]) - 
+				(delr[0][0] * rinvmag[0] * rinvmag[1]));
+    dthetadr[0][1][1] = -sc1 * ((tt1 * delr[0][1]) - 
+				(delr[1][1] * rinvmag[0] * rinvmag[1]) +
+				(tt3 * delr[1][1]) - 
+				(delr[0][1] * rinvmag[0] * rinvmag[1]));
+    dthetadr[0][1][2] = -sc1 * ((tt1 * delr[0][2]) - 
+				(delr[1][2] * rinvmag[0] * rinvmag[1]) +
+				(tt3 * delr[1][2]) - 
+				(delr[0][2] * rinvmag[0] * rinvmag[1]));
+    dthetadr[0][2][0] = sc1 * ((tt3 * delr[1][0]) - 
+			       (delr[0][0] * rinvmag[0] * rinvmag[1]));
+    dthetadr[0][2][1] = sc1 * ((tt3 * delr[1][1]) - 
+			       (delr[0][1] * rinvmag[0] * rinvmag[1]));
+    dthetadr[0][2][2] = sc1 * ((tt3 * delr[1][2]) - 
+			       (delr[0][2] * rinvmag[0] * rinvmag[1]));
+
+    // angleCBD
+
+    tt1 = costheta[1] / rmag2[1];
+    tt3 = costheta[1] / rmag2[2];
+    sc1 = 1.0 / sqrt(1.0 - cossqtheta[1]);
+
+    dthetadr[1][2][0] = sc1 * ((tt1 * delr[1][0]) - 
+			       (delr[2][0] * rinvmag[1] * rinvmag[2]));
+    dthetadr[1][2][1] = sc1 * ((tt1 * delr[1][1]) - 
+			       (delr[2][1] * rinvmag[1] * rinvmag[2]));
+    dthetadr[1][2][2] = sc1 * ((tt1 * delr[1][2]) - 
+			       (delr[2][2] * rinvmag[1] * rinvmag[2]));
+    dthetadr[1][1][0] = -sc1 * ((tt1 * delr[1][0]) - 
+				(delr[2][0] * rinvmag[1] * rinvmag[2]) +
+				(tt3 * delr[2][0]) - 
+				(delr[1][0] * rinvmag[2] * rinvmag[1]));
+    dthetadr[1][1][1] = -sc1 * ((tt1 * delr[1][1]) - 
+				(delr[2][1] * rinvmag[1] * rinvmag[2]) +
+				(tt3 * delr[2][1]) - 
+				(delr[1][1] * rinvmag[2] * rinvmag[1]));
+    dthetadr[1][1][2] = -sc1 * ((tt1 * delr[1][2]) - 
+				(delr[2][2] * rinvmag[1] * rinvmag[2]) +
+				(tt3 * delr[2][2]) - 
+				(delr[1][2] * rinvmag[2] * rinvmag[1]));
+    dthetadr[1][3][0] = sc1 * ((tt3 * delr[2][0]) - 
+			       (delr[1][0] * rinvmag[2] * rinvmag[1]));
+    dthetadr[1][3][1] = sc1 * ((tt3 * delr[2][1]) - 
+			       (delr[1][1] * rinvmag[2] * rinvmag[1]));
+    dthetadr[1][3][2] = sc1 * ((tt3 * delr[2][2]) - 
+			       (delr[1][2] * rinvmag[2] * rinvmag[1]));
+
+    // angleABD
+
+    tt1 = costheta[2] / rmag2[0];
+    tt3 = costheta[2] / rmag2[2];
+    sc1 = 1.0 / sqrt(1.0 - cossqtheta[2]);
+
+    dthetadr[2][0][0] = sc1 * ((tt1 * delr[0][0]) - 
+			       (delr[2][0] * rinvmag[0] * rinvmag[2]));
+    dthetadr[2][0][1] = sc1 * ((tt1 * delr[0][1]) - 
+			       (delr[2][1] * rinvmag[0] * rinvmag[2]));
+    dthetadr[2][0][2] = sc1 * ((tt1 * delr[0][2]) - 
+			       (delr[2][2] * rinvmag[0] * rinvmag[2]));
+    dthetadr[2][1][0] = -sc1 * ((tt1 * delr[0][0]) - 
+				(delr[2][0] * rinvmag[0] * rinvmag[2]) +
+				(tt3 * delr[2][0]) - 
+				(delr[0][0] * rinvmag[2] * rinvmag[0]));
+    dthetadr[2][1][1] = -sc1 * ((tt1 * delr[0][1]) - 
+				(delr[2][1] * rinvmag[0] * rinvmag[2]) +
+				(tt3 * delr[2][1]) - 
+				(delr[0][1] * rinvmag[2] * rinvmag[0]));
+    dthetadr[2][1][2] = -sc1 * ((tt1 * delr[0][2]) - 
+				(delr[2][2] * rinvmag[0] * rinvmag[2]) +
+				(tt3 * delr[2][2]) - 
+				(delr[0][2] * rinvmag[2] * rinvmag[0]));
+    dthetadr[2][3][0] = sc1 * ((tt3 * delr[2][0]) - 
+			       (delr[0][0] * rinvmag[2] * rinvmag[0]));
+    dthetadr[2][3][1] = sc1 * ((tt3 * delr[2][1]) - 
+			       (delr[0][1] * rinvmag[2] * rinvmag[0]));
+    dthetadr[2][3][2] = sc1 * ((tt3 * delr[2][2]) - 
+			       (delr[0][2] * rinvmag[2] * rinvmag[0]));
+
+    // compute d( 1 / sin(theta))/dr
+    // i = angle, j = atom, k = direction
+
+    for (i = 0; i < 3; i++) {
+      cossin2 = -costheta[i] / sinsqtheta[i];
+      for (j = 0; j < 4; j++)
+	for (k = 0; k < 3; k++)
+	  dinvsth[i][j][k] = cossin2 * dthetadr[i][j][k];
+    }
+
+    // compute d(1 / sin(theta) * |r_AB| * |r_CB| * |r_DB|)/dr
+    // i = angle, j = atom
+
+    for (i = 0; i < 4; i++)
+      for (j = 0; j < 3; j++) {
+	dinvs3r[0][i][j] = (invstheta[1] * dinv3r[i][j]) +
+	  (inv3r * dinvsth[1][i][j]);
+	dinvs3r[1][i][j] = (invstheta[2] * dinv3r[i][j]) +
+	  (inv3r * dinvsth[2][i][j]);
+	dinvs3r[2][i][j] = (invstheta[0] * dinv3r[i][j]) +
+	  (inv3r * dinvsth[0][i][j]);
+      }
+
+    // drCB(i,j,k), etc
+    // i = vector X'/Y'/Z', j = atom A/B/C/D, k = direction X/Y/Z
+
+    for (i = 0; i < 3; i++) {
+      drCB[i][1][i] = -1.0;
+      drAB[i][1][i] = -1.0;
+      drDB[i][1][i] = -1.0;
+      drDB[i][3][i] = 1.0;
+      drCB[i][2][i] = 1.0;
+      drAB[i][0][i] = 1.0;
+    }
+
+    // d((r_CB x r_DB) dot r_AB)
+    // r_CB x d(r_DB)
+    // d(r_CB) x r_DB
+    // (r_CB x d(r_DB)) + (d(r_CB) x r_DB)
+    // (r_CB x d(r_DB)) + (d(r_CB) x r_DB) dot r_AB
+    // d(r_AB) dot (r_CB x r_DB)
+
+    for (i = 0; i < 3; i++)
+      for (j = 0; j < 4; j++) {
+	cross(delr[1],drDB[i][j],rCBxdrDB);
+	cross(drCB[i][j],delr[2],drCBxrDB);
+	for (k = 0; k < 3; k++) dd[k] = rCBxdrDB[k] + drCBxrDB[k];
+	dot1 = dot(dd,delr[0]);
+	dot2 = dot(rCBxrDB,drAB[i][j]);
+	fdot[0][j][i] = dot1 + dot2;
+      }
+
+    // d((r_DB x r_AB) dot r_CB)
+    // r_DB x d(r_AB)
+    // d(r_DB) x r_AB
+    // (r_DB x d(r_AB)) + (d(r_DB) x r_AB)
+    // (r_DB x d(r_AB)) + (d(r_DB) x r_AB) dot r_CB
+    // d(r_CB) dot (r_DB x r_AB)
+
+    for (i = 0; i < 3; i++)
+      for (j = 0; j < 4; j++) {
+	cross(delr[2],drAB[i][j],rDBxdrAB);
+	cross(drDB[i][j],delr[0],drDBxrAB);
+	for (k = 0; k < 3; k++) dd[k] = rDBxdrAB[k] + drDBxrAB[k];
+	dot1 = dot(dd,delr[1]);
+	dot2 = dot(rDBxrAB,drCB[i][j]);
+	fdot[1][j][i] = dot1 + dot2;
+      }
+
+    // d((r_AB x r_CB) dot r_DB)
+    // r_AB x d(r_CB)
+    // d(r_AB) x r_CB
+    // (r_AB x d(r_CB)) + (d(r_AB) x r_CB)
+    // (r_AB x d(r_CB)) + (d(r_AB) x r_CB) dot r_DB
+    // d(r_DB) dot (r_AB x r_CB)
+
+    for (i = 0; i < 3; i++)
+      for (j = 0; j < 4; j++) {
+	cross(delr[0],drCB[i][j],rABxdrCB);
+	cross(drAB[i][j],delr[1],drABxrCB);
+	for (k = 0; k < 3; k++) dd[k] = rABxdrCB[k] + drABxrCB[k];
+	dot1 = dot(dd,delr[2]);
+	dot2 = dot(rABxrCB,drDB[i][j]);
+	fdot[2][j][i] = dot1 + dot2;
+      }
+
+    // force on each atom
+
+    for (i = 0; i < 4; i++)
+      for (j = 0; j < 3; j++) {
+	ftmp = (fdot[0][i][j] * invs3r[0]) + 
+	  (dinvs3r[0][i][j] * dotCBDBAB);
+	dchi[0][i][j] = ftmp / cos(chiABCD);
+	ftmp = (fdot[1][i][j] * invs3r[1]) + 
+	  (dinvs3r[1][i][j] * dotDBABCB);
+	dchi[1][i][j] = ftmp / cos(chiCBDA);
+	ftmp = (fdot[2][i][j] * invs3r[2]) + 
+	  (dinvs3r[2][i][j] * dotABCBDB);
+	dchi[2][i][j] = ftmp / cos(chiDBAC);
+	dtotalchi[i][j] = (dchi[0][i][j]+dchi[1][i][j]+dchi[2][i][j]) / 3.0;
+      }
+
+    for (i = 0; i < 4; i++)
+      for (j = 0; j < 3; j++)
+	fabcd[i][j] = -2.0*k0[type] * deltachi*dtotalchi[i][j];
+
+    // apply force to each of 4 atoms
+
+    if (NEWTON_BOND || i1 < nlocal) {
+      f[i1][0] += fabcd[0][0];
+      f[i1][1] += fabcd[0][1];
+      f[i1][2] += fabcd[0][2];
+    }
+
+    if (NEWTON_BOND || i2 < nlocal) {
+      f[i2][0] += fabcd[1][0];
+      f[i2][1] += fabcd[1][1];
+      f[i2][2] += fabcd[1][2];
+    }
+
+    if (NEWTON_BOND || i3 < nlocal) {
+      f[i3][0] += fabcd[2][0];
+      f[i3][1] += fabcd[2][1];
+      f[i3][2] += fabcd[2][2];
+    }
+
+    if (NEWTON_BOND || i4 < nlocal) {
+      f[i4][0] += fabcd[3][0];
+      f[i4][1] += fabcd[3][1];
+      f[i4][2] += fabcd[3][2];
+    }
+
+    if (EVFLAG)
+      ev_tally_thr(this,i1,i2,i3,i4,nlocal,NEWTON_BOND,eimproper,
+		   fabcd[0],fabcd[2],fabcd[3],
+		   delr[0][0],delr[0][1],delr[0][2],
+		   delr[1][0],delr[1][1],delr[1][2],
+		   delr[2][0]-delr[1][0],delr[2][1]-delr[1][1],
+		   delr[2][2]-delr[1][2],thr);
+  }
+
+  // compute angle-angle interactions
+  angleangle_thr<EVFLAG, EFLAG, NEWTON_BOND>(nfrom,nto,thr);
+}
+
+/* ----------------------------------------------------------------------
+   angle-angle interactions within improper
+------------------------------------------------------------------------- */
+template <int EVFLAG, int EFLAG, int NEWTON_BOND>
+void ImproperClass2OMP::angleangle_thr(int nfrom, int nto, ThrData * const thr)
+{
+  int i1,i2,i3,i4,i,j,k,n,type;
+  double eimproper;
+  double delxAB,delyAB,delzAB,rABmag2,rAB;
+  double delxBC,delyBC,delzBC,rBCmag2,rBC;
+  double delxBD,delyBD,delzBD,rBDmag2,rBD;
+  double costhABC,thetaABC,costhABD;
+  double thetaABD,costhCBD,thetaCBD,dthABC,dthCBD,dthABD;
+  double sc1,t1,t3,r12;
+  double dthetadr[3][4][3],fabcd[4][3];
+
+  eimproper = 0.0;
+
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  const int * const * const improperlist = neighbor->improperlist;
+  const int nlocal = atom->nlocal;
+
+  for (n = nfrom; n < nto; n++) {
+    i1 = improperlist[n][0];
+    i2 = improperlist[n][1];
+    i3 = improperlist[n][2];
+    i4 = improperlist[n][3];
+    type = improperlist[n][4];
+
+    // difference vectors
+
+    delxAB = x[i1][0] - x[i2][0];
+    delyAB = x[i1][1] - x[i2][1];
+    delzAB = x[i1][2] - x[i2][2];
+    domain->minimum_image(delxAB,delyAB,delzAB);
+
+    delxBC = x[i3][0] - x[i2][0];
+    delyBC = x[i3][1] - x[i2][1];
+    delzBC = x[i3][2] - x[i2][2];
+    domain->minimum_image(delxBC,delyBC,delzBC);
+
+    delxBD = x[i4][0] - x[i2][0];
+    delyBD = x[i4][1] - x[i2][1];
+    delzBD = x[i4][2] - x[i2][2];
+    domain->minimum_image(delxBD,delyBD,delzBD);
+
+    // bond lengths
+
+    rABmag2 = delxAB*delxAB + delyAB*delyAB + delzAB*delzAB;
+    rAB = sqrt(rABmag2);
+    rBCmag2 = delxBC*delxBC + delyBC*delyBC + delzBC*delzBC;
+    rBC = sqrt(rBCmag2);
+    rBDmag2 = delxBD*delxBD + delyBD*delyBD + delzBD*delzBD;
+    rBD = sqrt(rBDmag2);
+        
+    // angle ABC, ABD, CBD
+
+    costhABC = (delxAB*delxBC + delyAB*delyBC + delzAB*delzBC) / (rAB * rBC);
+    if (costhABC > 1.0)  costhABC = 1.0;
+    if (costhABC < -1.0) costhABC = -1.0;
+    thetaABC = acos(costhABC);
+
+    costhABD = (delxAB*delxBD + delyAB*delyBD + delzAB*delzBD) / (rAB * rBD);
+    if (costhABD > 1.0)  costhABD = 1.0;
+    if (costhABD < -1.0) costhABD = -1.0;
+    thetaABD = acos(costhABD);
+
+    costhCBD = (delxBC*delxBD + delyBC*delyBD + delzBC*delzBD) /(rBC * rBD);
+    if (costhCBD > 1.0)  costhCBD = 1.0;
+    if (costhCBD < -1.0) costhCBD = -1.0;
+    thetaCBD = acos(costhCBD);
+
+    dthABC = thetaABC - aa_theta0_1[type];
+    dthABD = thetaABD - aa_theta0_2[type];
+    dthCBD = thetaCBD - aa_theta0_3[type];
+
+    // energy
+
+    if (EFLAG) eimproper = aa_k2[type] * dthABC * dthABD + 
+		 aa_k1[type] * dthABC * dthCBD +
+		 aa_k3[type] * dthABD * dthCBD;
+
+    // d(theta)/d(r) array
+    // angle i, atom j, coordinate k
+
+    for (i = 0; i < 3; i++)
+      for (j = 0; j < 4; j++)
+	for (k = 0; k < 3; k++)
+	  dthetadr[i][j][k] = 0.0;
+
+    // angle ABC
+
+    sc1 = sqrt(1.0/(1.0 - costhABC*costhABC));
+    t1 = costhABC / rABmag2;
+    t3 = costhABC / rBCmag2;
+    r12 = 1.0 / (rAB * rBC);
+
+    dthetadr[0][0][0] = sc1 * ((t1 * delxAB) - (delxBC * r12));
+    dthetadr[0][0][1] = sc1 * ((t1 * delyAB) - (delyBC * r12));
+    dthetadr[0][0][2] = sc1 * ((t1 * delzAB) - (delzBC * r12));
+    dthetadr[0][1][0] = sc1 * ((-t1 * delxAB) + (delxBC * r12) +
+			       (-t3 * delxBC) + (delxAB * r12));
+    dthetadr[0][1][1] = sc1 * ((-t1 * delyAB) + (delyBC * r12) +
+			       (-t3 * delyBC) + (delyAB * r12));
+    dthetadr[0][1][2] = sc1 * ((-t1 * delzAB) + (delzBC * r12) +
+			       (-t3 * delzBC) + (delzAB * r12));
+    dthetadr[0][2][0] = sc1 * ((t3 * delxBC) - (delxAB * r12));
+    dthetadr[0][2][1] = sc1 * ((t3 * delyBC) - (delyAB * r12));
+    dthetadr[0][2][2] = sc1 * ((t3 * delzBC) - (delzAB * r12));
+
+    // angle CBD
+
+    sc1 = sqrt(1.0/(1.0 - costhCBD*costhCBD));
+    t1 = costhCBD / rBCmag2;
+    t3 = costhCBD / rBDmag2;
+    r12 = 1.0 / (rBC * rBD);
+
+    dthetadr[1][2][0] = sc1 * ((t1 * delxBC) - (delxBD * r12));
+    dthetadr[1][2][1] = sc1 * ((t1 * delyBC) - (delyBD * r12));
+    dthetadr[1][2][2] = sc1 * ((t1 * delzBC) - (delzBD * r12));
+    dthetadr[1][1][0] = sc1 * ((-t1 * delxBC) + (delxBD * r12) +
+			       (-t3 * delxBD) + (delxBC * r12));
+    dthetadr[1][1][1] = sc1 * ((-t1 * delyBC) + (delyBD * r12) +
+			       (-t3 * delyBD) + (delyBC * r12));
+    dthetadr[1][1][2] = sc1 * ((-t1 * delzBC) + (delzBD * r12) +
+			       (-t3 * delzBD) + (delzBC * r12));
+    dthetadr[1][3][0] = sc1 * ((t3 * delxBD) - (delxBC * r12));
+    dthetadr[1][3][1] = sc1 * ((t3 * delyBD) - (delyBC * r12));
+    dthetadr[1][3][2] = sc1 * ((t3 * delzBD) - (delzBC * r12));
+
+    // angle ABD
+
+    sc1 = sqrt(1.0/(1.0 - costhABD*costhABD));
+    t1 = costhABD / rABmag2;
+    t3 = costhABD / rBDmag2;
+    r12 = 1.0 / (rAB * rBD);
+
+    dthetadr[2][0][0] = sc1 * ((t1 * delxAB) - (delxBD * r12));
+    dthetadr[2][0][1] = sc1 * ((t1 * delyAB) - (delyBD * r12));
+    dthetadr[2][0][2] = sc1 * ((t1 * delzAB) - (delzBD * r12));
+    dthetadr[2][1][0] = sc1 * ((-t1 * delxAB) + (delxBD * r12) +
+			       (-t3 * delxBD) + (delxAB * r12));
+    dthetadr[2][1][1] = sc1 * ((-t1 * delyAB) + (delyBD * r12) +
+			       (-t3 * delyBD) + (delyAB * r12));
+    dthetadr[2][1][2] = sc1 * ((-t1 * delzAB) + (delzBD * r12) +
+			       (-t3 * delzBD) + (delzAB * r12));
+    dthetadr[2][3][0] = sc1 * ((t3 * delxBD) - (delxAB * r12));
+    dthetadr[2][3][1] = sc1 * ((t3 * delyBD) - (delyAB * r12));
+    dthetadr[2][3][2] = sc1 * ((t3 * delzBD) - (delzAB * r12));
+
+    // angleangle forces
+
+    for (i = 0; i < 4; i++)
+      for (j = 0; j < 3; j++)
+	fabcd[i][j] = - 
+	  ((aa_k1[type] * 
+	    (dthABC*dthetadr[1][i][j] + dthCBD*dthetadr[0][i][j])) +
+	   (aa_k2[type] * 
+	    (dthABC*dthetadr[2][i][j] + dthABD*dthetadr[0][i][j])) +
+	   (aa_k3[type] *
+	    (dthABD*dthetadr[1][i][j] + dthCBD*dthetadr[2][i][j])));
+
+    // apply force to each of 4 atoms
+
+    if (NEWTON_BOND || i1 < nlocal) {
+      f[i1][0] += fabcd[0][0];
+      f[i1][1] += fabcd[0][1];
+      f[i1][2] += fabcd[0][2];
+    }
+
+    if (NEWTON_BOND || i2 < nlocal) {
+      f[i2][0] += fabcd[1][0];
+      f[i2][1] += fabcd[1][1];
+      f[i2][2] += fabcd[1][2];
+    }
+
+    if (NEWTON_BOND || i3 < nlocal) {
+      f[i3][0] += fabcd[2][0];
+      f[i3][1] += fabcd[2][1];
+      f[i3][2] += fabcd[2][2];
+    }
+
+    if (NEWTON_BOND || i4 < nlocal) {
+      f[i4][0] += fabcd[3][0];
+      f[i4][1] += fabcd[3][1];
+      f[i4][2] += fabcd[3][2];
+    }
+
+    if (EVFLAG)
+      ev_tally_thr(this,i1,i2,i3,i4,nlocal,NEWTON_BOND,eimproper,
+		   fabcd[0],fabcd[2],fabcd[3],delxAB,delyAB,delzAB,
+		   delxBC,delyBC,delzBC,delxBD,delyBD,delzBD,thr);
+  }
+}
--- a/src/USER-OMP/improper_class2_omp.h
+++ b/src/USER-OMP/improper_class2_omp.h
@ -0,0 +1,52 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifdef IMPROPER_CLASS
+
+ImproperStyle(class2/omp,ImproperClass2OMP)
+
+#else
+
+#ifndef LMP_IMPROPER_CLASS2_OMP_H
+#define LMP_IMPROPER_CLASS2_OMP_H
+
+#include "improper_class2.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+class ImproperClass2OMP : public ImproperClass2, public ThrOMP {
+
+ public:
+    ImproperClass2OMP(class LAMMPS *lmp) : 
+      ImproperClass2(lmp), ThrOMP(lmp,THR_IMPROPER) {};
+
+  virtual void compute(int, int);
+
+ private:
+  template <int EVFLAG, int EFLAG, int NEWTON_BOND>
+  void eval(int ifrom, int ito, ThrData * const thr);
+
+  template <int EVFLAG, int EFLAG, int NEWTON_BOND>
+  void angleangle_thr(int, int, ThrData * const thr);
+  
+};
+
+}
+
+#endif
+#endif
--- a/src/USER-OMP/improper_cvff_omp.cpp
+++ b/src/USER-OMP/improper_cvff_omp.cpp
@ -0,0 +1,295 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#include "lmptype.h"
+#include "mpi.h"
+#include "math.h"
+#include "improper_cvff_omp.h"
+#include "atom.h"
+#include "comm.h"
+#include "neighbor.h"
+#include "domain.h"
+#include "force.h"
+#include "update.h"
+#include "error.h"
+
+using namespace LAMMPS_NS;
+
+#define TOLERANCE 0.05
+#define SMALL     0.001
+
+/* ---------------------------------------------------------------------- */
+
+void ImproperCvffOMP::compute(int eflag, int vflag)
+{
+
+  if (eflag || vflag) {
+    ev_setup(eflag,vflag);
+  } else evflag = 0;
+
+  const int nall = atom->nlocal + atom->nghost;
+  const int nthreads = comm->nthreads;
+  const int inum = neighbor->nimproperlist;
+
+#if defined(_OPENMP)
+#pragma omp parallel default(none) shared(eflag,vflag)
+#endif
+  {
+    int ifrom, ito, tid;
+
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
+
+    if (evflag) {
+      if (eflag) {
+	if (force->newton_bond) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
+      } else {
+	if (force->newton_bond) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
+      }
+    } else {
+      if (force->newton_bond) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
+    }
+
+    reduce_thr(this, eflag, vflag, thr);
+  } // end of omp parallel region
+}
+
+template <int EVFLAG, int EFLAG, int NEWTON_BOND>
+void ImproperCvffOMP::eval(int nfrom, int nto, ThrData * const thr)
+{
+  int i1,i2,i3,i4,m,n,type;
+  double vb1x,vb1y,vb1z,vb2x,vb2y,vb2z,vb3x,vb3y,vb3z,vb2xm,vb2ym,vb2zm;
+  double eimproper,f1[3],f2[3],f3[3],f4[3];
+  double sb1,sb2,sb3,rb1,rb3,c0,b1mag2,b1mag,b2mag2;
+  double b2mag,b3mag2,b3mag,ctmp,r12c1,c1mag,r12c2;
+  double c2mag,sc1,sc2,s1,s2,s12,c,p,pd,rc2,a,a11,a22;
+  double a33,a12,a13,a23,sx2,sy2,sz2;
+
+  eimproper = 0.0;
+
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  const int * const * const improperlist = neighbor->improperlist;
+  const int nlocal = atom->nlocal;
+
+  for (n = nfrom; n < nto; n++) {
+    i1 = improperlist[n][0];
+    i2 = improperlist[n][1];
+    i3 = improperlist[n][2];
+    i4 = improperlist[n][3];
+    type = improperlist[n][4];
+
+    // 1st bond
+
+    vb1x = x[i1][0] - x[i2][0];
+    vb1y = x[i1][1] - x[i2][1];
+    vb1z = x[i1][2] - x[i2][2];
+    domain->minimum_image(vb1x,vb1y,vb1z);
+
+    // 2nd bond
+
+    vb2x = x[i3][0] - x[i2][0];
+    vb2y = x[i3][1] - x[i2][1];
+    vb2z = x[i3][2] - x[i2][2];
+    domain->minimum_image(vb2x,vb2y,vb2z);
+
+    vb2xm = -vb2x;
+    vb2ym = -vb2y;
+    vb2zm = -vb2z;
+    domain->minimum_image(vb2xm,vb2ym,vb2zm);
+
+    // 3rd bond
+
+    vb3x = x[i4][0] - x[i3][0];
+    vb3y = x[i4][1] - x[i3][1];
+    vb3z = x[i4][2] - x[i3][2];
+    domain->minimum_image(vb3x,vb3y,vb3z);
+
+    // c0 calculation
+        
+    sb1 = 1.0 / (vb1x*vb1x + vb1y*vb1y + vb1z*vb1z);
+    sb2 = 1.0 / (vb2x*vb2x + vb2y*vb2y + vb2z*vb2z);
+    sb3 = 1.0 / (vb3x*vb3x + vb3y*vb3y + vb3z*vb3z);
+        
+    rb1 = sqrt(sb1);
+    rb3 = sqrt(sb3);
+        
+    c0 = (vb1x*vb3x + vb1y*vb3y + vb1z*vb3z) * rb1*rb3;
+
+    // 1st and 2nd angle
+        
+    b1mag2 = vb1x*vb1x + vb1y*vb1y + vb1z*vb1z;
+    b1mag = sqrt(b1mag2);
+    b2mag2 = vb2x*vb2x + vb2y*vb2y + vb2z*vb2z;
+    b2mag = sqrt(b2mag2);
+    b3mag2 = vb3x*vb3x + vb3y*vb3y + vb3z*vb3z;
+    b3mag = sqrt(b3mag2);
+
+    ctmp = vb1x*vb2x + vb1y*vb2y + vb1z*vb2z;
+    r12c1 = 1.0 / (b1mag*b2mag);
+    c1mag = ctmp * r12c1;
+
+    ctmp = vb2xm*vb3x + vb2ym*vb3y + vb2zm*vb3z;
+    r12c2 = 1.0 / (b2mag*b3mag);
+    c2mag = ctmp * r12c2;
+
+    // cos and sin of 2 angles and final c
+
+    sc1 = sqrt(1.0 - c1mag*c1mag);
+    if (sc1 < SMALL) sc1 = SMALL;
+    sc1 = 1.0/sc1;
+
+    sc2 = sqrt(1.0 - c2mag*c2mag);
+    if (sc2 < SMALL) sc2 = SMALL;
+    sc2 = 1.0/sc2;
+
+    s1 = sc1 * sc1;
+    s2 = sc2 * sc2;
+    s12 = sc1 * sc2;
+    c = (c0 + c1mag*c2mag) * s12;
+
+    // error check
+
+    if (c > 1.0 + TOLERANCE || c < (-1.0 - TOLERANCE)) {
+      int me = comm->me;
+
+      if (screen) {
+	char str[128];
+	sprintf(str,"Improper problem: %d/%d " BIGINT_FORMAT " %d %d %d %d",
+		me,thr->get_tid(),update->ntimestep,
+		atom->tag[i1],atom->tag[i2],atom->tag[i3],atom->tag[i4]);
+	error->warning(FLERR,str,0);
+	fprintf(screen,"  1st atom: %d %g %g %g\n",
+		me,x[i1][0],x[i1][1],x[i1][2]);
+	fprintf(screen,"  2nd atom: %d %g %g %g\n",
+		me,x[i2][0],x[i2][1],x[i2][2]);
+	fprintf(screen,"  3rd atom: %d %g %g %g\n",
+		me,x[i3][0],x[i3][1],x[i3][2]);
+	fprintf(screen,"  4th atom: %d %g %g %g\n",
+		me,x[i4][0],x[i4][1],x[i4][2]);
+      }
+    }
+    
+    if (c > 1.0) c = 1.0;
+    if (c < -1.0) c = -1.0;
+
+    // force & energy
+    // p = 1 + cos(n*phi) for d = 1
+    // p = 1 - cos(n*phi) for d = -1
+    // pd = dp/dc / 2
+        
+    m = multiplicity[type];
+
+    if (m == 2) {
+      p = 2.0*c*c;
+      pd = 2.0*c;
+    } else if (m == 3) {
+      rc2 = c*c;
+      p = (4.0*rc2-3.0)*c + 1.0;
+      pd = 6.0*rc2 - 1.5;
+    } else if (m == 4) {
+      rc2 = c*c;
+      p = 8.0*(rc2-1)*rc2 + 2.0;
+      pd = (16.0*rc2-8.0)*c;
+    } else if (m == 6) {
+      rc2 = c*c;
+      p = ((32.0*rc2-48.0)*rc2 + 18.0)*rc2;
+      pd = (96.0*(rc2-1.0)*rc2 + 18.0)*c;
+    } else if (m == 1) {
+      p = c + 1.0;
+      pd = 0.5;
+    } else if (m == 5) {
+      rc2 = c*c;
+      p = ((16.0*rc2-20.0)*rc2 + 5.0)*c + 1.0;
+      pd = (40.0*rc2-30.0)*rc2 + 2.5;
+    } else if (m == 0) {
+      p = 2.0;
+      pd = 0.0;
+    }
+    
+    if (sign[type] == -1) {
+      p = 2.0 - p;
+      pd = -pd;
+    }
+
+    if (EFLAG) eimproper = k[type]*p;
+
+    a = 2.0 * k[type] * pd;
+    c = c * a;
+    s12 = s12 * a;
+    a11 = c*sb1*s1;
+    a22 = -sb2*(2.0*c0*s12 - c*(s1+s2));
+    a33 = c*sb3*s2;
+    a12 = -r12c1*(c1mag*c*s1 + c2mag*s12);
+    a13 = -rb1*rb3*s12;
+    a23 = r12c2*(c2mag*c*s2 + c1mag*s12);
+
+    sx2  = a12*vb1x + a22*vb2x + a23*vb3x;
+    sy2  = a12*vb1y + a22*vb2y + a23*vb3y;
+    sz2  = a12*vb1z + a22*vb2z + a23*vb3z;
+
+    f1[0] = a11*vb1x + a12*vb2x + a13*vb3x;
+    f1[1] = a11*vb1y + a12*vb2y + a13*vb3y;
+    f1[2] = a11*vb1z + a12*vb2z + a13*vb3z;
+
+    f2[0] = -sx2 - f1[0];
+    f2[1] = -sy2 - f1[1];
+    f2[2] = -sz2 - f1[2];
+
+    f4[0] = a13*vb1x + a23*vb2x + a33*vb3x;
+    f4[1] = a13*vb1y + a23*vb2y + a33*vb3y;
+    f4[2] = a13*vb1z + a23*vb2z + a33*vb3z;
+
+    f3[0] = sx2 - f4[0];
+    f3[1] = sy2 - f4[1];
+    f3[2] = sz2 - f4[2];
+
+    // apply force to each of 4 atoms
+
+    if (NEWTON_BOND || i1 < nlocal) {
+      f[i1][0] += f1[0];
+      f[i1][1] += f1[1];
+      f[i1][2] += f1[2];
+    }
+
+    if (NEWTON_BOND || i2 < nlocal) {
+      f[i2][0] += f2[0];
+      f[i2][1] += f2[1];
+      f[i2][2] += f2[2];
+    }
+
+    if (NEWTON_BOND || i3 < nlocal) {
+      f[i3][0] += f3[0];
+      f[i3][1] += f3[1];
+      f[i3][2] += f3[2];
+    }
+
+    if (NEWTON_BOND || i4 < nlocal) {
+      f[i4][0] += f4[0];
+      f[i4][1] += f4[1];
+      f[i4][2] += f4[2];
+    }
+
+    if (EVFLAG)
+      ev_tally_thr(this,i1,i2,i3,i4,nlocal,NEWTON_BOND,eimproper,f1,f3,f4,
+		   vb1x,vb1y,vb1z,vb2x,vb2y,vb2z,vb3x,vb3y,vb3z,thr);
+  }
+}
--- a/src/USER-OMP/improper_cvff_omp.h
+++ b/src/USER-OMP/improper_cvff_omp.h
@ -0,0 +1,48 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifdef IMPROPER_CLASS
+
+ImproperStyle(cvff/omp,ImproperCvffOMP)
+
+#else
+
+#ifndef LMP_IMPROPER_CVFF_OMP_H
+#define LMP_IMPROPER_CVFF_OMP_H
+
+#include "improper_cvff.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+class ImproperCvffOMP : public ImproperCvff, public ThrOMP {
+
+ public:
+    ImproperCvffOMP(class LAMMPS *lmp) : 
+      ImproperCvff(lmp), ThrOMP(lmp,THR_IMPROPER) {};
+
+  virtual void compute(int, int);
+
+ private:
+  template <int EVFLAG, int EFLAG, int NEWTON_BOND>
+  void eval(int ifrom, int ito, ThrData * const thr);
+};
+
+}
+
+#endif
+#endif
--- a/src/USER-OMP/improper_harmonic_omp.cpp
+++ b/src/USER-OMP/improper_harmonic_omp.cpp
@ -0,0 +1,236 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#include "lmptype.h"
+#include "mpi.h"
+#include "math.h"
+#include "improper_harmonic_omp.h"
+#include "atom.h"
+#include "comm.h"
+#include "neighbor.h"
+#include "domain.h"
+#include "force.h"
+#include "update.h"
+#include "error.h"
+
+using namespace LAMMPS_NS;
+
+#define TOLERANCE 0.05
+#define SMALL     0.001
+
+/* ---------------------------------------------------------------------- */
+
+void ImproperHarmonicOMP::compute(int eflag, int vflag)
+{
+
+  if (eflag || vflag) {
+    ev_setup(eflag,vflag);
+  } else evflag = 0;
+
+  const int nall = atom->nlocal + atom->nghost;
+  const int nthreads = comm->nthreads;
+  const int inum = neighbor->nimproperlist;
+
+#if defined(_OPENMP)
+#pragma omp parallel default(none) shared(eflag,vflag)
+#endif
+  {
+    int ifrom, ito, tid;
+
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
+
+    if (evflag) {
+      if (eflag) {
+	if (force->newton_bond) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
+      } else {
+	if (force->newton_bond) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
+      }
+    } else {
+      if (force->newton_bond) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
+    }
+
+    reduce_thr(this, eflag, vflag, thr);
+  } // end of omp parallel region
+}
+
+template <int EVFLAG, int EFLAG, int NEWTON_BOND>
+void ImproperHarmonicOMP::eval(int nfrom, int nto, ThrData * const thr)
+{
+  int i1,i2,i3,i4,n,type;
+  double vb1x,vb1y,vb1z,vb2x,vb2y,vb2z,vb3x,vb3y,vb3z;
+  double eimproper,f1[3],f2[3],f3[3],f4[3];
+  double ss1,ss2,ss3,r1,r2,r3,c0,c1,c2,s1,s2;
+  double s12,c,s,domega,a,a11,a22,a33,a12,a13,a23;
+  double sx2,sy2,sz2;
+
+  eimproper = 0.0;
+
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  const int * const * const improperlist = neighbor->improperlist;
+  const int nlocal = atom->nlocal;
+
+  for (n = nfrom; n < nto; n++) {
+    i1 = improperlist[n][0];
+    i2 = improperlist[n][1];
+    i3 = improperlist[n][2];
+    i4 = improperlist[n][3];
+    type = improperlist[n][4];
+
+    // geometry of 4-body
+
+    vb1x = x[i1][0] - x[i2][0];
+    vb1y = x[i1][1] - x[i2][1];
+    vb1z = x[i1][2] - x[i2][2];
+    domain->minimum_image(vb1x,vb1y,vb1z);
+
+    vb2x = x[i3][0] - x[i2][0];
+    vb2y = x[i3][1] - x[i2][1];
+    vb2z = x[i3][2] - x[i2][2];
+    domain->minimum_image(vb2x,vb2y,vb2z);
+
+    vb3x = x[i4][0] - x[i3][0];
+    vb3y = x[i4][1] - x[i3][1];
+    vb3z = x[i4][2] - x[i3][2];
+    domain->minimum_image(vb3x,vb3y,vb3z);
+
+    ss1 = 1.0 / (vb1x*vb1x + vb1y*vb1y + vb1z*vb1z);
+    ss2 = 1.0 / (vb2x*vb2x + vb2y*vb2y + vb2z*vb2z);
+    ss3 = 1.0 / (vb3x*vb3x + vb3y*vb3y + vb3z*vb3z);
+        
+    r1 = sqrt(ss1);
+    r2 = sqrt(ss2);
+    r3 = sqrt(ss3);
+        
+    // sin and cos of angle
+        
+    c0 = (vb1x * vb3x + vb1y * vb3y + vb1z * vb3z) * r1 * r3;
+    c1 = (vb1x * vb2x + vb1y * vb2y + vb1z * vb2z) * r1 * r2;
+    c2 = -(vb3x * vb2x + vb3y * vb2y + vb3z * vb2z) * r3 * r2;
+
+    s1 = 1.0 - c1*c1;
+    if (s1 < SMALL) s1 = SMALL;
+    s1 = 1.0 / s1;
+
+    s2 = 1.0 - c2*c2;
+    if (s2 < SMALL) s2 = SMALL;
+    s2 = 1.0 / s2;
+
+    s12 = sqrt(s1*s2);
+    c = (c1*c2 + c0) * s12;
+
+    // error check
+
+    if (c > 1.0 + TOLERANCE || c < (-1.0 - TOLERANCE)) {
+      int me = comm->me;
+
+      if (screen) {
+	char str[128];
+	sprintf(str,"Improper problem: %d/%d " BIGINT_FORMAT " %d %d %d %d",
+		me,thr->get_tid(),update->ntimestep,
+		atom->tag[i1],atom->tag[i2],atom->tag[i3],atom->tag[i4]);
+	error->warning(FLERR,str,0);
+	fprintf(screen,"  1st atom: %d %g %g %g\n",
+		me,x[i1][0],x[i1][1],x[i1][2]);
+	fprintf(screen,"  2nd atom: %d %g %g %g\n",
+		me,x[i2][0],x[i2][1],x[i2][2]);
+	fprintf(screen,"  3rd atom: %d %g %g %g\n",
+		me,x[i3][0],x[i3][1],x[i3][2]);
+	fprintf(screen,"  4th atom: %d %g %g %g\n",
+		me,x[i4][0],x[i4][1],x[i4][2]);
+      }
+    }
+    
+    if (c > 1.0) c = 1.0;
+    if (c < -1.0) c = -1.0;
+
+    s = sqrt(1.0 - c*c);
+    if (s < SMALL) s = SMALL;
+
+    // force & energy
+
+    domega = acos(c) - chi[type];
+    a = k[type] * domega;
+
+    if (EFLAG) eimproper = a*domega;
+
+    a = -a * 2.0/s;
+    c = c * a;
+    s12 = s12 * a;
+    a11 = c*ss1*s1;
+    a22 = -ss2 * (2.0*c0*s12 - c*(s1+s2));
+    a33 = c*ss3*s2;
+    a12 = -r1*r2*(c1*c*s1 + c2*s12);
+    a13 = -r1*r3*s12;
+    a23 = r2*r3*(c2*c*s2 + c1*s12);
+
+    sx2  = a22*vb2x + a23*vb3x + a12*vb1x;
+    sy2  = a22*vb2y + a23*vb3y + a12*vb1y;
+    sz2  = a22*vb2z + a23*vb3z + a12*vb1z;
+
+    f1[0] = a12*vb2x + a13*vb3x + a11*vb1x;
+    f1[1] = a12*vb2y + a13*vb3y + a11*vb1y;
+    f1[2] = a12*vb2z + a13*vb3z + a11*vb1z;
+
+    f2[0] = -sx2 - f1[0];
+    f2[1] = -sy2 - f1[1];
+    f2[2] = -sz2 - f1[2];
+
+    f4[0] = a23*vb2x + a33*vb3x + a13*vb1x;
+    f4[1] = a23*vb2y + a33*vb3y + a13*vb1y;
+    f4[2] = a23*vb2z + a33*vb3z + a13*vb1z;
+
+    f3[0] = sx2 - f4[0];
+    f3[1] = sy2 - f4[1];
+    f3[2] = sz2 - f4[2];
+
+    // apply force to each of 4 atoms
+
+    if (NEWTON_BOND || i1 < nlocal) {
+      f[i1][0] += f1[0];
+      f[i1][1] += f1[1];
+      f[i1][2] += f1[2];
+    }
+
+    if (NEWTON_BOND || i2 < nlocal) {
+      f[i2][0] += f2[0];
+      f[i2][1] += f2[1];
+      f[i2][2] += f2[2];
+    }
+
+    if (NEWTON_BOND || i3 < nlocal) {
+      f[i3][0] += f3[0];
+      f[i3][1] += f3[1];
+      f[i3][2] += f3[2];
+    }
+
+    if (NEWTON_BOND || i4 < nlocal) {
+      f[i4][0] += f4[0];
+      f[i4][1] += f4[1];
+      f[i4][2] += f4[2];
+    }
+
+    if (EVFLAG)
+      ev_tally_thr(this,i1,i2,i3,i4,nlocal,NEWTON_BOND,eimproper,f1,f3,f4,
+		   vb1x,vb1y,vb1z,vb2x,vb2y,vb2z,vb3x,vb3y,vb3z,thr);
+  }
+}
--- a/src/USER-OMP/improper_harmonic_omp.h
+++ b/src/USER-OMP/improper_harmonic_omp.h
@ -0,0 +1,48 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifdef IMPROPER_CLASS
+
+ImproperStyle(harmonic/omp,ImproperHarmonicOMP)
+
+#else
+
+#ifndef LMP_IMPROPER_HARMONIC_OMP_H
+#define LMP_IMPROPER_HARMONIC_OMP_H
+
+#include "improper_harmonic.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+class ImproperHarmonicOMP : public ImproperHarmonic, public ThrOMP {
+
+ public:
+    ImproperHarmonicOMP(class LAMMPS *lmp) : 
+      ImproperHarmonic(lmp), ThrOMP(lmp,THR_IMPROPER) {};
+
+  virtual void compute(int, int);
+
+ private:
+  template <int EVFLAG, int EFLAG, int NEWTON_BOND>
+  void eval(int ifrom, int ito, ThrData * const thr);
+};
+
+}
+
+#endif
+#endif
--- a/src/USER-OMP/improper_umbrella_omp.cpp
+++ b/src/USER-OMP/improper_umbrella_omp.cpp
@ -0,0 +1,252 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#include "lmptype.h"
+#include "mpi.h"
+#include "math.h"
+#include "improper_umbrella_omp.h"
+#include "atom.h"
+#include "comm.h"
+#include "neighbor.h"
+#include "domain.h"
+#include "force.h"
+#include "update.h"
+#include "error.h"
+
+using namespace LAMMPS_NS;
+
+#define TOLERANCE 0.05
+#define SMALL     0.001
+
+/* ---------------------------------------------------------------------- */
+
+void ImproperUmbrellaOMP::compute(int eflag, int vflag)
+{
+
+  if (eflag || vflag) {
+    ev_setup(eflag,vflag);
+  } else evflag = 0;
+
+  const int nall = atom->nlocal + atom->nghost;
+  const int nthreads = comm->nthreads;
+  const int inum = neighbor->nimproperlist;
+
+#if defined(_OPENMP)
+#pragma omp parallel default(none) shared(eflag,vflag)
+#endif
+  {
+    int ifrom, ito, tid;
+
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
+
+    if (evflag) {
+      if (eflag) {
+	if (force->newton_bond) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
+      } else {
+	if (force->newton_bond) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
+      }
+    } else {
+      if (force->newton_bond) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
+    }
+
+    reduce_thr(this, eflag, vflag, thr);
+  } // end of omp parallel region
+}
+
+template <int EVFLAG, int EFLAG, int NEWTON_BOND>
+void ImproperUmbrellaOMP::eval(int nfrom, int nto, ThrData * const thr)
+{
+  int i1,i2,i3,i4,n,type;
+  double eimproper,f1[3],f2[3],f3[3],f4[3];
+  double vb1x,vb1y,vb1z,vb2x,vb2y,vb2z,vb3x,vb3y,vb3z;
+  double domega,c,a,s,projhfg,dhax,dhay,dhaz,dahx,dahy,dahz,cotphi;
+  double ax,ay,az,ra2,rh2,ra,rh,rar,rhr,arx,ary,arz,hrx,hry,hrz;
+
+  eimproper = 0.0;
+
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  const int * const * const improperlist = neighbor->improperlist;
+  const int nlocal = atom->nlocal;
+
+  for (n = nfrom; n < nto; n++) {
+    i1 = improperlist[n][0];
+    i2 = improperlist[n][1];
+    i3 = improperlist[n][2];
+    i4 = improperlist[n][3];
+    type = improperlist[n][4];
+
+    // 1st bond
+
+    vb1x = x[i2][0] - x[i1][0];
+    vb1y = x[i2][1] - x[i1][1];
+    vb1z = x[i2][2] - x[i1][2];
+    domain->minimum_image(vb1x,vb1y,vb1z);
+
+    // 2nd bond
+
+    vb2x = x[i3][0] - x[i1][0];
+    vb2y = x[i3][1] - x[i1][1];
+    vb2z = x[i3][2] - x[i1][2];
+    domain->minimum_image(vb2x,vb2y,vb2z);
+
+    // 3rd bond
+
+    vb3x = x[i4][0] - x[i1][0];
+    vb3y = x[i4][1] - x[i1][1];
+    vb3z = x[i4][2] - x[i1][2];
+    domain->minimum_image(vb3x,vb3y,vb3z);
+
+    // c0 calculation
+    // A = vb1 X vb2 is perpendicular to IJK plane
+
+    ax = vb1y*vb2z-vb1z*vb2y;
+    ay = vb1z*vb2x-vb1x*vb2z;
+    az = vb1x*vb2y-vb1y*vb2x;
+    ra2 = ax*ax+ay*ay+az*az;
+    rh2 = vb3x*vb3x+vb3y*vb3y+vb3z*vb3z;
+    ra = sqrt(ra2);
+    rh = sqrt(rh2);
+    if (ra < SMALL) ra = SMALL;
+    if (rh < SMALL) rh = SMALL;
+
+    rar = 1/ra;
+    rhr = 1/rh;
+    arx = ax*rar;
+    ary = ay*rar;
+    arz = az*rar;
+    hrx = vb3x*rhr;
+    hry = vb3y*rhr;
+    hrz = vb3z*rhr;
+
+    c = arx*hrx+ary*hry+arz*hrz;
+
+    // error check
+
+    if (c > 1.0 + TOLERANCE || c < (-1.0 - TOLERANCE)) {
+      int me = comm->me;
+
+      if (screen) {
+	char str[128];
+	sprintf(str,"Improper problem: %d/%d " BIGINT_FORMAT " %d %d %d %d",
+		me,thr->get_tid(),update->ntimestep,
+		atom->tag[i1],atom->tag[i2],atom->tag[i3],atom->tag[i4]);
+	error->warning(FLERR,str,0);
+	fprintf(screen,"  1st atom: %d %g %g %g\n",
+		me,x[i1][0],x[i1][1],x[i1][2]);
+	fprintf(screen,"  2nd atom: %d %g %g %g\n",
+		me,x[i2][0],x[i2][1],x[i2][2]);
+	fprintf(screen,"  3rd atom: %d %g %g %g\n",
+		me,x[i3][0],x[i3][1],x[i3][2]);
+	fprintf(screen,"  4th atom: %d %g %g %g\n",
+		me,x[i4][0],x[i4][1],x[i4][2]);
+      }
+    }
+    
+    if (c > 1.0) s = 1.0;
+    if (c < -1.0) s = -1.0;
+
+    s = sqrt(1.0 - c*c);
+    if (s < SMALL) s = SMALL;
+    cotphi = c/s;
+
+    projhfg = (vb3x*vb1x+vb3y*vb1y+vb3z*vb1z) / 
+      sqrt(vb1x*vb1x+vb1y*vb1y+vb1z*vb1z); 
+    projhfg += (vb3x*vb2x+vb3y*vb2y+vb3z*vb2z) / 
+      sqrt(vb2x*vb2x+vb2y*vb2y+vb2z*vb2z);
+    if (projhfg > 0.0) {
+      s *= -1.0;
+      cotphi *= -1.0;
+    }
+	
+    //  force and energy
+    // if w0 = 0: E = k * (1 - cos w)
+    // if w0 != 0: E = 0.5 * C (cos w - cos w0)^2, C = k/(sin(w0)^2
+
+    if (w0[type] == 0.0) {
+      if (EFLAG) eimproper = kw[type] * (1.0-s);
+      a = -kw[type];
+    } else {
+      domega = s - cos(w0[type]);
+      a = 0.5 * C[type] * domega;
+      if (EFLAG) eimproper = a * domega;
+      a *= 2.0;
+    }
+
+    // dhax = diffrence between H and A in X direction, etc
+
+    a = a*cotphi;
+    dhax = hrx-c*arx;
+    dhay = hry-c*ary;
+    dhaz = hrz-c*arz;
+
+    dahx = arx-c*hrx;
+    dahy = ary-c*hry;
+    dahz = arz-c*hrz;
+
+    f2[0] = (dhay*vb1z - dhaz*vb1y)*rar;
+    f2[1] = (dhaz*vb1x - dhax*vb1z)*rar;
+    f2[2] = (dhax*vb1y - dhay*vb1x)*rar;
+
+    f3[0] = (-dhay*vb2z + dhaz*vb2y)*rar;
+    f3[1] = (-dhaz*vb2x + dhax*vb2z)*rar;
+    f3[2] = (-dhax*vb2y + dhay*vb2x)*rar;
+
+    f4[0] = dahx*rhr;
+    f4[1] = dahy*rhr;
+    f4[2] = dahz*rhr;
+
+    f1[0] = -(f2[0] + f3[0] + f4[0]);
+    f1[1] = -(f2[1] + f3[1] + f4[1]);
+    f1[2] = -(f2[2] + f3[2] + f4[2]);
+
+    // apply force to each of 4 atoms
+
+    if (NEWTON_BOND || i1 < nlocal) {
+      f[i1][0] += f1[0]*a;
+      f[i1][1] += f1[1]*a;
+      f[i1][2] += f1[2]*a;
+    }
+
+    if (NEWTON_BOND || i2 < nlocal) {
+      f[i2][0] += f3[0]*a;
+      f[i2][1] += f3[1]*a;
+      f[i2][2] += f3[2]*a;
+    }
+
+    if (NEWTON_BOND || i3 < nlocal) {
+      f[i3][0] += f2[0]*a;
+      f[i3][1] += f2[1]*a;
+      f[i3][2] += f2[2]*a;
+    }
+
+    if (NEWTON_BOND || i4 < nlocal) {
+      f[i4][0] += f4[0]*a;
+      f[i4][1] += f4[1]*a;
+      f[i4][2] += f4[2]*a;
+    }
+
+    if (EVFLAG)
+      ev_tally_thr(this,i1,i2,i3,i4,nlocal,NEWTON_BOND,eimproper,f1,f3,f4,
+		   vb1x,vb1y,vb1z,vb2x,vb2y,vb2z,vb3x,vb3y,vb3z,thr);
+  }
+}
--- a/src/USER-OMP/improper_umbrella_omp.h
+++ b/src/USER-OMP/improper_umbrella_omp.h
@ -0,0 +1,48 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifdef IMPROPER_CLASS
+
+ImproperStyle(umbrella/omp,ImproperUmbrellaOMP)
+
+#else
+
+#ifndef LMP_IMPROPER_UMBRELLA_OMP_H
+#define LMP_IMPROPER_UMBRELLA_OMP_H
+
+#include "improper_umbrella.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+class ImproperUmbrellaOMP : public ImproperUmbrella, public ThrOMP {
+
+ public:
+    ImproperUmbrellaOMP(class LAMMPS *lmp) : 
+      ImproperUmbrella(lmp), ThrOMP(lmp,THR_IMPROPER) {};
+
+  virtual void compute(int, int);
+
+ private:
+  template <int EVFLAG, int EFLAG, int NEWTON_BOND>
+  void eval(int ifrom, int ito, ThrData * const thr);
+};
+
+}
+
+#endif
+#endif
--- a/src/USER-OMP/pair_airebo_omp.cpp
+++ b/src/USER-OMP/pair_airebo_omp.cpp
--- a/src/USER-OMP/pair_airebo_omp.h
+++ b/src/USER-OMP/pair_airebo_omp.h
@ -0,0 +1,53 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+
+PairStyle(airebo/omp,PairAIREBOOMP)
+
+#else
+
+#ifndef LMP_PAIR_AIREBO_OMP_H
+#define LMP_PAIR_AIREBO_OMP_H
+
+#include "pair_airebo.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+class PairAIREBOOMP : public PairAIREBO, public ThrOMP {
+
+ public:
+  PairAIREBOOMP(class LAMMPS *);
+
+  virtual void compute(int, int);
+  virtual double memory_usage();
+
+ protected:
+  double bondorder_thr(int i, int j, double rij[3], double rijmag,
+		       double VA, int vflag_atom, ThrData * const thr);
+  double bondorderLJ_thr(int i, int j, double rij[3], double rijmag,
+			 double VA, double rij0[3], double rijmag0,
+			 int vflag_atom, ThrData * const thr);
+
+  void FREBO_thr(int ifrom, int ito, int evflag, int eflag,
+		 int vflag_atom, ThrData * const thr);
+  void FLJ_thr(int ifrom, int ito, int evflag, int eflag,
+	       int vflag_atom, ThrData * const thr);
+  void TORSION_thr(int ifrom, int ito, int evflag, int eflag, ThrData * const thr);
+  void REBO_neigh_thr();
+};
+
+}
+
+#endif
+#endif
--- a/src/USER-OMP/pair_comb_omp.cpp
+++ b/src/USER-OMP/pair_comb_omp.cpp
@ -0,0 +1,647 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   This software is distributed under the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#include "math.h"
+#include "pair_comb_omp.h"
+#include "atom.h"
+#include "comm.h"
+#include "group.h"
+#include "force.h"
+#include "memory.h"
+#include "neighbor.h"
+#include "neigh_list.h"
+
+#if defined(_OPENMP)
+#include <omp.h>
+#endif
+
+using namespace LAMMPS_NS;
+
+/* ---------------------------------------------------------------------- */
+
+PairCombOMP::PairCombOMP(LAMMPS *lmp) :
+  PairComb(lmp), ThrOMP(lmp, THR_PAIR)
+{
+  respa_enable = 0;
+}
+
+/* ---------------------------------------------------------------------- */
+
+void PairCombOMP::compute(int eflag, int vflag)
+{
+  if (eflag || vflag) {
+    ev_setup(eflag,vflag);
+  } else evflag = vflag_fdotr = vflag_atom = 0;
+
+  const int nall = atom->nlocal + atom->nghost;
+  const int nthreads = comm->nthreads;
+  const int inum = list->inum;
+
+  // grow coordination array if necessary
+
+  if (atom->nmax > nmax) {
+    memory->destroy(NCo);
+    memory->destroy(bbij);
+    nmax = atom->nmax;
+    memory->create(NCo,nmax,"pair:NCo");
+    memory->create(bbij,nmax,nmax,"pair:bbij");
+  }
+
+  // Build short range neighbor list
+  Short_neigh_thr();
+
+#if defined(_OPENMP)
+#pragma omp parallel default(none) shared(eflag,vflag)
+#endif
+  {
+    int ifrom, ito, tid;
+
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
+
+    if (evflag) {
+      if (eflag) {
+	if (vflag_atom) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
+      } else {
+	if (vflag_atom) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
+      }
+    } else eval<0,0,0>(ifrom, ito, thr);
+
+    reduce_thr(this, eflag, vflag, thr);
+  } // end of omp parallel region
+}
+
+template <int EVFLAG, int EFLAG, int VFLAG_ATOM>
+void PairCombOMP::eval(int iifrom, int iito, ThrData * const thr)
+{
+  int i,j,k,ii,jj,kk,jnum,iparam_i;
+  int itag,jtag,itype,jtype,ktype,iparam_ij,iparam_ijk;
+  double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair;
+  double rsq,rsq1,rsq2;
+  double delr1[3],delr2[3],fi[3],fj[3],fk[3];
+  double zeta_ij,prefactor;
+  int *ilist,*jlist,*numneigh,**firstneigh;
+  int mr1,mr2,mr3;
+  int rsc,inty;
+  double elp_ij,filp[3],fjlp[3],fklp[3];
+  double iq,jq; 
+  double yaself;
+  double potal,fac11,fac11e;
+  double vionij,fvionij,sr1,sr2,sr3,Eov,Fov;
+  int sht_jnum, *sht_jlist;
+
+  evdwl = ecoul = 0.0;
+
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  const double * const q = atom->q;
+  const int * const tag = atom->tag;
+  const int * const type = atom->type;
+  const int nlocal = atom->nlocal;
+
+  ilist = list->ilist;
+  numneigh = list->numneigh;
+  firstneigh = list->firstneigh;
+
+  yaself = vionij = fvionij = Eov = Fov = 0.0; 
+
+  double fxtmp,fytmp,fztmp;
+  double fjxtmp,fjytmp,fjztmp;
+
+  // self energy correction term: potal
+
+  potal_calc(potal,fac11,fac11e);
+
+  // loop over full neighbor list of my atoms
+
+  for (ii = iifrom; ii < iito; ++ii) {
+
+    i = ilist[ii];
+    itag = tag[i];
+    itype = map[type[i]];
+    xtmp = x[i][0];
+    ytmp = x[i][1];
+    ztmp = x[i][2];
+    fxtmp = fytmp = fztmp = 0.0;
+
+    iq = q[i];
+    NCo[i] = 0;  
+    iparam_i = elem2param[itype][itype][itype];
+
+    // self energy, only on i atom
+
+    yaself = self(&params[iparam_i],iq,potal);
+
+    if (EVFLAG) ev_tally_thr(this,i,i,nlocal,0,yaself,
+			     0.0,0.0,0.0,0.0,0.0,thr);
+
+    // two-body interactions (long and short repulsive)
+
+    jlist = firstneigh[i];
+    jnum = numneigh[i];
+    sht_jlist = sht_first[i];
+    sht_jnum = sht_num[i];
+
+    for (jj = 0; jj < jnum; jj++) {
+      j = jlist[jj];
+      j &= NEIGHMASK;
+      jtag = tag[j];
+
+      if (itag > jtag) {
+	if ((itag+jtag) % 2 == 0) continue;
+      } else if (itag < jtag) {
+	if ((itag+jtag) % 2 == 1) continue;
+      } else {
+	if (x[j][2] < ztmp) continue;
+	if (x[j][2] == ztmp && x[j][1] < ytmp) continue;
+	if (x[j][2] == ztmp && x[j][1] == ytmp && x[j][0] < xtmp) continue;
+      }
+
+      // Qj calculates 2-body Coulombic 
+
+      jtype = map[type[j]];
+      jq = q[j];
+
+      delx = xtmp - x[j][0];
+      dely = ytmp - x[j][1];
+      delz = ztmp - x[j][2];
+      rsq = delx*delx + dely*dely + delz*delz;
+
+      iparam_ij = elem2param[itype][jtype][jtype];
+
+      // long range q-dependent
+
+      if (rsq > params[iparam_ij].lcutsq) continue;
+
+      inty = intype[itype][jtype];
+
+      // polynomial three-point interpolation
+
+      tri_point(rsq, mr1, mr2, mr3, sr1, sr2, sr3, itype);
+
+      // 1/r energy and forces
+
+      direct(inty,mr1,mr2,mr3,rsq,sr1,sr2,sr3,iq,jq,
+	     potal,fac11,fac11e,vionij,fvionij);
+
+      // field correction to self energy
+
+      field(&params[iparam_ij],rsq,iq,jq,vionij,fvionij);
+
+      // polarization field
+      // sums up long range forces
+
+      fxtmp += delx*fvionij;
+      fytmp += dely*fvionij;
+      fztmp += delz*fvionij;
+      f[j][0] -= delx*fvionij;
+      f[j][1] -= dely*fvionij;
+      f[j][2] -= delz*fvionij;
+
+      if (EVFLAG) 
+	ev_tally_thr(this,i,j,nlocal,/* newton_pair */ 1,
+		     0.0,vionij,fvionij,delx,dely,delz,thr);
+
+      // short range q-independent
+
+      if (rsq > params[iparam_ij].cutsq) continue;
+
+      repulsive(&params[iparam_ij],rsq,fpair,EFLAG,evdwl,iq,jq);
+
+      // repulsion is pure two-body, sums up pair repulsive forces
+
+      fxtmp += delx*fpair;
+      fytmp += dely*fpair;
+      fztmp += delz*fpair;
+      f[j][0] -= delx*fpair;
+      f[j][1] -= dely*fpair;
+      f[j][2] -= delz*fpair;
+
+      if (EVFLAG)
+	ev_tally_thr(this,i,j,nlocal,/* newton_pair */ 1,
+		     evdwl,0.0,fpair,delx,dely,delz,thr);
+    }
+
+    // accumulate coordination number information
+
+    if (cor_flag) {
+      int numcoor = 0;
+      for (jj = 0; jj < sht_jnum; jj++) {
+        j = sht_jlist[jj];
+	j &= NEIGHMASK;
+	jtype = map[type[j]];
+	iparam_ij = elem2param[itype][jtype][jtype];
+	
+	if(params[iparam_ij].hfocor > 0.0 ) {
+	  delr1[0] = x[j][0] - xtmp;
+	  delr1[1] = x[j][1] - ytmp;
+	  delr1[2] = x[j][2] - ztmp;
+	  rsq1 = vec3_dot(delr1,delr1);
+	  
+	  if (rsq1 > params[iparam_ij].cutsq) continue;
+	  ++numcoor;
+	}
+      }
+      NCo[i] = numcoor; 
+    }
+
+    // three-body interactions 
+    // half i-j loop
+
+    for (jj = 0; jj < sht_jnum; jj++) {
+      j = sht_jlist[jj];
+      j &= NEIGHMASK;
+
+      jtype = map[type[j]];
+      iparam_ij = elem2param[itype][jtype][jtype];
+
+      // this Qj for q-dependent BSi
+
+      jq = q[j];
+
+      delr1[0] = x[j][0] - xtmp;
+      delr1[1] = x[j][1] - ytmp;
+      delr1[2] = x[j][2] - ztmp;
+      rsq1 = vec3_dot(delr1,delr1);
+
+      if (rsq1 > params[iparam_ij].cutsq) continue;
+
+      // accumulate bondorder zeta for each i-j interaction via loop over k
+
+      fjxtmp = fjytmp = fjztmp = 0.0;
+      zeta_ij = 0.0;
+      cuo_flag1 = 0; cuo_flag2 = 0;
+
+      for (kk = 0; kk < sht_jnum; kk++) {
+	k = sht_jlist[kk];
+	if (j == k) continue;
+	k &= NEIGHMASK;
+	ktype = map[type[k]];
+	iparam_ijk = elem2param[itype][jtype][ktype];
+
+	delr2[0] = x[k][0] - xtmp;
+	delr2[1] = x[k][1] - ytmp;
+	delr2[2] = x[k][2] - ztmp;
+	rsq2 = vec3_dot(delr2,delr2);
+
+	if (rsq2 > params[iparam_ijk].cutsq) continue;
+
+	zeta_ij += zeta(&params[iparam_ijk],rsq1,rsq2,delr1,delr2);
+
+	if (params[iparam_ijk].hfocor == -2.0) cuo_flag1 = 1;
+	if (params[iparam_ijk].hfocor == -1.0) cuo_flag2 = 1;
+      }
+
+      if (cuo_flag1 && cuo_flag2) cuo_flag = 1;
+      else cuo_flag = 0;
+
+      force_zeta(&params[iparam_ij],EFLAG,i,j,rsq1,zeta_ij,
+		 iq,jq,fpair,prefactor,evdwl);
+
+      // over-coordination correction for HfO2
+
+      if (cor_flag && NCo[i] != 0)
+	Over_cor(&params[iparam_ij],rsq1,NCo[i],Eov, Fov);
+      evdwl +=  Eov;
+      fpair +=  Fov;
+
+      fxtmp += delr1[0]*fpair;
+      fytmp += delr1[1]*fpair;
+      fztmp += delr1[2]*fpair;
+      fjxtmp -= delr1[0]*fpair;
+      fjytmp -= delr1[1]*fpair;
+      fjztmp -= delr1[2]*fpair;
+
+      if (EVFLAG) ev_tally_thr(this,i,j,nlocal,/* newton_pair */ 1,evdwl,0.0,
+			       -fpair,-delr1[0],-delr1[1],-delr1[2],thr);
+
+      // attractive term via loop over k (3-body forces)
+
+      for (kk = 0; kk < sht_jnum; kk++) {
+	k = sht_jlist[kk];
+	if (j == k) continue;
+	k &= NEIGHMASK;
+	ktype = map[type[k]];
+	iparam_ijk = elem2param[itype][jtype][ktype];
+
+	delr2[0] = x[k][0] - xtmp;
+	delr2[1] = x[k][1] - ytmp;
+	delr2[2] = x[k][2] - ztmp;
+	rsq2 = vec3_dot(delr2,delr2);
+	if (rsq2 > params[iparam_ijk].cutsq) continue;
+
+	for (rsc = 0; rsc < 3; rsc++)
+	  fi[rsc] = fj[rsc] = fk[rsc] = 0.0;
+
+	attractive(&params[iparam_ijk],prefactor,
+		   rsq1,rsq2,delr1,delr2,fi,fj,fk);
+
+	// 3-body LP and BB correction and forces
+
+	elp_ij = elp(&params[iparam_ijk],rsq1,rsq2,delr1,delr2);
+	flp(&params[iparam_ijk],rsq1,rsq2,delr1,delr2,filp,fjlp,fklp); 
+
+	fxtmp += fi[0] + filp[0];
+	fytmp += fi[1] + filp[1];
+	fztmp += fi[2] + filp[2];
+	fjxtmp += fj[0] + fjlp[0];
+	fjytmp += fj[1] + fjlp[1];
+	fjztmp += fj[2] + fjlp[2];
+	f[k][0] += fk[0] + fklp[0];
+	f[k][1] += fk[1] + fklp[1];
+	f[k][2] += fk[2] + fklp[2];
+
+        if (EVFLAG) 
+	  ev_tally_thr(this,i,j,nlocal,/* newton_pair */ 1,
+		       elp_ij,0.0,0.0,0.0,0.0,0.0, thr);
+	if (VFLAG_ATOM) v_tally3_thr(i,j,k,fj,fk,delr1,delr2,thr);
+      }
+      f[j][0] += fjxtmp;
+      f[j][1] += fjytmp;
+      f[j][2] += fjztmp;
+    }
+    f[i][0] += fxtmp;
+    f[i][1] += fytmp;
+    f[i][2] += fztmp;
+
+    if (cuo_flag) params[iparam_i].cutsq *= 0.65;
+  }
+  cuo_flag = 0;
+}
+
+/* ---------------------------------------------------------------------- */
+
+double PairCombOMP::yasu_char(double *qf_fix, int &igroup)
+{
+  int ii;
+  double potal,fac11,fac11e;
+
+  const double * const * const x = atom->x;
+  const double * const q = atom->q;
+  const int * const type = atom->type;
+  const int * const tag = atom->tag;
+
+  const int inum = list->inum;
+  const int * const ilist = list->ilist;
+  const int * const numneigh = list->numneigh;
+  const int * const * const firstneigh = list->firstneigh;
+
+  const int * const mask = atom->mask;
+  const int groupbit = group->bitmask[igroup];
+
+  qf = qf_fix;
+  for (ii = 0; ii < inum; ii++) {
+    const int i = ilist[ii];
+    if (mask[i] & groupbit)
+      qf[i] = 0.0;
+  }
+
+  // communicating charge force to all nodes, first forward then reverse
+
+  comm->forward_comm_pair(this);
+
+  // self energy correction term: potal
+
+  potal_calc(potal,fac11,fac11e);
+
+  // loop over full neighbor list of my atoms
+#if defined(_OPENMP)
+#pragma omp parallel for private(ii) default(none) shared(potal,fac11e)
+#endif
+  for (ii = 0; ii < inum; ii ++) {
+    double fqi,fqj,fqij,fqji,fqjj,delr1[3];
+    double sr1,sr2,sr3;
+    int mr1,mr2,mr3;
+
+    const int i = ilist[ii];
+    const int itag = tag[i];
+
+    if (mask[i] & groupbit) {
+      fqi = fqj = fqij = fqji = fqjj = 0.0; // should not be needed.
+      int itype = map[type[i]];
+      const double xtmp = x[i][0];
+      const double ytmp = x[i][1];
+      const double ztmp = x[i][2];
+      const double iq = q[i];
+      const int iparam_i = elem2param[itype][itype][itype];
+
+      // charge force from self energy
+
+      fqi = qfo_self(&params[iparam_i],iq,potal);
+
+      // two-body interactions
+
+      const int * const jlist = firstneigh[i];
+      const int jnum = numneigh[i];
+
+      for (int jj = 0; jj < jnum; jj++) {
+        const int j = jlist[jj] & NEIGHMASK;
+	const int jtag = tag[j];	
+
+        if (itag > jtag) {
+          if ((itag+jtag) % 2 == 0) continue;
+        } else if (itag < jtag) {
+          if ((itag+jtag) % 2 == 1) continue;
+        } else {
+          if (x[j][2] < ytmp) continue;
+          if (x[j][2] == ztmp && x[j][1] < ytmp) continue;
+          if (x[j][2] == ztmp && x[j][1] == ytmp && x[j][0] < xtmp) continue;
+        } 
+
+        const int jtype = map[type[j]];
+        double jq = q[j];
+
+        delr1[0] = x[j][0] - xtmp;
+        delr1[1] = x[j][1] - ytmp;
+        delr1[2] = x[j][2] - ztmp;
+        double rsq1 = vec3_dot(delr1,delr1);
+
+        const int iparam_ij = elem2param[itype][jtype][jtype];
+
+        // long range q-dependent
+
+        if (rsq1 > params[iparam_ij].lcutsq) continue;
+
+        const int inty = intype[itype][jtype];
+
+        // polynomial three-point interpolation
+
+        tri_point(rsq1,mr1,mr2,mr3,sr1,sr2,sr3,itype);
+
+        // 1/r charge forces
+
+        qfo_direct(inty,mr1,mr2,mr3,rsq1,sr1,sr2,sr3,fac11e,fqij);
+
+        // field correction to self energy and charge force
+
+        qfo_field(&params[iparam_ij],rsq1,iq,jq,fqji,fqjj);
+        fqi   += jq * fqij + fqji;
+#if defined(_OPENMP)
+#pragma omp atomic
+#endif
+        qf[j] += (iq * fqij + fqjj);
+      }
+      
+        // three-body interactions
+	
+      for (int jj = 0; jj < jnum; jj++) {
+	const int j = jlist[jj] & NEIGHMASK;
+	const int jtype = map[type[j]];
+	const double jq = q[j];
+
+	delr1[0] = x[j][0] - xtmp;
+	delr1[1] = x[j][1] - ytmp;
+	delr1[2] = x[j][2] - ztmp;
+	double rsq1 = vec3_dot(delr1,delr1);
+
+        const int iparam_ij = elem2param[itype][jtype][jtype];
+
+        if (rsq1 > params[iparam_ij].cutsq) continue;
+
+        // charge force in Aij and Bij
+
+        qfo_short(&params[iparam_ij],i,j,rsq1,iq,jq,fqij,fqjj);
+        fqi += fqij;  
+#if defined(_OPENMP)
+#pragma omp atomic
+#endif
+	qf[j] += fqjj;
+      }
+
+#if defined(_OPENMP) && 0
+#pragma omp atomic
+#endif
+      qf[i] += fqi;
+    }
+  }
+
+  comm->reverse_comm_pair(this);
+
+  // sum charge force on each node and return it
+
+  double eneg = 0.0;
+  for (ii = 0; ii < inum; ii++) {
+    const int i = ilist[ii];
+    if (mask[i] & groupbit)
+      eneg += qf[i];
+  }
+  double enegtot;
+  MPI_Allreduce(&eneg,&enegtot,1,MPI_DOUBLE,MPI_SUM,world);
+  return enegtot;
+}
+
+/* ---------------------------------------------------------------------- */
+
+double PairCombOMP::memory_usage()
+{
+  double bytes = memory_usage_thr();
+  bytes += PairComb::memory_usage();
+
+  return bytes;
+}
+/* ---------------------------------------------------------------------- */
+
+void PairCombOMP::Short_neigh_thr()
+{
+
+  if (atom->nmax > nmax) {
+    nmax = int(1.0 * atom->nmax);
+    memory->sfree(sht_num);
+    memory->sfree(sht_first);
+    memory->create(sht_num,nmax,"pair:sht_num");
+    sht_first = (int **) memory->smalloc(nmax*sizeof(int *),
+	    "pair:sht_first");
+  }
+
+  const int nthreads = comm->nthreads;
+  if (nthreads > maxpage)
+    add_pages(nthreads - maxpage);
+
+#if defined(_OPENMP)
+#pragma omp parallel default(none)
+#endif
+  {
+    int nj,npntj,*neighptrj,itype,jtype;
+    int iparam_ij,*ilist,*jlist,*numneigh,**firstneigh;
+    int jnum,i,j,ii,jj;
+    double xtmp,ytmp,ztmp,rsq,delrj[3];
+    double **x = atom->x;
+    int *type  = atom->type;
+
+    const int inum = list->inum;
+    ilist = list->ilist;
+    numneigh = list->numneigh;
+    firstneigh = list->firstneigh;
+
+
+#if defined(_OPENMP)
+    const int tid = omp_get_thread_num();
+#else
+    const int tid = 0;
+#endif
+
+    const int iidelta = 1 + inum/nthreads;
+    const int iifrom = tid*iidelta;
+    int iito   = iifrom + iidelta;
+    if (iito > inum) iito = inum;
+
+    int npage = tid;
+    npntj = 0;
+
+    for (ii = iifrom; ii < iito; ii++) {
+      i = ilist[ii];
+      itype = type[i];
+
+#if defined(_OPENMP)
+#pragma omp critical
+#endif
+      if(pgsize - npntj < oneatom) {
+	npntj = 0;
+	npage++;
+	if (npage == maxpage) add_pages(nthreads);
+      }
+ 
+      neighptrj = &pages[npage][npntj];
+      nj = 0;
+
+      xtmp = x[i][0];
+      ytmp = x[i][1];
+      ztmp = x[i][2];
+
+      jlist = firstneigh[i];
+      jnum = numneigh[i];
+
+      for (jj = 0; jj < jnum; jj++) {
+	j = jlist[jj];
+	j &= NEIGHMASK;
+	jtype = type[j];
+	iparam_ij = elem2param[itype][jtype][jtype];
+
+	delrj[0] = xtmp - x[j][0];
+	delrj[1] = ytmp - x[j][1];
+	delrj[2] = ztmp - x[j][2];
+	rsq = vec3_dot(delrj,delrj);
+      
+	if (rsq > cutmin) continue;
+	neighptrj[nj++] = j;
+      }
+      sht_first[i] = neighptrj;
+      sht_num[i] = nj;
+      npntj += nj;
+    }
+  }
+}
--- a/src/USER-OMP/pair_comb_omp.h
+++ b/src/USER-OMP/pair_comb_omp.h
@ -0,0 +1,47 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+
+PairStyle(comb/omp,PairCombOMP)
+
+#else
+
+#ifndef LMP_PAIR_COMB_OMP_H
+#define LMP_PAIR_COMB_OMP_H
+
+#include "pair_comb.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+class PairCombOMP : public PairComb, public ThrOMP {
+
+ public:
+  PairCombOMP(class LAMMPS *);
+
+  virtual void compute(int, int);
+  virtual double memory_usage();
+
+  virtual double yasu_char(double *, int &);
+
+ private:
+  template <int EVFLAG, int EFLAG, int VFLAG_ATOM>
+  void eval(int ifrom, int ito, ThrData * const thr);
+
+  void Short_neigh_thr();
+};
+
+}
+
+#endif
+#endif
--- a/src/USER-OMP/pair_coul_diel_omp.cpp
+++ b/src/USER-OMP/pair_coul_diel_omp.cpp
@ -0,0 +1,165 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   This software is distributed under the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#include "math.h"
+#include "pair_coul_diel_omp.h"
+#include "atom.h"
+#include "comm.h"
+#include "force.h"
+#include "neighbor.h"
+#include "neigh_list.h"
+
+using namespace LAMMPS_NS;
+
+/* ---------------------------------------------------------------------- */
+
+PairCoulDielOMP::PairCoulDielOMP(LAMMPS *lmp) :
+  PairCoulDiel(lmp), ThrOMP(lmp, THR_PAIR)
+{
+  respa_enable = 0;
+}
+
+/* ---------------------------------------------------------------------- */
+
+void PairCoulDielOMP::compute(int eflag, int vflag)
+{
+  if (eflag || vflag) {
+    ev_setup(eflag,vflag);
+  } else evflag = vflag_fdotr = 0;
+
+  const int nall = atom->nlocal + atom->nghost;
+  const int nthreads = comm->nthreads;
+  const int inum = list->inum;
+
+#if defined(_OPENMP)
+#pragma omp parallel default(none) shared(eflag,vflag)
+#endif
+  {
+    int ifrom, ito, tid;
+
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
+
+    if (evflag) {
+      if (eflag) {
+	if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
+      } else {
+	if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
+      }
+    } else {
+      if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
+    }
+
+    reduce_thr(this, eflag, vflag, thr);
+  } // end of omp parallel region
+}
+
+/* ---------------------------------------------------------------------- */
+
+template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
+void PairCoulDielOMP::eval(int iifrom, int iito, ThrData * const thr)
+{
+  int i,j,ii,jj,jnum,itype,jtype;
+  double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,ecoul,fpair;
+  double rsq,r,rarg,th,depsdr,epsr,forcecoul,factor_coul;
+  int *ilist,*jlist,*numneigh,**firstneigh;
+
+  ecoul = 0.0;
+
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  const double * const q = atom->q;
+  const int * const type = atom->type;
+  const int nlocal = atom->nlocal;
+  const double * const special_coul = force->special_coul;
+  const double qqrd2e = force->qqrd2e;
+  double fxtmp,fytmp,fztmp;
+
+  ilist = list->ilist;
+  numneigh = list->numneigh;
+  firstneigh = list->firstneigh;
+
+  // loop over neighbors of my atoms
+
+  for (ii = iifrom; ii < iito; ++ii) {
+
+    i = ilist[ii];
+    qtmp = q[i];
+    xtmp = x[i][0];
+    ytmp = x[i][1];
+    ztmp = x[i][2];
+    itype = type[i];
+    jlist = firstneigh[i];
+    jnum = numneigh[i];
+    fxtmp=fytmp=fztmp=0.0;
+
+    for (jj = 0; jj < jnum; jj++) {
+      j = jlist[jj];
+
+      factor_coul = special_coul[sbmask(j)];
+      j &= NEIGHMASK;
+
+      delx = xtmp - x[j][0];
+      dely = ytmp - x[j][1];
+      delz = ztmp - x[j][2];
+      rsq = delx*delx + dely*dely + delz*delz;
+      jtype = type[j];
+
+      if (rsq < cutsq[itype][jtype]) {
+	r = sqrt(rsq);
+	rarg = (r-rme[itype][jtype])/sigmae[itype][jtype];
+	th=tanh(rarg);
+	epsr=a_eps+b_eps*th;
+	depsdr=b_eps * (1.0 - th*th) / sigmae[itype][jtype];
+	
+	forcecoul = qqrd2e*qtmp*q[j]*((eps_s*(epsr+r*depsdr)/epsr/epsr) -1.)/rsq;
+	fpair = factor_coul*forcecoul/r;
+	
+	fxtmp += delx*fpair;
+	fytmp += dely*fpair;
+	fztmp += delz*fpair;
+	if (NEWTON_PAIR || j < nlocal) {
+	  f[j][0] -= delx*fpair;
+	  f[j][1] -= dely*fpair;
+	  f[j][2] -= delz*fpair;
+	}
+
+	if (EFLAG) {
+	  ecoul = (qqrd2e*qtmp*q[j]*((eps_s/epsr) -1.)/r) - offset[itype][jtype];
+	  ecoul *= factor_coul;
+	}
+
+	if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR,
+				 0.0,ecoul,fpair,delx,dely,delz,thr);
+      }
+    }
+    f[i][0] += fxtmp;
+    f[i][1] += fytmp;
+    f[i][2] += fztmp;
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+double PairCoulDielOMP::memory_usage()
+{
+  double bytes = memory_usage_thr();
+  bytes += PairCoulDiel::memory_usage();
+
+  return bytes;
+}
--- a/src/USER-OMP/pair_coul_diel_omp.h
+++ b/src/USER-OMP/pair_coul_diel_omp.h
@ -0,0 +1,48 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+
+PairStyle(coul/diel/omp,PairCoulDielOMP)
+
+#else
+
+#ifndef LMP_PAIR_COUL_DIEL_OMP_H
+#define LMP_PAIR_COUL_DIEL_OMP_H
+
+#include "pair_coul_diel.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+class PairCoulDielOMP : public PairCoulDiel, public ThrOMP {
+
+ public:
+  PairCoulDielOMP(class LAMMPS *);
+
+  virtual void compute(int, int);
+  virtual double memory_usage();
+
+ private:
+  template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
+  void eval(int ifrom, int ito, ThrData * const thr);
+};
+
+}
+
+#endif
+#endif
--- a/src/USER-OMP/pair_gauss_cut_omp.cpp
+++ b/src/USER-OMP/pair_gauss_cut_omp.cpp
@ -0,0 +1,156 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   This software is distributed under the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#include "math.h"
+#include "pair_gauss_cut_omp.h"
+#include "atom.h"
+#include "comm.h"
+#include "force.h"
+#include "neighbor.h"
+#include "neigh_list.h"
+
+using namespace LAMMPS_NS;
+
+/* ---------------------------------------------------------------------- */
+
+PairGaussCutOMP::PairGaussCutOMP(LAMMPS *lmp) :
+  PairGaussCut(lmp), ThrOMP(lmp, THR_PAIR)
+{
+  respa_enable = 0;
+}
+
+/* ---------------------------------------------------------------------- */
+
+void PairGaussCutOMP::compute(int eflag, int vflag)
+{
+  if (eflag || vflag) {
+    ev_setup(eflag,vflag);
+  } else evflag = vflag_fdotr = 0;
+
+  const int nall = atom->nlocal + atom->nghost;
+  const int nthreads = comm->nthreads;
+  const int inum = list->inum;
+
+#if defined(_OPENMP)
+#pragma omp parallel default(none) shared(eflag,vflag)
+#endif
+  {
+    int ifrom, ito, tid;
+
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
+
+    if (evflag) {
+      if (eflag) {
+	if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
+      } else {
+	if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
+      }
+    } else {
+      if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
+    }
+
+    reduce_thr(this, eflag, vflag, thr);
+  } // end of omp parallel region
+}
+
+template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
+void PairGaussCutOMP::eval(int iifrom, int iito, ThrData * const thr)
+{
+  int i,j,ii,jj,jnum,itype,jtype;
+  double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
+  double rsq,r,rexp,ugauss,factor_lj;
+  int *ilist,*jlist,*numneigh,**firstneigh;
+
+  evdwl = 0.0;
+
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  const int * const type = atom->type;
+  const int nlocal = atom->nlocal;
+  const double * const special_lj = force->special_lj;
+  double fxtmp,fytmp,fztmp;
+
+  ilist = list->ilist;
+  numneigh = list->numneigh;
+  firstneigh = list->firstneigh;
+
+  // loop over neighbors of my atoms
+
+  for (ii = iifrom; ii < iito; ++ii) {
+
+    i = ilist[ii];
+    xtmp = x[i][0];
+    ytmp = x[i][1];
+    ztmp = x[i][2];
+    itype = type[i];
+    jlist = firstneigh[i];
+    jnum = numneigh[i];
+    fxtmp=fytmp=fztmp=0.0;
+
+    for (jj = 0; jj < jnum; jj++) {
+      j = jlist[jj];
+
+      factor_lj = special_lj[sbmask(j)];
+      j &= NEIGHMASK;
+
+      delx = xtmp - x[j][0];
+      dely = ytmp - x[j][1];
+      delz = ztmp - x[j][2];
+      rsq = delx*delx + dely*dely + delz*delz;
+      jtype = type[j];
+
+      if (rsq < cutsq[itype][jtype]) {
+	r = sqrt(rsq);
+	rexp = (r-rmh[itype][jtype])/sigmah[itype][jtype];
+	ugauss = pgauss[itype][jtype]*exp(-0.5*rexp*rexp); 
+	fpair = factor_lj*rexp/r*ugauss/sigmah[itype][jtype];
+
+	fxtmp += delx*fpair;
+	fytmp += dely*fpair;
+	fztmp += delz*fpair;
+	if (NEWTON_PAIR || j < nlocal) {
+	  f[j][0] -= delx*fpair;
+	  f[j][1] -= dely*fpair;
+	  f[j][2] -= delz*fpair;
+	}
+
+	if (EFLAG) {
+	  evdwl = ugauss - offset[itype][jtype];
+	  evdwl *= factor_lj;
+	}
+
+	if (EVFLAG) ev_tally_thr(this,i,j,nlocal,NEWTON_PAIR,
+				 evdwl,0.0,fpair,delx,dely,delz,thr);
+      }
+    }
+    f[i][0] += fxtmp;
+    f[i][1] += fytmp;
+    f[i][2] += fztmp;
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+double PairGaussCutOMP::memory_usage()
+{
+  double bytes = memory_usage_thr();
+  bytes += PairGaussCut::memory_usage();
+
+  return bytes;
+}
--- a/src/USER-OMP/pair_gauss_cut_omp.h
+++ b/src/USER-OMP/pair_gauss_cut_omp.h
@ -0,0 +1,48 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+
+PairStyle(gauss/cut/omp,PairGaussCutOMP)
+
+#else
+
+#ifndef LMP_PAIR_GAUSS_CUT_OMP_H
+#define LMP_PAIR_GAUSS_CUT_OMP_H
+
+#include "pair_gauss_cut.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+class PairGaussCutOMP : public PairGaussCut, public ThrOMP {
+
+ public:
+  PairGaussCutOMP(class LAMMPS *);
+
+  virtual void compute(int, int);
+  virtual double memory_usage();
+
+ private:
+  template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
+  void eval(int ifrom, int ito, ThrData * const thr);
+};
+
+}
+
+#endif
+#endif
--- a/src/USER-OMP/pair_line_lj_omp.cpp
+++ b/src/USER-OMP/pair_line_lj_omp.cpp
@ -0,0 +1,339 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#include "math.h"
+#include "pair_line_lj_omp.h"
+#include "atom.h"
+#include "comm.h"
+#include "force.h"
+#include "memory.h"
+#include "neighbor.h"
+#include "neigh_list.h"
+
+#include <string.h>
+
+using namespace LAMMPS_NS;
+
+/* ---------------------------------------------------------------------- */
+
+PairLineLJOMP::PairLineLJOMP(LAMMPS *lmp) :
+  PairLineLJ(lmp), ThrOMP(lmp, THR_PAIR)
+{
+  respa_enable = 0;
+}
+
+/* ---------------------------------------------------------------------- */
+
+void PairLineLJOMP::compute(int eflag, int vflag)
+{
+  if (eflag || vflag) {
+    ev_setup(eflag,vflag);
+  } else evflag = vflag_fdotr = 0;
+
+  const int nall = atom->nlocal + atom->nghost;
+  const int nthreads = comm->nthreads;
+  const int inum = list->inum;
+  const int * const line = atom->line;
+  const int * const type = atom->type;
+
+  // grow discrete list if necessary and initialize
+
+  if (nall > nmax) {
+    nmax = nall;
+    memory->destroy(dnum);
+    memory->destroy(dfirst);
+    memory->create(dnum,nall,"pair:dnum");
+    memory->create(dfirst,nall,"pair:dfirst");
+  }
+  memset(dnum,0,nall*sizeof(int));
+  ndiscrete = 0;
+
+  // need to discretize the system ahead of time
+  // until we find a good way to multi-thread it.
+  for (int i = 0; i < nall; ++i)
+    if (line[i] >= 0)
+      if (dnum[i] == 0)
+	discretize(i,sigma[type[i]][type[i]]);
+
+#if defined(_OPENMP)
+#pragma omp parallel default(none) shared(eflag,vflag)
+#endif
+  {
+    int ifrom, ito, tid;
+
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
+
+    if (evflag) {
+      if (eflag) {
+	if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
+      } else {
+	if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
+      }
+    } else {
+      if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
+    }
+
+    reduce_thr(this, eflag, vflag, thr);
+  } // end of omp parallel region
+}
+
+template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
+void PairLineLJOMP::eval(int iifrom, int iito, ThrData * const thr)
+{
+  int i,j,ii,jj,jnum,itype,jtype;
+  int ni,nj,npi,npj,ifirst,jfirst;
+  double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
+  double rsq,r2inv,r6inv,term1,term2,sig,sig3,forcelj;
+  double xi[2],xj[2],fi[2],fj[2],dxi,dxj,dyi,dyj,ti,tj;
+  int *ilist,*jlist,*numneigh,**firstneigh;
+
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  double * const * const torque = thr->get_torque();
+  const int * const line = atom->line;
+  const int * const type = atom->type;
+  const int nlocal = atom->nlocal;
+
+  ilist = list->ilist;
+  numneigh = list->numneigh;
+  firstneigh = list->firstneigh;
+
+  // loop over neighbors of my atoms
+
+  for (ii = iifrom; ii < iito; ++ii) {
+
+    i = ilist[ii];
+    xtmp = x[i][0];
+    ytmp = x[i][1];
+    ztmp = x[i][2];
+    itype = type[i];
+    jlist = firstneigh[i];
+    jnum = numneigh[i];
+
+    for (jj = 0; jj < jnum; jj++) {
+      j = jlist[jj];
+      j &= NEIGHMASK;
+
+      delx = xtmp - x[j][0];
+      dely = ytmp - x[j][1];
+      delz = ztmp - x[j][2];
+      rsq = delx*delx + dely*dely + delz*delz;
+      jtype = type[j];
+      
+      if (rsq >= cutsq[itype][jtype]) continue;
+      
+      // line/line interactions = NxN particles
+      
+      evdwl = 0.0;
+      if (line[i] >= 0 && line[j] >= 0) {
+	npi = dnum[i];
+	ifirst = dfirst[i];
+	npj = dnum[j];
+	jfirst = dfirst[j];
+
+	fi[0] = fi[1] = fj[0] = fj[1] = ti = tj = 0.0;
+
+	for (ni = 0; ni < npi; ni++) {
+	  dxi = discrete[ifirst+ni].dx;
+	  dyi = discrete[ifirst+ni].dy;
+
+	  for (nj = 0; nj < npj; nj++) {
+	    dxj = discrete[jfirst+nj].dx;
+	    dyj = discrete[jfirst+nj].dy;
+
+	    xi[0] = x[i][0] + dxi;
+	    xi[1] = x[i][1] + dyi;
+	    xj[0] = x[j][0] + dxj;
+	    xj[1] = x[j][1] + dyj;
+
+	    delx = xi[0] - xj[0];
+	    dely = xi[1] - xj[1];
+	    rsq = delx*delx + dely*dely;
+
+	    sig = 0.5 * (discrete[ifirst+ni].sigma+discrete[jfirst+nj].sigma);
+	    sig3 = sig*sig*sig;
+	    term2 = 24.0*epsilon[itype][jtype] * sig3*sig3;
+	    term1 = 2.0 * term2 * sig3*sig3;
+	    r2inv = 1.0/rsq;
+	    r6inv = r2inv*r2inv*r2inv;
+	    forcelj = r6inv * (term1*r6inv - term2);
+	    fpair = forcelj*r2inv;
+
+	    if (EFLAG) evdwl += r6inv*(term1/12.0*r6inv-term2/6.0);
+
+	    fi[0] += delx*fpair;
+	    fi[1] += dely*fpair;
+	    ti += fpair*(dxi*dely - dyi*delx);
+
+	    if (NEWTON_PAIR || j < nlocal) {
+	      fj[0] -= delx*fpair;
+	      fj[1] -= dely*fpair;
+	      tj += fpair*(dxj*dely - dyj*delx);
+	    }
+	  }
+	}
+
+	f[i][0] += fi[0];
+	f[i][1] += fi[1];
+	f[j][0] += fj[0];
+	f[j][1] += fj[1];
+	torque[i][2] += ti;
+	torque[j][2] += tj;
+
+      // line/particle interaction = Nx1 particles
+      // convert line into Np particles based on sigma and line length
+
+      } else if (line[i] >= 0) {
+	npi = dnum[i];
+	ifirst = dfirst[i];
+
+	fi[0] = fi[1] = fj[0] = fj[1] = ti = tj = 0.0;
+
+	for (ni = 0; ni < npi; ni++) {
+	  dxi = discrete[ifirst+ni].dx;
+	  dyi = discrete[ifirst+ni].dy;
+
+	  xi[0] = x[i][0] + dxi;
+	  xi[1] = x[i][1] + dyi;
+	  xj[0] = x[j][0];
+	  xj[1] = x[j][1];
+
+	  delx = xi[0] - xj[0];
+	  dely = xi[1] - xj[1];
+	  rsq = delx*delx + dely*dely;
+
+	  sig = 0.5 * (discrete[ifirst+ni].sigma+sigma[jtype][jtype]);
+	  sig3 = sig*sig*sig;
+	  term2 = 24.0*epsilon[itype][jtype] * sig3*sig3;
+	  term1 = 2.0 * term2 * sig3*sig3;
+	  r2inv = 1.0/rsq;
+	  r6inv = r2inv*r2inv*r2inv;
+	  forcelj = r6inv * (term1*r6inv - term2);
+	  fpair = forcelj*r2inv;
+
+	  if (EFLAG) evdwl += r6inv*(term1/12.0*r6inv-term2/6.0);
+
+	  fi[0] += delx*fpair;
+	  fi[1] += dely*fpair;
+	  ti += fpair*(dxi*dely - dyi*delx);
+
+	  if (NEWTON_PAIR || j < nlocal) {
+	    fj[0] -= delx*fpair;
+	    fj[1] -= dely*fpair;
+	    tj += fpair*(dxj*dely - dyj*delx);
+	  }
+	}
+
+	f[i][0] += fi[0];
+	f[i][1] += fi[1];
+	f[j][0] += fj[0];
+	f[j][1] += fj[1];
+	torque[i][2] += ti;
+	torque[j][2] += tj;
+
+      // particle/line interaction = Nx1 particles
+      // convert line into Np particles based on sigma and line length
+
+      } else if (line[j] >= 0) {
+	npj = dnum[j];
+	jfirst = dfirst[j];
+
+	fi[0] = fi[1] = fj[0] = fj[1] = ti = tj = 0.0;
+
+	for (nj = 0; nj < npj; nj++) {
+	  dxj = discrete[jfirst+nj].dx;
+	  dyj = discrete[jfirst+nj].dy;
+
+	  xi[0] = x[i][0];
+	  xi[1] = x[i][1];
+	  xj[0] = x[j][0] + dxj;
+	  xj[1] = x[j][1] + dyj;
+
+	  delx = xi[0] - xj[0];
+	  dely = xi[1] - xj[1];
+	  rsq = delx*delx + dely*dely;
+
+	  sig = 0.5 * (sigma[itype][itype]+discrete[jfirst+nj].sigma);
+	  sig3 = sig*sig*sig;
+	  term2 = 24.0*epsilon[itype][jtype] * sig3*sig3;
+	  term1 = 2.0 * term2 * sig3*sig3;
+	  r2inv = 1.0/rsq;
+	  r6inv = r2inv*r2inv*r2inv;
+	  forcelj = r6inv * (term1*r6inv - term2);
+	  fpair = forcelj*r2inv;
+	  
+	  if (EFLAG) evdwl += r6inv*(term1/12.0*r6inv-term2/6.0);
+
+	  fi[0] += delx*fpair;
+	  fi[1] += dely*fpair;
+	  ti += fpair*(dxi*dely - dyi*delx);
+
+	  if (NEWTON_PAIR || j < nlocal) {
+	    fj[0] -= delx*fpair;
+	    fj[1] -= dely*fpair;
+	    tj -= fpair*(dxj*dely - dyj*delx);
+	  }
+	}
+
+	f[i][0] += fi[0];
+	f[i][1] += fi[1];
+	f[j][0] += fj[0];
+	f[j][1] += fj[1];
+	torque[i][2] += ti;
+	torque[j][2] += tj;
+
+      // particle/particle interaction = 1x1 particles
+
+      } else {
+	r2inv = 1.0/rsq;
+	r6inv = r2inv*r2inv*r2inv;
+	forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
+	fpair = forcelj*r2inv;
+
+	if (EFLAG)
+	  evdwl += r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]);
+
+	f[i][0] += delx*fpair;
+	f[i][1] += dely*fpair;
+	f[i][2] += delz*fpair;
+	if (NEWTON_PAIR || j < nlocal) {
+	  f[j][0] -= delx*fpair;
+	  f[j][1] -= dely*fpair;
+	  f[j][2] -= delz*fpair;
+	}
+      }
+
+      if (EVFLAG) ev_tally_thr(this,i,j,nlocal,NEWTON_PAIR,
+			       evdwl,0.0,fpair,delx,dely,delz,thr);
+    }
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+double PairLineLJOMP::memory_usage()
+{
+  double bytes = memory_usage_thr();
+  bytes += PairLineLJ::memory_usage();
+
+  return bytes;
+}
--- a/src/USER-OMP/pair_line_lj_omp.h
+++ b/src/USER-OMP/pair_line_lj_omp.h
@ -0,0 +1,48 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+
+PairStyle(line/lj/omp,PairLineLJOMP)
+
+#else
+
+#ifndef LMP_PAIR_LINE_LJ_OMP_H
+#define LMP_PAIR_LINE_LJ_OMP_H
+
+#include "pair_line_lj.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+class PairLineLJOMP : public PairLineLJ, public ThrOMP {
+
+ public:
+  PairLineLJOMP(class LAMMPS *);
+
+  virtual void compute(int, int);
+  virtual double memory_usage();
+
+ private:
+  template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
+  void eval(int ifrom, int ito, ThrData * const thr);
+};
+
+}
+
+#endif
+#endif
--- a/src/USER-OMP/pair_lj_charmm_coul_pppm_omp.cpp
+++ b/src/USER-OMP/pair_lj_charmm_coul_pppm_omp.cpp
@ -0,0 +1,260 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   This software is distributed under the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#include "math.h"
+#include "pair_lj_charmm_coul_pppm_omp.h"
+#include "pppm_proxy.h"
+#include "atom.h"
+#include "comm.h"
+#include "error.h"
+#include "force.h"
+#include "neighbor.h"
+#include "neigh_list.h"
+#include "update.h"
+
+#include <string.h>
+
+using namespace LAMMPS_NS;
+
+#define EWALD_F   1.12837917
+#define EWALD_P   0.3275911
+#define A1        0.254829592
+#define A2       -0.284496736
+#define A3        1.421413741
+#define A4       -1.453152027
+#define A5        1.061405429
+
+/* ---------------------------------------------------------------------- */
+
+PairLJCharmmCoulPPPMOMP::PairLJCharmmCoulPPPMOMP(LAMMPS *lmp) :
+  PairLJCharmmCoulLong(lmp), ThrOMP(lmp, THR_PAIR|THR_PROXY)
+{
+  respa_enable = 0;
+  nproxy=1;
+  
+  kspace = NULL;
+}
+
+/* ---------------------------------------------------------------------- */
+
+void PairLJCharmmCoulPPPMOMP::init_style()
+{
+  if (comm->nthreads < 2)
+    error->all(FLERR,"need at least two threads per MPI task for this pair style");
+
+  if (strcmp(force->kspace_style,"pppm/proxy") != 0)
+    error->all(FLERR,"kspace style pppm/proxy is required with this pair style");
+
+  kspace = static_cast<PPPMProxy *>(force->kspace);
+
+  PairLJCharmmCoulLong::init_style();
+}
+
+/* ---------------------------------------------------------------------- */
+
+void PairLJCharmmCoulPPPMOMP::compute(int eflag, int vflag)
+{
+  if (eflag || vflag) {
+    ev_setup(eflag,vflag);
+  } else evflag = vflag_fdotr = 0;
+
+  const int nall = atom->nlocal + atom->nghost;
+  const int nthreads = comm->nthreads;
+  const int inum = list->inum;
+
+#if defined(_OPENMP)
+#pragma omp parallel default(none) shared(eflag,vflag)
+#endif
+  {
+    int ifrom, ito, tid;
+
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads, nproxy);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
+
+    // thread id 0 runs pppm, the rest the pair style
+    if (tid < nproxy) {
+      if (update->setupflag) kspace->setup_proxy();
+      kspace->compute_proxy(eflag,vflag);
+    } else {
+      if (evflag) {
+	if (eflag) {
+	  if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr);
+	  else eval<1,1,0>(ifrom, ito, thr);
+	} else {
+	  if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr);
+	  else eval<1,0,0>(ifrom, ito, thr);
+	}
+      } else {
+	if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr);
+	else eval<0,0,0>(ifrom, ito, thr);
+      }
+    }
+    
+    sync_threads();
+    reduce_thr(this, eflag, vflag, thr, nproxy);
+  } // end of omp parallel region
+}
+
+/* ---------------------------------------------------------------------- */
+
+template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
+void PairLJCharmmCoulPPPMOMP::eval(int iifrom, int iito, ThrData * const thr)
+{
+  int i,j,ii,jj,jnum,itype,jtype,itable;
+  double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair;
+  double fraction,table;
+  double r,rsq,r2inv,r6inv,forcecoul,forcelj,factor_coul,factor_lj;
+  double grij,expm2,prefactor,t,erfc;
+  double philj,switch1,switch2;
+  int *ilist,*jlist,*numneigh,**firstneigh;
+
+  evdwl = ecoul = 0.0;
+
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  const double * const q = atom->q;
+  const int * const type = atom->type;
+  const int nlocal = atom->nlocal;
+  const double * const special_coul = force->special_coul;
+  const double * const special_lj = force->special_lj;
+  const double qqrd2e = force->qqrd2e;
+  double fxtmp,fytmp,fztmp;
+
+  ilist = list->ilist;
+  numneigh = list->numneigh;
+  firstneigh = list->firstneigh;
+
+  // loop over neighbors of my atoms
+
+  for (ii = iifrom; ii < iito; ++ii) {
+
+    i = ilist[ii];
+    qtmp = q[i];
+    xtmp = x[i][0];
+    ytmp = x[i][1];
+    ztmp = x[i][2];
+    itype = type[i];
+    jlist = firstneigh[i];
+    jnum = numneigh[i];
+    fxtmp=fytmp=fztmp=0.0;
+
+    for (jj = 0; jj < jnum; jj++) {
+      j = jlist[jj];
+      factor_lj = special_lj[sbmask(j)];
+      factor_coul = special_coul[sbmask(j)];
+      j &= NEIGHMASK;
+
+      delx = xtmp - x[j][0];
+      dely = ytmp - x[j][1];
+      delz = ztmp - x[j][2];
+      rsq = delx*delx + dely*dely + delz*delz;
+      jtype = type[j];
+
+      if (rsq < cutsq[itype][jtype]) {
+	r2inv = 1.0/rsq;
+
+	if (rsq < cut_coulsq) {
+	  if (!ncoultablebits || rsq <= tabinnersq) {
+	    r = sqrt(rsq);
+	    grij = g_ewald * r;
+	    expm2 = exp(-grij*grij);
+	    t = 1.0 / (1.0 + EWALD_P*grij);
+	    erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2;
+	    prefactor = qqrd2e * qtmp*q[j]/r;
+	    forcecoul = prefactor * (erfc + EWALD_F*grij*expm2);
+	    if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*prefactor;
+	  } else {
+	    union_int_float_t rsq_lookup;
+	    rsq_lookup.f = rsq;
+	    itable = rsq_lookup.i & ncoulmask;
+	    itable >>= ncoulshiftbits;
+	    fraction = (rsq_lookup.f - rtable[itable]) * drtable[itable];
+	    table = ftable[itable] + fraction*dftable[itable];
+	    forcecoul = qtmp*q[j] * table;
+	    if (factor_coul < 1.0) {
+	      table = ctable[itable] + fraction*dctable[itable];
+	      prefactor = qtmp*q[j] * table;
+	      forcecoul -= (1.0-factor_coul)*prefactor;
+	    }
+	  }
+	} else forcecoul = 0.0;
+
+	if (rsq < cut_ljsq) {
+	  r6inv = r2inv*r2inv*r2inv;
+	  jtype = type[j];
+	  forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
+	  if (rsq > cut_lj_innersq) {
+	    switch1 = (cut_ljsq-rsq) * (cut_ljsq-rsq) *
+	      (cut_ljsq + 2.0*rsq - 3.0*cut_lj_innersq) / denom_lj;
+	    switch2 = 12.0*rsq * (cut_ljsq-rsq) * 
+	      (rsq-cut_lj_innersq) / denom_lj;
+	    philj = r6inv * (lj3[itype][jtype]*r6inv - lj4[itype][jtype]);
+	    forcelj = forcelj*switch1 + philj*switch2;
+	  }
+	  forcelj *= factor_lj;
+	} else forcelj = 0.0;
+
+	fpair = (forcecoul + forcelj) * r2inv;
+
+	fxtmp += delx*fpair;
+	fytmp += dely*fpair;
+	fztmp += delz*fpair;
+	if (NEWTON_PAIR || j < nlocal) {
+	  f[j][0] -= delx*fpair;
+	  f[j][1] -= dely*fpair;
+	  f[j][2] -= delz*fpair;
+	}
+
+	if (EFLAG) {
+	  if (rsq < cut_coulsq) {
+	    if (!ncoultablebits || rsq <= tabinnersq)
+	      ecoul = prefactor*erfc;
+	    else {
+	      table = etable[itable] + fraction*detable[itable];
+	      ecoul = qtmp*q[j] * table;
+	    }
+	    if (factor_coul < 1.0) ecoul -= (1.0-factor_coul)*prefactor;
+	  } else ecoul = 0.0;
+
+	  if (rsq < cut_ljsq) {
+	    evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]);
+	    if (rsq > cut_lj_innersq) {
+	      switch1 = (cut_ljsq-rsq) * (cut_ljsq-rsq) *
+		(cut_ljsq + 2.0*rsq - 3.0*cut_lj_innersq) / denom_lj;
+	      evdwl *= switch1;
+	    }
+	    evdwl *= factor_lj;
+	  } else evdwl = 0.0;
+	}
+	
+	if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR,
+				 evdwl,ecoul,fpair,delx,dely,delz,thr);
+      }
+    }
+    f[i][0] += fxtmp;
+    f[i][1] += fytmp;
+    f[i][2] += fztmp;
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+double PairLJCharmmCoulPPPMOMP::memory_usage()
+{
+  double bytes = memory_usage_thr();
+  bytes += PairLJCharmmCoulLong::memory_usage();
+
+  return bytes;
+}
--- a/src/USER-OMP/pair_lj_charmm_coul_pppm_omp.h
+++ b/src/USER-OMP/pair_lj_charmm_coul_pppm_omp.h
@ -0,0 +1,54 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+
+PairStyle(lj/charmm/coul/pppm/omp,PairLJCharmmCoulPPPMOMP)
+
+#else
+
+#ifndef LMP_PAIR_LJ_CHARMM_COUL_PPPM_OMP_H
+#define LMP_PAIR_LJ_CHARMM_COUL_PPPM_OMP_H
+
+#include "pair_lj_charmm_coul_long.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+class PairLJCharmmCoulPPPMOMP : public PairLJCharmmCoulLong, public ThrOMP {
+
+ public:
+  PairLJCharmmCoulPPPMOMP(class LAMMPS *);
+  virtual ~PairLJCharmmCoulPPPMOMP() {};
+
+  virtual void init_style();
+
+  virtual void compute(int, int);
+  virtual double memory_usage();
+
+ private:
+  template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
+  void eval(int ifrom, int ito, ThrData * const thr);
+
+  class PPPMProxy *kspace;
+  int nproxy;
+};
+
+}
+
+#endif
+#endif
--- a/src/USER-OMP/pair_lj_cut_coul_pppm_omp.cpp
+++ b/src/USER-OMP/pair_lj_cut_coul_pppm_omp.cpp
@ -0,0 +1,245 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   This software is distributed under the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#include "math.h"
+#include "pair_lj_cut_coul_pppm_omp.h"
+#include "pppm_proxy.h"
+#include "atom.h"
+#include "comm.h"
+#include "error.h"
+#include "force.h"
+#include "neighbor.h"
+#include "neigh_list.h"
+#include "update.h"
+
+#include <string.h>
+
+using namespace LAMMPS_NS;
+
+#define EWALD_F   1.12837917
+#define EWALD_P   0.3275911
+#define A1        0.254829592
+#define A2       -0.284496736
+#define A3        1.421413741
+#define A4       -1.453152027
+#define A5        1.061405429
+
+/* ---------------------------------------------------------------------- */
+
+PairLJCutCoulPPPMOMP::PairLJCutCoulPPPMOMP(LAMMPS *lmp) :
+  PairLJCutCoulLong(lmp), ThrOMP(lmp, THR_PAIR|THR_PROXY)
+{
+  respa_enable = 0;
+  nproxy=1;
+  
+  kspace = NULL;
+}
+
+/* ---------------------------------------------------------------------- */
+
+void PairLJCutCoulPPPMOMP::init_style()
+{
+  if (comm->nthreads < 2)
+    error->all(FLERR,"need at least two threads per MPI task for this pair style");
+
+  if (strcmp(force->kspace_style,"pppm/proxy") != 0)
+    error->all(FLERR,"kspace style pppm/proxy is required with this pair style");
+
+  kspace = static_cast<PPPMProxy *>(force->kspace);
+
+  PairLJCutCoulLong::init_style();
+}
+
+/* ---------------------------------------------------------------------- */
+
+void PairLJCutCoulPPPMOMP::compute(int eflag, int vflag)
+{
+  if (eflag || vflag) {
+    ev_setup(eflag,vflag);
+  } else evflag = vflag_fdotr = 0;
+
+  const int nall = atom->nlocal + atom->nghost;
+  const int nthreads = comm->nthreads;
+  const int inum = list->inum;
+
+#if defined(_OPENMP)
+#pragma omp parallel default(none) shared(eflag,vflag)
+#endif
+  {
+    int ifrom, ito, tid;
+
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads, nproxy);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
+
+    // thread id 0 runs pppm, the rest the pair style
+    if (tid < nproxy) {
+      if (update->setupflag) kspace->setup_proxy();
+      kspace->compute_proxy(eflag,vflag);
+    } else {
+      if (evflag) {
+	if (eflag) {
+	  if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr);
+	  else eval<1,1,0>(ifrom, ito, thr);
+	} else {
+	  if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr);
+	  else eval<1,0,0>(ifrom, ito, thr);
+	}
+      } else {
+	if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr);
+	else eval<0,0,0>(ifrom, ito, thr);
+      }
+    }
+    
+    sync_threads();
+    reduce_thr(this, eflag, vflag, thr, nproxy);
+  } // end of omp parallel region
+}
+/* ---------------------------------------------------------------------- */
+
+template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
+void PairLJCutCoulPPPMOMP::eval(int iifrom, int iito, ThrData * const thr)
+{
+  int i,j,ii,jj,jnum,itype,jtype,itable;
+  double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair;
+  double fraction,table;
+  double r,rsq,r2inv,r6inv,forcecoul,forcelj,factor_coul,factor_lj;
+  double grij,expm2,prefactor,t,erfc;
+  int *ilist,*jlist,*numneigh,**firstneigh;
+
+  evdwl = ecoul = 0.0;
+
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  const double * const q = atom->q;
+  const int * const type = atom->type;
+  const int nlocal = atom->nlocal;
+  const double * const special_coul = force->special_coul;
+  const double * const special_lj = force->special_lj;
+  const double qqrd2e = force->qqrd2e;
+  double fxtmp,fytmp,fztmp;
+
+  ilist = list->ilist;
+  numneigh = list->numneigh;
+  firstneigh = list->firstneigh;
+
+  // loop over neighbors of my atoms
+
+  for (ii = iifrom; ii < iito; ++ii) {
+
+    i = ilist[ii];
+    qtmp = q[i];
+    xtmp = x[i][0];
+    ytmp = x[i][1];
+    ztmp = x[i][2];
+    itype = type[i];
+    jlist = firstneigh[i];
+    jnum = numneigh[i];
+    fxtmp=fytmp=fztmp=0.0;
+
+    for (jj = 0; jj < jnum; jj++) {
+      j = jlist[jj];
+      factor_lj = special_lj[sbmask(j)];
+      factor_coul = special_coul[sbmask(j)];
+      j &= NEIGHMASK;
+
+      delx = xtmp - x[j][0];
+      dely = ytmp - x[j][1];
+      delz = ztmp - x[j][2];
+      rsq = delx*delx + dely*dely + delz*delz;
+      jtype = type[j];
+
+      if (rsq < cutsq[itype][jtype]) {
+	r2inv = 1.0/rsq;
+
+	if (rsq < cut_coulsq) {
+	  if (!ncoultablebits || rsq <= tabinnersq) {
+	    r = sqrt(rsq);
+	    grij = g_ewald * r;
+	    expm2 = exp(-grij*grij);
+	    t = 1.0 / (1.0 + EWALD_P*grij);
+	    erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2;
+	    prefactor = qqrd2e * qtmp*q[j]/r;
+	    forcecoul = prefactor * (erfc + EWALD_F*grij*expm2);
+	    if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*prefactor;
+	  } else {
+	    union_int_float_t rsq_lookup;
+	    rsq_lookup.f = rsq;
+	    itable = rsq_lookup.i & ncoulmask;
+	    itable >>= ncoulshiftbits;
+	    fraction = (rsq_lookup.f - rtable[itable]) * drtable[itable];
+	    table = ftable[itable] + fraction*dftable[itable];
+	    forcecoul = qtmp*q[j] * table;
+	    if (factor_coul < 1.0) {
+	      table = ctable[itable] + fraction*dctable[itable];
+	      prefactor = qtmp*q[j] * table;
+	      forcecoul -= (1.0-factor_coul)*prefactor;
+	    }
+	  }
+	} else forcecoul = 0.0;
+
+	if (rsq < cut_ljsq[itype][jtype]) {
+	  r6inv = r2inv*r2inv*r2inv;
+	  forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
+	  forcelj *= factor_lj;
+	} else forcelj = 0.0;
+
+	fpair = (forcecoul + forcelj) * r2inv;
+
+	fxtmp += delx*fpair;
+	fytmp += dely*fpair;
+	fztmp += delz*fpair;
+	if (NEWTON_PAIR || j < nlocal) {
+	  f[j][0] -= delx*fpair;
+	  f[j][1] -= dely*fpair;
+	  f[j][2] -= delz*fpair;
+	}
+
+	if (EFLAG) {
+	  if (rsq < cut_coulsq) {
+	    if (!ncoultablebits || rsq <= tabinnersq)
+	      ecoul = prefactor*erfc;
+	    else {
+	      table = etable[itable] + fraction*detable[itable];
+	      ecoul = qtmp*q[j] * table;
+	    }
+	    if (factor_coul < 1.0) ecoul -= (1.0-factor_coul)*prefactor;
+	  } else ecoul = 0.0;
+
+	  if (rsq < cut_ljsq[itype][jtype]) {
+	    evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]) -
+	      offset[itype][jtype];
+	    evdwl *= factor_lj;
+	  } else evdwl = 0.0;
+	}
+
+	if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR,
+				 evdwl,ecoul,fpair,delx,dely,delz,thr);
+      }
+    }
+    f[i][0] += fxtmp;
+    f[i][1] += fytmp;
+    f[i][2] += fztmp;
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+double PairLJCutCoulPPPMOMP::memory_usage()
+{
+  double bytes = memory_usage_thr();
+  bytes += PairLJCutCoulLong::memory_usage();
+
+  return bytes;
+}
--- a/src/USER-OMP/pair_lj_cut_coul_pppm_omp.h
+++ b/src/USER-OMP/pair_lj_cut_coul_pppm_omp.h
@ -0,0 +1,54 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+
+PairStyle(lj/cut/coul/pppm/omp,PairLJCutCoulPPPMOMP)
+
+#else
+
+#ifndef LMP_PAIR_LJ_CUT_COUL_PPPM_OMP_H
+#define LMP_PAIR_LJ_CUT_COUL_PPPM_OMP_H
+
+#include "pair_lj_cut_coul_long.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+class PairLJCutCoulPPPMOMP : public PairLJCutCoulLong, public ThrOMP {
+
+ public:
+  PairLJCutCoulPPPMOMP(class LAMMPS *);
+  virtual ~PairLJCutCoulPPPMOMP() {};
+
+  virtual void init_style();
+
+  virtual void compute(int, int);
+  virtual double memory_usage();
+
+ private:
+  template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
+  void eval(int ifrom, int ito, ThrData * const thr);
+
+  class PPPMProxy *kspace;
+  int nproxy;
+};
+
+}
+
+#endif
+#endif
--- a/src/USER-OMP/pair_lj_cut_coul_pppm_tip4p_omp.cpp
+++ b/src/USER-OMP/pair_lj_cut_coul_pppm_tip4p_omp.cpp
@ -0,0 +1,487 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   This software is distributed under the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#include "math.h"
+#include "pair_lj_cut_coul_pppm_tip4p_omp.h"
+#include "pppm_proxy.h"
+#include "atom.h"
+#include "comm.h"
+#include "domain.h"
+#include "error.h"
+#include "force.h"
+#include "memory.h"
+#include "neighbor.h"
+#include "neigh_list.h"
+#include "update.h"
+
+#include <string.h>
+
+using namespace LAMMPS_NS;
+
+#define EWALD_F   1.12837917
+#define EWALD_P   0.3275911
+#define A1        0.254829592
+#define A2       -0.284496736
+#define A3        1.421413741
+#define A4       -1.453152027
+#define A5        1.061405429
+
+/* ---------------------------------------------------------------------- */
+
+PairLJCutCoulPPPMTIP4POMP::PairLJCutCoulPPPMTIP4POMP(LAMMPS *lmp) :
+  PairLJCutCoulLongTIP4P(lmp), ThrOMP(lmp, THR_PAIR|THR_PROXY)
+{
+  respa_enable = 0;
+  nproxy=1;
+  
+  kspace = NULL;
+
+  // for caching m-shift corrected positions
+  maxmpos = 0;
+  h1idx = h2idx = NULL;
+  mpos = NULL;
+}
+
+/* ---------------------------------------------------------------------- */
+
+PairLJCutCoulPPPMTIP4POMP::~PairLJCutCoulPPPMTIP4POMP()
+{
+  memory->destroy(h1idx);
+  memory->destroy(h2idx);
+  memory->destroy(mpos);
+}
+
+/* ---------------------------------------------------------------------- */
+
+void PairLJCutCoulPPPMTIP4POMP::init_style()
+{
+  if (comm->nthreads < 2)
+    error->all(FLERR,"need at least two threads per MPI task for this pair style");
+
+  if (strcmp(force->kspace_style,"pppm/tip4p/proxy") != 0)
+    error->all(FLERR,"kspace style pppm/tip4p/proxy is required with this pair style");
+
+  kspace = static_cast<PPPMProxy *>(force->kspace);
+
+  PairLJCutCoulLongTIP4P::init_style();
+}
+
+/* ---------------------------------------------------------------------- */
+
+void PairLJCutCoulPPPMTIP4POMP::compute(int eflag, int vflag)
+{
+  if (eflag || vflag) {
+    ev_setup(eflag,vflag);
+  } else evflag = vflag_fdotr = 0;
+
+  const int nlocal = atom->nlocal;
+  const int nall = nlocal + atom->nghost;
+
+  // reallocate per-atom arrays, if necessary
+  if (nall > maxmpos) {
+    maxmpos = nall;
+    memory->grow(mpos,maxmpos,3,"pair:mpos");
+    memory->grow(h1idx,maxmpos,"pair:h1idx");
+    memory->grow(h2idx,maxmpos,"pair:h2idx");
+  }
+
+  // cache corrected M positions in mpos[]
+  const double * const * const x = atom->x;
+  const int * const type = atom->type;
+  for (int i = 0; i < nlocal; i++) {
+    if (type[i] == typeO) {
+      find_M(i,h1idx[i],h2idx[i],mpos[i]);
+    } else {
+      mpos[i][0] = x[i][0];
+      mpos[i][1] = x[i][1];
+      mpos[i][2] = x[i][2];
+    }
+  }
+  for (int i = nlocal; i < nall; i++) {
+    if (type[i] == typeO) {
+      find_M_permissive(i,h1idx[i],h2idx[i],mpos[i]);
+    } else {
+      mpos[i][0] = x[i][0];
+      mpos[i][1] = x[i][1];
+      mpos[i][2] = x[i][2];
+    }
+  }
+
+  const int nthreads = comm->nthreads;
+  const int inum = list->inum;
+
+#if defined(_OPENMP)
+#pragma omp parallel default(none) shared(eflag,vflag)
+#endif
+  {
+    int ifrom, ito, tid;
+
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads, nproxy);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
+
+    // thread id 0 runs pppm, the rest the pair style
+    if (tid < nproxy) {
+      if (update->setupflag) kspace->setup_proxy();
+      kspace->compute_proxy(eflag,vflag);
+    } else {
+      if (evflag) {
+	if (eflag) {
+	  if (vflag) eval<1,1,1>(ifrom, ito, thr);
+	  else eval<1,1,0>(ifrom, ito, thr);
+	} else {
+	  if (vflag) eval<1,0,1>(ifrom, ito, thr);
+	  else eval<1,0,0>(ifrom, ito, thr);
+	}
+      } else {
+	eval<0,0,0>(ifrom, ito, thr);
+      }
+    }
+    
+    sync_threads();
+    reduce_thr(this, eflag, vflag, thr, nproxy);
+  } // end of omp parallel region
+}
+
+/* ---------------------------------------------------------------------- */
+
+template <int EVFLAG, int EFLAG, int VFLAG>
+void PairLJCutCoulPPPMTIP4POMP::eval(int iifrom, int iito, ThrData * const thr)
+{
+  int i,j,ii,jj,jnum,itype,jtype,itable;
+  int n,vlist[6];
+  int iH1,iH2,jH1,jH2;
+  double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul;
+  double fraction,table;
+  double delxOM,delyOM,delzOM;
+  double r,rsq,r2inv,r6inv,forcecoul,forcelj,cforce;
+  double factor_coul,factor_lj;
+  double grij,expm2,prefactor,t,erfc,ddotf;
+  double v[6],xH1[3],xH2[3];
+  double fdx,fdy,fdz,f1x,f1y,f1z,fOx,fOy,fOz,fHx,fHy,fHz;
+  double *x1,*x2;
+  int *ilist,*jlist,*numneigh,**firstneigh;
+
+  evdwl = ecoul = 0.0;
+
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  const double * const q = atom->q;
+  const int * const type = atom->type;
+  const int nlocal = atom->nlocal;
+  const double * const special_coul = force->special_coul;
+  const double * const special_lj = force->special_lj;
+  const double qqrd2e = force->qqrd2e;
+  double fxtmp,fytmp,fztmp;
+
+  ilist = list->ilist;
+  numneigh = list->numneigh;
+  firstneigh = list->firstneigh;
+
+  // loop over neighbors of my atoms
+
+  for (ii = iifrom; ii < iito; ++ii) {
+
+    i = ilist[ii];
+    qtmp = q[i];
+    xtmp = x[i][0];
+    ytmp = x[i][1];
+    ztmp = x[i][2];
+    itype = type[i];
+
+    jlist = firstneigh[i];
+    jnum = numneigh[i];
+    fxtmp=fytmp=fztmp=0.0;
+    x1 = mpos[i];
+    iH1 = h1idx[i];
+    iH2 = h2idx[i];
+
+    for (jj = 0; jj < jnum; jj++) {
+      j = jlist[jj];
+      factor_lj = special_lj[sbmask(j)];
+      factor_coul = special_coul[sbmask(j)];
+      j &= NEIGHMASK;
+
+      delx = xtmp - x[j][0];
+      dely = ytmp - x[j][1];
+      delz = ztmp - x[j][2];
+      rsq = delx*delx + dely*dely + delz*delz;
+      jtype = type[j];
+
+      if (rsq < cutsq[itype][jtype]) {
+
+	r2inv = 1.0/rsq;
+	if (rsq < cut_ljsq[itype][jtype]) {
+	  r6inv = r2inv*r2inv*r2inv;
+	  forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
+	  forcelj *= factor_lj * r2inv;
+
+	  fxtmp += delx*forcelj;
+	  fytmp += dely*forcelj;
+	  fztmp += delz*forcelj;
+	  f[j][0] -= delx*forcelj;
+	  f[j][1] -= dely*forcelj;
+	  f[j][2] -= delz*forcelj;
+
+	  if (EFLAG) {
+	    evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]) -
+	      offset[itype][jtype];
+	    evdwl *= factor_lj;
+	  } else evdwl = 0.0;
+
+	  if (EVFLAG) ev_tally_thr(this,i,j,nlocal, /* newton_pair = */ 1,
+				   evdwl,0.0,forcelj,delx,dely,delz,thr);
+	}
+
+	// adjust rsq and delxyz for off-site O charge(s)
+
+	if (itype == typeO || jtype == typeO) { 
+	  x2 = mpos[j];
+	  jH1 = h1idx[j];
+	  jH2 = h2idx[j];
+	  if (jtype == typeO  && ( jH1 < 0 || jH2 < 0 ))
+	    error->one(FLERR,"TIP4P hydrogen is missing");
+	  delx = x1[0] - x2[0];
+	  dely = x1[1] - x2[1];
+	  delz = x1[2] - x2[2];
+	  rsq = delx*delx + dely*dely + delz*delz;
+	}
+
+	// test current rsq against cutoff and compute Coulombic force
+
+	if (rsq < cut_coulsq) {
+	  r2inv = 1 / rsq;
+	  if (!ncoultablebits || rsq <= tabinnersq) {
+	    r = sqrt(rsq);
+	    grij = g_ewald * r;
+	    expm2 = exp(-grij*grij);
+	    t = 1.0 / (1.0 + EWALD_P*grij);
+	    erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2;
+	    prefactor = qqrd2e * qtmp*q[j]/r;
+	    forcecoul = prefactor * (erfc + EWALD_F*grij*expm2);
+	    if (factor_coul < 1.0) {
+	      forcecoul -= (1.0-factor_coul)*prefactor; 
+	    }
+	  } else {
+	    union_int_float_t rsq_lookup;
+	    rsq_lookup.f = rsq;
+	    itable = rsq_lookup.i & ncoulmask;
+	    itable >>= ncoulshiftbits;
+	    fraction = (rsq_lookup.f - rtable[itable]) * drtable[itable];
+	    table = ftable[itable] + fraction*dftable[itable];
+	    forcecoul = qtmp*q[j] * table;
+	    if (factor_coul < 1.0) {
+	      table = ctable[itable] + fraction*dctable[itable];
+	      prefactor = qtmp*q[j] * table;
+	      forcecoul -= (1.0-factor_coul)*prefactor;
+	    }
+	  }
+
+	  cforce = forcecoul * r2inv;
+
+	  // if i,j are not O atoms, force is applied directly
+	  // if i or j are O atoms, force is on fictitious atom & partitioned
+	  // force partitioning due to Feenstra, J Comp Chem, 20, 786 (1999)
+	  // f_f = fictitious force, fO = f_f (1 - 2 alpha), fH = alpha f_f
+	  // preserves total force and torque on water molecule
+	  // virial = sum(r x F) where each water's atoms are near xi and xj
+	  // vlist stores 2,4,6 atoms whose forces contribute to virial
+
+	  n = 0;
+
+	  if (itype != typeO) {
+	    fxtmp += delx * cforce;
+	    fytmp += dely * cforce;
+	    fztmp += delz * cforce;
+
+	    if (VFLAG) {
+	      v[0] = x[i][0] * delx * cforce;
+	      v[1] = x[i][1] * dely * cforce;
+	      v[2] = x[i][2] * delz * cforce;
+	      v[3] = x[i][0] * dely * cforce;
+	      v[4] = x[i][0] * delz * cforce;
+	      v[5] = x[i][1] * delz * cforce;
+	      vlist[n++] = i;
+	    }
+
+	  } else {
+
+            fdx = delx*cforce;
+            fdy = dely*cforce;
+            fdz = delz*cforce;
+
+            delxOM = x[i][0] - x1[0];
+            delyOM = x[i][1] - x1[1];
+            delzOM = x[i][2] - x1[2];
+
+            ddotf = (delxOM * fdx + delyOM * fdy + delzOM * fdz) /
+	      (qdist*qdist);
+
+	    f1x = alpha * (fdx - ddotf * delxOM);
+	    f1y = alpha * (fdy - ddotf * delyOM);
+	    f1z = alpha * (fdz - ddotf * delzOM);
+
+            fOx = fdx - f1x;
+            fOy = fdy - f1y;
+            fOz = fdz - f1z;
+
+            fHx = 0.5 * f1x;
+            fHy = 0.5 * f1y;
+            fHz = 0.5 * f1z;
+
+            fxtmp += fOx;
+            fytmp += fOy;
+            fztmp += fOz;
+
+            f[iH1][0] += fHx;
+            f[iH1][1] += fHy;
+            f[iH1][2] += fHz;
+
+            f[iH2][0] += fHx;
+            f[iH2][1] += fHy;
+            f[iH2][2] += fHz;
+
+	    if (VFLAG) {
+	      domain->closest_image(x[i],x[iH1],xH1);
+	      domain->closest_image(x[i],x[iH2],xH2);
+
+	      v[0] = x[i][0]*fOx + xH1[0]*fHx + xH2[0]*fHx;
+	      v[1] = x[i][1]*fOy + xH1[1]*fHy + xH2[1]*fHy;
+	      v[2] = x[i][2]*fOz + xH1[2]*fHz + xH2[2]*fHz;
+	      v[3] = x[i][0]*fOy + xH1[0]*fHy + xH2[0]*fHy;
+	      v[4] = x[i][0]*fOz + xH1[0]*fHz + xH2[0]*fHz;
+	      v[5] = x[i][1]*fOz + xH1[1]*fHz + xH2[1]*fHz;
+
+	      vlist[n++] = i;
+	      vlist[n++] = iH1;
+	      vlist[n++] = iH2;
+	    }
+	  }
+
+	  if (jtype != typeO) {
+	    f[j][0] -= delx * cforce;
+	    f[j][1] -= dely * cforce;
+	    f[j][2] -= delz * cforce;
+
+	    if (VFLAG) {
+	      v[0] -= x[j][0] * delx * cforce;
+	      v[1] -= x[j][1] * dely * cforce;
+	      v[2] -= x[j][2] * delz * cforce;
+	      v[3] -= x[j][0] * dely * cforce;
+	      v[4] -= x[j][0] * delz * cforce;
+	      v[5] -= x[j][1] * delz * cforce;
+	      vlist[n++] = j;
+	    }
+
+	  } else {
+
+	    fdx = -delx*cforce;
+	    fdy = -dely*cforce;
+	    fdz = -delz*cforce;
+
+	    delxOM = x[j][0] - x2[0];
+	    delyOM = x[j][1] - x2[1];
+	    delzOM = x[j][2] - x2[2];
+
+            ddotf = (delxOM * fdx + delyOM * fdy + delzOM * fdz) /
+	      (qdist*qdist);
+
+	    f1x = alpha * (fdx - ddotf * delxOM);
+	    f1y = alpha * (fdy - ddotf * delyOM);
+	    f1z = alpha * (fdz - ddotf * delzOM);
+
+            fOx = fdx - f1x;
+            fOy = fdy - f1y;
+            fOz = fdz - f1z;
+
+            fHx = 0.5 * f1x;
+            fHy = 0.5 * f1y;
+            fHz = 0.5 * f1z;
+
+	    f[j][0] += fOx;
+	    f[j][1] += fOy;
+	    f[j][2] += fOz;
+
+            f[jH1][0] += fHx;
+            f[jH1][1] += fHy;
+            f[jH1][2] += fHz;
+
+            f[jH2][0] += fHx;
+            f[jH2][1] += fHy;
+            f[jH2][2] += fHz;
+
+	    if (VFLAG) {
+	      domain->closest_image(x[j],x[jH1],xH1);
+	      domain->closest_image(x[j],x[jH2],xH2);
+
+	      v[0] += x[j][0]*fOx + xH1[0]*fHx + xH2[0]*fHx;
+	      v[1] += x[j][1]*fOy + xH1[1]*fHy + xH2[1]*fHy;
+	      v[2] += x[j][2]*fOz + xH1[2]*fHz + xH2[2]*fHz;
+	      v[3] += x[j][0]*fOy + xH1[0]*fHy + xH2[0]*fHy;
+	      v[4] += x[j][0]*fOz + xH1[0]*fHz + xH2[0]*fHz;
+	      v[5] += x[j][1]*fOz + xH1[1]*fHz + xH2[1]*fHz;
+
+	      vlist[n++] = j;
+	      vlist[n++] = jH1;
+	      vlist[n++] = jH2;
+	    }
+	  }
+
+	  if (EFLAG) {
+	    if (!ncoultablebits || rsq <= tabinnersq)
+	      ecoul = prefactor*erfc;
+	    else {
+	      table = etable[itable] + fraction*detable[itable];
+	      ecoul = qtmp*q[j] * table;
+	    }
+	    if (factor_coul < 1.0) ecoul -= (1.0-factor_coul)*prefactor;
+	  } else ecoul = 0.0;
+
+	  if (EVFLAG) ev_tally_list_thr(this,n,vlist,ecoul,v,thr);
+	}
+      }
+    }
+    f[i][0] += fxtmp;
+    f[i][1] += fytmp;
+    f[i][2] += fztmp;
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+void PairLJCutCoulPPPMTIP4POMP::find_M_permissive(int i, int &iH1, int &iH2, double *xM)
+{
+  // test that O is correctly bonded to 2 succesive H atoms
+
+   iH1 = atom->map(atom->tag[i] + 1);
+   iH2 = atom->map(atom->tag[i] + 2);
+
+   if (iH1 == -1 || iH2 == -1)
+      return;
+   else
+      find_M(i,iH1,iH2,xM);
+}
+
+/* ---------------------------------------------------------------------- */
+
+double PairLJCutCoulPPPMTIP4POMP::memory_usage()
+{
+  double bytes = memory_usage_thr();
+  bytes += PairLJCutCoulLongTIP4P::memory_usage();
+  bytes += 2 * maxmpos * sizeof(int);
+  bytes += 3 * maxmpos * sizeof(double);
+  bytes += maxmpos * sizeof(double *);
+
+  return bytes;
+}
--- a/src/USER-OMP/pair_lj_cut_coul_pppm_tip4p_omp.h
+++ b/src/USER-OMP/pair_lj_cut_coul_pppm_tip4p_omp.h
@ -0,0 +1,61 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+
+PairStyle(lj/cut/coul/pppm/tip4p/omp,PairLJCutCoulPPPMTIP4POMP)
+
+#else
+
+#ifndef LMP_PAIR_LJ_CUT_COUL_PPPM_TIP4P_OMP_H
+#define LMP_PAIR_LJ_CUT_COUL_PPPM_TIP4P_OMP_H
+
+#include "pair_lj_cut_coul_long_tip4p.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+class PairLJCutCoulPPPMTIP4POMP : public PairLJCutCoulLongTIP4P, public ThrOMP {
+
+ public:
+  PairLJCutCoulPPPMTIP4POMP(class LAMMPS *);
+  virtual ~PairLJCutCoulPPPMTIP4POMP();
+
+  virtual void init_style();
+
+  virtual void compute(int, int);
+  virtual double memory_usage();
+
+ protected:
+  // this is to cache m-shift corrected positions.
+  int maxmpos;        // size of the following arrays
+  int *h1idx, *h2idx; // local index of hydrogen atoms
+  double **mpos;      // coordinates corrected for m-shift.
+  void find_M_permissive(int, int &, int &, double *);
+
+ private:
+  template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
+  void eval(int ifrom, int ito, ThrData * const thr);
+
+  class PPPMProxy *kspace;
+  int nproxy;
+};
+
+}
+
+#endif
+#endif
--- a/src/USER-OMP/pair_lj_sdk_coul_long_omp.cpp
+++ b/src/USER-OMP/pair_lj_sdk_coul_long_omp.cpp
@ -0,0 +1,248 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   This software is distributed under the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#include "math.h"
+#include "pair_lj_sdk_coul_long_omp.h"
+#include "atom.h"
+#include "comm.h"
+#include "force.h"
+#include "neighbor.h"
+#include "neigh_list.h"
+
+#include "lj_sdk_common.h"
+
+using namespace LAMMPS_NS;
+using namespace LJSDKParms;
+
+#define EWALD_F   1.12837917
+#define EWALD_P   0.3275911
+#define A1        0.254829592
+#define A2       -0.284496736
+#define A3        1.421413741
+#define A4       -1.453152027
+#define A5        1.061405429
+
+/* ---------------------------------------------------------------------- */
+
+PairLJSDKCoulLongOMP::PairLJSDKCoulLongOMP(LAMMPS *lmp) :
+  PairLJSDKCoulLong(lmp), ThrOMP(lmp, THR_PAIR)
+{
+  respa_enable = 0;
+}
+
+/* ---------------------------------------------------------------------- */
+
+void PairLJSDKCoulLongOMP::compute(int eflag, int vflag)
+{
+  if (eflag || vflag) {
+    ev_setup(eflag,vflag);
+  } else evflag = vflag_fdotr = 0;
+
+  const int nall = atom->nlocal + atom->nghost;
+  const int nthreads = comm->nthreads;
+  const int inum = list->inum;
+
+#if defined(_OPENMP)
+#pragma omp parallel default(none) shared(eflag,vflag)
+#endif
+  {
+    int ifrom, ito, tid;
+
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
+
+    if (evflag) {
+      if (eflag) {
+	if (force->newton_pair) eval_thr<1,1,1>(ifrom, ito, thr);
+	else eval_thr<1,1,0>(ifrom, ito, thr);
+      } else {
+	if (force->newton_pair) eval_thr<1,0,1>(ifrom, ito, thr);
+	else eval_thr<1,0,0>(ifrom, ito, thr);
+      }
+    } else {
+      if (force->newton_pair) eval_thr<0,0,1>(ifrom, ito, thr);
+      else eval_thr<0,0,0>(ifrom, ito, thr);
+    }
+
+    reduce_thr(this, eflag, vflag, thr);
+  } // end of omp parallel region
+}
+
+/* ---------------------------------------------------------------------- */
+
+template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
+void PairLJSDKCoulLongOMP::eval_thr(int iifrom, int iito, ThrData * const thr)
+{
+  int i,ii,j,jj,jtype,itable;
+  double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair;
+  double fraction,table;
+  double r,rsq,r2inv,forcecoul,forcelj,factor_coul,factor_lj;
+  double grij,expm2,prefactor,t,erfc;
+
+  evdwl = ecoul = 0.0;
+
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  const double * const q = atom->q;
+  const int * const type = atom->type;
+  const int nlocal = atom->nlocal;
+  const double * const special_coul = force->special_coul;
+  const double * const special_lj = force->special_lj;
+  const double qqrd2e = force->qqrd2e;
+  double fxtmp,fytmp,fztmp;
+
+  const int * const ilist = list->ilist;
+  const int * const numneigh = list->numneigh;
+  const int * const * const firstneigh = list->firstneigh;
+
+  // loop over neighbors of my atoms
+
+  for (ii = iifrom; ii < iito; ++ii) {
+
+    i = ilist[ii];
+    qtmp = q[i];
+    xtmp = x[i][0];
+    ytmp = x[i][1];
+    ztmp = x[i][2];
+    fxtmp=fytmp=fztmp=0.0;
+
+    const int itype = type[i];
+    const int * const jlist = firstneigh[i];
+    const int jnum = numneigh[i];
+
+    for (jj = 0; jj < jnum; jj++) {
+      j = jlist[jj];
+      factor_lj = special_lj[sbmask(j)];
+      factor_coul = special_coul[sbmask(j)];
+      j &= NEIGHMASK;
+
+      delx = xtmp - x[j][0];
+      dely = ytmp - x[j][1];
+      delz = ztmp - x[j][2];
+      rsq = delx*delx + dely*dely + delz*delz;
+      jtype = type[j];
+
+      if (rsq < cutsq[itype][jtype]) {
+	r2inv = 1.0/rsq;
+	const int ljt = lj_type[itype][jtype];
+
+	if (rsq < cut_coulsq) {
+	  if (!ncoultablebits || rsq <= tabinnersq) {
+	    r = sqrt(rsq);
+	    grij = g_ewald * r;
+	    expm2 = exp(-grij*grij);
+	    t = 1.0 / (1.0 + EWALD_P*grij);
+	    erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2;
+	    prefactor = qqrd2e * qtmp*q[j]/r;
+	    forcecoul = prefactor * (erfc + EWALD_F*grij*expm2);
+	    if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*prefactor;
+	  } else {
+	    union_int_float_t rsq_lookup;
+	    rsq_lookup.f = rsq;
+	    itable = rsq_lookup.i & ncoulmask;
+	    itable >>= ncoulshiftbits;
+	    fraction = (rsq_lookup.f - rtable[itable]) * drtable[itable];
+	    table = ftable[itable] + fraction*dftable[itable];
+	    forcecoul = qtmp*q[j] * table;
+	    if (factor_coul < 1.0) {
+	      table = ctable[itable] + fraction*dctable[itable];
+	      prefactor = qtmp*q[j] * table;
+	      forcecoul -= (1.0-factor_coul)*prefactor;
+	    }
+	  }
+	} else {
+	  forcecoul = 0.0;
+	  ecoul = 0.0;
+	}
+
+	if (rsq < cut_ljsq[itype][jtype]) {
+
+	  if (ljt == LJ12_4) {
+	    const double r4inv=r2inv*r2inv;
+	    forcelj = r4inv*(lj1[itype][jtype]*r4inv*r4inv
+			     - lj2[itype][jtype]);
+
+	    if (EFLAG)
+	      evdwl = r4inv*(lj3[itype][jtype]*r4inv*r4inv
+			     - lj4[itype][jtype]) - offset[itype][jtype];
+	  
+	  } else if (ljt == LJ9_6) {
+	    const double r3inv = r2inv*sqrt(r2inv);
+	    const double r6inv = r3inv*r3inv;
+	    forcelj = r6inv*(lj1[itype][jtype]*r3inv
+			     - lj2[itype][jtype]);
+	    if (EFLAG)
+	      evdwl = r6inv*(lj3[itype][jtype]*r3inv
+			     - lj4[itype][jtype]) - offset[itype][jtype];
+
+	  } else if (ljt == LJ12_6) {
+	    const double r6inv = r2inv*r2inv*r2inv;
+	    forcelj = r6inv*(lj1[itype][jtype]*r6inv
+			     - lj2[itype][jtype]);
+	    if (EFLAG)
+	      evdwl = r6inv*(lj3[itype][jtype]*r6inv
+			     - lj4[itype][jtype]) - offset[itype][jtype];
+	  }
+	} else {
+	  forcelj=0.0;
+	  evdwl = 0.0;
+	}
+
+	fpair = (forcecoul + factor_lj*forcelj) * r2inv;
+
+	fxtmp += delx*fpair;
+	fytmp += dely*fpair;
+	fztmp += delz*fpair;
+	if (NEWTON_PAIR || j < nlocal) {
+	  f[j][0] -= delx*fpair;
+	  f[j][1] -= dely*fpair;
+	  f[j][2] -= delz*fpair;
+	}
+
+	if (EFLAG) {
+	  if (rsq < cut_coulsq) {
+	    if (!ncoultablebits || rsq <= tabinnersq)
+	      ecoul = prefactor*erfc;
+	    else {
+	      table = etable[itable] + fraction*detable[itable];
+	      ecoul = qtmp*q[j] * table;
+	    }
+	    if (factor_coul < 1.0) ecoul -= (1.0-factor_coul)*prefactor;
+	  } else ecoul = 0.0;
+
+	  if (rsq < cut_ljsq[itype][jtype]) {
+	    evdwl *= factor_lj;
+	  } else evdwl = 0.0;
+	}
+
+	if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR,
+				 evdwl,ecoul,fpair,delx,dely,delz,thr);
+      }
+    }
+    f[i][0] += fxtmp;
+    f[i][1] += fytmp;
+    f[i][2] += fztmp;
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+double PairLJSDKCoulLongOMP::memory_usage()
+{
+  double bytes = memory_usage_thr();
+  bytes += PairLJSDKCoulLong::memory_usage();
+
+  return bytes;
+}
--- a/src/USER-OMP/pair_lj_sdk_coul_long_omp.h
+++ b/src/USER-OMP/pair_lj_sdk_coul_long_omp.h
@ -0,0 +1,49 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+
+PairStyle(lj/sdk/coul/long/omp,PairLJSDKCoulLongOMP)
+PairStyle(cg/cmm/coul/long/omp,PairLJSDKCoulLongOMP)
+
+#else
+
+#ifndef LMP_PAIR_LJ_SDK_COUL_LONG_OMP_H
+#define LMP_PAIR_LJ_SDK_COUL_LONG_OMP_H
+
+#include "pair_lj_sdk_coul_long.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+class PairLJSDKCoulLongOMP : public PairLJSDKCoulLong, public ThrOMP {
+
+ public:
+  PairLJSDKCoulLongOMP(class LAMMPS *);
+
+  virtual void compute(int, int);
+  virtual double memory_usage();
+
+ private:
+  template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
+  void eval_thr(int ifrom, int ito, ThrData * const thr);
+};
+
+}
+
+#endif
+#endif
--- a/src/USER-OMP/pair_lj_sdk_omp.cpp
+++ b/src/USER-OMP/pair_lj_sdk_omp.cpp
@ -0,0 +1,184 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   This software is distributed under the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+   This style is a simplified re-implementation of the CG/CMM pair style
+------------------------------------------------------------------------- */
+
+#include "math.h"
+#include "pair_lj_sdk_omp.h"
+#include "atom.h"
+#include "comm.h"
+#include "force.h"
+#include "neighbor.h"
+#include "neigh_list.h"
+
+#include "lj_sdk_common.h"
+
+using namespace LAMMPS_NS;
+using namespace LJSDKParms;
+
+/* ---------------------------------------------------------------------- */
+
+PairLJSDKOMP::PairLJSDKOMP(LAMMPS *lmp) :
+  PairLJSDK(lmp), ThrOMP(lmp, THR_PAIR)
+{
+  respa_enable = 0;
+}
+
+/* ---------------------------------------------------------------------- */
+
+void PairLJSDKOMP::compute(int eflag, int vflag)
+{
+  if (eflag || vflag) {
+    ev_setup(eflag,vflag);
+  } else evflag = vflag_fdotr = 0;
+
+  const int nall = atom->nlocal + atom->nghost;
+  const int nthreads = comm->nthreads;
+  const int inum = list->inum;
+
+#if defined(_OPENMP)
+#pragma omp parallel default(none) shared(eflag,vflag)
+#endif
+  {
+    int ifrom, ito, tid;
+
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
+
+    if (evflag) {
+      if (eflag) {
+	if (force->newton_pair) eval_thr<1,1,1>(ifrom, ito, thr);
+	else eval_thr<1,1,0>(ifrom, ito, thr);
+      } else {
+	if (force->newton_pair) eval_thr<1,0,1>(ifrom, ito, thr);
+	else eval_thr<1,0,0>(ifrom, ito, thr);
+      }
+    } else {
+      if (force->newton_pair) eval_thr<0,0,1>(ifrom, ito, thr);
+      else eval_thr<0,0,0>(ifrom, ito, thr);
+    }
+
+    reduce_thr(this, eflag, vflag, thr);
+  } // end of omp parallel region
+}
+
+/* ---------------------------------------------------------------------- */
+
+template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
+void PairLJSDKOMP::eval_thr(int iifrom, int iito, ThrData * const thr)
+{
+  int i,j,ii,jj,jtype;
+  double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
+  double rsq,r2inv,forcelj,factor_lj;
+
+  evdwl = 0.0;
+
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  const int * const type = atom->type;
+  const int nlocal = atom->nlocal;
+  const double * const special_lj = force->special_lj;
+  double fxtmp,fytmp,fztmp;
+
+  const int * const ilist = list->ilist;
+  const int * const numneigh = list->numneigh;
+  const int * const * const firstneigh = list->firstneigh;
+
+  // loop over neighbors of my atoms
+
+  for (ii = iifrom; ii < iito; ++ii) {
+
+    i = ilist[ii];
+    xtmp = x[i][0];
+    ytmp = x[i][1];
+    ztmp = x[i][2];
+    fxtmp=fytmp=fztmp=0.0;
+
+    const int itype = type[i];
+    const int * const jlist = firstneigh[i];
+    const int jnum = numneigh[i];
+
+    for (jj = 0; jj < jnum; jj++) {
+      j = jlist[jj];
+      factor_lj = special_lj[sbmask(j)];
+      j &= NEIGHMASK;
+
+      delx = xtmp - x[j][0];
+      dely = ytmp - x[j][1];
+      delz = ztmp - x[j][2];
+      rsq = delx*delx + dely*dely + delz*delz;
+      jtype = type[j];
+
+      if (rsq < cutsq[itype][jtype]) {
+	r2inv = 1.0/rsq;
+	const int ljt = lj_type[itype][jtype];
+
+	if (ljt == LJ12_4) {
+	  const double r4inv=r2inv*r2inv;
+	  forcelj = r4inv*(lj1[itype][jtype]*r4inv*r4inv
+			   - lj2[itype][jtype]);
+
+	  if (EFLAG)
+	    evdwl = r4inv*(lj3[itype][jtype]*r4inv*r4inv
+			   - lj4[itype][jtype]) - offset[itype][jtype];
+	  
+	} else if (ljt == LJ9_6) {
+	  const double r3inv = r2inv*sqrt(r2inv);
+	  const double r6inv = r3inv*r3inv;
+	  forcelj = r6inv*(lj1[itype][jtype]*r3inv
+			   - lj2[itype][jtype]);
+	  if (EFLAG)
+	    evdwl = r6inv*(lj3[itype][jtype]*r3inv
+			   - lj4[itype][jtype]) - offset[itype][jtype];
+
+	} else if (ljt == LJ12_6) {
+	  const double r6inv = r2inv*r2inv*r2inv;
+	  forcelj = r6inv*(lj1[itype][jtype]*r6inv
+			  - lj2[itype][jtype]);
+	  if (EFLAG)
+	    evdwl = r6inv*(lj3[itype][jtype]*r6inv
+			   - lj4[itype][jtype]) - offset[itype][jtype];
+	} else continue;
+
+	fpair = factor_lj*forcelj*r2inv;
+
+	fxtmp += delx*fpair;
+	fytmp += dely*fpair;
+	fztmp += delz*fpair;
+	if (NEWTON_PAIR || j < nlocal) {
+	  f[j][0] -= delx*fpair;
+	  f[j][1] -= dely*fpair;
+	  f[j][2] -= delz*fpair;
+	}
+
+	if (EFLAG) evdwl *= factor_lj;
+	if (EVFLAG) ev_tally_thr(this,i,j,nlocal,NEWTON_PAIR,
+				 evdwl,0.0,fpair,delx,dely,delz,thr);
+      }
+    }
+    f[i][0] += fxtmp;
+    f[i][1] += fytmp;
+    f[i][2] += fztmp;
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+double PairLJSDKOMP::memory_usage()
+{
+  double bytes = memory_usage_thr();
+  bytes += PairLJSDK::memory_usage();
+
+  return bytes;
+}
--- a/src/USER-OMP/pair_lj_sdk_omp.h
+++ b/src/USER-OMP/pair_lj_sdk_omp.h
@ -0,0 +1,49 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+
+PairStyle(lj/sdk/omp,PairLJSDKOMP)
+PairStyle(cg/cmm/omp,PairLJSDKOMP)
+
+#else
+
+#ifndef LMP_PAIR_LJ_SDK_OMP_H
+#define LMP_PAIR_LJ_SDK_OMP_H
+
+#include "pair_lj_sdk.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+class PairLJSDKOMP : public PairLJSDK, public ThrOMP {
+
+ public:
+  PairLJSDKOMP(class LAMMPS *);
+
+  virtual void compute(int, int);
+  virtual double memory_usage();
+
+ private:
+  template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
+  void eval_thr(int ifrom, int ito, ThrData * const thr);
+};
+
+}
+
+#endif
+#endif
--- a/src/USER-OMP/pair_rebo_omp.cpp
+++ b/src/USER-OMP/pair_rebo_omp.cpp
@ -0,0 +1,33 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#include "pair_rebo_omp.h"
+#include "error.h"
+
+using namespace LAMMPS_NS;
+
+/* ---------------------------------------------------------------------- */
+
+PairREBOOMP::PairREBOOMP(LAMMPS *lmp) : PairAIREBOOMP(lmp) {}
+
+/* ----------------------------------------------------------------------
+   global settings
+------------------------------------------------------------------------- */
+
+void PairREBOOMP::settings(int narg, char **arg)
+{
+  if (narg != 0) error->all(FLERR,"Illegal pair_style command");
+
+  cutlj = 0.0;
+  ljflag = torflag = 0;
+}
--- a/src/USER-OMP/pair_rebo_omp.h
+++ b/src/USER-OMP/pair_rebo_omp.h
@ -0,0 +1,36 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+
+PairStyle(rebo/omp,PairREBOOMP)
+
+#else
+
+#ifndef LMP_PAIR_REBO_OMP_H
+#define LMP_PAIR_REBO_OMP_H
+
+#include "pair_airebo_omp.h"
+
+namespace LAMMPS_NS {
+
+class PairREBOOMP : public PairAIREBOOMP {
+ public:
+  PairREBOOMP(class LAMMPS *);
+  virtual void settings(int, char **);
+};
+
+}
+
+#endif
+#endif
--- a/src/USER-OMP/pair_tersoff_table_omp.cpp
+++ b/src/USER-OMP/pair_tersoff_table_omp.cpp
@ -0,0 +1,496 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   This software is distributed under the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#include "math.h"
+#include "pair_tersoff_table_omp.h"
+#include "atom.h"
+#include "comm.h"
+#include "force.h"
+#include "neighbor.h"
+#include "neigh_list.h"
+
+using namespace LAMMPS_NS;
+
+/* ---------------------------------------------------------------------- */
+
+PairTersoffTableOMP::PairTersoffTableOMP(LAMMPS *lmp) :
+  PairTersoffTable(lmp), ThrOMP(lmp, THR_PAIR)
+{
+  respa_enable = 0;
+}
+
+/* ---------------------------------------------------------------------- */
+
+void PairTersoffTableOMP::compute(int eflag, int vflag)
+{
+  if (eflag || vflag) {
+    ev_setup(eflag,vflag);
+  } else evflag = vflag_fdotr = vflag_atom = 0;
+
+  const int nall = atom->nlocal + atom->nghost;
+  const int nthreads = comm->nthreads;
+  const int inum = list->inum;
+
+#if defined(_OPENMP)
+#pragma omp parallel default(none) shared(eflag,vflag)
+#endif
+  {
+    int ifrom, ito, tid;
+
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
+
+    if (evflag)
+      if (vflag_atom) eval<1,1>(ifrom, ito, thr);
+      else eval<1,0>(ifrom, ito, thr);
+    else eval<0,0>(ifrom, ito, thr);
+
+    reduce_thr(this, eflag, vflag, thr);
+  } // end of omp parallel region
+}
+
+template <int EVFLAG, int VFLAG_ATOM>
+void PairTersoffTableOMP::eval(int iifrom, int iito, ThrData * const thr)
+{
+  int i,j,k,ii,inum,jnum;
+  int itype,jtype,ktype,ijparam,ikparam,ijkparam;
+  double xtmp,ytmp,ztmp;
+  double fxtmp,fytmp,fztmp;
+  int *ilist,*jlist,*numneigh,**firstneigh;
+
+  int interpolIDX;
+  double r_ik_x, r_ik_y, r_ik_z;
+  double directorCos_ij_x, directorCos_ij_y, directorCos_ij_z, directorCos_ik_x, directorCos_ik_y, directorCos_ik_z;
+  double invR_ij, invR_ik, cosTeta;
+  double repulsivePotential, attractivePotential;
+  double exponentRepulsivePotential, exponentAttractivePotential,interpolTMP,interpolDeltaX,interpolY1;
+  double interpolY2, cutoffFunctionIJ, attractiveExponential, repulsiveExponential, cutoffFunctionDerivedIJ,zeta; 
+  double gtetaFunctionIJK,gtetaFunctionDerivedIJK,cutoffFunctionIK;
+  double cutoffFunctionDerivedIK,factor_force3_ij,factor_1_force3_ik;
+  double factor_2_force3_ik,betaZetaPowerIJK,betaZetaPowerDerivedIJK,factor_force_tot;
+  double factor_force_ij;
+  double gtetaFunctionDerived_temp,gtetaFunction_temp;
+
+  double evdwl = 0.0;
+
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  const int * const type = atom->type;
+  const int nlocal = atom->nlocal;
+
+  inum = list->inum;
+  ilist = list->ilist;
+  numneigh = list->numneigh;
+  firstneigh = list->firstneigh;
+
+  // loop over full neighbor list of my atoms
+  for (ii = iifrom; ii < iito; ii++) {
+
+    i = ilist[ii];
+    itype = map[type[i]];
+    xtmp = x[i][0];
+    ytmp = x[i][1];
+    ztmp = x[i][2];
+    fxtmp = fytmp = fztmp = 0.0;
+
+    jlist = firstneigh[i];
+    jnum = numneigh[i];
+
+    // Pre-calculate gteta and cutoff function
+    for (int neighbor_j = 0; neighbor_j < jnum; neighbor_j++) {
+
+      double dr_ij[3], r_ij;
+
+      j = jlist[neighbor_j];
+      j &= NEIGHMASK;
+
+      dr_ij[0] = xtmp - x[j][0];
+      dr_ij[1] = ytmp - x[j][1];
+      dr_ij[2] = ztmp - x[j][2];
+      r_ij = dr_ij[0]*dr_ij[0] + dr_ij[1]*dr_ij[1] + dr_ij[2]*dr_ij[2];
+
+      jtype = map[type[j]];
+      ijparam = elem2param[itype][jtype][jtype];
+
+      if (r_ij > params[ijparam].cutsq) continue;
+
+      r_ij = sqrt(r_ij);
+
+      invR_ij = 1.0 / r_ij;
+      
+      directorCos_ij_x = invR_ij * dr_ij[0];
+      directorCos_ij_y = invR_ij * dr_ij[1];
+      directorCos_ij_z = invR_ij * dr_ij[2];
+      
+      // preCutoffFunction
+      interpolDeltaX =  r_ij - GRIDSTART;
+      interpolTMP = (interpolDeltaX * GRIDDENSITY_FCUTOFF);
+      interpolIDX = (int) interpolTMP;
+      interpolY1 = cutoffFunction[itype][jtype][interpolIDX];
+      interpolY2 = cutoffFunction[itype][jtype][interpolIDX+1];
+      preCutoffFunction[neighbor_j] = interpolY1 + (interpolY2 - interpolY1) * (interpolTMP - interpolIDX);
+      // preCutoffFunctionDerived
+      interpolY1 = cutoffFunctionDerived[itype][jtype][interpolIDX];
+      interpolY2 = cutoffFunctionDerived[itype][jtype][interpolIDX+1];
+      preCutoffFunctionDerived[neighbor_j] = interpolY1 + (interpolY2 - interpolY1) * (interpolTMP - interpolIDX);
+
+
+      for (int neighbor_k = neighbor_j + 1; neighbor_k < jnum; neighbor_k++) {
+	double dr_ik[3], r_ik;
+
+	k = jlist[neighbor_k];
+	k &= NEIGHMASK;
+	ktype = map[type[k]];
+	ikparam = elem2param[itype][ktype][ktype];
+	ijkparam = elem2param[itype][jtype][ktype];
+
+	dr_ik[0] = xtmp -x[k][0];
+	dr_ik[1] = ytmp -x[k][1];
+	dr_ik[2] = ztmp -x[k][2];
+	r_ik = dr_ik[0]*dr_ik[0] + dr_ik[1]*dr_ik[1] + dr_ik[2]*dr_ik[2];
+
+	if (r_ik > params[ikparam].cutsq) continue;
+
+	r_ik = sqrt(r_ik);
+
+	invR_ik = 1.0 / r_ik;
+          
+	directorCos_ik_x = invR_ik * dr_ik[0];
+	directorCos_ik_y = invR_ik * dr_ik[1];
+	directorCos_ik_z = invR_ik * dr_ik[2];
+          
+	cosTeta = directorCos_ij_x * directorCos_ik_x + directorCos_ij_y * directorCos_ik_y + directorCos_ij_z * directorCos_ik_z;
+          
+	// preGtetaFunction
+	interpolDeltaX=cosTeta+1.0;
+	interpolTMP = (interpolDeltaX * GRIDDENSITY_GTETA);
+	interpolIDX = (int) interpolTMP;
+	interpolY1 = gtetaFunction[itype][interpolIDX];
+	interpolY2 = gtetaFunction[itype][interpolIDX+1];
+	gtetaFunction_temp = interpolY1 + (interpolY2 - interpolY1) * (interpolTMP - interpolIDX);
+	// preGtetaFunctionDerived
+	interpolY1 = gtetaFunctionDerived[itype][interpolIDX];
+	interpolY2 = gtetaFunctionDerived[itype][interpolIDX+1];
+	gtetaFunctionDerived_temp = interpolY1 + (interpolY2 - interpolY1) * (interpolTMP - interpolIDX);
+          
+	preGtetaFunction[neighbor_j][neighbor_k]=params[ijkparam].gamma*gtetaFunction_temp;
+	preGtetaFunctionDerived[neighbor_j][neighbor_k]=params[ijkparam].gamma*gtetaFunctionDerived_temp;
+	preGtetaFunction[neighbor_k][neighbor_j]=params[ijkparam].gamma*gtetaFunction_temp;
+	preGtetaFunctionDerived[neighbor_k][neighbor_j]=params[ijkparam].gamma*gtetaFunctionDerived_temp;
+
+      } // loop on K
+
+    } // loop on J
+
+
+    // loop over neighbours of atom i
+    for (int neighbor_j = 0; neighbor_j < jnum; neighbor_j++) {
+
+      double dr_ij[3], r_ij, f_ij[3];
+
+      j = jlist[neighbor_j];
+      j &= NEIGHMASK;
+
+      dr_ij[0] = xtmp - x[j][0];
+      dr_ij[1] = ytmp - x[j][1];
+      dr_ij[2] = ztmp - x[j][2];
+      r_ij = dr_ij[0]*dr_ij[0] + dr_ij[1]*dr_ij[1] + dr_ij[2]*dr_ij[2];
+
+      jtype = map[type[j]];
+      ijparam = elem2param[itype][jtype][jtype];
+
+      if (r_ij > params[ijparam].cutsq) continue;
+
+      r_ij = sqrt(r_ij);
+      invR_ij = 1.0 / r_ij;
+      
+      directorCos_ij_x = invR_ij * dr_ij[0];
+      directorCos_ij_y = invR_ij * dr_ij[1];
+      directorCos_ij_z = invR_ij * dr_ij[2];
+
+      exponentRepulsivePotential = params[ijparam].lam1 * r_ij;
+      exponentAttractivePotential = params[ijparam].lam2 * r_ij;
+
+      // repulsiveExponential
+      interpolDeltaX =  exponentRepulsivePotential - minArgumentExponential;
+      interpolTMP = (interpolDeltaX * GRIDDENSITY_EXP);
+      interpolIDX = (int) interpolTMP;
+      interpolY1 = exponential[interpolIDX];
+      interpolY2 = exponential[interpolIDX+1];
+      repulsiveExponential = interpolY1 + (interpolY2 - interpolY1) * (interpolTMP - interpolIDX);
+      // attractiveExponential
+      interpolDeltaX =  exponentAttractivePotential - minArgumentExponential;
+      interpolTMP = (interpolDeltaX * GRIDDENSITY_EXP);
+      interpolIDX = (int) interpolTMP;
+      interpolY1 = exponential[interpolIDX];
+      interpolY2 = exponential[interpolIDX+1];
+      attractiveExponential = interpolY1 + (interpolY2 - interpolY1) * (interpolTMP - interpolIDX);
+
+      repulsivePotential = params[ijparam].biga * repulsiveExponential;
+      attractivePotential = -params[ijparam].bigb * attractiveExponential;
+
+      cutoffFunctionIJ = preCutoffFunction[neighbor_j];
+      cutoffFunctionDerivedIJ = preCutoffFunctionDerived[neighbor_j];
+
+      zeta = 0.0;
+            
+      // first loop over neighbours of atom i except j - part 1/2
+      for (int neighbor_k = 0; neighbor_k < neighbor_j; neighbor_k++) {
+	double dr_ik[3], r_ik;
+
+	k = jlist[neighbor_k];
+	k &= NEIGHMASK;
+	ktype = map[type[k]];
+	ikparam = elem2param[itype][ktype][ktype];
+	ijkparam = elem2param[itype][jtype][ktype];
+
+	dr_ik[0] = xtmp -x[k][0];
+	dr_ik[1] = ytmp -x[k][1];
+	dr_ik[2] = ztmp -x[k][2];
+	r_ik = dr_ik[0]*dr_ik[0] + dr_ik[1]*dr_ik[1] + dr_ik[2]*dr_ik[2];
+
+	if (r_ik > params[ikparam].cutsq) continue;
+
+	r_ik = sqrt(r_ik);
+
+	invR_ik = 1.0 / r_ik;
+          
+	directorCos_ik_x = invR_ik * r_ik_x;
+	directorCos_ik_y = invR_ik * r_ik_y;
+	directorCos_ik_z = invR_ik * r_ik_z;
+          
+	gtetaFunctionIJK = preGtetaFunction[neighbor_j][neighbor_k];
+          
+	cutoffFunctionIK = preCutoffFunction[neighbor_k];
+          
+	zeta += cutoffFunctionIK * gtetaFunctionIJK;
+          
+      }
+
+      // first loop over neighbours of atom i except j - part 2/2
+      for (int neighbor_k = neighbor_j+1; neighbor_k < jnum; neighbor_k++) {
+	double dr_ik[3], r_ik;
+
+	k = jlist[neighbor_k];
+	k &= NEIGHMASK;
+	ktype = map[type[k]];
+	ikparam = elem2param[itype][ktype][ktype];
+	ijkparam = elem2param[itype][jtype][ktype];
+
+	dr_ik[0] = xtmp -x[k][0];
+	dr_ik[1] = ytmp -x[k][1];
+	dr_ik[2] = ztmp -x[k][2];
+	r_ik = dr_ik[0]*dr_ik[0] + dr_ik[1]*dr_ik[1] + dr_ik[2]*dr_ik[2];
+
+	if (r_ik > params[ikparam].cutsq) continue;
+
+	r_ik = sqrt(r_ik);
+	invR_ik = 1.0 / r_ik;
+          
+	directorCos_ik_x = invR_ik * dr_ik[0];
+	directorCos_ik_y = invR_ik * dr_ik[1];
+	directorCos_ik_z = invR_ik * dr_ik[2];
+          
+	gtetaFunctionIJK = preGtetaFunction[neighbor_j][neighbor_k];
+          
+	cutoffFunctionIK = preCutoffFunction[neighbor_k];
+          
+	zeta += cutoffFunctionIK * gtetaFunctionIJK;
+      }
+            
+      // betaZetaPowerIJK 
+      interpolDeltaX= params[ijparam].beta * zeta;
+      interpolTMP = (interpolDeltaX * GRIDDENSITY_BIJ);
+      interpolIDX = (int) interpolTMP;
+      interpolY1 = betaZetaPower[itype][interpolIDX];
+      interpolY2 = betaZetaPower[itype][interpolIDX+1];
+      betaZetaPowerIJK = (interpolY1 + (interpolY2 - interpolY1) * (interpolTMP - interpolIDX));
+      // betaZetaPowerDerivedIJK
+      interpolY1 = betaZetaPowerDerived[itype][interpolIDX];
+      interpolY2 = betaZetaPowerDerived[itype][interpolIDX+1];
+      betaZetaPowerDerivedIJK = params[ijparam].beta*(interpolY1 + (interpolY2 - interpolY1) * (interpolTMP - interpolIDX));
+
+      // Forces and virial
+      factor_force_ij = 0.5*cutoffFunctionDerivedIJ*(repulsivePotential + attractivePotential * betaZetaPowerIJK)+0.5*cutoffFunctionIJ*(-repulsivePotential*params[ijparam].lam1-betaZetaPowerIJK*attractivePotential*params[ijparam].lam2);
+
+      f_ij[0] = factor_force_ij * directorCos_ij_x;
+      f_ij[1] = factor_force_ij * directorCos_ij_y;
+      f_ij[2] = factor_force_ij * directorCos_ij_z;
+
+      f[j][0] += f_ij[0];
+      f[j][1] += f_ij[1];
+      f[j][2] += f_ij[2];
+
+      fxtmp -= f_ij[0];
+      fytmp -= f_ij[1];
+      fztmp -= f_ij[2];
+
+      // potential energy
+      evdwl = cutoffFunctionIJ * repulsivePotential 
+	+ cutoffFunctionIJ * attractivePotential * betaZetaPowerIJK;
+
+      if (EVFLAG) ev_tally_thr(this,i, j, nlocal, /* newton_pair */ 1, 0.5 * evdwl, 0.0,
+			       -factor_force_ij*invR_ij, dr_ij[0], dr_ij[1], dr_ij[2],thr);
+
+      factor_force_tot= 0.5*cutoffFunctionIJ*attractivePotential*betaZetaPowerDerivedIJK;
+
+      // second loop over neighbours of atom i except j, forces and virial only - part 1/2
+      for (int neighbor_k = 0; neighbor_k < neighbor_j; neighbor_k++) {
+	double dr_ik[3], r_ik, f_ik[3];
+
+	k = jlist[neighbor_k];
+	k &= NEIGHMASK;
+	ktype = map[type[k]];
+	ikparam = elem2param[itype][ktype][ktype];
+	ijkparam = elem2param[itype][jtype][ktype];
+
+	dr_ik[0] = xtmp -x[k][0];
+	dr_ik[1] = ytmp -x[k][1];
+	dr_ik[2] = ztmp -x[k][2];
+	r_ik = dr_ik[0]*dr_ik[0] + dr_ik[1]*dr_ik[1] + dr_ik[2]*dr_ik[2];
+
+	if (r_ik > params[ikparam].cutsq) continue;
+
+	r_ik = sqrt(r_ik);
+	invR_ik = 1.0 / r_ik;
+          
+	directorCos_ik_x = invR_ik * dr_ik[0];
+	directorCos_ik_y = invR_ik * dr_ik[1];
+	directorCos_ik_z = invR_ik * dr_ik[2];
+          
+	cosTeta = directorCos_ij_x * directorCos_ik_x + directorCos_ij_y * directorCos_ik_y
+	  + directorCos_ij_z * directorCos_ik_z;
+          
+	gtetaFunctionIJK = preGtetaFunction[neighbor_j][neighbor_k];
+          
+	gtetaFunctionDerivedIJK = preGtetaFunctionDerived[neighbor_j][neighbor_k];
+          
+	cutoffFunctionIK = preCutoffFunction[neighbor_k];
+          
+	cutoffFunctionDerivedIK = preCutoffFunctionDerived[neighbor_k];
+          
+	factor_force3_ij= cutoffFunctionIK * gtetaFunctionDerivedIJK * invR_ij *factor_force_tot;
+          
+	f_ij[0] = factor_force3_ij * (directorCos_ij_x*cosTeta - directorCos_ik_x);
+	f_ij[1] = factor_force3_ij * (directorCos_ij_y*cosTeta - directorCos_ik_y);
+	f_ij[2] = factor_force3_ij * (directorCos_ij_z*cosTeta - directorCos_ik_z);
+          
+	factor_1_force3_ik = (cutoffFunctionIK * gtetaFunctionDerivedIJK * invR_ik)*factor_force_tot;
+	factor_2_force3_ik = -(cutoffFunctionDerivedIK * gtetaFunctionIJK)*factor_force_tot;
+          
+	f_ik[0] = factor_1_force3_ik * (directorCos_ik_x*cosTeta - directorCos_ij_x)
+	  + factor_2_force3_ik * directorCos_ik_x;
+	f_ik[1] = factor_1_force3_ik * (directorCos_ik_y*cosTeta - directorCos_ij_y)
+	  + factor_2_force3_ik * directorCos_ik_y;
+	f_ik[2] = factor_1_force3_ik * (directorCos_ik_z*cosTeta - directorCos_ij_z)
+	  + factor_2_force3_ik * directorCos_ik_z;
+          
+	f[j][0] -= f_ij[0];
+	f[j][1] -= f_ij[1];
+	f[j][2] -= f_ij[2];
+
+	f[k][0] -= f_ik[0];
+	f[k][1] -= f_ik[1];
+	f[k][2] -= f_ik[2];
+
+	fxtmp += f_ij[0] + f_ik[0];
+	fytmp += f_ij[1] + f_ik[1];
+	fztmp += f_ij[2] + f_ik[2];
+
+	if (VFLAG_ATOM) v_tally3_thr(i,j,k,f_ij,f_ik,dr_ij,dr_ik,thr);
+      }
+            
+      // second loop over neighbours of atom i except j, forces and virial only - part 2/2
+      for (int neighbor_k = neighbor_j+1; neighbor_k < jnum; neighbor_k++) {
+	double dr_ik[3], r_ik, f_ik[3];
+
+	k = jlist[neighbor_k];
+	k &= NEIGHMASK;
+	ktype = map[type[k]];
+	ikparam = elem2param[itype][ktype][ktype];
+	ijkparam = elem2param[itype][jtype][ktype];
+
+	dr_ik[0] = xtmp -x[k][0];
+	dr_ik[1] = ytmp -x[k][1];
+	dr_ik[2] = ztmp -x[k][2];
+	r_ik = dr_ik[0]*dr_ik[0] + dr_ik[1]*dr_ik[1] + dr_ik[2]*dr_ik[2];
+
+	if (r_ik > params[ikparam].cutsq) continue;
+
+	r_ik = sqrt(r_ik);
+	invR_ik = 1.0 / r_ik;
+          
+	directorCos_ik_x = invR_ik * dr_ik[0];
+	directorCos_ik_y = invR_ik * dr_ik[1];
+	directorCos_ik_z = invR_ik * dr_ik[2];
+          
+	cosTeta = directorCos_ij_x * directorCos_ik_x + directorCos_ij_y * directorCos_ik_y
+	  + directorCos_ij_z * directorCos_ik_z;
+          
+	gtetaFunctionIJK = preGtetaFunction[neighbor_j][neighbor_k];
+          
+	gtetaFunctionDerivedIJK = preGtetaFunctionDerived[neighbor_j][neighbor_k];
+          
+	cutoffFunctionIK = preCutoffFunction[neighbor_k];
+          
+	cutoffFunctionDerivedIK = preCutoffFunctionDerived[neighbor_k];
+          
+	factor_force3_ij= cutoffFunctionIK * gtetaFunctionDerivedIJK * invR_ij *factor_force_tot;
+          
+	f_ij[0] = factor_force3_ij * (directorCos_ij_x*cosTeta - directorCos_ik_x);
+	f_ij[1] = factor_force3_ij * (directorCos_ij_y*cosTeta - directorCos_ik_y);
+	f_ij[2] = factor_force3_ij * (directorCos_ij_z*cosTeta - directorCos_ik_z);
+          
+	factor_1_force3_ik = (cutoffFunctionIK * gtetaFunctionDerivedIJK * invR_ik)*factor_force_tot;
+	factor_2_force3_ik = -(cutoffFunctionDerivedIK * gtetaFunctionIJK)*factor_force_tot;
+          
+	f_ik[0] = factor_1_force3_ik * (directorCos_ik_x*cosTeta - directorCos_ij_x)
+	  + factor_2_force3_ik * directorCos_ik_x;
+	f_ik[1] = factor_1_force3_ik * (directorCos_ik_y*cosTeta - directorCos_ij_y)
+	  + factor_2_force3_ik * directorCos_ik_y;
+	f_ik[2] = factor_1_force3_ik * (directorCos_ik_z*cosTeta - directorCos_ij_z)
+	  + factor_2_force3_ik * directorCos_ik_z;
+          
+	f[j][0] -= f_ij[0];
+	f[j][1] -= f_ij[1];
+	f[j][2] -= f_ij[2];
+
+	f[k][0] -= f_ik[0];
+	f[k][1] -= f_ik[1];
+	f[k][2] -= f_ik[2];
+
+	fxtmp += f_ij[0] + f_ik[0];
+	fytmp += f_ij[1] + f_ik[1];
+	fztmp += f_ij[2] + f_ik[2];
+
+	if (VFLAG_ATOM) v_tally3_thr(i,j,k,f_ij,f_ik,dr_ij,dr_ik,thr);
+         
+      }
+    } // loop on J
+    f[i][0] += fxtmp;
+    f[i][1] += fytmp;
+    f[i][2] += fztmp;
+  } // loop on I
+}
+
+/* ---------------------------------------------------------------------- */
+
+double PairTersoffTableOMP::memory_usage()
+{
+  double bytes = memory_usage_thr();
+  bytes += PairTersoffTable::memory_usage();
+
+  return bytes;
+}
--- a/src/USER-OMP/pair_tersoff_table_omp.h
+++ b/src/USER-OMP/pair_tersoff_table_omp.h
@ -0,0 +1,43 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+
+PairStyle(tersoff/table/omp,PairTersoffTableOMP)
+
+#else
+
+#ifndef LMP_PAIR_TERSOFF_TABLE_OMP_H
+#define LMP_PAIR_TERSOFF_TABLE_OMP_H
+
+#include "pair_tersoff_table.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+class PairTersoffTableOMP : public PairTersoffTable, public ThrOMP {
+
+ public:
+  PairTersoffTableOMP(class LAMMPS *);
+
+  virtual void compute(int, int);
+  virtual double memory_usage();
+
+ private:
+  template <int EVFLAG, int VFLAG_ATOM>
+  void eval(int ifrom, int ito, ThrData * const thr);
+};
+
+}
+
+#endif
+#endif
--- a/src/USER-OMP/pair_tri_lj_omp.cpp
+++ b/src/USER-OMP/pair_tri_lj_omp.cpp
@ -0,0 +1,409 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#include "math.h"
+#include "pair_tri_lj_omp.h"
+#include "math_extra.h"
+#include "atom.h"
+#include "atom_vec_tri.h"
+#include "comm.h"
+#include "force.h"
+#include "memory.h"
+#include "neighbor.h"
+#include "neigh_list.h"
+
+#include <string.h>
+
+using namespace LAMMPS_NS;
+
+/* ---------------------------------------------------------------------- */
+
+PairTriLJOMP::PairTriLJOMP(LAMMPS *lmp) :
+  PairTriLJ(lmp), ThrOMP(lmp, THR_PAIR)
+{
+  respa_enable = 0;
+}
+
+/* ---------------------------------------------------------------------- */
+
+void PairTriLJOMP::compute(int eflag, int vflag)
+{
+  if (eflag || vflag) {
+    ev_setup(eflag,vflag);
+  } else evflag = vflag_fdotr = 0;
+
+  const int nall = atom->nlocal + atom->nghost;
+  const int nthreads = comm->nthreads;
+  const int inum = list->inum;
+  const int * const tri = atom->tri;
+  const int * const type = atom->type;
+  AtomVecTri::Bonus * const bonus = avec->bonus;
+
+  // grow discrete list if necessary and initialize
+
+  if (nall > nmax) {
+    nmax = nall;
+    memory->destroy(dnum);
+    memory->destroy(dfirst);
+    memory->create(dnum,nall,"pair:dnum");
+    memory->create(dfirst,nall,"pair:dfirst");
+  }
+  memset(dnum,0,nall*sizeof(int));
+  ndiscrete = 0;
+
+  // need to discretize the system ahead of time
+  // until we find a good way to multi-thread it.
+  for (int i = 0; i < nall; ++i) {
+    double dc1[3],dc2[3],dc3[3],p[3][3];
+    
+    if (tri[i] >= 0) {
+      if (dnum[i] == 0) {
+	MathExtra::quat_to_mat(bonus[tri[i]].quat,p);
+	MathExtra::matvec(p,bonus[tri[i]].c1,dc1);
+	MathExtra::matvec(p,bonus[tri[i]].c2,dc2);
+	MathExtra::matvec(p,bonus[tri[i]].c3,dc3);
+	dfirst[i] = ndiscrete;
+	discretize(i,sigma[type[i]][type[i]],dc1,dc2,dc3);
+	dnum[i] = ndiscrete - dfirst[i];
+      }
+    }
+  }
+
+#if defined(_OPENMP)
+#pragma omp parallel default(none) shared(eflag,vflag)
+#endif
+  {
+    int ifrom, ito, tid;
+
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
+
+    if (evflag) {
+      if (eflag) {
+	if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr);
+	else eval<1,1,0>(ifrom, ito, thr);
+      } else {
+	if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr);
+	else eval<1,0,0>(ifrom, ito, thr);
+      }
+    } else {
+      if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
+    }
+
+    reduce_thr(this, eflag, vflag, thr);
+  } // end of omp parallel region
+}
+
+template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
+void PairTriLJOMP::eval(int iifrom, int iito, ThrData * const thr)
+{
+  int i,j,ii,jj,jnum,itype,jtype;
+  int ni,nj,npi,npj,ifirst,jfirst;
+  double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
+  double rsq,r2inv,r6inv,term1,term2,sig,sig3,forcelj;
+  double dxi,dxj,dyi,dyj,dzi,dzj;
+  double xi[3],xj[3],fi[3],fj[3],ti[3],tj[3];
+  int *ilist,*jlist,*numneigh,**firstneigh;
+
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  double * const * const torque = thr->get_torque();
+  const int * const tri = atom->tri;
+  const int * const type = atom->type;
+  const int nlocal = atom->nlocal;
+
+  ilist = list->ilist;
+  numneigh = list->numneigh;
+  firstneigh = list->firstneigh;
+
+  // loop over neighbors of my atoms
+
+  for (ii = iifrom; ii < iito; ++ii) {
+
+    i = ilist[ii];
+    xtmp = x[i][0];
+    ytmp = x[i][1];
+    ztmp = x[i][2];
+    itype = type[i];
+    jlist = firstneigh[i];
+    jnum = numneigh[i];
+
+    for (jj = 0; jj < jnum; jj++) {
+      j = jlist[jj];
+      j &= NEIGHMASK;
+
+      delx = xtmp - x[j][0];
+      dely = ytmp - x[j][1];
+      delz = ztmp - x[j][2];
+      rsq = delx*delx + dely*dely + delz*delz;
+      jtype = type[j];
+      
+      if (rsq >= cutsq[itype][jtype]) continue;
+      
+      // tri/tri interactions = NxN particles
+      // c1,c2,c3 = corner pts of triangle I or J
+      
+      evdwl = 0.0;
+      if (tri[i] >= 0 && tri[j] >= 0) {
+	npi = dnum[i];
+	ifirst = dfirst[i];
+	npj = dnum[j];
+	jfirst = dfirst[j];
+
+	fi[0]=fi[1]=fi[2]=fj[0]=fj[1]=fj[2]=0.0;
+	ti[0]=ti[1]=ti[2]=tj[0]=tj[1]=tj[2]=0.0;
+
+	for (ni = 0; ni < npi; ni++) {
+	  dxi = discrete[ifirst+ni].dx;
+	  dyi = discrete[ifirst+ni].dy;
+	  dzi = discrete[ifirst+ni].dz;
+
+	  for (nj = 0; nj < npj; nj++) {
+	    dxj = discrete[jfirst+nj].dx;
+	    dyj = discrete[jfirst+nj].dy;
+	    dzj = discrete[jfirst+nj].dz;
+
+	    xi[0] = x[i][0] + dxi;
+	    xi[1] = x[i][1] + dyi;
+	    xi[2] = x[i][2] + dzi;
+	    xj[0] = x[j][0] + dxj;
+	    xj[1] = x[j][1] + dyj;
+	    xj[2] = x[j][2] + dzj;
+
+	    delx = xi[0] - xj[0];
+	    dely = xi[1] - xj[1];
+	    delz = xi[2] - xj[2];
+	    rsq = delx*delx + dely*dely + delz*delz;
+ 
+	    sig = 0.5 * (discrete[ifirst+ni].sigma+discrete[jfirst+nj].sigma);
+	    sig3 = sig*sig*sig;
+	    term2 = 24.0*epsilon[itype][jtype] * sig3*sig3;
+	    term1 = 2.0 * term2 * sig3*sig3;
+	    r2inv = 1.0/rsq;
+	    r6inv = r2inv*r2inv*r2inv;
+	    forcelj = r6inv * (term1*r6inv - term2);
+	    fpair = forcelj*r2inv;
+
+	    if (EFLAG) evdwl += r6inv*(term1/12.0*r6inv-term2/6.0);
+
+	    fi[0] += delx*fpair;
+	    fi[1] += dely*fpair;
+	    fi[2] += delz*fpair;
+	    ti[0] += fpair*(dyi*delz - dzi*dely);
+	    ti[1] += fpair*(dzi*delx - dxi*delz);
+	    ti[2] += fpair*(dxi*dely - dyi*delx);
+
+	    if (NEWTON_PAIR || j < nlocal) {
+	      fj[0] -= delx*fpair;
+	      fj[1] -= dely*fpair;
+	      fj[2] -= delz*fpair;
+	      tj[0] -= fpair*(dyj*delz - dzj*dely);
+	      tj[1] -= fpair*(dzj*delx - dxj*delz);
+	      tj[2] -= fpair*(dxj*dely - dyj*delx);
+	    }
+	  }
+	}
+
+	f[i][0] += fi[0];
+	f[i][1] += fi[1];
+	f[i][2] += fi[2];
+	f[j][0] += fj[0];
+	f[j][1] += fj[1];
+	f[j][2] += fj[2];
+	torque[i][0] += ti[0];
+	torque[i][1] += ti[1];
+	torque[i][2] += ti[2];
+	torque[j][0] += tj[0];
+	torque[j][1] += tj[1];
+	torque[j][2] += tj[2];
+
+	// tri/particle interaction = Nx1 particles
+	// c1,c2,c3 = corner pts of triangle I
+
+      } else if (tri[i] >= 0) {
+	npi = dnum[i];
+	ifirst = dfirst[i];
+
+	fi[0]=fi[1]=fi[2]=fj[0]=fj[1]=fj[2]=0.0;
+	ti[0]=ti[1]=ti[2]=tj[0]=tj[1]=tj[2]=0.0;
+
+	for (ni = 0; ni < npi; ni++) {
+	  dxi = discrete[ifirst+ni].dx;
+	  dyi = discrete[ifirst+ni].dy;
+	  dzi = discrete[ifirst+ni].dz;
+
+	  xi[0] = x[i][0] + dxi;
+	  xi[1] = x[i][1] + dyi;
+	  xi[2] = x[i][2] + dzi;
+	  xj[0] = x[j][0];
+	  xj[1] = x[j][1];
+	  xj[2] = x[j][2];
+
+	  delx = xi[0] - xj[0];
+	  dely = xi[1] - xj[1];
+	  delz = xi[2] - xj[2];
+	  rsq = delx*delx + dely*dely + delz*delz;
+
+	  sig = 0.5 * (discrete[ifirst+ni].sigma+sigma[jtype][jtype]);
+	  sig3 = sig*sig*sig;
+	  term2 = 24.0*epsilon[itype][jtype] * sig3*sig3;
+	  term1 = 2.0 * term2 * sig3*sig3;
+	  r2inv = 1.0/rsq;
+	  r6inv = r2inv*r2inv*r2inv;
+	  forcelj = r6inv * (term1*r6inv - term2);
+	  fpair = forcelj*r2inv;
+
+	  if (EFLAG) evdwl += r6inv*(term1/12.0*r6inv-term2/6.0);
+
+	  fi[0] += delx*fpair;
+	  fi[1] += dely*fpair;
+	  fi[2] += delz*fpair;
+	  ti[0] += fpair*(dyi*delz - dzi*dely);
+	  ti[1] += fpair*(dzi*delx - dxi*delz);
+	  ti[2] += fpair*(dxi*dely - dyi*delx);
+
+	  if (NEWTON_PAIR || j < nlocal) {
+	    fj[0] -= delx*fpair;
+	    fj[1] -= dely*fpair;
+	    fj[2] -= delz*fpair;
+	    tj[0] -= fpair*(dyj*delz - dzj*dely);
+	    tj[1] -= fpair*(dzj*delx - dxj*delz);
+	    tj[2] -= fpair*(dxj*dely - dyj*delx);
+	  }
+	}
+
+	f[i][0] += fi[0];
+	f[i][1] += fi[1];
+	f[i][2] += fi[2];
+	f[j][0] += fj[0];
+	f[j][1] += fj[1];
+	f[j][2] += fj[2];
+	torque[i][0] += ti[0];
+	torque[i][1] += ti[1];
+	torque[i][2] += ti[2];
+	torque[j][0] += tj[0];
+	torque[j][1] += tj[1];
+	torque[j][2] += tj[2];
+
+	// particle/tri interaction = Nx1 particles
+	// c1,c2,c3 = corner pts of triangle J
+
+      } else if (tri[j] >= 0) {
+	npj = dnum[j];
+	jfirst = dfirst[j];
+
+	fi[0]=fi[1]=fi[2]=fj[0]=fj[1]=fj[2]=0.0;
+	ti[0]=ti[1]=ti[2]=tj[0]=tj[1]=tj[2]=0.0;
+
+	for (nj = 0; nj < npj; nj++) {
+	  dxj = discrete[jfirst+nj].dx;
+	  dyj = discrete[jfirst+nj].dy;
+	  dzj = discrete[jfirst+nj].dz;
+
+	  xi[0] = x[i][0];
+	  xi[1] = x[i][1];
+	  xi[2] = x[i][2];
+	  xj[0] = x[j][0] + dxj;
+	  xj[1] = x[j][1] + dyj;
+	  xj[2] = x[j][2] + dzj;
+
+	  delx = xi[0] - xj[0];
+	  dely = xi[1] - xj[1];
+	  delz = xi[2] - xj[2];
+	  rsq = delx*delx + dely*dely + delz*delz;
+
+	  sig = 0.5 * (sigma[itype][itype]+discrete[jfirst+nj].sigma);
+	  sig3 = sig*sig*sig;
+	  term2 = 24.0*epsilon[itype][jtype] * sig3*sig3;
+	  term1 = 2.0 * term2 * sig3*sig3;
+	  r2inv = 1.0/rsq;
+	  r6inv = r2inv*r2inv*r2inv;
+	  forcelj = r6inv * (term1*r6inv - term2);
+	  fpair = forcelj*r2inv;
+	  
+	  if (EFLAG) evdwl += r6inv*(term1/12.0*r6inv-term2/6.0);
+
+	  if (EFLAG) evdwl += r6inv*(term1/12.0*r6inv-term2/6.0);
+
+	  fi[0] += delx*fpair;
+	  fi[1] += dely*fpair;
+	  fi[2] += delz*fpair;
+	  ti[0] += fpair*(dyi*delz - dzi*dely);
+	  ti[1] += fpair*(dzi*delx - dxi*delz);
+	  ti[2] += fpair*(dxi*dely - dyi*delx);
+
+	  if (NEWTON_PAIR || j < nlocal) {
+	    fj[0] -= delx*fpair;
+	    fj[1] -= dely*fpair;
+	    fj[2] -= delz*fpair;
+	    tj[0] -= fpair*(dyj*delz - dzj*dely);
+	    tj[1] -= fpair*(dzj*delx - dxj*delz);
+	    tj[2] -= fpair*(dxj*dely - dyj*delx);
+	  }
+	}
+
+	f[i][0] += fi[0];
+	f[i][1] += fi[1];
+	f[i][2] += fi[2];
+	f[j][0] += fj[0];
+	f[j][1] += fj[1];
+	f[j][2] += fj[2];
+	torque[i][0] += ti[0];
+	torque[i][1] += ti[1];
+	torque[i][2] += ti[2];
+	torque[j][0] += tj[0];
+	torque[j][1] += tj[1];
+	torque[j][2] += tj[2];
+
+	// particle/particle interaction = 1x1 particles
+
+      } else {
+	r2inv = 1.0/rsq;
+	r6inv = r2inv*r2inv*r2inv;
+	forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
+	fpair = forcelj*r2inv;
+
+	if (EFLAG)
+	  evdwl += r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]);
+
+	f[i][0] += delx*fpair;
+	f[i][1] += dely*fpair;
+	f[i][2] += delz*fpair;
+	if (NEWTON_PAIR || j < nlocal) {
+	  f[j][0] -= delx*fpair;
+	  f[j][1] -= dely*fpair;
+	  f[j][2] -= delz*fpair;
+	}
+      }
+
+      if (EVFLAG) ev_tally_thr(this,i,j,nlocal,NEWTON_PAIR,
+			       evdwl,0.0,fpair,delx,dely,delz,thr);
+    }
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+double PairTriLJOMP::memory_usage()
+{
+  double bytes = memory_usage_thr();
+  bytes += PairTriLJ::memory_usage();
+
+  return bytes;
+}
--- a/src/USER-OMP/pair_tri_lj_omp.h
+++ b/src/USER-OMP/pair_tri_lj_omp.h
@ -0,0 +1,48 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+
+PairStyle(tri/lj/omp,PairTriLJOMP)
+
+#else
+
+#ifndef LMP_PAIR_TRI_LJ_OMP_H
+#define LMP_PAIR_TRI_LJ_OMP_H
+
+#include "pair_tri_lj.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+class PairTriLJOMP : public PairTriLJ, public ThrOMP {
+
+ public:
+  PairTriLJOMP(class LAMMPS *);
+
+  virtual void compute(int, int);
+  virtual double memory_usage();
+
+ private:
+  template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
+  void eval(int ifrom, int ito, ThrData * const thr);
+};
+
+}
+
+#endif
+#endif
--- a/src/USER-OMP/pppm_omp.cpp
+++ b/src/USER-OMP/pppm_omp.cpp
@ -0,0 +1,323 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#include "pppm_omp.h"
+#include "atom.h"
+#include "comm.h"
+#include "force.h"
+#include "memory.h"
+
+#include <string.h>
+
+using namespace LAMMPS_NS;
+
+#ifdef FFT_SINGLE
+#define ZEROF 0.0f
+#define ONEF  1.0f
+#else
+#define ZEROF 0.0
+#define ONEF  1.0
+#endif
+
+/* ---------------------------------------------------------------------- */
+
+PPPMOMP::PPPMOMP(LAMMPS *lmp, int narg, char **arg) :
+  PPPM(lmp, narg, arg), ThrOMP(lmp, THR_KSPACE)
+{
+}
+
+
+/* ----------------------------------------------------------------------
+   allocate memory that depends on # of K-vectors and order 
+------------------------------------------------------------------------- */
+
+void PPPMOMP::allocate()
+{
+  PPPM::allocate();
+
+  const int nthreads = comm->nthreads;
+
+#if defined(_OPENMP)
+#pragma omp parallel default(none)
+#endif
+  {
+#if defined(_OPENMP)
+    const int tid = omp_get_thread_num();
+#else
+    const int tid = 0;
+#endif
+
+    FFT_SCALAR **rho1d_thr;
+    memory->create2d_offset(rho1d_thr,3,-order/2,order/2,"pppm:rho1d_thr");
+    ThrData *thr = fix->get_thr(tid);
+    thr->init_pppm(static_cast<void *>(rho1d_thr));
+  }
+
+  const int nzend = (nzhi_out-nzlo_out+1)*nthreads + nzlo_out -1;
+
+  // reallocate density brick, so it fits our needs
+  memory->destroy3d_offset(density_brick,nzlo_out,nylo_out,nxlo_out);
+  memory->create3d_offset(density_brick,nzlo_out,nzend,nylo_out,nyhi_out,
+			  nxlo_out,nxhi_out,"pppm:density_brick");
+}
+
+// NOTE: special version of reduce_data for FFT_SCALAR data type.
+// reduce per thread data into the first part of the data
+// array that is used for the non-threaded parts and reset
+// the temporary storage to 0.0. this routine depends on
+// multi-dimensional arrays like force stored in this order
+// x1,y1,z1,x2,y2,z2,...
+// we need to post a barrier to wait until all threads are done
+// with writing to the array .
+static void data_reduce_fft(FFT_SCALAR *dall, int nall, int nthreads, int ndim, int tid)
+{
+#if defined(_OPENMP)
+  // NOOP in non-threaded execution.
+  if (nthreads == 1) return;
+#pragma omp barrier
+  {
+    const int nvals = ndim*nall;
+    const int idelta = nvals/nthreads + 1;
+    const int ifrom = tid*idelta;
+    const int ito   = ((ifrom + idelta) > nvals) ? nvals : (ifrom + idelta);
+
+    for (int m = ifrom; m < ito; ++m) {
+      for (int n = 1; n < nthreads; ++n) {
+	dall[m] += dall[n*nvals + m];
+	dall[n*nvals + m] = 0.0;
+      }
+    }
+  }
+#else
+  // NOOP in non-threaded execution.
+  return;
+#endif
+}
+
+/* ----------------------------------------------------------------------
+   free memory that depends on # of K-vectors and order 
+------------------------------------------------------------------------- */
+
+void PPPMOMP::deallocate()
+{
+  PPPM::deallocate();
+  for (int i=0; i < comm->nthreads; ++i) {
+    ThrData * thr = fix->get_thr(i);
+    FFT_SCALAR ** rho1d_thr = static_cast<FFT_SCALAR **>(thr->get_rho1d());
+    memory->destroy2d_offset(rho1d_thr,-order/2);
+  }
+}
+
+void PPPMOMP::compute(int eflag, int vflag)
+{
+
+  PPPM::compute(eflag,vflag);
+
+#if defined(_OPENMP)
+#pragma omp parallel default(none) shared(eflag,vflag)
+#endif
+  {
+#if defined(_OPENMP)
+    const int tid = omp_get_thread_num();
+#else
+    const int tid = 0;
+#endif
+    ThrData *thr = fix->get_thr(tid);
+    reduce_thr(this, eflag, vflag, thr);
+  } // end of omp parallel region
+}
+
+/* ----------------------------------------------------------------------
+   create discretized "density" on section of global grid due to my particles
+   density(x,y,z) = charge "density" at grid points of my 3d brick
+   (nxlo:nxhi,nylo:nyhi,nzlo:nzhi) is extent of my brick (including ghosts)
+   in global grid 
+------------------------------------------------------------------------- */
+
+void PPPMOMP::make_rho()
+{
+  const double * const q = atom->q;
+  const double * const * const x = atom->x;
+  const int nthreads = comm->nthreads;
+
+#if defined(_OPENMP)
+#pragma omp parallel default(none)
+  {  
+    // each thread works on a fixed chunk of atoms.
+    const int tid = omp_get_thread_num();
+    const int inum = atom->nlocal;
+    const int idelta = 1 + inum/nthreads;
+    const int ifrom = tid*idelta;
+    const int ito = ((ifrom + idelta) > inum) ? inum : ifrom + idelta;
+#else
+    const int tid = 0;
+    const int ifrom = 0;
+    const int ito = atom->nlocal;
+#endif
+
+    int l,m,n,nx,ny,nz,mx,my,mz;
+    FFT_SCALAR dx,dy,dz,x0,y0,z0;
+
+    // set up clear 3d density array
+    const int nzoffs = (nzhi_out-nzlo_out+1)*tid;
+    FFT_SCALAR * const * const * const db = &(density_brick[nzoffs]);
+    memset(&(db[nzlo_out][nylo_out][nxlo_out]),0,ngrid*sizeof(FFT_SCALAR));
+
+    ThrData *thr = fix->get_thr(tid);
+    FFT_SCALAR * const * const r1d = static_cast<FFT_SCALAR **>(thr->get_rho1d());
+
+    // loop over my charges, add their contribution to nearby grid points
+    // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
+    // (dx,dy,dz) = distance to "lower left" grid pt
+    // (mx,my,mz) = global coords of moving stencil pt
+
+    for (int i = ifrom; i < ito; i++) {
+
+      nx = part2grid[i][0];
+      ny = part2grid[i][1];
+      nz = part2grid[i][2];
+      dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv;
+      dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv;
+      dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv;
+
+      compute_rho1d_thr(r1d,dx,dy,dz);
+
+      z0 = delvolinv * q[i];
+      for (n = nlower; n <= nupper; n++) {
+	mz = n+nz;
+	y0 = z0*r1d[2][n];
+	for (m = nlower; m <= nupper; m++) {
+	  my = m+ny;
+	  x0 = y0*r1d[1][m];
+	  for (l = nlower; l <= nupper; l++) {
+	    mx = l+nx;
+	    db[mz][my][mx] += x0*r1d[0][l];
+	  }
+	}
+      }
+    }
+#if defined(_OPENMP)
+    // reduce 3d density array
+    if (nthreads > 1) {
+      sync_threads();
+      data_reduce_fft(&(density_brick[nzlo_out][nylo_out][nxlo_out]),ngrid,nthreads,1,tid);
+    }
+  }
+#endif
+}
+
+/* ----------------------------------------------------------------------
+   interpolate from grid to get electric field & force on my particles 
+------------------------------------------------------------------------- */
+
+void PPPMOMP::fieldforce()
+{
+  // loop over my charges, interpolate electric field from nearby grid points
+  // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
+  // (dx,dy,dz) = distance to "lower left" grid pt
+  // (mx,my,mz) = global coords of moving stencil pt
+  // ek = 3 components of E-field on particle
+
+  const double * const q = atom->q;
+  const double * const * const x = atom->x;
+  const int nthreads = comm->nthreads;
+  const double qqrd2e = force->qqrd2e;
+
+#if defined(_OPENMP)
+#pragma omp parallel default(none)
+  {  
+    // each thread works on a fixed chunk of atoms.
+    const int tid = omp_get_thread_num();
+    const int inum = atom->nlocal;
+    const int idelta = 1 + inum/nthreads;
+    const int ifrom = tid*idelta;
+    const int ito = ((ifrom + idelta) > inum) ? inum : ifrom + idelta;
+#else
+    const int ifrom = 0;
+    const int ito = atom->nlocal;
+    const int tid = 0;
+#endif
+    ThrData *thr = fix->get_thr(tid);
+    double * const * const f = thr->get_f();
+    FFT_SCALAR * const * const r1d =  static_cast<FFT_SCALAR **>(thr->get_rho1d());
+    
+    int l,m,n,nx,ny,nz,mx,my,mz;
+    FFT_SCALAR dx,dy,dz,x0,y0,z0;
+    FFT_SCALAR ekx,eky,ekz;
+
+    for (int i = ifrom; i < ito; i++) {
+
+      nx = part2grid[i][0];
+      ny = part2grid[i][1];
+      nz = part2grid[i][2];
+      dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv;
+      dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv;
+      dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv;
+
+      compute_rho1d_thr(r1d,dx,dy,dz);
+
+      ekx = eky = ekz = ZEROF;
+      for (n = nlower; n <= nupper; n++) {
+	mz = n+nz;
+	z0 = r1d[2][n];
+	for (m = nlower; m <= nupper; m++) {
+	  my = m+ny;
+	  y0 = z0*r1d[1][m];
+	  for (l = nlower; l <= nupper; l++) {
+	    mx = l+nx;
+	    x0 = y0*r1d[0][l];
+	    ekx -= x0*vdx_brick[mz][my][mx];
+	    eky -= x0*vdy_brick[mz][my][mx];
+	    ekz -= x0*vdz_brick[mz][my][mx];
+	  }
+	}
+      }
+
+      // convert E-field to force
+      const double qfactor = qqrd2e*scale*q[i];
+      f[i][0] += qfactor*ekx;
+      f[i][1] += qfactor*eky;
+      f[i][2] += qfactor*ekz;
+    }
+#if defined(_OPENMP)
+  }
+#endif
+}
+
+/* ----------------------------------------------------------------------
+   charge assignment into rho1d
+   dx,dy,dz = distance of particle from "lower left" grid point 
+------------------------------------------------------------------------- */
+void PPPMOMP::compute_rho1d_thr(FFT_SCALAR * const * const r1d, const FFT_SCALAR &dx,
+				const FFT_SCALAR &dy, const FFT_SCALAR &dz)
+{
+  int k,l;
+  FFT_SCALAR r1,r2,r3;
+
+  for (k = (1-order)/2; k <= order/2; k++) {
+    r1 = r2 = r3 = ZEROF;
+
+    for (l = order-1; l >= 0; l--) {
+      r1 = rho_coeff[l][k] + r1*dx;
+      r2 = rho_coeff[l][k] + r2*dy;
+      r3 = rho_coeff[l][k] + r3*dz;
+    }
+    r1d[0][k] = r1;
+    r1d[1][k] = r2;
+    r1d[2][k] = r3;
+  }
+}
--- a/src/USER-OMP/pppm_omp.h
+++ b/src/USER-OMP/pppm_omp.h
@ -0,0 +1,49 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifdef KSPACE_CLASS
+
+KSpaceStyle(pppm/omp,PPPMOMP)
+
+#else
+
+#ifndef LMP_PPPM_OMP_H
+#define LMP_PPPM_OMP_H
+
+#include "pppm.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+  class PPPMOMP : public PPPM, public ThrOMP {
+ public:
+  PPPMOMP(class LAMMPS *, int, char **);
+  virtual ~PPPMOMP () {};
+
+ protected:
+  virtual void allocate();
+  virtual void deallocate();
+  virtual void compute(int, int);
+  virtual void fieldforce();
+  virtual void make_rho();
+  void compute_rho1d_thr(FFT_SCALAR * const * const, const FFT_SCALAR &,
+			 const FFT_SCALAR &, const FFT_SCALAR &);
+//  void compute_rho_coeff();
+//  void slabcorr(int);
+
+};
+
+}
+
+#endif
+#endif
--- a/src/USER-OMP/pppm_proxy.cpp
+++ b/src/USER-OMP/pppm_proxy.cpp
@ -0,0 +1,61 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#include "pppm_proxy.h"
+
+#include <stdio.h>
+
+using namespace LAMMPS_NS;
+
+/* ---------------------------------------------------------------------- */
+
+PPPMProxy::PPPMProxy(LAMMPS *lmp, int narg, char **arg) :
+  PPPM(lmp, narg, arg), ThrOMP(lmp, THR_KSPACE|THR_PROXY) {}
+
+/* ---------------------------------------------------------------------- */
+
+void PPPMProxy::setup_proxy()
+{
+  PPPM::setup();
+}
+
+/* ---------------------------------------------------------------------- */
+
+void PPPMProxy::compute(int eflag, int vflag)
+{
+#if defined(_OPENMP)
+#pragma omp parallel default(none) shared(eflag,vflag)
+#endif
+  {
+#if defined(_OPENMP)
+    const int tid = omp_get_thread_num();
+#else
+    const int tid = 0;
+#endif
+
+    ThrData *thr = fix->get_thr(tid);
+    reduce_thr(this, eflag,vflag,thr);
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+void PPPMProxy::compute_proxy(int eflag, int vflag)
+{
+  PPPM::compute(eflag,vflag);
+}
+
--- a/src/USER-OMP/pppm_proxy.h
+++ b/src/USER-OMP/pppm_proxy.h
@ -0,0 +1,44 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifdef KSPACE_CLASS
+
+KSpaceStyle(pppm/proxy,PPPMProxy)
+
+#else
+
+#ifndef LMP_PPPM_PROXY_H
+#define LMP_PPPM_PROXY_H
+
+#include "pppm.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+  class PPPMProxy : public PPPM, public ThrOMP {
+ public:
+  PPPMProxy(class LAMMPS *, int, char **);
+  virtual ~PPPMProxy () {};
+
+  virtual void compute(int, int);
+  virtual void compute_proxy(int eflag, int vflag);
+
+  // nothing to do in setup
+  virtual void setup() {};
+  virtual void setup_proxy();
+};
+
+}
+
+#endif
+#endif
--- a/src/USER-OMP/pppm_tip4p_proxy.cpp
+++ b/src/USER-OMP/pppm_tip4p_proxy.cpp
@ -0,0 +1,61 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#include "pppm_tip4p_proxy.h"
+
+#include <stdio.h>
+
+using namespace LAMMPS_NS;
+
+/* ---------------------------------------------------------------------- */
+
+PPPMTIP4PProxy::PPPMTIP4PProxy(LAMMPS *lmp, int narg, char **arg) :
+  PPPMTIP4P(lmp, narg, arg), ThrOMP(lmp, THR_KSPACE|THR_PROXY) {}
+
+/* ---------------------------------------------------------------------- */
+
+void PPPMTIP4PProxy::setup_proxy()
+{
+  PPPMTIP4P::setup();
+}
+
+/* ---------------------------------------------------------------------- */
+
+void PPPMTIP4PProxy::compute(int eflag, int vflag)
+{
+#if defined(_OPENMP)
+#pragma omp parallel default(none) shared(eflag,vflag)
+#endif
+  {
+#if defined(_OPENMP)
+    const int tid = omp_get_thread_num();
+#else
+    const int tid = 0;
+#endif
+
+    ThrData *thr = fix->get_thr(tid);
+    reduce_thr(this, eflag,vflag,thr);
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+void PPPMTIP4PProxy::compute_proxy(int eflag, int vflag)
+{
+  PPPMTIP4P::compute(eflag,vflag);
+}
+
--- a/src/USER-OMP/pppm_tip4p_proxy.h
+++ b/src/USER-OMP/pppm_tip4p_proxy.h
@ -0,0 +1,44 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifdef KSPACE_CLASS
+
+KSpaceStyle(pppm/tip4p/proxy,PPPMTIP4PProxy)
+
+#else
+
+#ifndef LMP_PPPM_TIP4P_PROXY_H
+#define LMP_PPPM_TIP4P_PROXY_H
+
+#include "pppm_tip4p.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+  class PPPMTIP4PProxy : public PPPMTIP4P, public ThrOMP {
+ public:
+  PPPMTIP4PProxy(class LAMMPS *, int, char **);
+  virtual ~PPPMTIP4PProxy () {};
+
+  virtual void compute(int, int);
+  virtual void compute_proxy(int eflag, int vflag);
+
+  // nothing to do in setup
+  virtual void setup() {};
+  virtual void setup_proxy();
+};
+
+}
+
+#endif
+#endif
--- a/src/USER-OMP/thr_data.cpp
+++ b/src/USER-OMP/thr_data.cpp
@ -0,0 +1,219 @@
+/* -------------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   per-thread data management for LAMMPS
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#include "thr_data.h"
+
+#include <string.h>
+#include <stdio.h>
+
+using namespace LAMMPS_NS;
+
+
+ThrData::ThrData(int tid) 
+  : _f(NULL), _torque(NULL), _erforce(NULL), _de(NULL), _drho(NULL), _mu(NULL),
+    _lambda(NULL), _rhoB(NULL), _D_values(NULL), _rho(NULL), _fp(NULL),
+    _rho1d(NULL), _tid(tid) 
+{
+  // nothing else to do here.
+}
+
+
+/* ---------------------------------------------------------------------- */
+
+void ThrData::check_tid(int tid)
+{
+  if (tid != _tid)
+    fprintf(stderr,"WARNING: external and internal tid mismatch %d != %d\n",tid,_tid);
+}
+
+/* ---------------------------------------------------------------------- */
+
+void ThrData::init_force(int nall, double **f, double **torque,
+			 double *erforce, double *de, double *drho)
+{
+  eng_vdwl=eng_coul=eng_bond=eng_angle=eng_dihed=eng_imprp=eng_kspce=0.0;
+  memset(virial_pair,0,6*sizeof(double));
+  memset(virial_bond,0,6*sizeof(double));
+  memset(virial_angle,0,6*sizeof(double));
+  memset(virial_dihed,0,6*sizeof(double));
+  memset(virial_imprp,0,6*sizeof(double));
+  memset(virial_kspce,0,6*sizeof(double));
+
+  eatom_pair=eatom_bond=eatom_angle=eatom_dihed=eatom_imprp=eatom_kspce=NULL;
+  vatom_pair=vatom_bond=vatom_angle=vatom_dihed=vatom_imprp=vatom_kspce=NULL;
+
+  _f = f + _tid*nall;
+  memset(&(_f[0][0]),0,nall*3*sizeof(double));
+
+  if (torque) {
+    _torque = torque + _tid*nall;
+    memset(&(_torque[0][0]),0,nall*3*sizeof(double));
+  } else _torque = NULL;
+
+  if (erforce) {
+    _erforce = erforce + _tid*nall;
+    memset(&(_erforce[0]),0,nall*sizeof(double));
+  } else _erforce = NULL;
+
+  if (de) {
+    _de = de + _tid*nall;
+    memset(&(_de[0]),0,nall*sizeof(double));
+  } else _de = NULL;
+
+  if (drho) {
+    _drho = drho + _tid*nall;
+    memset(&(_drho[0]),0,nall*sizeof(double));
+  } else _drho = NULL;
+}
+
+/* ----------------------------------------------------------------------
+   set up and clear out locally managed per atom arrays
+------------------------------------------------------------------------- */
+
+void ThrData::init_eam(int nall, double *rho)
+{
+  _rho = rho + _tid*nall;
+  memset(_rho, 0, nall*sizeof(double));
+}
+
+/* ---------------------------------------------------------------------- */
+
+void ThrData::init_adp(int nall, double *rho, double **mu, double **lambda)
+{
+  init_eam(nall, rho);
+
+  _mu = mu + _tid*nall;
+  _lambda = lambda + _tid*nall;
+  memset(&(_mu[0][0]), 0, nall*3*sizeof(double));
+  memset(&(_lambda[0][0]), 0, nall*6*sizeof(double));
+}
+
+/* ---------------------------------------------------------------------- */
+
+void ThrData::init_cdeam(int nall, double *rho, double *rhoB, double *D_values)
+{
+  init_eam(nall, rho);
+
+  _rhoB = rhoB + _tid*nall;
+  _D_values = D_values + _tid*nall;
+  memset(_rhoB, 0, nall*sizeof(double));
+  memset(_D_values, 0, nall*sizeof(double));
+}
+
+/* ---------------------------------------------------------------------- */
+
+void ThrData::init_eim(int nall, double *rho, double *fp)
+{
+  init_eam(nall, rho);
+
+  _fp = fp + _tid*nall;
+  memset(_fp,0,nall*sizeof(double));
+}
+
+/* ----------------------------------------------------------------------
+   compute global pair virial via summing F dot r over own & ghost atoms
+   at this point, only pairwise forces have been accumulated in atom->f
+------------------------------------------------------------------------- */
+
+void ThrData::virial_fdotr_compute(double **x, int nlocal, int nghost, int nfirst)
+{
+
+  // sum over force on all particles including ghosts
+
+  if (nfirst < 0) {
+    int nall = nlocal + nghost;
+    for (int i = 0; i < nall; i++) {
+      virial_pair[0] += _f[i][0]*x[i][0];
+      virial_pair[1] += _f[i][1]*x[i][1];
+      virial_pair[2] += _f[i][2]*x[i][2];
+      virial_pair[3] += _f[i][1]*x[i][0];
+      virial_pair[4] += _f[i][2]*x[i][0];
+      virial_pair[5] += _f[i][2]*x[i][1];
+    }
+
+  // neighbor includegroup flag is set
+  // sum over force on initial nfirst particles and ghosts
+
+  } else {
+    int nall = nfirst;
+    for (int i = 0; i < nall; i++) {
+      virial_pair[0] += _f[i][0]*x[i][0];
+      virial_pair[1] += _f[i][1]*x[i][1];
+      virial_pair[2] += _f[i][2]*x[i][2];
+      virial_pair[3] += _f[i][1]*x[i][0];
+      virial_pair[4] += _f[i][2]*x[i][0];
+      virial_pair[5] += _f[i][2]*x[i][1];
+    }
+
+    nall = nlocal + nghost;
+    for (int i = nlocal; i < nall; i++) {
+      virial_pair[0] += _f[i][0]*x[i][0];
+      virial_pair[1] += _f[i][1]*x[i][1];
+      virial_pair[2] += _f[i][2]*x[i][2];
+      virial_pair[3] += _f[i][1]*x[i][0];
+      virial_pair[4] += _f[i][2]*x[i][0];
+      virial_pair[5] += _f[i][2]*x[i][1];
+    }
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+double ThrData::memory_usage() 
+{
+  double bytes = (7 + 6*6) * sizeof(double);
+  bytes += 2 * sizeof(double*);
+  bytes += 4 * sizeof(int);
+
+  return bytes;
+}
+
+/* additional helper functions */
+
+// reduce per thread data into the first part of the data
+// array that is used for the non-threaded parts and reset
+// the temporary storage to 0.0. this routine depends on
+// multi-dimensional arrays like force stored in this order
+// x1,y1,z1,x2,y2,z2,...
+// we need to post a barrier to wait until all threads are done
+// with writing to the array .
+void LAMMPS_NS::data_reduce_thr(double *dall, int nall, int nthreads, int ndim, int tid)
+{
+#if defined(_OPENMP)
+  // NOOP in non-threaded execution.
+  if (nthreads == 1) return;
+#pragma omp barrier
+  {
+    const int nvals = ndim*nall;
+    const int idelta = nvals/nthreads + 1;
+    const int ifrom = tid*idelta;
+    const int ito   = ((ifrom + idelta) > nvals) ? nvals : (ifrom + idelta);
+
+    for (int m = ifrom; m < ito; ++m) {
+      for (int n = 1; n < nthreads; ++n) {
+	dall[m] += dall[n*nvals + m];
+	dall[n*nvals + m] = 0.0;
+      }
+    }
+  }
+#else
+  // NOOP in non-threaded execution.
+  return;
+#endif
+}
+
--- a/src/USER-OMP/thr_data.h
+++ b/src/USER-OMP/thr_data.h
@ -0,0 +1,132 @@
+/* -*- c++ -*- -------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifndef LMP_THR_DATA_H
+#define LMP_THR_DATA_H
+
+#if defined(_OPENMP)
+#include <omp.h>
+#endif
+
+namespace LAMMPS_NS {
+
+// per thread data accumulators
+// there should be one instance
+// of this class for each thread.
+class ThrData {
+  friend class FixOMP;
+  friend class ThrOMP;
+
+ public:
+  ThrData(int tid);
+  ~ThrData() {};
+
+  void check_tid(int);    // thread id consistency check
+  int get_tid() const { return _tid; }; // our thread id.
+
+  // erase accumulator contents and hook up force arrays
+  void init_force(int, double **, double **, double *, double *, double *);
+
+  // give access to per-thread offset arrays
+  double **get_f() const { return _f; };
+  double **get_torque() const { return _torque; };
+  double *get_de() const { return _de; };
+  double *get_drho() const { return _drho; };
+
+  // setup and erase per atom arrays
+  void init_adp(int, double *, double **, double **); // ADP (+ EAM)
+  void init_cdeam(int, double *, double *, double *); // CDEAM (+ EAM)
+  void init_eam(int, double *);                       // EAM 
+  void init_eim(int, double *, double *);             // EIM (+ EAM)
+
+  void init_pppm(void *r1d) { _rho1d = r1d; };
+
+  // access methods for arrays that we handle in this class
+  double **get_lambda() const { return _lambda; };
+  double **get_mu() const { return _mu; };
+  double *get_D_values() const { return _D_values; };
+  double *get_fp() const { return _fp; };
+  double *get_rho() const { return _rho; };
+  double *get_rhoB() const { return _rhoB; };
+  void *get_rho1d() const { return _rho1d; };
+
+ private:
+  double eng_vdwl;        // non-bonded non-coulomb energy
+  double eng_coul;        // non-bonded coulomb energy
+  double virial_pair[6];  // virial contribution from non-bonded
+  double *eatom_pair;
+  double **vatom_pair;
+  double eng_bond;        // bond energy
+  double virial_bond[6];  // virial contribution from bonds
+  double *eatom_bond;
+  double **vatom_bond;
+  double eng_angle;       // angle energy
+  double virial_angle[6]; // virial contribution from angles
+  double *eatom_angle;
+  double **vatom_angle;
+  double eng_dihed;       // dihedral energy
+  double virial_dihed[6]; // virial contribution from dihedrals
+  double *eatom_dihed;
+  double **vatom_dihed;
+  double eng_imprp;       // improper energy
+  double virial_imprp[6]; // virial contribution from impropers 
+  double *eatom_imprp;
+  double **vatom_imprp;
+  double eng_kspce;       // kspace energy
+  double virial_kspce[6]; // virial contribution from kspace
+  double *eatom_kspce;
+  double **vatom_kspce;
+
+  // per thread segments of various force or similar arrays
+
+  // these are maintained by atom styles
+  double **_f;
+  double **_torque;
+  double *_erforce;
+  double *_de;
+  double *_drho;
+
+  // these are maintained by individual pair styles
+  double **_mu, **_lambda;   // ADP (+ EAM)
+  double *_rhoB, *_D_values; // CDEAM (+ EAM)
+  double *_rho;              // EAM
+  double *_fp;               // EIM (+ EAM)
+
+  // this is for pppm/omp
+  void *_rho1d;
+
+  // my thread id
+  const int _tid;
+
+ public:
+  // compute global per thread virial contribution from per-thread force
+  void virial_fdotr_compute(double **, int, int, int);
+
+  double memory_usage();
+
+ // disabled default methods
+ private:
+  ThrData() : _tid(-1) {};
+};
+
+////////////////////////////////////////////////////////////////////////
+//  helper functions operating on data replicated for thread support  //
+////////////////////////////////////////////////////////////////////////
+// generic per thread data reduction for continous arrays of nthreads*nmax size
+void data_reduce_thr(double *, int, int, int, int);
+}
+#endif