diff --git a/src/USER-CUDA/cuda.cpp b/src/USER-CUDA/cuda.cpp
index e3f4d47d81..b86908323d 100644
--- a/src/USER-CUDA/cuda.cpp
+++ b/src/USER-CUDA/cuda.cpp
@@ -717,6 +717,113 @@ void Cuda::downloadAll()
   MYDBG(printf("# CUDA: Cuda::downloadAll() ... end\n");)
 }
 
+void Cuda::upload(int datamask)
+{
+  MYDBG(printf("# CUDA: Cuda::upload() ... start\n");)
+  timespec starttime;
+  timespec endtime;
+
+  if(atom->nmax != shared_data.atom.nmax) checkResize();
+
+  clock_gettime(CLOCK_REALTIME, &starttime);
+  if(X_MASK & datamask) cu_x   ->upload();
+  if(V_MASK & datamask) cu_v   ->upload();
+  if(F_MASK & datamask) cu_f   ->upload();
+  if(TYPE_MASK & datamask) cu_type->upload();
+  if(TAG_MASK & datamask) cu_tag ->upload();
+  if(MASK_MASK & datamask) cu_mask->upload();
+  if(IMAGE_MASK & datamask) cu_image->upload();
+
+  //if(shared_data.atom.need_eatom) cu_eatom->upload();
+  //if(shared_data.atom.need_vatom) cu_vatom->upload();
+
+  if(shared_data.atom.q_flag)
+	  if(Q_MASK & datamask) cu_q    ->upload();
+
+  if(atom->rmass)
+	  if(RMASS_MASK & datamask) cu_rmass->upload();
+
+  if(atom->radius)
+	  if(RADIUS_MASK & datamask) cu_radius->upload();
+
+  if(atom->omega)
+	  if(OMEGA_MASK & datamask) cu_omega->upload();
+
+  if(atom->torque)
+	  if(TORQUE_MASK & datamask) cu_torque->upload();
+
+  if(atom->special)
+	  if(SPECIAL_MASK & datamask) cu_special->upload();
+
+  if(atom->nspecial)
+	  if(SPECIAL_MASK & datamask) cu_nspecial->upload();
+
+  if(atom->molecule)
+	  if(MOLECULE_MASK & datamask) cu_molecule->upload();
+
+  if(cu_eatom) cu_eatom->upload();
+
+  if(cu_vatom) cu_vatom->upload();
+
+  clock_gettime(CLOCK_REALTIME, &endtime);
+  uploadtime += (endtime.tv_sec - starttime.tv_sec + 1.0 * (endtime.tv_nsec - starttime.tv_nsec) / 1000000000);
+  MYDBG(printf("# CUDA: Cuda::upload() ... end\n");)
+}
+
+void Cuda::download(int datamask)
+{
+  MYDBG(printf("# CUDA: Cuda::download() ... start\n");)
+  timespec starttime;
+  timespec endtime;
+
+  if(atom->nmax != shared_data.atom.nmax) checkResize();
+
+  CUDA_IF_BINNING(Cuda_ReverseBinning(& shared_data);)
+  clock_gettime(CLOCK_REALTIME, &starttime);
+  if(X_MASK & datamask) cu_x   ->download();
+  if(V_MASK & datamask) cu_v   ->download();
+  if(F_MASK & datamask) cu_f   ->download();
+  if(TYPE_MASK & datamask) cu_type->download();
+  if(TAG_MASK & datamask) cu_tag ->download();
+  if(MASK_MASK & datamask) cu_mask->download();
+  if(IMAGE_MASK & datamask) cu_image->download();
+
+  //if(shared_data.atom.need_eatom) cu_eatom->download();
+  //if(shared_data.atom.need_vatom) cu_vatom->download();
+
+  if(shared_data.atom.q_flag)
+	  if(Q_MASK & datamask) cu_q    ->download();
+
+  if(atom->rmass)
+	  if(RMASS_MASK & datamask) cu_rmass->download();
+
+  if(atom->radius)
+	  if(RADIUS_MASK & datamask) cu_radius->download();
+
+  if(atom->omega)
+	  if(OMEGA_MASK & datamask) cu_omega->download();
+
+  if(atom->torque)
+	  if(TORQUE_MASK & datamask) cu_torque->download();
+
+  if(atom->special)
+	  if(SPECIAL_MASK & datamask) cu_special->download();
+
+  if(atom->nspecial)
+	  if(SPECIAL_MASK & datamask) cu_nspecial->download();
+
+  if(atom->molecule)
+	  if(MOLECULE_MASK & datamask) cu_molecule->download();
+
+  if(cu_eatom) cu_eatom->download();
+
+  if(cu_vatom) cu_vatom->download();
+
+  clock_gettime(CLOCK_REALTIME, &endtime);
+  downloadtime += (endtime.tv_sec - starttime.tv_sec + 1.0 * (endtime.tv_nsec - starttime.tv_nsec) / 1000000000);
+  MYDBG(printf("# CUDA: Cuda::download() ... end\n");)
+}
+
 void Cuda::downloadX()
 {
   Cuda_Pair_RevertXType(& this->shared_data);
diff --git a/src/USER-CUDA/domain_cuda.cpp b/src/USER-CUDA/domain_cuda.cpp
index eb5a242457..11dc7f0878 100644
--- a/src/USER-CUDA/domain_cuda.cpp
+++ b/src/USER-CUDA/domain_cuda.cpp
@@ -43,28 +43,28 @@ using namespace LAMMPS_NS;
 #define SMALL 1.0e-4
 #define DELTA 1
 
-enum{NO_REMAP,X_REMAP,V_REMAP};                   // same as fix_deform.cpp
+enum {NO_REMAP, X_REMAP, V_REMAP};                // same as fix_deform.cpp
 
 /* ----------------------------------------------------------------------
    default is periodic
 ------------------------------------------------------------------------- */
 
-DomainCuda::DomainCuda(LAMMPS *lmp) : Domain(lmp)
+DomainCuda::DomainCuda(LAMMPS* lmp) : Domain(lmp)
 {
   cuda = lmp->cuda;
-   if(cuda == NULL)
-        error->all(FLERR,"You cannot use a /cuda class, without activating 'cuda' acceleration. Provide '-c on' as command-line argument to LAMMPS..");
+
+  if(cuda == NULL)
+    error->all(FLERR, "You cannot use a /cuda class, without activating 'cuda' acceleration. Provide '-c on' as command-line argument to LAMMPS..");
 }
 
 /* ---------------------------------------------------------------------- */
 
 void DomainCuda::init()
 {
-  cuda->accelerator(0,NULL);
+  cuda->accelerator(0, NULL);
   Domain::init();
 
-  if(not cuda->finished_run)
-  {
+  if(not cuda->finished_run) {
     cuda->setDomainParams();
     Cuda_Domain_Init(&cuda->shared_data);
   }
@@ -79,8 +79,7 @@ void DomainCuda::set_global_box()
 {
   Domain::set_global_box();
 
-  if(not cuda->finished_run)
-  {
+  if(not cuda->finished_run) {
     cuda->setDomainParams();
   }
 }
@@ -95,8 +94,7 @@ void DomainCuda::set_lamda_box()
 {
   Domain::set_lamda_box();
 
-  if(not cuda->finished_run)
-  {
+  if(not cuda->finished_run) {
     cuda->setDomainParams();
   }
 }
@@ -111,9 +109,8 @@ void DomainCuda::set_local_box()
 {
   Domain::set_local_box();
 
-  if(not cuda->finished_run)
-  {
-   // cuda->setDomainParams();
+  if(not cuda->finished_run) {
+    // cuda->setDomainParams();
     //Cuda_Domain_Init(&cuda->shared_data);
   }
 }
@@ -126,39 +123,37 @@ void DomainCuda::set_local_box()
 
 void DomainCuda::reset_box()
 {
-  if (nonperiodic == 2) {
+  if(nonperiodic == 2) {
 
     // convert back to box coords for shrink-wrap operation
 
-    if (triclinic) lamda2x(atom->nlocal);
+    if(triclinic) lamda2x(atom->nlocal);
 
     // compute extent of atoms on this proc
 
-    double extent[3][2],all[3][2];
+    double extent[3][2], all[3][2];
 
     extent[2][0] = extent[1][0] = extent[0][0] = BIG;
     extent[2][1] = extent[1][1] = extent[0][1] = -BIG;
 
-    double **x = atom->x;
+    double** x = atom->x;
     int nlocal = atom->nlocal;
 
-    if (cuda->finished_setup&&(!cuda->oncpu))
-      {
-        extent[0][0]=cuda->extent[0];
-        extent[0][1]=cuda->extent[1];
-        extent[1][0]=cuda->extent[2];
-        extent[1][1]=cuda->extent[3];
-        extent[2][0]=cuda->extent[4];
-        extent[2][1]=cuda->extent[5];
-      }
-    else
-      for (int i = 0; i < nlocal; i++) {
-        extent[0][0] = MIN(extent[0][0],x[i][0]);
-        extent[0][1] = MAX(extent[0][1],x[i][0]);
-        extent[1][0] = MIN(extent[1][0],x[i][1]);
-        extent[1][1] = MAX(extent[1][1],x[i][1]);
-        extent[2][0] = MIN(extent[2][0],x[i][2]);
-        extent[2][1] = MAX(extent[2][1],x[i][2]);
+    if(cuda->finished_setup && (!cuda->oncpu)) {
+      extent[0][0] = cuda->extent[0];
+      extent[0][1] = cuda->extent[1];
+      extent[1][0] = cuda->extent[2];
+      extent[1][1] = cuda->extent[3];
+      extent[2][0] = cuda->extent[4];
+      extent[2][1] = cuda->extent[5];
+    } else
+      for(int i = 0; i < nlocal; i++) {
+        extent[0][0] = MIN(extent[0][0], x[i][0]);
+        extent[0][1] = MAX(extent[0][1], x[i][0]);
+        extent[1][0] = MIN(extent[1][0], x[i][1]);
+        extent[1][1] = MAX(extent[1][1], x[i][1]);
+        extent[2][0] = MIN(extent[2][0], x[i][2]);
+        extent[2][1] = MAX(extent[2][1], x[i][2]);
       }
 
     // compute extent across all procs
@@ -168,39 +163,116 @@ void DomainCuda::reset_box()
     extent[1][0] = -extent[1][0];
     extent[2][0] = -extent[2][0];
 
-    MPI_Allreduce(extent,all,6,MPI_DOUBLE,MPI_MAX,world);
+    MPI_Allreduce(extent, all, 6, MPI_DOUBLE, MPI_MAX, world);
 
     // in shrink-wrapped dims, set box by atom extent
     // if minimum set, enforce min box size settings
 
-    if (xperiodic == 0) {
-      if (boundary[0][0] == 2) boxlo[0] = -all[0][0] - SMALL;
-      else if (boundary[0][0] == 3) boxlo[0] = MIN(-all[0][0]-SMALL,minxlo);
-      if (boundary[0][1] == 2) boxhi[0] = all[0][1] + SMALL;
-      else if (boundary[0][1] == 3) boxhi[0] = MAX(all[0][1]+SMALL,minxhi);
-      if (boxlo[0] > boxhi[0]) error->all(FLERR,"Illegal simulation box");
-    }
-    if (yperiodic == 0) {
-      if (boundary[1][0] == 2) boxlo[1] = -all[1][0] - SMALL;
-      else if (boundary[1][0] == 3) boxlo[1] = MIN(-all[1][0]-SMALL,minylo);
-      if (boundary[1][1] == 2) boxhi[1] = all[1][1] + SMALL;
-      else if (boundary[1][1] == 3) boxhi[1] = MAX(all[1][1]+SMALL,minyhi);
-      if (boxlo[1] > boxhi[1]) error->all(FLERR,"Illegal simulation box");
-    }
-    if (zperiodic == 0) {
-      if (boundary[2][0] == 2) boxlo[2] = -all[2][0] - SMALL;
-      else if (boundary[2][0] == 3) boxlo[2] = MIN(-all[2][0]-SMALL,minzlo);
-      if (boundary[2][1] == 2) boxhi[2] = all[2][1] + SMALL;
-      else if (boundary[2][1] == 3) boxhi[2] = MAX(all[2][1]+SMALL,minzhi);
-      if (boxlo[2] > boxhi[2]) error->all(FLERR,"Illegal simulation box");
+    if(triclinic == 0) {
+      if(xperiodic == 0) {
+        if(boundary[0][0] == 2) boxlo[0] = -all[0][0] - small[0];
+        else if(boundary[0][0] == 3)
+          boxlo[0] = MIN(-all[0][0] - small[0], minxlo);
+
+        if(boundary[0][1] == 2) boxhi[0] = all[0][1] + small[0];
+        else if(boundary[0][1] == 3) boxhi[0] = MAX(all[0][1] + small[0], minxhi);
+
+        if(boxlo[0] > boxhi[0]) error->all(FLERR, "Illegal simulation box");
+      }
+
+      if(yperiodic == 0) {
+        if(boundary[1][0] == 2) boxlo[1] = -all[1][0] - small[1];
+        else if(boundary[1][0] == 3)
+          boxlo[1] = MIN(-all[1][0] - small[1], minylo);
+
+        if(boundary[1][1] == 2) boxhi[1] = all[1][1] + small[1];
+        else if(boundary[1][1] == 3) boxhi[1] = MAX(all[1][1] + small[1], minyhi);
+
+        if(boxlo[1] > boxhi[1]) error->all(FLERR, "Illegal simulation box");
+      }
+
+      if(zperiodic == 0) {
+        if(boundary[2][0] == 2) boxlo[2] = -all[2][0] - small[2];
+        else if(boundary[2][0] == 3)
+          boxlo[2] = MIN(-all[2][0] - small[2], minzlo);
+
+        if(boundary[2][1] == 2) boxhi[2] = all[2][1] + small[2];
+        else if(boundary[2][1] == 3) boxhi[2] = MAX(all[2][1] + small[2], minzhi);
+
+        if(boxlo[2] > boxhi[2]) error->all(FLERR, "Illegal simulation box");
+      }
+
+    } else {
+      double lo[3], hi[3];
+
+      if(xperiodic == 0) {
+        lo[0] = -all[0][0];
+        lo[1] = 0.0;
+        lo[2] = 0.0;
+        Domain::lamda2x(lo, lo);
+        hi[0] = all[0][1];
+        hi[1] = 0.0;
+        hi[2] = 0.0;
+        Domain::lamda2x(hi, hi);
+
+        if(boundary[0][0] == 2) boxlo[0] = lo[0] - small[0];
+        else if(boundary[0][0] == 3) boxlo[0] = MIN(lo[0] - small[0], minxlo);
+
+        if(boundary[0][1] == 2) boxhi[0] = hi[0] + small[0];
+        else if(boundary[0][1] == 3) boxhi[0] = MAX(hi[0] + small[0], minxhi);
+
+        if(boxlo[0] > boxhi[0]) error->all(FLERR, "Illegal simulation box");
+      }
+
+      if(yperiodic == 0) {
+        lo[0] = 0.0;
+        lo[1] = -all[1][0];
+        lo[2] = 0.0;
+        Domain::lamda2x(lo, lo);
+        hi[0] = 0.0;
+        hi[1] = all[1][1];
+        hi[2] = 0.0;
+        Domain::lamda2x(hi, hi);
+
+        if(boundary[1][0] == 2) boxlo[1] = lo[1] - small[1];
+        else if(boundary[1][0] == 3) boxlo[1] = MIN(lo[1] - small[1], minylo);
+
+        if(boundary[1][1] == 2) boxhi[1] = hi[1] + small[1];
+        else if(boundary[1][1] == 3) boxhi[1] = MAX(hi[1] + small[1], minyhi);
+
+        if(boxlo[1] > boxhi[1]) error->all(FLERR, "Illegal simulation box");
+
+        //xy *= (boxhi[1]-boxlo[1]) / yprd;
+      }
+
+      if(zperiodic == 0) {
+        lo[0] = 0.0;
+        lo[1] = 0.0;
+        lo[2] = -all[2][0];
+        Domain::lamda2x(lo, lo);
+        hi[0] = 0.0;
+        hi[1] = 0.0;
+        hi[2] = all[2][1];
+        Domain::lamda2x(hi, hi);
+
+        if(boundary[2][0] == 2) boxlo[2] = lo[2] - small[2];
+        else if(boundary[2][0] == 3) boxlo[2] = MIN(lo[2] - small[2], minzlo);
+
+        if(boundary[2][1] == 2) boxhi[2] = hi[2] + small[2];
+        else if(boundary[2][1] == 3) boxhi[2] = MAX(hi[2] + small[2], minzhi);
+
+        if(boxlo[2] > boxhi[2]) error->all(FLERR, "Illegal simulation box");
+
+        //xz *= (boxhi[2]-boxlo[2]) / xprd;
+        //yz *= (boxhi[2]-boxlo[2]) / yprd;
+      }
     }
   }
 
   set_global_box();
   set_local_box();
 
-  if(not cuda->finished_run)
-  {
+  if(not cuda->finished_run) {
     cuda->setDomainParams();
     Cuda_Domain_Init(&cuda->shared_data);
   }
@@ -208,7 +280,7 @@ void DomainCuda::reset_box()
   // if shrink-wrapped, convert to lamda coords for new box
   // must re-invoke pbc() b/c x2lamda result can be outside 0,1 due to roundoff
 
-  if (nonperiodic == 2 && triclinic) {
+  if(nonperiodic == 2 && triclinic) {
     x2lamda(atom->nlocal);
     pbc();
   }
@@ -227,10 +299,9 @@ void DomainCuda::reset_box()
 
 void DomainCuda::pbc()
 {
-  if(cuda->finished_setup&&(!cuda->oncpu))
-  {
-          cuda->setDomainParams();
-    Cuda_Domain_PBC(&cuda->shared_data, deform_vremap, deform_groupbit,cuda->extent);
+  if(cuda->finished_setup && (!cuda->oncpu)) {
+    cuda->setDomainParams();
+    Cuda_Domain_PBC(&cuda->shared_data, deform_vremap, deform_groupbit, cuda->extent);
     return;
   }
 
@@ -245,9 +316,8 @@ void DomainCuda::pbc()
 
 void DomainCuda::lamda2x(int n)
 {
-  if(cuda->finished_setup&&(!cuda->oncpu))
-  {
-    Cuda_Domain_lamda2x(&cuda->shared_data,n);
+  if(cuda->finished_setup && (!cuda->oncpu)) {
+    Cuda_Domain_lamda2x(&cuda->shared_data, n);
     return;
   }
 
@@ -261,9 +331,8 @@ void DomainCuda::lamda2x(int n)
 
 void DomainCuda::x2lamda(int n)
 {
-  if(cuda->finished_setup&&(!cuda->oncpu))
-  {
-    Cuda_Domain_x2lamda(&cuda->shared_data,n);
+  if(cuda->finished_setup && (!cuda->oncpu)) {
+    Cuda_Domain_x2lamda(&cuda->shared_data, n);
     return;
   }
 
diff --git a/src/USER-CUDA/fix_aveforce_cuda.cpp b/src/USER-CUDA/fix_aveforce_cuda.cpp
index ed2ca925a6..f000996669 100644
--- a/src/USER-CUDA/fix_aveforce_cuda.cpp
+++ b/src/USER-CUDA/fix_aveforce_cuda.cpp
@@ -34,11 +34,17 @@
 #include "domain.h"
 #include "cuda.h"
 #include "cuda_modify_flags.h"
+#include "variable.h"
+#include "input.h"
+#include "modify.h"
+
 
 using namespace LAMMPS_NS;
 using namespace FixConst;
 using namespace FixConstCuda;
 
+enum{NONE,CONSTANT,EQUAL};
+
 /* ---------------------------------------------------------------------- */
 
 FixAveForceCuda::FixAveForceCuda(LAMMPS *lmp, int narg, char **arg) :
@@ -55,13 +61,39 @@ FixAveForceCuda::FixAveForceCuda(LAMMPS *lmp, int narg, char **arg) :
   global_freq = 1;
   extvector = 1;
 
-  xflag = yflag = zflag = 1;
-  if (strcmp(arg[3],"NULL") == 0) xflag = 0;
-  else xvalue = atof(arg[3]);
-  if (strcmp(arg[4],"NULL") == 0) yflag = 0;
-  else yvalue = atof(arg[4]);
-  if (strcmp(arg[5],"NULL") == 0) zflag = 0;
-  else zvalue = atof(arg[5]);
+  xstr = ystr = zstr = NULL;
+  xvalue = yvalue = zvalue = 0;
+
+  if (strstr(arg[3],"v_") == arg[3]) {
+    int n = strlen(&arg[3][2]) + 1;
+    xstr = new char[n];
+    strcpy(xstr,&arg[3][2]);
+  } else if (strcmp(arg[3],"NULL") == 0) {
+    xstyle = NONE;
+  } else {
+    xvalue = atof(arg[3]);
+    xstyle = CONSTANT;
+  }
+  if (strstr(arg[4],"v_") == arg[4]) {
+    int n = strlen(&arg[4][2]) + 1;
+    ystr = new char[n];
+    strcpy(ystr,&arg[4][2]);
+  } else if (strcmp(arg[4],"NULL") == 0) {
+    ystyle = NONE;
+  } else {
+    yvalue = atof(arg[4]);
+    ystyle = CONSTANT;
+  }
+  if (strstr(arg[5],"v_") == arg[5]) {
+    int n = strlen(&arg[5][2]) + 1;
+    zstr = new char[n];
+    strcpy(zstr,&arg[5][2]);
+  } else if (strcmp(arg[5],"NULL") == 0) {
+    zstyle = NONE;
+  } else {
+    zvalue = atof(arg[5]);
+    zstyle = CONSTANT;
+  }
 
   // optional args
 
@@ -86,6 +118,13 @@ FixAveForceCuda::FixAveForceCuda(LAMMPS *lmp, int narg, char **arg) :
 
 }
 
+FixAveForceCuda::~FixAveForceCuda()
+{
+  delete [] xstr;
+  delete [] ystr;
+  delete [] zstr;
+}
+
 /* ---------------------------------------------------------------------- */
 
 int FixAveForceCuda::setmask()
@@ -103,8 +142,31 @@ void FixAveForceCuda::init()
 {
   if(not cu_foriginal)
   cu_foriginal = new cCudaData<double, F_FLOAT, x> (foriginal,4);
-  if (strstr(update->integrate_style,"respa"))
-    nlevels_respa = ((Respa *) update->integrate)->nlevels;
+
+  if (xstr) {
+    xvar = input->variable->find(xstr);
+    if (xvar < 0)
+      error->all(FLERR,"Variable name for fix aveforce does not exist");
+    if (input->variable->equalstyle(xvar)) xstyle = EQUAL;
+    else error->all(FLERR,"Variable for fix aveforce is invalid style");
+  }
+  if (ystr) {
+    yvar = input->variable->find(ystr);
+    if (yvar < 0)
+      error->all(FLERR,"Variable name for fix aveforce does not exist");
+    if (input->variable->equalstyle(yvar)) ystyle = EQUAL;
+    else error->all(FLERR,"Variable for fix aveforce is invalid style");
+  }
+  if (zstr) {
+    zvar = input->variable->find(zstr);
+    if (zvar < 0)
+      error->all(FLERR,"Variable name for fix aveforce does not exist");
+    if (input->variable->equalstyle(zvar)) zstyle = EQUAL;
+    else error->all(FLERR,"Variable for fix aveforce is invalid style");
+  }
+
+  if (xstyle == EQUAL || ystyle == EQUAL || zstyle == EQUAL) varflag = EQUAL;
+  else varflag = CONSTANT;
 
   // ncount = total # of atoms in group
 
@@ -126,13 +188,6 @@ void FixAveForceCuda::setup(int vflag)
   }
   else
   {
-    cuda->cu_f->download();
-    for (int ilevel = 0; ilevel < nlevels_respa; ilevel++) {
-      ((Respa *) update->integrate)->copy_flevel_f(ilevel);
-      post_force_respa(vflag,ilevel,0);
-      ((Respa *) update->integrate)->copy_f_flevel(ilevel);
-    }
-    cuda->cu_f->upload();
   }
 }
 
@@ -159,6 +214,21 @@ void FixAveForceCuda::post_force(int vflag)
   MPI_Allreduce(foriginal,foriginal_all,4,MPI_DOUBLE,MPI_SUM,world);
   int ncount = static_cast<int> (foriginal_all[3]);
   if (ncount == 0) return;
+
+  if (varflag == EQUAL) {
+    unsigned int datamask = EMPTY_MASK;
+    if (xstyle == EQUAL) datamask &= input->variable->data_mask(xstr);
+    if (ystyle == EQUAL) datamask &= input->variable->data_mask(ystr);
+    if (zstyle == EQUAL) datamask &= input->variable->data_mask(zstr);
+
+    cuda->download(datamask);
+    modify->clearstep_compute();
+    if (xstyle == EQUAL) xvalue = input->variable->compute_equal(xvar);
+    if (ystyle == EQUAL) yvalue = input->variable->compute_equal(yvar);
+    if (zstyle == EQUAL) zvalue = input->variable->compute_equal(zvar);
+    modify->addstep_compute(update->ntimestep + 1);
+  }
+
   double fave[3];
   fave[0] = foriginal_all[0]/ncount + xvalue;
   fave[1] = foriginal_all[1]/ncount + yvalue;
@@ -167,51 +237,14 @@ void FixAveForceCuda::post_force(int vflag)
   // set force of all participating atoms to same value
   // only for active dimensions
 
-  Cuda_FixAveForceCuda_PostForce_Set(&cuda->shared_data, groupbit,xflag,yflag,zflag,fave[0],fave[1],fave[2]);
+  Cuda_FixAveForceCuda_PostForce_Set(&cuda->shared_data, groupbit,!(xstyle==NONE),!(ystyle==NONE),!(zstyle==NONE),fave[0],fave[1],fave[2]);
 }
 
 /* ---------------------------------------------------------------------- */
 
 void FixAveForceCuda::post_force_respa(int vflag, int ilevel, int iloop)
 {
-  // ave + extra force on outermost level
-  // just ave on inner levels
-  if (ilevel == nlevels_respa-1) post_force(vflag);
-  else {
-    cuda->cu_f->download();
-    cuda->cu_mask->download();
-    double **f = atom->f;
-    int *mask = atom->mask;
-    int nlocal = atom->nlocal;
 
-    double foriginal[4];
-    foriginal[0] = foriginal[1] = foriginal[2] = foriginal[3] = 0.0;
-
-    for (int i = 0; i < nlocal; i++)
-      if (mask[i] & groupbit) {
-        foriginal[0] += f[i][0];
-        foriginal[1] += f[i][1];
-        foriginal[2] += f[i][2];
-        foriginal[3] += 1;
-
-      }
-
-    MPI_Allreduce(foriginal,foriginal_all,4,MPI_DOUBLE,MPI_SUM,world);
-    int ncount = static_cast<int> (foriginal_all[3]);
-    if (ncount == 0) return;
-    double fave[3];
-    fave[0] = foriginal_all[0]/ncount;
-    fave[1] = foriginal_all[1]/ncount;
-    fave[2] = foriginal_all[2]/ncount;
-
-    for (int i = 0; i < nlocal; i++)
-      if (mask[i] & groupbit) {
-        if (xflag) f[i][0] = fave[0];
-        if (yflag) f[i][1] = fave[1];
-        if (zflag) f[i][2] = fave[2];
-      }
-    cuda->cu_f->upload();
-  }
 }
 
 /* ---------------------------------------------------------------------- */
diff --git a/src/USER-CUDA/fix_aveforce_cuda.h b/src/USER-CUDA/fix_aveforce_cuda.h
index 418210e472..4635ebc0cb 100644
--- a/src/USER-CUDA/fix_aveforce_cuda.h
+++ b/src/USER-CUDA/fix_aveforce_cuda.h
@@ -39,6 +39,7 @@ namespace LAMMPS_NS {
 class FixAveForceCuda : public Fix {
  public:
   FixAveForceCuda(class LAMMPS *, int, char **);
+  ~FixAveForceCuda();
   int setmask();
   void init();
   void setup(int);
@@ -50,12 +51,16 @@ class FixAveForceCuda : public Fix {
 
  private:
   class Cuda *cuda;
-  int xflag,yflag,zflag,iregion;
+  char *xstr,*ystr,*zstr;
+  int iregion;
   double xvalue,yvalue,zvalue;
   double foriginal_all[4];
   double foriginal[4];
   cCudaData<double     , F_FLOAT                   , x>* cu_foriginal;
   int nlevels_respa;
+  int varflag;
+  int xvar,yvar,zvar,xstyle,ystyle,zstyle;
+
 };
 
 }
diff --git a/src/USER-CUDA/pair_eam_cuda.cpp b/src/USER-CUDA/pair_eam_cuda.cpp
index 1e65aaf719..551313db12 100644
--- a/src/USER-CUDA/pair_eam_cuda.cpp
+++ b/src/USER-CUDA/pair_eam_cuda.cpp
@@ -64,22 +64,23 @@ using namespace LAMMPS_NS;
 
 /* ---------------------------------------------------------------------- */
 
-PairEAMCuda::PairEAMCuda(LAMMPS *lmp) : PairEAM(lmp)
+PairEAMCuda::PairEAMCuda(LAMMPS* lmp) : PairEAM(lmp)
 {
   cuda = lmp->cuda;
-   if(cuda == NULL)
-        error->all(FLERR,"You cannot use a /cuda class, without activating 'cuda' acceleration. Provide '-c on' as command-line argument to LAMMPS..");
 
-        allocated2 = false;
-        cuda->shared_data.pair.cudable_force = 1;
-        cuda->shared_data.pair.override_block_per_atom = 0;
+  if(cuda == NULL)
+    error->all(FLERR, "You cannot use a /cuda class, without activating 'cuda' acceleration. Provide '-c on' as command-line argument to LAMMPS..");
 
-        cuda->setSystemParams();
-        cu_rho=NULL;
-        cu_fp=NULL;
-    cu_frho_spline = NULL;
-    cu_z2r_spline = NULL;
-    cu_rhor_spline = NULL;
+  allocated2 = false;
+  cuda->shared_data.pair.cudable_force = 1;
+  cuda->shared_data.pair.override_block_per_atom = 0;
+
+  cuda->setSystemParams();
+  cu_rho = NULL;
+  cu_fp = NULL;
+  cu_frho_spline = NULL;
+  cu_z2r_spline = NULL;
+  cu_rhor_spline = NULL;
 }
 
 /* ----------------------------------------------------------------------
@@ -88,161 +89,176 @@ PairEAMCuda::PairEAMCuda(LAMMPS *lmp) : PairEAM(lmp)
 
 void PairEAMCuda::allocate()
 {
-        if(! allocated) PairEAM::allocate();
-                cuda->shared_data.pair.cutsq     = cutsq;
-        cuda->shared_data.pair.cut_global = (F_FLOAT) cutforcesq;
+  if(! allocated) PairEAM::allocate();
+
+  cuda->shared_data.pair.cutsq     = cutsq;
+  cuda->shared_data.pair.cut_global = (F_FLOAT) cutforcesq;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairEAMCuda::compute(int eflag, int vflag)
 {
-        cuda->shared_data.pair.cut_global = (F_FLOAT) cutforcesq;
-    cuda->shared_data.pair.use_block_per_atom = 0;
-    cuda->shared_data.pair.collect_forces_later = 0;
-    if (atom->nmax > nmax) {
+  cuda->shared_data.pair.cut_global = (F_FLOAT) cutforcesq;
+  cuda->shared_data.pair.use_block_per_atom = 0;
+  cuda->shared_data.pair.collect_forces_later = 0;
+
+  if(atom->nmax > nmax || cuda->finished_setup == false) {
     memory->destroy(rho);
     memory->destroy(fp);
     nmax = atom->nmax;
-    memory->create(rho,nmax,"pair:rho");
-    memory->create(fp,nmax,"pair:fp");
-        delete cu_rho;
-        delete cu_fp;
-        cu_rho = new cCudaData<double, F_FLOAT, x> (rho, atom->nmax);
-        cu_fp  = new cCudaData<double, F_FLOAT, x> (fp, atom->nmax);
-        Cuda_PairEAMCuda_Init(&cuda->shared_data,rdr,rdrho,nfrho,nrhor,nr,nrho,nz2r,
-                cu_frho_spline->dev_data(),cu_rhor_spline->dev_data(),cu_z2r_spline->dev_data(),
-                cu_rho->dev_data(),cu_fp->dev_data(),type2frho,type2z2r,type2rhor);
-          }
+    memory->create(rho, nmax, "pair:rho");
+    memory->create(fp, nmax, "pair:fp");
+    delete cu_rho;
+    delete cu_fp;
+    cu_rho = new cCudaData<double, F_FLOAT, x> (rho, atom->nmax);
+    cu_fp  = new cCudaData<double, F_FLOAT, x> (fp, atom->nmax);
+    Cuda_PairEAMCuda_Init(&cuda->shared_data, rdr, rdrho, nfrho, nrhor, nr, nrho, nz2r,
+                          cu_frho_spline->dev_data(), cu_rhor_spline->dev_data(), cu_z2r_spline->dev_data(),
+                          cu_rho->dev_data(), cu_fp->dev_data(), type2frho, type2z2r, type2rhor);
+  }
 
 
 
-        if(eflag || vflag) ev_setup(eflag,vflag);
-        if(eflag) cuda->cu_eng_vdwl->upload();
-        if(vflag) cuda->cu_virial->upload();
+  if(eflag || vflag) ev_setup(eflag, vflag);
 
-        Cuda_PairEAM1Cuda(& cuda->shared_data, & cuda_neigh_list->sneighlist, eflag, vflag,eflag_atom,vflag_atom);
-    comm->forward_comm_pair(this);
-        Cuda_PairEAM2Cuda(& cuda->shared_data, & cuda_neigh_list->sneighlist, eflag, vflag,eflag_atom,vflag_atom);
+  if(eflag) cuda->cu_eng_vdwl->upload();
 
-        if(eflag) cuda->cu_eng_vdwl->download();
-        if(vflag) cuda->cu_virial->download();
+  if(vflag) cuda->cu_virial->upload();
+
+  Cuda_PairEAM1Cuda(& cuda->shared_data, & cuda_neigh_list->sneighlist, eflag, vflag, eflag_atom, vflag_atom);
+
+  comm->forward_comm_pair(this);
+
+  Cuda_PairEAM2Cuda(& cuda->shared_data, & cuda_neigh_list->sneighlist, eflag, vflag, eflag_atom, vflag_atom);
+
+  if(eflag) cuda->cu_eng_vdwl->download();
+
+  if(vflag) cuda->cu_virial->download();
 }
 
 /* ---------------------------------------------------------------------- */
 
-void PairEAMCuda::settings(int narg, char **arg)
+void PairEAMCuda::settings(int narg, char** arg)
 {
-        PairEAM::settings(narg, arg);
-        cuda->shared_data.pair.cut_global = (F_FLOAT) cutforcesq;
+  PairEAM::settings(narg, arg);
+  cuda->shared_data.pair.cut_global = (F_FLOAT) cutforcesq;
 }
 
 /* ---------------------------------------------------------------------- */
 
-void PairEAMCuda::coeff(int narg, char **arg)
+void PairEAMCuda::coeff(int narg, char** arg)
 {
-        PairEAM::coeff(narg, arg);
-        allocate();
+  PairEAM::coeff(narg, arg);
+  allocate();
 }
 
 void PairEAMCuda::init_style()
 {
-        MYDBG(printf("# CUDA PairEAMCuda::init_style start\n"); )
+  MYDBG(printf("# CUDA PairEAMCuda::init_style start\n");)
   // request regular or rRESPA neighbor lists
   file2array();
   array2spline();
   int irequest;
 
 
-          irequest = neighbor->request(this);
-    neighbor->requests[irequest]->full = 1;
-    neighbor->requests[irequest]->half = 0;
-    neighbor->requests[irequest]->cudable = 1;
+  irequest = neighbor->request(this);
+  neighbor->requests[irequest]->full = 1;
+  neighbor->requests[irequest]->half = 0;
+  neighbor->requests[irequest]->cudable = 1;
 
-        delete cu_rhor_spline;
-        delete cu_z2r_spline;
-        delete cu_frho_spline;
+  delete cu_rhor_spline;
+  delete cu_z2r_spline;
+  delete cu_frho_spline;
 
-        cu_rhor_spline = new cCudaData<double, F_FLOAT, xyz>((double*)rhor_spline,nrhor,nr+1,EAM_COEFF_LENGTH);
-        cu_z2r_spline = new cCudaData<double, F_FLOAT, xyz>((double*)z2r_spline,nz2r,nr+1,EAM_COEFF_LENGTH);
-        cu_frho_spline = new cCudaData<double, F_FLOAT, xyz>((double*)frho_spline,nfrho,nrho+1,EAM_COEFF_LENGTH);
+  cu_rhor_spline = new cCudaData<double, F_FLOAT, xyz>((double*)rhor_spline, nrhor, nr + 1, EAM_COEFF_LENGTH);
+  cu_z2r_spline = new cCudaData<double, F_FLOAT, xyz>((double*)z2r_spline, nz2r, nr + 1, EAM_COEFF_LENGTH);
+  cu_frho_spline = new cCudaData<double, F_FLOAT, xyz>((double*)frho_spline, nfrho, nrho + 1, EAM_COEFF_LENGTH);
 
-        cu_rhor_spline->upload();
-        cu_z2r_spline->upload();
-        cu_frho_spline->upload();
+  cu_rhor_spline->upload();
+  cu_z2r_spline->upload();
+  cu_frho_spline->upload();
 
-  MYDBG(printf("# CUDA PairEAMCuda::init_style end\n"); )
+  MYDBG(printf("# CUDA PairEAMCuda::init_style end\n");)
 }
 
-void PairEAMCuda::init_list(int id, NeighList *ptr)
+void PairEAMCuda::init_list(int id, NeighList* ptr)
 {
-        MYDBG(printf("# CUDA PairEAMCuda::init_list\n");)
-        PairEAM::init_list(id, ptr);
-        #ifndef CUDA_USE_BINNING
-        // right now we can only handle verlet (id 0), not respa
-        if(id == 0) cuda_neigh_list = cuda->registerNeighborList(ptr);
-        // see Neighbor::init() for details on lammps lists' logic
-        #endif
-        MYDBG(printf("# CUDA PairEAMCuda::init_list end\n");)
+  MYDBG(printf("# CUDA PairEAMCuda::init_list\n");)
+  PairEAM::init_list(id, ptr);
+
+  // right now we can only handle verlet (id 0), not respa
+  if(id == 0) cuda_neigh_list = cuda->registerNeighborList(ptr);
+
+  // see Neighbor::init() for details on lammps lists' logic
+  MYDBG(printf("# CUDA PairEAMCuda::init_list end\n");)
 }
 
 void PairEAMCuda::array2spline()
 {
-  rdr = 1.0/dr;
-  rdrho = 1.0/drho;
+  rdr = 1.0 / dr;
+  rdrho = 1.0 / drho;
 
   memory->destroy(frho_spline);
   memory->destroy(rhor_spline);
   memory->destroy(z2r_spline);
 
-  memory->create(frho_spline,nfrho,nrho+1,8,"pair:frho");
-  memory->create(rhor_spline,nrhor,nr+1,8,"pair:rhor");
-  memory->create(z2r_spline,nz2r,nr+1,8,"pair:z2r");
+  memory->create(frho_spline, nfrho, nrho + 1, 8, "pair:frho");
+  memory->create(rhor_spline, nrhor, nr + 1, 8, "pair:rhor");
+  memory->create(z2r_spline, nz2r, nr + 1, 8, "pair:z2r");
 
-  for (int i = 0; i < nfrho; i++){
-    interpolate(nrho,drho,frho[i],frho_spline[i]);
-    for(int j=0;j<nrho+1;j++)
-      frho_spline[i][j][7]=frho_spline[i][j][3];
+  for(int i = 0; i < nfrho; i++) {
+    interpolate(nrho, drho, frho[i], frho_spline[i]);
+
+    for(int j = 0; j < nrho + 1; j++)
+      frho_spline[i][j][7] = frho_spline[i][j][3];
   }
 
-  for (int i = 0; i < nrhor; i++){
-    interpolate(nr,dr,rhor[i],rhor_spline[i]);
-    for(int j=0;j<nr+1;j++)
-      rhor_spline[i][j][7]=rhor_spline[i][j][3];
+  for(int i = 0; i < nrhor; i++) {
+    interpolate(nr, dr, rhor[i], rhor_spline[i]);
+
+    for(int j = 0; j < nr + 1; j++)
+      rhor_spline[i][j][7] = rhor_spline[i][j][3];
   }
 
-  for (int i = 0; i < nz2r; i++){
-    interpolate(nr,dr,z2r[i],z2r_spline[i]);
-    for(int j=0;j<nr+1;j++)
-      z2r_spline[i][j][7]=z2r_spline[i][j][3];
+  for(int i = 0; i < nz2r; i++) {
+    interpolate(nr, dr, z2r[i], z2r_spline[i]);
+
+    for(int j = 0; j < nr + 1; j++)
+      z2r_spline[i][j][7] = z2r_spline[i][j][3];
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
-int PairEAMCuda::pack_comm(int n, int *iswap, double *buf, int pbc_flag, int *pbc)
+int PairEAMCuda::pack_comm(int n, int* iswap, double* buf, int pbc_flag, int* pbc)
 {
-  Cuda_PairEAMCuda_PackComm(&cuda->shared_data,n,*iswap,buf);
-  if(sizeof(F_FLOAT)<sizeof(double)) return 1;
+  Cuda_PairEAMCuda_PackComm(&cuda->shared_data, n, *iswap, buf);
+
+  if(sizeof(F_FLOAT) < sizeof(double)) return 1;
   else return 1;
 }
 
 /* ---------------------------------------------------------------------- */
 
-void PairEAMCuda::unpack_comm(int n, int first, double *buf)
+void PairEAMCuda::unpack_comm(int n, int first, double* buf)
 {
-  Cuda_PairEAMCuda_UnpackComm(&cuda->shared_data,n,first,buf,cu_fp->dev_data());
+  Cuda_PairEAMCuda_UnpackComm(&cuda->shared_data, n, first, buf, cu_fp->dev_data());
 }
 
 void PairEAMCuda::ev_setup(int eflag, int vflag)
 {
-  int maxeatomold=maxeatom;
-  PairEAM::ev_setup(eflag,vflag);
+  int maxeatomold = maxeatom;
+  PairEAM::ev_setup(eflag, vflag);
 
-  if (eflag_atom && atom->nmax > maxeatomold)
-        {delete cuda->cu_eatom; cuda->cu_eatom = new cCudaData<double, ENERGY_FLOAT, x > ((double*)eatom, & cuda->shared_data.atom.eatom , atom->nmax  );}
+  if(eflag_atom && atom->nmax > maxeatomold) {
+    delete cuda->cu_eatom;
+    cuda->cu_eatom = new cCudaData<double, ENERGY_FLOAT, x > ((double*)eatom, & cuda->shared_data.atom.eatom , atom->nmax);
+  }
 
-  if (vflag_atom && atom->nmax > maxeatomold)
-        {delete cuda->cu_vatom; cuda->cu_vatom = new cCudaData<double, ENERGY_FLOAT, yx > ((double*)vatom, & cuda->shared_data.atom.vatom , atom->nmax, 6  );}
+  if(vflag_atom && atom->nmax > maxeatomold) {
+    delete cuda->cu_vatom;
+    cuda->cu_vatom = new cCudaData<double, ENERGY_FLOAT, yx > ((double*)vatom, & cuda->shared_data.atom.vatom , atom->nmax, 6);
+  }
 
 }
diff --git a/src/USER-CUDA/verlet_cuda.cpp b/src/USER-CUDA/verlet_cuda.cpp
index ea044611f8..ba6289c773 100644
--- a/src/USER-CUDA/verlet_cuda.cpp
+++ b/src/USER-CUDA/verlet_cuda.cpp
@@ -93,6 +93,7 @@ void VerletCuda::setup()
 
   cuda->oncpu = true;
   cuda->begin_setup = true;
+  cuda->finished_setup = false;
   cuda->finished_run = false;
 
   time_pair = 0;
@@ -122,7 +123,6 @@ void VerletCuda::setup()
 
   cuda->allocate();
 
-
   if(comm->me == 0 && screen) fprintf(screen, "Setting up run ...\n");
 
   // setup domain, communication and neighboring
@@ -395,7 +395,7 @@ void VerletCuda::setup()
 void VerletCuda::setup_minimal(int flag)
 {
 
-
+  printf("SetupMinimal\n");
   dotestatom = 0;
   int testatom = 104;
   cuda->oncpu = true;
@@ -828,7 +828,6 @@ void VerletCuda::run(int n)
         endtime.tv_sec - starttime.tv_sec + 1.0 * (endtime.tv_nsec - starttime.tv_nsec) / 1000000000;
 
       clock_gettime(CLOCK_REALTIME, &starttime);
-
       //atom index maps are generated on CPU, and need to be transfered to GPU if they are used
       if(cuda->cu_map_array)
         cuda->cu_map_array->upload();
@@ -949,7 +948,8 @@ void VerletCuda::run(int n)
       timer->stamp(TIME_PAIR);
 
       if(neighbor->lastcall == update->ntimestep) {
-        neighbor->build_topology();
+        neighbor->build_bonded();
+
         timer->stamp(TIME_NEIGHBOR);
       }
 
diff --git a/src/angle.cpp b/src/angle.cpp
index 9d9ee2fc91..ab5d9b651b 100644
--- a/src/angle.cpp
+++ b/src/angle.cpp
@@ -17,9 +17,10 @@
 #include "comm.h"
 #include "force.h"
 #include "math_const.h"
+#include "suffix.h"
+#include "atom_masks.h"
 #include "memory.h"
 #include "error.h"
-#include "suffix.h"
 
 using namespace LAMMPS_NS;
 using namespace MathConst;
@@ -36,6 +37,9 @@ Angle::Angle(LAMMPS *lmp) : Pointers(lmp)
   maxeatom = maxvatom = 0;
   eatom = NULL;
   vatom = NULL;
+
+  datamask = ALL_MASK;
+  datamask_ext = ALL_MASK;
 }
 
 /* ---------------------------------------------------------------------- */
diff --git a/src/angle.h b/src/angle.h
index e46ae2f470..e93c72bcbe 100644
--- a/src/angle.h
+++ b/src/angle.h
@@ -28,6 +28,8 @@ class Angle : protected Pointers {
   double energy;                  // accumulated energies
   double virial[6];               // accumlated virial
   double *eatom,**vatom;          // accumulated per-atom energy/virial
+  unsigned int datamask;
+  unsigned int datamask_ext;
 
   Angle(class LAMMPS *);
   virtual ~Angle();
@@ -42,6 +44,9 @@ class Angle : protected Pointers {
   virtual double single(int, int, int, int) = 0;
   virtual double memory_usage();
 
+  virtual unsigned int data_mask() {return datamask;}
+  virtual unsigned int data_mask_ext() {return datamask_ext;}
+
  protected:
   int suffix_flag;             // suffix compatibility flag
 
diff --git a/src/atom.cpp b/src/atom.cpp
index ef89d6d1e9..805d3572be 100644
--- a/src/atom.cpp
+++ b/src/atom.cpp
@@ -32,6 +32,7 @@
 #include "domain.h"
 #include "group.h"
 #include "accelerator_cuda.h"
+#include "atom_masks.h"
 #include "memory.h"
 #include "error.h"
 
@@ -148,6 +149,9 @@ Atom::Atom(LAMMPS *lmp) : Pointers(lmp)
   atom_style = NULL;
   avec = NULL;
   create_avec("atomic",0,NULL,lmp->suffix);
+
+  datamask = ALL_MASK;
+  datamask_ext = ALL_MASK;
 }
 
 /* ---------------------------------------------------------------------- */
diff --git a/src/atom.h b/src/atom.h
index f463bfb4ef..33ab918af2 100644
--- a/src/atom.h
+++ b/src/atom.h
@@ -83,6 +83,9 @@ class Atom : protected Pointers {
   int **improper_type;
   int **improper_atom1,**improper_atom2,**improper_atom3,**improper_atom4;
 
+  unsigned int datamask;
+  unsigned int datamask_ext;
+
   // atom style and per-atom array existence flags
   // customize by adding new flag
 
diff --git a/src/atom_masks.h b/src/atom_masks.h
new file mode 100644
index 0000000000..21c5e0bb97
--- /dev/null
+++ b/src/atom_masks.h
@@ -0,0 +1,87 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifndef LMP_ATOM_MASK_H
+#define LMP_ATOM_MASK_H
+
+// per-atom data masks
+
+#define EMPTY_MASK     0x00000000
+#define ALL_MASK       0xffffffff
+#define SAMETAG_MASK   0x01000000
+#define EXTENDED_MASK  0x80000000
+
+// standard
+
+#define X_MASK         0x00000001
+#define V_MASK         0x00000002
+#define F_MASK         0x00000004
+#define TAG_MASK       0x00000008
+#define TYPE_MASK      0x00000010
+#define MASK_MASK      0x00000020
+#define IMAGE_MASK     0x00000040
+#define Q_MASK         0x00000080
+#define MOLECULE_MASK  0x00000100
+#define RMASS_MASK     0x00000200
+#define BOND_MASK      0x00000400
+#define ANGLE_MASK     0x00000800
+#define DIHEDRAL_MASK  0x00001000
+#define IMPROPER_MASK  0x00002000
+#define SPECIAL_MASK   0x00004000
+#define MAP_MASK       0x00008000
+
+// granular
+
+#define RADIUS_MASK    0x00010000
+#define DENSITY_MASK   0x00020000
+#define OMEGA_MASK     0x00040000
+#define TORQUE_MASK    0x00080000
+#define ANGMOM_MASK    0x00100000
+#define GRANULAR_MASK  0x001f0000
+
+// peridynamics
+
+#define VFRAC_MASK     0x00200000
+#define S0_MASK        0x00400000
+#define X0_MASK        0x00800000
+#define PERI_MASK      0x00e00000
+
+#define ELLIPSOID_MASK 0x00000001
+#define LINE_MASK      0x00000002
+#define TRI_MASK       0x00000004
+
+// electron
+
+#define SPIN_MASK      0x00000010
+#define ERADIUS_MASK   0x00000020
+#define ERVEL_MASK     0x00000040
+#define ERFORCE_MASK   0x00000080
+#define ERVELFORCE_MASK 0x00000100
+
+#define CS_MASK        0x00000200
+#define CSFORCE_MASK   0x00000400
+#define VFORCE_MASK    0x00000800
+
+#define ELECTRON_MASK  0x00000f00
+
+// SPH
+
+#define ETAG_MASK      0x00001000
+#define RHO_MASK       0x00002000
+#define DRHO_MASK      0x00004000
+#define E_MASK         0x00008000
+#define DE_MASK        0x00010000
+#define VEST_MASK      0x00020000
+#define CV_MASK        0x00040000
+
+#endif
diff --git a/src/bond.cpp b/src/bond.cpp
index a054cac495..8e553c298b 100644
--- a/src/bond.cpp
+++ b/src/bond.cpp
@@ -16,9 +16,10 @@
 #include "atom.h"
 #include "comm.h"
 #include "force.h"
+#include "suffix.h"
+#include "atom_masks.h"
 #include "memory.h"
 #include "error.h"
-#include "suffix.h"
 
 using namespace LAMMPS_NS;
 
@@ -37,6 +38,9 @@ Bond::Bond(LAMMPS *lmp) : Pointers(lmp)
   maxeatom = maxvatom = 0;
   eatom = NULL;
   vatom = NULL;
+
+  datamask = ALL_MASK;
+  datamask_ext = ALL_MASK;
 }
 
 /* ---------------------------------------------------------------------- */
diff --git a/src/bond.h b/src/bond.h
index e2a977d5a8..3ef25aaca0 100644
--- a/src/bond.h
+++ b/src/bond.h
@@ -28,6 +28,8 @@ class Bond : protected Pointers {
   double energy;                  // accumulated energies
   double virial[6];               // accumlated virial
   double *eatom,**vatom;          // accumulated per-atom energy/virial
+  unsigned int datamask;
+  unsigned int datamask_ext;
 
   Bond(class LAMMPS *);
   virtual ~Bond();
@@ -42,6 +44,9 @@ class Bond : protected Pointers {
   virtual double single(int, double, int, int) = 0;
   virtual double memory_usage();
 
+  virtual unsigned int data_mask() {return datamask;}
+  virtual unsigned int data_mask_ext() {return datamask_ext;}
+
  protected:
   int suffix_flag;             // suffix compatibility flag
 
diff --git a/src/compute.cpp b/src/compute.cpp
index 2be749ac12..d37b1a4899 100644
--- a/src/compute.cpp
+++ b/src/compute.cpp
@@ -21,6 +21,7 @@
 #include "domain.h"
 #include "comm.h"
 #include "group.h"
+#include "atom_masks.h"
 #include "memory.h"
 #include "error.h"
 
@@ -84,6 +85,9 @@ Compute::Compute(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp)
   // setup map for molecule IDs
 
   molmap = NULL;
+
+  datamask = ALL_MASK;
+  datamask_ext = ALL_MASK;
 }
 
 /* ---------------------------------------------------------------------- */
diff --git a/src/compute.h b/src/compute.h
index 734a1eefb2..3c770d8fb4 100644
--- a/src/compute.h
+++ b/src/compute.h
@@ -20,6 +20,7 @@ namespace LAMMPS_NS {
 
 class Compute : protected Pointers {
  public:
+
   char *id,*style;
   int igroup,groupbit;
 
@@ -77,6 +78,9 @@ class Compute : protected Pointers {
   int comm_forward;   // size of forward communication (0 if none)
   int comm_reverse;   // size of reverse communication (0 if none)
 
+  unsigned int datamask;
+  unsigned int datamask_ext;
+
   int cudable;        // 1 if compute is CUDA-enabled
 
   Compute(class LAMMPS *, int, char **);
@@ -111,6 +115,9 @@ class Compute : protected Pointers {
 
   virtual double memory_usage() {return 0.0;}
 
+  virtual int unsigned data_mask() {return datamask;}
+  virtual int unsigned data_mask_ext() {return datamask_ext;}
+
  protected:
   int extra_dof;               // extra DOF for temperature computes
   int dynamic;                 // recount atoms for temperature computes
diff --git a/src/dihedral.cpp b/src/dihedral.cpp
index 2e2ae9cc61..9cd41e211d 100644
--- a/src/dihedral.cpp
+++ b/src/dihedral.cpp
@@ -17,9 +17,10 @@
 #include "comm.h"
 #include "force.h"
 #include "pair.h"
+#include "suffix.h"
+#include "atom_masks.h"
 #include "memory.h"
 #include "error.h"
-#include "suffix.h"
 
 using namespace LAMMPS_NS;
 
@@ -37,6 +38,9 @@ Dihedral::Dihedral(LAMMPS *lmp) : Pointers(lmp)
   maxeatom = maxvatom = 0;
   eatom = NULL;
   vatom = NULL;
+
+  datamask = ALL_MASK;
+  datamask_ext = ALL_MASK;
 }
 
 /* ---------------------------------------------------------------------- */
diff --git a/src/dihedral.h b/src/dihedral.h
index 87fae227a8..ca44f96eb6 100644
--- a/src/dihedral.h
+++ b/src/dihedral.h
@@ -28,6 +28,8 @@ class Dihedral : protected Pointers {
   double energy;                     // accumulated energy
   double virial[6];                  // accumlated virial
   double *eatom,**vatom;             // accumulated per-atom energy/virial
+  unsigned int datamask;
+  unsigned int datamask_ext;
 
   Dihedral(class LAMMPS *);
   virtual ~Dihedral();
@@ -40,6 +42,9 @@ class Dihedral : protected Pointers {
   virtual void read_restart(FILE *) = 0;
   virtual double memory_usage();
 
+  virtual unsigned int data_mask() {return datamask;}
+  virtual unsigned int data_mask_ext() {return datamask_ext;}
+
  protected:
   int suffix_flag;             // suffix compatibility flag
 
diff --git a/src/domain.h b/src/domain.h
index ccfbd3a39d..19ced63bc2 100644
--- a/src/domain.h
+++ b/src/domain.h
@@ -118,8 +118,8 @@ class Domain : protected Pointers {
 
   virtual void lamda2x(int);
   virtual void x2lamda(int);
-  void lamda2x(double *, double *);
-  void x2lamda(double *, double *);
+  virtual void lamda2x(double *, double *);
+  virtual void x2lamda(double *, double *);
   void x2lamda(double *, double *, double *, double *);
   void bbox(double *, double *, double *, double *);
   void box_corners();
@@ -135,7 +135,7 @@ class Domain : protected Pointers {
     return 0;
   }
 
- private:
+ protected:
   double small[3];                  // fractions of box lengths
 };
 
diff --git a/src/fix.cpp b/src/fix.cpp
index 822a2d1fc8..6ba167e221 100644
--- a/src/fix.cpp
+++ b/src/fix.cpp
@@ -16,6 +16,7 @@
 #include "fix.h"
 #include "atom.h"
 #include "group.h"
+#include "atom_masks.h"
 #include "memory.h"
 #include "error.h"
 
@@ -68,6 +69,9 @@ Fix::Fix(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp)
 
   maxvatom = 0;
   vatom = NULL;
+
+  datamask = ALL_MASK;
+  datamask_ext = ALL_MASK;
 }
 
 /* ---------------------------------------------------------------------- */
diff --git a/src/fix.h b/src/fix.h
index 4602b3c5b5..b9ed35aba2 100644
--- a/src/fix.h
+++ b/src/fix.h
@@ -76,6 +76,9 @@ class Fix : protected Pointers {
   double virial[6];              // accumlated virial
   double **vatom;                // accumulated per-atom virial
 
+  unsigned int datamask;
+  unsigned int datamask_ext;
+
   Fix(class LAMMPS *, int, char **);
   virtual ~Fix();
   void modify_params(int, char **);
@@ -157,6 +160,9 @@ class Fix : protected Pointers {
 
   virtual double memory_usage() {return 0.0;}
 
+  virtual unsigned int data_mask() {return datamask;}
+  virtual unsigned int data_mask_ext() {return datamask_ext;}
+
  protected:
   int evflag;
   int vflag_global,vflag_atom;
diff --git a/src/improper.cpp b/src/improper.cpp
index 7225d34709..58ca65a6a3 100644
--- a/src/improper.cpp
+++ b/src/improper.cpp
@@ -16,9 +16,10 @@
 #include "atom.h"
 #include "comm.h"
 #include "force.h"
+#include "suffix.h"
+#include "atom_masks.h"
 #include "memory.h"
 #include "error.h"
-#include "suffix.h"
 
 using namespace LAMMPS_NS;
 
@@ -34,6 +35,9 @@ Improper::Improper(LAMMPS *lmp) : Pointers(lmp)
   maxeatom = maxvatom = 0;
   eatom = NULL;
   vatom = NULL;
+
+  datamask = ALL_MASK;
+  datamask_ext = ALL_MASK;
 }
 
 /* ---------------------------------------------------------------------- */
diff --git a/src/improper.h b/src/improper.h
index dcf0703563..3d7f96d0da 100644
--- a/src/improper.h
+++ b/src/improper.h
@@ -28,6 +28,8 @@ class Improper : protected Pointers {
   double energy;                  // accumulated energies
   double virial[6];               // accumlated virial
   double *eatom,**vatom;          // accumulated per-atom energy/virial
+  unsigned int datamask;
+  unsigned int datamask_ext;
 
   Improper(class LAMMPS *);
   virtual ~Improper();
@@ -39,6 +41,9 @@ class Improper : protected Pointers {
   virtual void read_restart(FILE *) = 0;
   virtual double memory_usage();
 
+  virtual unsigned int data_mask() {return datamask;}
+  virtual unsigned int data_mask_ext() {return datamask_ext;}
+
  protected:
   int suffix_flag;             // suffix compatibility flag
 
diff --git a/src/kspace.cpp b/src/kspace.cpp
index 61a2b0e9a6..8b1ec78e70 100644
--- a/src/kspace.cpp
+++ b/src/kspace.cpp
@@ -19,6 +19,7 @@
 #include "force.h"
 #include "pair.h"
 #include "memory.h"
+#include "atom_masks.h"
 #include "error.h"
 #include "suffix.h"
 
@@ -50,6 +51,9 @@ KSpace::KSpace(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp)
   maxeatom = maxvatom = 0;
   eatom = NULL;
   vatom = NULL;
+
+  datamask = ALL_MASK;
+  datamask_ext = ALL_MASK;
 }
 
 /* ---------------------------------------------------------------------- */
diff --git a/src/kspace.h b/src/kspace.h
index c8a19b14f7..5932b1b8ae 100644
--- a/src/kspace.h
+++ b/src/kspace.h
@@ -32,6 +32,9 @@ class KSpace : protected Pointers {
   int nx_pppm,ny_pppm,nz_pppm;
   int group_group_enable;         // 1 if style supports group/group calculation
 
+  unsigned int datamask;
+  unsigned int datamask_ext;
+
   int compute_flag;               // 0 if skip compute()
 
   KSpace(class LAMMPS *, int, char **);
diff --git a/src/pair.cpp b/src/pair.cpp
index 96c6cc9feb..33a8fcafc6 100644
--- a/src/pair.cpp
+++ b/src/pair.cpp
@@ -31,9 +31,10 @@
 #include "force.h"
 #include "update.h"
 #include "accelerator_cuda.h"
+#include "suffix.h"
+#include "atom_masks.h"
 #include "memory.h"
 #include "error.h"
-#include "suffix.h"
 
 using namespace LAMMPS_NS;
 
@@ -77,6 +78,9 @@ Pair::Pair(LAMMPS *lmp) : Pointers(lmp)
   maxeatom = maxvatom = 0;
   eatom = NULL;
   vatom = NULL;
+
+  datamask = ALL_MASK;
+  datamask_ext = ALL_MASK;
 }
 
 /* ---------------------------------------------------------------------- */
diff --git a/src/pair.h b/src/pair.h
index 3965b03d81..011478547e 100644
--- a/src/pair.h
+++ b/src/pair.h
@@ -74,6 +74,9 @@ class Pair : protected Pointers {
   class NeighList *listmiddle;
   class NeighList *listouter;
 
+  unsigned int datamask;
+  unsigned int datamask_ext;
+
   int compute_flag;              // 0 if skip compute()
 
   Pair(class LAMMPS *);
@@ -140,6 +143,9 @@ class Pair : protected Pointers {
   virtual void min_xf_get(int) {}
   virtual void min_x_set(int) {}
 
+  virtual unsigned int data_mask() {return datamask;}
+  virtual unsigned int data_mask_ext() {return datamask_ext;}
+
  protected:
   enum{GEOMETRIC,ARITHMETIC,SIXTHPOWER};   // mixing options
 
diff --git a/src/variable.cpp b/src/variable.cpp
index cfa17a4f46..6458e2e090 100644
--- a/src/variable.cpp
+++ b/src/variable.cpp
@@ -30,6 +30,7 @@
 #include "thermo.h"
 #include "random_mars.h"
 #include "math_const.h"
+#include "atom_masks.h"
 #include "memory.h"
 #include "error.h"
 
@@ -78,6 +79,8 @@ Variable::Variable(LAMMPS *lmp) : Pointers(lmp)
   pad = NULL;
   data = NULL;
 
+  eval_in_progress = NULL;
+
   randomequal = NULL;
   randomatom = NULL;
 
@@ -109,6 +112,8 @@ Variable::~Variable()
   memory->destroy(pad);
   memory->sfree(data);
 
+  memory->destroy(eval_in_progress);
+
   delete randomequal;
   delete randomatom;
 }
@@ -135,7 +140,7 @@ void Variable::set(int narg, char **arg)
   } else if (strcmp(arg[1],"index") == 0) {
     if (narg < 3) error->all(FLERR,"Illegal variable command");
     if (find(arg[0]) >= 0) return;
-    if (nvar == maxvar) extend();
+    if (nvar == maxvar) grow();
     style[nvar] = INDEX;
     num[nvar] = narg - 2;
     which[nvar] = 0;
@@ -149,7 +154,7 @@ void Variable::set(int narg, char **arg)
 
   } else if (strcmp(arg[1],"loop") == 0) {
     if (find(arg[0]) >= 0) return;
-    if (nvar == maxvar) extend();
+    if (nvar == maxvar) grow();
     style[nvar] = LOOP;
     int nfirst,nlast;
     if (narg == 3 || (narg == 4 && strcmp(arg[3],"pad") == 0)) {
@@ -184,7 +189,7 @@ void Variable::set(int narg, char **arg)
   } else if (strcmp(arg[1],"world") == 0) {
     if (narg < 3) error->all(FLERR,"Illegal variable command");
     if (find(arg[0]) >= 0) return;
-    if (nvar == maxvar) extend();
+    if (nvar == maxvar) grow();
     style[nvar] = WORLD;
     num[nvar] = narg - 2;
     if (num[nvar] != universe->nworlds)
@@ -205,7 +210,7 @@ void Variable::set(int narg, char **arg)
     if (strcmp(arg[1],"universe") == 0) {
       if (narg < 3) error->all(FLERR,"Illegal variable command");
       if (find(arg[0]) >= 0) return;
-      if (nvar == maxvar) extend();
+      if (nvar == maxvar) grow();
       style[nvar] = UNIVERSE;
       num[nvar] = narg - 2;
       pad[nvar] = 0;
@@ -215,7 +220,7 @@ void Variable::set(int narg, char **arg)
       if (narg < 3 || narg > 4 || (narg == 4 && strcmp(arg[3],"pad") != 0))
         error->all(FLERR,"Illegal variable command");
       if (find(arg[0]) >= 0) return;
-      if (nvar == maxvar) extend();
+      if (nvar == maxvar) grow();
       style[nvar] = ULOOP;
       num[nvar] = atoi(arg[2]);
       data[nvar] = new char*[1];
@@ -254,7 +259,7 @@ void Variable::set(int narg, char **arg)
         error->all(FLERR,"Cannot redefine variable as a different style");
       remove(find(arg[0]));
     }
-    if (nvar == maxvar) extend();
+    if (nvar == maxvar) grow();
     style[nvar] = STRING;
     num[nvar] = 1;
     which[nvar] = 0;
@@ -274,7 +279,7 @@ void Variable::set(int narg, char **arg)
         error->all(FLERR,"Cannot redefine variable as a different style");
       remove(find(arg[0]));
     }
-    if (nvar == maxvar) extend();
+    if (nvar == maxvar) grow();
     style[nvar] = EQUAL;
     num[nvar] = 2;
     which[nvar] = 0;
@@ -295,7 +300,7 @@ void Variable::set(int narg, char **arg)
         error->all(FLERR,"Cannot redefine variable as a different style");
       remove(find(arg[0]));
     }
-    if (nvar == maxvar) extend();
+    if (nvar == maxvar) grow();
     style[nvar] = ATOM;
     num[nvar] = 1;
     which[nvar] = 0;
@@ -475,7 +480,13 @@ char *Variable::retrieve(char *name)
 
 double Variable::compute_equal(int ivar)
 {
-  return evaluate(data[ivar][0],NULL);
+  // eval_in_progress used to detect circle dependencies
+  // could extend this later to check v_a = c_b + v_a constructs?
+
+  eval_in_progress[ivar] = 1;
+  double value = evaluate(data[ivar][0],NULL);
+  eval_in_progress[ivar] = 0;
+  return value;
 }
 
 /* ----------------------------------------------------------------------
@@ -573,17 +584,18 @@ void Variable::remove(int n)
   make space in arrays for new variable
 ------------------------------------------------------------------------- */
 
-void Variable::extend()
+void Variable::grow()
 {
   maxvar += VARDELTA;
   names = (char **)
-    memory->srealloc(names,maxvar*sizeof(char *),"var:names");
+  memory->srealloc(names,maxvar*sizeof(char *),"var:names");
   memory->grow(style,maxvar,"var:style");
   memory->grow(num,maxvar,"var:num");
   memory->grow(which,maxvar,"var:which");
   memory->grow(pad,maxvar,"var:pad");
-  data = (char ***)
-    memory->srealloc(data,maxvar*sizeof(char **),"var:data");
+  data = (char ***) memory->srealloc(data,maxvar*sizeof(char **),"var:data");
+  memory->grow(eval_in_progress,maxvar,"var:eval_in_progress");
+  for (int i = 0; i < maxvar; i++) eval_in_progress[i] = 0;
 }
 
 /* ----------------------------------------------------------------------
@@ -1113,6 +1125,8 @@ double Variable::evaluate(char *str, Tree **tree)
         int ivar = find(id);
         if (ivar < 0)
           error->all(FLERR,"Invalid variable name in variable formula");
+        if (eval_in_progress[ivar])
+          error->all(FLERR,"Variable has circular dependency");
 
         // parse zero or one trailing brackets
         // point i beyond last bracket
@@ -3426,3 +3440,79 @@ double Variable::evaluate_boolean(char *str)
   if (nargstack != 1) error->all(FLERR,"Invalid Boolean syntax in if command");
   return argstack[0];
 }
+
+/* ---------------------------------------------------------------------- */
+
+unsigned int Variable::data_mask(int ivar)
+{
+  if (eval_in_progress[ivar]) return EMPTY_MASK;
+  eval_in_progress[ivar] = 1;
+  unsigned int datamask = data_mask(data[ivar][0]);
+  eval_in_progress[ivar] = 0;
+  return datamask;
+}
+
+/* ---------------------------------------------------------------------- */
+
+unsigned int Variable::data_mask(char *str)
+{
+  unsigned int datamask = EMPTY_MASK;
+
+  for (unsigned int i=0; i < strlen(str)-2; i++) {
+    int istart = i;
+    while (isalnum(str[i]) || str[i] == '_') i++;
+    int istop = i-1;
+
+    int n = istop - istart + 1;
+    char *word = new char[n+1];
+    strncpy(word,&str[istart],n);
+    word[n] = '\0';
+
+    // ----------------
+    // compute
+    // ----------------
+
+    if ((strncmp(word,"c_",2) == 0) && (i>0) && (not isalnum(str[i-1]))){
+      if (domain->box_exist == 0)
+        error->all(FLERR,
+                   "Variable evaluation before simulation box is defined");
+
+      n = strlen(word) - 2 + 1;
+      char *id = new char[n];
+      strcpy(id,&word[2]);
+
+      int icompute = modify->find_compute(id);
+      if (icompute < 0) 
+        error->all(FLERR,"Invalid compute ID in variable formula");
+
+      datamask &= modify->compute[icompute]->data_mask();
+
+      delete [] id;
+    }
+
+    if ((strncmp(word,"f_",2) == 0) && (i>0) && (not isalnum(str[i-1]))) {
+        if (domain->box_exist == 0)
+          error->all(FLERR,
+                     "Variable evaluation before simulation box is defined");
+
+        n = strlen(word) - 2 + 1;
+        char *id = new char[n];
+        strcpy(id,&word[2]);
+
+        int ifix = modify->find_fix(id);
+        if (ifix < 0) error->all(FLERR,"Invalid fix ID in variable formula");
+
+        datamask &= modify->fix[ifix]->data_mask();
+        delete [] id;
+    }
+
+    if ((strncmp(word,"v_",2) == 0) && (i>0) && (not isalnum(str[i-1]))) {
+      int ivar = find(word);
+      datamask &= data_mask(ivar);
+    }
+
+    delete [] word;
+  }
+
+  return datamask;
+}
diff --git a/src/variable.h b/src/variable.h
index 407fd17b3c..121dd98196 100644
--- a/src/variable.h
+++ b/src/variable.h
@@ -34,6 +34,9 @@ class Variable : protected Pointers {
   int int_between_brackets(char *&);
   double evaluate_boolean(char *);
 
+  unsigned int data_mask(int ivar);
+  unsigned int data_mask(char *str);
+
  private:
   int nvar;                // # of defined variables
   int maxvar;              // max # of variables arrays can hold
@@ -43,6 +46,7 @@ class Variable : protected Pointers {
   int *which;              // next available value for each variable
   int *pad;                // 1 = pad loop/uloop variables with 0s, 0 = no pad
   char ***data;            // str value of each variable's values
+  int *eval_in_progress;   // flag if evaluation of variable is in progress
 
   class RanMars *randomequal;   // random number generator for equal-style vars
   class RanMars *randomatom;    // random number generator for atom-style vars
@@ -62,7 +66,7 @@ class Variable : protected Pointers {
   };
 
   void remove(int);
-  void extend();
+  void grow();
   void copy(int, char **, char **);
   double evaluate(char *, Tree **);
   double collapse_tree(Tree *);