git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@8723 f3b2605a-c512-4ea7-a41b-209d697bcdaa

2012-08-28 14:21:55 +00:00 · 2012-08-28 14:21:55 +00:00 · dba019f3e4
parent 8972165b3a
commit dba019f3e4
28 changed files with 737 additions and 245 deletions
--- a/src/USER-CUDA/cuda.cpp
+++ b/src/USER-CUDA/cuda.cpp
@ -717,6 +717,113 @@ void Cuda::downloadAll()
  MYDBG(printf("# CUDA: Cuda::downloadAll() ... end\n");)
 }

+void Cuda::upload(int datamask)
+{
+  MYDBG(printf("# CUDA: Cuda::upload() ... start\n");)
+  timespec starttime;
+  timespec endtime;
+
+  if(atom->nmax != shared_data.atom.nmax) checkResize();
+
+  clock_gettime(CLOCK_REALTIME, &starttime);
+  if(X_MASK & datamask) cu_x   ->upload();
+  if(V_MASK & datamask) cu_v   ->upload();
+  if(F_MASK & datamask) cu_f   ->upload();
+  if(TYPE_MASK & datamask) cu_type->upload();
+  if(TAG_MASK & datamask) cu_tag ->upload();
+  if(MASK_MASK & datamask) cu_mask->upload();
+  if(IMAGE_MASK & datamask) cu_image->upload();
+
+  //if(shared_data.atom.need_eatom) cu_eatom->upload();
+  //if(shared_data.atom.need_vatom) cu_vatom->upload();
+
+  if(shared_data.atom.q_flag)
+	  if(Q_MASK & datamask) cu_q    ->upload();
+
+  if(atom->rmass)
+	  if(RMASS_MASK & datamask) cu_rmass->upload();
+
+  if(atom->radius)
+	  if(RADIUS_MASK & datamask) cu_radius->upload();
+
+  if(atom->omega)
+	  if(OMEGA_MASK & datamask) cu_omega->upload();
+
+  if(atom->torque)
+	  if(TORQUE_MASK & datamask) cu_torque->upload();
+
+  if(atom->special)
+	  if(SPECIAL_MASK & datamask) cu_special->upload();
+
+  if(atom->nspecial)
+	  if(SPECIAL_MASK & datamask) cu_nspecial->upload();
+
+  if(atom->molecule)
+	  if(MOLECULE_MASK & datamask) cu_molecule->upload();
+
+  if(cu_eatom) cu_eatom->upload();
+
+  if(cu_vatom) cu_vatom->upload();
+
+  clock_gettime(CLOCK_REALTIME, &endtime);
+  uploadtime += (endtime.tv_sec - starttime.tv_sec + 1.0 * (endtime.tv_nsec - starttime.tv_nsec) / 1000000000);
+  MYDBG(printf("# CUDA: Cuda::upload() ... end\n");)
+}
+
+void Cuda::download(int datamask)
+{
+  MYDBG(printf("# CUDA: Cuda::download() ... start\n");)
+  timespec starttime;
+  timespec endtime;
+
+  if(atom->nmax != shared_data.atom.nmax) checkResize();
+
+  CUDA_IF_BINNING(Cuda_ReverseBinning(& shared_data);)
+  clock_gettime(CLOCK_REALTIME, &starttime);
+  if(X_MASK & datamask) cu_x   ->download();
+  if(V_MASK & datamask) cu_v   ->download();
+  if(F_MASK & datamask) cu_f   ->download();
+  if(TYPE_MASK & datamask) cu_type->download();
+  if(TAG_MASK & datamask) cu_tag ->download();
+  if(MASK_MASK & datamask) cu_mask->download();
+  if(IMAGE_MASK & datamask) cu_image->download();
+
+  //if(shared_data.atom.need_eatom) cu_eatom->download();
+  //if(shared_data.atom.need_vatom) cu_vatom->download();
+
+  if(shared_data.atom.q_flag)
+	  if(Q_MASK & datamask) cu_q    ->download();
+
+  if(atom->rmass)
+	  if(RMASS_MASK & datamask) cu_rmass->download();
+
+  if(atom->radius)
+	  if(RADIUS_MASK & datamask) cu_radius->download();
+
+  if(atom->omega)
+	  if(OMEGA_MASK & datamask) cu_omega->download();
+
+  if(atom->torque)
+	  if(TORQUE_MASK & datamask) cu_torque->download();
+
+  if(atom->special)
+	  if(SPECIAL_MASK & datamask) cu_special->download();
+
+  if(atom->nspecial)
+	  if(SPECIAL_MASK & datamask) cu_nspecial->download();
+
+  if(atom->molecule)
+	  if(MOLECULE_MASK & datamask) cu_molecule->download();
+
+  if(cu_eatom) cu_eatom->download();
+
+  if(cu_vatom) cu_vatom->download();
+
+  clock_gettime(CLOCK_REALTIME, &endtime);
+  downloadtime += (endtime.tv_sec - starttime.tv_sec + 1.0 * (endtime.tv_nsec - starttime.tv_nsec) / 1000000000);
+  MYDBG(printf("# CUDA: Cuda::download() ... end\n");)
+}
+
 void Cuda::downloadX()
 {
  Cuda_Pair_RevertXType(& this->shared_data);
--- a/src/USER-CUDA/domain_cuda.cpp
+++ b/src/USER-CUDA/domain_cuda.cpp
@ -43,28 +43,28 @@ using namespace LAMMPS_NS;
 #define SMALL 1.0e-4
 #define DELTA 1

-enum{NO_REMAP,X_REMAP,V_REMAP};                   // same as fix_deform.cpp
+enum {NO_REMAP, X_REMAP, V_REMAP};                // same as fix_deform.cpp

 /* ----------------------------------------------------------------------
   default is periodic
 ------------------------------------------------------------------------- */

-DomainCuda::DomainCuda(LAMMPS *lmp) : Domain(lmp)
+DomainCuda::DomainCuda(LAMMPS* lmp) : Domain(lmp)
 {
  cuda = lmp->cuda;
-   if(cuda == NULL)
-        error->all(FLERR,"You cannot use a /cuda class, without activating 'cuda' acceleration. Provide '-c on' as command-line argument to LAMMPS..");
+
+  if(cuda == NULL)
+    error->all(FLERR, "You cannot use a /cuda class, without activating 'cuda' acceleration. Provide '-c on' as command-line argument to LAMMPS..");
 }

 /* ---------------------------------------------------------------------- */

 void DomainCuda::init()
 {
-  cuda->accelerator(0,NULL);
+  cuda->accelerator(0, NULL);
  Domain::init();

-  if(not cuda->finished_run)
-  {
+  if(not cuda->finished_run) {
    cuda->setDomainParams();
    Cuda_Domain_Init(&cuda->shared_data);
  }
@ -79,8 +79,7 @@ void DomainCuda::set_global_box()
 {
  Domain::set_global_box();

-  if(not cuda->finished_run)
-  {
+  if(not cuda->finished_run) {
    cuda->setDomainParams();
  }
 }
@ -95,8 +94,7 @@ void DomainCuda::set_lamda_box()
 {
  Domain::set_lamda_box();

-  if(not cuda->finished_run)
-  {
+  if(not cuda->finished_run) {
    cuda->setDomainParams();
  }
 }
@ -111,9 +109,8 @@ void DomainCuda::set_local_box()
 {
  Domain::set_local_box();

-  if(not cuda->finished_run)
-  {
-   // cuda->setDomainParams();
+  if(not cuda->finished_run) {
+    // cuda->setDomainParams();
    //Cuda_Domain_Init(&cuda->shared_data);
  }
 }
@ -126,39 +123,37 @@ void DomainCuda::set_local_box()

 void DomainCuda::reset_box()
 {
-  if (nonperiodic == 2) {
+  if(nonperiodic == 2) {

    // convert back to box coords for shrink-wrap operation

-    if (triclinic) lamda2x(atom->nlocal);
+    if(triclinic) lamda2x(atom->nlocal);

    // compute extent of atoms on this proc

-    double extent[3][2],all[3][2];
+    double extent[3][2], all[3][2];

    extent[2][0] = extent[1][0] = extent[0][0] = BIG;
    extent[2][1] = extent[1][1] = extent[0][1] = -BIG;

-    double **x = atom->x;
+    double** x = atom->x;
    int nlocal = atom->nlocal;

-    if (cuda->finished_setup&&(!cuda->oncpu))
-      {
-        extent[0][0]=cuda->extent[0];
-        extent[0][1]=cuda->extent[1];
-        extent[1][0]=cuda->extent[2];
-        extent[1][1]=cuda->extent[3];
-        extent[2][0]=cuda->extent[4];
-        extent[2][1]=cuda->extent[5];
-      }
-    else
-      for (int i = 0; i < nlocal; i++) {
-        extent[0][0] = MIN(extent[0][0],x[i][0]);
-        extent[0][1] = MAX(extent[0][1],x[i][0]);
-        extent[1][0] = MIN(extent[1][0],x[i][1]);
-        extent[1][1] = MAX(extent[1][1],x[i][1]);
-        extent[2][0] = MIN(extent[2][0],x[i][2]);
-        extent[2][1] = MAX(extent[2][1],x[i][2]);
+    if(cuda->finished_setup && (!cuda->oncpu)) {
+      extent[0][0] = cuda->extent[0];
+      extent[0][1] = cuda->extent[1];
+      extent[1][0] = cuda->extent[2];
+      extent[1][1] = cuda->extent[3];
+      extent[2][0] = cuda->extent[4];
+      extent[2][1] = cuda->extent[5];
+    } else
+      for(int i = 0; i < nlocal; i++) {
+        extent[0][0] = MIN(extent[0][0], x[i][0]);
+        extent[0][1] = MAX(extent[0][1], x[i][0]);
+        extent[1][0] = MIN(extent[1][0], x[i][1]);
+        extent[1][1] = MAX(extent[1][1], x[i][1]);
+        extent[2][0] = MIN(extent[2][0], x[i][2]);
+        extent[2][1] = MAX(extent[2][1], x[i][2]);
      }

    // compute extent across all procs
@ -168,39 +163,116 @@ void DomainCuda::reset_box()
    extent[1][0] = -extent[1][0];
    extent[2][0] = -extent[2][0];

-    MPI_Allreduce(extent,all,6,MPI_DOUBLE,MPI_MAX,world);
+    MPI_Allreduce(extent, all, 6, MPI_DOUBLE, MPI_MAX, world);

    // in shrink-wrapped dims, set box by atom extent
    // if minimum set, enforce min box size settings

-    if (xperiodic == 0) {
-      if (boundary[0][0] == 2) boxlo[0] = -all[0][0] - SMALL;
-      else if (boundary[0][0] == 3) boxlo[0] = MIN(-all[0][0]-SMALL,minxlo);
-      if (boundary[0][1] == 2) boxhi[0] = all[0][1] + SMALL;
-      else if (boundary[0][1] == 3) boxhi[0] = MAX(all[0][1]+SMALL,minxhi);
-      if (boxlo[0] > boxhi[0]) error->all(FLERR,"Illegal simulation box");
-    }
-    if (yperiodic == 0) {
-      if (boundary[1][0] == 2) boxlo[1] = -all[1][0] - SMALL;
-      else if (boundary[1][0] == 3) boxlo[1] = MIN(-all[1][0]-SMALL,minylo);
-      if (boundary[1][1] == 2) boxhi[1] = all[1][1] + SMALL;
-      else if (boundary[1][1] == 3) boxhi[1] = MAX(all[1][1]+SMALL,minyhi);
-      if (boxlo[1] > boxhi[1]) error->all(FLERR,"Illegal simulation box");
-    }
-    if (zperiodic == 0) {
-      if (boundary[2][0] == 2) boxlo[2] = -all[2][0] - SMALL;
-      else if (boundary[2][0] == 3) boxlo[2] = MIN(-all[2][0]-SMALL,minzlo);
-      if (boundary[2][1] == 2) boxhi[2] = all[2][1] + SMALL;
-      else if (boundary[2][1] == 3) boxhi[2] = MAX(all[2][1]+SMALL,minzhi);
-      if (boxlo[2] > boxhi[2]) error->all(FLERR,"Illegal simulation box");
+    if(triclinic == 0) {
+      if(xperiodic == 0) {
+        if(boundary[0][0] == 2) boxlo[0] = -all[0][0] - small[0];
+        else if(boundary[0][0] == 3)
+          boxlo[0] = MIN(-all[0][0] - small[0], minxlo);
+
+        if(boundary[0][1] == 2) boxhi[0] = all[0][1] + small[0];
+        else if(boundary[0][1] == 3) boxhi[0] = MAX(all[0][1] + small[0], minxhi);
+
+        if(boxlo[0] > boxhi[0]) error->all(FLERR, "Illegal simulation box");
+      }
+
+      if(yperiodic == 0) {
+        if(boundary[1][0] == 2) boxlo[1] = -all[1][0] - small[1];
+        else if(boundary[1][0] == 3)
+          boxlo[1] = MIN(-all[1][0] - small[1], minylo);
+
+        if(boundary[1][1] == 2) boxhi[1] = all[1][1] + small[1];
+        else if(boundary[1][1] == 3) boxhi[1] = MAX(all[1][1] + small[1], minyhi);
+
+        if(boxlo[1] > boxhi[1]) error->all(FLERR, "Illegal simulation box");
+      }
+
+      if(zperiodic == 0) {
+        if(boundary[2][0] == 2) boxlo[2] = -all[2][0] - small[2];
+        else if(boundary[2][0] == 3)
+          boxlo[2] = MIN(-all[2][0] - small[2], minzlo);
+
+        if(boundary[2][1] == 2) boxhi[2] = all[2][1] + small[2];
+        else if(boundary[2][1] == 3) boxhi[2] = MAX(all[2][1] + small[2], minzhi);
+
+        if(boxlo[2] > boxhi[2]) error->all(FLERR, "Illegal simulation box");
+      }
+
+    } else {
+      double lo[3], hi[3];
+
+      if(xperiodic == 0) {
+        lo[0] = -all[0][0];
+        lo[1] = 0.0;
+        lo[2] = 0.0;
+        Domain::lamda2x(lo, lo);
+        hi[0] = all[0][1];
+        hi[1] = 0.0;
+        hi[2] = 0.0;
+        Domain::lamda2x(hi, hi);
+
+        if(boundary[0][0] == 2) boxlo[0] = lo[0] - small[0];
+        else if(boundary[0][0] == 3) boxlo[0] = MIN(lo[0] - small[0], minxlo);
+
+        if(boundary[0][1] == 2) boxhi[0] = hi[0] + small[0];
+        else if(boundary[0][1] == 3) boxhi[0] = MAX(hi[0] + small[0], minxhi);
+
+        if(boxlo[0] > boxhi[0]) error->all(FLERR, "Illegal simulation box");
+      }
+
+      if(yperiodic == 0) {
+        lo[0] = 0.0;
+        lo[1] = -all[1][0];
+        lo[2] = 0.0;
+        Domain::lamda2x(lo, lo);
+        hi[0] = 0.0;
+        hi[1] = all[1][1];
+        hi[2] = 0.0;
+        Domain::lamda2x(hi, hi);
+
+        if(boundary[1][0] == 2) boxlo[1] = lo[1] - small[1];
+        else if(boundary[1][0] == 3) boxlo[1] = MIN(lo[1] - small[1], minylo);
+
+        if(boundary[1][1] == 2) boxhi[1] = hi[1] + small[1];
+        else if(boundary[1][1] == 3) boxhi[1] = MAX(hi[1] + small[1], minyhi);
+
+        if(boxlo[1] > boxhi[1]) error->all(FLERR, "Illegal simulation box");
+
+        //xy *= (boxhi[1]-boxlo[1]) / yprd;
+      }
+
+      if(zperiodic == 0) {
+        lo[0] = 0.0;
+        lo[1] = 0.0;
+        lo[2] = -all[2][0];
+        Domain::lamda2x(lo, lo);
+        hi[0] = 0.0;
+        hi[1] = 0.0;
+        hi[2] = all[2][1];
+        Domain::lamda2x(hi, hi);
+
+        if(boundary[2][0] == 2) boxlo[2] = lo[2] - small[2];
+        else if(boundary[2][0] == 3) boxlo[2] = MIN(lo[2] - small[2], minzlo);
+
+        if(boundary[2][1] == 2) boxhi[2] = hi[2] + small[2];
+        else if(boundary[2][1] == 3) boxhi[2] = MAX(hi[2] + small[2], minzhi);
+
+        if(boxlo[2] > boxhi[2]) error->all(FLERR, "Illegal simulation box");
+
+        //xz *= (boxhi[2]-boxlo[2]) / xprd;
+        //yz *= (boxhi[2]-boxlo[2]) / yprd;
+      }
    }
  }

  set_global_box();
  set_local_box();

-  if(not cuda->finished_run)
-  {
+  if(not cuda->finished_run) {
    cuda->setDomainParams();
    Cuda_Domain_Init(&cuda->shared_data);
  }
@ -208,7 +280,7 @@ void DomainCuda::reset_box()
  // if shrink-wrapped, convert to lamda coords for new box
  // must re-invoke pbc() b/c x2lamda result can be outside 0,1 due to roundoff

-  if (nonperiodic == 2 && triclinic) {
+  if(nonperiodic == 2 && triclinic) {
    x2lamda(atom->nlocal);
    pbc();
  }
@ -227,10 +299,9 @@ void DomainCuda::reset_box()

 void DomainCuda::pbc()
 {
-  if(cuda->finished_setup&&(!cuda->oncpu))
-  {
-          cuda->setDomainParams();
-    Cuda_Domain_PBC(&cuda->shared_data, deform_vremap, deform_groupbit,cuda->extent);
+  if(cuda->finished_setup && (!cuda->oncpu)) {
+    cuda->setDomainParams();
+    Cuda_Domain_PBC(&cuda->shared_data, deform_vremap, deform_groupbit, cuda->extent);
    return;
  }

@ -245,9 +316,8 @@ void DomainCuda::pbc()

 void DomainCuda::lamda2x(int n)
 {
-  if(cuda->finished_setup&&(!cuda->oncpu))
-  {
-    Cuda_Domain_lamda2x(&cuda->shared_data,n);
+  if(cuda->finished_setup && (!cuda->oncpu)) {
+    Cuda_Domain_lamda2x(&cuda->shared_data, n);
    return;
  }

@ -261,9 +331,8 @@ void DomainCuda::lamda2x(int n)

 void DomainCuda::x2lamda(int n)
 {
-  if(cuda->finished_setup&&(!cuda->oncpu))
-  {
-    Cuda_Domain_x2lamda(&cuda->shared_data,n);
+  if(cuda->finished_setup && (!cuda->oncpu)) {
+    Cuda_Domain_x2lamda(&cuda->shared_data, n);
    return;
  }

--- a/src/USER-CUDA/fix_aveforce_cuda.cpp
+++ b/src/USER-CUDA/fix_aveforce_cuda.cpp
@ -34,11 +34,17 @@
 #include "domain.h"
 #include "cuda.h"
 #include "cuda_modify_flags.h"
+#include "variable.h"
+#include "input.h"
+#include "modify.h"
+

 using namespace LAMMPS_NS;
 using namespace FixConst;
 using namespace FixConstCuda;

+enum{NONE,CONSTANT,EQUAL};
+
 /* ---------------------------------------------------------------------- */

 FixAveForceCuda::FixAveForceCuda(LAMMPS *lmp, int narg, char **arg) :
@ -55,13 +61,39 @@ FixAveForceCuda::FixAveForceCuda(LAMMPS *lmp, int narg, char **arg) :
  global_freq = 1;
  extvector = 1;

-  xflag = yflag = zflag = 1;
-  if (strcmp(arg[3],"NULL") == 0) xflag = 0;
-  else xvalue = atof(arg[3]);
-  if (strcmp(arg[4],"NULL") == 0) yflag = 0;
-  else yvalue = atof(arg[4]);
-  if (strcmp(arg[5],"NULL") == 0) zflag = 0;
-  else zvalue = atof(arg[5]);
+  xstr = ystr = zstr = NULL;
+  xvalue = yvalue = zvalue = 0;
+
+  if (strstr(arg[3],"v_") == arg[3]) {
+    int n = strlen(&arg[3][2]) + 1;
+    xstr = new char[n];
+    strcpy(xstr,&arg[3][2]);
+  } else if (strcmp(arg[3],"NULL") == 0) {
+    xstyle = NONE;
+  } else {
+    xvalue = atof(arg[3]);
+    xstyle = CONSTANT;
+  }
+  if (strstr(arg[4],"v_") == arg[4]) {
+    int n = strlen(&arg[4][2]) + 1;
+    ystr = new char[n];
+    strcpy(ystr,&arg[4][2]);
+  } else if (strcmp(arg[4],"NULL") == 0) {
+    ystyle = NONE;
+  } else {
+    yvalue = atof(arg[4]);
+    ystyle = CONSTANT;
+  }
+  if (strstr(arg[5],"v_") == arg[5]) {
+    int n = strlen(&arg[5][2]) + 1;
+    zstr = new char[n];
+    strcpy(zstr,&arg[5][2]);
+  } else if (strcmp(arg[5],"NULL") == 0) {
+    zstyle = NONE;
+  } else {
+    zvalue = atof(arg[5]);
+    zstyle = CONSTANT;
+  }

  // optional args

@ -86,6 +118,13 @@ FixAveForceCuda::FixAveForceCuda(LAMMPS *lmp, int narg, char **arg) :

 }

+FixAveForceCuda::~FixAveForceCuda()
+{
+  delete [] xstr;
+  delete [] ystr;
+  delete [] zstr;
+}
+
 /* ---------------------------------------------------------------------- */

 int FixAveForceCuda::setmask()
@ -103,8 +142,31 @@ void FixAveForceCuda::init()
 {
  if(not cu_foriginal)
  cu_foriginal = new cCudaData<double, F_FLOAT, x> (foriginal,4);
-  if (strstr(update->integrate_style,"respa"))
-    nlevels_respa = ((Respa *) update->integrate)->nlevels;
+
+  if (xstr) {
+    xvar = input->variable->find(xstr);
+    if (xvar < 0)
+      error->all(FLERR,"Variable name for fix aveforce does not exist");
+    if (input->variable->equalstyle(xvar)) xstyle = EQUAL;
+    else error->all(FLERR,"Variable for fix aveforce is invalid style");
+  }
+  if (ystr) {
+    yvar = input->variable->find(ystr);
+    if (yvar < 0)
+      error->all(FLERR,"Variable name for fix aveforce does not exist");
+    if (input->variable->equalstyle(yvar)) ystyle = EQUAL;
+    else error->all(FLERR,"Variable for fix aveforce is invalid style");
+  }
+  if (zstr) {
+    zvar = input->variable->find(zstr);
+    if (zvar < 0)
+      error->all(FLERR,"Variable name for fix aveforce does not exist");
+    if (input->variable->equalstyle(zvar)) zstyle = EQUAL;
+    else error->all(FLERR,"Variable for fix aveforce is invalid style");
+  }
+
+  if (xstyle == EQUAL || ystyle == EQUAL || zstyle == EQUAL) varflag = EQUAL;
+  else varflag = CONSTANT;

  // ncount = total # of atoms in group

@ -126,13 +188,6 @@ void FixAveForceCuda::setup(int vflag)
  }
  else
  {
-    cuda->cu_f->download();
-    for (int ilevel = 0; ilevel < nlevels_respa; ilevel++) {
-      ((Respa *) update->integrate)->copy_flevel_f(ilevel);
-      post_force_respa(vflag,ilevel,0);
-      ((Respa *) update->integrate)->copy_f_flevel(ilevel);
-    }
-    cuda->cu_f->upload();
  }
 }

@ -159,6 +214,21 @@ void FixAveForceCuda::post_force(int vflag)
  MPI_Allreduce(foriginal,foriginal_all,4,MPI_DOUBLE,MPI_SUM,world);
  int ncount = static_cast<int> (foriginal_all[3]);
  if (ncount == 0) return;
+
+  if (varflag == EQUAL) {
+    unsigned int datamask = EMPTY_MASK;
+    if (xstyle == EQUAL) datamask &= input->variable->data_mask(xstr);
+    if (ystyle == EQUAL) datamask &= input->variable->data_mask(ystr);
+    if (zstyle == EQUAL) datamask &= input->variable->data_mask(zstr);
+
+    cuda->download(datamask);
+    modify->clearstep_compute();
+    if (xstyle == EQUAL) xvalue = input->variable->compute_equal(xvar);
+    if (ystyle == EQUAL) yvalue = input->variable->compute_equal(yvar);
+    if (zstyle == EQUAL) zvalue = input->variable->compute_equal(zvar);
+    modify->addstep_compute(update->ntimestep + 1);
+  }
+
  double fave[3];
  fave[0] = foriginal_all[0]/ncount + xvalue;
  fave[1] = foriginal_all[1]/ncount + yvalue;
@ -167,51 +237,14 @@ void FixAveForceCuda::post_force(int vflag)
  // set force of all participating atoms to same value
  // only for active dimensions

-  Cuda_FixAveForceCuda_PostForce_Set(&cuda->shared_data, groupbit,xflag,yflag,zflag,fave[0],fave[1],fave[2]);
+  Cuda_FixAveForceCuda_PostForce_Set(&cuda->shared_data, groupbit,!(xstyle==NONE),!(ystyle==NONE),!(zstyle==NONE),fave[0],fave[1],fave[2]);
 }

 /* ---------------------------------------------------------------------- */

 void FixAveForceCuda::post_force_respa(int vflag, int ilevel, int iloop)
 {
-  // ave + extra force on outermost level
-  // just ave on inner levels
-  if (ilevel == nlevels_respa-1) post_force(vflag);
-  else {
-    cuda->cu_f->download();
-    cuda->cu_mask->download();
-    double **f = atom->f;
-    int *mask = atom->mask;
-    int nlocal = atom->nlocal;

-    double foriginal[4];
-    foriginal[0] = foriginal[1] = foriginal[2] = foriginal[3] = 0.0;
-
-    for (int i = 0; i < nlocal; i++)
-      if (mask[i] & groupbit) {
-        foriginal[0] += f[i][0];
-        foriginal[1] += f[i][1];
-        foriginal[2] += f[i][2];
-        foriginal[3] += 1;
-
-      }
-
-    MPI_Allreduce(foriginal,foriginal_all,4,MPI_DOUBLE,MPI_SUM,world);
-    int ncount = static_cast<int> (foriginal_all[3]);
-    if (ncount == 0) return;
-    double fave[3];
-    fave[0] = foriginal_all[0]/ncount;
-    fave[1] = foriginal_all[1]/ncount;
-    fave[2] = foriginal_all[2]/ncount;
-
-    for (int i = 0; i < nlocal; i++)
-      if (mask[i] & groupbit) {
-        if (xflag) f[i][0] = fave[0];
-        if (yflag) f[i][1] = fave[1];
-        if (zflag) f[i][2] = fave[2];
-      }
-    cuda->cu_f->upload();
-  }
 }

 /* ---------------------------------------------------------------------- */
--- a/src/USER-CUDA/fix_aveforce_cuda.h
+++ b/src/USER-CUDA/fix_aveforce_cuda.h
@ -39,6 +39,7 @@ namespace LAMMPS_NS {
 class FixAveForceCuda : public Fix {
 public:
  FixAveForceCuda(class LAMMPS *, int, char **);
+  ~FixAveForceCuda();
  int setmask();
  void init();
  void setup(int);
@ -50,12 +51,16 @@ class FixAveForceCuda : public Fix {

 private:
  class Cuda *cuda;
-  int xflag,yflag,zflag,iregion;
+  char *xstr,*ystr,*zstr;
+  int iregion;
  double xvalue,yvalue,zvalue;
  double foriginal_all[4];
  double foriginal[4];
  cCudaData<double     , F_FLOAT                   , x>* cu_foriginal;
  int nlevels_respa;
+  int varflag;
+  int xvar,yvar,zvar,xstyle,ystyle,zstyle;
+
 };

 }
--- a/src/USER-CUDA/pair_eam_cuda.cpp
+++ b/src/USER-CUDA/pair_eam_cuda.cpp
@ -64,22 +64,23 @@ using namespace LAMMPS_NS;

 /* ---------------------------------------------------------------------- */

-PairEAMCuda::PairEAMCuda(LAMMPS *lmp) : PairEAM(lmp)
+PairEAMCuda::PairEAMCuda(LAMMPS* lmp) : PairEAM(lmp)
 {
  cuda = lmp->cuda;
-   if(cuda == NULL)
-        error->all(FLERR,"You cannot use a /cuda class, without activating 'cuda' acceleration. Provide '-c on' as command-line argument to LAMMPS..");

-        allocated2 = false;
-        cuda->shared_data.pair.cudable_force = 1;
-        cuda->shared_data.pair.override_block_per_atom = 0;
+  if(cuda == NULL)
+    error->all(FLERR, "You cannot use a /cuda class, without activating 'cuda' acceleration. Provide '-c on' as command-line argument to LAMMPS..");

-        cuda->setSystemParams();
-        cu_rho=NULL;
-        cu_fp=NULL;
-    cu_frho_spline = NULL;
-    cu_z2r_spline = NULL;
-    cu_rhor_spline = NULL;
+  allocated2 = false;
+  cuda->shared_data.pair.cudable_force = 1;
+  cuda->shared_data.pair.override_block_per_atom = 0;
+
+  cuda->setSystemParams();
+  cu_rho = NULL;
+  cu_fp = NULL;
+  cu_frho_spline = NULL;
+  cu_z2r_spline = NULL;
+  cu_rhor_spline = NULL;
 }

 /* ----------------------------------------------------------------------
@ -88,161 +89,176 @@ PairEAMCuda::PairEAMCuda(LAMMPS *lmp) : PairEAM(lmp)

 void PairEAMCuda::allocate()
 {
-        if(! allocated) PairEAM::allocate();
-                cuda->shared_data.pair.cutsq     = cutsq;
-        cuda->shared_data.pair.cut_global = (F_FLOAT) cutforcesq;
+  if(! allocated) PairEAM::allocate();
+
+  cuda->shared_data.pair.cutsq     = cutsq;
+  cuda->shared_data.pair.cut_global = (F_FLOAT) cutforcesq;
 }

 /* ---------------------------------------------------------------------- */

 void PairEAMCuda::compute(int eflag, int vflag)
 {
-        cuda->shared_data.pair.cut_global = (F_FLOAT) cutforcesq;
-    cuda->shared_data.pair.use_block_per_atom = 0;
-    cuda->shared_data.pair.collect_forces_later = 0;
-    if (atom->nmax > nmax) {
+  cuda->shared_data.pair.cut_global = (F_FLOAT) cutforcesq;
+  cuda->shared_data.pair.use_block_per_atom = 0;
+  cuda->shared_data.pair.collect_forces_later = 0;
+
+  if(atom->nmax > nmax || cuda->finished_setup == false) {
    memory->destroy(rho);
    memory->destroy(fp);
    nmax = atom->nmax;
-    memory->create(rho,nmax,"pair:rho");
-    memory->create(fp,nmax,"pair:fp");
-        delete cu_rho;
-        delete cu_fp;
-        cu_rho = new cCudaData<double, F_FLOAT, x> (rho, atom->nmax);
-        cu_fp  = new cCudaData<double, F_FLOAT, x> (fp, atom->nmax);
-        Cuda_PairEAMCuda_Init(&cuda->shared_data,rdr,rdrho,nfrho,nrhor,nr,nrho,nz2r,
-                cu_frho_spline->dev_data(),cu_rhor_spline->dev_data(),cu_z2r_spline->dev_data(),
-                cu_rho->dev_data(),cu_fp->dev_data(),type2frho,type2z2r,type2rhor);
-          }
+    memory->create(rho, nmax, "pair:rho");
+    memory->create(fp, nmax, "pair:fp");
+    delete cu_rho;
+    delete cu_fp;
+    cu_rho = new cCudaData<double, F_FLOAT, x> (rho, atom->nmax);
+    cu_fp  = new cCudaData<double, F_FLOAT, x> (fp, atom->nmax);
+    Cuda_PairEAMCuda_Init(&cuda->shared_data, rdr, rdrho, nfrho, nrhor, nr, nrho, nz2r,
+                          cu_frho_spline->dev_data(), cu_rhor_spline->dev_data(), cu_z2r_spline->dev_data(),
+                          cu_rho->dev_data(), cu_fp->dev_data(), type2frho, type2z2r, type2rhor);
+  }



-        if(eflag || vflag) ev_setup(eflag,vflag);
-        if(eflag) cuda->cu_eng_vdwl->upload();
-        if(vflag) cuda->cu_virial->upload();
+  if(eflag || vflag) ev_setup(eflag, vflag);

-        Cuda_PairEAM1Cuda(& cuda->shared_data, & cuda_neigh_list->sneighlist, eflag, vflag,eflag_atom,vflag_atom);
-    comm->forward_comm_pair(this);
-        Cuda_PairEAM2Cuda(& cuda->shared_data, & cuda_neigh_list->sneighlist, eflag, vflag,eflag_atom,vflag_atom);
+  if(eflag) cuda->cu_eng_vdwl->upload();

-        if(eflag) cuda->cu_eng_vdwl->download();
-        if(vflag) cuda->cu_virial->download();
+  if(vflag) cuda->cu_virial->upload();
+
+  Cuda_PairEAM1Cuda(& cuda->shared_data, & cuda_neigh_list->sneighlist, eflag, vflag, eflag_atom, vflag_atom);
+
+  comm->forward_comm_pair(this);
+
+  Cuda_PairEAM2Cuda(& cuda->shared_data, & cuda_neigh_list->sneighlist, eflag, vflag, eflag_atom, vflag_atom);
+
+  if(eflag) cuda->cu_eng_vdwl->download();
+
+  if(vflag) cuda->cu_virial->download();
 }

 /* ---------------------------------------------------------------------- */

-void PairEAMCuda::settings(int narg, char **arg)
+void PairEAMCuda::settings(int narg, char** arg)
 {
-        PairEAM::settings(narg, arg);
-        cuda->shared_data.pair.cut_global = (F_FLOAT) cutforcesq;
+  PairEAM::settings(narg, arg);
+  cuda->shared_data.pair.cut_global = (F_FLOAT) cutforcesq;
 }

 /* ---------------------------------------------------------------------- */

-void PairEAMCuda::coeff(int narg, char **arg)
+void PairEAMCuda::coeff(int narg, char** arg)
 {
-        PairEAM::coeff(narg, arg);
-        allocate();
+  PairEAM::coeff(narg, arg);
+  allocate();
 }

 void PairEAMCuda::init_style()
 {
-        MYDBG(printf("# CUDA PairEAMCuda::init_style start\n"); )
+  MYDBG(printf("# CUDA PairEAMCuda::init_style start\n");)
  // request regular or rRESPA neighbor lists
  file2array();
  array2spline();
  int irequest;


-          irequest = neighbor->request(this);
-    neighbor->requests[irequest]->full = 1;
-    neighbor->requests[irequest]->half = 0;
-    neighbor->requests[irequest]->cudable = 1;
+  irequest = neighbor->request(this);
+  neighbor->requests[irequest]->full = 1;
+  neighbor->requests[irequest]->half = 0;
+  neighbor->requests[irequest]->cudable = 1;

-        delete cu_rhor_spline;
-        delete cu_z2r_spline;
-        delete cu_frho_spline;
+  delete cu_rhor_spline;
+  delete cu_z2r_spline;
+  delete cu_frho_spline;

-        cu_rhor_spline = new cCudaData<double, F_FLOAT, xyz>((double*)rhor_spline,nrhor,nr+1,EAM_COEFF_LENGTH);
-        cu_z2r_spline = new cCudaData<double, F_FLOAT, xyz>((double*)z2r_spline,nz2r,nr+1,EAM_COEFF_LENGTH);
-        cu_frho_spline = new cCudaData<double, F_FLOAT, xyz>((double*)frho_spline,nfrho,nrho+1,EAM_COEFF_LENGTH);
+  cu_rhor_spline = new cCudaData<double, F_FLOAT, xyz>((double*)rhor_spline, nrhor, nr + 1, EAM_COEFF_LENGTH);
+  cu_z2r_spline = new cCudaData<double, F_FLOAT, xyz>((double*)z2r_spline, nz2r, nr + 1, EAM_COEFF_LENGTH);
+  cu_frho_spline = new cCudaData<double, F_FLOAT, xyz>((double*)frho_spline, nfrho, nrho + 1, EAM_COEFF_LENGTH);

-        cu_rhor_spline->upload();
-        cu_z2r_spline->upload();
-        cu_frho_spline->upload();
+  cu_rhor_spline->upload();
+  cu_z2r_spline->upload();
+  cu_frho_spline->upload();

-  MYDBG(printf("# CUDA PairEAMCuda::init_style end\n"); )
+  MYDBG(printf("# CUDA PairEAMCuda::init_style end\n");)
 }

-void PairEAMCuda::init_list(int id, NeighList *ptr)
+void PairEAMCuda::init_list(int id, NeighList* ptr)
 {
-        MYDBG(printf("# CUDA PairEAMCuda::init_list\n");)
-        PairEAM::init_list(id, ptr);
-        #ifndef CUDA_USE_BINNING
-        // right now we can only handle verlet (id 0), not respa
-        if(id == 0) cuda_neigh_list = cuda->registerNeighborList(ptr);
-        // see Neighbor::init() for details on lammps lists' logic
-        #endif
-        MYDBG(printf("# CUDA PairEAMCuda::init_list end\n");)
+  MYDBG(printf("# CUDA PairEAMCuda::init_list\n");)
+  PairEAM::init_list(id, ptr);
+
+  // right now we can only handle verlet (id 0), not respa
+  if(id == 0) cuda_neigh_list = cuda->registerNeighborList(ptr);
+
+  // see Neighbor::init() for details on lammps lists' logic
+  MYDBG(printf("# CUDA PairEAMCuda::init_list end\n");)
 }

 void PairEAMCuda::array2spline()
 {
-  rdr = 1.0/dr;
-  rdrho = 1.0/drho;
+  rdr = 1.0 / dr;
+  rdrho = 1.0 / drho;

  memory->destroy(frho_spline);
  memory->destroy(rhor_spline);
  memory->destroy(z2r_spline);

-  memory->create(frho_spline,nfrho,nrho+1,8,"pair:frho");
-  memory->create(rhor_spline,nrhor,nr+1,8,"pair:rhor");
-  memory->create(z2r_spline,nz2r,nr+1,8,"pair:z2r");
+  memory->create(frho_spline, nfrho, nrho + 1, 8, "pair:frho");
+  memory->create(rhor_spline, nrhor, nr + 1, 8, "pair:rhor");
+  memory->create(z2r_spline, nz2r, nr + 1, 8, "pair:z2r");

-  for (int i = 0; i < nfrho; i++){
-    interpolate(nrho,drho,frho[i],frho_spline[i]);
-    for(int j=0;j<nrho+1;j++)
-      frho_spline[i][j][7]=frho_spline[i][j][3];
+  for(int i = 0; i < nfrho; i++) {
+    interpolate(nrho, drho, frho[i], frho_spline[i]);
+
+    for(int j = 0; j < nrho + 1; j++)
+      frho_spline[i][j][7] = frho_spline[i][j][3];
  }

-  for (int i = 0; i < nrhor; i++){
-    interpolate(nr,dr,rhor[i],rhor_spline[i]);
-    for(int j=0;j<nr+1;j++)
-      rhor_spline[i][j][7]=rhor_spline[i][j][3];
+  for(int i = 0; i < nrhor; i++) {
+    interpolate(nr, dr, rhor[i], rhor_spline[i]);
+
+    for(int j = 0; j < nr + 1; j++)
+      rhor_spline[i][j][7] = rhor_spline[i][j][3];
  }

-  for (int i = 0; i < nz2r; i++){
-    interpolate(nr,dr,z2r[i],z2r_spline[i]);
-    for(int j=0;j<nr+1;j++)
-      z2r_spline[i][j][7]=z2r_spline[i][j][3];
+  for(int i = 0; i < nz2r; i++) {
+    interpolate(nr, dr, z2r[i], z2r_spline[i]);
+
+    for(int j = 0; j < nr + 1; j++)
+      z2r_spline[i][j][7] = z2r_spline[i][j][3];
  }
 }

 /* ---------------------------------------------------------------------- */

-int PairEAMCuda::pack_comm(int n, int *iswap, double *buf, int pbc_flag, int *pbc)
+int PairEAMCuda::pack_comm(int n, int* iswap, double* buf, int pbc_flag, int* pbc)
 {
-  Cuda_PairEAMCuda_PackComm(&cuda->shared_data,n,*iswap,buf);
-  if(sizeof(F_FLOAT)<sizeof(double)) return 1;
+  Cuda_PairEAMCuda_PackComm(&cuda->shared_data, n, *iswap, buf);
+
+  if(sizeof(F_FLOAT) < sizeof(double)) return 1;
  else return 1;
 }

 /* ---------------------------------------------------------------------- */

-void PairEAMCuda::unpack_comm(int n, int first, double *buf)
+void PairEAMCuda::unpack_comm(int n, int first, double* buf)
 {
-  Cuda_PairEAMCuda_UnpackComm(&cuda->shared_data,n,first,buf,cu_fp->dev_data());
+  Cuda_PairEAMCuda_UnpackComm(&cuda->shared_data, n, first, buf, cu_fp->dev_data());
 }

 void PairEAMCuda::ev_setup(int eflag, int vflag)
 {
-  int maxeatomold=maxeatom;
-  PairEAM::ev_setup(eflag,vflag);
+  int maxeatomold = maxeatom;
+  PairEAM::ev_setup(eflag, vflag);

-  if (eflag_atom && atom->nmax > maxeatomold)
-        {delete cuda->cu_eatom; cuda->cu_eatom = new cCudaData<double, ENERGY_FLOAT, x > ((double*)eatom, & cuda->shared_data.atom.eatom , atom->nmax  );}
+  if(eflag_atom && atom->nmax > maxeatomold) {
+    delete cuda->cu_eatom;
+    cuda->cu_eatom = new cCudaData<double, ENERGY_FLOAT, x > ((double*)eatom, & cuda->shared_data.atom.eatom , atom->nmax);
+  }

-  if (vflag_atom && atom->nmax > maxeatomold)
-        {delete cuda->cu_vatom; cuda->cu_vatom = new cCudaData<double, ENERGY_FLOAT, yx > ((double*)vatom, & cuda->shared_data.atom.vatom , atom->nmax, 6  );}
+  if(vflag_atom && atom->nmax > maxeatomold) {
+    delete cuda->cu_vatom;
+    cuda->cu_vatom = new cCudaData<double, ENERGY_FLOAT, yx > ((double*)vatom, & cuda->shared_data.atom.vatom , atom->nmax, 6);
+  }

 }
--- a/src/USER-CUDA/verlet_cuda.cpp
+++ b/src/USER-CUDA/verlet_cuda.cpp
@ -93,6 +93,7 @@ void VerletCuda::setup()

  cuda->oncpu = true;
  cuda->begin_setup = true;
+  cuda->finished_setup = false;
  cuda->finished_run = false;

  time_pair = 0;
@ -122,7 +123,6 @@ void VerletCuda::setup()

  cuda->allocate();

-
  if(comm->me == 0 && screen) fprintf(screen, "Setting up run ...\n");

  // setup domain, communication and neighboring
@ -395,7 +395,7 @@ void VerletCuda::setup()
 void VerletCuda::setup_minimal(int flag)
 {

-
+  printf("SetupMinimal\n");
  dotestatom = 0;
  int testatom = 104;
  cuda->oncpu = true;
@ -828,7 +828,6 @@ void VerletCuda::run(int n)
        endtime.tv_sec - starttime.tv_sec + 1.0 * (endtime.tv_nsec - starttime.tv_nsec) / 1000000000;

      clock_gettime(CLOCK_REALTIME, &starttime);
-
      //atom index maps are generated on CPU, and need to be transfered to GPU if they are used
      if(cuda->cu_map_array)
        cuda->cu_map_array->upload();
@ -949,7 +948,8 @@ void VerletCuda::run(int n)
      timer->stamp(TIME_PAIR);

      if(neighbor->lastcall == update->ntimestep) {
-        neighbor->build_topology();
+        neighbor->build_bonded();
+
        timer->stamp(TIME_NEIGHBOR);
      }

--- a/src/angle.cpp
+++ b/src/angle.cpp
@ -17,9 +17,10 @@
 #include "comm.h"
 #include "force.h"
 #include "math_const.h"
+#include "suffix.h"
+#include "atom_masks.h"
 #include "memory.h"
 #include "error.h"
-#include "suffix.h"

 using namespace LAMMPS_NS;
 using namespace MathConst;
@ -36,6 +37,9 @@ Angle::Angle(LAMMPS *lmp) : Pointers(lmp)
  maxeatom = maxvatom = 0;
  eatom = NULL;
  vatom = NULL;
+
+  datamask = ALL_MASK;
+  datamask_ext = ALL_MASK;
 }

 /* ---------------------------------------------------------------------- */
--- a/src/angle.h
+++ b/src/angle.h
@ -28,6 +28,8 @@ class Angle : protected Pointers {
  double energy;                  // accumulated energies
  double virial[6];               // accumlated virial
  double *eatom,**vatom;          // accumulated per-atom energy/virial
+  unsigned int datamask;
+  unsigned int datamask_ext;

  Angle(class LAMMPS *);
  virtual ~Angle();
@ -42,6 +44,9 @@ class Angle : protected Pointers {
  virtual double single(int, int, int, int) = 0;
  virtual double memory_usage();

+  virtual unsigned int data_mask() {return datamask;}
+  virtual unsigned int data_mask_ext() {return datamask_ext;}
+
 protected:
  int suffix_flag;             // suffix compatibility flag

--- a/src/atom.cpp
+++ b/src/atom.cpp
@ -32,6 +32,7 @@
 #include "domain.h"
 #include "group.h"
 #include "accelerator_cuda.h"
+#include "atom_masks.h"
 #include "memory.h"
 #include "error.h"

@ -148,6 +149,9 @@ Atom::Atom(LAMMPS *lmp) : Pointers(lmp)
  atom_style = NULL;
  avec = NULL;
  create_avec("atomic",0,NULL,lmp->suffix);
+
+  datamask = ALL_MASK;
+  datamask_ext = ALL_MASK;
 }

 /* ---------------------------------------------------------------------- */
--- a/src/atom.h
+++ b/src/atom.h
@ -83,6 +83,9 @@ class Atom : protected Pointers {
  int **improper_type;
  int **improper_atom1,**improper_atom2,**improper_atom3,**improper_atom4;

+  unsigned int datamask;
+  unsigned int datamask_ext;
+
  // atom style and per-atom array existence flags
  // customize by adding new flag

--- a/src/atom_masks.h
+++ b/src/atom_masks.h
@ -0,0 +1,87 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifndef LMP_ATOM_MASK_H
+#define LMP_ATOM_MASK_H
+
+// per-atom data masks
+
+#define EMPTY_MASK     0x00000000
+#define ALL_MASK       0xffffffff
+#define SAMETAG_MASK   0x01000000
+#define EXTENDED_MASK  0x80000000
+
+// standard
+
+#define X_MASK         0x00000001
+#define V_MASK         0x00000002
+#define F_MASK         0x00000004
+#define TAG_MASK       0x00000008
+#define TYPE_MASK      0x00000010
+#define MASK_MASK      0x00000020
+#define IMAGE_MASK     0x00000040
+#define Q_MASK         0x00000080
+#define MOLECULE_MASK  0x00000100
+#define RMASS_MASK     0x00000200
+#define BOND_MASK      0x00000400
+#define ANGLE_MASK     0x00000800
+#define DIHEDRAL_MASK  0x00001000
+#define IMPROPER_MASK  0x00002000
+#define SPECIAL_MASK   0x00004000
+#define MAP_MASK       0x00008000
+
+// granular
+
+#define RADIUS_MASK    0x00010000
+#define DENSITY_MASK   0x00020000
+#define OMEGA_MASK     0x00040000
+#define TORQUE_MASK    0x00080000
+#define ANGMOM_MASK    0x00100000
+#define GRANULAR_MASK  0x001f0000
+
+// peridynamics
+
+#define VFRAC_MASK     0x00200000
+#define S0_MASK        0x00400000
+#define X0_MASK        0x00800000
+#define PERI_MASK      0x00e00000
+
+#define ELLIPSOID_MASK 0x00000001
+#define LINE_MASK      0x00000002
+#define TRI_MASK       0x00000004
+
+// electron
+
+#define SPIN_MASK      0x00000010
+#define ERADIUS_MASK   0x00000020
+#define ERVEL_MASK     0x00000040
+#define ERFORCE_MASK   0x00000080
+#define ERVELFORCE_MASK 0x00000100
+
+#define CS_MASK        0x00000200
+#define CSFORCE_MASK   0x00000400
+#define VFORCE_MASK    0x00000800
+
+#define ELECTRON_MASK  0x00000f00
+
+// SPH
+
+#define ETAG_MASK      0x00001000
+#define RHO_MASK       0x00002000
+#define DRHO_MASK      0x00004000
+#define E_MASK         0x00008000
+#define DE_MASK        0x00010000
+#define VEST_MASK      0x00020000
+#define CV_MASK        0x00040000
+
+#endif
--- a/src/bond.cpp
+++ b/src/bond.cpp
@ -16,9 +16,10 @@
 #include "atom.h"
 #include "comm.h"
 #include "force.h"
+#include "suffix.h"
+#include "atom_masks.h"
 #include "memory.h"
 #include "error.h"
-#include "suffix.h"

 using namespace LAMMPS_NS;

@ -37,6 +38,9 @@ Bond::Bond(LAMMPS *lmp) : Pointers(lmp)
  maxeatom = maxvatom = 0;
  eatom = NULL;
  vatom = NULL;
+
+  datamask = ALL_MASK;
+  datamask_ext = ALL_MASK;
 }

 /* ---------------------------------------------------------------------- */
--- a/src/bond.h
+++ b/src/bond.h
@ -28,6 +28,8 @@ class Bond : protected Pointers {
  double energy;                  // accumulated energies
  double virial[6];               // accumlated virial
  double *eatom,**vatom;          // accumulated per-atom energy/virial
+  unsigned int datamask;
+  unsigned int datamask_ext;

  Bond(class LAMMPS *);
  virtual ~Bond();
@ -42,6 +44,9 @@ class Bond : protected Pointers {
  virtual double single(int, double, int, int) = 0;
  virtual double memory_usage();

+  virtual unsigned int data_mask() {return datamask;}
+  virtual unsigned int data_mask_ext() {return datamask_ext;}
+
 protected:
  int suffix_flag;             // suffix compatibility flag

--- a/src/compute.cpp
+++ b/src/compute.cpp
@ -21,6 +21,7 @@
 #include "domain.h"
 #include "comm.h"
 #include "group.h"
+#include "atom_masks.h"
 #include "memory.h"
 #include "error.h"

@ -84,6 +85,9 @@ Compute::Compute(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp)
  // setup map for molecule IDs

  molmap = NULL;
+
+  datamask = ALL_MASK;
+  datamask_ext = ALL_MASK;
 }

 /* ---------------------------------------------------------------------- */
--- a/src/compute.h
+++ b/src/compute.h
@ -20,6 +20,7 @@ namespace LAMMPS_NS {

 class Compute : protected Pointers {
 public:
+
  char *id,*style;
  int igroup,groupbit;

@ -77,6 +78,9 @@ class Compute : protected Pointers {
  int comm_forward;   // size of forward communication (0 if none)
  int comm_reverse;   // size of reverse communication (0 if none)

+  unsigned int datamask;
+  unsigned int datamask_ext;
+
  int cudable;        // 1 if compute is CUDA-enabled

  Compute(class LAMMPS *, int, char **);
@ -111,6 +115,9 @@ class Compute : protected Pointers {

  virtual double memory_usage() {return 0.0;}

+  virtual int unsigned data_mask() {return datamask;}
+  virtual int unsigned data_mask_ext() {return datamask_ext;}
+
 protected:
  int extra_dof;               // extra DOF for temperature computes
  int dynamic;                 // recount atoms for temperature computes
--- a/src/dihedral.cpp
+++ b/src/dihedral.cpp
@ -17,9 +17,10 @@
 #include "comm.h"
 #include "force.h"
 #include "pair.h"
+#include "suffix.h"
+#include "atom_masks.h"
 #include "memory.h"
 #include "error.h"
-#include "suffix.h"

 using namespace LAMMPS_NS;

@ -37,6 +38,9 @@ Dihedral::Dihedral(LAMMPS *lmp) : Pointers(lmp)
  maxeatom = maxvatom = 0;
  eatom = NULL;
  vatom = NULL;
+
+  datamask = ALL_MASK;
+  datamask_ext = ALL_MASK;
 }

 /* ---------------------------------------------------------------------- */
--- a/src/dihedral.h
+++ b/src/dihedral.h
@ -28,6 +28,8 @@ class Dihedral : protected Pointers {
  double energy;                     // accumulated energy
  double virial[6];                  // accumlated virial
  double *eatom,**vatom;             // accumulated per-atom energy/virial
+  unsigned int datamask;
+  unsigned int datamask_ext;

  Dihedral(class LAMMPS *);
  virtual ~Dihedral();
@ -40,6 +42,9 @@ class Dihedral : protected Pointers {
  virtual void read_restart(FILE *) = 0;
  virtual double memory_usage();

+  virtual unsigned int data_mask() {return datamask;}
+  virtual unsigned int data_mask_ext() {return datamask_ext;}
+
 protected:
  int suffix_flag;             // suffix compatibility flag

--- a/src/domain.h
+++ b/src/domain.h
@ -118,8 +118,8 @@ class Domain : protected Pointers {

  virtual void lamda2x(int);
  virtual void x2lamda(int);
-  void lamda2x(double *, double *);
-  void x2lamda(double *, double *);
+  virtual void lamda2x(double *, double *);
+  virtual void x2lamda(double *, double *);
  void x2lamda(double *, double *, double *, double *);
  void bbox(double *, double *, double *, double *);
  void box_corners();
@ -135,7 +135,7 @@ class Domain : protected Pointers {
    return 0;
  }

- private:
+ protected:
  double small[3];                  // fractions of box lengths
 };

--- a/src/fix.cpp
+++ b/src/fix.cpp
@ -16,6 +16,7 @@
 #include "fix.h"
 #include "atom.h"
 #include "group.h"
+#include "atom_masks.h"
 #include "memory.h"
 #include "error.h"

@ -68,6 +69,9 @@ Fix::Fix(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp)

  maxvatom = 0;
  vatom = NULL;
+
+  datamask = ALL_MASK;
+  datamask_ext = ALL_MASK;
 }

 /* ---------------------------------------------------------------------- */
--- a/src/fix.h
+++ b/src/fix.h
@ -76,6 +76,9 @@ class Fix : protected Pointers {
  double virial[6];              // accumlated virial
  double **vatom;                // accumulated per-atom virial

+  unsigned int datamask;
+  unsigned int datamask_ext;
+
  Fix(class LAMMPS *, int, char **);
  virtual ~Fix();
  void modify_params(int, char **);
@ -157,6 +160,9 @@ class Fix : protected Pointers {

  virtual double memory_usage() {return 0.0;}

+  virtual unsigned int data_mask() {return datamask;}
+  virtual unsigned int data_mask_ext() {return datamask_ext;}
+
 protected:
  int evflag;
  int vflag_global,vflag_atom;
--- a/src/improper.cpp
+++ b/src/improper.cpp
@ -16,9 +16,10 @@
 #include "atom.h"
 #include "comm.h"
 #include "force.h"
+#include "suffix.h"
+#include "atom_masks.h"
 #include "memory.h"
 #include "error.h"
-#include "suffix.h"

 using namespace LAMMPS_NS;

@ -34,6 +35,9 @@ Improper::Improper(LAMMPS *lmp) : Pointers(lmp)
  maxeatom = maxvatom = 0;
  eatom = NULL;
  vatom = NULL;
+
+  datamask = ALL_MASK;
+  datamask_ext = ALL_MASK;
 }

 /* ---------------------------------------------------------------------- */
--- a/src/improper.h
+++ b/src/improper.h
@ -28,6 +28,8 @@ class Improper : protected Pointers {
  double energy;                  // accumulated energies
  double virial[6];               // accumlated virial
  double *eatom,**vatom;          // accumulated per-atom energy/virial
+  unsigned int datamask;
+  unsigned int datamask_ext;

  Improper(class LAMMPS *);
  virtual ~Improper();
@ -39,6 +41,9 @@ class Improper : protected Pointers {
  virtual void read_restart(FILE *) = 0;
  virtual double memory_usage();

+  virtual unsigned int data_mask() {return datamask;}
+  virtual unsigned int data_mask_ext() {return datamask_ext;}
+
 protected:
  int suffix_flag;             // suffix compatibility flag

--- a/src/kspace.cpp
+++ b/src/kspace.cpp
@ -19,6 +19,7 @@
 #include "force.h"
 #include "pair.h"
 #include "memory.h"
+#include "atom_masks.h"
 #include "error.h"
 #include "suffix.h"

@ -50,6 +51,9 @@ KSpace::KSpace(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp)
  maxeatom = maxvatom = 0;
  eatom = NULL;
  vatom = NULL;
+
+  datamask = ALL_MASK;
+  datamask_ext = ALL_MASK;
 }

 /* ---------------------------------------------------------------------- */
--- a/src/kspace.h
+++ b/src/kspace.h
@ -32,6 +32,9 @@ class KSpace : protected Pointers {
  int nx_pppm,ny_pppm,nz_pppm;
  int group_group_enable;         // 1 if style supports group/group calculation

+  unsigned int datamask;
+  unsigned int datamask_ext;
+
  int compute_flag;               // 0 if skip compute()

  KSpace(class LAMMPS *, int, char **);
--- a/src/pair.cpp
+++ b/src/pair.cpp
@ -31,9 +31,10 @@
 #include "force.h"
 #include "update.h"
 #include "accelerator_cuda.h"
+#include "suffix.h"
+#include "atom_masks.h"
 #include "memory.h"
 #include "error.h"
-#include "suffix.h"

 using namespace LAMMPS_NS;

@ -77,6 +78,9 @@ Pair::Pair(LAMMPS *lmp) : Pointers(lmp)
  maxeatom = maxvatom = 0;
  eatom = NULL;
  vatom = NULL;
+
+  datamask = ALL_MASK;
+  datamask_ext = ALL_MASK;
 }

 /* ---------------------------------------------------------------------- */
--- a/src/pair.h
+++ b/src/pair.h
@ -74,6 +74,9 @@ class Pair : protected Pointers {
  class NeighList *listmiddle;
  class NeighList *listouter;

+  unsigned int datamask;
+  unsigned int datamask_ext;
+
  int compute_flag;              // 0 if skip compute()

  Pair(class LAMMPS *);
@ -140,6 +143,9 @@ class Pair : protected Pointers {
  virtual void min_xf_get(int) {}
  virtual void min_x_set(int) {}

+  virtual unsigned int data_mask() {return datamask;}
+  virtual unsigned int data_mask_ext() {return datamask_ext;}
+
 protected:
  enum{GEOMETRIC,ARITHMETIC,SIXTHPOWER};   // mixing options

--- a/src/variable.cpp
+++ b/src/variable.cpp
@ -30,6 +30,7 @@
 #include "thermo.h"
 #include "random_mars.h"
 #include "math_const.h"
+#include "atom_masks.h"
 #include "memory.h"
 #include "error.h"

@ -78,6 +79,8 @@ Variable::Variable(LAMMPS *lmp) : Pointers(lmp)
  pad = NULL;
  data = NULL;

+  eval_in_progress = NULL;
+
  randomequal = NULL;
  randomatom = NULL;

@ -109,6 +112,8 @@ Variable::~Variable()
  memory->destroy(pad);
  memory->sfree(data);

+  memory->destroy(eval_in_progress);
+
  delete randomequal;
  delete randomatom;
 }
@ -135,7 +140,7 @@ void Variable::set(int narg, char **arg)
  } else if (strcmp(arg[1],"index") == 0) {
    if (narg < 3) error->all(FLERR,"Illegal variable command");
    if (find(arg[0]) >= 0) return;
-    if (nvar == maxvar) extend();
+    if (nvar == maxvar) grow();
    style[nvar] = INDEX;
    num[nvar] = narg - 2;
    which[nvar] = 0;
@ -149,7 +154,7 @@ void Variable::set(int narg, char **arg)

  } else if (strcmp(arg[1],"loop") == 0) {
    if (find(arg[0]) >= 0) return;
-    if (nvar == maxvar) extend();
+    if (nvar == maxvar) grow();
    style[nvar] = LOOP;
    int nfirst,nlast;
    if (narg == 3 || (narg == 4 && strcmp(arg[3],"pad") == 0)) {
@ -184,7 +189,7 @@ void Variable::set(int narg, char **arg)
  } else if (strcmp(arg[1],"world") == 0) {
    if (narg < 3) error->all(FLERR,"Illegal variable command");
    if (find(arg[0]) >= 0) return;
-    if (nvar == maxvar) extend();
+    if (nvar == maxvar) grow();
    style[nvar] = WORLD;
    num[nvar] = narg - 2;
    if (num[nvar] != universe->nworlds)
@ -205,7 +210,7 @@ void Variable::set(int narg, char **arg)
    if (strcmp(arg[1],"universe") == 0) {
      if (narg < 3) error->all(FLERR,"Illegal variable command");
      if (find(arg[0]) >= 0) return;
-      if (nvar == maxvar) extend();
+      if (nvar == maxvar) grow();
      style[nvar] = UNIVERSE;
      num[nvar] = narg - 2;
      pad[nvar] = 0;
@ -215,7 +220,7 @@ void Variable::set(int narg, char **arg)
      if (narg < 3 || narg > 4 || (narg == 4 && strcmp(arg[3],"pad") != 0))
        error->all(FLERR,"Illegal variable command");
      if (find(arg[0]) >= 0) return;
-      if (nvar == maxvar) extend();
+      if (nvar == maxvar) grow();
      style[nvar] = ULOOP;
      num[nvar] = atoi(arg[2]);
      data[nvar] = new char*[1];
@ -254,7 +259,7 @@ void Variable::set(int narg, char **arg)
        error->all(FLERR,"Cannot redefine variable as a different style");
      remove(find(arg[0]));
    }
-    if (nvar == maxvar) extend();
+    if (nvar == maxvar) grow();
    style[nvar] = STRING;
    num[nvar] = 1;
    which[nvar] = 0;
@ -274,7 +279,7 @@ void Variable::set(int narg, char **arg)
        error->all(FLERR,"Cannot redefine variable as a different style");
      remove(find(arg[0]));
    }
-    if (nvar == maxvar) extend();
+    if (nvar == maxvar) grow();
    style[nvar] = EQUAL;
    num[nvar] = 2;
    which[nvar] = 0;
@ -295,7 +300,7 @@ void Variable::set(int narg, char **arg)
        error->all(FLERR,"Cannot redefine variable as a different style");
      remove(find(arg[0]));
    }
-    if (nvar == maxvar) extend();
+    if (nvar == maxvar) grow();
    style[nvar] = ATOM;
    num[nvar] = 1;
    which[nvar] = 0;
@ -475,7 +480,13 @@ char *Variable::retrieve(char *name)

 double Variable::compute_equal(int ivar)
 {
-  return evaluate(data[ivar][0],NULL);
+  // eval_in_progress used to detect circle dependencies
+  // could extend this later to check v_a = c_b + v_a constructs?
+
+  eval_in_progress[ivar] = 1;
+  double value = evaluate(data[ivar][0],NULL);
+  eval_in_progress[ivar] = 0;
+  return value;
 }

 /* ----------------------------------------------------------------------
@ -573,17 +584,18 @@ void Variable::remove(int n)
  make space in arrays for new variable
 ------------------------------------------------------------------------- */

-void Variable::extend()
+void Variable::grow()
 {
  maxvar += VARDELTA;
  names = (char **)
-    memory->srealloc(names,maxvar*sizeof(char *),"var:names");
+  memory->srealloc(names,maxvar*sizeof(char *),"var:names");
  memory->grow(style,maxvar,"var:style");
  memory->grow(num,maxvar,"var:num");
  memory->grow(which,maxvar,"var:which");
  memory->grow(pad,maxvar,"var:pad");
-  data = (char ***)
-    memory->srealloc(data,maxvar*sizeof(char **),"var:data");
+  data = (char ***) memory->srealloc(data,maxvar*sizeof(char **),"var:data");
+  memory->grow(eval_in_progress,maxvar,"var:eval_in_progress");
+  for (int i = 0; i < maxvar; i++) eval_in_progress[i] = 0;
 }

 /* ----------------------------------------------------------------------
@ -1113,6 +1125,8 @@ double Variable::evaluate(char *str, Tree **tree)
        int ivar = find(id);
        if (ivar < 0)
          error->all(FLERR,"Invalid variable name in variable formula");
+        if (eval_in_progress[ivar])
+          error->all(FLERR,"Variable has circular dependency");

        // parse zero or one trailing brackets
        // point i beyond last bracket
@ -3426,3 +3440,79 @@ double Variable::evaluate_boolean(char *str)
  if (nargstack != 1) error->all(FLERR,"Invalid Boolean syntax in if command");
  return argstack[0];
 }
+
+/* ---------------------------------------------------------------------- */
+
+unsigned int Variable::data_mask(int ivar)
+{
+  if (eval_in_progress[ivar]) return EMPTY_MASK;
+  eval_in_progress[ivar] = 1;
+  unsigned int datamask = data_mask(data[ivar][0]);
+  eval_in_progress[ivar] = 0;
+  return datamask;
+}
+
+/* ---------------------------------------------------------------------- */
+
+unsigned int Variable::data_mask(char *str)
+{
+  unsigned int datamask = EMPTY_MASK;
+
+  for (unsigned int i=0; i < strlen(str)-2; i++) {
+    int istart = i;
+    while (isalnum(str[i]) || str[i] == '_') i++;
+    int istop = i-1;
+
+    int n = istop - istart + 1;
+    char *word = new char[n+1];
+    strncpy(word,&str[istart],n);
+    word[n] = '\0';
+
+    // ----------------
+    // compute
+    // ----------------
+
+    if ((strncmp(word,"c_",2) == 0) && (i>0) && (not isalnum(str[i-1]))){
+      if (domain->box_exist == 0)
+        error->all(FLERR,
+                   "Variable evaluation before simulation box is defined");
+
+      n = strlen(word) - 2 + 1;
+      char *id = new char[n];
+      strcpy(id,&word[2]);
+
+      int icompute = modify->find_compute(id);
+      if (icompute < 0) 
+        error->all(FLERR,"Invalid compute ID in variable formula");
+
+      datamask &= modify->compute[icompute]->data_mask();
+
+      delete [] id;
+    }
+
+    if ((strncmp(word,"f_",2) == 0) && (i>0) && (not isalnum(str[i-1]))) {
+        if (domain->box_exist == 0)
+          error->all(FLERR,
+                     "Variable evaluation before simulation box is defined");
+
+        n = strlen(word) - 2 + 1;
+        char *id = new char[n];
+        strcpy(id,&word[2]);
+
+        int ifix = modify->find_fix(id);
+        if (ifix < 0) error->all(FLERR,"Invalid fix ID in variable formula");
+
+        datamask &= modify->fix[ifix]->data_mask();
+        delete [] id;
+    }
+
+    if ((strncmp(word,"v_",2) == 0) && (i>0) && (not isalnum(str[i-1]))) {
+      int ivar = find(word);
+      datamask &= data_mask(ivar);
+    }
+
+    delete [] word;
+  }
+
+  return datamask;
+}
--- a/src/variable.h
+++ b/src/variable.h
@ -34,6 +34,9 @@ class Variable : protected Pointers {
  int int_between_brackets(char *&);
  double evaluate_boolean(char *);

+  unsigned int data_mask(int ivar);
+  unsigned int data_mask(char *str);
+
 private:
  int nvar;                // # of defined variables
  int maxvar;              // max # of variables arrays can hold
@ -43,6 +46,7 @@ class Variable : protected Pointers {
  int *which;              // next available value for each variable
  int *pad;                // 1 = pad loop/uloop variables with 0s, 0 = no pad
  char ***data;            // str value of each variable's values
+  int *eval_in_progress;   // flag if evaluation of variable is in progress

  class RanMars *randomequal;   // random number generator for equal-style vars
  class RanMars *randomatom;    // random number generator for atom-style vars
@ -62,7 +66,7 @@ class Variable : protected Pointers {
  };

  void remove(int);
-  void extend();
+  void grow();
  void copy(int, char **, char **);
  double evaluate(char *, Tree **);
  double collapse_tree(Tree *);