diff --git a/src/MOLECULE/atom_vec_angle.h b/src/MOLECULE/atom_vec_angle.h
index a9f35573ee..cfe83c716e 100644
--- a/src/MOLECULE/atom_vec_angle.h
+++ b/src/MOLECULE/atom_vec_angle.h
@@ -27,24 +27,24 @@ namespace LAMMPS_NS {
 class AtomVecAngle : public AtomVec {
  public:
   AtomVecAngle(class LAMMPS *, int, char **);
-  virtual ~AtomVecAngle() {}
+  ~AtomVecAngle() {}
   void grow(int);
   void grow_reset();
   void copy(int, int, int);
-  int pack_comm(int, int *, double *, int, int *);
-  int pack_comm_vel(int, int *, double *, int, int *);
-  void unpack_comm(int, int, double *);
-  void unpack_comm_vel(int, int, double *);
+  virtual int pack_comm(int, int *, double *, int, int *);
+  virtual int pack_comm_vel(int, int *, double *, int, int *);
+  virtual void unpack_comm(int, int, double *);
+  virtual void unpack_comm_vel(int, int, double *);
   int pack_reverse(int, int, double *);
   void unpack_reverse(int, int *, double *);
-  int pack_border(int, int *, double *, int, int *);
-  int pack_border_vel(int, int *, double *, int, int *);
+  virtual int pack_border(int, int *, double *, int, int *);
+  virtual int pack_border_vel(int, int *, double *, int, int *);
   int pack_border_hybrid(int, int *, double *);
-  void unpack_border(int, int, double *);
-  void unpack_border_vel(int, int, double *);
+  virtual void unpack_border(int, int, double *);
+  virtual void unpack_border_vel(int, int, double *);
   int unpack_border_hybrid(int, int, double *);
-  int pack_exchange(int, double *);
-  int unpack_exchange(double *);
+  virtual int pack_exchange(int, double *);
+  virtual int unpack_exchange(double *);
   int size_restart();
   int pack_restart(int, double *);
   int unpack_restart(double *);
diff --git a/src/MOLECULE/atom_vec_full.h b/src/MOLECULE/atom_vec_full.h
index de68deb86c..a457c04660 100644
--- a/src/MOLECULE/atom_vec_full.h
+++ b/src/MOLECULE/atom_vec_full.h
@@ -27,24 +27,24 @@ namespace LAMMPS_NS {
 class AtomVecFull : public AtomVec {
  public:
   AtomVecFull(class LAMMPS *, int, char **);
-  virtual ~AtomVecFull() {}
+  ~AtomVecFull() {}
   void grow(int);
   void grow_reset();
   void copy(int, int, int);
-  int pack_comm(int, int *, double *, int, int *);
-  int pack_comm_vel(int, int *, double *, int, int *);
-  void unpack_comm(int, int, double *);
-  void unpack_comm_vel(int, int, double *);
+  virtual int pack_comm(int, int *, double *, int, int *);
+  virtual int pack_comm_vel(int, int *, double *, int, int *);
+  virtual void unpack_comm(int, int, double *);
+  virtual void unpack_comm_vel(int, int, double *);
   int pack_reverse(int, int, double *);
   void unpack_reverse(int, int *, double *);
-  int pack_border(int, int *, double *, int, int *);
-  int pack_border_vel(int, int *, double *, int, int *);
+  virtual int pack_border(int, int *, double *, int, int *);
+  virtual int pack_border_vel(int, int *, double *, int, int *);
   int pack_border_hybrid(int, int *, double *);
-  void unpack_border(int, int, double *);
-  void unpack_border_vel(int, int, double *);
+  virtual void unpack_border(int, int, double *);
+  virtual void unpack_border_vel(int, int, double *);
   int unpack_border_hybrid(int, int, double *);
-  int pack_exchange(int, double *);
-  int unpack_exchange(double *);
+  virtual int pack_exchange(int, double *);
+  virtual int unpack_exchange(double *);
   int size_restart();
   int pack_restart(int, double *);
   int unpack_restart(double *);
diff --git a/src/atom.cpp b/src/atom.cpp
index 9abc972f3c..d7c08d9cbb 100644
--- a/src/atom.cpp
+++ b/src/atom.cpp
@@ -1446,11 +1446,11 @@ void Atom::setup_sort_bins()
       double vol = (domain->boxhi[0]-domain->boxlo[0]) * 
 	(domain->boxhi[1]-domain->boxlo[1]) * 
 	(domain->boxhi[2]-domain->boxlo[2]);
-      binsize = pow(CUDA_CHUNK/natoms*vol,1.0/3.0);
+      binsize = pow(1.0*CUDA_CHUNK/natoms*vol,1.0/3.0);
     } else {
       double area = (domain->boxhi[0]-domain->boxlo[0]) * 
 	(domain->boxhi[1]-domain->boxlo[1]);
-      binsize = pow(CUDA_CHUNK/natoms*area,1.0/2.0);
+      binsize = pow(1.0*CUDA_CHUNK/natoms*area,1.0/2.0);
     }
   } else binsize = 0.5 * neighbor->cutneighmax;
   if (binsize == 0.0) error->all("Atom sorting has bin size = 0.0");
diff --git a/src/atom_vec_atomic.h b/src/atom_vec_atomic.h
index 177038729b..537065c553 100644
--- a/src/atom_vec_atomic.h
+++ b/src/atom_vec_atomic.h
@@ -27,22 +27,22 @@ namespace LAMMPS_NS {
 class AtomVecAtomic : public AtomVec {
  public:
   AtomVecAtomic(class LAMMPS *, int, char **);
-  virtual ~AtomVecAtomic() {}
+  ~AtomVecAtomic() {}
   void grow(int);
   void grow_reset();
   void copy(int, int, int);
-  int pack_comm(int, int *, double *, int, int *);
-  int pack_comm_vel(int, int *, double *, int, int *);
-  void unpack_comm(int, int, double *);
-  void unpack_comm_vel(int, int, double *);
+  virtual int pack_comm(int, int *, double *, int, int *);
+  virtual int pack_comm_vel(int, int *, double *, int, int *);
+  virtual void unpack_comm(int, int, double *);
+  virtual void unpack_comm_vel(int, int, double *);
   int pack_reverse(int, int, double *);
   void unpack_reverse(int, int *, double *);
-  int pack_border(int, int *, double *, int, int *);
-  int pack_border_vel(int, int *, double *, int, int *);
-  void unpack_border(int, int, double *);
-  void unpack_border_vel(int, int, double *);
-  int pack_exchange(int, double *);
-  int unpack_exchange(double *);
+  virtual int pack_border(int, int *, double *, int, int *);
+  virtual int pack_border_vel(int, int *, double *, int, int *);
+  virtual void unpack_border(int, int, double *);
+  virtual void unpack_border_vel(int, int, double *);
+  virtual int pack_exchange(int, double *);
+  virtual int unpack_exchange(double *);
   int size_restart();
   int pack_restart(int, double *);
   int unpack_restart(double *);
diff --git a/src/atom_vec_charge.h b/src/atom_vec_charge.h
index cc37810598..cfd6555acb 100644
--- a/src/atom_vec_charge.h
+++ b/src/atom_vec_charge.h
@@ -27,24 +27,24 @@ namespace LAMMPS_NS {
 class AtomVecCharge : public AtomVec {
  public:
   AtomVecCharge(class LAMMPS *, int, char **);
-  virtual ~AtomVecCharge() {}
+  ~AtomVecCharge() {}
   void grow(int);
   void grow_reset();
   void copy(int, int, int);
-  int pack_comm(int, int *, double *, int, int *);
-  int pack_comm_vel(int, int *, double *, int, int *);
-  void unpack_comm(int, int, double *);
-  void unpack_comm_vel(int, int, double *);
+  virtual int pack_comm(int, int *, double *, int, int *);
+  virtual int pack_comm_vel(int, int *, double *, int, int *);
+  virtual void unpack_comm(int, int, double *);
+  virtual void unpack_comm_vel(int, int, double *);
   int pack_reverse(int, int, double *);
   void unpack_reverse(int, int *, double *);
-  int pack_border(int, int *, double *, int, int *);
-  int pack_border_vel(int, int *, double *, int, int *);
+  virtual int pack_border(int, int *, double *, int, int *);
+  virtual int pack_border_vel(int, int *, double *, int, int *);
   int pack_border_hybrid(int, int *, double *);
-  void unpack_border(int, int, double *);
-  void unpack_border_vel(int, int, double *);
+  virtual void unpack_border(int, int, double *);
+  virtual void unpack_border_vel(int, int, double *);
   int unpack_border_hybrid(int, int, double *);
-  int pack_exchange(int, double *);
-  int unpack_exchange(double *);
+  virtual int pack_exchange(int, double *);
+  virtual int unpack_exchange(double *);
   int size_restart();
   int pack_restart(int, double *);
   int unpack_restart(double *);
diff --git a/src/comm.h b/src/comm.h
index d6934775bf..868f6bec2e 100644
--- a/src/comm.h
+++ b/src/comm.h
@@ -37,23 +37,23 @@ class Comm : protected Pointers {
   Comm(class LAMMPS *);
   virtual ~Comm();
 
-  void init();
-  void set_procs();                 // setup 3d grid of procs
-  void setup();                     // setup 3d communication pattern
-  void forward_comm(int dummy = 0); // forward communication of atom coords
-  void reverse_comm();              // reverse communication of forces
-  void exchange();                  // move atoms to new procs
-  void borders();                   // setup list of atoms to communicate
+  virtual void init();
+  virtual void set_procs();                 // setup 3d grid of procs
+  virtual void setup();                     // setup 3d communication pattern
+  virtual void forward_comm(int dummy = 0); // forward communication of atom coords
+  virtual void reverse_comm();              // reverse communication of forces
+  virtual void exchange();                  // move atoms to new procs
+  virtual void borders();                   // setup list of atoms to communicate
 
-  void forward_comm_pair(class Pair *);        // forward comm from a Pair
-  void reverse_comm_pair(class Pair *);        // reverse comm from a Pair
-  void forward_comm_fix(class Fix *);          // forward comm from a Fix
-  void reverse_comm_fix(class Fix *);          // reverse comm from a Fix
-  void forward_comm_compute(class Compute *);  // forward comm from a Compute
-  void reverse_comm_compute(class Compute *);  // reverse comm from a Compute
+  virtual void forward_comm_pair(class Pair *);        // forward comm from a Pair
+  virtual void reverse_comm_pair(class Pair *);        // reverse comm from a Pair
+  virtual void forward_comm_fix(class Fix *);          // forward comm from a Fix
+  virtual void reverse_comm_fix(class Fix *);          // reverse comm from a Fix
+  virtual void forward_comm_compute(class Compute *);  // forward comm from a Compute
+  virtual void reverse_comm_compute(class Compute *);  // reverse comm from a Compute
 
-  void set(int, char **);           // set communication style
-  bigint memory_usage();
+  virtual void set(int, char **);           // set communication style
+  virtual bigint memory_usage();
 
  protected:
   int style;                        // single vs multi-type comm
@@ -87,18 +87,18 @@ class Comm : protected Pointers {
   int maxsend,maxrecv;              // current size of send/recv buffer
   int maxforward,maxreverse;        // max # of datums in forward/reverse comm
 
-  void procs2box();                 // map procs to 3d box
-  void cross(double, double, double,
+  virtual void procs2box();                 // map procs to 3d box
+  virtual void cross(double, double, double,
 	     double, double, double,
 	     double &, double &, double &);    // cross product
-  void grow_send(int,int);          // reallocate send buffer
-  void grow_recv(int);              // free/allocate recv buffer
-  void grow_list(int, int);         // reallocate one sendlist
-  void grow_swap(int);              // grow swap and multi arrays
-  void allocate_swap(int);          // allocate swap arrays
-  void allocate_multi(int);         // allocate multi arrays
-  void free_swap();                 // free swap arrays
-  void free_multi();                // free multi arrays
+  virtual void grow_send(int,int);          // reallocate send buffer
+  virtual void grow_recv(int);              // free/allocate recv buffer
+  virtual void grow_list(int, int);         // reallocate one sendlist
+  virtual void grow_swap(int);              // grow swap and multi arrays
+  virtual void allocate_swap(int);          // allocate swap arrays
+  virtual void allocate_multi(int);         // allocate multi arrays
+  virtual void free_swap();                 // free swap arrays
+  virtual void free_multi();                // free multi arrays
 };
 
 }
diff --git a/src/domain.h b/src/domain.h
index d7cb88ce5c..3e8eba5d3e 100644
--- a/src/domain.h
+++ b/src/domain.h
@@ -85,14 +85,14 @@ class Domain : protected Pointers {
   class Region **regions;                  // list of defined Regions
 
   Domain(class LAMMPS *);
-  ~Domain();
-  void init();
+  virtual ~Domain();
+  virtual void init();
   void set_initial_box();
-  void set_global_box();
-  void set_lamda_box();
-  void set_local_box();
-  void reset_box();
-  void pbc();
+  virtual void set_global_box();
+  virtual void set_lamda_box();
+  virtual void set_local_box();
+  virtual void reset_box();
+  virtual void pbc();
   void remap(double *, int &);
   void remap(double *);
   void remap_near(double *, double *);
@@ -107,8 +107,8 @@ class Domain : protected Pointers {
   void set_boundary(int, char **);
   void print_box(const char *);
 
-  void lamda2x(int);
-  void x2lamda(int);
+  virtual void lamda2x(int);
+  virtual void x2lamda(int);
   void lamda2x(double *, double *);
   void x2lamda(double *, double *);
   void bbox(double *, double *, double *, double *);
diff --git a/src/input.cpp b/src/input.cpp
index 370f6ceb8b..079879574f 100644
--- a/src/input.cpp
+++ b/src/input.cpp
@@ -808,7 +808,7 @@ void Input::accelerator()
   if (domain->box_exist) 
     error->all("Accelerator command after simulation box is defined");
   if (narg < 1) error->all("Illegal accelerator command");
-  if (strcmp(lmp->asuffix,arg[0]) != 0)
+  if (!lmp->asuffix || (strcmp(lmp->asuffix,arg[0]) != 0))
     error->all("Accelerator command requires matching command-line -a switch");
 
   if (strcmp(arg[0],"off") == 0) {
diff --git a/src/lammps.cpp b/src/lammps.cpp
index daf9c5ac50..f0b79ec45b 100644
--- a/src/lammps.cpp
+++ b/src/lammps.cpp
@@ -107,6 +107,7 @@ LAMMPS::LAMMPS(int narg, char **arg, MPI_Comm communicator)
       else error->universe_all("Invalid command-line argument");
       asuffix = new char[8];
       strcpy(asuffix,arg[iarg+1]);
+      iarg += 2;
     } else error->universe_all("Invalid command-line argument");
   }
 
@@ -363,7 +364,7 @@ void LAMMPS::create()
 
 void LAMMPS::init()
 {
-  if (accelerator == USERCUDA) cuda->setDevice(this);
+  if (accelerator == USERCUDA) cuda->accelerator(0,NULL);
  
   update->init();
   force->init();         // pair must come after update due to minimizer
diff --git a/src/modify.h b/src/modify.h
index fdd25b1045..0175cc06a0 100644
--- a/src/modify.h
+++ b/src/modify.h
@@ -41,21 +41,21 @@ class Modify : protected Pointers {
 
   Modify(class LAMMPS *);
   virtual ~Modify();
-  void init();
-  void setup(int);
-  void setup_pre_exchange();
-  void setup_pre_force(int);
-  void initial_integrate(int);
-  void post_integrate();
+  virtual void init();
+  virtual void setup(int);
+  virtual void setup_pre_exchange();
+  virtual void setup_pre_force(int);
+  virtual void initial_integrate(int);
+  virtual void post_integrate();
   void pre_decide();
-  void pre_exchange();
-  void pre_neighbor();
-  void pre_force(int);
-  void post_force(int);
-  void final_integrate();
-  void end_of_step();
-  double thermo_energy();
-  void post_run();
+  virtual void pre_exchange();
+  virtual void pre_neighbor();
+  virtual void pre_force(int);
+  virtual void post_force(int);
+  virtual void final_integrate();
+  virtual void end_of_step();
+  virtual double thermo_energy();
+  virtual void post_run();
 
   void setup_pre_force_respa(int, int);
   void initial_integrate_respa(int, int, int);
diff --git a/src/output.cpp b/src/output.cpp
index a7339c1f58..95808a9b5e 100644
--- a/src/output.cpp
+++ b/src/output.cpp
@@ -32,9 +32,12 @@
 #include "write_restart.h"
 #include "memory.h"
 #include "error.h"
+#include "accelerator.h"
 
 using namespace LAMMPS_NS;
 
+enum{NOACCEL,OPT,GPU,USERCUDA};     // same as lammps.cpp
+
 #define DELTA 1
 
 #define MYMIN(a,b) ((a) < (b) ? (a) : (b))
@@ -243,8 +246,13 @@ void Output::write(bigint ntimestep)
 {
   // next_dump does not force output on last step of run
   // wrap dumps that invoke computes with clear/add
+  // download data from GPU if necessary
 
   if (next_dump_any == ntimestep) {
+
+    if (lmp->accelerator == USERCUDA && !lmp->cuda->oncpu)
+      lmp->cuda->downloadAll();    
+    
     for (int idump = 0; idump < ndump; idump++) {
       if (next_dump[idump] == ntimestep && last_dump[idump] != ntimestep) {
         if (dump[idump]->clearstep) modify->clearstep_compute();
@@ -267,8 +275,13 @@ void Output::write(bigint ntimestep)
 
   // next_restart does not force output on last step of run
   // for toggle = 0, replace "*" with current timestep in restart filename
+  // download data from GPU if necessary
 
   if (next_restart == ntimestep && last_restart != ntimestep) {
+
+    if (lmp->accelerator == USERCUDA && !lmp->cuda->oncpu) 
+      lmp->cuda->downloadAll();    
+    
     if (restart_toggle == 0) {
       char *file = new char[strlen(restart1) + 16];
       char *ptr = strchr(restart1,'*');
diff --git a/src/update.cpp b/src/update.cpp
index 1c70ad7047..c8efe4b796 100644
--- a/src/update.cpp
+++ b/src/update.cpp
@@ -216,15 +216,20 @@ void Update::create_integrate(int narg, char **arg, char *suffix)
   }
 }
 
-/* ---------------------------------------------------------------------- */
+/* ----------------------------------------------------------------------
+   create the Integrate style, first with suffix appended
+------------------------------------------------------------------------- */
 
 void Update::new_integrate(char *style, int narg, char **arg,
 			   char *suffix, int &sflag)
 {
+  int success = 0;
+
   if (suffix && lmp->offaccel == 0) {
     sflag = 1;
     char estyle[256];
     sprintf(estyle,"%s/%s",style,suffix);
+    success = 1;
 
     if (0) return;
 
@@ -235,20 +240,25 @@ void Update::new_integrate(char *style, int narg, char **arg,
 #undef IntegrateStyle
 #undef INTEGRATE_CLASS
 
+    else success = 0;
   }
 
-  sflag = 0;
+  // fall back to the unsuffixed style only if no suffixed style was created
+  // sflag must stay 1 when the suffixed style succeeded, so reset it here only
 
-  if (0) return;
+  if (!success) {
+    sflag = 0;
+    if (0) return;
 
 #define INTEGRATE_CLASS
 #define IntegrateStyle(key,Class) \
-  else if (strcmp(style,#key) == 0) integrate = new Class(lmp,narg,arg);
+    else if (strcmp(style,#key) == 0) integrate = new Class(lmp,narg,arg);
 #include "style_integrate.h"
 #undef IntegrateStyle
 #undef INTEGRATE_CLASS
 
-  else error->all("Illegal integrate style");
+    else error->all("Illegal integrate style");
+  }
 }
 
 /* ---------------------------------------------------------------------- */