From 6ad1c9618b98a5521abb323cff4e2dca99fc7bdd Mon Sep 17 00:00:00 2001
From: sjplimp <sjplimp@f3b2605a-c512-4ea7-a41b-209d697bcdaa>
Date: Fri, 23 Aug 2013 14:47:08 +0000
Subject: [PATCH] git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@10670
 f3b2605a-c512-4ea7-a41b-209d697bcdaa

---
 src/GPU/Install.sh   | 10 ++++++++++
 src/GPU/fix_gpu.cpp  | 42 ++++++++++++++++++++++++++----------------
 src/GPU/gpu_extra.h  | 17 +++++++++++++++++
 src/GPU/pppm_gpu.cpp |  8 ++++++--
 4 files changed, 59 insertions(+), 18 deletions(-)

diff --git a/src/GPU/Install.sh b/src/GPU/Install.sh
index 46644debb7..89a342496b 100644
--- a/src/GPU/Install.sh
+++ b/src/GPU/Install.sh
@@ -27,6 +27,8 @@ action () {
 action fix_gpu.cpp
 action fix_gpu.h
 action gpu_extra.h
+action pair_beck_gpu.cpp
+action pair_beck_gpu.h 
 action pair_born_coul_long_gpu.cpp pair_born_coul_long.cpp
 action pair_born_coul_long_gpu.h pair_born_coul_long.cpp
 action pair_born_coul_wolf_gpu.cpp
@@ -75,6 +77,8 @@ action pair_lj_cut_coul_dsf_gpu.cpp
 action pair_lj_cut_coul_dsf_gpu.h
 action pair_lj_cut_coul_long_gpu.cpp pair_lj_cut_coul_long.cpp
 action pair_lj_cut_coul_long_gpu.h pair_lj_cut_coul_long.cpp
+action pair_lj_cut_coul_msm_gpu.cpp
+action pair_lj_cut_coul_msm_gpu.h
 action pair_lj_cut_gpu.cpp
 action pair_lj_cut_gpu.h
 action pair_lj_expand_gpu.cpp
@@ -83,10 +87,16 @@ action pair_lj_sdk_coul_long_gpu.cpp pair_lj_sdk_coul_long.cpp
 action pair_lj_sdk_coul_long_gpu.h pair_lj_sdk_coul_long.cpp
 action pair_lj_sdk_gpu.cpp pair_lj_sdk.cpp
 action pair_lj_sdk_gpu.h pair_lj_sdk.cpp
+action pair_mie_cut_gpu.cpp
+action pair_mie_cut_gpu.h 
 action pair_morse_gpu.cpp
 action pair_morse_gpu.h
 action pair_resquared_gpu.cpp pair_resquared.cpp
 action pair_resquared_gpu.h pair_resquared.cpp
+action pair_soft_gpu.cpp
+action pair_soft_gpu.h
+action pair_sw_gpu.cpp pair_sw.cpp
+action pair_sw_gpu.h pair_sw.h
 action pair_table_gpu.cpp pair_table.cpp
 action pair_table_gpu.h pair_table.cpp
 action pair_yukawa_colloid_gpu.cpp pair_yukawa_colloid.cpp
diff --git a/src/GPU/fix_gpu.cpp b/src/GPU/fix_gpu.cpp
index 7a8098fd69..6fd8e06fce 100644
--- a/src/GPU/fix_gpu.cpp
+++ b/src/GPU/fix_gpu.cpp
@@ -17,6 +17,8 @@
 #include "atom.h"
 #include "force.h"
 #include "pair.h"
+#include "pair_hybrid.h"
+#include "pair_hybrid_overlay.h"
 #include "respa.h"
 #include "input.h"
 #include "timer.h"
@@ -38,7 +40,7 @@ extern int lmp_init_device(MPI_Comm world, MPI_Comm replica,
                            const int first_gpu, const int last_gpu,
                            const int gpu_mode, const double particle_split,
                            const int nthreads, const int t_per_atom,
-                           const double cell_size);
+                           const double cell_size, char *opencl_flags);
 extern void lmp_clear_device();
 extern double lmp_gpu_forces(double **f, double **tor, double *eatom,
                              double **vatom, double *virial, double &ecoul);
@@ -103,6 +105,7 @@ FixGPU::FixGPU(LAMMPS *lmp, int narg, char **arg) :
   double cell_size = -1;
 
   int iarg = 7;
+  char *opencl_flags = NULL;
   while (iarg < narg) {
     if (iarg+2 > narg) error->all(FLERR,"Illegal fix GPU command");
 
@@ -112,6 +115,8 @@ FixGPU::FixGPU(LAMMPS *lmp, int narg, char **arg) :
       nthreads = force->inumeric(FLERR,arg[iarg+1]);
     else if (strcmp(arg[iarg],"cellsize") == 0)
       cell_size = force->numeric(FLERR,arg[iarg+1]);
+    else if (strcmp(arg[iarg],"device") == 0)
+      opencl_flags = arg[iarg+1];
     else
       error->all(FLERR,"Illegal fix GPU command");
 
@@ -128,7 +133,7 @@ FixGPU::FixGPU(LAMMPS *lmp, int narg, char **arg) :
 
   int gpu_flag = lmp_init_device(universe->uworld, world, first_gpu, last_gpu,
                                  _gpu_mode, _particle_split, nthreads,
-                                 threads_per_atom, cell_size);
+                                 threads_per_atom, cell_size, opencl_flags);
   GPU_EXTRA::check_flag(gpu_flag,error,world);
 }
 
@@ -165,21 +170,24 @@ void FixGPU::init()
         force->pair_match("hybrid/overlay",1) != NULL)
       error->all(FLERR,"GPU 'split' must be positive for hybrid pair styles");
 
+  // Make sure fdotr virial is not accumulated multiple times
+  
+  if (force->pair_match("hybrid",1) != NULL) {
+    PairHybrid *hybrid = (PairHybrid *) force->pair;
+    for (int i = 0; i < hybrid->nstyles; i++)
+      if (strstr(hybrid->keywords[i],"/gpu")==NULL)
+        force->pair->no_virial_fdotr_compute = 1;
+  } else if (force->pair_match("hybrid/overlay",1) != NULL) {
+    PairHybridOverlay *hybrid = (PairHybridOverlay *) force->pair;
+    for (int i = 0; i < hybrid->nstyles; i++)
+      if (strstr(hybrid->keywords[i],"/gpu")==NULL)
+        force->pair->no_virial_fdotr_compute = 1;
+  }
+
   // r-RESPA support
 
-  if (strstr(update->integrate_style,"respa")) {
+  if (strstr(update->integrate_style,"respa"))
     _nlevels_respa = ((Respa *) update->integrate)->nlevels;
-
-    // need to check that gpu accelerated styles are at the outmost levels
-    
-    if ((force->pair_match("/gpu",0) != NULL) &&
-        (((Respa *) update->integrate)->level_pair != _nlevels_respa-1))
-      error->all(FLERR,"GPU pair style must be at outermost respa level");
-
-    if ((force->kspace_match("/gpu",0) != NULL) &&
-        (((Respa *) update->integrate)->level_kspace != _nlevels_respa-1))
-      error->all(FLERR,"GPU Kspace style must be at outermost respa level");
-  }
 }
 
 /* ---------------------------------------------------------------------- */
@@ -194,8 +202,9 @@ void FixGPU::setup(int vflag)
   if (strstr(update->integrate_style,"verlet"))
     post_force(vflag);
   else {
+    // In setup only, all forces computed on the GPU go to the outermost level
     ((Respa *) update->integrate)->copy_flevel_f(_nlevels_respa-1);
-    post_force_respa(vflag,_nlevels_respa-1,0);
+    post_force(vflag);
     ((Respa *) update->integrate)->copy_f_flevel(_nlevels_respa-1);
   }
 }
@@ -241,7 +250,7 @@ void FixGPU::min_post_force(int vflag)
 
 void FixGPU::post_force_respa(int vflag, int ilevel, int iloop)
 {
-  if (ilevel == _nlevels_respa-1) post_force(vflag);
+  post_force(vflag);
 }
 
 /* ---------------------------------------------------------------------- */
@@ -252,3 +261,4 @@ double FixGPU::memory_usage()
   // Memory usage currently returned by pair routine
   return bytes;
 }
+
diff --git a/src/GPU/gpu_extra.h b/src/GPU/gpu_extra.h
index f1b8de65cf..5bfb38f369 100644
--- a/src/GPU/gpu_extra.h
+++ b/src/GPU/gpu_extra.h
@@ -52,6 +52,12 @@ namespace GPU_EXTRA {
       else if (all_success == -9)
         error->all(FLERR,
                    "CPU neighbor lists must be used for ellipsoid/sphere mix.");
+      else if (all_success == -10)
+        error->all(FLERR,
+                   "Invalid threads_per_atom specified.");
+      else if (all_success == -11)
+        error->all(FLERR,
+                   "Invalid custom OpenCL parameter string.");
       else
         error->all(FLERR,"Unknown error in GPU library");
     }
@@ -110,8 +116,19 @@ E: CPU neighbor lists must be used for ellipsoid/sphere mix
 When using Gay-Berne or RE-squared pair styles with both ellipsoidal and
 spherical particles, the neighbor list must be built on the CPU
 
+E: Invalid threads_per_atom specified.
+
+For 3-body potentials on the GPU, the threads_per_atom setting cannot be
+greater than 4 for NVIDIA GPUs.
+
 E: Unknown error in GPU library
 
 Self-explanatory.
 
+E: Invalid custom OpenCL parameter string.
+
+The custom OpenCL parameter string passed to package GPU contains too few or
+too many parameters.
+
 */
+
diff --git a/src/GPU/pppm_gpu.cpp b/src/GPU/pppm_gpu.cpp
index c085d2b6a2..e815dbcbe0 100644
--- a/src/GPU/pppm_gpu.cpp
+++ b/src/GPU/pppm_gpu.cpp
@@ -78,7 +78,7 @@ FFT_SCALAR* PPPM_GPU_API(init)(const int nlocal, const int nall, FILE *screen,
                                const double slab_volfactor,
                                const int nx_pppm, const int ny_pppm,
                                const int nz_pppm, const bool split,
-                               int &success);
+                               const bool respa, int &success);
 void PPPM_GPU_API(clear)(const double poisson_time);
 int PPPM_GPU_API(spread)(const int ago, const int nlocal, const int nall,
                       double **host_x, int *host_type, bool &success,
@@ -152,6 +152,10 @@ void PPPMGPU::init()
 
   // GPU precision specific init
 
+  bool respa_value=false;
+  if (strstr(update->integrate_style,"respa"))
+    respa_value=true;  
+
   if (order>8)
     error->all(FLERR,"Cannot use order greater than 8 with pppm/gpu.");
   PPPM_GPU_API(clear)(poisson_time);
@@ -162,7 +166,7 @@ void PPPMGPU::init()
                                order, nxlo_out, nylo_out, nzlo_out, nxhi_out,
                                nyhi_out, nzhi_out, rho_coeff, &data,
                                slab_volfactor,nx_pppm,ny_pppm,nz_pppm,
-                               kspace_split,success);
+                               kspace_split,respa_value,success);
 
   GPU_EXTRA::check_flag(success,error,world);