From 6ad1c9618b98a5521abb323cff4e2dca99fc7bdd Mon Sep 17 00:00:00 2001 From: sjplimp Date: Fri, 23 Aug 2013 14:47:08 +0000 Subject: [PATCH] git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@10670 f3b2605a-c512-4ea7-a41b-209d697bcdaa --- src/GPU/Install.sh | 10 ++++++++++ src/GPU/fix_gpu.cpp | 42 ++++++++++++++++++++++++++---------------- src/GPU/gpu_extra.h | 17 +++++++++++++++++ src/GPU/pppm_gpu.cpp | 8 ++++++-- 4 files changed, 59 insertions(+), 18 deletions(-) diff --git a/src/GPU/Install.sh b/src/GPU/Install.sh index 46644debb7..89a342496b 100644 --- a/src/GPU/Install.sh +++ b/src/GPU/Install.sh @@ -27,6 +27,8 @@ action () { action fix_gpu.cpp action fix_gpu.h action gpu_extra.h +action pair_beck_gpu.cpp +action pair_beck_gpu.h action pair_born_coul_long_gpu.cpp pair_born_coul_long.cpp action pair_born_coul_long_gpu.h pair_born_coul_long.cpp action pair_born_coul_wolf_gpu.cpp @@ -75,6 +77,8 @@ action pair_lj_cut_coul_dsf_gpu.cpp action pair_lj_cut_coul_dsf_gpu.h action pair_lj_cut_coul_long_gpu.cpp pair_lj_cut_coul_long.cpp action pair_lj_cut_coul_long_gpu.h pair_lj_cut_coul_long.cpp +action pair_lj_cut_coul_msm_gpu.cpp +action pair_lj_cut_coul_msm_gpu.h action pair_lj_cut_gpu.cpp action pair_lj_cut_gpu.h action pair_lj_expand_gpu.cpp @@ -83,10 +87,16 @@ action pair_lj_sdk_coul_long_gpu.cpp pair_lj_sdk_coul_long.cpp action pair_lj_sdk_coul_long_gpu.h pair_lj_sdk_coul_long.cpp action pair_lj_sdk_gpu.cpp pair_lj_sdk.cpp action pair_lj_sdk_gpu.h pair_lj_sdk.cpp +action pair_mie_cut_gpu.cpp +action pair_mie_cut_gpu.h action pair_morse_gpu.cpp action pair_morse_gpu.h action pair_resquared_gpu.cpp pair_resquared.cpp action pair_resquared_gpu.h pair_resquared.cpp +action pair_soft_gpu.cpp +action pair_soft_gpu.h +action pair_sw_gpu.cpp pair_sw.cpp +action pair_sw_gpu.h pair_sw.h action pair_table_gpu.cpp pair_table.cpp action pair_table_gpu.h pair_table.cpp action pair_yukawa_colloid_gpu.cpp pair_yukawa_colloid.cpp diff --git a/src/GPU/fix_gpu.cpp b/src/GPU/fix_gpu.cpp index 7a8098fd69..6fd8e06fce 100644 --- a/src/GPU/fix_gpu.cpp +++ b/src/GPU/fix_gpu.cpp @@ -17,6 +17,8 @@ #include "atom.h" #include "force.h" #include "pair.h" +#include "pair_hybrid.h" +#include "pair_hybrid_overlay.h" #include "respa.h" #include "input.h" #include "timer.h" @@ -38,7 +40,7 @@ extern int lmp_init_device(MPI_Comm world, MPI_Comm replica, const int first_gpu, const int last_gpu, const int gpu_mode, const double particle_split, const int nthreads, const int t_per_atom, - const double cell_size); + const double cell_size, char *opencl_flags); extern void lmp_clear_device(); extern double lmp_gpu_forces(double **f, double **tor, double *eatom, double **vatom, double *virial, double &ecoul); @@ -103,6 +105,7 @@ FixGPU::FixGPU(LAMMPS *lmp, int narg, char **arg) : double cell_size = -1; int iarg = 7; + char *opencl_flags = NULL; while (iarg < narg) { if (iarg+2 > narg) error->all(FLERR,"Illegal fix GPU command"); @@ -112,6 +115,8 @@ FixGPU::FixGPU(LAMMPS *lmp, int narg, char **arg) : nthreads = force->inumeric(FLERR,arg[iarg+1]); else if (strcmp(arg[iarg],"cellsize") == 0) cell_size = force->numeric(FLERR,arg[iarg+1]); + else if (strcmp(arg[iarg],"device") == 0) + opencl_flags = arg[iarg+1]; else error->all(FLERR,"Illegal fix GPU command"); @@ -128,7 +133,7 @@ FixGPU::FixGPU(LAMMPS *lmp, int narg, char **arg) : int gpu_flag = lmp_init_device(universe->uworld, world, first_gpu, last_gpu, _gpu_mode, _particle_split, nthreads, - threads_per_atom, cell_size); + threads_per_atom, cell_size, opencl_flags); GPU_EXTRA::check_flag(gpu_flag,error,world); } @@ -165,21 +170,24 @@ void FixGPU::init() force->pair_match("hybrid/overlay",1) != NULL) error->all(FLERR,"GPU 'split' must be positive for hybrid pair styles"); + // Make sure fdotr virial is not accumulated multiple times + + if (force->pair_match("hybrid",1) != NULL) { + PairHybrid *hybrid = (PairHybrid *) force->pair; + for (int i = 0; i < hybrid->nstyles; i++) + if (strstr(hybrid->keywords[i],"/gpu")==NULL) + force->pair->no_virial_fdotr_compute = 1; + } else if (force->pair_match("hybrid/overlay",1) != NULL) { + PairHybridOverlay *hybrid = (PairHybridOverlay *) force->pair; + for (int i = 0; i < hybrid->nstyles; i++) + if (strstr(hybrid->keywords[i],"/gpu")==NULL) + force->pair->no_virial_fdotr_compute = 1; + } + // r-RESPA support - if (strstr(update->integrate_style,"respa")) { + if (strstr(update->integrate_style,"respa")) _nlevels_respa = ((Respa *) update->integrate)->nlevels; - - // need to check that gpu accelerated styles are at the outmost levels - - if ((force->pair_match("/gpu",0) != NULL) && - (((Respa *) update->integrate)->level_pair != _nlevels_respa-1)) - error->all(FLERR,"GPU pair style must be at outermost respa level"); - - if ((force->kspace_match("/gpu",0) != NULL) && - (((Respa *) update->integrate)->level_kspace != _nlevels_respa-1)) - error->all(FLERR,"GPU Kspace style must be at outermost respa level"); - } } /* ---------------------------------------------------------------------- */ @@ -194,8 +202,9 @@ void FixGPU::setup(int vflag) if (strstr(update->integrate_style,"verlet")) post_force(vflag); else { + // In setup only, all forces calculated on gpu are put in the outer level ((Respa *) update->integrate)->copy_flevel_f(_nlevels_respa-1); - post_force_respa(vflag,_nlevels_respa-1,0); + post_force(vflag); ((Respa *) update->integrate)->copy_f_flevel(_nlevels_respa-1); } } @@ -241,7 +250,7 @@ void FixGPU::min_post_force(int vflag) void FixGPU::post_force_respa(int vflag, int ilevel, int iloop) { - if (ilevel == _nlevels_respa-1) post_force(vflag); + post_force(vflag); } /* ---------------------------------------------------------------------- */ @@ -252,3 +261,4 @@ double FixGPU::memory_usage() // Memory usage currently returned by pair routine return bytes; } + diff --git a/src/GPU/gpu_extra.h b/src/GPU/gpu_extra.h index f1b8de65cf..5bfb38f369 100644 --- a/src/GPU/gpu_extra.h +++ b/src/GPU/gpu_extra.h @@ -52,6 +52,12 @@ namespace GPU_EXTRA { else if (all_success == -9) error->all(FLERR, "CPU neighbor lists must be used for ellipsoid/sphere mix."); + else if (all_success == -10) + error->all(FLERR, + "Invalid threads_per_atom specified."); + else if (all_success == -11) + error->all(FLERR, + "Invalid custom OpenCL parameter string."); else error->all(FLERR,"Unknown error in GPU library"); } @@ -110,8 +116,19 @@ E: CPU neighbor lists must be used for ellipsoid/sphere mix When using Gay-Berne or RE-squared pair styles with both ellipsoidal and spherical particles, the neighbor list must be built on the CPU +E: Invalid threads_per_atom specified. + +For 3-body potentials on the GPU, the threads_per_atom setting cannot be +greater than 4 for NVIDIA GPUs. + E: Unknown error in GPU library Self-explanatory. +E: Invalid custom OpenCL parameter string. + +There are not enough or too many parameters in the custom string for package +GPU. + */ + diff --git a/src/GPU/pppm_gpu.cpp b/src/GPU/pppm_gpu.cpp index c085d2b6a2..e815dbcbe0 100644 --- a/src/GPU/pppm_gpu.cpp +++ b/src/GPU/pppm_gpu.cpp @@ -78,7 +78,7 @@ FFT_SCALAR* PPPM_GPU_API(init)(const int nlocal, const int nall, FILE *screen, const double slab_volfactor, const int nx_pppm, const int ny_pppm, const int nz_pppm, const bool split, - int &success); + const bool respa, int &success); void PPPM_GPU_API(clear)(const double poisson_time); int PPPM_GPU_API(spread)(const int ago, const int nlocal, const int nall, double **host_x, int *host_type, bool &success, @@ -152,6 +152,10 @@ void PPPMGPU::init() // GPU precision specific init + bool respa_value=false; + if (strstr(update->integrate_style,"respa")) + respa_value=true; + if (order>8) error->all(FLERR,"Cannot use order greater than 8 with pppm/gpu."); PPPM_GPU_API(clear)(poisson_time); @@ -162,7 +166,7 @@ void PPPMGPU::init() order, nxlo_out, nylo_out, nzlo_out, nxhi_out, nyhi_out, nzhi_out, rho_coeff, &data, slab_volfactor,nx_pppm,ny_pppm,nz_pppm, - kspace_split,success); + kspace_split,respa_value,success); GPU_EXTRA::check_flag(success,error,world);