commit a54f7e6b5b
parent e8a5fbdd3a
Author: sjplimp
Date:   2011-08-08 19:25:08 +00:00

    git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@6621 f3b2605a-c512-4ea7-a41b-209d697bcdaa

8 changed files with 218 additions and 1690 deletions
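Summary (as read from the diff below): pppm/gpu is refactored so that PPPMGPU derives from the standard PPPM class instead of being a template PPPMGPU&lt;grdtyp&gt; over KSpace. The separate pppm/gpu/single and pppm/gpu/double styles and their source files are removed; the grid precision now follows FFT_SCALAR. The package install script drops the removed files, and PairCGCMMCoulMSM::compute() picks up the virial_compute() -> virial_fdotr_compute() rename.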

src/GPU/Install.sh

@@ -56,11 +56,7 @@ if (test $1 = 1) then
   if (test -e ../pppm.cpp) then
     cp pppm_gpu.cpp ..
-    cp pppm_gpu_single.cpp ..
-    cp pppm_gpu_double.cpp ..
     cp pppm_gpu.h ..
-    cp pppm_gpu_single.h ..
-    cp pppm_gpu_double.h ..
   fi

   cp pair_lj_cut_gpu.cpp ..
@@ -95,8 +91,6 @@ elif (test $1 = 0) then
   fi

   rm -f ../pppm_gpu.cpp
-  rm -f ../pppm_gpu_single.cpp
-  rm -f ../pppm_gpu_double.cpp
   rm -f ../pair_gayberne_gpu.cpp
   rm -f ../pair_resquared_gpu.cpp
   rm -f ../pair_lj_cut_gpu.cpp
@@ -119,8 +113,6 @@ elif (test $1 = 0) then
   rm -f ../pair_lj_cut_tgpu.cpp

   rm -f ../pppm_gpu.h
-  rm -f ../pppm_gpu_single.h
-  rm -f ../pppm_gpu_double.h
   rm -f ../pair_gayberne_gpu.h
   rm -f ../pair_resquared_gpu.h
   rm -f ../pair_lj_cut_gpu.h

pair_cg_cmm_coul_msm.cpp

@@ -247,7 +247,7 @@ void PairCGCMMCoulMSM::compute(int eflag, int vflag)
     }
   }

-  if (vflag_fdotr) virial_compute();
+  if (vflag_fdotr) virial_fdotr_compute();
 }

 /* ---------------------------------------------------------------------- */

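The one-line change above is the rename from the era when Pair::virial_compute() became virial_fdotr_compute(). As a hedged sketch of what an f-dot-r virial tally does (standalone illustrative code with made-up data, not the LAMMPS implementation):

```cpp
// Minimal sketch of an f.r ("fdotr") virial accumulation: the six
// independent components of the virial tensor are summed as force times
// coordinate over the atoms a proc knows about. Names and sizes here are
// illustrative only.
#include <cstdio>

int main() {
  const int nall = 2;                       // local + ghost atoms (assumed)
  double x[nall][3] = {{0.0, 0.0, 0.0}, {1.0, 2.0, 3.0}};
  double f[nall][3] = {{-1.0, 0.0, 0.5}, {1.0, 0.0, -0.5}};
  double virial[6] = {0, 0, 0, 0, 0, 0};    // xx, yy, zz, xy, xz, yz

  for (int i = 0; i < nall; i++) {
    virial[0] += f[i][0] * x[i][0];         // W_xx
    virial[1] += f[i][1] * x[i][1];         // W_yy
    virial[2] += f[i][2] * x[i][2];         // W_zz
    virial[3] += f[i][1] * x[i][0];         // W_xy
    virial[4] += f[i][2] * x[i][0];         // W_xz
    virial[5] += f[i][2] * x[i][1];         // W_yz
  }
  for (int k = 0; k < 6; k++) std::printf("virial[%d] = %g\n", k, virial[k]);
}
```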
src/GPU/pppm_gpu.cpp
File diff suppressed because it is too large

src/GPU/pppm_gpu.h

@@ -11,93 +11,45 @@
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */

 #ifdef KSPACE_CLASS

+KSpaceStyle(pppm/gpu,PPPMGPU)

 #else

 #ifndef LMP_PPPM_GPU_H
 #define LMP_PPPM_GPU_H

-#include "kspace.h"
 #include "lmptype.h"
+#include "pppm.h"

 namespace LAMMPS_NS {

-template <class grdtyp>
-class PPPMGPU : public KSpace {
+class PPPMGPU : public PPPM {
  public:
   PPPMGPU(class LAMMPS *, int, char **);
   virtual ~PPPMGPU();
-  virtual void init() = 0;
-  void base_init();
-  void setup();
-  virtual void compute(int, int) = 0;
-  void timing(int, double &, double &);
-  virtual double memory_usage() = 0;
+  virtual void init();
+  virtual void compute(int, int);
+  virtual double memory_usage();

  protected:
-  int me,nprocs;
-  double PI;
-  double precision;
-  int nfactors;
-  int *factors;
-  double qsum,qsqsum;
-  double qqrd2e;
-  double cutoff;
-  double volume;
-  double delxinv,delyinv,delzinv,delvolinv;
-  double shift,shiftone;
-  int nxlo_in,nylo_in,nzlo_in,nxhi_in,nyhi_in,nzhi_in;
-  int nxlo_out,nylo_out,nzlo_out,nxhi_out,nyhi_out,nzhi_out;
-  int nxlo_ghost,nxhi_ghost,nylo_ghost,nyhi_ghost,nzlo_ghost,nzhi_ghost;
-  int nxlo_fft,nylo_fft,nzlo_fft,nxhi_fft,nyhi_fft,nzhi_fft;
-  int nlower,nupper;
-  int ngrid,nfft,nbuf,nfft_both;
-  grdtyp ***density_brick;
-  grdtyp ***vd_brick;
+  FFT_SCALAR ***density_brick_gpu, ***vd_brick;
-  double *greensfn;
-  double **vg;
-  double *fkx,*fky,*fkz;
-  double *density_fft;
-  double *work1,*work2;
-  double *buf1,*buf2;
-  double *gf_b;
-  double **rho1d,**rho_coeff;
-  class FFT3d *fft1,*fft2;
-  class Remap *remap;
-  int nmax;
-  int triclinic;                 // domain settings, orthog or triclinic
-  double *boxlo;
-                                 // TIP4P settings
-  int typeH,typeO;               // atom types of TIP4P water H and O atoms
-  double qdist;                  // distance from O site to negative charge
-  double alpha;                  // geometric factor
-  void set_grid();
-  void allocate();
-  void deallocate();
-  int factorable(int);
-  double rms(double, double, bigint, double, double **);
-  double diffpr(double, double, double, double, double **);
-  void compute_gf_denom();
-  double gf_denom(double, double, double);
-  void brick2fft();
-  void fillbrick();
-  void poisson(int, int);
-  void procs2grid2d(int,int,int,int *, int*);
-  void compute_rho1d(double, double, double);
-  void compute_rho_coeff();
-  void slabcorr(int);
+  virtual void allocate();
+  virtual void deallocate();
+  virtual void brick2fft();
+  virtual void fillbrick();
+  virtual void poisson(int, int);

   double poisson_time;
-  grdtyp ***create_3d_offset(int, int, int, int, int, int, const char *,
-                             grdtyp *, int);
-  void destroy_3d_offset(grdtyp ***, int, int);
+  FFT_SCALAR ***create_3d_offset(int, int, int, int, int, int, const char *,
+                                 FFT_SCALAR *, int);
+  void destroy_3d_offset(FFT_SCALAR ***, int, int);
 };

 }

 #endif
 #endif
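As context for the hunk above, a minimal sketch of the design change: the old header declared an abstract class template parameterized on the grid precision, with one concrete subclass per precision; the new header declares a single class deriving from PPPM, with the precision fixed by FFT_SCALAR. Stub types stand in for the real LAMMPS classes; this is illustrative, not the actual refactor.

```cpp
#include <cstdio>

typedef double FFT_SCALAR;  // in LAMMPS this follows the FFT_SINGLE setting

// Old shape: abstract template base over KSpace, one subclass per
// precision (PPPMGPUSingle, PPPMGPUDouble), each re-implementing the API.
template <class grdtyp>
struct OldPPPMGPU {
  grdtyp ***density_brick = nullptr;  // precision picked by the template arg
  virtual void init() = 0;            // pure: every precision overrides
  virtual ~OldPPPMGPU() = default;
};

// New shape: one concrete class deriving from PPPM, overriding only the
// grid/FFT steps it accelerates.
struct PPPM {
  virtual void poisson(int, int) { std::puts("PPPM::poisson"); }
  virtual ~PPPM() = default;
};

struct PPPMGPU : PPPM {
  FFT_SCALAR ***density_brick_gpu = nullptr;  // as in the hunk above
  void poisson(int eflag, int vflag) override {
    std::puts("PPPMGPU::poisson (GPU-timed path)");
    (void)eflag; (void)vflag;
  }
};

int main() {
  PPPMGPU gpu;
  PPPM *kspace = &gpu;   // used through the common PPPM interface
  kspace->poisson(1, 1); // virtual dispatch reaches the GPU override
}
```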

src/GPU/pppm_gpu_double.cpp (deleted file)

@@ -1,217 +0,0 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing authors: Mike Brown (ORNL)
------------------------------------------------------------------------- */
#include "mpi.h"
#include "string.h"
#include "stdio.h"
#include "stdlib.h"
#include "math.h"
#include "pppm_gpu_double.h"
#include "lmptype.h"
#include "atom.h"
#include "comm.h"
#include "neighbor.h"
#include "force.h"
#include "pair.h"
#include "bond.h"
#include "angle.h"
#include "domain.h"
#include "fft3d_wrap.h"
#include "remap_wrap.h"
#include "memory.h"
#include "error.h"
#include "gpu_extra.h"
#define grdtyp double
// External functions from cuda library for atom decomposition
grdtyp* pppm_gpu_init_d(const int nlocal, const int nall, FILE *screen,
const int order, const int nxlo_out,
const int nylo_out, const int nzlo_out,
const int nxhi_out, const int nyhi_out,
const int nzhi_out, double **rho_coeff,
grdtyp **_vd_brick, const double slab_volfactor,
const int nx_pppm, const int ny_pppm,
const int nz_pppm, int &success);
void pppm_gpu_clear_d(const double poisson_time);
int pppm_gpu_spread_d(const int ago, const int nlocal, const int nall,
double **host_x, int *host_type, bool &success,
double *host_q, double *boxlo, const double delxinv,
const double delyinv, const double delzinv);
void pppm_gpu_interp_d(const grdtyp qqrd2e_scale);
double pppm_gpu_bytes_d();
using namespace LAMMPS_NS;
#define MAXORDER 7
#define OFFSET 16384
#define SMALL 0.00001
#define LARGE 10000.0
#define EPS_HOC 1.0e-7
#define MIN(a,b) ((a) < (b) ? (a) : (b))
#define MAX(a,b) ((a) > (b) ? (a) : (b))
/* ---------------------------------------------------------------------- */
PPPMGPUDouble::PPPMGPUDouble(LAMMPS *lmp, int narg, char **arg) :
PPPMGPU<grdtyp>(lmp, narg, arg)
{
}
/* ----------------------------------------------------------------------
free all memory
------------------------------------------------------------------------- */
PPPMGPUDouble::~PPPMGPUDouble()
{
pppm_gpu_clear_d(poisson_time);
}
/* ----------------------------------------------------------------------
called once before run
------------------------------------------------------------------------- */
void PPPMGPUDouble::init()
{
base_init();
if (order>8)
error->all("Cannot use order greater than 8 with pppm/gpu.");
pppm_gpu_clear_d(poisson_time);
int success;
grdtyp *data, *h_brick;
h_brick = pppm_gpu_init_d(atom->nlocal, atom->nlocal+atom->nghost, screen,
order, nxlo_out, nylo_out, nzlo_out, nxhi_out,
nyhi_out, nzhi_out, rho_coeff, &data,
slab_volfactor, nx_pppm, ny_pppm, nz_pppm,
success);
GPU_EXTRA::check_flag(success,error,world);
density_brick =
create_3d_offset(nzlo_out,nzhi_out,nylo_out,nyhi_out,
nxlo_out,nxhi_out,"pppm:density_brick",h_brick,1);
vd_brick =
create_3d_offset(nzlo_out,nzhi_out,nylo_out,nyhi_out,
nxlo_out,nxhi_out,"pppm:vd_brick",data,4);
poisson_time=0;
}
/* ----------------------------------------------------------------------
compute the PPPMGPU long-range force, energy, virial
------------------------------------------------------------------------- */
void PPPMGPUDouble::compute(int eflag, int vflag)
{
bool success = true;
int flag=pppm_gpu_spread_d(neighbor->ago, atom->nlocal, atom->nlocal +
atom->nghost, atom->x, atom->type, success,
atom->q, domain->boxlo, delxinv, delyinv,
delzinv);
if (!success)
error->one("Out of memory on GPGPU");
if (flag != 0)
error->one("Out of range atoms - cannot compute PPPM");
int i;
// convert atoms from box to lamda coords
if (triclinic == 0) boxlo = domain->boxlo;
else {
boxlo = domain->boxlo_lamda;
domain->x2lamda(atom->nlocal);
}
energy = 0.0;
if (vflag) for (i = 0; i < 6; i++) virial[i] = 0.0;
double t3=MPI_Wtime();
// all procs communicate density values from their ghost cells
// to fully sum contribution in their 3d bricks
// remap from 3d decomposition to FFT decomposition
brick2fft();
// compute potential gradient on my FFT grid and
// portion of e_long on this proc's FFT grid
// return gradients (electric fields) in 3d brick decomposition
poisson(eflag,vflag);
// all procs communicate E-field values to fill ghost cells
// surrounding their 3d bricks
fillbrick();
poisson_time+=MPI_Wtime()-t3;
// calculate the force on my particles
grdtyp qqrd2e_scale=qqrd2e*scale;
pppm_gpu_interp_d(qqrd2e_scale);
// sum energy across procs and add in volume-dependent term
if (eflag) {
double energy_all;
MPI_Allreduce(&energy,&energy_all,1,MPI_DOUBLE,MPI_SUM,world);
energy = energy_all;
energy *= 0.5*volume;
energy -= g_ewald*qsqsum/1.772453851 +
0.5*PI*qsum*qsum / (g_ewald*g_ewald*volume);
energy *= qqrd2e*scale;
}
// sum virial across procs
if (vflag) {
double virial_all[6];
MPI_Allreduce(virial,virial_all,6,MPI_DOUBLE,MPI_SUM,world);
for (i = 0; i < 6; i++) virial[i] = 0.5*qqrd2e*scale*volume*virial_all[i];
}
// 2d slab correction
if (slabflag) slabcorr(eflag);
// convert atoms back from lamda to box coords
if (triclinic) domain->lamda2x(atom->nlocal);
}
/* ----------------------------------------------------------------------
memory usage of local arrays
------------------------------------------------------------------------- */
double PPPMGPUDouble::memory_usage()
{
double bytes = nmax*3 * sizeof(double);
int nbrick = (nxhi_out-nxlo_out+1) * (nyhi_out-nylo_out+1) *
(nzhi_out-nzlo_out+1);
bytes += 4 * nbrick * sizeof(grdtyp);
bytes += 6 * nfft_both * sizeof(double);
bytes += nfft_both*6 * sizeof(double);
bytes += 2 * nbuf * sizeof(double);
return bytes + pppm_gpu_bytes_d();
}
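The create_3d_offset() calls in the deleted init() above wrap a flat, library-owned buffer in a 3D array whose indices start at nzlo_out/nylo_out/nxlo_out rather than zero. A hedged sketch of that pointer-offset technique follows (simplified: no per-cell vector stride, illustrative names, error handling omitted; it relies on the same out-of-bounds pointer offsets as the original idiom).

```cpp
// Build a 3d array with non-zero lower bounds over an existing flat buffer:
// the plane and row pointer tables are shifted so arr[z][y][x] is valid for
// z in [zlo,zhi], y in [ylo,yhi], x in [xlo,xhi], while the data itself
// stays contiguous (e.g. shared with a GPU library).
#include <cstdio>
#include <cstdlib>

double ***create_3d_offset(int zlo, int zhi, int ylo, int yhi,
                           int xlo, int xhi, double *data) {
  int nz = zhi - zlo + 1, ny = yhi - ylo + 1, nx = xhi - xlo + 1;
  double ***plane = (double ***) malloc(nz * sizeof(double **));
  double **row = (double **) malloc(nz * ny * sizeof(double *));
  for (int z = 0; z < nz; z++) {
    plane[z] = &row[z * ny] - ylo;                       // make [ylo] valid
    for (int y = 0; y < ny; y++)
      row[z * ny + y] = &data[(z * ny + y) * nx] - xlo;  // make [xlo] valid
  }
  return plane - zlo;                                    // make [zlo] valid
}

void destroy_3d_offset(double ***arr, int zlo, int ylo) {
  free(&arr[zlo][ylo]);   // frees the row-pointer table
  free(&arr[zlo]);        // frees the plane-pointer table
}

int main() {
  double buf[2 * 3 * 4];  // contiguous storage for a 2x3x4 brick
  double ***brick = create_3d_offset(-1, 0, 5, 7, 10, 13, buf);
  brick[-1][5][10] = 42.0;  // lowest corner maps to buf[0]
  std::printf("%g %g\n", brick[-1][5][10], buf[0]);
  destroy_3d_offset(brick, -1, 5);
}
```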

src/GPU/pppm_gpu_double.h (deleted file)

@@ -1,42 +0,0 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#ifdef KSPACE_CLASS
KSpaceStyle(pppm/gpu/double,PPPMGPUDouble)
#else
#ifndef LMP_PPPM_GPU_DOUBLE_H
#define LMP_PPPM_GPU_DOUBLE_H
#include "pppm_gpu.h"
#include "lmptype.h"
namespace LAMMPS_NS {
class PPPMGPUDouble : public PPPMGPU<double> {
public:
PPPMGPUDouble(class LAMMPS *, int, char **);
~PPPMGPUDouble();
void init();
void compute(int, int);
double memory_usage();
protected:
};
}
#endif
#endif

src/GPU/pppm_gpu_single.cpp (deleted file)

@@ -1,216 +0,0 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing authors: Mike Brown (ORNL)
------------------------------------------------------------------------- */
#include "mpi.h"
#include "string.h"
#include "stdio.h"
#include "stdlib.h"
#include "math.h"
#include "pppm_gpu_single.h"
#include "lmptype.h"
#include "atom.h"
#include "comm.h"
#include "neighbor.h"
#include "force.h"
#include "pair.h"
#include "bond.h"
#include "angle.h"
#include "domain.h"
#include "fft3d_wrap.h"
#include "remap_wrap.h"
#include "memory.h"
#include "error.h"
#include "gpu_extra.h"
#define grdtyp float
// External functions from cuda library for atom decomposition
grdtyp* pppm_gpu_init_f(const int nlocal, const int nall, FILE *screen,
const int order, const int nxlo_out,
const int nylo_out, const int nzlo_out,
const int nxhi_out, const int nyhi_out,
const int nzhi_out, double **rho_coeff,
grdtyp **_vd_brick, const double slab_volfactor,
const int nx_pppm, const int ny_pppm,
const int nz_pppm, int &success);
void pppm_gpu_clear_f(const double poisson_time);
int pppm_gpu_spread_f(const int ago, const int nlocal, const int nall,
double **host_x, int *host_type, bool &success,
double *host_q, double *boxlo, const double delxinv,
const double delyinv, const double delzinv);
void pppm_gpu_interp_f(const grdtyp qqrd2e_scale);
double pppm_gpu_bytes_f();
using namespace LAMMPS_NS;
#define MAXORDER 7
#define OFFSET 16384
#define SMALL 0.00001
#define LARGE 10000.0
#define EPS_HOC 1.0e-7
#define MIN(a,b) ((a) < (b) ? (a) : (b))
#define MAX(a,b) ((a) > (b) ? (a) : (b))
/* ---------------------------------------------------------------------- */
PPPMGPUSingle::PPPMGPUSingle(LAMMPS *lmp, int narg, char **arg) :
PPPMGPU<grdtyp>(lmp, narg, arg)
{
}
/* ----------------------------------------------------------------------
free all memory
------------------------------------------------------------------------- */
PPPMGPUSingle::~PPPMGPUSingle()
{
pppm_gpu_clear_f(poisson_time);
}
/* ----------------------------------------------------------------------
called once before run
------------------------------------------------------------------------- */
void PPPMGPUSingle::init()
{
base_init();
if (order>8)
error->all("Cannot use order greater than 8 with pppm/gpu.");
pppm_gpu_clear_f(poisson_time);
int success;
grdtyp *data, *h_brick;
h_brick = pppm_gpu_init_f(atom->nlocal, atom->nlocal+atom->nghost, screen,
order, nxlo_out, nylo_out, nzlo_out, nxhi_out,
nyhi_out, nzhi_out, rho_coeff, &data,
slab_volfactor,nx_pppm,ny_pppm,nz_pppm,success);
GPU_EXTRA::check_flag(success,error,world);
density_brick =
create_3d_offset(nzlo_out,nzhi_out,nylo_out,nyhi_out,
nxlo_out,nxhi_out,"pppm:density_brick",h_brick,1);
vd_brick =
create_3d_offset(nzlo_out,nzhi_out,nylo_out,nyhi_out,
nxlo_out,nxhi_out,"pppm:vd_brick",data,4);
poisson_time=0;
}
/* ----------------------------------------------------------------------
compute the PPPMGPU long-range force, energy, virial
------------------------------------------------------------------------- */
void PPPMGPUSingle::compute(int eflag, int vflag)
{
bool success = true;
int flag=pppm_gpu_spread_f(neighbor->ago, atom->nlocal, atom->nlocal +
atom->nghost, atom->x, atom->type, success,
atom->q, domain->boxlo, delxinv, delyinv,
delzinv);
if (!success)
error->one("Out of memory on GPGPU");
if (flag != 0)
error->one("Out of range atoms - cannot compute PPPM");
int i;
// convert atoms from box to lamda coords
if (triclinic == 0) boxlo = domain->boxlo;
else {
boxlo = domain->boxlo_lamda;
domain->x2lamda(atom->nlocal);
}
energy = 0.0;
if (vflag) for (i = 0; i < 6; i++) virial[i] = 0.0;
double t3=MPI_Wtime();
// all procs communicate density values from their ghost cells
// to fully sum contribution in their 3d bricks
// remap from 3d decomposition to FFT decomposition
brick2fft();
// compute potential gradient on my FFT grid and
// portion of e_long on this proc's FFT grid
// return gradients (electric fields) in 3d brick decomposition
poisson(eflag,vflag);
// all procs communicate E-field values to fill ghost cells
// surrounding their 3d bricks
fillbrick();
poisson_time+=MPI_Wtime()-t3;
// calculate the force on my particles
grdtyp qqrd2e_scale=qqrd2e*scale;
pppm_gpu_interp_f(qqrd2e_scale);
// sum energy across procs and add in volume-dependent term
if (eflag) {
double energy_all;
MPI_Allreduce(&energy,&energy_all,1,MPI_DOUBLE,MPI_SUM,world);
energy = energy_all;
energy *= 0.5*volume;
energy -= g_ewald*qsqsum/1.772453851 +
0.5*PI*qsum*qsum / (g_ewald*g_ewald*volume);
energy *= qqrd2e*scale;
}
// sum virial across procs
if (vflag) {
double virial_all[6];
MPI_Allreduce(virial,virial_all,6,MPI_DOUBLE,MPI_SUM,world);
for (i = 0; i < 6; i++) virial[i] = 0.5*qqrd2e*scale*volume*virial_all[i];
}
// 2d slab correction
if (slabflag) slabcorr(eflag);
// convert atoms back from lamda to box coords
if (triclinic) domain->lamda2x(atom->nlocal);
}
/* ----------------------------------------------------------------------
memory usage of local arrays
------------------------------------------------------------------------- */
double PPPMGPUSingle::memory_usage()
{
double bytes = nmax*3 * sizeof(double);
int nbrick = (nxhi_out-nxlo_out+1) * (nyhi_out-nylo_out+1) *
(nzhi_out-nzlo_out+1);
bytes += 4 * nbrick * sizeof(grdtyp);
bytes += 6 * nfft_both * sizeof(double);
bytes += nfft_both*6 * sizeof(double);
bytes += 2 * nbuf * sizeof(double);
return bytes + pppm_gpu_bytes_f();
}

src/GPU/pppm_gpu_single.h (deleted file)

@@ -1,42 +0,0 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#ifdef KSPACE_CLASS
KSpaceStyle(pppm/gpu/single,PPPMGPUSingle)
#else
#ifndef LMP_PPPM_GPU_SINGLE_H
#define LMP_PPPM_GPU_SINGLE_H
#include "pppm_gpu.h"
#include "lmptype.h"
namespace LAMMPS_NS {
class PPPMGPUSingle : public PPPMGPU<float> {
public:
PPPMGPUSingle(class LAMMPS *, int, char **);
~PPPMGPUSingle();
void init();
void compute(int, int);
double memory_usage();
protected:
};
}
#endif
#endif
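The deleted headers above (and the surviving pppm_gpu.h) use the two-pass header trick: included normally, the header declares the class; re-included with KSPACE_CLASS defined, it expands to a single KSpaceStyle(name,Class) registration line. A minimal sketch of that registration pattern follows; it is not LAMMPS's actual build machinery, and the registry/creator names are made up for illustration.

```cpp
// Sketch of a style registry: each KSpaceStyle(name,Class) line boils down
// to mapping a style name to a factory function for that class.
#include <cstdio>
#include <map>
#include <string>

struct KSpace { virtual ~KSpace() = default; };
struct PPPMGPUStub : KSpace {};  // stands in for the real PPPMGPU

typedef KSpace *(*Creator)();
template <class T> KSpace *create() { return new T; }

int main() {
  std::map<std::string, Creator> registry;
  // During the registration pass, KSpaceStyle(pppm/gpu,PPPMGPU) expands to
  // an entry roughly like this one:
  registry["pppm/gpu"] = create<PPPMGPUStub>;
  // ...so an input-script line like "kspace_style pppm/gpu" can look up
  // the name and construct the corresponding class:
  KSpace *k = registry["pppm/gpu"]();
  std::printf("created kspace style instance at %p\n", (void *) k);
  delete k;
}
```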