commit a54f7e6b5b
parent e8a5fbdd3a
Author: sjplimp
Date:   2011-08-08 19:25:08 +00:00

    git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@6621 f3b2605a-c512-4ea7-a41b-209d697bcdaa

8 changed files with 218 additions and 1690 deletions
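Summary (as read from the diff below): pppm/gpu is refactored so that PPPMGPU derives from the standard PPPM class instead of being a template PPPMGPU&lt;grdtyp&gt; over KSpace. The separate pppm/gpu/single and pppm/gpu/double styles and their source files are removed; the grid precision now follows FFT_SCALAR. The package install script drops the removed files, and PairCGCMMCoulMSM::compute() picks up the virial_compute() -> virial_fdotr_compute() rename.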

src/GPU/Install.sh

@@ -56,11 +56,7 @@ if (test $1 = 1) then
   if (test -e ../pppm.cpp) then
     cp pppm_gpu.cpp ..
-    cp pppm_gpu_single.cpp ..
-    cp pppm_gpu_double.cpp ..
     cp pppm_gpu.h ..
-    cp pppm_gpu_single.h ..
-    cp pppm_gpu_double.h ..
   fi

   cp pair_lj_cut_gpu.cpp ..
@@ -95,8 +91,6 @@ elif (test $1 = 0) then
   fi

   rm -f ../pppm_gpu.cpp
-  rm -f ../pppm_gpu_single.cpp
-  rm -f ../pppm_gpu_double.cpp
   rm -f ../pair_gayberne_gpu.cpp
   rm -f ../pair_resquared_gpu.cpp
   rm -f ../pair_lj_cut_gpu.cpp
@@ -119,8 +113,6 @@ elif (test $1 = 0) then
   rm -f ../pair_lj_cut_tgpu.cpp

   rm -f ../pppm_gpu.h
-  rm -f ../pppm_gpu_single.h
-  rm -f ../pppm_gpu_double.h
   rm -f ../pair_gayberne_gpu.h
   rm -f ../pair_resquared_gpu.h
   rm -f ../pair_lj_cut_gpu.h

pair_cg_cmm_coul_msm.cpp

@@ -247,7 +247,7 @@ void PairCGCMMCoulMSM::compute(int eflag, int vflag)
     }
   }

-  if (vflag_fdotr) virial_compute();
+  if (vflag_fdotr) virial_fdotr_compute();
 }

 /* ---------------------------------------------------------------------- */

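The one-line change above is the rename from the era when Pair::virial_compute() became virial_fdotr_compute(). As a hedged sketch of what an f-dot-r virial tally does (standalone illustrative code with made-up data, not the LAMMPS implementation):

```cpp
// Minimal sketch of an f.r ("fdotr") virial accumulation: the six
// independent components of the virial tensor are summed as force times
// coordinate over the atoms a proc knows about. Names and sizes here are
// illustrative only.
#include <cstdio>

int main() {
  const int nall = 2;                       // local + ghost atoms (assumed)
  double x[nall][3] = {{0.0, 0.0, 0.0}, {1.0, 2.0, 3.0}};
  double f[nall][3] = {{-1.0, 0.0, 0.5}, {1.0, 0.0, -0.5}};
  double virial[6] = {0, 0, 0, 0, 0, 0};    // xx, yy, zz, xy, xz, yz

  for (int i = 0; i < nall; i++) {
    virial[0] += f[i][0] * x[i][0];         // W_xx
    virial[1] += f[i][1] * x[i][1];         // W_yy
    virial[2] += f[i][2] * x[i][2];         // W_zz
    virial[3] += f[i][1] * x[i][0];         // W_xy
    virial[4] += f[i][2] * x[i][0];         // W_xz
    virial[5] += f[i][2] * x[i][1];         // W_yz
  }
  for (int k = 0; k < 6; k++) std::printf("virial[%d] = %g\n", k, virial[k]);
}
```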
src/GPU/pppm_gpu.cpp
File diff suppressed because it is too large

src/GPU/pppm_gpu.h

@@ -11,93 +11,45 @@
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */

 #ifdef KSPACE_CLASS

+KSpaceStyle(pppm/gpu,PPPMGPU)

 #else

 #ifndef LMP_PPPM_GPU_H
 #define LMP_PPPM_GPU_H

-#include "kspace.h"
 #include "lmptype.h"
+#include "pppm.h"

 namespace LAMMPS_NS {

-template <class grdtyp>
-class PPPMGPU : public KSpace {
+class PPPMGPU : public PPPM {
  public:
   PPPMGPU(class LAMMPS *, int, char **);
   virtual ~PPPMGPU();
-  virtual void init() = 0;
-  void base_init();
-  void setup();
-  virtual void compute(int, int) = 0;
-  void timing(int, double &, double &);
-  virtual double memory_usage() = 0;
+  virtual void init();
+  virtual void compute(int, int);
+  virtual double memory_usage();

  protected:
-  int me,nprocs;
-  double PI;
-  double precision;
-  int nfactors;
-  int *factors;
-  double qsum,qsqsum;
-  double qqrd2e;
-  double cutoff;
-  double volume;
-  double delxinv,delyinv,delzinv,delvolinv;
-  double shift,shiftone;
-  int nxlo_in,nylo_in,nzlo_in,nxhi_in,nyhi_in,nzhi_in;
-  int nxlo_out,nylo_out,nzlo_out,nxhi_out,nyhi_out,nzhi_out;
-  int nxlo_ghost,nxhi_ghost,nylo_ghost,nyhi_ghost,nzlo_ghost,nzhi_ghost;
-  int nxlo_fft,nylo_fft,nzlo_fft,nxhi_fft,nyhi_fft,nzhi_fft;
-  int nlower,nupper;
-  int ngrid,nfft,nbuf,nfft_both;
-  grdtyp ***density_brick;
-  grdtyp ***vd_brick;
+  FFT_SCALAR ***density_brick_gpu, ***vd_brick;
-  double *greensfn;
-  double **vg;
-  double *fkx,*fky,*fkz;
-  double *density_fft;
-  double *work1,*work2;
-  double *buf1,*buf2;
-  double *gf_b;
-  double **rho1d,**rho_coeff;
-  class FFT3d *fft1,*fft2;
-  class Remap *remap;
-  int nmax;
-  int triclinic;                 // domain settings, orthog or triclinic
-  double *boxlo;
-                                 // TIP4P settings
-  int typeH,typeO;               // atom types of TIP4P water H and O atoms
-  double qdist;                  // distance from O site to negative charge
-  double alpha;                  // geometric factor
-  void set_grid();
-  void allocate();
-  void deallocate();
-  int factorable(int);
-  double rms(double, double, bigint, double, double **);
-  double diffpr(double, double, double, double, double **);
-  void compute_gf_denom();
-  double gf_denom(double, double, double);
-  void brick2fft();
-  void fillbrick();
-  void poisson(int, int);
-  void procs2grid2d(int,int,int,int *, int*);
-  void compute_rho1d(double, double, double);
-  void compute_rho_coeff();
-  void slabcorr(int);
+  virtual void allocate();
+  virtual void deallocate();
+  virtual void brick2fft();
+  virtual void fillbrick();
+  virtual void poisson(int, int);

   double poisson_time;
-  grdtyp ***create_3d_offset(int, int, int, int, int, int, const char *,
-                             grdtyp *, int);
-  void destroy_3d_offset(grdtyp ***, int, int);
+  FFT_SCALAR ***create_3d_offset(int, int, int, int, int, int, const char *,
+                                 FFT_SCALAR *, int);
+  void destroy_3d_offset(FFT_SCALAR ***, int, int);
 };

 }

 #endif
 #endif
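As context for the hunk above, a minimal sketch of the design change: the old header declared an abstract class template parameterized on the grid precision, with one concrete subclass per precision; the new header declares a single class deriving from PPPM, with the precision fixed by FFT_SCALAR. Stub types stand in for the real LAMMPS classes; this is illustrative, not the actual refactor.

```cpp
#include <cstdio>

typedef double FFT_SCALAR;  // in LAMMPS this follows the FFT_SINGLE setting

// Old shape: abstract template base over KSpace, one subclass per
// precision (PPPMGPUSingle, PPPMGPUDouble), each re-implementing the API.
template <class grdtyp>
struct OldPPPMGPU {
  grdtyp ***density_brick = nullptr;  // precision picked by the template arg
  virtual void init() = 0;            // pure: every precision overrides
  virtual ~OldPPPMGPU() = default;
};

// New shape: one concrete class deriving from PPPM, overriding only the
// grid/FFT steps it accelerates.
struct PPPM {
  virtual void poisson(int, int) { std::puts("PPPM::poisson"); }
  virtual ~PPPM() = default;
};

struct PPPMGPU : PPPM {
  FFT_SCALAR ***density_brick_gpu = nullptr;  // as in the hunk above
  void poisson(int eflag, int vflag) override {
    std::puts("PPPMGPU::poisson (GPU-timed path)");
    (void)eflag; (void)vflag;
  }
};

int main() {
  PPPMGPU gpu;
  PPPM *kspace = &gpu;   // used through the common PPPM interface
  kspace->poisson(1, 1); // virtual dispatch reaches the GPU override
}
```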

src/GPU/pppm_gpu_double.cpp (deleted file)

@@ -1,217 +0,0 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing authors: Mike Brown (ORNL)
------------------------------------------------------------------------- */
#include "mpi.h"
#include "string.h"
#include "stdio.h"
#include "stdlib.h"
#include "math.h"
#include "pppm_gpu_double.h"
#include "lmptype.h"
#include "atom.h"
#include "comm.h"
#include "neighbor.h"
#include "force.h"
#include "pair.h"
#include "bond.h"
#include "angle.h"
#include "domain.h"
#include "fft3d_wrap.h"
#include "remap_wrap.h"
#include "memory.h"
#include "error.h"
#include "gpu_extra.h"
#define grdtyp double
// External functions from cuda library for atom decomposition
grdtyp* pppm_gpu_init_d(const int nlocal, const int nall, FILE *screen,
const int order, const int nxlo_out,
const int nylo_out, const int nzlo_out,
const int nxhi_out, const int nyhi_out,
const int nzhi_out, double **rho_coeff,
grdtyp **_vd_brick, const double slab_volfactor,
const int nx_pppm, const int ny_pppm,
const int nz_pppm, int &success);
void pppm_gpu_clear_d(const double poisson_time);
int pppm_gpu_spread_d(const int ago, const int nlocal, const int nall,
double **host_x, int *host_type, bool &success,
double *host_q, double *boxlo, const double delxinv,
const double delyinv, const double delzinv);
void pppm_gpu_interp_d(const grdtyp qqrd2e_scale);
double pppm_gpu_bytes_d();
using namespace LAMMPS_NS;
#define MAXORDER 7
#define OFFSET 16384
#define SMALL 0.00001
#define LARGE 10000.0
#define EPS_HOC 1.0e-7
#define MIN(a,b) ((a) < (b) ? (a) : (b))
#define MAX(a,b) ((a) > (b) ? (a) : (b))
/* ---------------------------------------------------------------------- */
PPPMGPUDouble::PPPMGPUDouble(LAMMPS *lmp, int narg, char **arg) :
PPPMGPU<grdtyp>(lmp, narg, arg)
{
}
/* ----------------------------------------------------------------------
free all memory
------------------------------------------------------------------------- */
PPPMGPUDouble::~PPPMGPUDouble()
{
pppm_gpu_clear_d(poisson_time);
}
/* ----------------------------------------------------------------------
called once before run
------------------------------------------------------------------------- */
void PPPMGPUDouble::init()
{
base_init();
if (order>8)
error->all("Cannot use order greater than 8 with pppm/gpu.");
pppm_gpu_clear_d(poisson_time);
int success;
grdtyp *data, *h_brick;
h_brick = pppm_gpu_init_d(atom->nlocal, atom->nlocal+atom->nghost, screen,
order, nxlo_out, nylo_out, nzlo_out, nxhi_out,
nyhi_out, nzhi_out, rho_coeff, &data,
slab_volfactor, nx_pppm, ny_pppm, nz_pppm,
success);
GPU_EXTRA::check_flag(success,error,world);
density_brick =
create_3d_offset(nzlo_out,nzhi_out,nylo_out,nyhi_out,
nxlo_out,nxhi_out,"pppm:density_brick",h_brick,1);
vd_brick =
create_3d_offset(nzlo_out,nzhi_out,nylo_out,nyhi_out,
nxlo_out,nxhi_out,"pppm:vd_brick",data,4);
poisson_time=0;
}
/* ----------------------------------------------------------------------
compute the PPPMGPU long-range force, energy, virial
------------------------------------------------------------------------- */
void PPPMGPUDouble::compute(int eflag, int vflag)
{
bool success = true;
int flag=pppm_gpu_spread_d(neighbor->ago, atom->nlocal, atom->nlocal +
atom->nghost, atom->x, atom->type, success,
atom->q, domain->boxlo, delxinv, delyinv,
delzinv);
if (!success)
error->one("Out of memory on GPGPU");
if (flag != 0)
error->one("Out of range atoms - cannot compute PPPM");
int i;
// convert atoms from box to lamda coords
if (triclinic == 0) boxlo = domain->boxlo;
else {
boxlo = domain->boxlo_lamda;
domain->x2lamda(atom->nlocal);
}
energy = 0.0;
if (vflag) for (i = 0; i < 6; i++) virial[i] = 0.0;
double t3=MPI_Wtime();
// all procs communicate density values from their ghost cells
// to fully sum contribution in their 3d bricks
// remap from 3d decomposition to FFT decomposition
brick2fft();
// compute potential gradient on my FFT grid and
// portion of e_long on this proc's FFT grid
// return gradients (electric fields) in 3d brick decomposition
poisson(eflag,vflag);
// all procs communicate E-field values to fill ghost cells
// surrounding their 3d bricks
fillbrick();
poisson_time+=MPI_Wtime()-t3;
// calculate the force on my particles
grdtyp qqrd2e_scale=qqrd2e*scale;
pppm_gpu_interp_d(qqrd2e_scale);
// sum energy across procs and add in volume-dependent term
if (eflag) {
double energy_all;
MPI_Allreduce(&energy,&energy_all,1,MPI_DOUBLE,MPI_SUM,world);
energy = energy_all;
energy *= 0.5*volume;
energy -= g_ewald*qsqsum/1.772453851 +
0.5*PI*qsum*qsum / (g_ewald*g_ewald*volume);
energy *= qqrd2e*scale;
}
// sum virial across procs
if (vflag) {
double virial_all[6];
MPI_Allreduce(virial,virial_all,6,MPI_DOUBLE,MPI_SUM,world);
for (i = 0; i < 6; i++) virial[i] = 0.5*qqrd2e*scale*volume*virial_all[i];
}
// 2d slab correction
if (slabflag) slabcorr(eflag);
// convert atoms back from lamda to box coords
if (triclinic) domain->lamda2x(atom->nlocal);
}
/* ----------------------------------------------------------------------
memory usage of local arrays
------------------------------------------------------------------------- */
double PPPMGPUDouble::memory_usage()
{
double bytes = nmax*3 * sizeof(double);
int nbrick = (nxhi_out-nxlo_out+1) * (nyhi_out-nylo_out+1) *
(nzhi_out-nzlo_out+1);
bytes += 4 * nbrick * sizeof(grdtyp);
bytes += 6 * nfft_both * sizeof(double);
bytes += nfft_both*6 * sizeof(double);
bytes += 2 * nbuf * sizeof(double);
return bytes + pppm_gpu_bytes_d();
}
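The create_3d_offset() calls in the deleted init() above wrap a flat, library-owned buffer in a 3D array whose indices start at nzlo_out/nylo_out/nxlo_out rather than zero. A hedged sketch of that pointer-offset technique follows (simplified: no per-cell vector stride, illustrative names, error handling omitted; it relies on the same out-of-bounds pointer offsets as the original idiom).

```cpp
// Build a 3d array with non-zero lower bounds over an existing flat buffer:
// the plane and row pointer tables are shifted so arr[z][y][x] is valid for
// z in [zlo,zhi], y in [ylo,yhi], x in [xlo,xhi], while the data itself
// stays contiguous (e.g. shared with a GPU library).
#include <cstdio>
#include <cstdlib>

double ***create_3d_offset(int zlo, int zhi, int ylo, int yhi,
                           int xlo, int xhi, double *data) {
  int nz = zhi - zlo + 1, ny = yhi - ylo + 1, nx = xhi - xlo + 1;
  double ***plane = (double ***) malloc(nz * sizeof(double **));
  double **row = (double **) malloc(nz * ny * sizeof(double *));
  for (int z = 0; z < nz; z++) {
    plane[z] = &row[z * ny] - ylo;                       // make [ylo] valid
    for (int y = 0; y < ny; y++)
      row[z * ny + y] = &data[(z * ny + y) * nx] - xlo;  // make [xlo] valid
  }
  return plane - zlo;                                    // make [zlo] valid
}

void destroy_3d_offset(double ***arr, int zlo, int ylo) {
  free(&arr[zlo][ylo]);   // frees the row-pointer table
  free(&arr[zlo]);        // frees the plane-pointer table
}

int main() {
  double buf[2 * 3 * 4];  // contiguous storage for a 2x3x4 brick
  double ***brick = create_3d_offset(-1, 0, 5, 7, 10, 13, buf);
  brick[-1][5][10] = 42.0;  // lowest corner maps to buf[0]
  std::printf("%g %g\n", brick[-1][5][10], buf[0]);
  destroy_3d_offset(brick, -1, 5);
}
```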

src/GPU/pppm_gpu_double.h (deleted file)

@@ -1,42 +0,0 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#ifdef KSPACE_CLASS
KSpaceStyle(pppm/gpu/double,PPPMGPUDouble)
#else
#ifndef LMP_PPPM_GPU_DOUBLE_H
#define LMP_PPPM_GPU_DOUBLE_H
#include "pppm_gpu.h"
#include "lmptype.h"
namespace LAMMPS_NS {
class PPPMGPUDouble : public PPPMGPU<double> {
public:
PPPMGPUDouble(class LAMMPS *, int, char **);
~PPPMGPUDouble();
void init();
void compute(int, int);
double memory_usage();
protected:
};
}
#endif
#endif

src/GPU/pppm_gpu_single.cpp (deleted file)

@@ -1,216 +0,0 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing authors: Mike Brown (ORNL)
------------------------------------------------------------------------- */
#include "mpi.h"
#include "string.h"
#include "stdio.h"
#include "stdlib.h"
#include "math.h"
#include "pppm_gpu_single.h"
#include "lmptype.h"
#include "atom.h"
#include "comm.h"
#include "neighbor.h"
#include "force.h"
#include "pair.h"
#include "bond.h"
#include "angle.h"
#include "domain.h"
#include "fft3d_wrap.h"
#include "remap_wrap.h"
#include "memory.h"
#include "error.h"
#include "gpu_extra.h"
#define grdtyp float
// External functions from cuda library for atom decomposition
grdtyp* pppm_gpu_init_f(const int nlocal, const int nall, FILE *screen,
const int order, const int nxlo_out,
const int nylo_out, const int nzlo_out,
const int nxhi_out, const int nyhi_out,
const int nzhi_out, double **rho_coeff,
grdtyp **_vd_brick, const double slab_volfactor,
const int nx_pppm, const int ny_pppm,
const int nz_pppm, int &success);
void pppm_gpu_clear_f(const double poisson_time);
int pppm_gpu_spread_f(const int ago, const int nlocal, const int nall,
double **host_x, int *host_type, bool &success,
double *host_q, double *boxlo, const double delxinv,
const double delyinv, const double delzinv);
void pppm_gpu_interp_f(const grdtyp qqrd2e_scale);
double pppm_gpu_bytes_f();
using namespace LAMMPS_NS;
#define MAXORDER 7
#define OFFSET 16384
#define SMALL 0.00001
#define LARGE 10000.0
#define EPS_HOC 1.0e-7
#define MIN(a,b) ((a) < (b) ? (a) : (b))
#define MAX(a,b) ((a) > (b) ? (a) : (b))
/* ---------------------------------------------------------------------- */
PPPMGPUSingle::PPPMGPUSingle(LAMMPS *lmp, int narg, char **arg) :
PPPMGPU<grdtyp>(lmp, narg, arg)
{
}
/* ----------------------------------------------------------------------
free all memory
------------------------------------------------------------------------- */
PPPMGPUSingle::~PPPMGPUSingle()
{
pppm_gpu_clear_f(poisson_time);
}
/* ----------------------------------------------------------------------
called once before run
------------------------------------------------------------------------- */
void PPPMGPUSingle::init()
{
base_init();
if (order>8)
error->all("Cannot use order greater than 8 with pppm/gpu.");
pppm_gpu_clear_f(poisson_time);
int success;
grdtyp *data, *h_brick;
h_brick = pppm_gpu_init_f(atom->nlocal, atom->nlocal+atom->nghost, screen,
order, nxlo_out, nylo_out, nzlo_out, nxhi_out,
nyhi_out, nzhi_out, rho_coeff, &data,
slab_volfactor,nx_pppm,ny_pppm,nz_pppm,success);
GPU_EXTRA::check_flag(success,error,world);
density_brick =
create_3d_offset(nzlo_out,nzhi_out,nylo_out,nyhi_out,
nxlo_out,nxhi_out,"pppm:density_brick",h_brick,1);
vd_brick =
create_3d_offset(nzlo_out,nzhi_out,nylo_out,nyhi_out,
nxlo_out,nxhi_out,"pppm:vd_brick",data,4);
poisson_time=0;
}
/* ----------------------------------------------------------------------
compute the PPPMGPU long-range force, energy, virial
------------------------------------------------------------------------- */
void PPPMGPUSingle::compute(int eflag, int vflag)
{
bool success = true;
int flag=pppm_gpu_spread_f(neighbor->ago, atom->nlocal, atom->nlocal +
atom->nghost, atom->x, atom->type, success,
atom->q, domain->boxlo, delxinv, delyinv,
delzinv);
if (!success)
error->one("Out of memory on GPGPU");
if (flag != 0)
error->one("Out of range atoms - cannot compute PPPM");
int i;
// convert atoms from box to lamda coords
if (triclinic == 0) boxlo = domain->boxlo;
else {
boxlo = domain->boxlo_lamda;
domain->x2lamda(atom->nlocal);
}
energy = 0.0;
if (vflag) for (i = 0; i < 6; i++) virial[i] = 0.0;
double t3=MPI_Wtime();
// all procs communicate density values from their ghost cells
// to fully sum contribution in their 3d bricks
// remap from 3d decomposition to FFT decomposition
brick2fft();
// compute potential gradient on my FFT grid and
// portion of e_long on this proc's FFT grid
// return gradients (electric fields) in 3d brick decomposition
poisson(eflag,vflag);
// all procs communicate E-field values to fill ghost cells
// surrounding their 3d bricks
fillbrick();
poisson_time+=MPI_Wtime()-t3;
// calculate the force on my particles
grdtyp qqrd2e_scale=qqrd2e*scale;
pppm_gpu_interp_f(qqrd2e_scale);
// sum energy across procs and add in volume-dependent term
if (eflag) {
double energy_all;
MPI_Allreduce(&energy,&energy_all,1,MPI_DOUBLE,MPI_SUM,world);
energy = energy_all;
energy *= 0.5*volume;
energy -= g_ewald*qsqsum/1.772453851 +
0.5*PI*qsum*qsum / (g_ewald*g_ewald*volume);
energy *= qqrd2e*scale;
}
// sum virial across procs
if (vflag) {
double virial_all[6];
MPI_Allreduce(virial,virial_all,6,MPI_DOUBLE,MPI_SUM,world);
for (i = 0; i < 6; i++) virial[i] = 0.5*qqrd2e*scale*volume*virial_all[i];
}
// 2d slab correction
if (slabflag) slabcorr(eflag);
// convert atoms back from lamda to box coords
if (triclinic) domain->lamda2x(atom->nlocal);
}
/* ----------------------------------------------------------------------
memory usage of local arrays
------------------------------------------------------------------------- */
double PPPMGPUSingle::memory_usage()
{
double bytes = nmax*3 * sizeof(double);
int nbrick = (nxhi_out-nxlo_out+1) * (nyhi_out-nylo_out+1) *
(nzhi_out-nzlo_out+1);
bytes += 4 * nbrick * sizeof(grdtyp);
bytes += 6 * nfft_both * sizeof(double);
bytes += nfft_both*6 * sizeof(double);
bytes += 2 * nbuf * sizeof(double);
return bytes + pppm_gpu_bytes_f();
}

src/GPU/pppm_gpu_single.h (deleted file)

@@ -1,42 +0,0 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#ifdef KSPACE_CLASS
KSpaceStyle(pppm/gpu/single,PPPMGPUSingle)
#else
#ifndef LMP_PPPM_GPU_SINGLE_H
#define LMP_PPPM_GPU_SINGLE_H
#include "pppm_gpu.h"
#include "lmptype.h"
namespace LAMMPS_NS {
class PPPMGPUSingle : public PPPMGPU<float> {
public:
PPPMGPUSingle(class LAMMPS *, int, char **);
~PPPMGPUSingle();
void init();
void compute(int, int);
double memory_usage();
protected:
};
}
#endif
#endif
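The deleted headers above (and the surviving pppm_gpu.h) use the two-pass header trick: included normally, the header declares the class; re-included with KSPACE_CLASS defined, it expands to a single KSpaceStyle(name,Class) registration line. A minimal sketch of that registration pattern follows; it is not LAMMPS's actual build machinery, and the registry/creator names are made up for illustration.

```cpp
// Sketch of a style registry: each KSpaceStyle(name,Class) line boils down
// to mapping a style name to a factory function for that class.
#include <cstdio>
#include <map>
#include <string>

struct KSpace { virtual ~KSpace() = default; };
struct PPPMGPUStub : KSpace {};  // stands in for the real PPPMGPU

typedef KSpace *(*Creator)();
template <class T> KSpace *create() { return new T; }

int main() {
  std::map<std::string, Creator> registry;
  // During the registration pass, KSpaceStyle(pppm/gpu,PPPMGPU) expands to
  // an entry roughly like this one:
  registry["pppm/gpu"] = create<PPPMGPUStub>;
  // ...so an input-script line like "kspace_style pppm/gpu" can look up
  // the name and construct the corresponding class:
  KSpace *k = registry["pppm/gpu"]();
  std::printf("created kspace style instance at %p\n", (void *) k);
  delete k;
}
```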