git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@9176 f3b2605a-c512-4ea7-a41b-209d697bcdaa

sjplimp 2013-01-02 16:27:38 +00:00
parent c8624bc7eb
commit 54f60bf717
3 changed files with 78 additions and 49 deletions

src/GPU/gpu_extra.h View File

@@ -49,6 +49,9 @@ namespace GPU_EXTRA {
else if (all_success == -8)
error->all(FLERR,
"GPU particle split must be set to 1 for this pair style.");
+ else if (all_success == -9)
+ error->all(FLERR,
+ "CPU neighbor lists must be used for ellipsoid/sphere mix.");
else
error->all(FLERR,"Unknown error in GPU library");
}
@@ -102,6 +105,11 @@ E: GPU particle split must be set to 1 for this pair style.
For this pair style, you cannot run part of the force calculation on
the host. See the package command.
+ E: CPU neighbor lists must be used for ellipsoid/sphere mix.
+ When using Gay-Berne or RE-squared pair styles with both ellipsoidal and
+ spherical particles, the neighbor list must be built on the CPU.
E: Unknown error in GPU library
Self-explanatory.

src/GPU/pppm_gpu.cpp View File

@@ -39,6 +39,7 @@
#include "error.h"
#include "update.h"
#include "universe.h"
#include "fix.h"
using namespace LAMMPS_NS;
using namespace MathConst;
@@ -120,9 +121,6 @@ void PPPMGPU::init()
// NOTE: could free density_brick and vdxyz_brick after PPPM allocates them,
// before allocating db_gpu and vd_brick down below, if they are not needed;
// if doing so, make sure to set them to NULL
- // NOTE: delete/realloc of cg necessary b/c packing 4 values per grid pt,
- // not 3 as PPPM does - probably a better way to account for this
- // in PPPM::init()
destroy_3d_offset(density_brick_gpu,nzlo_out,nylo_out);
destroy_3d_offset(vd_brick,nzlo_out,nylo_out);
@@ -130,15 +128,11 @@ void PPPMGPU::init()
PPPM::init();
- if (differentiation_flag == 0) {
- delete cg;
- int (*procneigh)[2] = comm->procneigh;
- cg = new CommGrid(lmp,world,4,1,
- nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
- nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out,
- procneigh[0][0],procneigh[0][1],procneigh[1][0],
- procneigh[1][1],procneigh[2][0],procneigh[2][1]);
- }
+ // ensure no conflict with fix balance
+ for (int i = 0; i < modify->nfix; i++)
+ if (strcmp(modify->fix[i]->style,"balance") == 0)
+ error->all(FLERR,"Cannot currently use pppm/gpu with fix balance.");
// unsupported option
@@ -189,6 +183,8 @@ void PPPMGPU::init()
void PPPMGPU::compute(int eflag, int vflag)
{
int i,j;
+ int nago;
if (kspace_split) {
if (im_real_space) return;
@@ -205,8 +201,12 @@ void PPPMGPU::compute(int eflag, int vflag)
else evflag = evflag_atom = eflag_global = vflag_global =
eflag_atom = vflag_atom = 0;
+ // If need per-atom energies/virials, also do particle map on host
+ // concurrently with GPU calculations
if (evflag_atom && !peratom_allocate_flag) {
allocate_peratom();
+ cg_peratom->ghost_notify();
+ cg_peratom->setup();
peratom_allocate_flag = 1;
}
@@ -220,18 +220,6 @@
if (flag != 0)
error->one(FLERR,"Out of range atoms - cannot compute PPPM");
- // If need per-atom energies/virials, also do particle map on host
- // concurrently with GPU calculations
- if (evflag_atom) {
- memory->destroy(part2grid);
- nmax = atom->nmax;
- memory->create(part2grid,nmax,3,"pppm:part2grid");
- particle_map();
- }
- int i,j;
// convert atoms from box to lamda coords
if (triclinic == 0) boxlo = domain->boxlo;
@@ -240,6 +228,15 @@
domain->x2lamda(atom->nlocal);
}
+ // extend size of per-atom arrays if necessary
+ if (evflag_atom && atom->nlocal > nmax) {
+ memory->destroy(part2grid);
+ nmax = atom->nmax;
+ memory->create(part2grid,nmax,3,"pppm:part2grid");
+ particle_map();
+ }
double t3 = MPI_Wtime();
// all procs communicate density values from their ghost cells
@@ -280,24 +277,7 @@
- // per-atom energy/virial
- // energy includes self-energy correction
- if (evflag_atom) {
- double *q = atom->q;
- int nlocal = atom->nlocal;
- if (eflag_atom) {
- for (i = 0; i < nlocal; i++) {
- eatom[i] *= 0.5;
- eatom[i] -= g_ewald*q[i]*q[i]/MY_PIS + MY_PI2*q[i]*qsum /
- (g_ewald*g_ewald*volume);
- eatom[i] *= qscale;
- }
- }
- if (vflag_atom) {
- for (i = 0; i < nlocal; i++)
- for (j = 0; j < 6; j++) vatom[i][j] *= 0.5*qscale;
- }
- }
+ if (evflag_atom) fieldforce_peratom();
// sum energy across procs and add in volume-dependent term
@@ -320,6 +300,28 @@
for (i = 0; i < 6; i++) virial[i] = 0.5*qscale*volume*virial_all[i];
}
+ // per-atom energy/virial
+ // energy includes self-energy correction
+ if (evflag_atom) {
+ double *q = atom->q;
+ int nlocal = atom->nlocal;
+ if (eflag_atom) {
+ for (i = 0; i < nlocal; i++) {
+ eatom[i] *= 0.5;
+ eatom[i] -= g_ewald*q[i]*q[i]/MY_PIS + MY_PI2*q[i]*qsum /
+ (g_ewald*g_ewald*volume);
+ eatom[i] *= qscale;
+ }
+ }
+ if (vflag_atom) {
+ for (i = 0; i < nlocal; i++)
+ for (j = 0; j < 6; j++) vatom[i][j] *= 0.5*qscale;
+ }
+ }
// 2d slab correction
if (slabflag) slabcorr();
@@ -555,7 +557,7 @@ void PPPMGPU::unpack_forward(int flag, FFT_SCALAR *buf, int nlist, int *list)
if (flag == FORWARD_IK) {
int offset;
FFT_SCALAR *dest = &vdx_brick[nzlo_out][nylo_out][4*nxlo_out];
FFT_SCALAR *dest = &vd_brick[nzlo_out][nylo_out][4*nxlo_out];
for (int i = 0; i < nlist; i++) {
offset = 4*list[i];
dest[offset++] = buf[n++];
@@ -565,7 +567,7 @@ void PPPMGPU::unpack_forward(int flag, FFT_SCALAR *buf, int nlist, int *list)
} else if (flag == FORWARD_AD) {
FFT_SCALAR *dest = &u_brick[nzlo_out][nylo_out][nxlo_out];
for (int i = 0; i < nlist; i++)
- dest[list[i]] = buf[n++];
+ dest[list[i]] = buf[i];
} else if (flag == FORWARD_IK_PERATOM) {
FFT_SCALAR *esrc = &u_brick[nzlo_out][nylo_out][nxlo_out];
FFT_SCALAR *v0src = &v0_brick[nzlo_out][nylo_out][nxlo_out];
@@ -690,16 +692,30 @@ double PPPMGPU::memory_usage()
}
/* ----------------------------------------------------------------------
- perform and time the 4 FFTs required for N timesteps
+ perform and time the 1d FFTs required for N timesteps
------------------------------------------------------------------------- */
- int PPPMGPU::timing(int n, double &time3d, double &time1d) {
+ int PPPMGPU::timing_1d(int n, double &time1d)
+ {
if (im_real_space) {
- time3d = 1.0;
time1d = 1.0;
return 4;
}
- PPPM::timing(n,time3d,time1d);
+ PPPM::timing_1d(n,time1d);
return 4;
}
+ /* ----------------------------------------------------------------------
+ perform and time the 3d FFTs required for N timesteps
+ ------------------------------------------------------------------------- */
+ int PPPMGPU::timing_3d(int n, double &time3d)
+ {
+ if (im_real_space) {
+ time3d = 1.0;
+ return 4;
+ }
+ PPPM::timing_3d(n,time3d);
+ return 4;
+ }

src/GPU/pppm_gpu.h View File

@@ -31,7 +31,8 @@ class PPPMGPU : public PPPM {
void init();
void setup();
void compute(int, int);
int timing(int, double &, double &);
int timing_1d(int, double &);
int timing_3d(int, double &);
double memory_usage();
protected:
@@ -67,6 +68,10 @@ Self-explanatory. Check the input script syntax and compare to the
documentation for the command. You can use -echo screen as a
command-line option when running LAMMPS to see the offending line.
+ E: Cannot currently use pppm/gpu with fix balance.
+ Self-explanatory.
+ E: Cannot (yet) do analytic differentiation with pppm/gpu.
+ Self-explanatory.