forked from lijiext/lammps
git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@9176 f3b2605a-c512-4ea7-a41b-209d697bcdaa
This commit is contained in:
parent
c8624bc7eb
commit
54f60bf717
|
@ -49,6 +49,9 @@ namespace GPU_EXTRA {
|
|||
else if (all_success == -8)
|
||||
error->all(FLERR,
|
||||
"GPU particle split must be set to 1 for this pair style.");
|
||||
else if (all_success == -9)
|
||||
error->all(FLERR,
|
||||
"CPU neighbor lists must be used for ellipsoid/sphere mix.");
|
||||
else
|
||||
error->all(FLERR,"Unknown error in GPU library");
|
||||
}
|
||||
|
@ -102,6 +105,11 @@ E: GPU particle split must be set to 1 for this pair style.
|
|||
For this pair style, you cannot run part of the force calculation on
|
||||
the host. See the package command.
|
||||
|
||||
E: CPU neighbor lists must be used for ellipsoid/sphere mix
|
||||
|
||||
When using Gay-Berne or RE-squared pair styles with both ellipsoidal and
|
||||
spherical particles, the neighbor list must be built on the CPU
|
||||
|
||||
E: Unknown error in GPU library
|
||||
|
||||
Self-explanatory.
|
||||
|
|
|
@ -39,6 +39,7 @@
|
|||
#include "error.h"
|
||||
#include "update.h"
|
||||
#include "universe.h"
|
||||
#include "fix.h"
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
using namespace MathConst;
|
||||
|
@ -120,9 +121,6 @@ void PPPMGPU::init()
|
|||
// NOTE: could free density_brick and vdxyz_brick after PPPM allocates them,
|
||||
// before allocating db_gpu and vd_brick down below, if don't need,
|
||||
// if do this, make sure to set them to NULL
|
||||
// NOTE: delete/realloc of cg necessary b/c packing 4 values per grid pt,
|
||||
// not 3 as PPPM does - probably a better way to account for this
|
||||
// in PPPM::init()
|
||||
|
||||
destroy_3d_offset(density_brick_gpu,nzlo_out,nylo_out);
|
||||
destroy_3d_offset(vd_brick,nzlo_out,nylo_out);
|
||||
|
@ -130,15 +128,11 @@ void PPPMGPU::init()
|
|||
|
||||
PPPM::init();
|
||||
|
||||
if (differentiation_flag == 0) {
|
||||
delete cg;
|
||||
int (*procneigh)[2] = comm->procneigh;
|
||||
cg = new CommGrid(lmp,world,4,1,
|
||||
nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
|
||||
nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out,
|
||||
procneigh[0][0],procneigh[0][1],procneigh[1][0],
|
||||
procneigh[1][1],procneigh[2][0],procneigh[2][1]);
|
||||
}
|
||||
// insure no conflict with fix balance
|
||||
|
||||
for (int i = 0; i < modify->nfix; i++)
|
||||
if (strcmp(modify->fix[i]->style,"balance") == 0)
|
||||
error->all(FLERR,"Cannot currently use pppm/gpu with fix balance.");
|
||||
|
||||
// unsupported option
|
||||
|
||||
|
@ -189,6 +183,8 @@ void PPPMGPU::init()
|
|||
|
||||
void PPPMGPU::compute(int eflag, int vflag)
|
||||
{
|
||||
int i,j;
|
||||
|
||||
int nago;
|
||||
if (kspace_split) {
|
||||
if (im_real_space) return;
|
||||
|
@ -205,8 +201,12 @@ void PPPMGPU::compute(int eflag, int vflag)
|
|||
else evflag = evflag_atom = eflag_global = vflag_global =
|
||||
eflag_atom = vflag_atom = 0;
|
||||
|
||||
// If need per-atom energies/virials, also do particle map on host
|
||||
// concurrently with GPU calculations
|
||||
if (evflag_atom && !peratom_allocate_flag) {
|
||||
allocate_peratom();
|
||||
cg_peratom->ghost_notify();
|
||||
cg_peratom->setup();
|
||||
peratom_allocate_flag = 1;
|
||||
}
|
||||
|
||||
|
@ -220,18 +220,6 @@ void PPPMGPU::compute(int eflag, int vflag)
|
|||
if (flag != 0)
|
||||
error->one(FLERR,"Out of range atoms - cannot compute PPPM");
|
||||
|
||||
// If need per-atom energies/virials, also do particle map on host
|
||||
// concurrently with GPU calculations
|
||||
|
||||
if (evflag_atom) {
|
||||
memory->destroy(part2grid);
|
||||
nmax = atom->nmax;
|
||||
memory->create(part2grid,nmax,3,"pppm:part2grid");
|
||||
particle_map();
|
||||
}
|
||||
|
||||
int i,j;
|
||||
|
||||
// convert atoms from box to lamda coords
|
||||
|
||||
if (triclinic == 0) boxlo = domain->boxlo;
|
||||
|
@ -240,6 +228,15 @@ void PPPMGPU::compute(int eflag, int vflag)
|
|||
domain->x2lamda(atom->nlocal);
|
||||
}
|
||||
|
||||
// extend size of per-atom arrays if necessary
|
||||
|
||||
if (evflag_atom && atom->nlocal > nmax) {
|
||||
memory->destroy(part2grid);
|
||||
nmax = atom->nmax;
|
||||
memory->create(part2grid,nmax,3,"pppm:part2grid");
|
||||
particle_map();
|
||||
}
|
||||
|
||||
double t3 = MPI_Wtime();
|
||||
|
||||
// all procs communicate density values from their ghost cells
|
||||
|
@ -280,24 +277,7 @@ void PPPMGPU::compute(int eflag, int vflag)
|
|||
// per-atom energy/virial
|
||||
// energy includes self-energy correction
|
||||
|
||||
if (evflag_atom) {
|
||||
double *q = atom->q;
|
||||
int nlocal = atom->nlocal;
|
||||
|
||||
if (eflag_atom) {
|
||||
for (i = 0; i < nlocal; i++) {
|
||||
eatom[i] *= 0.5;
|
||||
eatom[i] -= g_ewald*q[i]*q[i]/MY_PIS + MY_PI2*q[i]*qsum /
|
||||
(g_ewald*g_ewald*volume);
|
||||
eatom[i] *= qscale;
|
||||
}
|
||||
}
|
||||
|
||||
if (vflag_atom) {
|
||||
for (i = 0; i < nlocal; i++)
|
||||
for (j = 0; j < 6; j++) vatom[i][j] *= 0.5*qscale;
|
||||
}
|
||||
}
|
||||
if (evflag_atom) fieldforce_peratom();
|
||||
|
||||
// sum energy across procs and add in volume-dependent term
|
||||
|
||||
|
@ -320,6 +300,28 @@ void PPPMGPU::compute(int eflag, int vflag)
|
|||
for (i = 0; i < 6; i++) virial[i] = 0.5*qscale*volume*virial_all[i];
|
||||
}
|
||||
|
||||
// per-atom energy/virial
|
||||
// energy includes self-energy correction
|
||||
|
||||
if (evflag_atom) {
|
||||
double *q = atom->q;
|
||||
int nlocal = atom->nlocal;
|
||||
|
||||
if (eflag_atom) {
|
||||
for (i = 0; i < nlocal; i++) {
|
||||
eatom[i] *= 0.5;
|
||||
eatom[i] -= g_ewald*q[i]*q[i]/MY_PIS + MY_PI2*q[i]*qsum /
|
||||
(g_ewald*g_ewald*volume);
|
||||
eatom[i] *= qscale;
|
||||
}
|
||||
}
|
||||
|
||||
if (vflag_atom) {
|
||||
for (i = 0; i < nlocal; i++)
|
||||
for (j = 0; j < 6; j++) vatom[i][j] *= 0.5*qscale;
|
||||
}
|
||||
}
|
||||
|
||||
// 2d slab correction
|
||||
|
||||
if (slabflag) slabcorr();
|
||||
|
@ -555,7 +557,7 @@ void PPPMGPU::unpack_forward(int flag, FFT_SCALAR *buf, int nlist, int *list)
|
|||
|
||||
if (flag == FORWARD_IK) {
|
||||
int offset;
|
||||
FFT_SCALAR *dest = &vdx_brick[nzlo_out][nylo_out][4*nxlo_out];
|
||||
FFT_SCALAR *dest = &vd_brick[nzlo_out][nylo_out][4*nxlo_out];
|
||||
for (int i = 0; i < nlist; i++) {
|
||||
offset = 4*list[i];
|
||||
dest[offset++] = buf[n++];
|
||||
|
@ -565,7 +567,7 @@ void PPPMGPU::unpack_forward(int flag, FFT_SCALAR *buf, int nlist, int *list)
|
|||
} else if (flag == FORWARD_AD) {
|
||||
FFT_SCALAR *dest = &u_brick[nzlo_out][nylo_out][nxlo_out];
|
||||
for (int i = 0; i < nlist; i++)
|
||||
dest[list[i]] = buf[n++];
|
||||
dest[list[i]] = buf[i];
|
||||
} else if (flag == FORWARD_IK_PERATOM) {
|
||||
FFT_SCALAR *esrc = &u_brick[nzlo_out][nylo_out][nxlo_out];
|
||||
FFT_SCALAR *v0src = &v0_brick[nzlo_out][nylo_out][nxlo_out];
|
||||
|
@ -690,16 +692,30 @@ double PPPMGPU::memory_usage()
|
|||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
perform and time the 4 FFTs required for N timesteps
|
||||
perform and time the 1d FFTs required for N timesteps
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
int PPPMGPU::timing(int n, double &time3d, double &time1d) {
|
||||
int PPPMGPU::timing_1d(int n, double &time1d)
|
||||
{
|
||||
if (im_real_space) {
|
||||
time3d = 1.0;
|
||||
time1d = 1.0;
|
||||
return 4;
|
||||
}
|
||||
PPPM::timing(n,time3d,time1d);
|
||||
PPPM::timing_1d(n,time1d);
|
||||
return 4;
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
perform and time the 3d FFTs required for N timesteps
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
int PPPMGPU::timing_3d(int n, double &time3d)
|
||||
{
|
||||
if (im_real_space) {
|
||||
time3d = 1.0;
|
||||
return 4;
|
||||
}
|
||||
PPPM::timing_3d(n,time3d);
|
||||
return 4;
|
||||
}
|
||||
|
||||
|
|
|
@ -31,7 +31,8 @@ class PPPMGPU : public PPPM {
|
|||
void init();
|
||||
void setup();
|
||||
void compute(int, int);
|
||||
int timing(int, double &, double &);
|
||||
int timing_1d(int, double &);
|
||||
int timing_3d(int, double &);
|
||||
double memory_usage();
|
||||
|
||||
protected:
|
||||
|
@ -67,6 +68,10 @@ Self-explanatory. Check the input script syntax and compare to the
|
|||
documentation for the command. You can use -echo screen as a
|
||||
command-line option when running LAMMPS to see the offending line.
|
||||
|
||||
E: Cannot currently use pppm/gpu with fix balance.
|
||||
|
||||
Self-explanatory.
|
||||
|
||||
E: Cannot (yet) do analytic differentiation with pppm/gpu.
|
||||
|
||||
Self-explanatory.
|
||||
|
|
Loading…
Reference in New Issue