forked from lijiext/lammps

git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@6068 f3b2605a-c512-4ea7-a41b-209d697bcdaa
parent 6df2d87723
commit b8f1ff821f
@@ -0,0 +1,47 @@ gpu_extra.h (new file)
/* ----------------------------------------------------------------------
   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
   http://lammps.sandia.gov, Sandia National Laboratories
   Steve Plimpton, sjplimp@sandia.gov

   Copyright (2003) Sandia Corporation.  Under the terms of Contract
   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
   certain rights in this software.  This software is distributed under
   the GNU General Public License.

   See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */

/* ----------------------------------------------------------------------
   Contributing author: Mike Brown (ORNL)
------------------------------------------------------------------------- */

#ifndef LMP_GPU_EXTRA_H
#define LMP_GPU_EXTRA_H

#include "error.h"

namespace GPU_EXTRA {

  // Collective check of per-rank GPU library status codes: MPI_MIN keeps
  // the most negative (most severe) code, so all ranks agree on the error
  // and abort with the same message.
  inline void check_flag(int error_flag, LAMMPS_NS::Error *error,
                         MPI_Comm &world) {
    int all_success;
    MPI_Allreduce(&error_flag, &all_success, 1, MPI_INT, MPI_MIN, world);
    if (all_success != 0) {
      if (all_success == -1)
        error->all("Accelerated style in input script but no fix gpu.");
      else if (all_success == -2)
        error->all("Could not find/initialize a specified accelerator device.");
      else if (all_success == -3)
        error->all("Insufficient memory on accelerator.");
      else if (all_success == -4)
        error->all("GPU library not compiled for this accelerator.");
      else if (all_success == -5)
        error->all("Double precision is not supported on this accelerator.");
      else
        error->all("Unknown error in GPU library.");
    }
  }

}

#endif
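Note: check_flag() is a collective call, so every rank must reach it. A minimal sketch of the calling convention each accelerated style below follows (the init arguments are elided here; the library returns 0 on success or one of the negative codes handled above):

  // sketch only: status flag from a GPU library init, checked collectively
  int success = lje_gpu_init(/* ...style parameters, see init_style() below... */);
  GPU_EXTRA::check_flag(success, error, world);  // all ranks must call this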
@@ -0,0 +1,236 @@ pair_lj_expand_gpu.cpp (new file)
/* ----------------------------------------------------------------------
   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
   http://lammps.sandia.gov, Sandia National Laboratories
   Steve Plimpton, sjplimp@sandia.gov

   Copyright (2003) Sandia Corporation.  Under the terms of Contract
   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
   certain rights in this software.  This software is distributed under
   the GNU General Public License.

   See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */

/* ----------------------------------------------------------------------
   Contributing author: Inderaj Bains (NVIDIA), ibains@nvidia.com
------------------------------------------------------------------------- */

#include "math.h"
#include "stdio.h"
#include "stdlib.h"
#include "pair_lj_expand_gpu.h"
#include "lmptype.h"
#include "atom.h"
#include "atom_vec.h"
#include "comm.h"
#include "force.h"
#include "neighbor.h"
#include "neigh_list.h"
#include "integrate.h"
#include "memory.h"
#include "error.h"
#include "neigh_request.h"
#include "universe.h"
#include "update.h"
#include "domain.h"
#include "string.h"
#include "gpu_extra.h"

#define MIN(a,b) ((a) < (b) ? (a) : (b))
#define MAX(a,b) ((a) > (b) ? (a) : (b))

// External functions from cuda library for atom decomposition

int lje_gpu_init(const int ntypes, double **cutsq, double **host_lj1,
                 double **host_lj2, double **host_lj3, double **host_lj4,
                 double **offset, double **shift, double *special_lj,
                 const int nlocal, const int nall, const int max_nbors,
                 const int maxspecial, const double cell_size, int &gpu_mode,
                 FILE *screen);
void lje_gpu_clear();
int ** lje_gpu_compute_n(const int ago, const int inum, const int nall,
                         double **host_x, int *host_type, double *sublo,
                         double *subhi, int *tag, int **nspecial,
                         int **special, const bool eflag, const bool vflag,
                         const bool eatom, const bool vatom, int &host_start,
                         int **ilist, int **jnum,
                         const double cpu_time, bool &success);
void lje_gpu_compute(const int ago, const int inum, const int nall,
                     double **host_x, int *host_type, int *ilist, int *numj,
                     int **firstneigh, const bool eflag, const bool vflag,
                     const bool eatom, const bool vatom, int &host_start,
                     const double cpu_time, bool &success);
double lje_gpu_bytes();

using namespace LAMMPS_NS;

/* ---------------------------------------------------------------------- */

PairLJExpandGPU::PairLJExpandGPU(LAMMPS *lmp) : PairLJExpand(lmp),
                                                gpu_mode(GPU_PAIR)
{
  respa_enable = 0;
  cpu_time = 0.0;
}

/* ----------------------------------------------------------------------
   free all arrays
------------------------------------------------------------------------- */

PairLJExpandGPU::~PairLJExpandGPU()
{
  lje_gpu_clear();
}

/* ---------------------------------------------------------------------- */

void PairLJExpandGPU::compute(int eflag, int vflag)
{
  if (eflag || vflag) ev_setup(eflag,vflag);
  else evflag = vflag_fdotr = 0;

  int nall = atom->nlocal + atom->nghost;
  int inum, host_start;

  bool success = true;
  int *ilist, *numneigh, **firstneigh;
  if (gpu_mode == GPU_NEIGH) {
    inum = atom->nlocal;
    firstneigh = lje_gpu_compute_n(neighbor->ago, inum, nall, atom->x,
                                   atom->type, domain->sublo, domain->subhi,
                                   atom->tag, atom->nspecial, atom->special,
                                   eflag, vflag, eflag_atom, vflag_atom,
                                   host_start, &ilist, &numneigh, cpu_time,
                                   success);
  } else {
    inum = list->inum;
    ilist = list->ilist;
    numneigh = list->numneigh;
    firstneigh = list->firstneigh;
    lje_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type,
                    ilist, numneigh, firstneigh, eflag, vflag, eflag_atom,
                    vflag_atom, host_start, cpu_time, success);
  }
  if (!success)
    error->one("Out of memory on GPGPU");

  // atoms from host_start to inum-1 are handled on the CPU concurrently

  if (host_start < inum) {
    cpu_time = MPI_Wtime();
    cpu_compute(host_start, inum, eflag, vflag, ilist, numneigh, firstneigh);
    cpu_time = MPI_Wtime() - cpu_time;
  }
}

/* ----------------------------------------------------------------------
   init specific to this pair style
------------------------------------------------------------------------- */

void PairLJExpandGPU::init_style()
{
  if (force->newton_pair)
    error->all("Cannot use newton pair with GPU LJ pair style");

  // Repeat cutsq calculation because done after call to init_style

  double maxcut = -1.0;
  double cut;
  for (int i = 1; i <= atom->ntypes; i++) {
    for (int j = i; j <= atom->ntypes; j++) {
      if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
        cut = init_one(i,j);
        cut *= cut;
        if (cut > maxcut)
          maxcut = cut;
        cutsq[i][j] = cutsq[j][i] = cut;
      } else
        cutsq[i][j] = cutsq[j][i] = 0.0;
    }
  }
  double cell_size = sqrt(maxcut) + neighbor->skin;

  int maxspecial = 0;
  if (atom->molecular)
    maxspecial = atom->maxspecial;
  int success = lje_gpu_init(atom->ntypes+1, cutsq, lj1, lj2, lj3, lj4,
                             offset, shift, force->special_lj, atom->nlocal,
                             atom->nlocal+atom->nghost, 300, maxspecial,
                             cell_size, gpu_mode, screen);
  GPU_EXTRA::check_flag(success,error,world);

  if (gpu_mode != GPU_NEIGH) {
    int irequest = neighbor->request(this);
    neighbor->requests[irequest]->half = 0;
    neighbor->requests[irequest]->full = 1;
  }
}

/* ---------------------------------------------------------------------- */

double PairLJExpandGPU::memory_usage()
{
  double bytes = Pair::memory_usage();
  return bytes + lje_gpu_bytes();
}

/* ---------------------------------------------------------------------- */

void PairLJExpandGPU::cpu_compute(int start, int inum, int eflag, int vflag,
                                  int *ilist, int *numneigh, int **firstneigh)
{
  int i,j,ii,jj,jnum,itype,jtype;
  double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
  double rsq,r2inv,r6inv,forcelj,factor_lj;
  double r,rshift,rshiftsq;
  int *jlist;

  double **x = atom->x;
  double **f = atom->f;
  int *type = atom->type;
  int nlocal = atom->nlocal;
  int nall = nlocal + atom->nghost;
  double *special_lj = force->special_lj;

  // loop over neighbors of my atoms

  for (ii = start; ii < inum; ii++) {
    i = ilist[ii];
    xtmp = x[i][0];
    ytmp = x[i][1];
    ztmp = x[i][2];
    itype = type[i];
    jlist = firstneigh[i];
    jnum = numneigh[i];

    for (jj = 0; jj < jnum; jj++) {
      j = jlist[jj];
      factor_lj = special_lj[sbmask(j)];
      j &= NEIGHMASK;

      delx = xtmp - x[j][0];
      dely = ytmp - x[j][1];
      delz = ztmp - x[j][2];
      rsq = delx*delx + dely*dely + delz*delz;
      jtype = type[j];

      if (rsq < cutsq[itype][jtype]) {
        r = sqrt(rsq);
        rshift = r - shift[itype][jtype];
        rshiftsq = rshift*rshift;
        r2inv = 1.0/rshiftsq;
        r6inv = r2inv*r2inv*r2inv;
        forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
        fpair = factor_lj*forcelj/rshift/r;

        f[i][0] += delx*fpair;
        f[i][1] += dely*fpair;
        f[i][2] += delz*fpair;

        if (eflag) {
          evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]) -
            offset[itype][jtype];
          evdwl *= factor_lj;
        }

        if (evflag) ev_tally_full(i,evdwl,0.0,fpair,delx,dely,delz);
      }
    }
  }
}
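For reference, cpu_compute() evaluates the shifted ("expanded") Lennard-Jones form. Assuming the usual PairLJExpand coefficient definitions (lj1 = 48*eps*sigma^12, lj2 = 24*eps*sigma^6, lj3 = 4*eps*sigma^12, lj4 = 4*eps*sigma^6) and writing Delta = shift[i][j], the loop computes

  E(r) = 4\epsilon\left[\left(\frac{\sigma}{r-\Delta}\right)^{12}
         - \left(\frac{\sigma}{r-\Delta}\right)^{6}\right] - E_{\mathrm{offset}},
  \qquad
  F(r) = -\frac{dE}{dr} = \frac{\mathrm{forcelj}}{r-\Delta},

which is why fpair = forcelj/rshift/r: dividing the radial force by r lets it be applied componentwise through (delx, dely, delz).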
@@ -0,0 +1,47 @@ pair_lj_expand_gpu.h (new file)
/* ----------------------------------------------------------------------
   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
   http://lammps.sandia.gov, Sandia National Laboratories
   Steve Plimpton, sjplimp@sandia.gov

   Copyright (2003) Sandia Corporation.  Under the terms of Contract
   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
   certain rights in this software.  This software is distributed under
   the GNU General Public License.

   See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */

#ifdef PAIR_CLASS

PairStyle(lj/expand/gpu,PairLJExpandGPU)

#else

#ifndef LMP_PAIR_LJE_LIGHT_GPU_H
#define LMP_PAIR_LJE_LIGHT_GPU_H

#include "pair_lj_expand.h"

namespace LAMMPS_NS {

class PairLJExpandGPU : public PairLJExpand {
 public:
  PairLJExpandGPU(LAMMPS *lmp);
  ~PairLJExpandGPU();
  void cpu_compute(int, int, int, int, int *, int *, int **);
  void compute(int, int);
  void init_style();
  double memory_usage();

  enum { GPU_PAIR, GPU_NEIGH };

 private:
  int gpu_mode;
  double cpu_time;
  int *gpulist;
};

}
#endif
#endif
@@ -0,0 +1,230 @@ pair_morse_gpu.cpp (new file)
/* ----------------------------------------------------------------------
   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
   http://lammps.sandia.gov, Sandia National Laboratories
   Steve Plimpton, sjplimp@sandia.gov

   Copyright (2003) Sandia Corporation.  Under the terms of Contract
   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
   certain rights in this software.  This software is distributed under
   the GNU General Public License.

   See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */

/* ----------------------------------------------------------------------
   Contributing author: Mike Brown (SNL)
------------------------------------------------------------------------- */

#include "math.h"
#include "stdio.h"
#include "stdlib.h"
#include "pair_morse_gpu.h"
#include "lmptype.h"
#include "atom.h"
#include "atom_vec.h"
#include "comm.h"
#include "force.h"
#include "neighbor.h"
#include "neigh_list.h"
#include "integrate.h"
#include "memory.h"
#include "error.h"
#include "neigh_request.h"
#include "universe.h"
#include "update.h"
#include "domain.h"
#include "string.h"
#include "gpu_extra.h"

#define MIN(a,b) ((a) < (b) ? (a) : (b))
#define MAX(a,b) ((a) > (b) ? (a) : (b))

// External functions from cuda library for atom decomposition

int mor_gpu_init(const int ntypes, double **cutsq, double **host_morse1,
                 double **host_r0, double **host_alpha, double **host_d0,
                 double **offset, double *special_lj, const int nlocal,
                 const int nall, const int max_nbors, const int maxspecial,
                 const double cell_size, int &gpu_mode, FILE *screen);
void mor_gpu_clear();
int ** mor_gpu_compute_n(const int ago, const int inum,
                         const int nall, double **host_x, int *host_type,
                         double *sublo, double *subhi, int *tag,
                         int **nspecial, int **special, const bool eflag,
                         const bool vflag, const bool eatom, const bool vatom,
                         int &host_start, int **ilist, int **jnum,
                         const double cpu_time, bool &success);
void mor_gpu_compute(const int ago, const int inum, const int nall,
                     double **host_x, int *host_type, int *ilist, int *numj,
                     int **firstneigh, const bool eflag, const bool vflag,
                     const bool eatom, const bool vatom, int &host_start,
                     const double cpu_time, bool &success);
double mor_gpu_bytes();

using namespace LAMMPS_NS;

/* ---------------------------------------------------------------------- */

PairMorseGPU::PairMorseGPU(LAMMPS *lmp) : PairMorse(lmp), gpu_mode(GPU_PAIR)
{
  cpu_time = 0.0;
}

/* ----------------------------------------------------------------------
   free all arrays
------------------------------------------------------------------------- */

PairMorseGPU::~PairMorseGPU()
{
  mor_gpu_clear();
}

/* ---------------------------------------------------------------------- */

void PairMorseGPU::compute(int eflag, int vflag)
{
  if (eflag || vflag) ev_setup(eflag,vflag);
  else evflag = vflag_fdotr = 0;

  int nall = atom->nlocal + atom->nghost;
  int inum, host_start;

  bool success = true;
  int *ilist, *numneigh, **firstneigh;
  if (gpu_mode == GPU_NEIGH) {
    inum = atom->nlocal;
    firstneigh = mor_gpu_compute_n(neighbor->ago, inum, nall,
                                   atom->x, atom->type, domain->sublo,
                                   domain->subhi, atom->tag, atom->nspecial,
                                   atom->special, eflag, vflag, eflag_atom,
                                   vflag_atom, host_start, &ilist, &numneigh,
                                   cpu_time, success);
  } else {
    inum = list->inum;
    ilist = list->ilist;
    numneigh = list->numneigh;
    firstneigh = list->firstneigh;
    mor_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type,
                    ilist, numneigh, firstneigh, eflag, vflag, eflag_atom,
                    vflag_atom, host_start, cpu_time, success);
  }
  if (!success)
    error->one("Out of memory on GPGPU");

  if (host_start < inum) {
    cpu_time = MPI_Wtime();
    cpu_compute(host_start, inum, eflag, vflag, ilist, numneigh, firstneigh);
    cpu_time = MPI_Wtime() - cpu_time;
  }
}

/* ----------------------------------------------------------------------
   init specific to this pair style
------------------------------------------------------------------------- */

void PairMorseGPU::init_style()
{
  if (force->newton_pair)
    error->all("Cannot use newton pair with GPU Morse pair style");

  // Repeat cutsq calculation because done after call to init_style

  double maxcut = -1.0;
  double cut;
  for (int i = 1; i <= atom->ntypes; i++) {
    for (int j = i; j <= atom->ntypes; j++) {
      if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
        cut = init_one(i,j);
        cut *= cut;
        if (cut > maxcut)
          maxcut = cut;
        cutsq[i][j] = cutsq[j][i] = cut;
      } else
        cutsq[i][j] = cutsq[j][i] = 0.0;
    }
  }
  double cell_size = sqrt(maxcut) + neighbor->skin;

  int maxspecial = 0;
  if (atom->molecular)
    maxspecial = atom->maxspecial;
  int success = mor_gpu_init(atom->ntypes+1, cutsq, morse1, r0, alpha, d0,
                             offset, force->special_lj, atom->nlocal,
                             atom->nlocal+atom->nghost, 300, maxspecial,
                             cell_size, gpu_mode, screen);
  GPU_EXTRA::check_flag(success,error,world);

  if (gpu_mode != GPU_NEIGH) {
    int irequest = neighbor->request(this);
    neighbor->requests[irequest]->half = 0;
    neighbor->requests[irequest]->full = 1;
  }
}

/* ---------------------------------------------------------------------- */

double PairMorseGPU::memory_usage()
{
  double bytes = Pair::memory_usage();
  return bytes + mor_gpu_bytes();
}

/* ---------------------------------------------------------------------- */

void PairMorseGPU::cpu_compute(int start, int inum, int eflag, int vflag,
                               int *ilist, int *numneigh, int **firstneigh)
{
  int i,j,ii,jj,jnum,itype,jtype;
  double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
  double rsq,r,dr,dexp,factor_lj;
  int *jlist;

  double **x = atom->x;
  double **f = atom->f;
  int *type = atom->type;
  int nlocal = atom->nlocal;
  int nall = nlocal + atom->nghost;
  double *special_lj = force->special_lj;

  // loop over neighbors of my atoms

  for (ii = start; ii < inum; ii++) {
    i = ilist[ii];
    xtmp = x[i][0];
    ytmp = x[i][1];
    ztmp = x[i][2];
    itype = type[i];
    jlist = firstneigh[i];
    jnum = numneigh[i];

    for (jj = 0; jj < jnum; jj++) {
      j = jlist[jj];
      factor_lj = special_lj[sbmask(j)];
      j &= NEIGHMASK;

      delx = xtmp - x[j][0];
      dely = ytmp - x[j][1];
      delz = ztmp - x[j][2];
      rsq = delx*delx + dely*dely + delz*delz;
      jtype = type[j];

      if (rsq < cutsq[itype][jtype]) {
        r = sqrt(rsq);
        dr = r - r0[itype][jtype];
        dexp = exp(-alpha[itype][jtype] * dr);
        fpair = factor_lj * morse1[itype][jtype] * (dexp*dexp - dexp) / r;

        f[i][0] += delx*fpair;
        f[i][1] += dely*fpair;
        f[i][2] += delz*fpair;

        if (eflag) {
          evdwl = d0[itype][jtype] * (dexp*dexp - 2.0*dexp) -
            offset[itype][jtype];
          evdwl *= factor_lj;
        }

        if (evflag) ev_tally_full(i,evdwl,0.0,fpair,delx,dely,delz);
      }
    }
  }
}
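The Morse kernel in cpu_compute() follows the same pattern. Assuming the standard PairMorse coefficient morse1 = 2*D0*alpha and with dexp = exp(-alpha*(r - r0)), the loop evaluates

  E(r) = D_0\left[e^{-2\alpha(r-r_0)} - 2e^{-\alpha(r-r_0)}\right]
         - E_{\mathrm{offset}},
  \qquad
  F(r) = -\frac{dE}{dr} = 2 D_0 \alpha\left[e^{-2\alpha(r-r_0)}
         - e^{-\alpha(r-r_0)}\right],

so F = morse1*(dexp*dexp - dexp) and, as in the LJ case, fpair = factor_lj*F/r for componentwise accumulation.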
@@ -0,0 +1,47 @@ pair_morse_gpu.h (new file)
/* ----------------------------------------------------------------------
   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
   http://lammps.sandia.gov, Sandia National Laboratories
   Steve Plimpton, sjplimp@sandia.gov

   Copyright (2003) Sandia Corporation.  Under the terms of Contract
   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
   certain rights in this software.  This software is distributed under
   the GNU General Public License.

   See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */

#ifdef PAIR_CLASS

PairStyle(morse/gpu,PairMorseGPU)

#else

#ifndef LMP_PAIR_MORSE_GPU_H
#define LMP_PAIR_MORSE_GPU_H

#include "pair_morse.h"

namespace LAMMPS_NS {

class PairMorseGPU : public PairMorse {
 public:
  PairMorseGPU(LAMMPS *lmp);
  ~PairMorseGPU();
  void cpu_compute(int, int, int, int, int *, int *, int **);
  void compute(int, int);
  void init_style();
  double memory_usage();

  enum { GPU_PAIR, GPU_NEIGH };

 private:
  int gpu_mode;
  double cpu_time;
  int *gpulist;
};

}
#endif
#endif
(One file diff suppressed because it is too large; judging from the template class declared in pppm_gpu.h below, it is presumably pppm_gpu.cpp, the implementation of the PPPMGPU template.)
@@ -0,0 +1,103 @@ pppm_gpu.h (new file)
/* ----------------------------------------------------------------------
   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
   http://lammps.sandia.gov, Sandia National Laboratories
   Steve Plimpton, sjplimp@sandia.gov

   Copyright (2003) Sandia Corporation.  Under the terms of Contract
   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
   certain rights in this software.  This software is distributed under
   the GNU General Public License.

   See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */

#ifndef LMP_PPPM_GPU_H
#define LMP_PPPM_GPU_H

#include "kspace.h"
#include "lmptype.h"

namespace LAMMPS_NS {

template <class grdtyp>
class PPPMGPU : public KSpace {
 public:
  PPPMGPU(class LAMMPS *, int, char **);
  virtual ~PPPMGPU();
  virtual void init() = 0;
  void base_init();
  void setup();
  virtual void compute(int, int) = 0;
  void timing(int, double &, double &);
  virtual double memory_usage() = 0;

 protected:
  int me,nprocs;
  double PI;
  double precision;
  int nfactors;
  int *factors;
  double qsum,qsqsum;
  double qqrd2e;
  double cutoff;
  double volume;
  double delxinv,delyinv,delzinv,delvolinv;
  double shift,shiftone;

  int nxlo_in,nylo_in,nzlo_in,nxhi_in,nyhi_in,nzhi_in;
  int nxlo_out,nylo_out,nzlo_out,nxhi_out,nyhi_out,nzhi_out;
  int nxlo_ghost,nxhi_ghost,nylo_ghost,nyhi_ghost,nzlo_ghost,nzhi_ghost;
  int nxlo_fft,nylo_fft,nzlo_fft,nxhi_fft,nyhi_fft,nzhi_fft;
  int nlower,nupper;
  int ngrid,nfft,nbuf,nfft_both;

  grdtyp ***density_brick;
  grdtyp ***vd_brick;
  double *greensfn;
  double **vg;
  double *fkx,*fky,*fkz;
  double *density_fft;
  double *work1,*work2;
  double *buf1,*buf2;

  double *gf_b;
  double **rho1d,**rho_coeff;

  class FFT3d *fft1,*fft2;
  class Remap *remap;

  int nmax;

  int triclinic;               // domain settings, orthog or triclinic
  double *boxlo;
                               // TIP4P settings
  int typeH,typeO;             // atom types of TIP4P water H and O atoms
  double qdist;                // distance from O site to negative charge
  double alpha;                // geometric factor

  void set_grid();
  void allocate();
  void deallocate();
  int factorable(int);
  double rms(double, double, bigint, double, double **);
  double diffpr(double, double, double, double, double **);
  void compute_gf_denom();
  double gf_denom(double, double, double);
  void brick2fft();
  void fillbrick();
  void poisson(int, int);
  void procs2grid2d(int,int,int,int *, int*);
  void compute_rho1d(double, double, double);
  void compute_rho_coeff();
  void slabcorr(int);

  double poisson_time;

  grdtyp ***create_3d_offset(int, int, int, int, int, int, const char *,
                             grdtyp *, int);
  void destroy_3d_offset(grdtyp ***, int, int);
};

}

#endif
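Because PPPMGPU is a class template parameterized on the grid type, its member definitions must be instantiated for each precision the derived classes use. A minimal sketch of how the suppressed implementation file presumably ends, under the assumption it relies on explicit instantiation (the actual pppm_gpu.cpp is not shown in this diff):

  // hypothetical tail of pppm_gpu.cpp: with all PPPMGPU<grdtyp> members
  // defined in that translation unit, explicit instantiation emits the
  // two precisions needed by the subclasses declared below.
  template class PPPMGPU<float>;    // used by PPPMGPUSingle
  template class PPPMGPU<double>;   // used by PPPMGPUDouble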
@@ -0,0 +1,217 @@ pppm_gpu_double.cpp (new file)
/* ----------------------------------------------------------------------
   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
   http://lammps.sandia.gov, Sandia National Laboratories
   Steve Plimpton, sjplimp@sandia.gov

   Copyright (2003) Sandia Corporation.  Under the terms of Contract
   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
   certain rights in this software.  This software is distributed under
   the GNU General Public License.

   See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */

/* ----------------------------------------------------------------------
   Contributing authors: Mike Brown (ORNL)
------------------------------------------------------------------------- */

#include "mpi.h"
#include "string.h"
#include "stdio.h"
#include "stdlib.h"
#include "math.h"
#include "pppm_gpu_double.h"
#include "lmptype.h"
#include "atom.h"
#include "comm.h"
#include "neighbor.h"
#include "force.h"
#include "pair.h"
#include "bond.h"
#include "angle.h"
#include "domain.h"
#include "fft3d_wrap.h"
#include "remap_wrap.h"
#include "memory.h"
#include "error.h"
#include "gpu_extra.h"

#define grdtyp double

// External functions from cuda library for atom decomposition

grdtyp* pppm_gpu_init_d(const int nlocal, const int nall, FILE *screen,
                        const int order, const int nxlo_out,
                        const int nylo_out, const int nzlo_out,
                        const int nxhi_out, const int nyhi_out,
                        const int nzhi_out, double **rho_coeff,
                        grdtyp **_vd_brick, const double slab_volfactor,
                        const int nx_pppm, const int ny_pppm,
                        const int nz_pppm, int &success);
void pppm_gpu_clear_d(const double poisson_time);
int pppm_gpu_spread_d(const int ago, const int nlocal, const int nall,
                      double **host_x, int *host_type, bool &success,
                      double *host_q, double *boxlo, const double delxinv,
                      const double delyinv, const double delzinv);
void pppm_gpu_interp_d(const grdtyp qqrd2e_scale);
double pppm_gpu_bytes_d();

using namespace LAMMPS_NS;

#define MAXORDER 7
#define OFFSET 16384
#define SMALL 0.00001
#define LARGE 10000.0
#define EPS_HOC 1.0e-7

#define MIN(a,b) ((a) < (b) ? (a) : (b))
#define MAX(a,b) ((a) > (b) ? (a) : (b))

/* ---------------------------------------------------------------------- */

PPPMGPUDouble::PPPMGPUDouble(LAMMPS *lmp, int narg, char **arg) :
  PPPMGPU<grdtyp>(lmp, narg, arg)
{
}

/* ----------------------------------------------------------------------
   free all memory
------------------------------------------------------------------------- */

PPPMGPUDouble::~PPPMGPUDouble()
{
  pppm_gpu_clear_d(poisson_time);
}

/* ----------------------------------------------------------------------
   called once before run
------------------------------------------------------------------------- */

void PPPMGPUDouble::init()
{
  base_init();

  if (order > 8)
    error->all("Cannot use order greater than 8 with pppm/gpu.");
  pppm_gpu_clear_d(poisson_time);

  int success;
  grdtyp *data, *h_brick;
  h_brick = pppm_gpu_init_d(atom->nlocal, atom->nlocal+atom->nghost, screen,
                            order, nxlo_out, nylo_out, nzlo_out, nxhi_out,
                            nyhi_out, nzhi_out, rho_coeff, &data,
                            slab_volfactor, nx_pppm, ny_pppm, nz_pppm,
                            success);

  GPU_EXTRA::check_flag(success,error,world);

  density_brick =
    create_3d_offset(nzlo_out,nzhi_out,nylo_out,nyhi_out,
                     nxlo_out,nxhi_out,"pppm:density_brick",h_brick,1);
  vd_brick =
    create_3d_offset(nzlo_out,nzhi_out,nylo_out,nyhi_out,
                     nxlo_out,nxhi_out,"pppm:vd_brick",data,4);

  poisson_time = 0;
}

/* ----------------------------------------------------------------------
   compute the PPPMGPU long-range force, energy, virial
------------------------------------------------------------------------- */

void PPPMGPUDouble::compute(int eflag, int vflag)
{
  bool success = true;
  int flag = pppm_gpu_spread_d(neighbor->ago, atom->nlocal, atom->nlocal +
                               atom->nghost, atom->x, atom->type, success,
                               atom->q, domain->boxlo, delxinv, delyinv,
                               delzinv);
  if (!success)
    error->one("Out of memory on GPGPU");
  if (flag != 0)
    error->one("Out of range atoms - cannot compute PPPM");

  int i;

  // convert atoms from box to lamda coords

  if (triclinic == 0) boxlo = domain->boxlo;
  else {
    boxlo = domain->boxlo_lamda;
    domain->x2lamda(atom->nlocal);
  }

  energy = 0.0;
  if (vflag) for (i = 0; i < 6; i++) virial[i] = 0.0;

  double t3 = MPI_Wtime();

  // all procs communicate density values from their ghost cells
  //   to fully sum contribution in their 3d bricks
  // remap from 3d decomposition to FFT decomposition

  brick2fft();

  // compute potential gradient on my FFT grid and
  //   portion of e_long on this proc's FFT grid
  // return gradients (electric fields) in 3d brick decomposition

  poisson(eflag,vflag);

  // all procs communicate E-field values to fill ghost cells
  //   surrounding their 3d bricks

  fillbrick();

  poisson_time += MPI_Wtime()-t3;

  // calculate the force on my particles

  grdtyp qqrd2e_scale = qqrd2e*scale;
  pppm_gpu_interp_d(qqrd2e_scale);

  // sum energy across procs and add in volume-dependent term

  if (eflag) {
    double energy_all;
    MPI_Allreduce(&energy,&energy_all,1,MPI_DOUBLE,MPI_SUM,world);
    energy = energy_all;

    energy *= 0.5*volume;
    energy -= g_ewald*qsqsum/1.772453851 +
      0.5*PI*qsum*qsum / (g_ewald*g_ewald*volume);
    energy *= qqrd2e*scale;
  }

  // sum virial across procs

  if (vflag) {
    double virial_all[6];
    MPI_Allreduce(virial,virial_all,6,MPI_DOUBLE,MPI_SUM,world);
    for (i = 0; i < 6; i++) virial[i] = 0.5*qqrd2e*scale*volume*virial_all[i];
  }

  // 2d slab correction

  if (slabflag) slabcorr(eflag);

  // convert atoms back from lamda to box coords

  if (triclinic) domain->lamda2x(atom->nlocal);
}

/* ----------------------------------------------------------------------
   memory usage of local arrays
------------------------------------------------------------------------- */

double PPPMGPUDouble::memory_usage()
{
  double bytes = nmax*3 * sizeof(double);
  int nbrick = (nxhi_out-nxlo_out+1) * (nyhi_out-nylo_out+1) *
    (nzhi_out-nzlo_out+1);
  bytes += 4 * nbrick * sizeof(grdtyp);
  bytes += 6 * nfft_both * sizeof(double);
  bytes += nfft_both*6 * sizeof(double);
  bytes += 2 * nbuf * sizeof(double);
  return bytes + pppm_gpu_bytes_d();
}
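A note on the energy block in compute() (the same code appears verbatim in the single-precision variant below): the two subtracted terms are the standard Ewald self-energy and net-charge corrections, and the literal 1.772453851 is sqrt(pi). With qsqsum = sum_i q_i^2, qsum = sum_i q_i, and V = volume, the block computes

  E \;\leftarrow\; \frac{V}{2}\,E_{\mathrm{fft}}
      \;-\; \frac{g_{\mathrm{ewald}} \sum_i q_i^2}{\sqrt{\pi}}
      \;-\; \frac{\pi \left(\sum_i q_i\right)^2}{2\, g_{\mathrm{ewald}}^2 V},

scaled at the end by qqrd2e*scale to convert to the force-field energy units.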
@@ -0,0 +1,42 @@ pppm_gpu_double.h (new file)
/* ----------------------------------------------------------------------
   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
   http://lammps.sandia.gov, Sandia National Laboratories
   Steve Plimpton, sjplimp@sandia.gov

   Copyright (2003) Sandia Corporation.  Under the terms of Contract
   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
   certain rights in this software.  This software is distributed under
   the GNU General Public License.

   See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */

#ifdef KSPACE_CLASS

KSpaceStyle(pppm/gpu/double,PPPMGPUDouble)

#else

#ifndef LMP_PPPM_GPU_DOUBLE_H
#define LMP_PPPM_GPU_DOUBLE_H

#include "pppm_gpu.h"
#include "lmptype.h"

namespace LAMMPS_NS {

class PPPMGPUDouble : public PPPMGPU<double> {
 public:
  PPPMGPUDouble(class LAMMPS *, int, char **);
  ~PPPMGPUDouble();
  void init();
  void compute(int, int);
  double memory_usage();

 protected:
};

}

#endif
#endif
@@ -0,0 +1,216 @@ pppm_gpu_single.cpp (new file)
/* ----------------------------------------------------------------------
   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
   http://lammps.sandia.gov, Sandia National Laboratories
   Steve Plimpton, sjplimp@sandia.gov

   Copyright (2003) Sandia Corporation.  Under the terms of Contract
   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
   certain rights in this software.  This software is distributed under
   the GNU General Public License.

   See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */

/* ----------------------------------------------------------------------
   Contributing authors: Mike Brown (ORNL)
------------------------------------------------------------------------- */

#include "mpi.h"
#include "string.h"
#include "stdio.h"
#include "stdlib.h"
#include "math.h"
#include "pppm_gpu_single.h"
#include "lmptype.h"
#include "atom.h"
#include "comm.h"
#include "neighbor.h"
#include "force.h"
#include "pair.h"
#include "bond.h"
#include "angle.h"
#include "domain.h"
#include "fft3d_wrap.h"
#include "remap_wrap.h"
#include "memory.h"
#include "error.h"
#include "gpu_extra.h"

#define grdtyp float

// External functions from cuda library for atom decomposition

grdtyp* pppm_gpu_init_f(const int nlocal, const int nall, FILE *screen,
                        const int order, const int nxlo_out,
                        const int nylo_out, const int nzlo_out,
                        const int nxhi_out, const int nyhi_out,
                        const int nzhi_out, double **rho_coeff,
                        grdtyp **_vd_brick, const double slab_volfactor,
                        const int nx_pppm, const int ny_pppm,
                        const int nz_pppm, int &success);
void pppm_gpu_clear_f(const double poisson_time);
int pppm_gpu_spread_f(const int ago, const int nlocal, const int nall,
                      double **host_x, int *host_type, bool &success,
                      double *host_q, double *boxlo, const double delxinv,
                      const double delyinv, const double delzinv);
void pppm_gpu_interp_f(const grdtyp qqrd2e_scale);
double pppm_gpu_bytes_f();

using namespace LAMMPS_NS;

#define MAXORDER 7
#define OFFSET 16384
#define SMALL 0.00001
#define LARGE 10000.0
#define EPS_HOC 1.0e-7

#define MIN(a,b) ((a) < (b) ? (a) : (b))
#define MAX(a,b) ((a) > (b) ? (a) : (b))

/* ---------------------------------------------------------------------- */

PPPMGPUSingle::PPPMGPUSingle(LAMMPS *lmp, int narg, char **arg) :
  PPPMGPU<grdtyp>(lmp, narg, arg)
{
}

/* ----------------------------------------------------------------------
   free all memory
------------------------------------------------------------------------- */

PPPMGPUSingle::~PPPMGPUSingle()
{
  pppm_gpu_clear_f(poisson_time);
}

/* ----------------------------------------------------------------------
   called once before run
------------------------------------------------------------------------- */

void PPPMGPUSingle::init()
{
  base_init();

  if (order > 8)
    error->all("Cannot use order greater than 8 with pppm/gpu.");
  pppm_gpu_clear_f(poisson_time);

  int success;
  grdtyp *data, *h_brick;
  h_brick = pppm_gpu_init_f(atom->nlocal, atom->nlocal+atom->nghost, screen,
                            order, nxlo_out, nylo_out, nzlo_out, nxhi_out,
                            nyhi_out, nzhi_out, rho_coeff, &data,
                            slab_volfactor,nx_pppm,ny_pppm,nz_pppm,success);

  GPU_EXTRA::check_flag(success,error,world);

  density_brick =
    create_3d_offset(nzlo_out,nzhi_out,nylo_out,nyhi_out,
                     nxlo_out,nxhi_out,"pppm:density_brick",h_brick,1);
  vd_brick =
    create_3d_offset(nzlo_out,nzhi_out,nylo_out,nyhi_out,
                     nxlo_out,nxhi_out,"pppm:vd_brick",data,4);

  poisson_time = 0;
}

/* ----------------------------------------------------------------------
   compute the PPPMGPU long-range force, energy, virial
------------------------------------------------------------------------- */

void PPPMGPUSingle::compute(int eflag, int vflag)
{
  bool success = true;
  int flag = pppm_gpu_spread_f(neighbor->ago, atom->nlocal, atom->nlocal +
                               atom->nghost, atom->x, atom->type, success,
                               atom->q, domain->boxlo, delxinv, delyinv,
                               delzinv);
  if (!success)
    error->one("Out of memory on GPGPU");
  if (flag != 0)
    error->one("Out of range atoms - cannot compute PPPM");

  int i;

  // convert atoms from box to lamda coords

  if (triclinic == 0) boxlo = domain->boxlo;
  else {
    boxlo = domain->boxlo_lamda;
    domain->x2lamda(atom->nlocal);
  }

  energy = 0.0;
  if (vflag) for (i = 0; i < 6; i++) virial[i] = 0.0;

  double t3 = MPI_Wtime();

  // all procs communicate density values from their ghost cells
  //   to fully sum contribution in their 3d bricks
  // remap from 3d decomposition to FFT decomposition

  brick2fft();

  // compute potential gradient on my FFT grid and
  //   portion of e_long on this proc's FFT grid
  // return gradients (electric fields) in 3d brick decomposition

  poisson(eflag,vflag);

  // all procs communicate E-field values to fill ghost cells
  //   surrounding their 3d bricks

  fillbrick();

  poisson_time += MPI_Wtime()-t3;

  // calculate the force on my particles

  grdtyp qqrd2e_scale = qqrd2e*scale;
  pppm_gpu_interp_f(qqrd2e_scale);

  // sum energy across procs and add in volume-dependent term

  if (eflag) {
    double energy_all;
    MPI_Allreduce(&energy,&energy_all,1,MPI_DOUBLE,MPI_SUM,world);
    energy = energy_all;

    energy *= 0.5*volume;
    energy -= g_ewald*qsqsum/1.772453851 +
      0.5*PI*qsum*qsum / (g_ewald*g_ewald*volume);
    energy *= qqrd2e*scale;
  }

  // sum virial across procs

  if (vflag) {
    double virial_all[6];
    MPI_Allreduce(virial,virial_all,6,MPI_DOUBLE,MPI_SUM,world);
    for (i = 0; i < 6; i++) virial[i] = 0.5*qqrd2e*scale*volume*virial_all[i];
  }

  // 2d slab correction

  if (slabflag) slabcorr(eflag);

  // convert atoms back from lamda to box coords

  if (triclinic) domain->lamda2x(atom->nlocal);
}

/* ----------------------------------------------------------------------
   memory usage of local arrays
------------------------------------------------------------------------- */

double PPPMGPUSingle::memory_usage()
{
  double bytes = nmax*3 * sizeof(double);
  int nbrick = (nxhi_out-nxlo_out+1) * (nyhi_out-nylo_out+1) *
    (nzhi_out-nzlo_out+1);
  bytes += 4 * nbrick * sizeof(grdtyp);
  bytes += 6 * nfft_both * sizeof(double);
  bytes += nfft_both*6 * sizeof(double);
  bytes += 2 * nbuf * sizeof(double);
  return bytes + pppm_gpu_bytes_f();
}
@@ -0,0 +1,42 @@ pppm_gpu_single.h (new file)
/* ----------------------------------------------------------------------
   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
   http://lammps.sandia.gov, Sandia National Laboratories
   Steve Plimpton, sjplimp@sandia.gov

   Copyright (2003) Sandia Corporation.  Under the terms of Contract
   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
   certain rights in this software.  This software is distributed under
   the GNU General Public License.

   See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */

#ifdef KSPACE_CLASS

KSpaceStyle(pppm/gpu/single,PPPMGPUSingle)

#else

#ifndef LMP_PPPM_GPU_SINGLE_H
#define LMP_PPPM_GPU_SINGLE_H

#include "pppm_gpu.h"
#include "lmptype.h"

namespace LAMMPS_NS {

class PPPMGPUSingle : public PPPMGPU<float> {
 public:
  PPPMGPUSingle(class LAMMPS *, int, char **);
  ~PPPMGPUSingle();
  void init();
  void compute(int, int);
  double memory_usage();

 protected:
};

}

#endif
#endif