mirror of https://github.com/lammps/lammps.git
Changes from Mike Brown.
git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@5278 f3b2605a-c512-4ea7-a41b-209d697bcdaa
This commit is contained in:
parent
5a82c99485
commit
4e52d8d927
|
@ -1,6 +1,7 @@
|
|||
# Install/unInstall package files in LAMMPS
|
||||
# edit Makefile.package to include/exclude GPU library
|
||||
# do not copy gayberne files if non-GPU version does not exist
|
||||
# do not copy charmm files if non-GPU version does not exist
|
||||
|
||||
if (test $1 = 1) then
|
||||
|
||||
|
@ -12,14 +13,36 @@ if (test $1 = 1) then
|
|||
sed -i -e 's|^PKG_SYSPATH =[ \t]*|&$(gpu_SYSPATH) |' ../Makefile.package
|
||||
sed -i -e 's|^PKG_SYSLIB =[ \t]*|&$(gpu_SYSLIB) |' ../Makefile.package
|
||||
fi
|
||||
|
||||
|
||||
if (test -e ../pair_gayberne.cpp) then
|
||||
cp pair_gayberne_gpu.cpp ..
|
||||
cp pair_gayberne_gpu.h ..
|
||||
fi
|
||||
|
||||
if (test -e ../pair_lj_cut_coul_long.cpp) then
|
||||
cp pair_lj_cut_coul_long_gpu.cpp ..
|
||||
cp pair_lj_cut_coul_long_gpu.h ..
|
||||
fi
|
||||
|
||||
if (test -e ../pair_cg_cmm.cpp) then
|
||||
cp pair_cg_cmm_gpu.cpp ..
|
||||
cp pair_cg_cmm_gpu.h ..
|
||||
fi
|
||||
|
||||
if (test -e ../pair_cg_cmm_coul_long.cpp) then
|
||||
cp pair_cg_cmm_coul_long_gpu.cpp ..
|
||||
cp pair_cg_cmm_coul_long_gpu.h ..
|
||||
fi
|
||||
|
||||
cp pair_lj_cut_gpu.cpp ..
|
||||
cp pair_lj96_cut_gpu.cpp ..
|
||||
cp pair_lj_cut_coul_cut_gpu.cpp ..
|
||||
cp pair_lj_cut_gpu.h ..
|
||||
cp pair_lj96_cut_gpu.h ..
|
||||
cp pair_lj_cut_coul_cut_gpu.h ..
|
||||
|
||||
cp fix_gpu.cpp ..
|
||||
cp fix_gpu.h ..
|
||||
|
||||
elif (test $1 = 0) then
|
||||
|
||||
|
@ -27,11 +50,23 @@ elif (test $1 = 0) then
|
|||
sed -i -e 's/[^ \t]*gpu //' ../Makefile.package
|
||||
sed -i -e 's/[^ \t]*gpu_[^ \t]*) //' ../Makefile.package
|
||||
fi
|
||||
|
||||
|
||||
rm ../pair_gayberne_gpu.cpp
|
||||
rm ../pair_lj_cut_gpu.cpp
|
||||
rm ../pair_lj96_cut_gpu.cpp
|
||||
rm ../pair_lj_cut_coul_cut_gpu.cpp
|
||||
rm ../pair_lj_cut_coul_long_gpu.cpp
|
||||
rm ../pair_cg_cmm_gpu.cpp
|
||||
rm ../pair_cg_cmm_coul_long_gpu.cpp
|
||||
rm ../fix_gpu.cpp
|
||||
|
||||
rm ../pair_gayberne_gpu.h
|
||||
rm ../pair_lj_cut_gpu.h
|
||||
|
||||
rm ../pair_lj96_cut_gpu.h
|
||||
rm ../pair_lj_cut_coul_cut_gpu.h
|
||||
rm ../pair_lj_cut_coul_long_gpu.h
|
||||
rm ../pair_cg_cmm_gpu.h
|
||||
rm ../pair_cg_cmm_coul_long_gpu.h
|
||||
rm ../fix_gpu.h
|
||||
|
||||
fi
|
||||
|
|
|
@ -3,10 +3,40 @@
|
|||
# do not copy gayberne files if non-GPU version does not exist
|
||||
|
||||
for file in *.cpp *.h; do
|
||||
if (test $file == pair_gayberne_gpu.cpp -a ! -e ../pair_gayberne.cpp) then
|
||||
if (test $file = pair_gayberne_gpu.cpp -a ! -e ../pair_gayberne.cpp) then
|
||||
continue
|
||||
fi
|
||||
if (test $file == pair_gayberne_gpu.h -a ! -e ../pair_gayberne.cpp) then
|
||||
if (test $file = pair_gayberne_gpu.h -a ! -e ../pair_gayberne.cpp) then
|
||||
continue
|
||||
fi
|
||||
if (test $file = pair_lj_cut_coul_long_gpu.cpp -a ! -e ../pair_lj_cut_coul_long.cpp) then
|
||||
continue
|
||||
fi
|
||||
if (test $file = pair_lj_cut_coul_long_gpu.h -a ! -e ../pair_lj_cut_coul_long.cpp) then
|
||||
continue
|
||||
fi
|
||||
if (test $file = pair_cg_cmm_gpu.cpp -a ! -e ../pair_cg_cmm.cpp) then
|
||||
continue
|
||||
fi
|
||||
if (test $file = pair_cg_cmm_gpu.h -a ! -e ../pair_cg_cmm.cpp) then
|
||||
continue
|
||||
fi
|
||||
if (test $file = pair_cg_cmm_coul_long_gpu.cpp -a ! -e ../pair_cg_cmm_coul_long.cpp) then
|
||||
continue
|
||||
fi
|
||||
if (test $file = pair_cg_cmm_coul_long_gpu.h -a ! -e ../pair_cg_cmm_coul_long.cpp) then
|
||||
continue
|
||||
fi
|
||||
if (test $file = pair_cg_cmm_coul_msm.cpp -a ! -e ../pair_cg_cmm.cpp) then
|
||||
continue
|
||||
fi
|
||||
if (test $file = pair_cg_cmm_coul_msm.h -a ! -e ../pair_cg_cmm.cpp) then
|
||||
continue
|
||||
fi
|
||||
if (test $file = pair_cg_cmm_coul_msm_gpu.cpp -a ! -e ../pair_cg_cmm.cpp) then
|
||||
continue
|
||||
fi
|
||||
if (test $file = pair_cg_cmm_coul_msm_gpu.h -a ! -e ../pair_cg_cmm.cpp) then
|
||||
continue
|
||||
fi
|
||||
|
||||
|
|
|
@ -0,0 +1,148 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include "string.h"
|
||||
#include "stdlib.h"
|
||||
#include "fix_gpu.h"
|
||||
#include "atom.h"
|
||||
#include "force.h"
|
||||
#include "pair.h"
|
||||
#include "respa.h"
|
||||
#include "input.h"
|
||||
#include "error.h"
|
||||
#include "timer.h"
|
||||
#include "modify.h"
|
||||
#include "domain.h"
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
|
||||
enum{GPU_FORCE, GPU_NEIGH};
|
||||
|
||||
extern bool lmp_init_device(const int first_gpu, const int last_gpu,
|
||||
const int gpu_mode, const double particle_split);
|
||||
extern void lmp_clear_device();
|
||||
extern double lmp_gpu_forces(double **f, double **tor, double *eatom,
|
||||
double **vatom, double *virial, double &ecoul);
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
FixGPU::FixGPU(LAMMPS *lmp, int narg, char **arg) :
|
||||
Fix(lmp, narg, arg)
|
||||
{
|
||||
if (narg < 7) error->all("Illegal fix gpu command");
|
||||
|
||||
if (strcmp(arg[1],"all") != 0)
|
||||
error->all("Illegal fix gpu command");
|
||||
|
||||
int gpu_mode, first_gpu, last_gpu;
|
||||
double particle_split;
|
||||
|
||||
if (strcmp(arg[3],"force") == 0)
|
||||
gpu_mode = GPU_FORCE;
|
||||
else if (strcmp(arg[3],"force/neigh") == 0) {
|
||||
gpu_mode = GPU_NEIGH;
|
||||
if (domain->triclinic)
|
||||
error->all("Cannot use force/neigh with triclinic box.");
|
||||
} else
|
||||
error->all("Illegal fix gpu command.");
|
||||
|
||||
first_gpu = atoi(arg[4]);
|
||||
last_gpu = atoi(arg[5]);
|
||||
|
||||
particle_split = force->numeric(arg[6]);
|
||||
if (particle_split==0 || particle_split>1)
|
||||
error->all("Illegal fix gpu command.");
|
||||
|
||||
if (!lmp_init_device(first_gpu,last_gpu,gpu_mode,particle_split))
|
||||
error->one("Could not find or initialize a specified accelerator device.");
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
FixGPU::~FixGPU()
|
||||
{
|
||||
lmp_clear_device();
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
int FixGPU::setmask()
|
||||
{
|
||||
int mask = 0;
|
||||
mask |= POST_FORCE;
|
||||
mask |= MIN_POST_FORCE;
|
||||
return mask;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void FixGPU::init()
|
||||
{
|
||||
// Can only have 1 gpu fix that must be the first fix for a run
|
||||
if ((void*)modify->fix[0] != (void*)this)
|
||||
error->all("GPU is not the first fix for this run.");
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void FixGPU::setup(int vflag)
|
||||
{
|
||||
post_force(vflag);
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void FixGPU::min_setup(int vflag)
|
||||
{
|
||||
post_force(vflag);
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void FixGPU::post_force(int vflag)
|
||||
{
|
||||
timer->stamp();
|
||||
double lvirial[6];
|
||||
for (int i = 0; i < 6; i++) lvirial[i] = 0.0;
|
||||
double my_eng = lmp_gpu_forces(atom->f, atom->torque, force->pair->eatom,
|
||||
force->pair->vatom, lvirial,
|
||||
force->pair->eng_coul);
|
||||
|
||||
force->pair->eng_vdwl += my_eng;
|
||||
force->pair->virial[0] += lvirial[0];
|
||||
force->pair->virial[1] += lvirial[1];
|
||||
force->pair->virial[2] += lvirial[2];
|
||||
force->pair->virial[3] += lvirial[3];
|
||||
force->pair->virial[4] += lvirial[4];
|
||||
force->pair->virial[5] += lvirial[5];
|
||||
|
||||
if (force->pair->vflag_fdotr) force->pair->virial_compute();
|
||||
timer->stamp(TIME_PAIR);
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void FixGPU::min_post_force(int vflag)
|
||||
{
|
||||
post_force(vflag);
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
double FixGPU::memory_usage()
|
||||
{
|
||||
double bytes = 0.0;
|
||||
// Memory usage currently returned by pair routine
|
||||
return bytes;
|
||||
}
|
||||
|
|
@ -0,0 +1,45 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifdef FIX_CLASS
|
||||
|
||||
FixStyle(gpu,FixGPU)
|
||||
|
||||
#else
|
||||
|
||||
#ifndef LMP_FIX_GPU_H
|
||||
#define LMP_FIX_GPU_H
|
||||
|
||||
#include "fix.h"
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
class FixGPU : public Fix {
|
||||
public:
|
||||
FixGPU(class LAMMPS *, int, char **);
|
||||
~FixGPU();
|
||||
int setmask();
|
||||
void init();
|
||||
void setup(int);
|
||||
void min_setup(int);
|
||||
void post_force(int);
|
||||
void min_post_force(int);
|
||||
double memory_usage();
|
||||
|
||||
private:
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif
|
|
@ -0,0 +1,499 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
Contributing author: Mike Brown (SNL)
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include "math.h"
|
||||
#include "stdio.h"
|
||||
#include "stdlib.h"
|
||||
#include "pair_cg_cmm_coul_long_gpu.h"
|
||||
#include "atom.h"
|
||||
#include "atom_vec.h"
|
||||
#include "comm.h"
|
||||
#include "force.h"
|
||||
#include "neighbor.h"
|
||||
#include "neigh_list.h"
|
||||
#include "integrate.h"
|
||||
#include "memory.h"
|
||||
#include "error.h"
|
||||
#include "neigh_request.h"
|
||||
#include "universe.h"
|
||||
#include "update.h"
|
||||
#include "domain.h"
|
||||
#include "string.h"
|
||||
#include "kspace.h"
|
||||
|
||||
#define MIN(a,b) ((a) < (b) ? (a) : (b))
|
||||
#define MAX(a,b) ((a) > (b) ? (a) : (b))
|
||||
|
||||
#define EWALD_F 1.12837917
|
||||
#define EWALD_P 0.3275911
|
||||
#define A1 0.254829592
|
||||
#define A2 -0.284496736
|
||||
#define A3 1.421413741
|
||||
#define A4 -1.453152027
|
||||
#define A5 1.061405429
|
||||
|
||||
// External functions from cuda library for atom decomposition
|
||||
|
||||
bool cmml_gpu_init(const int ntypes, double **cutsq, int **cg_type,
|
||||
double **host_lj1, double **host_lj2, double **host_lj3,
|
||||
double **host_lj4, double **offset, double *special_lj,
|
||||
const int nlocal, const int nall, const int max_nbors,
|
||||
const int maxspecial, const double cell_size, int &gpu_mode,
|
||||
FILE *screen, double **host_cut_ljsq, double host_cut_coulsq,
|
||||
double *host_special_coul, const double qqrd2e,
|
||||
const double g_ewald);
|
||||
void cmml_gpu_clear();
|
||||
int * cmml_gpu_compute_n(const int timestep, const int ago, const int inum,
|
||||
const int nall, double **host_x, int *host_type,
|
||||
double *boxlo, double *boxhi, int *tag, int **nspecial,
|
||||
int **special, const bool eflag, const bool vflag,
|
||||
const bool eatom, const bool vatom, int &host_start,
|
||||
const double cpu_time, bool &success, double *host_q);
|
||||
void cmml_gpu_compute(const int timestep, const int ago, const int inum,
|
||||
const int nall, double **host_x, int *host_type,
|
||||
int *ilist, int *numj, int **firstneigh,
|
||||
const bool eflag, const bool vflag, const bool eatom,
|
||||
const bool vatom, int &host_start, const double cpu_time,
|
||||
bool &success, double *host_q);
|
||||
double cmml_gpu_bytes();
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
PairCGCMMCoulLongGPU::PairCGCMMCoulLongGPU(LAMMPS *lmp) : PairCGCMMCoulLong(lmp), gpu_mode(GPU_PAIR)
|
||||
{
|
||||
respa_enable = 0;
|
||||
cpu_time = 0.0;
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
free all arrays
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
PairCGCMMCoulLongGPU::~PairCGCMMCoulLongGPU()
|
||||
{
|
||||
cmml_gpu_clear();
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void PairCGCMMCoulLongGPU::compute(int eflag, int vflag)
|
||||
{
|
||||
if (eflag || vflag) ev_setup(eflag,vflag);
|
||||
else evflag = vflag_fdotr = 0;
|
||||
|
||||
int nall = atom->nlocal + atom->nghost;
|
||||
int inum, host_start;
|
||||
|
||||
bool success = true;
|
||||
|
||||
if (gpu_mode == GPU_NEIGH) {
|
||||
inum = atom->nlocal;
|
||||
gpulist = cmml_gpu_compute_n(update->ntimestep, neighbor->ago, inum, nall,
|
||||
atom->x, atom->type, domain->sublo,
|
||||
domain->subhi, atom->tag, atom->nspecial,
|
||||
atom->special, eflag, vflag, eflag_atom,
|
||||
vflag_atom, host_start, cpu_time, success,
|
||||
atom->q);
|
||||
} else {
|
||||
inum = list->inum;
|
||||
cmml_gpu_compute(update->ntimestep, neighbor->ago, inum, nall, atom->x,
|
||||
atom->type, list->ilist, list->numneigh, list->firstneigh,
|
||||
eflag, vflag, eflag_atom, vflag_atom, host_start, cpu_time,
|
||||
success, atom->q);
|
||||
}
|
||||
if (!success)
|
||||
error->one("Out of memory on GPGPU");
|
||||
|
||||
if (host_start<inum) {
|
||||
cpu_time = MPI_Wtime();
|
||||
if (gpu_mode == GPU_NEIGH)
|
||||
cpu_compute(gpulist, host_start, eflag, vflag);
|
||||
else
|
||||
cpu_compute(host_start, eflag, vflag);
|
||||
cpu_time = MPI_Wtime() - cpu_time;
|
||||
}
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
init specific to this pair style
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void PairCGCMMCoulLongGPU::init_style()
|
||||
{
|
||||
cut_respa = NULL;
|
||||
|
||||
if (!atom->q_flag)
|
||||
error->all("Pair style cg/cmm/coul/long requires atom attribute q");
|
||||
if (force->pair_match("gpu",0) == NULL)
|
||||
error->all("Cannot use pair hybrid with multiple GPU pair styles");
|
||||
|
||||
// Repeat cutsq calculation because done after call to init_style
|
||||
double maxcut = -1.0;
|
||||
double cut;
|
||||
for (int i = 1; i <= atom->ntypes; i++) {
|
||||
for (int j = i; j <= atom->ntypes; j++) {
|
||||
if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
|
||||
cut = init_one(i,j);
|
||||
cut *= cut;
|
||||
if (cut > maxcut)
|
||||
maxcut = cut;
|
||||
cutsq[i][j] = cutsq[j][i] = cut;
|
||||
} else
|
||||
cutsq[i][j] = cutsq[j][i] = 0.0;
|
||||
}
|
||||
}
|
||||
double cell_size = sqrt(maxcut) + neighbor->skin;
|
||||
|
||||
// insure use of KSpace long-range solver, set g_ewald
|
||||
|
||||
if (force->kspace == NULL)
|
||||
error->all("Pair style is incompatible with KSpace style");
|
||||
g_ewald = force->kspace->g_ewald;
|
||||
|
||||
// setup force tables
|
||||
|
||||
if (ncoultablebits) init_tables();
|
||||
|
||||
int maxspecial=0;
|
||||
if (atom->molecular)
|
||||
maxspecial=atom->maxspecial;
|
||||
bool init_ok = cmml_gpu_init(atom->ntypes+1, cutsq, cg_type, lj1, lj2, lj3,
|
||||
lj4, offset, force->special_lj, atom->nlocal,
|
||||
atom->nlocal+atom->nghost, 300, maxspecial,
|
||||
cell_size, gpu_mode, screen, cut_ljsq,
|
||||
cut_coulsq_global, force->special_coul,
|
||||
force->qqrd2e, g_ewald);
|
||||
if (!init_ok)
|
||||
error->one("Insufficient memory on accelerator (or no fix gpu).\n");
|
||||
|
||||
if (force->newton_pair)
|
||||
error->all("Cannot use newton pair with GPU cg/cmm pair style");
|
||||
|
||||
if (gpu_mode != GPU_NEIGH) {
|
||||
int irequest = neighbor->request(this);
|
||||
neighbor->requests[irequest]->half = 0;
|
||||
neighbor->requests[irequest]->full = 1;
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
double PairCGCMMCoulLongGPU::memory_usage()
|
||||
{
|
||||
double bytes = Pair::memory_usage();
|
||||
return bytes + cmml_gpu_bytes();
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void PairCGCMMCoulLongGPU::cpu_compute(int start, int eflag, int vflag)
|
||||
{
|
||||
int i,j,ii,jj,inum,jnum,itype,jtype,itable;
|
||||
double qtmp,xtmp,ytmp,ztmp,delx,dely,delz;
|
||||
double fraction,table;
|
||||
double r,r2inv,r6inv,forcecoul,forcelj,factor_coul,factor_lj;
|
||||
double grij,expm2,prefactor,t,erfc;
|
||||
int *ilist,*jlist,*numneigh,**firstneigh;
|
||||
double rsq;
|
||||
|
||||
double **x = atom->x;
|
||||
double **f = atom->f;
|
||||
double *q = atom->q;
|
||||
int *type = atom->type;
|
||||
int nlocal = atom->nlocal;
|
||||
int nall = nlocal + atom->nghost;
|
||||
double *special_coul = force->special_coul;
|
||||
double *special_lj = force->special_lj;
|
||||
double qqrd2e = force->qqrd2e;
|
||||
|
||||
inum = list->inum;
|
||||
ilist = list->ilist;
|
||||
numneigh = list->numneigh;
|
||||
firstneigh = list->firstneigh;
|
||||
|
||||
// loop over neighbors of my atoms
|
||||
|
||||
for (ii = start; ii < inum; ii++) {
|
||||
i = ilist[ii];
|
||||
qtmp = q[i];
|
||||
xtmp = x[i][0];
|
||||
ytmp = x[i][1];
|
||||
ztmp = x[i][2];
|
||||
itype = type[i];
|
||||
jlist = firstneigh[i];
|
||||
jnum = numneigh[i];
|
||||
|
||||
for (jj = 0; jj < jnum; jj++) {
|
||||
j = jlist[jj];
|
||||
|
||||
if (j < nall) factor_coul = factor_lj = 1.0;
|
||||
else {
|
||||
factor_coul = special_coul[j/nall];
|
||||
factor_lj = special_lj[j/nall];
|
||||
j %= nall;
|
||||
}
|
||||
|
||||
const double delx = xtmp - x[j][0];
|
||||
const double dely = ytmp - x[j][1];
|
||||
const double delz = ztmp - x[j][2];
|
||||
const double rsq = delx*delx + dely*dely + delz*delz;
|
||||
const int jtype = type[j];
|
||||
|
||||
double evdwl = 0.0;
|
||||
double ecoul = 0.0;
|
||||
double fpair = 0.0;
|
||||
|
||||
if (rsq < cutsq[itype][jtype]) {
|
||||
const double r2inv = 1.0/rsq;
|
||||
const int cgt=cg_type[itype][jtype];
|
||||
|
||||
double forcelj = 0.0;
|
||||
double forcecoul = 0.0;
|
||||
|
||||
if (rsq < cut_ljsq[itype][jtype]) {
|
||||
forcelj=factor_lj;
|
||||
if (eflag) evdwl=factor_lj;
|
||||
|
||||
if (cgt == CG_LJ12_4) {
|
||||
const double r4inv=r2inv*r2inv;
|
||||
forcelj *= r4inv*(lj1[itype][jtype]*r4inv*r4inv
|
||||
- lj2[itype][jtype]);
|
||||
if (eflag) {
|
||||
evdwl *= r4inv*(lj3[itype][jtype]*r4inv*r4inv
|
||||
- lj4[itype][jtype]) - offset[itype][jtype];
|
||||
}
|
||||
} else if (cgt == CG_LJ9_6) {
|
||||
const double r3inv = r2inv*sqrt(r2inv);
|
||||
const double r6inv = r3inv*r3inv;
|
||||
forcelj *= r6inv*(lj1[itype][jtype]*r3inv
|
||||
- lj2[itype][jtype]);
|
||||
if (eflag) {
|
||||
evdwl *= r6inv*(lj3[itype][jtype]*r3inv
|
||||
- lj4[itype][jtype]) - offset[itype][jtype];
|
||||
}
|
||||
} else {
|
||||
const double r6inv = r2inv*r2inv*r2inv;
|
||||
forcelj *= r6inv*(lj1[itype][jtype]*r6inv
|
||||
- lj2[itype][jtype]);
|
||||
if (eflag) {
|
||||
evdwl *= r6inv*(lj3[itype][jtype]*r6inv
|
||||
- lj4[itype][jtype]) - offset[itype][jtype];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (rsq < cut_coulsq_global) {
|
||||
if (!ncoultablebits || rsq <= tabinnersq) {
|
||||
const double r = sqrt(rsq);
|
||||
const double grij = g_ewald * r;
|
||||
const double expm2 = exp(-grij*grij);
|
||||
const double t = 1.0 / (1.0 + EWALD_P*grij);
|
||||
const double erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2;
|
||||
const double prefactor = qqrd2e * qtmp*q[j]/r;
|
||||
forcecoul = prefactor * (erfc + EWALD_F*grij*expm2);
|
||||
if (eflag) ecoul = prefactor*erfc;
|
||||
if (factor_coul < 1.0) {
|
||||
forcecoul -= (1.0-factor_coul)*prefactor;
|
||||
if (eflag) ecoul -= (1.0-factor_coul)*prefactor;
|
||||
}
|
||||
} else {
|
||||
union_int_float_t rsq_lookup;
|
||||
rsq_lookup.f = rsq;
|
||||
int itable = rsq_lookup.i & ncoulmask;
|
||||
itable >>= ncoulshiftbits;
|
||||
const double fraction = (rsq_lookup.f - rtable[itable]) *
|
||||
drtable[itable];
|
||||
const double table = ftable[itable] + fraction*dftable[itable];
|
||||
forcecoul = qtmp*q[j] * table;
|
||||
if (eflag) {
|
||||
const double table2 = etable[itable] + fraction*detable[itable];
|
||||
ecoul = qtmp*q[j] * table2;
|
||||
}
|
||||
if (factor_coul < 1.0) {
|
||||
const double table2 = ctable[itable] + fraction*dctable[itable];
|
||||
const double prefactor = qtmp*q[j] * table2;
|
||||
forcecoul -= (1.0-factor_coul)*prefactor;
|
||||
if (eflag) ecoul -= (1.0-factor_coul)*prefactor;
|
||||
}
|
||||
}
|
||||
}
|
||||
fpair = (forcecoul + forcelj) * r2inv;
|
||||
|
||||
f[i][0] += delx*fpair;
|
||||
f[i][1] += dely*fpair;
|
||||
f[i][2] += delz*fpair;
|
||||
|
||||
if (evflag) ev_tally_full(i,evdwl,ecoul,fpair,delx,dely,delz);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void PairCGCMMCoulLongGPU::cpu_compute(int *nbors, int start, int eflag,
|
||||
int vflag)
|
||||
{
|
||||
int i,j,jnum,itype,jtype,itable;
|
||||
double qtmp,xtmp,ytmp,ztmp,delx,dely,delz;
|
||||
double fraction,table;
|
||||
double r,r2inv,r6inv,forcecoul,forcelj,factor_coul,factor_lj;
|
||||
double grij,expm2,prefactor,t,erfc;
|
||||
double rsq;
|
||||
|
||||
double **x = atom->x;
|
||||
double **f = atom->f;
|
||||
double *q = atom->q;
|
||||
int *type = atom->type;
|
||||
int nlocal = atom->nlocal;
|
||||
int nall = nlocal + atom->nghost;
|
||||
int stride = nlocal-start;
|
||||
double *special_coul = force->special_coul;
|
||||
double *special_lj = force->special_lj;
|
||||
double qqrd2e = force->qqrd2e;
|
||||
|
||||
// loop over neighbors of my atoms
|
||||
|
||||
for (i = start; i < nlocal; i++) {
|
||||
qtmp = q[i];
|
||||
xtmp = x[i][0];
|
||||
ytmp = x[i][1];
|
||||
ztmp = x[i][2];
|
||||
itype = type[i];
|
||||
int *nbor = nbors + i - start;
|
||||
jnum = *nbor;
|
||||
nbor += stride;
|
||||
int *nbor_end = nbor + stride * jnum;
|
||||
|
||||
for (; nbor<nbor_end; nbor+=stride) {
|
||||
j = *nbor;
|
||||
|
||||
if (j < nall) factor_coul = factor_lj = 1.0;
|
||||
else {
|
||||
factor_coul = special_coul[j/nall];
|
||||
factor_lj = special_lj[j/nall];
|
||||
j %= nall;
|
||||
}
|
||||
|
||||
delx = xtmp - x[j][0];
|
||||
dely = ytmp - x[j][1];
|
||||
delz = ztmp - x[j][2];
|
||||
rsq = delx*delx + dely*dely + delz*delz;
|
||||
jtype = type[j];
|
||||
|
||||
double evdwl = 0.0;
|
||||
double ecoul = 0.0;
|
||||
double fpair = 0.0;
|
||||
|
||||
if (rsq < cutsq[itype][jtype]) {
|
||||
const double r2inv = 1.0/rsq;
|
||||
const int cgt=cg_type[itype][jtype];
|
||||
|
||||
double forcelj = 0.0;
|
||||
double forcecoul = 0.0;
|
||||
|
||||
if (rsq < cut_ljsq[itype][jtype]) {
|
||||
forcelj=factor_lj;
|
||||
if (eflag) evdwl=factor_lj;
|
||||
|
||||
if (cgt == CG_LJ12_4) {
|
||||
const double r4inv=r2inv*r2inv;
|
||||
forcelj *= r4inv*(lj1[itype][jtype]*r4inv*r4inv
|
||||
- lj2[itype][jtype]);
|
||||
if (eflag) {
|
||||
evdwl *= r4inv*(lj3[itype][jtype]*r4inv*r4inv
|
||||
- lj4[itype][jtype]) - offset[itype][jtype];
|
||||
}
|
||||
} else if (cgt == CG_LJ9_6) {
|
||||
const double r3inv = r2inv*sqrt(r2inv);
|
||||
const double r6inv = r3inv*r3inv;
|
||||
forcelj *= r6inv*(lj1[itype][jtype]*r3inv
|
||||
- lj2[itype][jtype]);
|
||||
if (eflag) {
|
||||
evdwl *= r6inv*(lj3[itype][jtype]*r3inv
|
||||
- lj4[itype][jtype]) - offset[itype][jtype];
|
||||
}
|
||||
} else {
|
||||
const double r6inv = r2inv*r2inv*r2inv;
|
||||
forcelj *= r6inv*(lj1[itype][jtype]*r6inv
|
||||
- lj2[itype][jtype]);
|
||||
if (eflag) {
|
||||
evdwl *= r6inv*(lj3[itype][jtype]*r6inv
|
||||
- lj4[itype][jtype]) - offset[itype][jtype];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (rsq < cut_coulsq_global) {
|
||||
if (!ncoultablebits || rsq <= tabinnersq) {
|
||||
const double r = sqrt(rsq);
|
||||
const double grij = g_ewald * r;
|
||||
const double expm2 = exp(-grij*grij);
|
||||
const double t = 1.0 / (1.0 + EWALD_P*grij);
|
||||
const double erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2;
|
||||
const double prefactor = qqrd2e * qtmp*q[j]/r;
|
||||
forcecoul = prefactor * (erfc + EWALD_F*grij*expm2);
|
||||
if (eflag) ecoul = prefactor*erfc;
|
||||
if (factor_coul < 1.0) {
|
||||
forcecoul -= (1.0-factor_coul)*prefactor;
|
||||
if (eflag) ecoul -= (1.0-factor_coul)*prefactor;
|
||||
}
|
||||
} else {
|
||||
union_int_float_t rsq_lookup;
|
||||
rsq_lookup.f = rsq;
|
||||
int itable = rsq_lookup.i & ncoulmask;
|
||||
itable >>= ncoulshiftbits;
|
||||
const double fraction = (rsq_lookup.f - rtable[itable]) *
|
||||
drtable[itable];
|
||||
const double table = ftable[itable] + fraction*dftable[itable];
|
||||
forcecoul = qtmp*q[j] * table;
|
||||
if (eflag) {
|
||||
const double table2 = etable[itable] + fraction*detable[itable];
|
||||
ecoul = qtmp*q[j] * table2;
|
||||
}
|
||||
if (factor_coul < 1.0) {
|
||||
const double table2 = ctable[itable] + fraction*dctable[itable];
|
||||
const double prefactor = qtmp*q[j] * table2;
|
||||
forcecoul -= (1.0-factor_coul)*prefactor;
|
||||
if (eflag) ecoul -= (1.0-factor_coul)*prefactor;
|
||||
}
|
||||
}
|
||||
}
|
||||
fpair = (forcecoul + forcelj) * r2inv;
|
||||
|
||||
f[i][0] += delx*fpair;
|
||||
f[i][1] += dely*fpair;
|
||||
f[i][2] += delz*fpair;
|
||||
|
||||
if (j<start) {
|
||||
if (evflag) ev_tally_full(i,evdwl,ecoul,fpair,delx,dely,delz);
|
||||
} else {
|
||||
if (j<nlocal) {
|
||||
f[j][0] -= delx*fpair;
|
||||
f[j][1] -= dely*fpair;
|
||||
f[j][2] -= delz*fpair;
|
||||
}
|
||||
if (evflag) ev_tally(i,j,nlocal,0,
|
||||
evdwl,ecoul,fpair,delx,dely,delz);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,48 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifdef PAIR_CLASS
|
||||
|
||||
PairStyle(cg/cmm/coul/long/gpu,PairCGCMMCoulLongGPU)
|
||||
|
||||
#else
|
||||
|
||||
#ifndef LMP_PAIR_CG_CMM_COUL_LONG_GPU_H
|
||||
#define LMP_PAIR_CG_CMM_COUL_LONG_GPU_H
|
||||
|
||||
#include "pair_cg_cmm_coul_long.h"
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
class PairCGCMMCoulLongGPU : public PairCGCMMCoulLong {
|
||||
public:
|
||||
PairCGCMMCoulLongGPU(LAMMPS *lmp);
|
||||
~PairCGCMMCoulLongGPU();
|
||||
void cpu_compute(int, int, int);
|
||||
void cpu_compute(int *, int, int, int);
|
||||
void compute(int, int);
|
||||
void init_style();
|
||||
double memory_usage();
|
||||
|
||||
enum { GPU_PAIR, GPU_NEIGH };
|
||||
|
||||
private:
|
||||
int gpu_mode;
|
||||
double cpu_time;
|
||||
int *gpulist;
|
||||
};
|
||||
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
|
|
@ -0,0 +1,362 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
Contributing author: Mike Brown (SNL)
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include "math.h"
|
||||
#include "stdio.h"
|
||||
#include "stdlib.h"
|
||||
#include "pair_cg_cmm_gpu.h"
|
||||
#include "atom.h"
|
||||
#include "atom_vec.h"
|
||||
#include "comm.h"
|
||||
#include "force.h"
|
||||
#include "neighbor.h"
|
||||
#include "neigh_list.h"
|
||||
#include "integrate.h"
|
||||
#include "memory.h"
|
||||
#include "error.h"
|
||||
#include "neigh_request.h"
|
||||
#include "universe.h"
|
||||
#include "update.h"
|
||||
#include "domain.h"
|
||||
#include "string.h"
|
||||
|
||||
#define MIN(a,b) ((a) < (b) ? (a) : (b))
|
||||
#define MAX(a,b) ((a) > (b) ? (a) : (b))
|
||||
|
||||
// External functions from cuda library for atom decomposition
|
||||
|
||||
bool cmm_gpu_init(const int ntypes, double **cutsq, int **cg_types,
|
||||
double **host_lj1, double **host_lj2, double **host_lj3,
|
||||
double **host_lj4, double **offset, double *special_lj,
|
||||
const int nlocal, const int nall, const int max_nbors,
|
||||
const int maxspecial, const double cell_size, int &gpu_mode,
|
||||
FILE *screen);
|
||||
void cmm_gpu_clear();
|
||||
int * cmm_gpu_compute_n(const int timestep, const int ago, const int inum,
|
||||
const int nall, double **host_x, int *host_type,
|
||||
double *boxlo, double *boxhi, int *tag, int **nspecial,
|
||||
int **special, const bool eflag, const bool vflag,
|
||||
const bool eatom, const bool vatom, int &host_start,
|
||||
const double cpu_time, bool &success);
|
||||
void cmm_gpu_compute(const int timestep, const int ago, const int inum,
|
||||
const int nall, double **host_x, int *host_type,
|
||||
int *ilist, int *numj, int **firstneigh,
|
||||
const bool eflag, const bool vflag, const bool eatom,
|
||||
const bool vatom, int &host_start, const double cpu_time,
|
||||
bool &success);
|
||||
double cmm_gpu_bytes();
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
PairCGCMMGPU::PairCGCMMGPU(LAMMPS *lmp) : PairCGCMM(lmp), gpu_mode(GPU_PAIR)
|
||||
{
|
||||
respa_enable = 0;
|
||||
cpu_time = 0.0;
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
free all arrays
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
PairCGCMMGPU::~PairCGCMMGPU()
|
||||
{
|
||||
cmm_gpu_clear();
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void PairCGCMMGPU::compute(int eflag, int vflag)
|
||||
{
|
||||
if (eflag || vflag) ev_setup(eflag,vflag);
|
||||
else evflag = vflag_fdotr = 0;
|
||||
|
||||
int nall = atom->nlocal + atom->nghost;
|
||||
int inum, host_start;
|
||||
|
||||
bool success = true;
|
||||
|
||||
if (gpu_mode == GPU_NEIGH) {
|
||||
inum = atom->nlocal;
|
||||
gpulist = cmm_gpu_compute_n(update->ntimestep, neighbor->ago, inum, nall,
|
||||
atom->x, atom->type, domain->sublo,
|
||||
domain->subhi, atom->tag, atom->nspecial,
|
||||
atom->special, eflag, vflag, eflag_atom,
|
||||
vflag_atom, host_start, cpu_time, success);
|
||||
} else {
|
||||
inum = list->inum;
|
||||
cmm_gpu_compute(update->ntimestep, neighbor->ago, inum, nall, atom->x,
|
||||
atom->type, list->ilist, list->numneigh, list->firstneigh,
|
||||
eflag, vflag, eflag_atom, vflag_atom, host_start, cpu_time,
|
||||
success);
|
||||
}
|
||||
if (!success)
|
||||
error->one("Out of memory on GPGPU");
|
||||
|
||||
if (host_start<inum) {
|
||||
cpu_time = MPI_Wtime();
|
||||
if (gpu_mode == GPU_NEIGH)
|
||||
cpu_compute(gpulist, host_start, eflag, vflag);
|
||||
else
|
||||
cpu_compute(host_start, eflag, vflag);
|
||||
cpu_time = MPI_Wtime() - cpu_time;
|
||||
}
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
init specific to this pair style
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void PairCGCMMGPU::init_style()
|
||||
{
|
||||
cut_respa = NULL;
|
||||
|
||||
if (force->pair_match("gpu",0) == NULL)
|
||||
error->all("Cannot use pair hybrid with multiple GPU pair styles");
|
||||
|
||||
// Repeat cutsq calculation because done after call to init_style
|
||||
double maxcut = -1.0;
|
||||
double cut;
|
||||
for (int i = 1; i <= atom->ntypes; i++) {
|
||||
for (int j = i; j <= atom->ntypes; j++) {
|
||||
if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
|
||||
cut = init_one(i,j);
|
||||
cut *= cut;
|
||||
if (cut > maxcut)
|
||||
maxcut = cut;
|
||||
cutsq[i][j] = cutsq[j][i] = cut;
|
||||
} else
|
||||
cutsq[i][j] = cutsq[j][i] = 0.0;
|
||||
}
|
||||
}
|
||||
double cell_size = sqrt(maxcut) + neighbor->skin;
|
||||
|
||||
int maxspecial=0;
|
||||
if (atom->molecular)
|
||||
maxspecial=atom->maxspecial;
|
||||
bool init_ok = cmm_gpu_init(atom->ntypes+1,cutsq,cg_type,lj1,lj2,lj3,lj4,
|
||||
offset, force->special_lj, atom->nlocal,
|
||||
atom->nlocal+atom->nghost, 300, maxspecial,
|
||||
cell_size, gpu_mode, screen);
|
||||
if (!init_ok)
|
||||
error->one("Insufficient memory on accelerator (or no fix gpu).\n");
|
||||
|
||||
if (force->newton_pair)
|
||||
error->all("Cannot use newton pair with GPU CGCMM pair style");
|
||||
|
||||
if (gpu_mode != GPU_NEIGH) {
|
||||
int irequest = neighbor->request(this);
|
||||
neighbor->requests[irequest]->half = 0;
|
||||
neighbor->requests[irequest]->full = 1;
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
double PairCGCMMGPU::memory_usage()
|
||||
{
|
||||
double bytes = Pair::memory_usage();
|
||||
return bytes + cmm_gpu_bytes();
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void PairCGCMMGPU::cpu_compute(int start, int eflag, int vflag) {
|
||||
int i,j,ii,jj,inum,jnum,itype,jtype;
|
||||
double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
|
||||
double rsq,r2inv,r6inv,forcelj,factor_lj;
|
||||
int *ilist,*jlist,*numneigh,**firstneigh;
|
||||
|
||||
double **x = atom->x;
|
||||
double **f = atom->f;
|
||||
int *type = atom->type;
|
||||
int nlocal = atom->nlocal;
|
||||
int nall = nlocal + atom->nghost;
|
||||
double *special_lj = force->special_lj;
|
||||
|
||||
inum = list->inum;
|
||||
ilist = list->ilist;
|
||||
numneigh = list->numneigh;
|
||||
firstneigh = list->firstneigh;
|
||||
|
||||
// loop over neighbors of my atoms
|
||||
|
||||
for (ii = start; ii < inum; ii++) {
|
||||
i = ilist[ii];
|
||||
xtmp = x[i][0];
|
||||
ytmp = x[i][1];
|
||||
ztmp = x[i][2];
|
||||
itype = type[i];
|
||||
jlist = firstneigh[i];
|
||||
jnum = numneigh[i];
|
||||
|
||||
for (jj = 0; jj < jnum; jj++) {
|
||||
j = jlist[jj];
|
||||
|
||||
if (j < nall) factor_lj = 1.0;
|
||||
else {
|
||||
factor_lj = special_lj[j/nall];
|
||||
j %= nall;
|
||||
}
|
||||
|
||||
delx = xtmp - x[j][0];
|
||||
dely = ytmp - x[j][1];
|
||||
delz = ztmp - x[j][2];
|
||||
rsq = delx*delx + dely*dely + delz*delz;
|
||||
jtype = type[j];
|
||||
|
||||
if (rsq < cutsq[itype][jtype]) {
|
||||
const int cgt=cg_type[itype][jtype];
|
||||
r2inv = 1.0/rsq;
|
||||
|
||||
fpair = factor_lj;
|
||||
if (eflag) evdwl = factor_lj;
|
||||
if (cgt == CG_LJ12_4) {
|
||||
const double r4inv = r2inv*r2inv;
|
||||
fpair *= r4inv*(lj1[itype][jtype]*r4inv*r4inv
|
||||
- lj2[itype][jtype]);
|
||||
if (eflag) {
|
||||
evdwl *= r4inv*(lj3[itype][jtype]*r4inv*r4inv
|
||||
- lj4[itype][jtype]) - offset[itype][jtype];
|
||||
}
|
||||
} else if (cgt == CG_LJ9_6) {
|
||||
const double r3inv = r2inv*sqrt(r2inv);
|
||||
const double r6inv = r3inv*r3inv;
|
||||
fpair *= r6inv*(lj1[itype][jtype]*r3inv
|
||||
- lj2[itype][jtype]);
|
||||
if (eflag) {
|
||||
evdwl *= r6inv*(lj3[itype][jtype]*r3inv
|
||||
- lj4[itype][jtype]) - offset[itype][jtype];
|
||||
}
|
||||
} else {
|
||||
const double r6inv = r2inv*r2inv*r2inv;
|
||||
fpair *= r6inv*(lj1[itype][jtype]*r6inv
|
||||
- lj2[itype][jtype]);
|
||||
if (eflag) {
|
||||
evdwl *= r6inv*(lj3[itype][jtype]*r6inv
|
||||
- lj4[itype][jtype]) - offset[itype][jtype];
|
||||
}
|
||||
}
|
||||
fpair *= r2inv;
|
||||
|
||||
f[i][0] += delx*fpair;
|
||||
f[i][1] += dely*fpair;
|
||||
f[i][2] += delz*fpair;
|
||||
|
||||
if (evflag) ev_tally_full(i,evdwl,0.0,fpair,delx,dely,delz);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void PairCGCMMGPU::cpu_compute(int *nbors, int start, int eflag, int vflag) {
|
||||
int i,j,itype,jtype;
|
||||
int nlocal = atom->nlocal;
|
||||
int nall = nlocal + atom->nghost;
|
||||
int stride = nlocal-start;
|
||||
double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
|
||||
double rsq,r2inv,r6inv,forcelj,factor_lj;
|
||||
double *special_lj = force->special_lj;
|
||||
|
||||
double **x = atom->x;
|
||||
double **f = atom->f;
|
||||
int *type = atom->type;
|
||||
|
||||
// loop over neighbors of my atoms
|
||||
|
||||
for (i = start; i < nlocal; i++) {
|
||||
xtmp = x[i][0];
|
||||
ytmp = x[i][1];
|
||||
ztmp = x[i][2];
|
||||
itype = type[i];
|
||||
int *nbor = nbors + i - start;
|
||||
int jnum = *nbor;
|
||||
nbor += stride;
|
||||
int *nbor_end = nbor + stride * jnum;
|
||||
|
||||
for (; nbor<nbor_end; nbor+=stride) {
|
||||
j = *nbor;
|
||||
|
||||
if (j < nall) factor_lj = 1.0;
|
||||
else {
|
||||
factor_lj = special_lj[j/nall];
|
||||
j %= nall;
|
||||
}
|
||||
|
||||
delx = xtmp - x[j][0];
|
||||
dely = ytmp - x[j][1];
|
||||
delz = ztmp - x[j][2];
|
||||
rsq = delx*delx + dely*dely + delz*delz;
|
||||
jtype = type[j];
|
||||
|
||||
if (rsq < cutsq[itype][jtype]) {
|
||||
const int cgt=cg_type[itype][jtype];
|
||||
r2inv = 1.0/rsq;
|
||||
|
||||
fpair = factor_lj;
|
||||
if (eflag) evdwl = factor_lj;
|
||||
if (cgt == CG_LJ12_4) {
|
||||
const double r4inv = r2inv*r2inv;
|
||||
fpair *= r4inv*(lj1[itype][jtype]*r4inv*r4inv
|
||||
- lj2[itype][jtype]);
|
||||
if (eflag) {
|
||||
evdwl *= r4inv*(lj3[itype][jtype]*r4inv*r4inv
|
||||
- lj4[itype][jtype]) - offset[itype][jtype];
|
||||
}
|
||||
} else if (cgt == CG_LJ9_6) {
|
||||
const double r3inv = r2inv*sqrt(r2inv);
|
||||
const double r6inv = r3inv*r3inv;
|
||||
fpair *= r6inv*(lj1[itype][jtype]*r3inv
|
||||
- lj2[itype][jtype]);
|
||||
if (eflag) {
|
||||
evdwl *= r6inv*(lj3[itype][jtype]*r3inv
|
||||
- lj4[itype][jtype]) - offset[itype][jtype];
|
||||
}
|
||||
} else {
|
||||
const double r6inv = r2inv*r2inv*r2inv;
|
||||
fpair *= r6inv*(lj1[itype][jtype]*r6inv
|
||||
- lj2[itype][jtype]);
|
||||
if (eflag) {
|
||||
evdwl *= r6inv*(lj3[itype][jtype]*r6inv
|
||||
- lj4[itype][jtype]) - offset[itype][jtype];
|
||||
}
|
||||
}
|
||||
fpair *= r2inv;
|
||||
|
||||
f[i][0] += delx*fpair;
|
||||
f[i][1] += dely*fpair;
|
||||
f[i][2] += delz*fpair;
|
||||
|
||||
if (j<start) {
|
||||
if (evflag) ev_tally_full(i,evdwl,0.0,fpair,delx,dely,delz);
|
||||
} else {
|
||||
if (j<nlocal) {
|
||||
f[j][0] -= delx*fpair;
|
||||
f[j][1] -= dely*fpair;
|
||||
f[j][2] -= delz*fpair;
|
||||
}
|
||||
if (evflag) ev_tally(i,j,nlocal,0,
|
||||
evdwl,0.0,fpair,delx,dely,delz);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,47 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifdef PAIR_CLASS
|
||||
|
||||
PairStyle(cg/cmm/gpu,PairCGCMMGPU)
|
||||
|
||||
#else
|
||||
|
||||
#ifndef LMP_PAIR_CG_CMM_GPU_H
|
||||
#define LMP_PAIR_CG_CMM_GPU_H
|
||||
|
||||
#include "pair_cg_cmm.h"
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
class PairCGCMMGPU : public PairCGCMM {
|
||||
public:
|
||||
PairCGCMMGPU(LAMMPS *lmp);
|
||||
~PairCGCMMGPU();
|
||||
void cpu_compute(int, int, int);
|
||||
void cpu_compute(int *, int, int, int);
|
||||
void compute(int, int);
|
||||
void init_style();
|
||||
double memory_usage();
|
||||
|
||||
enum { GPU_PAIR, GPU_NEIGH };
|
||||
|
||||
private:
|
||||
int gpu_mode;
|
||||
double cpu_time;
|
||||
int *gpulist;
|
||||
};
|
||||
|
||||
}
|
||||
#endif
|
||||
#endif
|
|
@ -31,123 +31,47 @@
|
|||
#include "error.h"
|
||||
#include "neigh_request.h"
|
||||
#include "universe.h"
|
||||
|
||||
#include <string>
|
||||
|
||||
#ifdef GB_GPU_OMP
|
||||
#include "omp.h"
|
||||
#endif
|
||||
#include "domain.h"
|
||||
#include "update.h"
|
||||
#include "string.h"
|
||||
|
||||
#define MIN(a,b) ((a) < (b) ? (a) : (b))
|
||||
#define MAX(a,b) ((a) > (b) ? (a) : (b))
|
||||
|
||||
#ifndef WINDLL
|
||||
|
||||
// External functions from cuda library for atom decomposition
|
||||
|
||||
bool gb_gpu_init(int &ij_size, const int ntypes, const double gamma,
|
||||
const double upsilon, const double mu, double **shape,
|
||||
double **well, double **cutsq, double **sigma,
|
||||
double **epsilon, double *host_lshape, int **form,
|
||||
double **host_lj1, double **host_lj2, double **host_lj3,
|
||||
double **host_lj4, double **offset, double *special_lj,
|
||||
const int nlocal, const int nall, const int max_nbors,
|
||||
const int thread, const int gpu_id);
|
||||
void gb_gpu_clear(const int thread);
|
||||
int * gb_gpu_reset_nbors(const int nall, const int nlocal, const int inum,
|
||||
int *ilist, const int *numj, const int *type,
|
||||
const int thread, bool &success);
|
||||
void gb_gpu_nbors(const int *ij, const int num_ij, const bool eflag,
|
||||
const int thread);
|
||||
void gb_gpu_atom(double **host_x, double **host_quat, const int *host_type,
|
||||
const bool rebuild, const int thread);
|
||||
void gb_gpu_gayberne(const bool eflag, const bool vflag, const bool rebuild,
|
||||
const int thread);
|
||||
double gb_gpu_forces(double **f, double **tor, const int *ilist,
|
||||
const bool eflag, const bool vflag, const bool eflag_atom,
|
||||
const bool vflag_atom, double *eatom, double **vatom,
|
||||
double *virial, const int thread);
|
||||
void gb_gpu_name(const int gpu_id, const int max_nbors, char * name);
|
||||
void gb_gpu_time(const int thread);
|
||||
int gb_gpu_num_devices();
|
||||
bool gb_gpu_init(const int ntypes, const double gamma, const double upsilon,
|
||||
const double mu, double **shape, double **well, double **cutsq,
|
||||
double **sigma, double **epsilon, double *host_lshape,
|
||||
int **form, double **host_lj1, double **host_lj2,
|
||||
double **host_lj3, double **host_lj4, double **offset,
|
||||
double *special_lj, const int nlocal, const int nall,
|
||||
const int max_nbors, const double cell_size,
|
||||
int &gpu_mode, FILE *screen);
|
||||
void gb_gpu_clear();
|
||||
int * gb_gpu_compute_n(const int timestep, const int ago, const int inum,
|
||||
const int nall, double **host_x, int *host_type,
|
||||
double *boxlo, double *boxhi, const bool eflag,
|
||||
const bool vflag, const bool eatom, const bool vatom,
|
||||
int &host_start, const double cpu_time, bool &success,
|
||||
double **host_quat);
|
||||
int * gb_gpu_compute(const int timestep, const int ago, const int inum,
|
||||
const int nall, double **host_x, int *host_type,
|
||||
int *ilist, int *numj, int **firstneigh,
|
||||
const bool eflag, const bool vflag, const bool eatom,
|
||||
const bool vatom, int &host_start, const double cpu_time,
|
||||
bool &success, double **host_quat);
|
||||
double gb_gpu_bytes();
|
||||
|
||||
#else
|
||||
#include <windows.h>
|
||||
|
||||
typedef bool (*_gb_gpu_init)(int &ij_size, const int ntypes, const double gamma,
|
||||
const double upsilon, const double mu, double **shape,
|
||||
double **well, double **cutsq, double **sigma,
|
||||
double **epsilon, double *host_lshape, int **form,
|
||||
double **host_lj1, double **host_lj2, double **host_lj3,
|
||||
double **host_lj4, double **offset, double *special_lj,
|
||||
const int nlocal, const int nall, const int max_nbors,
|
||||
const int thread, const int gpu_id);
|
||||
typedef void (*_gb_gpu_clear)(const int thread);
|
||||
typedef int * (*_gb_gpu_reset_nbors)(const int nall, const int nlocal,
|
||||
const int inum, int *ilist, const int *numj, const int *type,
|
||||
const int thread, bool &success);
|
||||
typedef void (*_gb_gpu_nbors)(const int *ij, const int num_ij, const bool eflag,
|
||||
const int thread);
|
||||
typedef void (*_gb_gpu_atom)(double **host_x, double **host_quat,
|
||||
const int *host_type, const bool rebuild, const int thread);
|
||||
typedef void (*_gb_gpu_gayberne)(const bool eflag, const bool vflag,
|
||||
const bool rebuild, const int thread);
|
||||
typedef double (*_gb_gpu_forces)(double **f, double **tor, const int *ilist,
|
||||
const bool eflag, const bool vflag, const bool eflag_atom,
|
||||
const bool vflag_atom, double *eatom, double **vatom,
|
||||
double *virial, const int thread);
|
||||
typedef void (*_gb_gpu_name)(const int gpu_id, const int max_nbors,
|
||||
char * name);
|
||||
typedef void (*_gb_gpu_time)(const int thread);
|
||||
typedef int (*_gb_gpu_num_devices)();
|
||||
typedef double (*_gb_gpu_bytes)();
|
||||
|
||||
_gb_gpu_init gb_gpu_init;
|
||||
_gb_gpu_clear gb_gpu_clear;
|
||||
_gb_gpu_reset_nbors gb_gpu_reset_nbors;
|
||||
_gb_gpu_nbors gb_gpu_nbors;
|
||||
_gb_gpu_atom gb_gpu_atom;
|
||||
_gb_gpu_gayberne gb_gpu_gayberne;
|
||||
_gb_gpu_forces gb_gpu_forces;
|
||||
_gb_gpu_name gb_gpu_name;
|
||||
_gb_gpu_time gb_gpu_time;
|
||||
_gb_gpu_num_devices gb_gpu_num_devices;
|
||||
_gb_gpu_bytes gb_gpu_bytes;
|
||||
|
||||
#endif
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
|
||||
enum{SPHERE_SPHERE,SPHERE_ELLIPSE,ELLIPSE_SPHERE,ELLIPSE_ELLIPSE};
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
PairGayBerneGPU::PairGayBerneGPU(LAMMPS *lmp) : PairGayBerne(lmp), my_thread(0),
|
||||
omp_chunk(0), nthreads(1),
|
||||
multi_gpu_mode(ONE_NODE),
|
||||
multi_gpu_param(0),
|
||||
output_time(false)
|
||||
PairGayBerneGPU::PairGayBerneGPU(LAMMPS *lmp) : PairGayBerne(lmp),
|
||||
gpu_mode(GPU_PAIR)
|
||||
{
|
||||
ij_new[0]=NULL;
|
||||
|
||||
#ifdef WINDLL
|
||||
HINSTANCE hinstLib = LoadLibrary(TEXT("gpu.dll"));
|
||||
if (hinstLib == NULL) {
|
||||
printf("\nUnable to load gpu.dll\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
gb_gpu_init=(_gb_gpu_init)GetProcAddress(hinstLib,"gb_gpu_init");
|
||||
gb_gpu_clear=(_gb_gpu_clear)GetProcAddress(hinstLib,"gb_gpu_clear");
|
||||
gb_gpu_reset_nbors=(_gb_gpu_reset_nbors)GetProcAddress(hinstLib,"gb_gpu_reset_nbors");
|
||||
gb_gpu_nbors=(_gb_gpu_nbors)GetProcAddress(hinstLib,"gb_gpu_nbors");
|
||||
gb_gpu_atom=(_gb_gpu_atom)GetProcAddress(hinstLib,"gb_gpu_atom");
|
||||
gb_gpu_gayberne=(_gb_gpu_gayberne)GetProcAddress(hinstLib,"gb_gpu_gayberne");
|
||||
gb_gpu_forces=(_gb_gpu_forces)GetProcAddress(hinstLib,"gb_gpu_forces");
|
||||
gb_gpu_name=(_gb_gpu_name)GetProcAddress(hinstLib,"gb_gpu_name");
|
||||
gb_gpu_time=(_gb_gpu_time)GetProcAddress(hinstLib,"gb_gpu_time");
|
||||
gb_gpu_num_devices=(_gb_gpu_num_devices)GetProcAddress(hinstLib,"gb_gpu_num_devices");
|
||||
gb_gpu_bytes=(_gb_gpu_bytes)GetProcAddress(hinstLib,"gb_gpu_bytes");
|
||||
#endif
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
|
@ -156,26 +80,8 @@ PairGayBerneGPU::PairGayBerneGPU(LAMMPS *lmp) : PairGayBerne(lmp), my_thread(0),
|
|||
|
||||
PairGayBerneGPU::~PairGayBerneGPU()
|
||||
{
|
||||
if (output_time) {
|
||||
printf("\n\n-------------------------------------");
|
||||
printf("--------------------------------\n");
|
||||
printf(" GPU Time Stamps (on proc 0): ");
|
||||
printf("\n-------------------------------------");
|
||||
printf("--------------------------------\n");
|
||||
gb_gpu_time(my_thread);
|
||||
printf("-------------------------------------");
|
||||
printf("--------------------------------\n\n");
|
||||
}
|
||||
|
||||
#pragma omp parallel
|
||||
{
|
||||
#ifdef GB_GPU_OMP
|
||||
int my_thread=omp_get_thread_num();
|
||||
#endif
|
||||
gb_gpu_clear(my_thread);
|
||||
if (ij_new[my_thread]!=NULL)
|
||||
delete [] ij_new[my_thread];
|
||||
}
|
||||
gb_gpu_clear();
|
||||
cpu_time = 0.0;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
@ -183,141 +89,38 @@ PairGayBerneGPU::~PairGayBerneGPU()
|
|||
void PairGayBerneGPU::compute(int eflag, int vflag)
|
||||
{
|
||||
if (eflag || vflag) ev_setup(eflag,vflag);
|
||||
else evflag = vflag_fdotr = eflag_atom = vflag_atom = 0;
|
||||
if (vflag_atom)
|
||||
error->all("Per-atom virial not available with GPU Gay-Berne");
|
||||
else evflag = vflag_fdotr = 0;
|
||||
|
||||
int nlocal = atom->nlocal;
|
||||
int nall = nlocal + atom->nghost;
|
||||
int inum = list->inum;
|
||||
|
||||
bool rebuild=false;
|
||||
if (neighbor->ncalls > last_neighbor) {
|
||||
last_neighbor=neighbor->ncalls;
|
||||
rebuild=true;
|
||||
int nall = atom->nlocal + atom->nghost;
|
||||
int inum, host_start;
|
||||
|
||||
bool success = true;
|
||||
|
||||
if (gpu_mode == GPU_NEIGH) {
|
||||
inum = atom->nlocal;
|
||||
gpulist = gb_gpu_compute_n(update->ntimestep, neighbor->ago, inum, nall,
|
||||
atom->x, atom->type, domain->sublo, domain->subhi,
|
||||
eflag, vflag, eflag_atom, vflag_atom, host_start,
|
||||
cpu_time, success, atom->quat);
|
||||
} else {
|
||||
inum = list->inum;
|
||||
olist = gb_gpu_compute(update->ntimestep, neighbor->ago, inum, nall, atom->x,
|
||||
atom->type, list->ilist, list->numneigh,
|
||||
list->firstneigh, eflag, vflag, eflag_atom,
|
||||
vflag_atom, host_start, cpu_time, success,
|
||||
atom->quat);
|
||||
}
|
||||
|
||||
#pragma omp parallel
|
||||
{
|
||||
|
||||
bool success=true;
|
||||
#ifdef GB_GPU_OMP
|
||||
int my_thread=omp_get_thread_num();
|
||||
if (rebuild) {
|
||||
omp_chunk=static_cast<int>(ceil(static_cast<double>(inum)/nthreads));
|
||||
if (my_thread==nthreads-1)
|
||||
thread_inum[my_thread]=inum-(nthreads-1)*omp_chunk;
|
||||
else
|
||||
thread_inum[my_thread]=omp_chunk;
|
||||
olist[my_thread]=gb_gpu_reset_nbors(nall, atom->nlocal,
|
||||
thread_inum[my_thread],
|
||||
list->ilist+omp_chunk*my_thread,
|
||||
list->numneigh, atom->type, my_thread,
|
||||
success);
|
||||
}
|
||||
#else
|
||||
if (rebuild)
|
||||
olist[my_thread]=gb_gpu_reset_nbors(nall, atom->nlocal, inum, list->ilist,
|
||||
list->numneigh, atom->type, my_thread,
|
||||
success);
|
||||
#endif
|
||||
if (!success)
|
||||
error->one("Out of memory on GPGPU");
|
||||
|
||||
// copy atom data to GPU
|
||||
gb_gpu_atom(atom->x,atom->quat,atom->type,rebuild,my_thread);
|
||||
|
||||
int i,j,ii,jj,jnum;
|
||||
double factor_lj;
|
||||
int *jlist;
|
||||
|
||||
if (rebuild==true) {
|
||||
int num_ij = 0;
|
||||
|
||||
// loop over neighbors of my atoms
|
||||
int *ijp=ij_new[my_thread];
|
||||
#ifdef GB_GPU_OMP
|
||||
int mgo=my_thread*omp_chunk;
|
||||
int mgot=mgo+thread_inum[my_thread];
|
||||
#else
|
||||
int mgo=0, mgot=inum;
|
||||
#endif
|
||||
for (ii = mgo; ii<mgot; ii++) {
|
||||
i = olist[my_thread][ii];
|
||||
jlist = list->firstneigh[i];
|
||||
jnum = list->numneigh[i];
|
||||
|
||||
for (jj = 0; jj < jnum; jj++) {
|
||||
j = jlist[jj];
|
||||
|
||||
*ijp=j;
|
||||
ijp++;
|
||||
num_ij++;
|
||||
|
||||
if (num_ij==ij_size) {
|
||||
gb_gpu_nbors(ij_new[my_thread],num_ij,eflag,my_thread);
|
||||
ijp=ij_new[my_thread];
|
||||
num_ij=0;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (num_ij>0)
|
||||
gb_gpu_nbors(ij_new[my_thread],num_ij,eflag,my_thread);
|
||||
if (host_start < inum) {
|
||||
cpu_time = MPI_Wtime();
|
||||
if (gpu_mode == GPU_NEIGH)
|
||||
cpu_compute(gpulist,host_start,eflag,vflag);
|
||||
else
|
||||
cpu_compute(host_start,eflag,vflag);
|
||||
cpu_time = MPI_Wtime() - cpu_time;
|
||||
}
|
||||
|
||||
gb_gpu_gayberne(eflag,vflag,rebuild,my_thread);
|
||||
double lvirial[6];
|
||||
for (int i=0; i<6; i++) lvirial[i]=0.0;
|
||||
double my_eng=gb_gpu_forces(atom->f,atom->torque,olist[my_thread],eflag,vflag,
|
||||
eflag_atom, vflag_atom, eatom, vatom, lvirial,
|
||||
my_thread);
|
||||
#pragma omp critical
|
||||
{
|
||||
eng_vdwl+=my_eng;
|
||||
virial[0]+=lvirial[0];
|
||||
virial[1]+=lvirial[1];
|
||||
virial[2]+=lvirial[2];
|
||||
virial[3]+=lvirial[3];
|
||||
virial[4]+=lvirial[4];
|
||||
virial[5]+=lvirial[5];
|
||||
}
|
||||
|
||||
} //End parallel
|
||||
|
||||
if (vflag_fdotr) virial_compute();
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
global settings
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void PairGayBerneGPU::settings(int narg, char **arg)
|
||||
{
|
||||
// strip off GPU keyword/value and send remaining args to parent
|
||||
|
||||
if (narg < 2) error->all("Illegal pair_style command");
|
||||
|
||||
// set multi_gpu_mode to one/node for multiple gpus on 1 node
|
||||
// -- param is starting gpu id
|
||||
// set multi_gpu_mode to one/gpu to select the same gpu id on every node
|
||||
// -- param is id of gpu
|
||||
// set multi_gpu_mode to multi/gpu to get ma
|
||||
// -- param is number of gpus per node
|
||||
|
||||
if (strcmp(arg[0],"one/node") == 0)
|
||||
multi_gpu_mode = ONE_NODE;
|
||||
else if (strcmp(arg[0],"one/gpu") == 0)
|
||||
multi_gpu_mode = ONE_GPU;
|
||||
else if (strcmp(arg[0],"multi/gpu") == 0)
|
||||
multi_gpu_mode = MULTI_GPU;
|
||||
else error->all("Illegal pair_style command");
|
||||
|
||||
multi_gpu_param = atoi(arg[1]);
|
||||
|
||||
if (multi_gpu_mode == MULTI_GPU && multi_gpu_param < 1)
|
||||
error->all("Illegal pair_style command");
|
||||
|
||||
PairGayBerne::settings(narg-2,&arg[2]);
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
|
@ -326,8 +129,6 @@ void PairGayBerneGPU::settings(int narg, char **arg)
|
|||
|
||||
void PairGayBerneGPU::init_style()
|
||||
{
|
||||
if (comm->me == 0)
|
||||
output_time=true;
|
||||
if (force->pair_match("gpu",0) == NULL)
|
||||
error->all("Cannot use pair hybrid with multiple GPU pair styles");
|
||||
if (!atom->quat_flag || !atom->torque_flag || !atom->avec->shape_type)
|
||||
|
@ -335,24 +136,7 @@ void PairGayBerneGPU::init_style()
|
|||
if (atom->radius_flag)
|
||||
error->all("Pair gayberne cannot be used with atom attribute diameter");
|
||||
|
||||
// set the GPU ID
|
||||
|
||||
int my_gpu=comm->me+multi_gpu_param;
|
||||
int ngpus=universe->nprocs;
|
||||
if (multi_gpu_mode==ONE_GPU) {
|
||||
my_gpu=multi_gpu_param;
|
||||
ngpus=1;
|
||||
} else if (multi_gpu_mode==MULTI_GPU) {
|
||||
ngpus=multi_gpu_param;
|
||||
my_gpu=comm->me%ngpus;
|
||||
if (ngpus>universe->nprocs)
|
||||
ngpus=universe->nprocs;
|
||||
}
|
||||
|
||||
int irequest = neighbor->request(this);
|
||||
|
||||
// per-type shape precalculations
|
||||
|
||||
for (int i = 1; i <= atom->ntypes; i++) {
|
||||
if (setwell[i]) {
|
||||
double *one = atom->shape[i];
|
||||
|
@ -364,72 +148,38 @@ void PairGayBerneGPU::init_style()
|
|||
}
|
||||
|
||||
// Repeat cutsq calculation because done after call to init_style
|
||||
double maxcut = -1.0;
|
||||
double cut;
|
||||
for (int i = 1; i <= atom->ntypes; i++)
|
||||
for (int i = 1; i <= atom->ntypes; i++) {
|
||||
for (int j = i; j <= atom->ntypes; j++) {
|
||||
cut = init_one(i,j);
|
||||
cutsq[i][j] = cutsq[j][i] = cut*cut;
|
||||
if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
|
||||
cut = init_one(i,j);
|
||||
cut *= cut;
|
||||
if (cut > maxcut)
|
||||
maxcut = cut;
|
||||
cutsq[i][j] = cutsq[j][i] = cut;
|
||||
} else
|
||||
cutsq[i][j] = cutsq[j][i] = 0.0;
|
||||
}
|
||||
}
|
||||
|
||||
// If compiled with OpenMP and only 1 proc, try to use multiple GPUs w/threads
|
||||
#ifdef GB_GPU_OMP
|
||||
if (multi_gpu_mode!=ONE_GPU)
|
||||
nthreads=ngpus=gb_gpu_num_devices();
|
||||
else
|
||||
nthreads=ngpus=1;
|
||||
if (nthreads>MAX_GPU_THREADS)
|
||||
nthreads=MAX_GPU_THREADS;
|
||||
omp_set_num_threads(nthreads);
|
||||
#endif
|
||||
|
||||
#pragma omp parallel firstprivate(my_gpu)
|
||||
{
|
||||
#ifdef GB_GPU_OMP
|
||||
int my_thread = omp_get_thread_num();
|
||||
if (multi_gpu_mode!=ONE_GPU)
|
||||
my_gpu=my_thread;
|
||||
if (multi_gpu_mode==ONE_NODE)
|
||||
my_gpu+=multi_gpu_param;
|
||||
#endif
|
||||
|
||||
bool init_ok=gb_gpu_init(ij_size, atom->ntypes+1, gamma, upsilon, mu,
|
||||
double cell_size = sqrt(maxcut) + neighbor->skin;
|
||||
|
||||
bool init_ok = gb_gpu_init(atom->ntypes+1, gamma, upsilon, mu,
|
||||
shape, well, cutsq, sigma, epsilon, lshape, form,
|
||||
lj1, lj2, lj3, lj4, offset, force->special_lj,
|
||||
atom->nlocal, atom->nlocal+atom->nghost, 300,
|
||||
my_thread, my_gpu);
|
||||
if (!init_ok)
|
||||
error->one("At least 1 proc could not allocate a CUDA gpu or memory");
|
||||
|
||||
if (ij_new[my_thread]!=NULL)
|
||||
delete [] ij_new[my_thread];
|
||||
ij_new[my_thread]=new int[ij_size];
|
||||
}
|
||||
|
||||
last_neighbor = -1;
|
||||
neighbor->requests[irequest]->half = 0;
|
||||
neighbor->requests[irequest]->full = 1;
|
||||
cell_size, gpu_mode, screen);
|
||||
if (!init_ok)
|
||||
error->one("Insufficient memory on accelerator (or no fix gpu).");
|
||||
|
||||
if (force->newton_pair)
|
||||
error->all("Cannot use newton pair with GPU GayBerne pair style");
|
||||
error->all("Cannot use newton pair with GPU Gay-Berne pair style");
|
||||
|
||||
if (comm->me == 0 && screen) {
|
||||
printf("\n-------------------------------------");
|
||||
printf("-------------------------------------\n");
|
||||
printf("- Using GPGPU acceleration for Gay-Berne:\n");
|
||||
printf("-------------------------------------");
|
||||
printf("-------------------------------------\n");
|
||||
|
||||
for (int i=0; i<ngpus; i++) {
|
||||
int gpui=my_gpu;
|
||||
if (multi_gpu_mode==ONE_NODE)
|
||||
gpui=i+multi_gpu_param;
|
||||
else if (multi_gpu_mode==MULTI_GPU)
|
||||
gpui=i;
|
||||
char gpu_string[500];
|
||||
gb_gpu_name(gpui,neighbor->oneatom,gpu_string);
|
||||
printf("GPU %d: %s\n",gpui,gpu_string);
|
||||
}
|
||||
printf("-------------------------------------");
|
||||
printf("-------------------------------------\n\n");
|
||||
if (gpu_mode != GPU_NEIGH) {
|
||||
int irequest = neighbor->request(this);
|
||||
neighbor->requests[irequest]->half = 0;
|
||||
neighbor->requests[irequest]->full = 1;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -437,6 +187,275 @@ void PairGayBerneGPU::init_style()
|
|||
|
||||
double PairGayBerneGPU::memory_usage()
|
||||
{
|
||||
double bytes=Pair::memory_usage()+nthreads*ij_size*sizeof(int);
|
||||
return bytes+gb_gpu_bytes();
|
||||
double bytes = Pair::memory_usage();
|
||||
return bytes + gb_gpu_bytes();
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void PairGayBerneGPU::cpu_compute(int start, int eflag, int vflag)
|
||||
{
|
||||
int i,j,ii,jj,inum,jnum,itype,jtype;
|
||||
double evdwl,one_eng,rsq,r2inv,r6inv,forcelj,factor_lj;
|
||||
double fforce[3],ttor[3],rtor[3],r12[3];
|
||||
double a1[3][3],b1[3][3],g1[3][3],a2[3][3],b2[3][3],g2[3][3],temp[3][3];
|
||||
int *ilist,*jlist,*numneigh,**firstneigh;
|
||||
|
||||
double **x = atom->x;
|
||||
double **f = atom->f;
|
||||
double **quat = atom->quat;
|
||||
double **tor = atom->torque;
|
||||
int *type = atom->type;
|
||||
int nlocal = atom->nlocal;
|
||||
int nall = nlocal + atom->nghost;
|
||||
double *special_lj = force->special_lj;
|
||||
|
||||
inum = list->inum;
|
||||
ilist = olist;
|
||||
numneigh = list->numneigh;
|
||||
firstneigh = list->firstneigh;
|
||||
|
||||
// loop over neighbors of my atoms
|
||||
|
||||
for (ii = start; ii < inum; ii++) {
|
||||
i = ilist[ii];
|
||||
itype = type[i];
|
||||
|
||||
if (form[itype][itype] == ELLIPSE_ELLIPSE) {
|
||||
MathExtra::quat_to_mat_trans(quat[i],a1);
|
||||
MathExtra::diag_times3(well[itype],a1,temp);
|
||||
MathExtra::transpose_times3(a1,temp,b1);
|
||||
MathExtra::diag_times3(shape[itype],a1,temp);
|
||||
MathExtra::transpose_times3(a1,temp,g1);
|
||||
}
|
||||
|
||||
jlist = firstneigh[i];
|
||||
jnum = numneigh[i];
|
||||
|
||||
for (jj = 0; jj < jnum; jj++) {
|
||||
j = jlist[jj];
|
||||
|
||||
if (j < nall) factor_lj = 1.0;
|
||||
else {
|
||||
factor_lj = special_lj[j/nall];
|
||||
j %= nall;
|
||||
}
|
||||
|
||||
// r12 = center to center vector
|
||||
|
||||
r12[0] = x[j][0]-x[i][0];
|
||||
r12[1] = x[j][1]-x[i][1];
|
||||
r12[2] = x[j][2]-x[i][2];
|
||||
rsq = MathExtra::dot3(r12,r12);
|
||||
jtype = type[j];
|
||||
|
||||
// compute if less than cutoff
|
||||
|
||||
if (rsq < cutsq[itype][jtype]) {
|
||||
|
||||
switch (form[itype][jtype]) {
|
||||
case SPHERE_SPHERE:
|
||||
r2inv = 1.0/rsq;
|
||||
r6inv = r2inv*r2inv*r2inv;
|
||||
forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
|
||||
forcelj *= -r2inv;
|
||||
if (eflag) one_eng =
|
||||
r6inv*(r6inv*lj3[itype][jtype]-lj4[itype][jtype]) -
|
||||
offset[itype][jtype];
|
||||
fforce[0] = r12[0]*forcelj;
|
||||
fforce[1] = r12[1]*forcelj;
|
||||
fforce[2] = r12[2]*forcelj;
|
||||
ttor[0] = ttor[1] = ttor[2] = 0.0;
|
||||
rtor[0] = rtor[1] = rtor[2] = 0.0;
|
||||
break;
|
||||
|
||||
case SPHERE_ELLIPSE:
|
||||
MathExtra::quat_to_mat_trans(quat[j],a2);
|
||||
MathExtra::diag_times3(well[jtype],a2,temp);
|
||||
MathExtra::transpose_times3(a2,temp,b2);
|
||||
MathExtra::diag_times3(shape[jtype],a2,temp);
|
||||
MathExtra::transpose_times3(a2,temp,g2);
|
||||
one_eng = gayberne_lj(j,i,a2,b2,g2,r12,rsq,fforce,rtor);
|
||||
ttor[0] = ttor[1] = ttor[2] = 0.0;
|
||||
break;
|
||||
|
||||
case ELLIPSE_SPHERE:
|
||||
one_eng = gayberne_lj(i,j,a1,b1,g1,r12,rsq,fforce,ttor);
|
||||
rtor[0] = rtor[1] = rtor[2] = 0.0;
|
||||
break;
|
||||
|
||||
default:
|
||||
MathExtra::quat_to_mat_trans(quat[j],a2);
|
||||
MathExtra::diag_times3(well[jtype],a2,temp);
|
||||
MathExtra::transpose_times3(a2,temp,b2);
|
||||
MathExtra::diag_times3(shape[jtype],a2,temp);
|
||||
MathExtra::transpose_times3(a2,temp,g2);
|
||||
one_eng = gayberne_analytic(i,j,a1,a2,b1,b2,g1,g2,r12,rsq,
|
||||
fforce,ttor,rtor);
|
||||
break;
|
||||
}
|
||||
|
||||
fforce[0] *= factor_lj;
|
||||
fforce[1] *= factor_lj;
|
||||
fforce[2] *= factor_lj;
|
||||
ttor[0] *= factor_lj;
|
||||
ttor[1] *= factor_lj;
|
||||
ttor[2] *= factor_lj;
|
||||
|
||||
f[i][0] += fforce[0];
|
||||
f[i][1] += fforce[1];
|
||||
f[i][2] += fforce[2];
|
||||
tor[i][0] += ttor[0];
|
||||
tor[i][1] += ttor[1];
|
||||
tor[i][2] += ttor[2];
|
||||
|
||||
if (eflag) evdwl = factor_lj*one_eng;
|
||||
|
||||
if (evflag) ev_tally_xyz_full(i,evdwl,0.0,fforce[0],fforce[1],fforce[2],
|
||||
-r12[0],-r12[1],-r12[2]);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void PairGayBerneGPU::cpu_compute(int *nbors, int start, int eflag, int vflag)
|
||||
{
|
||||
int i,j,itype,jtype;
|
||||
double evdwl,one_eng,rsq,r2inv,r6inv,forcelj,factor_lj;
|
||||
double fforce[3],ttor[3],rtor[3],r12[3];
|
||||
double a1[3][3],b1[3][3],g1[3][3],a2[3][3],b2[3][3],g2[3][3],temp[3][3];
|
||||
|
||||
double **x = atom->x;
|
||||
double **f = atom->f;
|
||||
double **quat = atom->quat;
|
||||
double **tor = atom->torque;
|
||||
int *type = atom->type;
|
||||
int nlocal = atom->nlocal;
|
||||
int nall = nlocal + atom->nghost;
|
||||
int stride = nlocal-start;
|
||||
double *special_lj = force->special_lj;
|
||||
|
||||
// loop over neighbors of my atoms
|
||||
|
||||
for (i = start; i < nlocal; i++) {
|
||||
itype = type[i];
|
||||
|
||||
if (form[itype][itype] == ELLIPSE_ELLIPSE) {
|
||||
MathExtra::quat_to_mat_trans(quat[i],a1);
|
||||
MathExtra::diag_times3(well[itype],a1,temp);
|
||||
MathExtra::transpose_times3(a1,temp,b1);
|
||||
MathExtra::diag_times3(shape[itype],a1,temp);
|
||||
MathExtra::transpose_times3(a1,temp,g1);
|
||||
}
|
||||
|
||||
int *nbor = nbors+i-start;
|
||||
int jnum =* nbor;
|
||||
nbor += stride;
|
||||
int *nbor_end = nbor + stride * jnum;
|
||||
|
||||
for ( ; nbor < nbor_end; nbor += stride) {
|
||||
j = *nbor;
|
||||
|
||||
if (j < nall) factor_lj = 1.0;
|
||||
else {
|
||||
factor_lj = special_lj[j/nall];
|
||||
j %= nall;
|
||||
}
|
||||
|
||||
// r12 = center to center vector
|
||||
|
||||
r12[0] = x[j][0]-x[i][0];
|
||||
r12[1] = x[j][1]-x[i][1];
|
||||
r12[2] = x[j][2]-x[i][2];
|
||||
rsq = MathExtra::dot3(r12,r12);
|
||||
jtype = type[j];
|
||||
|
||||
// compute if less than cutoff
|
||||
|
||||
if (rsq < cutsq[itype][jtype]) {
|
||||
|
||||
switch (form[itype][jtype]) {
|
||||
case SPHERE_SPHERE:
|
||||
r2inv = 1.0/rsq;
|
||||
r6inv = r2inv*r2inv*r2inv;
|
||||
forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
|
||||
forcelj *= -r2inv;
|
||||
if (eflag) one_eng =
|
||||
r6inv*(r6inv*lj3[itype][jtype]-lj4[itype][jtype]) -
|
||||
offset[itype][jtype];
|
||||
fforce[0] = r12[0]*forcelj;
|
||||
fforce[1] = r12[1]*forcelj;
|
||||
fforce[2] = r12[2]*forcelj;
|
||||
ttor[0] = ttor[1] = ttor[2] = 0.0;
|
||||
rtor[0] = rtor[1] = rtor[2] = 0.0;
|
||||
break;
|
||||
|
||||
case SPHERE_ELLIPSE:
|
||||
MathExtra::quat_to_mat_trans(quat[j],a2);
|
||||
MathExtra::diag_times3(well[jtype],a2,temp);
|
||||
MathExtra::transpose_times3(a2,temp,b2);
|
||||
MathExtra::diag_times3(shape[jtype],a2,temp);
|
||||
MathExtra::transpose_times3(a2,temp,g2);
|
||||
one_eng = gayberne_lj(j,i,a2,b2,g2,r12,rsq,fforce,rtor);
|
||||
ttor[0] = ttor[1] = ttor[2] = 0.0;
|
||||
break;
|
||||
|
||||
case ELLIPSE_SPHERE:
|
||||
one_eng = gayberne_lj(i,j,a1,b1,g1,r12,rsq,fforce,ttor);
|
||||
rtor[0] = rtor[1] = rtor[2] = 0.0;
|
||||
break;
|
||||
|
||||
default:
|
||||
MathExtra::quat_to_mat_trans(quat[j],a2);
|
||||
MathExtra::diag_times3(well[jtype],a2,temp);
|
||||
MathExtra::transpose_times3(a2,temp,b2);
|
||||
MathExtra::diag_times3(shape[jtype],a2,temp);
|
||||
MathExtra::transpose_times3(a2,temp,g2);
|
||||
one_eng = gayberne_analytic(i,j,a1,a2,b1,b2,g1,g2,r12,rsq,
|
||||
fforce,ttor,rtor);
|
||||
break;
|
||||
}
|
||||
|
||||
fforce[0] *= factor_lj;
|
||||
fforce[1] *= factor_lj;
|
||||
fforce[2] *= factor_lj;
|
||||
ttor[0] *= factor_lj;
|
||||
ttor[1] *= factor_lj;
|
||||
ttor[2] *= factor_lj;
|
||||
|
||||
f[i][0] += fforce[0];
|
||||
f[i][1] += fforce[1];
|
||||
f[i][2] += fforce[2];
|
||||
tor[i][0] += ttor[0];
|
||||
tor[i][1] += ttor[1];
|
||||
tor[i][2] += ttor[2];
|
||||
|
||||
if (eflag) evdwl = factor_lj*one_eng;
|
||||
|
||||
if (j<start) {
|
||||
if (evflag) ev_tally_xyz_full(i,evdwl,0.0,fforce[0],fforce[1],
|
||||
fforce[2],-r12[0],-r12[1],-r12[2]);
|
||||
} else {
|
||||
if (j < nlocal) {
|
||||
rtor[0] *= factor_lj;
|
||||
rtor[1] *= factor_lj;
|
||||
rtor[2] *= factor_lj;
|
||||
f[j][0] -= fforce[0];
|
||||
f[j][1] -= fforce[1];
|
||||
f[j][2] -= fforce[2];
|
||||
tor[j][0] += rtor[0];
|
||||
tor[j][1] += rtor[1];
|
||||
tor[j][2] += rtor[2];
|
||||
}
|
||||
if (evflag) ev_tally_xyz(i,j,nlocal,0,
|
||||
evdwl,0.0,fforce[0],fforce[1],fforce[2],
|
||||
-r12[0],-r12[1],-r12[2]);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -29,21 +29,19 @@ class PairGayBerneGPU : public PairGayBerne {
|
|||
public:
|
||||
PairGayBerneGPU(LAMMPS *lmp);
|
||||
~PairGayBerneGPU();
|
||||
void cpu_compute(int, int, int);
|
||||
void cpu_compute(int *, int, int, int);
|
||||
void compute(int, int);
|
||||
void settings(int, char **);
|
||||
void init_style();
|
||||
double memory_usage();
|
||||
|
||||
enum { ONE_NODE, ONE_GPU, MULTI_GPU };
|
||||
|
||||
private:
|
||||
int ij_size;
|
||||
int *ij_new[MAX_GPU_THREADS], *olist[MAX_GPU_THREADS];
|
||||
|
||||
int my_thread, nthreads, thread_inum[MAX_GPU_THREADS], omp_chunk;
|
||||
|
||||
int last_neighbor, multi_gpu_mode, multi_gpu_param;
|
||||
bool output_time;
|
||||
enum { GPU_PAIR, GPU_NEIGH };
|
||||
|
||||
private:
|
||||
int *olist;
|
||||
int gpu_mode;
|
||||
double cpu_time;
|
||||
int *gpulist;
|
||||
};
|
||||
|
||||
}
|
||||
|
|
|
@ -0,0 +1,318 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
Contributing author: Mike Brown (SNL)
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include "math.h"
|
||||
#include "stdio.h"
|
||||
#include "stdlib.h"
|
||||
#include "pair_lj96_cut_gpu.h"
|
||||
#include "atom.h"
|
||||
#include "atom_vec.h"
|
||||
#include "comm.h"
|
||||
#include "force.h"
|
||||
#include "neighbor.h"
|
||||
#include "neigh_list.h"
|
||||
#include "integrate.h"
|
||||
#include "memory.h"
|
||||
#include "error.h"
|
||||
#include "neigh_request.h"
|
||||
#include "universe.h"
|
||||
#include "update.h"
|
||||
#include "domain.h"
|
||||
#include "string.h"
|
||||
|
||||
#define MIN(a,b) ((a) < (b) ? (a) : (b))
|
||||
#define MAX(a,b) ((a) > (b) ? (a) : (b))
|
||||
|
||||
// External functions from cuda library for atom decomposition
|
||||
|
||||
bool lj96_gpu_init(const int ntypes, double **cutsq, double **host_lj1,
|
||||
double **host_lj2, double **host_lj3, double **host_lj4,
|
||||
double **offset, double *special_lj, const int nlocal,
|
||||
const int nall, const int max_nbors, const int maxspecial,
|
||||
const double cell_size, int &gpu_mode, FILE *screen);
|
||||
void lj96_gpu_clear();
|
||||
int * lj96_gpu_compute_n(const int timestep, const int ago, const int inum,
|
||||
const int nall, double **host_x, int *host_type,
|
||||
double *boxlo, double *boxhi, int *tag, int **nspecial,
|
||||
int **special, const bool eflag, const bool vflag,
|
||||
const bool eatom, const bool vatom, int &host_start,
|
||||
const double cpu_time, bool &success);
|
||||
void lj96_gpu_compute(const int timestep, const int ago, const int inum,
|
||||
const int nall, double **host_x, int *host_type,
|
||||
int *ilist, int *numj, int **firstneigh,
|
||||
const bool eflag, const bool vflag, const bool eatom,
|
||||
const bool vatom, int &host_start, const double cpu_time,
|
||||
bool &success);
|
||||
double lj96_gpu_bytes();
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
PairLJ96CutGPU::PairLJ96CutGPU(LAMMPS *lmp) : PairLJ96Cut(lmp), gpu_mode(GPU_PAIR)
|
||||
{
|
||||
respa_enable = 0;
|
||||
cpu_time = 0.0;
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
free all arrays
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
PairLJ96CutGPU::~PairLJ96CutGPU()
|
||||
{
|
||||
lj96_gpu_clear();
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void PairLJ96CutGPU::compute(int eflag, int vflag)
|
||||
{
|
||||
if (eflag || vflag) ev_setup(eflag,vflag);
|
||||
else evflag = vflag_fdotr = 0;
|
||||
|
||||
int nall = atom->nlocal + atom->nghost;
|
||||
int inum, host_start;
|
||||
|
||||
bool success = true;
|
||||
|
||||
if (gpu_mode == GPU_NEIGH) {
|
||||
inum = atom->nlocal;
|
||||
gpulist = lj96_gpu_compute_n(update->ntimestep, neighbor->ago, inum, nall,
|
||||
atom->x, atom->type, domain->sublo,
|
||||
domain->subhi, atom->tag, atom->nspecial,
|
||||
atom->special, eflag, vflag, eflag_atom,
|
||||
vflag_atom, host_start, cpu_time, success);
|
||||
} else {
|
||||
inum = list->inum;
|
||||
lj96_gpu_compute(update->ntimestep, neighbor->ago, inum, nall, atom->x,
|
||||
atom->type, list->ilist, list->numneigh, list->firstneigh,
|
||||
eflag, vflag, eflag_atom, vflag_atom, host_start, cpu_time,
|
||||
success);
|
||||
}
|
||||
if (!success)
|
||||
error->one("Out of memory on GPGPU");
|
||||
|
||||
if (host_start<inum) {
|
||||
cpu_time = MPI_Wtime();
|
||||
if (gpu_mode == GPU_NEIGH)
|
||||
cpu_compute(gpulist, host_start, eflag, vflag);
|
||||
else
|
||||
cpu_compute(host_start, eflag, vflag);
|
||||
cpu_time = MPI_Wtime() - cpu_time;
|
||||
}
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
init specific to this pair style
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void PairLJ96CutGPU::init_style()
|
||||
{
|
||||
cut_respa = NULL;
|
||||
|
||||
if (force->pair_match("gpu",0) == NULL)
|
||||
error->all("Cannot use pair hybrid with multiple GPU pair styles");
|
||||
|
||||
// Repeat cutsq calculation because done after call to init_style
|
||||
double maxcut = -1.0;
|
||||
double cut;
|
||||
for (int i = 1; i <= atom->ntypes; i++) {
|
||||
for (int j = i; j <= atom->ntypes; j++) {
|
||||
if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
|
||||
cut = init_one(i,j);
|
||||
cut *= cut;
|
||||
if (cut > maxcut)
|
||||
maxcut = cut;
|
||||
cutsq[i][j] = cutsq[j][i] = cut;
|
||||
} else
|
||||
cutsq[i][j] = cutsq[j][i] = 0.0;
|
||||
}
|
||||
}
|
||||
double cell_size = sqrt(maxcut) + neighbor->skin;
|
||||
|
||||
int maxspecial=0;
|
||||
if (atom->molecular)
|
||||
maxspecial=atom->maxspecial;
|
||||
bool init_ok = lj96_gpu_init(atom->ntypes+1, cutsq, lj1, lj2, lj3, lj4,
|
||||
offset, force->special_lj, atom->nlocal,
|
||||
atom->nlocal+atom->nghost, 300, maxspecial,
|
||||
cell_size, gpu_mode, screen);
|
||||
if (!init_ok)
|
||||
error->one("Insufficient memory on accelerator (or no fix gpu).\n");
|
||||
|
||||
if (force->newton_pair)
|
||||
error->all("Cannot use newton pair with GPU LJ96 pair style");
|
||||
|
||||
if (gpu_mode != GPU_NEIGH) {
|
||||
int irequest = neighbor->request(this);
|
||||
neighbor->requests[irequest]->half = 0;
|
||||
neighbor->requests[irequest]->full = 1;
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
double PairLJ96CutGPU::memory_usage()
|
||||
{
|
||||
double bytes = Pair::memory_usage();
|
||||
return bytes + lj96_gpu_bytes();
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void PairLJ96CutGPU::cpu_compute(int start, int eflag, int vflag) {
|
||||
int i,j,ii,jj,inum,jnum,itype,jtype;
|
||||
double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
|
||||
double rsq,r2inv,r3inv,r6inv,forcelj,factor_lj;
|
||||
int *ilist,*jlist,*numneigh,**firstneigh;
|
||||
|
||||
double **x = atom->x;
|
||||
double **f = atom->f;
|
||||
int *type = atom->type;
|
||||
int nlocal = atom->nlocal;
|
||||
int nall = nlocal + atom->nghost;
|
||||
double *special_lj = force->special_lj;
|
||||
|
||||
inum = list->inum;
|
||||
ilist = list->ilist;
|
||||
numneigh = list->numneigh;
|
||||
firstneigh = list->firstneigh;
|
||||
|
||||
// loop over neighbors of my atoms
|
||||
|
||||
for (ii = start; ii < inum; ii++) {
|
||||
i = ilist[ii];
|
||||
xtmp = x[i][0];
|
||||
ytmp = x[i][1];
|
||||
ztmp = x[i][2];
|
||||
itype = type[i];
|
||||
jlist = firstneigh[i];
|
||||
jnum = numneigh[i];
|
||||
|
||||
for (jj = 0; jj < jnum; jj++) {
|
||||
j = jlist[jj];
|
||||
|
||||
if (j < nall) factor_lj = 1.0;
|
||||
else {
|
||||
factor_lj = special_lj[j/nall];
|
||||
j %= nall;
|
||||
}
|
||||
|
||||
delx = xtmp - x[j][0];
|
||||
dely = ytmp - x[j][1];
|
||||
delz = ztmp - x[j][2];
|
||||
rsq = delx*delx + dely*dely + delz*delz;
|
||||
jtype = type[j];
|
||||
|
||||
if (rsq < cutsq[itype][jtype]) {
|
||||
r2inv = 1.0/rsq;
|
||||
r6inv = r2inv*r2inv*r2inv;
|
||||
r3inv = sqrt(r6inv);
|
||||
forcelj = r6inv * (lj1[itype][jtype]*r3inv - lj2[itype][jtype]);
|
||||
fpair = factor_lj*forcelj*r2inv;
|
||||
|
||||
f[i][0] += delx*fpair;
|
||||
f[i][1] += dely*fpair;
|
||||
f[i][2] += delz*fpair;
|
||||
|
||||
if (eflag) {
|
||||
evdwl = r6inv*(lj3[itype][jtype]*r3inv-lj4[itype][jtype]) -
|
||||
offset[itype][jtype];
|
||||
evdwl *= factor_lj;
|
||||
}
|
||||
|
||||
if (evflag) ev_tally_full(i,evdwl,0.0,fpair,delx,dely,delz);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void PairLJ96CutGPU::cpu_compute(int *nbors, int start, int eflag, int vflag) {
|
||||
int i,j,itype,jtype;
|
||||
int nlocal = atom->nlocal;
|
||||
int nall = nlocal + atom->nghost;
|
||||
int stride = nlocal-start;
|
||||
double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
|
||||
double rsq,r2inv,r3inv,r6inv,forcelj,factor_lj;
|
||||
double *special_lj = force->special_lj;
|
||||
|
||||
double **x = atom->x;
|
||||
double **f = atom->f;
|
||||
int *type = atom->type;
|
||||
|
||||
// loop over neighbors of my atoms
|
||||
|
||||
for (i = start; i < nlocal; i++) {
|
||||
xtmp = x[i][0];
|
||||
ytmp = x[i][1];
|
||||
ztmp = x[i][2];
|
||||
itype = type[i];
|
||||
int *nbor = nbors + i - start;
|
||||
int jnum = *nbor;
|
||||
nbor += stride;
|
||||
int *nbor_end = nbor + stride * jnum;
|
||||
|
||||
for (; nbor<nbor_end; nbor+=stride) {
|
||||
j = *nbor;
|
||||
|
||||
if (j < nall) factor_lj = 1.0;
|
||||
else {
|
||||
factor_lj = special_lj[j/nall];
|
||||
j %= nall;
|
||||
}
|
||||
|
||||
delx = xtmp - x[j][0];
|
||||
dely = ytmp - x[j][1];
|
||||
delz = ztmp - x[j][2];
|
||||
rsq = delx*delx + dely*dely + delz*delz;
|
||||
jtype = type[j];
|
||||
|
||||
if (rsq < cutsq[itype][jtype]) {
|
||||
r2inv = 1.0/rsq;
|
||||
r6inv = r2inv*r2inv*r2inv;
|
||||
r3inv = sqrt(r6inv);
|
||||
forcelj = r6inv * (lj1[itype][jtype]*r3inv - lj2[itype][jtype]);
|
||||
fpair = factor_lj*forcelj*r2inv;
|
||||
|
||||
f[i][0] += delx*fpair;
|
||||
f[i][1] += dely*fpair;
|
||||
f[i][2] += delz*fpair;
|
||||
|
||||
if (eflag) {
|
||||
evdwl = r6inv*(lj3[itype][jtype]*r3inv-lj4[itype][jtype]) -
|
||||
offset[itype][jtype];
|
||||
evdwl *= factor_lj;
|
||||
}
|
||||
|
||||
if (j<start) {
|
||||
if (evflag) ev_tally_full(i,evdwl,0.0,fpair,delx,dely,delz);
|
||||
} else {
|
||||
if (j<nlocal) {
|
||||
f[j][0] -= delx*fpair;
|
||||
f[j][1] -= dely*fpair;
|
||||
f[j][2] -= delz*fpair;
|
||||
}
|
||||
if (evflag) ev_tally(i,j,nlocal,0,
|
||||
evdwl,0.0,fpair,delx,dely,delz);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,48 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifdef PAIR_CLASS
|
||||
|
||||
PairStyle(lj96/cut/gpu,PairLJ96CutGPU)
|
||||
|
||||
#else
|
||||
|
||||
#ifndef LMP_PAIR_LJ_96_GPU_H
|
||||
#define LMP_PAIR_LJ_96_GPU_H
|
||||
|
||||
#include "pair_lj96_cut.h"
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
class PairLJ96CutGPU : public PairLJ96Cut {
|
||||
public:
|
||||
PairLJ96CutGPU(LAMMPS *lmp);
|
||||
~PairLJ96CutGPU();
|
||||
void cpu_compute(int, int, int);
|
||||
void cpu_compute(int *, int, int, int);
|
||||
void compute(int, int);
|
||||
void init_style();
|
||||
double memory_usage();
|
||||
|
||||
enum { GPU_PAIR, GPU_NEIGH };
|
||||
|
||||
private:
|
||||
int gpu_mode;
|
||||
double cpu_time;
|
||||
int *gpulist;
|
||||
};
|
||||
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
|
|
@ -0,0 +1,364 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
Contributing author: Mike Brown (SNL)
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include "math.h"
|
||||
#include "stdio.h"
|
||||
#include "stdlib.h"
|
||||
#include "pair_lj_cut_coul_cut_gpu.h"
|
||||
#include "atom.h"
|
||||
#include "atom_vec.h"
|
||||
#include "comm.h"
|
||||
#include "force.h"
|
||||
#include "neighbor.h"
|
||||
#include "neigh_list.h"
|
||||
#include "integrate.h"
|
||||
#include "memory.h"
|
||||
#include "error.h"
|
||||
#include "neigh_request.h"
|
||||
#include "universe.h"
|
||||
#include "update.h"
|
||||
#include "domain.h"
|
||||
#include "string.h"
|
||||
|
||||
#define MIN(a,b) ((a) < (b) ? (a) : (b))
|
||||
#define MAX(a,b) ((a) > (b) ? (a) : (b))
|
||||
|
||||
// External functions from cuda library for atom decomposition
|
||||
|
||||
bool ljc_gpu_init(const int ntypes, double **cutsq, double **host_lj1,
|
||||
double **host_lj2, double **host_lj3, double **host_lj4,
|
||||
double **offset, double *special_lj, const int nlocal,
|
||||
const int nall, const int max_nbors, const int maxspecial,
|
||||
const double cell_size, int &gpu_mode, FILE *screen,
|
||||
double **host_cut_ljsq, double **host_cut_coulsq,
|
||||
double *host_special_coul, const double qqrd2e);
|
||||
void ljc_gpu_clear();
|
||||
int * ljc_gpu_compute_n(const int timestep, const int ago, const int inum,
|
||||
const int nall, double **host_x, int *host_type,
|
||||
double *boxlo, double *boxhi, int *tag, int **nspecial,
|
||||
int **special, const bool eflag, const bool vflag,
|
||||
const bool eatom, const bool vatom, int &host_start,
|
||||
const double cpu_time, bool &success, double *host_q);
|
||||
void ljc_gpu_compute(const int timestep, const int ago, const int inum,
|
||||
const int nall, double **host_x, int *host_type,
|
||||
int *ilist, int *numj, int **firstneigh,
|
||||
const bool eflag, const bool vflag, const bool eatom,
|
||||
const bool vatom, int &host_start, const double cpu_time,
|
||||
bool &success, double *host_q);
|
||||
double ljc_gpu_bytes();
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
PairLJCutCoulCutGPU::PairLJCutCoulCutGPU(LAMMPS *lmp) : PairLJCutCoulCut(lmp), gpu_mode(GPU_PAIR)
|
||||
{
|
||||
respa_enable = 0;
|
||||
cpu_time = 0.0;
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
free all arrays
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
PairLJCutCoulCutGPU::~PairLJCutCoulCutGPU()
|
||||
{
|
||||
ljc_gpu_clear();
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void PairLJCutCoulCutGPU::compute(int eflag, int vflag)
|
||||
{
|
||||
if (eflag || vflag) ev_setup(eflag,vflag);
|
||||
else evflag = vflag_fdotr = 0;
|
||||
|
||||
int nall = atom->nlocal + atom->nghost;
|
||||
int inum, host_start;
|
||||
|
||||
bool success = true;
|
||||
|
||||
if (gpu_mode == GPU_NEIGH) {
|
||||
inum = atom->nlocal;
|
||||
gpulist = ljc_gpu_compute_n(update->ntimestep, neighbor->ago, inum, nall,
|
||||
atom->x, atom->type, domain->sublo,
|
||||
domain->subhi, atom->tag, atom->nspecial,
|
||||
atom->special, eflag, vflag, eflag_atom,
|
||||
vflag_atom, host_start, cpu_time, success,
|
||||
atom->q);
|
||||
} else {
|
||||
inum = list->inum;
|
||||
ljc_gpu_compute(update->ntimestep, neighbor->ago, inum, nall, atom->x,
|
||||
atom->type, list->ilist, list->numneigh, list->firstneigh,
|
||||
eflag, vflag, eflag_atom, vflag_atom, host_start, cpu_time,
|
||||
success, atom->q);
|
||||
}
|
||||
if (!success)
|
||||
error->one("Out of memory on GPGPU");
|
||||
|
||||
if (host_start<inum) {
|
||||
cpu_time = MPI_Wtime();
|
||||
if (gpu_mode == GPU_NEIGH)
|
||||
cpu_compute(gpulist, host_start, eflag, vflag);
|
||||
else
|
||||
cpu_compute(host_start, eflag, vflag);
|
||||
cpu_time = MPI_Wtime() - cpu_time;
|
||||
}
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
init specific to this pair style
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void PairLJCutCoulCutGPU::init_style()
|
||||
{
|
||||
if (!atom->q_flag)
|
||||
error->all("Pair style lj/cut/coul/cut requires atom attribute q");
|
||||
if (force->pair_match("gpu",0) == NULL)
|
||||
error->all("Cannot use pair hybrid with multiple GPU pair styles");
|
||||
|
||||
// Repeat cutsq calculation because done after call to init_style
|
||||
double maxcut = -1.0;
|
||||
double cut;
|
||||
for (int i = 1; i <= atom->ntypes; i++) {
|
||||
for (int j = i; j <= atom->ntypes; j++) {
|
||||
if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
|
||||
cut = init_one(i,j);
|
||||
cut *= cut;
|
||||
if (cut > maxcut)
|
||||
maxcut = cut;
|
||||
cutsq[i][j] = cutsq[j][i] = cut;
|
||||
} else
|
||||
cutsq[i][j] = cutsq[j][i] = 0.0;
|
||||
}
|
||||
}
|
||||
double cell_size = sqrt(maxcut) + neighbor->skin;
|
||||
|
||||
int maxspecial=0;
|
||||
if (atom->molecular)
|
||||
maxspecial=atom->maxspecial;
|
||||
bool init_ok = ljc_gpu_init(atom->ntypes+1, cutsq, lj1, lj2, lj3, lj4,
|
||||
offset, force->special_lj, atom->nlocal,
|
||||
atom->nlocal+atom->nghost, 300, maxspecial,
|
||||
cell_size, gpu_mode, screen, cut_ljsq, cut_coulsq,
|
||||
force->special_coul, force->qqrd2e);
|
||||
if (!init_ok)
|
||||
error->one("Insufficient memory on accelerator (or no fix gpu).\n");
|
||||
|
||||
if (force->newton_pair)
|
||||
error->all("Cannot use newton pair with GPU LJ pair style");
|
||||
|
||||
if (gpu_mode != GPU_NEIGH) {
|
||||
int irequest = neighbor->request(this);
|
||||
neighbor->requests[irequest]->half = 0;
|
||||
neighbor->requests[irequest]->full = 1;
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
double PairLJCutCoulCutGPU::memory_usage()
|
||||
{
|
||||
double bytes = Pair::memory_usage();
|
||||
return bytes + ljc_gpu_bytes();
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void PairLJCutCoulCutGPU::cpu_compute(int start, int eflag, int vflag)
|
||||
{
|
||||
int i,j,ii,jj,inum,jnum,itype,jtype;
|
||||
double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair;
|
||||
double rsq,r2inv,r6inv,forcecoul,forcelj,factor_coul,factor_lj;
|
||||
int *ilist,*jlist,*numneigh,**firstneigh;
|
||||
|
||||
evdwl = ecoul = 0.0;
|
||||
|
||||
double **x = atom->x;
|
||||
double **f = atom->f;
|
||||
double *q = atom->q;
|
||||
int *type = atom->type;
|
||||
int nlocal = atom->nlocal;
|
||||
int nall = nlocal + atom->nghost;
|
||||
double *special_coul = force->special_coul;
|
||||
double *special_lj = force->special_lj;
|
||||
int newton_pair = force->newton_pair;
|
||||
double qqrd2e = force->qqrd2e;
|
||||
|
||||
inum = list->inum;
|
||||
ilist = list->ilist;
|
||||
numneigh = list->numneigh;
|
||||
firstneigh = list->firstneigh;
|
||||
|
||||
// loop over neighbors of my atoms
|
||||
|
||||
for (ii = start; ii < inum; ii++) {
|
||||
i = ilist[ii];
|
||||
qtmp = q[i];
|
||||
xtmp = x[i][0];
|
||||
ytmp = x[i][1];
|
||||
ztmp = x[i][2];
|
||||
itype = type[i];
|
||||
jlist = firstneigh[i];
|
||||
jnum = numneigh[i];
|
||||
|
||||
for (jj = 0; jj < jnum; jj++) {
|
||||
j = jlist[jj];
|
||||
|
||||
if (j < nall) factor_coul = factor_lj = 1.0;
|
||||
else {
|
||||
factor_coul = special_coul[j/nall];
|
||||
factor_lj = special_lj[j/nall];
|
||||
j %= nall;
|
||||
}
|
||||
|
||||
delx = xtmp - x[j][0];
|
||||
dely = ytmp - x[j][1];
|
||||
delz = ztmp - x[j][2];
|
||||
rsq = delx*delx + dely*dely + delz*delz;
|
||||
jtype = type[j];
|
||||
|
||||
if (rsq < cutsq[itype][jtype]) {
|
||||
r2inv = 1.0/rsq;
|
||||
|
||||
if (rsq < cut_coulsq[itype][jtype])
|
||||
forcecoul = qqrd2e * qtmp*q[j]*sqrt(r2inv);
|
||||
else forcecoul = 0.0;
|
||||
|
||||
if (rsq < cut_ljsq[itype][jtype]) {
|
||||
r6inv = r2inv*r2inv*r2inv;
|
||||
forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
|
||||
} else forcelj = 0.0;
|
||||
|
||||
fpair = (factor_coul*forcecoul + factor_lj*forcelj) * r2inv;
|
||||
|
||||
f[i][0] += delx*fpair;
|
||||
f[i][1] += dely*fpair;
|
||||
f[i][2] += delz*fpair;
|
||||
|
||||
if (eflag) {
|
||||
if (rsq < cut_coulsq[itype][jtype])
|
||||
ecoul = factor_coul * qqrd2e * qtmp*q[j]*sqrt(r2inv);
|
||||
else ecoul = 0.0;
|
||||
if (rsq < cut_ljsq[itype][jtype]) {
|
||||
evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]) -
|
||||
offset[itype][jtype];
|
||||
evdwl *= factor_lj;
|
||||
} else evdwl = 0.0;
|
||||
}
|
||||
|
||||
if (evflag) ev_tally_full(i,evdwl,ecoul,fpair,delx,dely,delz);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void PairLJCutCoulCutGPU::cpu_compute(int *nbors, int start, int eflag,
|
||||
int vflag)
|
||||
{
|
||||
int i,j,jnum,itype,jtype;
|
||||
double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair;
|
||||
double rsq,r2inv,r6inv,forcecoul,forcelj,factor_coul,factor_lj;
|
||||
|
||||
evdwl = ecoul = 0.0;
|
||||
|
||||
double **x = atom->x;
|
||||
double **f = atom->f;
|
||||
double *q = atom->q;
|
||||
int *type = atom->type;
|
||||
int nlocal = atom->nlocal;
|
||||
int nall = nlocal + atom->nghost;
|
||||
int stride = nlocal-start;
|
||||
double *special_coul = force->special_coul;
|
||||
double *special_lj = force->special_lj;
|
||||
double qqrd2e = force->qqrd2e;
|
||||
|
||||
// loop over neighbors of my atoms
|
||||
|
||||
for (i = start; i < nlocal; i++) {
|
||||
qtmp = q[i];
|
||||
xtmp = x[i][0];
|
||||
ytmp = x[i][1];
|
||||
ztmp = x[i][2];
|
||||
itype = type[i];
|
||||
int *nbor = nbors + i - start;
|
||||
jnum = *nbor;
|
||||
nbor += stride;
|
||||
int *nbor_end = nbor + stride * jnum;
|
||||
|
||||
for (; nbor<nbor_end; nbor+=stride) {
|
||||
j = *nbor;
|
||||
|
||||
if (j < nall) factor_coul = factor_lj = 1.0;
|
||||
else {
|
||||
factor_coul = special_coul[j/nall];
|
||||
factor_lj = special_lj[j/nall];
|
||||
j %= nall;
|
||||
}
|
||||
|
||||
delx = xtmp - x[j][0];
|
||||
dely = ytmp - x[j][1];
|
||||
delz = ztmp - x[j][2];
|
||||
rsq = delx*delx + dely*dely + delz*delz;
|
||||
jtype = type[j];
|
||||
|
||||
if (rsq < cutsq[itype][jtype]) {
|
||||
r2inv = 1.0/rsq;
|
||||
|
||||
if (rsq < cut_coulsq[itype][jtype])
|
||||
forcecoul = qqrd2e * qtmp*q[j]*sqrt(r2inv);
|
||||
else forcecoul = 0.0;
|
||||
|
||||
if (rsq < cut_ljsq[itype][jtype]) {
|
||||
r6inv = r2inv*r2inv*r2inv;
|
||||
forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
|
||||
} else forcelj = 0.0;
|
||||
|
||||
fpair = (factor_coul*forcecoul + factor_lj*forcelj) * r2inv;
|
||||
|
||||
f[i][0] += delx*fpair;
|
||||
f[i][1] += dely*fpair;
|
||||
f[i][2] += delz*fpair;
|
||||
|
||||
if (eflag) {
|
||||
if (rsq < cut_coulsq[itype][jtype])
|
||||
ecoul = factor_coul * qqrd2e * qtmp*q[j]*sqrt(r2inv);
|
||||
else ecoul = 0.0;
|
||||
if (rsq < cut_ljsq[itype][jtype]) {
|
||||
evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]) -
|
||||
offset[itype][jtype];
|
||||
evdwl *= factor_lj;
|
||||
} else evdwl = 0.0;
|
||||
}
|
||||
|
||||
if (j<start) {
|
||||
if (evflag) ev_tally_full(i,evdwl,ecoul,fpair,delx,dely,delz);
|
||||
} else {
|
||||
if (j<nlocal) {
|
||||
f[j][0] -= delx*fpair;
|
||||
f[j][1] -= dely*fpair;
|
||||
f[j][2] -= delz*fpair;
|
||||
}
|
||||
if (evflag) ev_tally(i,j,nlocal,0,
|
||||
evdwl,ecoul,fpair,delx,dely,delz);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,48 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifdef PAIR_CLASS
|
||||
|
||||
PairStyle(lj/cut/coul/cut/gpu,PairLJCutCoulCutGPU)
|
||||
|
||||
#else
|
||||
|
||||
#ifndef LMP_PAIR_LJ_CUT_COUL_CUT_GPU_H
|
||||
#define LMP_PAIR_LJ_CUT_COUL_CUT_GPU_H
|
||||
|
||||
#include "pair_lj_cut_coul_cut.h"
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
class PairLJCutCoulCutGPU : public PairLJCutCoulCut {
|
||||
public:
|
||||
PairLJCutCoulCutGPU(LAMMPS *lmp);
|
||||
~PairLJCutCoulCutGPU();
|
||||
void cpu_compute(int, int, int);
|
||||
void cpu_compute(int *, int, int, int);
|
||||
void compute(int, int);
|
||||
void init_style();
|
||||
double memory_usage();
|
||||
|
||||
enum { GPU_PAIR, GPU_NEIGH };
|
||||
|
||||
private:
|
||||
int gpu_mode;
|
||||
double cpu_time;
|
||||
int *gpulist;
|
||||
};
|
||||
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
|
|
@ -0,0 +1,452 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
Contributing author: Mike Brown (SNL)
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include "math.h"
|
||||
#include "stdio.h"
|
||||
#include "stdlib.h"
|
||||
#include "pair_lj_cut_coul_long_gpu.h"
|
||||
#include "atom.h"
|
||||
#include "atom_vec.h"
|
||||
#include "comm.h"
|
||||
#include "force.h"
|
||||
#include "neighbor.h"
|
||||
#include "neigh_list.h"
|
||||
#include "integrate.h"
|
||||
#include "memory.h"
|
||||
#include "error.h"
|
||||
#include "neigh_request.h"
|
||||
#include "universe.h"
|
||||
#include "update.h"
|
||||
#include "domain.h"
|
||||
#include "string.h"
|
||||
#include "kspace.h"
|
||||
|
||||
#define MIN(a,b) ((a) < (b) ? (a) : (b))
|
||||
#define MAX(a,b) ((a) > (b) ? (a) : (b))
|
||||
|
||||
#define EWALD_F 1.12837917
|
||||
#define EWALD_P 0.3275911
|
||||
#define A1 0.254829592
|
||||
#define A2 -0.284496736
|
||||
#define A3 1.421413741
|
||||
#define A4 -1.453152027
|
||||
#define A5 1.061405429
|
||||
|
||||
// External functions from cuda library for atom decomposition
|
||||
|
||||
bool ljcl_gpu_init(const int ntypes, double **cutsq, double **host_lj1,
|
||||
double **host_lj2, double **host_lj3, double **host_lj4,
|
||||
double **offset, double *special_lj, const int nlocal,
|
||||
const int nall, const int max_nbors, const int maxspecial,
|
||||
const double cell_size, int &gpu_mode, FILE *screen,
|
||||
double **host_cut_ljsq, double host_cut_coulsq,
|
||||
double *host_special_coul, const double qqrd2e,
|
||||
const double g_ewald);
|
||||
void ljcl_gpu_clear();
|
||||
int * ljcl_gpu_compute_n(const int timestep, const int ago, const int inum,
|
||||
const int nall, double **host_x, int *host_type,
|
||||
double *boxlo, double *boxhi, int *tag, int **nspecial,
|
||||
int **special, const bool eflag, const bool vflag,
|
||||
const bool eatom, const bool vatom, int &host_start,
|
||||
const double cpu_time, bool &success, double *host_q);
|
||||
void ljcl_gpu_compute(const int timestep, const int ago, const int inum,
|
||||
const int nall, double **host_x, int *host_type,
|
||||
int *ilist, int *numj, int **firstneigh,
|
||||
const bool eflag, const bool vflag, const bool eatom,
|
||||
const bool vatom, int &host_start, const double cpu_time,
|
||||
bool &success, double *host_q);
|
||||
double ljcl_gpu_bytes();
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
PairLJCutCoulLongGPU::PairLJCutCoulLongGPU(LAMMPS *lmp) : PairLJCutCoulLong(lmp), gpu_mode(GPU_PAIR)
|
||||
{
|
||||
respa_enable = 0;
|
||||
cpu_time = 0.0;
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
free all arrays
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
PairLJCutCoulLongGPU::~PairLJCutCoulLongGPU()
|
||||
{
|
||||
ljcl_gpu_clear();
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void PairLJCutCoulLongGPU::compute(int eflag, int vflag)
|
||||
{
|
||||
if (eflag || vflag) ev_setup(eflag,vflag);
|
||||
else evflag = vflag_fdotr = 0;
|
||||
|
||||
int nall = atom->nlocal + atom->nghost;
|
||||
int inum, host_start;
|
||||
|
||||
bool success = true;
|
||||
|
||||
if (gpu_mode == GPU_NEIGH) {
|
||||
inum = atom->nlocal;
|
||||
gpulist = ljcl_gpu_compute_n(update->ntimestep, neighbor->ago, inum, nall,
|
||||
atom->x, atom->type, domain->sublo,
|
||||
domain->subhi, atom->tag, atom->nspecial,
|
||||
atom->special, eflag, vflag, eflag_atom,
|
||||
vflag_atom, host_start, cpu_time, success,
|
||||
atom->q);
|
||||
} else {
|
||||
inum = list->inum;
|
||||
ljcl_gpu_compute(update->ntimestep, neighbor->ago, inum, nall, atom->x,
|
||||
atom->type, list->ilist, list->numneigh, list->firstneigh,
|
||||
eflag, vflag, eflag_atom, vflag_atom, host_start, cpu_time,
|
||||
success, atom->q);
|
||||
}
|
||||
if (!success)
|
||||
error->one("Out of memory on GPGPU");
|
||||
|
||||
if (host_start<inum) {
|
||||
cpu_time = MPI_Wtime();
|
||||
if (gpu_mode == GPU_NEIGH)
|
||||
cpu_compute(gpulist, host_start, eflag, vflag);
|
||||
else
|
||||
cpu_compute(host_start, eflag, vflag);
|
||||
cpu_time = MPI_Wtime() - cpu_time;
|
||||
}
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
init specific to this pair style
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void PairLJCutCoulLongGPU::init_style()
|
||||
{
|
||||
cut_respa = NULL;
|
||||
|
||||
if (!atom->q_flag)
|
||||
error->all("Pair style lj/cut/coul/cut requires atom attribute q");
|
||||
if (force->pair_match("gpu",0) == NULL)
|
||||
error->all("Cannot use pair hybrid with multiple GPU pair styles");
|
||||
|
||||
// Repeat cutsq calculation because done after call to init_style
|
||||
double maxcut = -1.0;
|
||||
double cut;
|
||||
for (int i = 1; i <= atom->ntypes; i++) {
|
||||
for (int j = i; j <= atom->ntypes; j++) {
|
||||
if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
|
||||
cut = init_one(i,j);
|
||||
cut *= cut;
|
||||
if (cut > maxcut)
|
||||
maxcut = cut;
|
||||
cutsq[i][j] = cutsq[j][i] = cut;
|
||||
} else
|
||||
cutsq[i][j] = cutsq[j][i] = 0.0;
|
||||
}
|
||||
}
|
||||
double cell_size = sqrt(maxcut) + neighbor->skin;
|
||||
|
||||
cut_coulsq = cut_coul * cut_coul;
|
||||
|
||||
// insure use of KSpace long-range solver, set g_ewald
|
||||
|
||||
if (force->kspace == NULL)
|
||||
error->all("Pair style is incompatible with KSpace style");
|
||||
g_ewald = force->kspace->g_ewald;
|
||||
|
||||
// setup force tables
|
||||
|
||||
if (ncoultablebits) init_tables();
|
||||
|
||||
int maxspecial=0;
|
||||
if (atom->molecular)
|
||||
maxspecial=atom->maxspecial;
|
||||
bool init_ok = ljcl_gpu_init(atom->ntypes+1, cutsq, lj1, lj2, lj3, lj4,
|
||||
offset, force->special_lj, atom->nlocal,
|
||||
atom->nlocal+atom->nghost, 300, maxspecial,
|
||||
cell_size, gpu_mode, screen, cut_ljsq, cut_coulsq,
|
||||
force->special_coul, force->qqrd2e, g_ewald);
|
||||
if (!init_ok)
|
||||
error->one("Insufficient memory on accelerator (or no fix gpu).\n");
|
||||
|
||||
if (force->newton_pair)
|
||||
error->all("Cannot use newton pair with GPU LJ pair style");
|
||||
|
||||
if (gpu_mode != GPU_NEIGH) {
|
||||
int irequest = neighbor->request(this);
|
||||
neighbor->requests[irequest]->half = 0;
|
||||
neighbor->requests[irequest]->full = 1;
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
double PairLJCutCoulLongGPU::memory_usage()
|
||||
{
|
||||
double bytes = Pair::memory_usage();
|
||||
return bytes + ljcl_gpu_bytes();
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void PairLJCutCoulLongGPU::cpu_compute(int start, int eflag, int vflag)
|
||||
{
|
||||
int i,j,ii,jj,inum,jnum,itype,jtype,itable;
|
||||
double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair;
|
||||
double fraction,table;
|
||||
double r,r2inv,r6inv,forcecoul,forcelj,factor_coul,factor_lj;
|
||||
double grij,expm2,prefactor,t,erfc;
|
||||
int *ilist,*jlist,*numneigh,**firstneigh;
|
||||
double rsq;
|
||||
|
||||
evdwl = ecoul = 0.0;
|
||||
|
||||
double **x = atom->x;
|
||||
double **f = atom->f;
|
||||
double *q = atom->q;
|
||||
int *type = atom->type;
|
||||
int nlocal = atom->nlocal;
|
||||
int nall = nlocal + atom->nghost;
|
||||
double *special_coul = force->special_coul;
|
||||
double *special_lj = force->special_lj;
|
||||
double qqrd2e = force->qqrd2e;
|
||||
|
||||
inum = list->inum;
|
||||
ilist = list->ilist;
|
||||
numneigh = list->numneigh;
|
||||
firstneigh = list->firstneigh;
|
||||
|
||||
// loop over neighbors of my atoms
|
||||
|
||||
for (ii = start; ii < inum; ii++) {
|
||||
i = ilist[ii];
|
||||
qtmp = q[i];
|
||||
xtmp = x[i][0];
|
||||
ytmp = x[i][1];
|
||||
ztmp = x[i][2];
|
||||
itype = type[i];
|
||||
jlist = firstneigh[i];
|
||||
jnum = numneigh[i];
|
||||
|
||||
for (jj = 0; jj < jnum; jj++) {
|
||||
j = jlist[jj];
|
||||
|
||||
if (j < nall) factor_coul = factor_lj = 1.0;
|
||||
else {
|
||||
factor_coul = special_coul[j/nall];
|
||||
factor_lj = special_lj[j/nall];
|
||||
j %= nall;
|
||||
}
|
||||
|
||||
delx = xtmp - x[j][0];
|
||||
dely = ytmp - x[j][1];
|
||||
delz = ztmp - x[j][2];
|
||||
rsq = delx*delx + dely*dely + delz*delz;
|
||||
jtype = type[j];
|
||||
|
||||
if (rsq < cutsq[itype][jtype]) {
|
||||
r2inv = 1.0/rsq;
|
||||
|
||||
if (rsq < cut_coulsq) {
|
||||
if (!ncoultablebits || rsq <= tabinnersq) {
|
||||
r = sqrt(rsq);
|
||||
grij = g_ewald * r;
|
||||
expm2 = exp(-grij*grij);
|
||||
t = 1.0 / (1.0 + EWALD_P*grij);
|
||||
erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2;
|
||||
prefactor = qqrd2e * qtmp*q[j]/r;
|
||||
forcecoul = prefactor * (erfc + EWALD_F*grij*expm2);
|
||||
if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*prefactor;
|
||||
} else {
|
||||
union_int_float_t rsq_lookup;
|
||||
rsq_lookup.f = rsq;
|
||||
itable = rsq_lookup.i & ncoulmask;
|
||||
itable >>= ncoulshiftbits;
|
||||
fraction = (rsq_lookup.f - rtable[itable]) * drtable[itable];
|
||||
table = ftable[itable] + fraction*dftable[itable];
|
||||
forcecoul = qtmp*q[j] * table;
|
||||
if (factor_coul < 1.0) {
|
||||
table = ctable[itable] + fraction*dctable[itable];
|
||||
prefactor = qtmp*q[j] * table;
|
||||
forcecoul -= (1.0-factor_coul)*prefactor;
|
||||
}
|
||||
}
|
||||
} else forcecoul = 0.0;
|
||||
|
||||
if (rsq < cut_ljsq[itype][jtype]) {
|
||||
r6inv = r2inv*r2inv*r2inv;
|
||||
forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
|
||||
} else forcelj = 0.0;
|
||||
|
||||
fpair = (forcecoul + factor_lj*forcelj) * r2inv;
|
||||
|
||||
f[i][0] += delx*fpair;
|
||||
f[i][1] += dely*fpair;
|
||||
f[i][2] += delz*fpair;
|
||||
|
||||
if (eflag) {
|
||||
if (rsq < cut_coulsq) {
|
||||
if (!ncoultablebits || rsq <= tabinnersq)
|
||||
ecoul = prefactor*erfc;
|
||||
else {
|
||||
table = etable[itable] + fraction*detable[itable];
|
||||
ecoul = qtmp*q[j] * table;
|
||||
}
|
||||
if (factor_coul < 1.0) ecoul -= (1.0-factor_coul)*prefactor;
|
||||
} else ecoul = 0.0;
|
||||
|
||||
if (rsq < cut_ljsq[itype][jtype]) {
|
||||
evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]) -
|
||||
offset[itype][jtype];
|
||||
evdwl *= factor_lj;
|
||||
} else evdwl = 0.0;
|
||||
}
|
||||
|
||||
if (evflag) ev_tally_full(i,evdwl,ecoul,fpair,delx,dely,delz);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void PairLJCutCoulLongGPU::cpu_compute(int *nbors, int start, int eflag,
|
||||
int vflag)
|
||||
{
|
||||
int i,j,jnum,itype,jtype,itable;
|
||||
double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair;
|
||||
double fraction,table;
|
||||
double r,r2inv,r6inv,forcecoul,forcelj,factor_coul,factor_lj;
|
||||
double grij,expm2,prefactor,t,erfc;
|
||||
double rsq;
|
||||
|
||||
evdwl = ecoul = 0.0;
|
||||
|
||||
double **x = atom->x;
|
||||
double **f = atom->f;
|
||||
double *q = atom->q;
|
||||
int *type = atom->type;
|
||||
int nlocal = atom->nlocal;
|
||||
int nall = nlocal + atom->nghost;
|
||||
int stride = nlocal-start;
|
||||
double *special_coul = force->special_coul;
|
||||
double *special_lj = force->special_lj;
|
||||
double qqrd2e = force->qqrd2e;
|
||||
|
||||
// loop over neighbors of my atoms
|
||||
|
||||
for (i = start; i < nlocal; i++) {
|
||||
qtmp = q[i];
|
||||
xtmp = x[i][0];
|
||||
ytmp = x[i][1];
|
||||
ztmp = x[i][2];
|
||||
itype = type[i];
|
||||
int *nbor = nbors + i - start;
|
||||
jnum = *nbor;
|
||||
nbor += stride;
|
||||
int *nbor_end = nbor + stride * jnum;
|
||||
|
||||
for (; nbor<nbor_end; nbor+=stride) {
|
||||
j = *nbor;
|
||||
|
||||
if (j < nall) factor_coul = factor_lj = 1.0;
|
||||
else {
|
||||
factor_coul = special_coul[j/nall];
|
||||
factor_lj = special_lj[j/nall];
|
||||
j %= nall;
|
||||
}
|
||||
|
||||
delx = xtmp - x[j][0];
|
||||
dely = ytmp - x[j][1];
|
||||
delz = ztmp - x[j][2];
|
||||
rsq = delx*delx + dely*dely + delz*delz;
|
||||
jtype = type[j];
|
||||
|
||||
if (rsq < cutsq[itype][jtype]) {
|
||||
r2inv = 1.0/rsq;
|
||||
|
||||
if (rsq < cut_coulsq) {
|
||||
if (!ncoultablebits || rsq <= tabinnersq) {
|
||||
r = sqrt(rsq);
|
||||
grij = g_ewald * r;
|
||||
expm2 = exp(-grij*grij);
|
||||
t = 1.0 / (1.0 + EWALD_P*grij);
|
||||
erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2;
|
||||
prefactor = qqrd2e * qtmp*q[j]/r;
|
||||
forcecoul = prefactor * (erfc + EWALD_F*grij*expm2);
|
||||
if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*prefactor;
|
||||
} else {
|
||||
union_int_float_t rsq_lookup;
|
||||
rsq_lookup.f = rsq;
|
||||
itable = rsq_lookup.i & ncoulmask;
|
||||
itable >>= ncoulshiftbits;
|
||||
fraction = (rsq_lookup.f - rtable[itable]) * drtable[itable];
|
||||
table = ftable[itable] + fraction*dftable[itable];
|
||||
forcecoul = qtmp*q[j] * table;
|
||||
if (factor_coul < 1.0) {
|
||||
table = ctable[itable] + fraction*dctable[itable];
|
||||
prefactor = qtmp*q[j] * table;
|
||||
forcecoul -= (1.0-factor_coul)*prefactor;
|
||||
}
|
||||
}
|
||||
} else forcecoul = 0.0;
|
||||
|
||||
if (rsq < cut_ljsq[itype][jtype]) {
|
||||
r6inv = r2inv*r2inv*r2inv;
|
||||
forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
|
||||
} else forcelj = 0.0;
|
||||
|
||||
fpair = (forcecoul + factor_lj*forcelj) * r2inv;
|
||||
|
||||
f[i][0] += delx*fpair;
|
||||
f[i][1] += dely*fpair;
|
||||
f[i][2] += delz*fpair;
|
||||
|
||||
if (eflag) {
|
||||
if (rsq < cut_coulsq) {
|
||||
if (!ncoultablebits || rsq <= tabinnersq)
|
||||
ecoul = prefactor*erfc;
|
||||
else {
|
||||
table = etable[itable] + fraction*detable[itable];
|
||||
ecoul = qtmp*q[j] * table;
|
||||
}
|
||||
if (factor_coul < 1.0) ecoul -= (1.0-factor_coul)*prefactor;
|
||||
} else ecoul = 0.0;
|
||||
|
||||
if (rsq < cut_ljsq[itype][jtype]) {
|
||||
evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]) -
|
||||
offset[itype][jtype];
|
||||
evdwl *= factor_lj;
|
||||
} else evdwl = 0.0;
|
||||
}
|
||||
|
||||
if (j<start) {
|
||||
if (evflag) ev_tally_full(i,evdwl,ecoul,fpair,delx,dely,delz);
|
||||
} else {
|
||||
if (j<nlocal) {
|
||||
f[j][0] -= delx*fpair;
|
||||
f[j][1] -= dely*fpair;
|
||||
f[j][2] -= delz*fpair;
|
||||
}
|
||||
if (evflag) ev_tally(i,j,nlocal,0,
|
||||
evdwl,ecoul,fpair,delx,dely,delz);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,48 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifdef PAIR_CLASS
|
||||
|
||||
PairStyle(lj/cut/coul/long/gpu,PairLJCutCoulLongGPU)
|
||||
|
||||
#else
|
||||
|
||||
#ifndef LMP_PAIR_LJ_CUT_COUL_LONG_GPU_H
|
||||
#define LMP_PAIR_LJ_CUT_COUL_LONG_GPU_H
|
||||
|
||||
#include "pair_lj_cut_coul_long.h"
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
class PairLJCutCoulLongGPU : public PairLJCutCoulLong {
|
||||
public:
|
||||
PairLJCutCoulLongGPU(LAMMPS *lmp);
|
||||
~PairLJCutCoulLongGPU();
|
||||
void cpu_compute(int, int, int);
|
||||
void cpu_compute(int *, int, int, int);
|
||||
void compute(int, int);
|
||||
void init_style();
|
||||
double memory_usage();
|
||||
|
||||
enum { GPU_PAIR, GPU_NEIGH };
|
||||
|
||||
private:
|
||||
int gpu_mode;
|
||||
double cpu_time;
|
||||
int *gpulist;
|
||||
};
|
||||
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
|
|
@ -2,28 +2,24 @@
|
|||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
Contributing authors: Mike Brown (SNL), wmbrown@sandia.gov
|
||||
Peng Wang (Nvidia), penwang@nvidia.com
|
||||
Paul Crozier (SNL), pscrozi@sandia.gov
|
||||
Contributing author: Mike Brown (SNL)
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include "math.h"
|
||||
#include "stdio.h"
|
||||
#include "stdlib.h"
|
||||
#include "pair_lj_cut_gpu.h"
|
||||
#include "math_extra.h"
|
||||
#include "atom.h"
|
||||
#include "domain.h"
|
||||
#include "atom_vec.h"
|
||||
#include "comm.h"
|
||||
#include "force.h"
|
||||
|
@ -32,79 +28,45 @@
|
|||
#include "integrate.h"
|
||||
#include "memory.h"
|
||||
#include "error.h"
|
||||
#include "neigh_request.h"
|
||||
#include "universe.h"
|
||||
|
||||
#include <string>
|
||||
#include "update.h"
|
||||
#include "domain.h"
|
||||
#include "string.h"
|
||||
|
||||
#define MIN(a,b) ((a) < (b) ? (a) : (b))
|
||||
#define MAX(a,b) ((a) > (b) ? (a) : (b))
|
||||
|
||||
#ifndef WINDLL
|
||||
// External functions from cuda library for atom decomposition
|
||||
|
||||
// External functions from cuda library for force decomposition
|
||||
|
||||
bool lj_gpu_init(int &ij_size, const int ntypes, double **cutsq,double **sigma,
|
||||
double **epsilon, double **host_lj1, double **host_lj2,
|
||||
double **host_lj3, double **host_lj4, double **offset,
|
||||
double *special_lj, double *boxlo, double *boxhi, double cell_len, double skin,
|
||||
const int max_nbors, const int gpu_id);
|
||||
void lj_gpu_clear();
|
||||
double lj_gpu_cell(double **force, double *virial, double **host_x, int *host_type, const int inum, const int nall,
|
||||
const int ago, const bool eflag, const bool vflag,
|
||||
const double *boxlo, const double *boxhi);
|
||||
void lj_gpu_name(const int gpu_id, const int max_nbors, char * name);
|
||||
void lj_gpu_time();
|
||||
double lj_gpu_bytes();
|
||||
|
||||
#else
|
||||
#include <windows.h>
|
||||
|
||||
typedef bool (*_lj_gpu_init)(int &ij_size, const int ntypes, double **cutsq,double **sigma,
|
||||
double **epsilon, double **host_lj1, double **host_lj2,
|
||||
double **host_lj3, double **host_lj4, double **offset,
|
||||
double *special_lj, double *boxlo, double *boxhi, double cell_len, double skin,
|
||||
const int max_nbors, const int gpu_id);
|
||||
typedef void (*_lj_gpu_clear)();
|
||||
typedef double (*_lj_gpu_cell)(double **force, double *virial, double **host_x, int *host_type, const int inum, const int nall,
|
||||
const int ago, const bool eflag, const bool vflag,
|
||||
const double *boxlo, const double *boxhi);
|
||||
typedef void (*_lj_gpu_name)(const int gpu_id, const int max_nbors, char * name);
|
||||
typedef void (*_lj_gpu_time)();
|
||||
typedef double (*_lj_gpu_bytes)();
|
||||
|
||||
_lj_gpu_init lj_gpu_init;
|
||||
_lj_gpu_clear lj_gpu_clear;
|
||||
_lj_gpu_cell lj_gpu_cell;
|
||||
_lj_gpu_name lj_gpu_name;
|
||||
_lj_gpu_time lj_gpu_time;
|
||||
_lj_gpu_bytes lj_gpu_bytes;
|
||||
|
||||
#endif
|
||||
bool ljl_gpu_init(const int ntypes, double **cutsq, double **host_lj1,
|
||||
double **host_lj2, double **host_lj3, double **host_lj4,
|
||||
double **offset, double *special_lj, const int nlocal,
|
||||
const int nall, const int max_nbors, const int maxspecial,
|
||||
const double cell_size, int &gpu_mode, FILE *screen);
|
||||
void ljl_gpu_clear();
|
||||
int * ljl_gpu_compute_n(const int timestep, const int ago, const int inum,
|
||||
const int nall, double **host_x, int *host_type,
|
||||
double *boxlo, double *boxhi, int *tag, int **nspecial,
|
||||
int **special, const bool eflag, const bool vflag,
|
||||
const bool eatom, const bool vatom, int &host_start,
|
||||
const double cpu_time, bool &success);
|
||||
void ljl_gpu_compute(const int timestep, const int ago, const int inum,
|
||||
const int nall, double **host_x, int *host_type,
|
||||
int *ilist, int *numj, int **firstneigh,
|
||||
const bool eflag, const bool vflag, const bool eatom,
|
||||
const bool vatom, int &host_start, const double cpu_time,
|
||||
bool &success);
|
||||
double ljl_gpu_bytes();
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
PairLJCutGPU::PairLJCutGPU(LAMMPS *lmp) : PairLJCut(lmp), multi_gpu_mode(0)
|
||||
PairLJCutGPU::PairLJCutGPU(LAMMPS *lmp) : PairLJCut(lmp), gpu_mode(GPU_PAIR)
|
||||
{
|
||||
ij_new=NULL;
|
||||
respa_enable = 0;
|
||||
|
||||
#ifdef WINDLL
|
||||
HINSTANCE hinstLib = LoadLibrary(TEXT("gpu.dll"));
|
||||
if (hinstLib == NULL) {
|
||||
printf("\nUnable to load gpu.dll\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
lj_gpu_init=(_lj_gpu_init)GetProcAddress(hinstLib,"lj_gpu_init");
|
||||
lj_gpu_clear=(_lj_gpu_clear)GetProcAddress(hinstLib,"lj_gpu_clear");
|
||||
lj_gpu_cell=(_lj_gpu_cell)GetProcAddress(hinstLib,"lj_gpu_cell");
|
||||
lj_gpu_name=(_lj_gpu_name)GetProcAddress(hinstLib,"lj_gpu_name");
|
||||
lj_gpu_time=(_lj_gpu_time)GetProcAddress(hinstLib,"lj_gpu_time");
|
||||
lj_gpu_bytes=(_lj_gpu_bytes)GetProcAddress(hinstLib,"lj_gpu_bytes");
|
||||
#endif
|
||||
|
||||
cpu_time = 0.0;
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
|
@ -113,17 +75,7 @@ PairLJCutGPU::PairLJCutGPU(LAMMPS *lmp) : PairLJCut(lmp), multi_gpu_mode(0)
|
|||
|
||||
PairLJCutGPU::~PairLJCutGPU()
|
||||
{
|
||||
printf("\n\n-------------------------------------");
|
||||
printf("--------------------------------\n");
|
||||
printf(" GPU Time Stamps: ");
|
||||
printf("\n-------------------------------------");
|
||||
printf("--------------------------------\n");
|
||||
lj_gpu_time();
|
||||
printf("-------------------------------------");
|
||||
printf("--------------------------------\n\n");
|
||||
lj_gpu_clear();
|
||||
if (ij_new!=NULL)
|
||||
delete [] ij_new;
|
||||
ljl_gpu_clear();
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
@ -132,45 +84,37 @@ void PairLJCutGPU::compute(int eflag, int vflag)
|
|||
{
|
||||
if (eflag || vflag) ev_setup(eflag,vflag);
|
||||
else evflag = vflag_fdotr = 0;
|
||||
|
||||
int nall = atom->nlocal + atom->nghost;
|
||||
int inum, host_start;
|
||||
|
||||
bool success = true;
|
||||
|
||||
if (gpu_mode == GPU_NEIGH) {
|
||||
inum = atom->nlocal;
|
||||
gpulist = ljl_gpu_compute_n(update->ntimestep, neighbor->ago, inum, nall,
|
||||
atom->x, atom->type, domain->sublo,
|
||||
domain->subhi, atom->tag, atom->nspecial,
|
||||
atom->special, eflag, vflag, eflag_atom,
|
||||
vflag_atom, host_start, cpu_time, success);
|
||||
} else {
|
||||
inum = list->inum;
|
||||
ljl_gpu_compute(update->ntimestep, neighbor->ago, inum, nall, atom->x,
|
||||
atom->type, list->ilist, list->numneigh, list->firstneigh,
|
||||
eflag, vflag, eflag_atom, vflag_atom, host_start, cpu_time,
|
||||
success);
|
||||
}
|
||||
if (!success)
|
||||
error->one("Out of memory on GPGPU");
|
||||
|
||||
// compute forces on GPU
|
||||
eng_vdwl = lj_gpu_cell(atom->f, virial, atom->x, atom->type, atom->nlocal, atom->nlocal + atom->nghost,
|
||||
neighbor->ago, eflag, vflag, domain->boxlo, domain->boxhi);
|
||||
|
||||
if (vflag_fdotr) virial_compute();
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
global settings
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void PairLJCutGPU::settings(int narg, char **arg)
|
||||
{
|
||||
// strip off GPU keyword/value and send remaining args to parent
|
||||
|
||||
if (narg < 2) error->all("Illegal pair_style command");
|
||||
|
||||
// set multi_gpu_mode to one/node for multiple gpus on 1 node
|
||||
// -- param is starting gpu id
|
||||
// set multi_gpu_mode to one/gpu to select the same gpu id on every node
|
||||
// -- param is id of gpu
|
||||
// set multi_gpu_mode to multi/gpu to get ma
|
||||
// -- param is number of gpus per node
|
||||
|
||||
if (strcmp(arg[0],"one/node") == 0)
|
||||
multi_gpu_mode = ONE_NODE;
|
||||
else if (strcmp(arg[0],"one/gpu") == 0)
|
||||
multi_gpu_mode = ONE_GPU;
|
||||
else if (strcmp(arg[0],"multi/gpu") == 0)
|
||||
multi_gpu_mode = MULTI_GPU;
|
||||
else error->all("Illegal pair_style command");
|
||||
|
||||
multi_gpu_param = atoi(arg[1]);
|
||||
|
||||
if (multi_gpu_mode == MULTI_GPU && multi_gpu_param < 1)
|
||||
error->all("Illegal pair_style command");
|
||||
|
||||
PairLJCut::settings(narg-2,&arg[2]);
|
||||
if (host_start<inum) {
|
||||
cpu_time = MPI_Wtime();
|
||||
if (gpu_mode == GPU_NEIGH)
|
||||
cpu_compute(gpulist, host_start, eflag, vflag);
|
||||
else
|
||||
cpu_compute(host_start, eflag, vflag);
|
||||
cpu_time = MPI_Wtime() - cpu_time;
|
||||
}
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
|
@ -179,74 +123,45 @@ void PairLJCutGPU::settings(int narg, char **arg)
|
|||
|
||||
void PairLJCutGPU::init_style()
|
||||
{
|
||||
cut_respa = NULL;
|
||||
|
||||
if (force->pair_match("gpu",0) == NULL)
|
||||
error->all("Cannot use pair hybrid with multiple GPU pair styles");
|
||||
|
||||
// set the GPU ID
|
||||
|
||||
int my_gpu=comm->me+multi_gpu_param;
|
||||
int ngpus=universe->nprocs;
|
||||
if (multi_gpu_mode==ONE_GPU) {
|
||||
my_gpu=multi_gpu_param;
|
||||
ngpus=1;
|
||||
} else if (multi_gpu_mode==MULTI_GPU) {
|
||||
ngpus=multi_gpu_param;
|
||||
my_gpu=comm->me%ngpus;
|
||||
if (ngpus>universe->nprocs)
|
||||
ngpus=universe->nprocs;
|
||||
}
|
||||
|
||||
cut_respa=NULL;
|
||||
|
||||
// Repeat cutsq calculation because done after call to init_style
|
||||
double cut;
|
||||
for (int i = 1; i <= atom->ntypes; i++)
|
||||
for (int j = i; j <= atom->ntypes; j++) {
|
||||
cut = init_one(i,j);
|
||||
cutsq[i][j] = cutsq[j][i] = cut*cut;
|
||||
}
|
||||
|
||||
// use the max cutoff length as the cell length
|
||||
double maxcut = -1.0;
|
||||
for (int i = 1; i <= atom->ntypes; i++)
|
||||
for (int j = i; j <= atom->ntypes; j++)
|
||||
if (cutsq[i][j] > maxcut) maxcut = cutsq[i][j];
|
||||
|
||||
// for this problem, adding skin results in better perf
|
||||
// this may be a parameter in the future
|
||||
double cell_len = sqrt(maxcut) + neighbor->skin;
|
||||
|
||||
if (!lj_gpu_init(ij_size, atom->ntypes+1, cutsq, sigma, epsilon, lj1, lj2, lj3,
|
||||
lj4, offset, force->special_lj, domain->boxlo, domain->boxhi,
|
||||
cell_len, neighbor->skin, neighbor->oneatom, my_gpu))
|
||||
error->one("At least one process could not allocate a CUDA-enabled gpu");
|
||||
|
||||
if (ij_new!=NULL)
|
||||
delete [] ij_new;
|
||||
ij_new=new int[ij_size];
|
||||
|
||||
if (force->newton_pair)
|
||||
error->all("Cannot use newton pair with GPU lj/cut pair style");
|
||||
|
||||
if (comm->me == 0 && screen) {
|
||||
printf("\n-------------------------------------");
|
||||
printf("-------------------------------------\n");
|
||||
printf("- Using GPGPU acceleration for LJ-Cut:\n");
|
||||
printf("-------------------------------------");
|
||||
printf("-------------------------------------\n");
|
||||
|
||||
for (int i=0; i<ngpus; i++) {
|
||||
int gpui=my_gpu;
|
||||
if (multi_gpu_mode==ONE_NODE)
|
||||
gpui=i+multi_gpu_param;
|
||||
else if (multi_gpu_mode==MULTI_GPU)
|
||||
gpui=i;
|
||||
char gpu_string[500];
|
||||
lj_gpu_name(gpui,neighbor->oneatom,gpu_string);
|
||||
printf("GPU %d: %s\n",gpui,gpu_string);
|
||||
double cut;
|
||||
for (int i = 1; i <= atom->ntypes; i++) {
|
||||
for (int j = i; j <= atom->ntypes; j++) {
|
||||
if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
|
||||
cut = init_one(i,j);
|
||||
cut *= cut;
|
||||
if (cut > maxcut)
|
||||
maxcut = cut;
|
||||
cutsq[i][j] = cutsq[j][i] = cut;
|
||||
} else
|
||||
cutsq[i][j] = cutsq[j][i] = 0.0;
|
||||
}
|
||||
printf("-------------------------------------");
|
||||
printf("-------------------------------------\n\n");
|
||||
}
|
||||
double cell_size = sqrt(maxcut) + neighbor->skin;
|
||||
|
||||
int maxspecial=0;
|
||||
if (atom->molecular)
|
||||
maxspecial=atom->maxspecial;
|
||||
bool init_ok = ljl_gpu_init(atom->ntypes+1, cutsq, lj1, lj2, lj3, lj4,
|
||||
offset, force->special_lj, atom->nlocal,
|
||||
atom->nlocal+atom->nghost, 300, maxspecial,
|
||||
cell_size, gpu_mode, screen);
|
||||
if (!init_ok)
|
||||
error->one("Insufficient memory on accelerator (or no fix gpu).\n");
|
||||
|
||||
if (force->newton_pair)
|
||||
error->all("Cannot use newton pair with GPU LJ pair style");
|
||||
|
||||
if (gpu_mode != GPU_NEIGH) {
|
||||
int irequest = neighbor->request(this);
|
||||
neighbor->requests[irequest]->half = 0;
|
||||
neighbor->requests[irequest]->full = 1;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -254,6 +169,149 @@ void PairLJCutGPU::init_style()
|
|||
|
||||
double PairLJCutGPU::memory_usage()
|
||||
{
|
||||
double bytes=Pair::memory_usage()+ij_size*sizeof(int);
|
||||
return bytes+lj_gpu_bytes();
|
||||
double bytes = Pair::memory_usage();
|
||||
return bytes + ljl_gpu_bytes();
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void PairLJCutGPU::cpu_compute(int start, int eflag, int vflag) {
|
||||
int i,j,ii,jj,inum,jnum,itype,jtype;
|
||||
double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
|
||||
double rsq,r2inv,r6inv,forcelj,factor_lj;
|
||||
int *ilist,*jlist,*numneigh,**firstneigh;
|
||||
|
||||
double **x = atom->x;
|
||||
double **f = atom->f;
|
||||
int *type = atom->type;
|
||||
int nlocal = atom->nlocal;
|
||||
int nall = nlocal + atom->nghost;
|
||||
double *special_lj = force->special_lj;
|
||||
|
||||
inum = list->inum;
|
||||
ilist = list->ilist;
|
||||
numneigh = list->numneigh;
|
||||
firstneigh = list->firstneigh;
|
||||
|
||||
// loop over neighbors of my atoms
|
||||
|
||||
for (ii = start; ii < inum; ii++) {
|
||||
i = ilist[ii];
|
||||
xtmp = x[i][0];
|
||||
ytmp = x[i][1];
|
||||
ztmp = x[i][2];
|
||||
itype = type[i];
|
||||
jlist = firstneigh[i];
|
||||
jnum = numneigh[i];
|
||||
|
||||
for (jj = 0; jj < jnum; jj++) {
|
||||
j = jlist[jj];
|
||||
|
||||
if (j < nall) factor_lj = 1.0;
|
||||
else {
|
||||
factor_lj = special_lj[j/nall];
|
||||
j %= nall;
|
||||
}
|
||||
|
||||
delx = xtmp - x[j][0];
|
||||
dely = ytmp - x[j][1];
|
||||
delz = ztmp - x[j][2];
|
||||
rsq = delx*delx + dely*dely + delz*delz;
|
||||
jtype = type[j];
|
||||
|
||||
if (rsq < cutsq[itype][jtype]) {
|
||||
r2inv = 1.0/rsq;
|
||||
r6inv = r2inv*r2inv*r2inv;
|
||||
forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
|
||||
fpair = factor_lj*forcelj*r2inv;
|
||||
|
||||
f[i][0] += delx*fpair;
|
||||
f[i][1] += dely*fpair;
|
||||
f[i][2] += delz*fpair;
|
||||
|
||||
if (eflag) {
|
||||
evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]) -
|
||||
offset[itype][jtype];
|
||||
evdwl *= factor_lj;
|
||||
}
|
||||
|
||||
if (evflag) ev_tally_full(i,evdwl,0.0,fpair,delx,dely,delz);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void PairLJCutGPU::cpu_compute(int *nbors, int start, int eflag, int vflag) {
|
||||
int i,j,itype,jtype;
|
||||
int nlocal = atom->nlocal;
|
||||
int nall = nlocal + atom->nghost;
|
||||
int stride = nlocal-start;
|
||||
double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
|
||||
double rsq,r2inv,r6inv,forcelj,factor_lj;
|
||||
double *special_lj = force->special_lj;
|
||||
|
||||
double **x = atom->x;
|
||||
double **f = atom->f;
|
||||
int *type = atom->type;
|
||||
|
||||
// loop over neighbors of my atoms
|
||||
|
||||
for (i = start; i < nlocal; i++) {
|
||||
xtmp = x[i][0];
|
||||
ytmp = x[i][1];
|
||||
ztmp = x[i][2];
|
||||
itype = type[i];
|
||||
int *nbor = nbors + i - start;
|
||||
int jnum = *nbor;
|
||||
nbor += stride;
|
||||
int *nbor_end = nbor + stride * jnum;
|
||||
|
||||
for (; nbor<nbor_end; nbor+=stride) {
|
||||
j = *nbor;
|
||||
|
||||
if (j < nall) factor_lj = 1.0;
|
||||
else {
|
||||
factor_lj = special_lj[j/nall];
|
||||
j %= nall;
|
||||
}
|
||||
|
||||
delx = xtmp - x[j][0];
|
||||
dely = ytmp - x[j][1];
|
||||
delz = ztmp - x[j][2];
|
||||
rsq = delx*delx + dely*dely + delz*delz;
|
||||
jtype = type[j];
|
||||
|
||||
if (rsq < cutsq[itype][jtype]) {
|
||||
r2inv = 1.0/rsq;
|
||||
r6inv = r2inv*r2inv*r2inv;
|
||||
forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
|
||||
fpair = factor_lj*forcelj*r2inv;
|
||||
|
||||
f[i][0] += delx*fpair;
|
||||
f[i][1] += dely*fpair;
|
||||
f[i][2] += delz*fpair;
|
||||
|
||||
if (eflag) {
|
||||
evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]) -
|
||||
offset[itype][jtype];
|
||||
evdwl *= factor_lj;
|
||||
}
|
||||
|
||||
if (j<start) {
|
||||
if (evflag) ev_tally_full(i,evdwl,0.0,fpair,delx,dely,delz);
|
||||
} else {
|
||||
if (j<nlocal) {
|
||||
f[j][0] -= delx*fpair;
|
||||
f[j][1] -= dely*fpair;
|
||||
f[j][2] -= delz*fpair;
|
||||
}
|
||||
if (evflag) ev_tally(i,j,nlocal,0,
|
||||
evdwl,0.0,fpair,delx,dely,delz);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -17,8 +17,8 @@ PairStyle(lj/cut/gpu,PairLJCutGPU)
|
|||
|
||||
#else
|
||||
|
||||
#ifndef LMP_PAIR_LJ_CUT_GPU_H
|
||||
#define LMP_PAIR_LJ_CUT_GPU_H
|
||||
#ifndef LMP_PAIR_LJ_LIGHT_GPU_H
|
||||
#define LMP_PAIR_LJ_LIGHT_GPU_H
|
||||
|
||||
#include "pair_lj_cut.h"
|
||||
|
||||
|
@ -28,20 +28,21 @@ class PairLJCutGPU : public PairLJCut {
|
|||
public:
|
||||
PairLJCutGPU(LAMMPS *lmp);
|
||||
~PairLJCutGPU();
|
||||
void cpu_compute(int, int, int);
|
||||
void cpu_compute(int *, int, int, int);
|
||||
void compute(int, int);
|
||||
void settings(int, char **);
|
||||
void init_style();
|
||||
double memory_usage();
|
||||
|
||||
enum { ONE_NODE, ONE_GPU, MULTI_GPU };
|
||||
enum { GPU_PAIR, GPU_NEIGH };
|
||||
|
||||
private:
|
||||
int ij_size;
|
||||
int *ij_new;
|
||||
|
||||
int last_neighbor, multi_gpu_mode, multi_gpu_param;
|
||||
private:
|
||||
int gpu_mode;
|
||||
double cpu_time;
|
||||
int *gpulist;
|
||||
};
|
||||
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
|
Loading…
Reference in New Issue