mirror of https://github.com/lammps/lammps.git
git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@3047 f3b2605a-c512-4ea7-a41b-209d697bcdaa
This commit is contained in:
parent be41e3b26d
commit 93e0e00793
@@ -0,0 +1,39 @@
# Install/unInstall package classes in LAMMPS

# edit Makefile.package to include/exclude GPU library
# do not copy gayberne files if non-GPU version does not exist

if ($1 == 1) then

  sed -i 's/\S*gpu //' ../Makefile.package
  sed -i 's|^PKGPATH =\s*|&-L../../lib/gpu |' ../Makefile.package
  sed -i 's|^PKGLIB =\s*|&-lgpu |' ../Makefile.package

  cp style_gpu.h tmp.h
  if (! -e ../pair_gayberne.cpp) then
    grep -v gayberne tmp.h > tmp1.h
    mv tmp1.h tmp.h
  endif
  mv tmp.h ../style_gpu.h

  if (-e ../pair_gayberne.cpp) then
    cp pair_gayberne_gpu.cpp ..
    cp pair_gayberne_gpu.h ..
  endif

  cp pair_lj_cut_gpu.cpp ..
  cp pair_lj_cut_gpu.h ..

else if ($1 == 0) then

  sed -i 's/\S*gpu //' ../Makefile.package

  rm ../style_gpu.h
  touch ../style_gpu.h

  rm ../pair_gayberne_gpu.cpp
  rm ../pair_lj_cut_gpu.cpp

  rm ../pair_gayberne_gpu.h
  rm ../pair_lj_cut_gpu.h

endif
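As the $1 tests above show, the script is driven by a single install/uninstall flag. A sketch of a typical invocation (the Install.csh file name and the src/GPU location are assumptions, not stated in this diff):

cd src/GPU
csh Install.csh 1    # copy the GPU sources into src/ and patch Makefile.package
csh Install.csh 0    # remove them again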
@@ -0,0 +1,369 @@
/* ----------------------------------------------------------------------
   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
   http://lammps.sandia.gov, Sandia National Laboratories
   Steve Plimpton, sjplimp@sandia.gov

   Copyright (2003) Sandia Corporation.  Under the terms of Contract
   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
   certain rights in this software.  This software is distributed under
   the GNU General Public License.

   See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */

/* ----------------------------------------------------------------------
   Contributing author: Mike Brown (SNL)
------------------------------------------------------------------------- */

#include "math.h"
#include "stdio.h"
#include "stdlib.h"
#include "string.h"
#include <iostream>
#include <string>
#include "pair_gayberne_gpu.h"
#include "math_extra.h"
#include "atom.h"
#include "atom_vec.h"
#include "comm.h"
#include "force.h"
#include "neighbor.h"
#include "neigh_list.h"
#include "integrate.h"
#include "memory.h"
#include "error.h"
#include "neigh_request.h"
#include "universe.h"

#ifdef GB_GPU_OMP
#include "omp.h"
#endif

#define MIN(a,b) ((a) < (b) ? (a) : (b))
#define MAX(a,b) ((a) > (b) ? (a) : (b))

// external functions from the CUDA library for atom decomposition

int * gb_gpu_init(int &ij_size, const int ntypes, const double gamma,
                  const double upsilon, const double mu, double **shape,
                  double **well, double **cutsq, double **sigma,
                  double **epsilon, double *host_lshape, int **form,
                  double **host_lj1, double **host_lj2, double **host_lj3,
                  double **host_lj4, double **offset, double *special_lj,
                  const int max_nbors, const int thread, const int gpu_id);
void gb_gpu_clear(const int thread);
int * gb_gpu_reset_nbors(const int nall, const int nlocal, const int inum,
                         int *ilist, const int *numj, const int *type,
                         const int thread, bool &success);
void gb_gpu_nbors(const int num_ij, const bool eflag, const int thread);
void gb_gpu_atom(double **host_x, double **host_quat, const int *host_type,
                 const bool rebuild, const int thread);
void gb_gpu_gayberne(const bool eflag, const bool vflag, const bool rebuild,
                     const int thread);
double gb_gpu_forces(double **f, double **tor, const int *ilist,
                     const bool eflag, const bool vflag, const bool eflag_atom,
                     const bool vflag_atom, double *eatom, double **vatom,
                     double *virial, const int thread);
std::string gb_gpu_name(const int i, const int max_nbors);
void gb_gpu_time(const int thread);
int gb_gpu_num_devices();
double gb_gpu_bytes();

using namespace LAMMPS_NS;

/* ---------------------------------------------------------------------- */

PairGayBerneGPU::PairGayBerneGPU(LAMMPS *lmp) : PairGayBerne(lmp), my_thread(0),
                                                omp_chunk(0), nthreads(1),
                                                multi_gpu_mode(ONE_NODE),
                                                multi_gpu_param(0)
{
  ij_new[0]=NULL;
}

/* ----------------------------------------------------------------------
   free all arrays
------------------------------------------------------------------------- */

PairGayBerneGPU::~PairGayBerneGPU()
{
  if (comm->me == 0 && screen) {
    printf("\n\n-------------------------------------");
    printf("--------------------------------\n");
    printf(" GPU Time Stamps: ");
    printf("\n-------------------------------------");
    printf("--------------------------------\n");
    gb_gpu_time(my_thread);
    std::cout << "Procs: " << universe->nprocs << std::endl;
    printf("-------------------------------------");
    printf("--------------------------------\n\n");
  }
#pragma omp parallel
  {
#ifdef GB_GPU_OMP
    int my_thread=omp_get_thread_num();
#endif
    gb_gpu_clear(my_thread);
    if (ij_new[my_thread]!=NULL) {
      // free the buffer, then clear the pointer
      delete [] ij_new[my_thread];
      ij_new[my_thread]=NULL;
    }
  }
}

/* ---------------------------------------------------------------------- */

void PairGayBerneGPU::compute(int eflag, int vflag)
{
  if (eflag || vflag) ev_setup(eflag,vflag);
  else evflag = vflag_fdotr = 0;
  if (vflag_atom)
    error->all("Per-atom virial not available with GPU Gay-Berne.");

  int nlocal = atom->nlocal;
  int nall = nlocal + atom->nghost;
  int inum = list->inum;

  bool rebuild=false;
  if (neighbor->ncalls > last_neighbor) {
    last_neighbor=neighbor->ncalls;
    rebuild=true;
  }

#pragma omp parallel
  {

    bool success=true;
#ifdef GB_GPU_OMP
    int my_thread=omp_get_thread_num();
    if (rebuild) {
      omp_chunk=static_cast<int>(ceil(static_cast<double>(inum)/nthreads));
      if (my_thread==nthreads-1)
        thread_inum[my_thread]=inum-(nthreads-1)*omp_chunk;
      else
        thread_inum[my_thread]=omp_chunk;
      olist[my_thread]=gb_gpu_reset_nbors(nall, atom->nlocal,
                                          thread_inum[my_thread],
                                          list->ilist+omp_chunk*my_thread,
                                          list->numneigh, atom->type, my_thread,
                                          success);
    }
#else
    if (rebuild)
      olist[my_thread]=gb_gpu_reset_nbors(nall, atom->nlocal, inum, list->ilist,
                                          list->numneigh, atom->type, my_thread,
                                          success);
#endif
    if (!success)
      error->one("Total # of atoms exceeds maximum allowed per GPGPU.\n");

    // copy atom data to GPU
    gb_gpu_atom(atom->x,atom->quat,atom->type,rebuild,my_thread);

    int i,j,ii,jj,jnum;
    double factor_lj;
    int *jlist;

    if (rebuild==true) {
      int num_ij = 0;

      // loop over neighbors of my atoms
      int *ijp=ij_new[my_thread];
#ifdef GB_GPU_OMP
      int mgo=my_thread*omp_chunk;
      int mgot=mgo+thread_inum[my_thread];
#else
      int mgo=0, mgot=inum;
#endif
      for (ii = mgo; ii<mgot; ii++) {
        i = olist[my_thread][ii];
        jlist = list->firstneigh[i];
        jnum = list->numneigh[i];

        for (jj = 0; jj < jnum; jj++) {
          j = jlist[jj];

          *ijp=j;
          ijp++;
          num_ij++;

          // flush the staging buffer to the GPU whenever it fills up
          if (num_ij==ij_size) {
            memcpy(ij[my_thread],ij_new[my_thread],num_ij*sizeof(int));
            gb_gpu_nbors(num_ij,eflag,my_thread);
            ijp=ij_new[my_thread];
            num_ij=0;
          }
        }
      }
      if (num_ij>0) {
        memcpy(ij[my_thread],ij_new[my_thread],num_ij*sizeof(int));
        gb_gpu_nbors(num_ij,eflag,my_thread);
      }
    }

    gb_gpu_gayberne(eflag,vflag,rebuild,my_thread);
    double lvirial[6];
    for (int i=0; i<6; i++) lvirial[i]=0.0;
    double my_eng=gb_gpu_forces(atom->f,atom->torque,olist[my_thread],eflag,vflag,
                                eflag_atom, vflag_atom, eatom, vatom, lvirial,
                                my_thread);
#pragma omp critical
    {
      eng_vdwl+=my_eng;
      virial[0]+=lvirial[0];
      virial[1]+=lvirial[1];
      virial[2]+=lvirial[2];
      virial[3]+=lvirial[3];
      virial[4]+=lvirial[4];
      virial[5]+=lvirial[5];
    }

  } // end parallel

  if (vflag_fdotr) virial_compute();
}

/* ----------------------------------------------------------------------
   global settings
------------------------------------------------------------------------- */

void PairGayBerneGPU::settings(int narg, char **arg)
{
  if (narg != 4 && narg != 6) error->all("Illegal pair_style command");

  // Set multi_gpu_mode to one_node for multiple gpus on 1 node
  // -- param is starting gpu id
  // Set multi_gpu_mode to one_gpu to select the same gpu id on every node
  // -- param is id of gpu
  // Set multi_gpu_mode to multi_gpu to use multiple gpus per node
  // -- param is number of gpus per node
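  // Hypothetical examples of the 4- and 6-argument forms accepted above
  // (the numeric values are placeholders, not from this commit):
  //   pair_style gayberne/gpu 1.0 1.0 1.0 4.0
  //   pair_style gayberne/gpu 1.0 1.0 1.0 4.0 multi_gpu 2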
  multi_gpu_mode=ONE_NODE;
  multi_gpu_param=0;
  if (narg==6) {
    multi_gpu_param=atoi(arg[5]);
    if (strcmp("one_node",arg[4])==0)
      multi_gpu_mode=ONE_NODE;
    else if (strcmp("one_gpu",arg[4])==0)
      multi_gpu_mode=ONE_GPU;
    else if (strcmp("multi_gpu",arg[4])==0) {
      multi_gpu_mode=MULTI_GPU;
      if (multi_gpu_param<1)
        error->all("Illegal pair_style command");
    } else
      error->all("Illegal pair_style command");
    narg-=2;
  }

  PairGayBerne::settings(narg,arg);
}

/* ----------------------------------------------------------------------
   init specific to this pair style
------------------------------------------------------------------------- */

void PairGayBerneGPU::init_style()
{
  // set the GPU ID
  int my_gpu=comm->me+multi_gpu_param;
  int ngpus=universe->nprocs;
  if (multi_gpu_mode==ONE_GPU) {
    my_gpu=multi_gpu_param;
    ngpus=1;
  } else if (multi_gpu_mode==MULTI_GPU) {
    ngpus=multi_gpu_param;
    my_gpu=comm->me%ngpus;
    if (ngpus>universe->nprocs)
      ngpus=universe->nprocs;
  }

  if (!atom->quat_flag || !atom->torque_flag || !atom->avec->shape_type)
    error->all("Pair gayberne requires atom attributes quat, torque, shape");
  if (atom->radius_flag)
    error->all("Pair gayberne cannot be used with atom attribute diameter");

  int irequest = neighbor->request(this);

  // per-type shape precalculations

  for (int i = 1; i <= atom->ntypes; i++) {
    if (setwell[i]) {
      double *one = atom->shape[i];
      shape[i][0] = one[0]*one[0];
      shape[i][1] = one[1]*one[1];
      shape[i][2] = one[2]*one[2];
      lshape[i] = (one[0]*one[1]+one[2]*one[2])*sqrt(one[0]*one[1]);
    }
  }

  // recompute cutsq here because it is normally set after init_style() runs
  double cut;
  for (int i = 1; i <= atom->ntypes; i++)
    for (int j = i; j <= atom->ntypes; j++) {
      cut = init_one(i,j);
      cutsq[i][j] = cutsq[j][i] = cut*cut;
    }

  // if compiled with OpenMP and only 1 proc, try to use multiple GPUs w/ threads
#ifdef GB_GPU_OMP
  if (multi_gpu_mode!=ONE_GPU)
    nthreads=ngpus=gb_gpu_num_devices();
  else
    nthreads=ngpus=1;
  if (nthreads>MAX_GPU_THREADS)
    nthreads=MAX_GPU_THREADS;
  omp_set_num_threads(nthreads);
#endif

#pragma omp parallel firstprivate(my_gpu)
  {
#ifdef GB_GPU_OMP
    int my_thread = omp_get_thread_num();
    if (multi_gpu_mode!=ONE_GPU)
      my_gpu=my_thread;
    if (multi_gpu_mode==ONE_NODE)
      my_gpu+=multi_gpu_param;
#endif

    ij[my_thread]=gb_gpu_init(ij_size, atom->ntypes+1, gamma, upsilon, mu,
                              shape, well, cutsq, sigma, epsilon, lshape, form,
                              lj1, lj2, lj3, lj4, offset, force->special_lj,
                              neighbor->oneatom, my_thread, my_gpu);
    if (ij[my_thread]==0)
      error->one("AT LEAST ONE PROCESS COULD NOT ALLOCATE A CUDA-ENABLED GPU.");

    if (ij_new[my_thread]!=NULL)
      delete [] ij_new[my_thread];
    ij_new[my_thread]=new int[ij_size];
  }

  last_neighbor = -1;
  neighbor->requests[irequest]->half = 0;
  neighbor->requests[irequest]->full = 1;
  if (force->newton_pair)
    error->all("Cannot use newton with GPU GayBerne pair style.");

  if (comm->me == 0 && screen) {
    printf("\n-------------------------------------");
    printf("-------------------------------------\n");
    printf("- Using GPGPU acceleration for Gay-Berne:\n");
    printf("-------------------------------------");
    printf("-------------------------------------\n");

    for (int i=0; i<ngpus; i++) {
      int gpui=my_gpu;
      if (multi_gpu_mode==ONE_NODE)
        gpui=i+multi_gpu_param;
      else if (multi_gpu_mode==MULTI_GPU)
        gpui=i;
      std::string gpu_string=gb_gpu_name(gpui,neighbor->oneatom);
      printf("GPU %d: %s\n",gpui,gpu_string.c_str());
    }
    printf("-------------------------------------");
    printf("-------------------------------------\n\n");
  }
}

/* ---------------------------------------------------------------------- */

double PairGayBerneGPU::memory_usage()
{
  double bytes=Pair::memory_usage()+nthreads*ij_size*sizeof(int);
  return bytes+gb_gpu_bytes();
}
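For reference, the omp_chunk arithmetic in compute() hands each OpenMP thread (one per GPU) a contiguous block of ceil(inum/nthreads) neighbor-list entries, with the last thread taking whatever remains. A minimal standalone sketch of that partition, using hypothetical sizes and assuming this reading of the code:

#include <cmath>
#include <cstdio>

// thread t handles entries [t*chunk, t*chunk + count) of ilist
int main() {
  const int inum = 10, nthreads = 4;   // hypothetical sizes
  const int chunk =
    static_cast<int>(ceil(static_cast<double>(inum)/nthreads));
  for (int t = 0; t < nthreads; t++) {
    const int count = (t == nthreads-1) ? inum-(nthreads-1)*chunk : chunk;
    printf("thread %d: start %d, count %d\n", t, t*chunk, count);
  }
  return 0;
}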
@@ -0,0 +1,43 @@
/* ----------------------------------------------------------------------
   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
   http://lammps.sandia.gov, Sandia National Laboratories
   Steve Plimpton, sjplimp@sandia.gov

   Copyright (2003) Sandia Corporation.  Under the terms of Contract
   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
   certain rights in this software.  This software is distributed under
   the GNU General Public License.

   See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */

#ifndef PAIR_GAYBERNE_GPU_H
#define PAIR_GAYBERNE_GPU_H

#include "pair_gayberne.h"
#define MAX_GPU_THREADS 4

namespace LAMMPS_NS {

class PairGayBerneGPU : public PairGayBerne {
 public:
  PairGayBerneGPU(LAMMPS *lmp);
  ~PairGayBerneGPU();
  void compute(int, int);
  void settings(int, char **);
  void init_style();
  double memory_usage();

  enum { ONE_NODE, ONE_GPU, MULTI_GPU };

 private:
  int ij_size;
  int *ij[MAX_GPU_THREADS], *ij_new[MAX_GPU_THREADS], *olist[MAX_GPU_THREADS];

  int my_thread, nthreads, thread_inum[MAX_GPU_THREADS], omp_chunk;

  int last_neighbor, multi_gpu_mode, multi_gpu_param;
};

}
#endif
@@ -0,0 +1,270 @@
/* ----------------------------------------------------------------------
   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
   http://lammps.sandia.gov, Sandia National Laboratories
   Steve Plimpton, sjplimp@sandia.gov

   Copyright (2003) Sandia Corporation.  Under the terms of Contract
   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
   certain rights in this software.  This software is distributed under
   the GNU General Public License.

   See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */

/* ----------------------------------------------------------------------
   Contributing author: Mike Brown (SNL)
------------------------------------------------------------------------- */

#include "math.h"
#include "stdio.h"
#include "stdlib.h"
#include "string.h"
#include <iostream>
#include <string>
#include "pair_lj_cut_gpu.h"
#include "math_extra.h"
#include "atom.h"
#include "atom_vec.h"
#include "comm.h"
#include "force.h"
#include "neighbor.h"
#include "neigh_list.h"
#include "integrate.h"
#include "memory.h"
#include "error.h"
#include "neigh_request.h"
#include "universe.h"

#define MIN(a,b) ((a) < (b) ? (a) : (b))
#define MAX(a,b) ((a) > (b) ? (a) : (b))

// external functions from the CUDA library for force decomposition

int * lj_gpu_init(int &ij_size, const int ntypes, double **cutsq,
                  double **sigma, double **epsilon, double **host_lj1,
                  double **host_lj2, double **host_lj3, double **host_lj4,
                  double **offset, double *special_lj, const int max_nbors,
                  const int gpu_id);
void lj_gpu_clear();
bool lj_gpu_reset_nbors(const int nall, const int inum, int *ilist,
                        const int *numj);
void lj_gpu_nbors(const int num_ij);
void lj_gpu_atom(double **host_x, const int *host_type, const bool rebuild);
void lj_gpu(const bool eflag, const bool vflag, const bool rebuild);
double lj_gpu_forces(double **f, const int *ilist, const bool eflag,
                     const bool vflag, const bool eflag_atom,
                     const bool vflag_atom, double *eatom, double **vatom,
                     double *virial);
std::string lj_gpu_name(const int gpu_id, const int max_nbors);
void lj_gpu_time();
int lj_gpu_num_devices();
double lj_gpu_bytes();

using namespace LAMMPS_NS;

/* ---------------------------------------------------------------------- */

PairLJCutGPU::PairLJCutGPU(LAMMPS *lmp) : PairLJCut(lmp), multi_gpu_mode(0)
{
  ij_new=NULL;
  respa_enable = 0;
}

/* ----------------------------------------------------------------------
   free all arrays
------------------------------------------------------------------------- */

PairLJCutGPU::~PairLJCutGPU()
{
  if (comm->me == 0 && screen) {
    printf("\n\n-------------------------------------");
    printf("--------------------------------\n");
    printf(" GPU Time Stamps: ");
    printf("\n-------------------------------------");
    printf("--------------------------------\n");
    lj_gpu_time();
    std::cout << "Procs: " << universe->nprocs << std::endl;
    printf("-------------------------------------");
    printf("--------------------------------\n\n");
  }
  lj_gpu_clear();
  if (ij_new!=NULL) {
    // free the buffer, then clear the pointer
    delete [] ij_new;
    ij_new=NULL;
  }
}

/* ---------------------------------------------------------------------- */

void PairLJCutGPU::compute(int eflag, int vflag)
{
  if (eflag || vflag) ev_setup(eflag,vflag);
  else evflag = vflag_fdotr = 0;
  if (vflag_atom)
    error->all("Per-atom virial not available with GPU LJ-Cut.");

  int nlocal = atom->nlocal;
  int nall = nlocal + atom->nghost;
  int inum = list->inum;
  int *ilist = list->ilist;

  bool rebuild=false;
  if (neighbor->ncalls > last_neighbor) {
    last_neighbor=neighbor->ncalls;
    rebuild=true;
  }

  // copy nbors to GPU
  if (rebuild)
    if (!lj_gpu_reset_nbors(nall, inum, ilist, list->numneigh))
      error->one("Total # of atoms exceeds maximum allowed per GPGPU.\n");

  // copy atom data to GPU
  lj_gpu_atom(atom->x,atom->type,rebuild);

  int i,j,ii,jj,jnum;
  double factor_lj;
  int *jlist;

  if (rebuild==true) {
    int num_ij = 0;

    // loop over neighbors of my atoms
    int *ijp=ij_new;
    for (ii = 0; ii<inum; ii++) {
      i = ilist[ii];
      jlist = list->firstneigh[i];
      jnum = list->numneigh[i];

      for (jj = 0; jj < jnum; jj++) {
        j = jlist[jj];

        *ijp=j;
        ijp++;
        num_ij++;

        // flush the staging buffer to the GPU whenever it fills up
        if (num_ij==ij_size) {
          memcpy(ij,ij_new,num_ij*sizeof(int));
          lj_gpu_nbors(num_ij);
          ijp=ij_new;
          num_ij=0;
        }
      }
    }
    if (num_ij>0) {
      memcpy(ij,ij_new,num_ij*sizeof(int));
      lj_gpu_nbors(num_ij);
    }
  }

  lj_gpu(eflag,vflag,rebuild);
  eng_vdwl=lj_gpu_forces(atom->f,ilist,eflag,vflag, eflag_atom, vflag_atom,
                         eatom, vatom, virial);

  if (vflag_fdotr) virial_compute();
}

/* ----------------------------------------------------------------------
   global settings
------------------------------------------------------------------------- */

void PairLJCutGPU::settings(int narg, char **arg)
{
  if (narg != 1 && narg != 3) error->all("Illegal pair_style command");

  // Set multi_gpu_mode to one_node for multiple gpus on 1 node
  // -- param is starting gpu id
  // Set multi_gpu_mode to one_gpu to select the same gpu id on every node
  // -- param is id of gpu
  // Set multi_gpu_mode to multi_gpu to use multiple gpus per node
  // -- param is number of gpus per node
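  // Hypothetical examples of the 1- and 3-argument forms accepted above
  // (the cutoff value is a placeholder, not from this commit):
  //   pair_style lj/cut/gpu 2.5
  //   pair_style lj/cut/gpu 2.5 one_gpu 0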
  multi_gpu_mode=ONE_NODE;
  multi_gpu_param=0;
  if (narg==3) {
    multi_gpu_param=atoi(arg[2]);
    if (strcmp("one_node",arg[1])==0)
      multi_gpu_mode=ONE_NODE;
    else if (strcmp("one_gpu",arg[1])==0)
      multi_gpu_mode=ONE_GPU;
    else if (strcmp("multi_gpu",arg[1])==0) {
      multi_gpu_mode=MULTI_GPU;
      if (multi_gpu_param<1)
        error->all("Illegal pair_style command");
    } else
      error->all("Illegal pair_style command");
    narg-=2;
  }

  PairLJCut::settings(narg,arg);
}

/* ----------------------------------------------------------------------
   init specific to this pair style
------------------------------------------------------------------------- */

void PairLJCutGPU::init_style()
{
  // set the GPU ID
  int my_gpu=comm->me+multi_gpu_param;
  int ngpus=universe->nprocs;
  if (multi_gpu_mode==ONE_GPU) {
    my_gpu=multi_gpu_param;
    ngpus=1;
  } else if (multi_gpu_mode==MULTI_GPU) {
    ngpus=multi_gpu_param;
    my_gpu=comm->me%ngpus;
    if (ngpus>universe->nprocs)
      ngpus=universe->nprocs;
  }

  int irequest = neighbor->request(this);
  cut_respa=NULL;

  // recompute cutsq here because it is normally set after init_style() runs
  double cut;
  for (int i = 1; i <= atom->ntypes; i++)
    for (int j = i; j <= atom->ntypes; j++) {
      cut = init_one(i,j);
      cutsq[i][j] = cutsq[j][i] = cut*cut;
    }

  ij=lj_gpu_init(ij_size, atom->ntypes+1, cutsq, sigma, epsilon, lj1, lj2, lj3,
                 lj4, offset, force->special_lj, neighbor->oneatom, my_gpu);
  if (ij==0)
    error->one("AT LEAST ONE PROCESS COULD NOT ALLOCATE A CUDA-ENABLED GPU.");

  if (ij_new!=NULL)
    delete [] ij_new;
  ij_new=new int[ij_size];

  last_neighbor = -1;
  neighbor->requests[irequest]->half = 0;
  neighbor->requests[irequest]->full = 1;
  if (force->newton_pair)
    error->all("Cannot use newton with GPU LJCut pair style.");

  if (comm->me == 0 && screen) {
    printf("\n-------------------------------------");
    printf("-------------------------------------\n");
    printf("- Using GPGPU acceleration for LJ-Cut:\n");
    printf("-------------------------------------");
    printf("-------------------------------------\n");

    for (int i=0; i<ngpus; i++) {
      int gpui=my_gpu;
      if (multi_gpu_mode==ONE_NODE)
        gpui=i+multi_gpu_param;
      else if (multi_gpu_mode==MULTI_GPU)
        gpui=i;
      std::string gpu_string=lj_gpu_name(gpui,neighbor->oneatom);
      printf("GPU %d: %s\n",gpui,gpu_string.c_str());
    }
    printf("-------------------------------------");
    printf("-------------------------------------\n\n");
  }
}

/* ---------------------------------------------------------------------- */

double PairLJCutGPU::memory_usage()
{
  double bytes=Pair::memory_usage()+ij_size*sizeof(int);
  return bytes+lj_gpu_bytes();
}
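Both pair styles pack neighbor indices through a fixed-size staging buffer: ij_new is filled one index at a time, and whenever it reaches ij_size the contents are memcpy'd into the buffer returned by the *_gpu_init call and shipped with a *_gpu_nbors call. A generic sketch of that flush pattern (ship() is a hypothetical stand-in for lj_gpu_nbors):

#include <cstdio>
#include <cstring>
#include <vector>

// stand-in for the call that hands a filled buffer to the GPU library
static void ship(const int *buf, int n) {
  printf("shipping %d indices starting with %d\n", n, buf[0]);
}

int main() {
  const int ij_size = 4;                  // hypothetical buffer capacity
  int staging[ij_size], outbuf[ij_size];
  int num_ij = 0;
  const std::vector<int> neighbors = {1,2,3,4,5,6,7,8,9};
  for (int j : neighbors) {
    staging[num_ij++] = j;
    if (num_ij == ij_size) {              // buffer full: flush
      memcpy(outbuf, staging, num_ij*sizeof(int));
      ship(outbuf, num_ij);
      num_ij = 0;
    }
  }
  if (num_ij > 0) {                       // flush the remainder
    memcpy(outbuf, staging, num_ij*sizeof(int));
    ship(outbuf, num_ij);
  }
  return 0;
}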
@@ -0,0 +1,40 @@
/* ----------------------------------------------------------------------
   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
   http://lammps.sandia.gov, Sandia National Laboratories
   Steve Plimpton, sjplimp@sandia.gov

   Copyright (2003) Sandia Corporation.  Under the terms of Contract
   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
   certain rights in this software.  This software is distributed under
   the GNU General Public License.

   See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */

#ifndef PAIR_LJ_CUT_GPU_H
#define PAIR_LJ_CUT_GPU_H

#include "pair_lj_cut.h"

namespace LAMMPS_NS {

class PairLJCutGPU : public PairLJCut {
 public:
  PairLJCutGPU(LAMMPS *lmp);
  ~PairLJCutGPU();
  void compute(int, int);
  void settings(int, char **);
  void init_style();
  double memory_usage();

  enum { ONE_NODE, ONE_GPU, MULTI_GPU };

 private:
  int ij_size;
  int *ij, *ij_new;

  int last_neighbor, multi_gpu_mode, multi_gpu_param;
};

}
#endif
@@ -0,0 +1,41 @@
/* ----------------------------------------------------------------------
   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
   http://lammps.sandia.gov, Sandia National Laboratories
   Steve Plimpton, sjplimp@sandia.gov

   Copyright (2003) Sandia Corporation.  Under the terms of Contract
   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
   certain rights in this software.  This software is distributed under
   the GNU General Public License.

   See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */

#ifdef AtomInclude
#endif

#ifdef AtomClass
#endif

#ifdef ComputeInclude
#endif

#ifdef ComputeClass
#endif

#ifdef FixInclude
#endif

#ifdef FixClass
#endif

#ifdef PairInclude
#include "pair_lj_cut_gpu.h"
#include "pair_gayberne_gpu.h"
#endif

#ifdef PairClass
PairStyle(lj/cut/gpu,PairLJCutGPU)
PairStyle(gayberne/gpu,PairGayBerneGPU)
#endif
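With these registrations in place, the new styles are selected from a LAMMPS input script like any other pair style. A minimal hypothetical example (the coefficients are placeholders; the arguments follow PairLJCutGPU::settings above):

pair_style lj/cut/gpu 2.5 one_node 0
pair_coeff * * 1.0 1.0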
@@ -13,7 +13,7 @@ OBJ = $(SRC:.cpp=.o)
 # Package variables
 
-PACKAGE = asphere class2 colloid dipole dpd granular \
+PACKAGE = asphere class2 colloid dipole dpd gpu granular \
 	kspace manybody meam molecule opt peri poems reax xtc
 
 PACKUSER = user-ackland user-cg-cmm user-ewaldn user-smd
 
@@ -1,21 +1,37 @@
 # Install/unInstall package classes in LAMMPS
+# do not copy eam and charmm files if non-OPT versions do not exist
 
 if ($1 == 1) then
 
-  cp style_opt.h ..
+  cp style_opt.h tmp.h
+  if (! -e ../pair_eam.cpp) then
+    grep -v eam tmp.h > tmp1.h
+    mv tmp1.h tmp.h
+  endif
+  if (! -e ../pair_lj_charmm_coul_long.cpp) then
+    grep -v charmm tmp.h > tmp1.h
+    mv tmp1.h tmp.h
+  endif
+  mv tmp.h ../style_opt.h
 
-  cp pair_eam_opt.cpp ..
-  cp pair_eam_alloy_opt.cpp ..
-  cp pair_eam_fs_opt.cpp ..
-  cp pair_lj_charmm_coul_long_opt.cpp ..
+  if (-e ../pair_eam.cpp) then
+    cp pair_eam_opt.cpp ..
+    cp pair_eam_alloy_opt.cpp ..
+    cp pair_eam_fs_opt.cpp ..
+    cp pair_eam_opt.h ..
+    cp pair_eam_alloy_opt.h ..
+    cp pair_eam_fs_opt.h ..
+  endif
+
+  if (-e ../pair_lj_charmm_coul_long.cpp) then
+    cp pair_lj_charmm_coul_long_opt.cpp ..
+    cp pair_lj_charmm_coul_long_opt.h ..
+  endif
+
   cp pair_lj_cut_opt.cpp ..
   cp pair_morse_opt.cpp ..
 
-  cp pair_eam_opt.h ..
-  cp pair_eam_alloy_opt.h ..
-  cp pair_eam_fs_opt.h ..
-  cp pair_lj_charmm_coul_long_opt.h ..
   cp pair_lj_cut_opt.h ..
   cp pair_morse_opt.h ..
 
 else if ($1 == 0) then
@@ -372,6 +372,7 @@ RegionStyle(union,RegUnion)
 #include "style_colloid.h"
 #include "style_dipole.h"
 #include "style_dpd.h"
+#include "style_gpu.h"
 #include "style_granular.h"
 #include "style_kspace.h"
 #include "style_manybody.h"