From 93e0e00793c3dfff9a48639f46897bb2e91ce2cd Mon Sep 17 00:00:00 2001 From: sjplimp Date: Tue, 11 Aug 2009 18:59:27 +0000 Subject: [PATCH] git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@3047 f3b2605a-c512-4ea7-a41b-209d697bcdaa --- src/GPU/Install.csh | 39 ++++ src/GPU/pair_gayberne_gpu.cpp | 369 ++++++++++++++++++++++++++++++++++ src/GPU/pair_gayberne_gpu.h | 43 ++++ src/GPU/pair_lj_cut_gpu.cpp | 270 +++++++++++++++++++++++++ src/GPU/pair_lj_cut_gpu.h | 40 ++++ src/GPU/style_gpu.h | 41 ++++ src/Makefile | 2 +- src/OPT/Install.csh | 38 +++- src/style.h | 1 + 9 files changed, 831 insertions(+), 12 deletions(-) create mode 100644 src/GPU/Install.csh create mode 100644 src/GPU/pair_gayberne_gpu.cpp create mode 100644 src/GPU/pair_gayberne_gpu.h create mode 100644 src/GPU/pair_lj_cut_gpu.cpp create mode 100644 src/GPU/pair_lj_cut_gpu.h create mode 100644 src/GPU/style_gpu.h diff --git a/src/GPU/Install.csh b/src/GPU/Install.csh new file mode 100644 index 0000000000..80f0783fd8 --- /dev/null +++ b/src/GPU/Install.csh @@ -0,0 +1,39 @@ +# Install/unInstall package classes in LAMMPS +# edit Makefile.package to include/exclude GPU library +# do not copy gayberne files if non-GPU version does not exist + +if ($1 == 1) then + + sed -i 's/\S*gpu //' ../Makefile.package + sed -i 's|^PKGPATH =\s*|&-L../../lib/gpu |' ../Makefile.package + sed -i 's|^PKGLIB =\s*|&-lgpu |' ../Makefile.package + + cp style_gpu.h tmp.h + if (! -e ../pair_gayberne.cpp) then + grep -v gayberne tmp.h > tmp1.h + mv tmp1.h tmp.h + endif + mv tmp.h ../style_gpu.h + + if (-e ../pair_gayberne.cpp) then + cp pair_gayberne_gpu.cpp .. + cp pair_gayberne_gpu.h .. + endif + + cp pair_lj_cut_gpu.cpp .. + cp pair_lj_cut_gpu.h .. + +else if ($1 == 0) then + + sed -i 's/\S*gpu //' ../Makefile.package + + rm ../style_gpu.h + touch ../style_gpu.h + + rm ../pair_gayberne_gpu.cpp + rm ../pair_lj_cut_gpu.cpp + + rm ../pair_gayberne_gpu.h + rm ../pair_lj_cut_gpu.h + +endif diff --git a/src/GPU/pair_gayberne_gpu.cpp b/src/GPU/pair_gayberne_gpu.cpp new file mode 100644 index 0000000000..23ddff98dd --- /dev/null +++ b/src/GPU/pair_gayberne_gpu.cpp @@ -0,0 +1,369 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + cetain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Mike Brown (SNL) +------------------------------------------------------------------------- */ + +#include "math.h" +#include "stdio.h" +#include "stdlib.h" +#include "string.h" +#include "pair_gayberne_gpu.h" +#include "math_extra.h" +#include "atom.h" +#include "atom_vec.h" +#include "comm.h" +#include "force.h" +#include "neighbor.h" +#include "neigh_list.h" +#include "integrate.h" +#include "memory.h" +#include "error.h" +#include "neigh_request.h" +#include "universe.h" + +#ifdef GB_GPU_OMP +#include "omp.h" +#endif + +#define MIN(a,b) ((a) < (b) ? (a) : (b)) +#define MAX(a,b) ((a) > (b) ? (a) : (b)) + +// External functions from cuda library for atom decomposition +int * gb_gpu_init(int &ij_size, const int ntypes, const double gamma, + const double upsilon, const double mu, double **shape, + double **well, double **cutsq, double **sigma, + double **epsilon, double *host_lshape, int **form, + double **host_lj1, double **host_lj2, double **host_lj3, + double **host_lj4, double **offset, double *special_lj, + const int max_nbors, const int thread, const int gpu_id); +void gb_gpu_clear(const int thread); +int * gb_gpu_reset_nbors(const int nall, const int nlocal, const int inum, + int *ilist, const int *numj, const int *type, + const int thread, bool &success); +void gb_gpu_nbors(const int num_ij, const bool eflag, const int thread); +void gb_gpu_atom(double **host_x, double **host_quat, const int *host_type, + const bool rebuild, const int thread); +void gb_gpu_gayberne(const bool eflag, const bool vflag, const bool rebuild, + const int thread); +double gb_gpu_forces(double **f, double **tor, const int *ilist, + const bool eflag, const bool vflag, const bool eflag_atom, + const bool vflag_atom, double *eatom, double **vatom, + double *virial, const int thread); +std::string gb_gpu_name(const int i, const int max_nbors); +void gb_gpu_time(const int thread); +int gb_gpu_num_devices(); +double gb_gpu_bytes(); + +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- */ + +PairGayBerneGPU::PairGayBerneGPU(LAMMPS *lmp) : PairGayBerne(lmp), my_thread(0), + omp_chunk(0), nthreads(1), + multi_gpu_mode(ONE_NODE), + multi_gpu_param(0) +{ + ij_new[0]=NULL; +} + +/* ---------------------------------------------------------------------- + free all arrays +------------------------------------------------------------------------- */ + +PairGayBerneGPU::~PairGayBerneGPU() +{ + if (comm->me == 0 && screen) { + printf("\n\n-------------------------------------"); + printf("--------------------------------\n"); + printf(" GPU Time Stamps: "); + printf("\n-------------------------------------"); + printf("--------------------------------\n"); + gb_gpu_time(my_thread); + std::cout << "Procs: " << universe->nprocs << std::endl; + printf("-------------------------------------"); + printf("--------------------------------\n\n"); + } + #pragma omp parallel + { + #ifdef GB_GPU_OMP + int my_thread=omp_get_thread_num(); + #endif + gb_gpu_clear(my_thread); + if (ij_new[my_thread]!=NULL) { + ij_new[my_thread]=NULL; + delete [] ij_new[my_thread]; + } + } +} + +/* ---------------------------------------------------------------------- */ + +void PairGayBerneGPU::compute(int eflag, int vflag) +{ + if (eflag || vflag) ev_setup(eflag,vflag); + else evflag = vflag_fdotr = 0; + if (vflag_atom) + error->all("Per-atom virial not available with GPU Gay-Berne."); + + int nlocal = atom->nlocal; + int nall = nlocal + atom->nghost; + int inum = list->inum; + + bool rebuild=false; + if (neighbor->ncalls > last_neighbor) { + last_neighbor=neighbor->ncalls; + rebuild=true; + } + + #pragma omp parallel + { + + bool success=true; + #ifdef GB_GPU_OMP + int my_thread=omp_get_thread_num(); + if (rebuild) { + omp_chunk=static_cast(ceil(static_cast(inum)/nthreads)); + if (my_thread==nthreads-1) + thread_inum[my_thread]=inum-(nthreads-1)*omp_chunk; + else + thread_inum[my_thread]=omp_chunk; + olist[my_thread]=gb_gpu_reset_nbors(nall, atom->nlocal, + thread_inum[my_thread], + list->ilist+omp_chunk*my_thread, + list->numneigh, atom->type, my_thread, + success); + } + #else + if (rebuild) + olist[my_thread]=gb_gpu_reset_nbors(nall, atom->nlocal, inum, list->ilist, + list->numneigh, atom->type, my_thread, + success); + #endif + if (!success) + error->one("Total # of atoms exceeds maximum allowed per GPGPU.\n"); + + // copy atom data to GPU + gb_gpu_atom(atom->x,atom->quat,atom->type,rebuild,my_thread); + + int i,j,ii,jj,jnum; + double factor_lj; + int *jlist; + + if (rebuild==true) { + int num_ij = 0; + + // loop over neighbors of my atoms + int *ijp=ij_new[my_thread]; + #ifdef GB_GPU_OMP + int mgo=my_thread*omp_chunk; + int mgot=mgo+thread_inum[my_thread]; + #else + int mgo=0, mgot=inum; + #endif + for (ii = mgo; iifirstneigh[i]; + jnum = list->numneigh[i]; + + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + + *ijp=j; + ijp++; + num_ij++; + + if (num_ij==ij_size) { + memcpy(ij[my_thread],ij_new[my_thread],num_ij*sizeof(int)); + gb_gpu_nbors(num_ij,eflag,my_thread); + ijp=ij_new[my_thread]; + num_ij=0; + } + } + } + if (num_ij>0) { + memcpy(ij[my_thread],ij_new[my_thread],num_ij*sizeof(int)); + gb_gpu_nbors(num_ij,eflag,my_thread); + } + } + + gb_gpu_gayberne(eflag,vflag,rebuild,my_thread); + double lvirial[6]; + for (int i=0; i<6; i++) lvirial[i]=0.0; + double my_eng=gb_gpu_forces(atom->f,atom->torque,olist[my_thread],eflag,vflag, + eflag_atom, vflag_atom, eatom, vatom, lvirial, + my_thread); + #pragma omp critical + { + eng_vdwl+=my_eng; + virial[0]+=lvirial[0]; + virial[1]+=lvirial[1]; + virial[2]+=lvirial[2]; + virial[3]+=lvirial[3]; + virial[4]+=lvirial[4]; + virial[5]+=lvirial[5]; + } + + } //End parallel + + if (vflag_fdotr) virial_compute(); +} + +/* ---------------------------------------------------------------------- + global settings +------------------------------------------------------------------------- */ + +void PairGayBerneGPU::settings(int narg, char **arg) +{ + if (narg != 4 && narg != 6) error->all("Illegal pair_style command"); + + // Set multi_gpu_mode to one_node for multiple gpus on 1 node + // -- param is starting gpu id + // Set multi_gpu_mode to one_gpu to select the same gpu id on every node + // -- param is id of gpu + // Set multi_gpu_mode to multi_gpu to get ma + // -- param is number of gpus per node + multi_gpu_mode=ONE_NODE; + multi_gpu_param=0; + if (narg==6) { + multi_gpu_param=atoi(arg[5]); + if (strcmp("one_node",arg[4])==0) + multi_gpu_mode=ONE_NODE; + else if (strcmp("one_gpu",arg[4])==0) + multi_gpu_mode=ONE_GPU; + else if (strcmp("multi_gpu",arg[4])==0) { + multi_gpu_mode=MULTI_GPU; + if (multi_gpu_param<1) + error->all("Illegal pair_style command"); + } else + error->all("Illegal pair_style command"); + narg-=2; + } + + PairGayBerne::settings(narg,arg); +} + +/* ---------------------------------------------------------------------- + init specific to this pair style +------------------------------------------------------------------------- */ + +void PairGayBerneGPU::init_style() +{ + // Set the GPU ID + int my_gpu=comm->me+multi_gpu_param; + int ngpus=universe->nprocs; + if (multi_gpu_mode==ONE_GPU) { + my_gpu=multi_gpu_param; + ngpus=1; + } else if (multi_gpu_mode==MULTI_GPU) { + ngpus=multi_gpu_param; + my_gpu=comm->me%ngpus; + if (ngpus>universe->nprocs) + ngpus=universe->nprocs; + } + + if (!atom->quat_flag || !atom->torque_flag || !atom->avec->shape_type) + error->all("Pair gayberne requires atom attributes quat, torque, shape"); + if (atom->radius_flag) + error->all("Pair gayberne cannot be used with atom attribute diameter"); + + int irequest = neighbor->request(this); + + // per-type shape precalculations + + for (int i = 1; i <= atom->ntypes; i++) { + if (setwell[i]) { + double *one = atom->shape[i]; + shape[i][0] = one[0]*one[0]; + shape[i][1] = one[1]*one[1]; + shape[i][2] = one[2]*one[2]; + lshape[i] = (one[0]*one[1]+one[2]*one[2])*sqrt(one[0]*one[1]); + } + } + + // Repeat cutsq calculation because done after call to init_style + double cut; + for (int i = 1; i <= atom->ntypes; i++) + for (int j = i; j <= atom->ntypes; j++) { + cut = init_one(i,j); + cutsq[i][j] = cutsq[j][i] = cut*cut; + } + + // If compiled with OpenMP and only 1 proc, try to use multiple GPUs w/threads + #ifdef GB_GPU_OMP + if (multi_gpu_mode!=ONE_GPU) + nthreads=ngpus=gb_gpu_num_devices(); + else + nthreads=ngpus=1; + if (nthreads>MAX_GPU_THREADS) + nthreads=MAX_GPU_THREADS; + omp_set_num_threads(nthreads); + #endif + + #pragma omp parallel firstprivate(my_gpu) + { + #ifdef GB_GPU_OMP + int my_thread = omp_get_thread_num(); + if (multi_gpu_mode!=ONE_GPU) + my_gpu=my_thread; + if (multi_gpu_mode==ONE_NODE) + my_gpu+=multi_gpu_param; + #endif + + ij[my_thread]=gb_gpu_init(ij_size, atom->ntypes+1, gamma, upsilon, mu, + shape, well, cutsq, sigma, epsilon, lshape, form, + lj1, lj2, lj3, lj4, offset, force->special_lj, + neighbor->oneatom, my_thread, my_gpu); + if (ij[my_thread]==0) + error->one("AT LEAST ONE PROCESS COULD NOT ALLOCATE A CUDA-ENABLED GPU."); + + if (ij_new[my_thread]!=NULL) + delete [] ij_new[my_thread]; + ij_new[my_thread]=new int[ij_size]; + } + + last_neighbor = -1; + neighbor->requests[irequest]->half = 0; + neighbor->requests[irequest]->full = 1; + if (force->newton_pair) + error->all("Cannot use newton with GPU GayBerne pair style."); + + if (comm->me == 0 && screen) { + printf("\n-------------------------------------"); + printf("-------------------------------------\n"); + printf("- Using GPGPU acceleration for Gay-Berne:\n"); + printf("-------------------------------------"); + printf("-------------------------------------\n"); + + for (int i=0; ioneatom); + printf("GPU %d: %s\n",gpui,gpu_string.c_str()); + } + printf("-------------------------------------"); + printf("-------------------------------------\n\n"); + } +} + +/* ---------------------------------------------------------------------- */ + +double PairGayBerneGPU::memory_usage() +{ + double bytes=Pair::memory_usage()+nthreads*ij_size*sizeof(int); + return bytes+gb_gpu_bytes(); +} diff --git a/src/GPU/pair_gayberne_gpu.h b/src/GPU/pair_gayberne_gpu.h new file mode 100644 index 0000000000..7be34c3531 --- /dev/null +++ b/src/GPU/pair_gayberne_gpu.h @@ -0,0 +1,43 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#ifndef PAIR_GPU_H +#define PAIR_GPU_H + +#include "pair_gayberne.h" +#define MAX_GPU_THREADS 4 + +namespace LAMMPS_NS { + +class PairGayBerneGPU : public PairGayBerne { + public: + PairGayBerneGPU(LAMMPS *lmp); + ~PairGayBerneGPU(); + void compute(int, int); + void settings(int, char **); + void init_style(); + double memory_usage(); + + enum { ONE_NODE, ONE_GPU, MULTI_GPU }; + + private: + int ij_size; + int *ij[MAX_GPU_THREADS], *ij_new[MAX_GPU_THREADS], *olist[MAX_GPU_THREADS]; + + int my_thread, nthreads, thread_inum[MAX_GPU_THREADS], omp_chunk; + + int last_neighbor, multi_gpu_mode, multi_gpu_param; +}; + +} +#endif diff --git a/src/GPU/pair_lj_cut_gpu.cpp b/src/GPU/pair_lj_cut_gpu.cpp new file mode 100644 index 0000000000..d82f99e87d --- /dev/null +++ b/src/GPU/pair_lj_cut_gpu.cpp @@ -0,0 +1,270 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + cetain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Mike Brown (SNL) +------------------------------------------------------------------------- */ + +#include "math.h" +#include "stdio.h" +#include "stdlib.h" +#include "string.h" +#include "pair_lj_cut_gpu.h" +#include "math_extra.h" +#include "atom.h" +#include "atom_vec.h" +#include "comm.h" +#include "force.h" +#include "neighbor.h" +#include "neigh_list.h" +#include "integrate.h" +#include "memory.h" +#include "error.h" +#include "neigh_request.h" +#include "universe.h" + +#define MIN(a,b) ((a) < (b) ? (a) : (b)) +#define MAX(a,b) ((a) > (b) ? (a) : (b)) + +// External functions from cuda library for force decomposition +int * lj_gpu_init(int &ij_size, const int ntypes, double **cutsq, + double **sigma, double **epsilon, double **host_lj1, + double **host_lj2, double **host_lj3, double **host_lj4, + double **offset, double *special_lj, const int max_nbors, + const int gpu_id); +void lj_gpu_clear(); +bool lj_gpu_reset_nbors(const int nall, const int inum, int *ilist, + const int *numj); +void lj_gpu_nbors(const int num_ij); +void lj_gpu_atom(double **host_x, const int *host_type, const bool rebuild); +void lj_gpu(const bool eflag, const bool vflag, const bool rebuild); +double lj_gpu_forces(double **f, const int *ilist, const bool eflag, + const bool vflag, const bool eflag_atom, + const bool vflag_atom, double *eatom, double **vatom, + double *virial); +std::string lj_gpu_name(const int gpu_id, const int max_nbors); +void lj_gpu_time(); +int lj_gpu_num_devices(); +double lj_gpu_bytes(); + +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- */ + +PairLJCutGPU::PairLJCutGPU(LAMMPS *lmp) : PairLJCut(lmp), multi_gpu_mode(0) +{ + ij_new=NULL; + respa_enable = 0; +} + +/* ---------------------------------------------------------------------- + free all arrays +------------------------------------------------------------------------- */ + +PairLJCutGPU::~PairLJCutGPU() +{ + if (comm->me == 0 && screen) { + printf("\n\n-------------------------------------"); + printf("--------------------------------\n"); + printf(" GPU Time Stamps: "); + printf("\n-------------------------------------"); + printf("--------------------------------\n"); + lj_gpu_time(); + std::cout << "Procs: " << universe->nprocs << std::endl; + printf("-------------------------------------"); + printf("--------------------------------\n\n"); + } + lj_gpu_clear(); + if (ij_new!=NULL) { + ij_new=NULL; + delete [] ij_new; + } +} + +/* ---------------------------------------------------------------------- */ + +void PairLJCutGPU::compute(int eflag, int vflag) +{ + if (eflag || vflag) ev_setup(eflag,vflag); + else evflag = vflag_fdotr = 0; + if (vflag_atom) + error->all("Per-atom virial not available with GPU Gay-Berne."); + + int nlocal = atom->nlocal; + int nall = nlocal + atom->nghost; + int inum = list->inum; + int *ilist = list->ilist; + + bool rebuild=false; + if (neighbor->ncalls > last_neighbor) { + last_neighbor=neighbor->ncalls; + rebuild=true; + } + + // copy nbors to GPU + if (rebuild) + if (!lj_gpu_reset_nbors(nall, inum, ilist, list->numneigh)) + error->one("Total # of atoms exceed maximum allowed per GPGPU.\n"); + + // copy atom data to GPU + lj_gpu_atom(atom->x,atom->type,rebuild); + + int i,j,ii,jj,jnum; + double factor_lj; + int *jlist; + + if (rebuild==true) { + int num_ij = 0; + + // loop over neighbors of my atoms + int *ijp=ij_new; + for (ii = 0; iifirstneigh[i]; + jnum = list->numneigh[i]; + + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + + *ijp=j; + ijp++; + num_ij++; + + if (num_ij==ij_size) { + memcpy(ij,ij_new,num_ij*sizeof(int)); + lj_gpu_nbors(num_ij); + ijp=ij_new; + num_ij=0; + } + } + } + if (num_ij>0) { + memcpy(ij,ij_new,num_ij*sizeof(int)); + lj_gpu_nbors(num_ij); + } + } + + lj_gpu(eflag,vflag,rebuild); + eng_vdwl=lj_gpu_forces(atom->f,ilist,eflag,vflag, eflag_atom, vflag_atom, + eatom, vatom, virial); + + if (vflag_fdotr) virial_compute(); +} + +/* ---------------------------------------------------------------------- + global settings +------------------------------------------------------------------------- */ + +void PairLJCutGPU::settings(int narg, char **arg) +{ + if (narg != 1 && narg != 3) error->all("Illegal pair_style command"); + + // Set multi_gpu_mode to one_node for multiple gpus on 1 node + // -- param is starting gpu id + // Set multi_gpu_mode to one_gpu to select the same gpu id on every node + // -- param is id of gpu + // Set multi_gpu_mode to multi_gpu to get ma + // -- param is number of gpus per node + multi_gpu_mode=ONE_NODE; + multi_gpu_param=0; + if (narg==3) { + multi_gpu_param=atoi(arg[2]); + if (strcmp("one_node",arg[1])==0) + multi_gpu_mode=ONE_NODE; + else if (strcmp("one_gpu",arg[1])==0) + multi_gpu_mode=ONE_GPU; + else if (strcmp("multi_gpu",arg[1])==0) { + multi_gpu_mode=MULTI_GPU; + if (multi_gpu_param<1) + error->all("Illegal pair_style command"); + } else + error->all("Illegal pair_style command"); + narg-=2; + } + + PairLJCut::settings(narg,arg); +} + +/* ---------------------------------------------------------------------- + init specific to this pair style +------------------------------------------------------------------------- */ + +void PairLJCutGPU::init_style() +{ + // Set the GPU ID + int my_gpu=comm->me+multi_gpu_param; + int ngpus=universe->nprocs; + if (multi_gpu_mode==ONE_GPU) { + my_gpu=multi_gpu_param; + ngpus=1; + } else if (multi_gpu_mode==MULTI_GPU) { + ngpus=multi_gpu_param; + my_gpu=comm->me%ngpus; + if (ngpus>universe->nprocs) + ngpus=universe->nprocs; + } + + int irequest = neighbor->request(this); + cut_respa=NULL; + + // Repeat cutsq calculation because done after call to init_style + double cut; + for (int i = 1; i <= atom->ntypes; i++) + for (int j = i; j <= atom->ntypes; j++) { + cut = init_one(i,j); + cutsq[i][j] = cutsq[j][i] = cut*cut; + } + + ij=lj_gpu_init(ij_size, atom->ntypes+1, cutsq, sigma, epsilon, lj1, lj2, lj3, + lj4, offset, force->special_lj, neighbor->oneatom, my_gpu); + if (ij==0) + error->one("AT LEAST ONE PROCESS COULD NOT ALLOCATE A CUDA-ENABLED GPU."); + + if (ij_new!=NULL) + delete [] ij_new; + ij_new=new int[ij_size]; + + last_neighbor = -1; + neighbor->requests[irequest]->half = 0; + neighbor->requests[irequest]->full = 1; + if (force->newton_pair) + error->all("Cannot use newton with GPU LJCut pair style."); + + if (comm->me == 0 && screen) { + printf("\n-------------------------------------"); + printf("-------------------------------------\n"); + printf("- Using GPGPU acceleration for LJ-Cut:\n"); + printf("-------------------------------------"); + printf("-------------------------------------\n"); + + for (int i=0; ioneatom); + printf("GPU %d: %s\n",gpui,gpu_string.c_str()); + } + printf("-------------------------------------"); + printf("-------------------------------------\n\n"); + } +} + +/* ---------------------------------------------------------------------- */ + +double PairLJCutGPU::memory_usage() +{ + double bytes=Pair::memory_usage()+ij_size*sizeof(int); + return bytes+lj_gpu_bytes(); +} diff --git a/src/GPU/pair_lj_cut_gpu.h b/src/GPU/pair_lj_cut_gpu.h new file mode 100644 index 0000000000..2774e723ed --- /dev/null +++ b/src/GPU/pair_lj_cut_gpu.h @@ -0,0 +1,40 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#ifndef PAIR_LJ_CUT_GPU_H +#define PAIR_LJ_CUT_GPU_H + +#include "pair_lj_cut.h" + +namespace LAMMPS_NS { + +class PairLJCutGPU : public PairLJCut { + public: + PairLJCutGPU(LAMMPS *lmp); + ~PairLJCutGPU(); + void compute(int, int); + void settings(int, char **); + void init_style(); + double memory_usage(); + + enum { ONE_NODE, ONE_GPU, MULTI_GPU }; + + private: + int ij_size; + int *ij, *ij_new; + + int last_neighbor, multi_gpu_mode, multi_gpu_param; +}; + +} +#endif diff --git a/src/GPU/style_gpu.h b/src/GPU/style_gpu.h new file mode 100644 index 0000000000..1be999ccdb --- /dev/null +++ b/src/GPU/style_gpu.h @@ -0,0 +1,41 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#ifdef AtomInclude +#endif + +#ifdef AtomClass +# endif + +#ifdef ComputeInclude +#endif + +#ifdef ComputeClass +#endif + +#ifdef FixInclude +#endif + +#ifdef FixClass +#endif + +#ifdef PairInclude +#include "pair_lj_cut_gpu.h" +#include "pair_gayberne_gpu.h" +#endif + +#ifdef PairClass +PairStyle(lj/cut/gpu,PairLJCutGPU) +PairStyle(gayberne/gpu,PairGayBerneGPU) +#endif + diff --git a/src/Makefile b/src/Makefile index f84ef210f5..55f00f28db 100755 --- a/src/Makefile +++ b/src/Makefile @@ -13,7 +13,7 @@ OBJ = $(SRC:.cpp=.o) # Package variables -PACKAGE = asphere class2 colloid dipole dpd granular \ +PACKAGE = asphere class2 colloid dipole dpd gpu granular \ kspace manybody meam molecule opt peri poems reax xtc PACKUSER = user-ackland user-cg-cmm user-ewaldn user-smd diff --git a/src/OPT/Install.csh b/src/OPT/Install.csh index 0a42aeff25..2e0d6f6dd0 100644 --- a/src/OPT/Install.csh +++ b/src/OPT/Install.csh @@ -1,21 +1,37 @@ # Install/unInstall package classes in LAMMPS +# do not copy eam and charmm files if non-OPT versions do not exist if ($1 == 1) then - cp style_opt.h .. + if (-e ../pair_eam.cpp) then + cp pair_eam_opt.cpp .. + cp pair_eam_alloy_opt.cpp .. + cp pair_eam_fs_opt.cpp .. + cp pair_eam_opt.h .. + cp pair_eam_alloy_opt.h .. + cp pair_eam_fs_opt.h .. + endif + + cp style_opt.h tmp.h + if (! -e ../pair_eam.cpp) then + grep -v eam tmp.h > tmp1.h + mv tmp1.h tmp.h + endif + if (! -e ../pair_lj_charmm_coul_long.cpp) then + grep -v charmm tmp.h > tmp1.h + mv tmp1.h tmp.h + endif + mv tmp.h ../style_opt.h + + if (-e ../pair_lj_charmm_coul_long.cpp) then + cp pair_lj_charmm_coul_long_opt.cpp .. + cp pair_lj_charmm_coul_long_opt.h .. + endif - cp pair_eam_opt.cpp .. - cp pair_eam_alloy_opt.cpp .. - cp pair_eam_fs_opt.cpp .. - cp pair_lj_charmm_coul_long_opt.cpp .. cp pair_lj_cut_opt.cpp .. - cp pair_morse_opt.cpp .. - - cp pair_eam_opt.h .. - cp pair_eam_alloy_opt.h .. - cp pair_eam_fs_opt.h .. - cp pair_lj_charmm_coul_long_opt.h .. cp pair_lj_cut_opt.h .. + + cp pair_morse_opt.cpp .. cp pair_morse_opt.h .. else if ($1 == 0) then diff --git a/src/style.h b/src/style.h index e5cdf8c1d1..3df9044334 100644 --- a/src/style.h +++ b/src/style.h @@ -372,6 +372,7 @@ RegionStyle(union,RegUnion) #include "style_colloid.h" #include "style_dipole.h" #include "style_dpd.h" +#include "style_gpu.h" #include "style_granular.h" #include "style_kspace.h" #include "style_manybody.h"