diff --git a/src/GPU/pair_gayberne_gpu.cpp b/src/GPU/pair_gayberne_gpu.cpp index 2dd7020413..7389b3714b 100644 --- a/src/GPU/pair_gayberne_gpu.cpp +++ b/src/GPU/pair_gayberne_gpu.cpp @@ -51,7 +51,8 @@ bool gb_gpu_init(int &ij_size, const int ntypes, const double gamma, double **epsilon, double *host_lshape, int **form, double **host_lj1, double **host_lj2, double **host_lj3, double **host_lj4, double **offset, double *special_lj, - const int max_nbors, const int thread, const int gpu_id); + const int nlocal, const int nall, const int max_nbors, + const int thread, const int gpu_id); void gb_gpu_clear(const int thread); int * gb_gpu_reset_nbors(const int nall, const int nlocal, const int inum, int *ilist, const int *numj, const int *type, @@ -80,22 +81,24 @@ typedef bool (*_gb_gpu_init)(int &ij_size, const int ntypes, const double gamma, double **epsilon, double *host_lshape, int **form, double **host_lj1, double **host_lj2, double **host_lj3, double **host_lj4, double **offset, double *special_lj, - const int max_nbors, const int thread, const int gpu_id); + const int nlocal, const int nall, const int max_nbors, + const int thread, const int gpu_id); typedef void (*_gb_gpu_clear)(const int thread); -typedef int * (*_gb_gpu_reset_nbors)(const int nall, const int nlocal, const int inum, - int *ilist, const int *numj, const int *type, - const int thread, bool &success); +typedef int * (*_gb_gpu_reset_nbors)(const int nall, const int nlocal, + const int inum, int *ilist, const int *numj, const int *type, + const int thread, bool &success); typedef void (*_gb_gpu_nbors)(const int *ij, const int num_ij, const bool eflag, - const int thread); -typedef void (*_gb_gpu_atom)(double **host_x, double **host_quat, const int *host_type, + const int thread); +typedef void (*_gb_gpu_atom)(double **host_x, double **host_quat, + const int *host_type, const bool rebuild, const int thread); +typedef void (*_gb_gpu_gayberne)(const bool eflag, const bool vflag, const bool rebuild, const int thread); -typedef void (*_gb_gpu_gayberne)(const bool eflag, const bool vflag, const bool rebuild, - const int thread); typedef double (*_gb_gpu_forces)(double **f, double **tor, const int *ilist, - const bool eflag, const bool vflag, const bool eflag_atom, - const bool vflag_atom, double *eatom, double **vatom, - double *virial, const int thread); -typedef void (*_gb_gpu_name)(const int gpu_id, const int max_nbors, char * name); + const bool eflag, const bool vflag, const bool eflag_atom, + const bool vflag_atom, double *eatom, double **vatom, + double *virial, const int thread); +typedef void (*_gb_gpu_name)(const int gpu_id, const int max_nbors, + char * name); typedef void (*_gb_gpu_time)(const int thread); typedef int (*_gb_gpu_num_devices)(); typedef double (*_gb_gpu_bytes)(); @@ -121,7 +124,8 @@ using namespace LAMMPS_NS; PairGayBerneGPU::PairGayBerneGPU(LAMMPS *lmp) : PairGayBerne(lmp), my_thread(0), omp_chunk(0), nthreads(1), multi_gpu_mode(ONE_NODE), - multi_gpu_param(0) + multi_gpu_param(0), + output_time(false) { ij_new[0]=NULL; @@ -152,14 +156,16 @@ PairGayBerneGPU::PairGayBerneGPU(LAMMPS *lmp) : PairGayBerne(lmp), my_thread(0), PairGayBerneGPU::~PairGayBerneGPU() { - printf("\n\n-------------------------------------"); - printf("--------------------------------\n"); - printf(" GPU Time Stamps: "); - printf("\n-------------------------------------"); - printf("--------------------------------\n"); - gb_gpu_time(my_thread); - printf("-------------------------------------"); - printf("--------------------------------\n\n"); + if (output_time) { + printf("\n\n-------------------------------------"); + printf("--------------------------------\n"); + printf(" GPU Time Stamps (on proc 0): "); + printf("\n-------------------------------------"); + printf("--------------------------------\n"); + gb_gpu_time(my_thread); + printf("-------------------------------------"); + printf("--------------------------------\n\n"); + } #pragma omp parallel { @@ -216,7 +222,7 @@ void PairGayBerneGPU::compute(int eflag, int vflag) success); #endif if (!success) - error->one("Total # of atoms exceeds maximum allowed per GPGPU"); + error->one("Out of memory on GPGPU"); // copy atom data to GPU gb_gpu_atom(atom->x,atom->quat,atom->type,rebuild,my_thread); @@ -320,6 +326,8 @@ void PairGayBerneGPU::settings(int narg, char **arg) void PairGayBerneGPU::init_style() { + if (comm->me == 0) + output_time=true; if (force->pair_match("gpu",0) == NULL) error->all("Cannot use pair hybrid with multiple GPU pair styles"); if (!atom->quat_flag || !atom->torque_flag || !atom->avec->shape_type) @@ -387,9 +395,10 @@ void PairGayBerneGPU::init_style() bool init_ok=gb_gpu_init(ij_size, atom->ntypes+1, gamma, upsilon, mu, shape, well, cutsq, sigma, epsilon, lshape, form, lj1, lj2, lj3, lj4, offset, force->special_lj, - neighbor->oneatom, my_thread, my_gpu); + atom->nlocal, atom->nlocal+atom->nghost, 300, + my_thread, my_gpu); if (!init_ok) - error->one("At least one process could not allocate a CUDA-enabled gpu"); + error->one("At least 1 proc could not allocate a CUDA gpu or memory"); if (ij_new[my_thread]!=NULL) delete [] ij_new[my_thread]; diff --git a/src/GPU/pair_gayberne_gpu.h b/src/GPU/pair_gayberne_gpu.h index 4ab7932d29..cafea53bb6 100644 --- a/src/GPU/pair_gayberne_gpu.h +++ b/src/GPU/pair_gayberne_gpu.h @@ -21,7 +21,7 @@ PairStyle(gayberne/gpu,PairGayBerneGPU) #define LMP_PAIR_GPU_H #include "pair_gayberne.h" -#define MAX_GPU_THREADS 4 +#define MAX_GPU_THREADS 1 namespace LAMMPS_NS { @@ -43,6 +43,7 @@ class PairGayBerneGPU : public PairGayBerne { int my_thread, nthreads, thread_inum[MAX_GPU_THREADS], omp_chunk; int last_neighbor, multi_gpu_mode, multi_gpu_param; + bool output_time; }; }