From 7e5e741ea1597bbcff7eaefb97b9360fc25ca5b8 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Mon, 3 Aug 2020 08:12:09 -0400 Subject: [PATCH] update code for ufm/gpu to match changes in base class where uf4 is removed --- lib/gpu/lal_ufm.cpp | 31 +++++++++++++++---------------- lib/gpu/lal_ufm.h | 23 +++++++++++------------ lib/gpu/lal_ufm_ext.cpp | 33 ++++++++++++++++----------------- src/GPU/pair_ufm_gpu.cpp | 8 ++++---- 4 files changed, 46 insertions(+), 49 deletions(-) diff --git a/lib/gpu/lal_ufm.cpp b/lib/gpu/lal_ufm.cpp index 9b7d42dec9..a86d07f340 100644 --- a/lib/gpu/lal_ufm.cpp +++ b/lib/gpu/lal_ufm.cpp @@ -10,7 +10,7 @@ This file is part of the LAMMPS Accelerator Library (LAMMPS_AL) __________________________________________________________________________ - begin : + begin : email : pl.rodolfo@gmail.com dekoning@ifi.unicamp.br ***************************************************************************/ @@ -38,7 +38,7 @@ template UFMT::~UFM() { clear(); } - + template int UFMT::bytes_per_atom(const int max_nbors) const { return this->bytes_per_atom_atomic(max_nbors); @@ -46,9 +46,9 @@ int UFMT::bytes_per_atom(const int max_nbors) const { template int UFMT::init(const int ntypes, - double **host_cutsq, double **host_uf1, - double **host_uf2, double **host_uf3, - double **host_uf4, double **host_offset, + double **host_cutsq, double **host_uf1, + double **host_uf2, double **host_uf3, + double **host_offset, double *host_special_lj, const int nlocal, const int nall, const int max_nbors, const int maxspecial, const double cell_size, @@ -78,11 +78,11 @@ int UFMT::init(const int ntypes, uf1.alloc(lj_types*lj_types,*(this->ucl_device),UCL_READ_ONLY); this->atom->type_pack4(ntypes,lj_types,uf1,host_write,host_uf1,host_uf2, - host_cutsq); + host_cutsq); uf3.alloc(lj_types*lj_types,*(this->ucl_device),UCL_READ_ONLY); - this->atom->type_pack4(ntypes,lj_types,uf3,host_write,host_uf3,host_uf4, - host_offset); + 
this->atom->type_pack4(ntypes,lj_types,uf3,host_write,host_uf3,host_uf2, + host_offset); UCL_H_Vec dview; sp_lj.alloc(4,*(this->ucl_device),UCL_READ_ONLY); @@ -96,18 +96,17 @@ int UFMT::init(const int ntypes, template void UFMT::reinit(const int ntypes, double **host_cutsq, double **host_uf1, - double **host_uf2, double **host_uf3, - double **host_uf4, double **host_offset) { + double **host_uf2, double **host_uf3, double **host_offset) { // Allocate a host write buffer for data initialization UCL_H_Vec host_write(_lj_types*_lj_types*32,*(this->ucl_device), UCL_WRITE_ONLY); - + for (int i=0; i<_lj_types*_lj_types; i++) host_write[i]=0.0; - + this->atom->type_pack4(ntypes,_lj_types,uf1,host_write,host_uf1,host_uf2, host_cutsq); - this->atom->type_pack4(ntypes,_lj_types,uf3,host_write,host_uf3,host_uf4, + this->atom->type_pack4(ntypes,_lj_types,uf3,host_write,host_uf3,host_uf2, host_offset); } @@ -145,7 +144,7 @@ void UFMT::loop(const bool _eflag, const bool _vflag) { vflag=1; else vflag=0; - + int GX=static_cast<int>(ceil(static_cast<double>(this->ans->inum())/ (BX/this->_threads_per_atom))); @@ -157,12 +156,12 @@ void UFMT::loop(const bool _eflag, const bool _vflag) { this->k_pair_fast.run(&this->atom->x, &uf1, &uf3, &sp_lj, &this->nbor->dev_nbor, &this->_nbor_data->begin(), &this->ans->force, &this->ans->engv, &eflag, - &vflag, &ainum, &nbor_pitch, + &vflag, &ainum, &nbor_pitch, &this->_threads_per_atom); } else { this->k_pair.set_size(GX,BX); this->k_pair.run(&this->atom->x, &uf1, &uf3, &_lj_types, &sp_lj, - &this->nbor->dev_nbor, &this->_nbor_data->begin(), + &this->nbor->dev_nbor, &this->_nbor_data->begin(), &this->ans->force, &this->ans->engv, &eflag, &vflag, &ainum, &nbor_pitch, &this->_threads_per_atom); } diff --git a/lib/gpu/lal_ufm.h b/lib/gpu/lal_ufm.h index 65ee15d5b5..14b96bcc86 100644 --- a/lib/gpu/lal_ufm.h +++ b/lib/gpu/lal_ufm.h @@ -10,7 +10,7 @@ This file is part of the LAMMPS Accelerator Library (LAMMPS_AL) 
__________________________________________________________________________ - begin : + begin : email : pl.rodolfo@gmail.com dekoning@ifi.unicamp.br ***************************************************************************/ @@ -32,7 +32,7 @@ class UFM : public BaseAtomic { /** \param max_nbors initial number of rows in the neighbor matrix * \param cell_size cutoff + skin * \param gpu_split fraction of particles handled by device - * + * * Returns: * - 0 if successful * - -1 if fix gpu not found @@ -41,16 +41,15 @@ class UFM : public BaseAtomic { * - -5 Double precision is not supported on card **/ int init(const int ntypes, double **host_cutsq, double **host_uf1, double **host_uf2, double **host_uf3, - double **host_uf4, double **host_offset, double *host_special_lj, - const int nlocal, const int nall, const int max_nbors, - const int maxspecial, const double cell_size, + double **host_offset, double *host_special_lj, + const int nlocal, const int nall, const int max_nbors, + const int maxspecial, const double cell_size, const double gpu_split, FILE *screen); - + /// Send updated coeffs from host to device (to be compatible with fix adapt) - void reinit(const int ntypes, double **host_cutsq, - double **host_uf1, double **host_uf2, double **host_uf3, - double **host_uf4, double **host_offset); - + void reinit(const int ntypes, double **host_cutsq, double **host_uf1, + double **host_uf2, double **host_uf3, double **host_offset); + /// Clear all host and device data /** \note This is called at the beginning of the init() routine **/ void clear(); @@ -65,7 +64,7 @@ class UFM : public BaseAtomic { /// uf1.x = uf1, uf1.y = uf2, uf1.z = cutsq UCL_D_Vec uf1; - /// uf3.x = uf3, uf3.y = uf4, uf3.z = offset + /// uf3.x = uf3, uf3.y = uf2, uf3.z = offset UCL_D_Vec uf3; /// Special LJ values UCL_D_Vec sp_lj; @@ -73,7 +72,7 @@ class UFM : public BaseAtomic { /// If atom type constants fit in shared memory, use fast kernels bool shared_types; - /// Number of atom types + /// 
Number of atom types int _lj_types; private: diff --git a/lib/gpu/lal_ufm_ext.cpp b/lib/gpu/lal_ufm_ext.cpp index dd476ec3fa..12809a28fb 100644 --- a/lib/gpu/lal_ufm_ext.cpp +++ b/lib/gpu/lal_ufm_ext.cpp @@ -10,7 +10,7 @@ This file is part of the LAMMPS Accelerator Library (LAMMPS_AL) __________________________________________________________________________ - begin : + begin : email : pl.rodolfo@gmail.com dekoning@ifi.unicamp.br ***************************************************************************/ @@ -30,10 +30,10 @@ static UFM UFMLMF; // Allocate memory on host and device and copy constants to device // --------------------------------------------------------------------------- int ufml_gpu_init(const int ntypes, double **cutsq, double **host_uf1, - double **host_uf2, double **host_uf3, double **host_uf4, - double **offset, double *special_lj, const int inum, const int nall, - const int max_nbors, const int maxspecial, const double cell_size, - int &gpu_mode, FILE *screen) { + double **host_uf2, double **host_uf3, double **offset, + double *special_lj, const int inum, const int nall, + const int max_nbors, const int maxspecial, const double cell_size, + int &gpu_mode, FILE *screen) { UFMLMF.clear(); gpu_mode=UFMLMF.device->gpu_mode(); double gpu_split=UFMLMF.device->particle_split(); @@ -57,8 +57,8 @@ int ufml_gpu_init(const int ntypes, double **cutsq, double **host_uf1, int init_ok=0; if (world_me==0) init_ok=UFMLMF.init(ntypes, cutsq, host_uf1, host_uf2, host_uf3, - host_uf4, offset, special_lj, inum, nall, 300, - maxspecial, cell_size, gpu_split, screen); + offset, special_lj, inum, nall, 300, + maxspecial, cell_size, gpu_split, screen); UFMLMF.device->world_barrier(); if (message) @@ -74,12 +74,12 @@ int ufml_gpu_init(const int ntypes, double **cutsq, double **host_uf1, fflush(screen); } if (gpu_rank==i && world_me!=0) - init_ok=UFMLMF.init(ntypes, cutsq, host_uf1, host_uf2, host_uf3, host_uf4, + init_ok=UFMLMF.init(ntypes, cutsq, host_uf1, host_uf2, 
host_uf3, offset, special_lj, inum, nall, 300, maxspecial, cell_size, gpu_split, screen); UFMLMF.device->gpu_barrier(); - if (message) + if (message) fprintf(screen,"Done.\n"); } if (message) @@ -94,19 +94,18 @@ int ufml_gpu_init(const int ntypes, double **cutsq, double **host_uf1, // Copy updated coeffs from host to device // --------------------------------------------------------------------------- void ufml_gpu_reinit(const int ntypes, double **cutsq, double **host_uf1, - double **host_uf2, double **host_uf3, double **host_uf4, - double **offset) { + double **host_uf2, double **host_uf3, double **offset) { int world_me=UFMLMF.device->world_me(); int gpu_rank=UFMLMF.device->gpu_rank(); int procs_per_gpu=UFMLMF.device->procs_per_gpu(); - + if (world_me==0) - UFMLMF.reinit(ntypes, cutsq, host_uf1, host_uf2, host_uf3, host_uf4, offset); + UFMLMF.reinit(ntypes, cutsq, host_uf1, host_uf2, host_uf3, offset); UFMLMF.device->world_barrier(); - + for (int i=0; i<procs_per_gpu; i++) { if (gpu_rank==i && world_me!=0) - UFMLMF.reinit(ntypes, cutsq, host_uf1, host_uf2, host_uf3, host_uf4, offset); + UFMLMF.reinit(ntypes, cutsq, host_uf1, host_uf2, host_uf3, offset); UFMLMF.device->gpu_barrier(); } } @@ -125,8 +124,8 @@ int ** ufml_gpu_compute_n(const int ago, const int inum_full, return UFMLMF.compute(ago, inum_full, nall, host_x, host_type, sublo, subhi, tag, nspecial, special, eflag, vflag, eatom, vatom, host_start, ilist, jnum, cpu_time, success); -} - +} + void ufml_gpu_compute(const int ago, const int inum_full, const int nall, double **host_x, int *host_type, int *ilist, int *numj, int **firstneigh, const bool eflag, const bool vflag, diff --git a/src/GPU/pair_ufm_gpu.cpp b/src/GPU/pair_ufm_gpu.cpp index cea898b050..2b4cc2269f 100644 --- a/src/GPU/pair_ufm_gpu.cpp +++ b/src/GPU/pair_ufm_gpu.cpp @@ -43,13 +43,13 @@ using namespace LAMMPS_NS; // External functions from cuda library for atom decomposition int ufml_gpu_init(const int ntypes, double **cutsq, double **host_uf1, - double **host_uf2, double **host_uf3, double **host_uf4, + double **host_uf2, double **host_uf3, double **offset, double *special_lj, const int nlocal, const int nall, const int max_nbors, const int maxspecial, 
const double cell_size, int &gpu_mode, FILE *screen); int ufml_gpu_reinit(const int ntypes, double **cutsq, double **host_uf1, - double **host_uf2, double **host_uf3, double **host_uf4, + double **host_uf2, double **host_uf3, double **offset); void ufml_gpu_clear(); @@ -166,7 +166,7 @@ void PairUFMGPU::init_style() int maxspecial=0; if (atom->molecular) maxspecial=atom->maxspecial; - int success = ufml_gpu_init(atom->ntypes+1, cutsq, uf1, uf2, uf3, uf4, + int success = ufml_gpu_init(atom->ntypes+1, cutsq, uf1, uf2, uf3, offset, force->special_lj, atom->nlocal, atom->nlocal+atom->nghost, 300, maxspecial, cell_size, gpu_mode, screen); @@ -185,7 +185,7 @@ void PairUFMGPU::reinit() { Pair::reinit(); - ufml_gpu_reinit(atom->ntypes+1, cutsq, uf1, uf2, uf3, uf4, offset); + ufml_gpu_reinit(atom->ntypes+1, cutsq, uf1, uf2, uf3, offset); } /* ---------------------------------------------------------------------- */