git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@12146 f3b2605a-c512-4ea7-a41b-209d697bcdaa

sjplimp 2014-06-27 17:25:22 +00:00
parent f745f24bcd
commit 67ae64329e
24 changed files with 422 additions and 55 deletions

View File

@@ -67,7 +67,9 @@ OBJS = $(OBJ_DIR)/lal_atom.o $(OBJ_DIR)/lal_ans.o \
$(OBJ_DIR)/lal_soft.o $(OBJ_DIR)/lal_soft_ext.o \
$(OBJ_DIR)/lal_lj_coul_msm.o $(OBJ_DIR)/lal_lj_coul_msm_ext.o \
$(OBJ_DIR)/lal_lj_gromacs.o $(OBJ_DIR)/lal_lj_gromacs_ext.o \
-$(OBJ_DIR)/lal_dpd.o $(OBJ_DIR)/lal_dpd_ext.o
+$(OBJ_DIR)/lal_dpd.o $(OBJ_DIR)/lal_dpd_ext.o \
+$(OBJ_DIR)/lal_coul.o $(OBJ_DIR)/lal_coul_ext.o \
+$(OBJ_DIR)/lal_coul_debye.o $(OBJ_DIR)/lal_coul_debye_ext.o
CBNS = $(OBJ_DIR)/device.cubin $(OBJ_DIR)/device_cubin.h \
$(OBJ_DIR)/atom.cubin $(OBJ_DIR)/atom_cubin.h \
@@ -114,7 +116,9 @@ CBNS = $(OBJ_DIR)/device.cubin $(OBJ_DIR)/device_cubin.h \
$(OBJ_DIR)/soft.cubin $(OBJ_DIR)/soft_cubin.h \
$(OBJ_DIR)/lj_coul_msm.cubin $(OBJ_DIR)/lj_coul_msm_cubin.h \
$(OBJ_DIR)/lj_gromacs.cubin $(OBJ_DIR)/lj_gromacs_cubin.h \
-$(OBJ_DIR)/dpd.cubin $(OBJ_DIR)/dpd_cubin.h
+$(OBJ_DIR)/dpd.cubin $(OBJ_DIR)/dpd_cubin.h \
+$(OBJ_DIR)/coul.cubin $(OBJ_DIR)/coul_cubin.h \
+$(OBJ_DIR)/coul_debye.cubin $(OBJ_DIR)/coul_debye_cubin.h
all: $(OBJ_DIR) $(GPU_LIB) $(EXECS)
@@ -676,6 +680,30 @@ $(OBJ_DIR)/lal_dpd.o: $(ALL_H) lal_dpd.h lal_dpd.cpp $(OBJ_DIR)/dpd_cubin.h $(OB
$(OBJ_DIR)/lal_dpd_ext.o: $(ALL_H) lal_dpd.h lal_dpd_ext.cpp lal_base_dpd.h
	$(CUDR) -o $@ -c lal_dpd_ext.cpp -I$(OBJ_DIR)
+$(OBJ_DIR)/coul.cubin: lal_coul.cu lal_precision.h lal_preprocessor.h
+	$(CUDA) --cubin -DNV_KERNEL -o $@ lal_coul.cu
+
+$(OBJ_DIR)/coul_cubin.h: $(OBJ_DIR)/coul.cubin
+	$(BIN2C) -c -n coul $(OBJ_DIR)/coul.cubin > $(OBJ_DIR)/coul_cubin.h
+
+$(OBJ_DIR)/lal_coul.o: $(ALL_H) lal_coul.h lal_coul.cpp $(OBJ_DIR)/coul_cubin.h $(OBJ_DIR)/lal_base_charge.o
+	$(CUDR) -o $@ -c lal_coul.cpp -I$(OBJ_DIR)
+
+$(OBJ_DIR)/lal_coul_ext.o: $(ALL_H) lal_coul.h lal_coul_ext.cpp lal_base_charge.h
+	$(CUDR) -o $@ -c lal_coul_ext.cpp -I$(OBJ_DIR)
+
+$(OBJ_DIR)/coul_debye.cubin: lal_coul_debye.cu lal_precision.h lal_preprocessor.h
+	$(CUDA) --cubin -DNV_KERNEL -o $@ lal_coul_debye.cu
+
+$(OBJ_DIR)/coul_debye_cubin.h: $(OBJ_DIR)/coul_debye.cubin
+	$(BIN2C) -c -n coul_debye $(OBJ_DIR)/coul_debye.cubin > $(OBJ_DIR)/coul_debye_cubin.h
+
+$(OBJ_DIR)/lal_coul_debye.o: $(ALL_H) lal_coul_debye.h lal_coul_debye.cpp $(OBJ_DIR)/coul_debye_cubin.h $(OBJ_DIR)/lal_base_charge.o
+	$(CUDR) -o $@ -c lal_coul_debye.cpp -I$(OBJ_DIR)
+
+$(OBJ_DIR)/lal_coul_debye_ext.o: $(ALL_H) lal_coul_debye.h lal_coul_debye_ext.cpp lal_base_charge.h
+	$(CUDR) -o $@ -c lal_coul_debye_ext.cpp -I$(OBJ_DIR)
$(BIN_DIR)/nvc_get_devices: ./geryon/ucl_get_devices.cpp $(NVD_H)
$(CUDR) -o $@ ./geryon/ucl_get_devices.cpp -DUCL_CUDADR $(CUDA_LIB) -lcuda

View File

@@ -56,7 +56,9 @@ OBJS = $(OBJ_DIR)/lal_atom.o $(OBJ_DIR)/lal_answer.o \
$(OBJ_DIR)/lal_soft.o $(OBJ_DIR)/lal_soft_ext.o \
$(OBJ_DIR)/lal_lj_coul_msm.o $(OBJ_DIR)/lal_lj_coul_msm_ext.o \
$(OBJ_DIR)/lal_lj_gromacs.o $(OBJ_DIR)/lal_lj_gromacs_ext.o \
-$(OBJ_DIR)/lal_dpd.o $(OBJ_DIR)/lal_dpd_ext.o
+$(OBJ_DIR)/lal_dpd.o $(OBJ_DIR)/lal_dpd_ext.o \
+$(OBJ_DIR)/lal_coul.o $(OBJ_DIR)/lal_coul_ext.o \
+$(OBJ_DIR)/lal_coul_debye.o $(OBJ_DIR)/lal_coul_debye_ext.o
KERS = $(OBJ_DIR)/device_cl.h $(OBJ_DIR)/atom_cl.h \
$(OBJ_DIR)/neighbor_cpu_cl.h $(OBJ_DIR)/pppm_cl.h \
@@ -79,7 +81,8 @@ KERS = $(OBJ_DIR)/device_cl.h $(OBJ_DIR)/atom_cl.h \
$(OBJ_DIR)/lj_coul_debye_cl.h $(OBJ_DIR)/coul_dsf_cl.h \
$(OBJ_DIR)/sw_cl.h $(OBJ_DIR)/beck_cl.h $(OBJ_DIR)/mie_cl.h \
$(OBJ_DIR)/soft_cl.h $(OBJ_DIR)/lj_coul_msm_cl.h \
-$(OBJ_DIR)/lj_gromacs_cl.h $(OBJ_DIR)/dpd_cl.h
+$(OBJ_DIR)/lj_gromacs_cl.h $(OBJ_DIR)/dpd_cl.h \
+$(OBJ_DIR)/coul_cl.h $(OBJ_DIR)/coul_debye_cl.h
OCL_EXECS = $(BIN_DIR)/ocl_get_devices
@@ -485,6 +488,24 @@ $(OBJ_DIR)/lal_dpd.o: $(ALL_H) lal_dpd.h lal_dpd.cpp $(OBJ_DIR)/dpd_cl.h $(OBJ_
$(OBJ_DIR)/lal_dpd_ext.o: $(ALL_H) lal_dpd.h lal_dpd_ext.cpp lal_base_dpd.h
	$(OCL) -o $@ -c lal_dpd_ext.cpp -I$(OBJ_DIR)
+$(OBJ_DIR)/coul_cl.h: lal_coul.cu $(PRE1_H)
+	$(BSH) ./geryon/file_to_cstr.sh coul $(PRE1_H) lal_coul.cu $(OBJ_DIR)/coul_cl.h;
+
+$(OBJ_DIR)/lal_coul.o: $(ALL_H) lal_coul.h lal_coul.cpp $(OBJ_DIR)/coul_cl.h $(OBJ_DIR)/lal_base_charge.o
+	$(OCL) -o $@ -c lal_coul.cpp -I$(OBJ_DIR)
+
+$(OBJ_DIR)/lal_coul_ext.o: $(ALL_H) lal_coul.h lal_coul_ext.cpp lal_base_charge.h
+	$(OCL) -o $@ -c lal_coul_ext.cpp -I$(OBJ_DIR)
+
+$(OBJ_DIR)/coul_debye_cl.h: lal_coul_debye.cu $(PRE1_H)
+	$(BSH) ./geryon/file_to_cstr.sh coul_debye $(PRE1_H) lal_coul_debye.cu $(OBJ_DIR)/coul_debye_cl.h;
+
+$(OBJ_DIR)/lal_coul_debye.o: $(ALL_H) lal_coul_debye.h lal_coul_debye.cpp $(OBJ_DIR)/coul_debye_cl.h $(OBJ_DIR)/lal_base_charge.o
+	$(OCL) -o $@ -c lal_coul_debye.cpp -I$(OBJ_DIR)
+
+$(OBJ_DIR)/lal_coul_debye_ext.o: $(ALL_H) lal_coul_debye.h lal_coul_debye_ext.cpp lal_base_charge.h
+	$(OCL) -o $@ -c lal_coul_debye_ext.cpp -I$(OBJ_DIR)
$(BIN_DIR)/ocl_get_devices: ./geryon/ucl_get_devices.cpp
$(OCL) -o $@ ./geryon/ucl_get_devices.cpp -DUCL_OPENCL $(OCL_LINK)

View File

@@ -97,6 +97,25 @@ int BornT::init(const int ntypes, double **host_cutsq,
return 0;
}
+template <class numtyp, class acctyp>
+void BornT::reinit(const int ntypes, double **host_rhoinv,
+                   double **host_born1, double **host_born2,
+                   double **host_born3, double **host_a, double **host_c,
+                   double **host_d, double **host_offset) {
+  // Allocate a host write buffer for data initialization
+  UCL_H_Vec<numtyp> host_write(_lj_types*_lj_types*32,*(this->ucl_device),
+                               UCL_WRITE_ONLY);
+  for (int i=0; i<_lj_types*_lj_types; i++)
+    host_write[i]=0.0;
+  this->atom->type_pack4(ntypes,_lj_types,coeff1,host_write,host_rhoinv,
+                         host_born1,host_born2,host_born3);
+  this->atom->type_pack4(ntypes,_lj_types,coeff2,host_write,host_a,host_c,
+                         host_d,host_offset);
+}
template <class numtyp, class acctyp>
void BornT::clear() {
if (!_allocated)

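Note: every reinit() added in this commit funnels its host tables through Atom::type_pack4, which interleaves four per-type-pair coefficient tables into one flattened vec4-style device array. A minimal standalone C++ sketch of the assumed packing behavior follows; the real helper in lal_atom.h stages through a UCL host buffer and copies to the device, and the 1-based type indexing here is an assumption:

#include <vector>

struct numtyp4 { double x, y, z, w; };  // stand-in for the vec4 device type

// Sketch only: pack four ntypes x ntypes host tables into the
// (m_size x m_size) layout the kernels index as coeff[itype*m_size+jtype].
std::vector<numtyp4> type_pack4_sketch(int ntypes, int m_size,
                                       double **one, double **two,
                                       double **three, double **four) {
  std::vector<numtyp4> packed(m_size * m_size, numtyp4{0.0, 0.0, 0.0, 0.0});
  for (int i = 1; i < ntypes; i++)        // assumed 1-based LAMMPS types
    for (int j = 1; j < ntypes; j++)
      packed[i * m_size + j] = numtyp4{one[i][j], two[i][j],
                                       three[i][j], four[i][j]};
  return packed;  // real code copies this buffer into a UCL_D_Vec
}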
View File

@@ -45,7 +45,13 @@ class Born : public BaseAtomic<numtyp, acctyp> {
const int nlocal, const int nall, const int max_nbors,
const int maxspecial, const double cell_size,
const double gpu_split, FILE *screen);
+  /// Send updated coeffs from host to device (to be compatible with fix adapt)
+  void reinit(const int ntypes, double **host_rhoinv,
+              double **host_born1, double **host_born2,
+              double **host_born3, double **host_a, double **host_c,
+              double **host_d, double **host_offset);
/// Clear all host and device data
/** \note This is called at the beginning of the init() routine **/
void clear();

View File

@@ -92,6 +92,32 @@ int born_gpu_init(const int ntypes, double **cutsq, double **host_rhoinv,
return init_ok;
}
+// ---------------------------------------------------------------------------
+// Copy updated coeffs from host to device
+// ---------------------------------------------------------------------------
+void born_gpu_reinit(const int ntypes, double **host_rhoinv,
+                     double **host_born1, double **host_born2,
+                     double **host_born3, double **host_a, double **host_c,
+                     double **host_d, double **offset) {
+  int world_me=BORNMF.device->world_me();
+  int gpu_rank=BORNMF.device->gpu_rank();
+  int procs_per_gpu=BORNMF.device->procs_per_gpu();
+  if (world_me==0)
+    BORNMF.reinit(ntypes, host_rhoinv, host_born1, host_born2,
+                  host_born3, host_a, host_c, host_d, offset);
+  BORNMF.device->world_barrier();
+  for (int i=0; i<procs_per_gpu; i++) {
+    if (gpu_rank==i && world_me!=0)
+      BORNMF.reinit(ntypes, host_rhoinv, host_born1, host_born2,
+                    host_born3, host_a, host_c, host_d, offset);
+    BORNMF.device->gpu_barrier();
+  }
+}
void born_gpu_clear() {
BORNMF.clear();
}

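Note: each *_gpu_reinit wrapper added in this commit repeats the same staggered update: global rank 0 pushes the new tables first, then the remaining processes sharing each GPU take turns between barriers, so no two processes write device buffers concurrently. Condensed into one generic C++ sketch (DeviceT and do_reinit are illustrative stand-ins, not names from the library):

// Generic shape of the *_gpu_reinit wrappers; the concrete versions call
// BORNMF.reinit(...), BUCKMF.reinit(...), etc. in place of do_reinit().
template <class DeviceT, class Fn>
void staggered_reinit(DeviceT &device, Fn do_reinit) {
  const int world_me = device.world_me();        // global MPI rank
  const int gpu_rank = device.gpu_rank();        // rank among procs on this GPU
  const int procs_per_gpu = device.procs_per_gpu();

  if (world_me == 0)             // rank 0 updates first
    do_reinit();
  device.world_barrier();        // everyone waits for rank 0

  for (int i = 0; i < procs_per_gpu; i++) {  // then one proc per GPU at a time
    if (gpu_rank == i && world_me != 0)
      do_reinit();
    device.gpu_barrier();
  }
}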
View File

@@ -91,6 +91,24 @@ int BuckT::init(const int ntypes, double **host_cutsq,
return 0;
}
+template <class numtyp, class acctyp>
+void BuckT::reinit(const int ntypes, double **host_cutsq,
+                   double **host_rhoinv, double **host_buck1, double **host_buck2,
+                   double **host_a, double **host_c, double **host_offset) {
+  // Allocate a host write buffer for data initialization
+  UCL_H_Vec<numtyp> host_write(_lj_types*_lj_types*32,*(this->ucl_device),
+                               UCL_WRITE_ONLY);
+  for (int i=0; i<_lj_types*_lj_types; i++)
+    host_write[i]=0.0;
+  this->atom->type_pack4(ntypes,_lj_types,coeff1,host_write,host_rhoinv,
+                         host_buck1,host_buck2,host_cutsq);
+  this->atom->type_pack4(ntypes,_lj_types,coeff2,host_write,host_a,host_c,
+                         host_offset);
+}
template <class numtyp, class acctyp>
void BuckT::clear() {
if (!_allocated)

View File

@@ -45,6 +45,11 @@ class Buck : public BaseAtomic<numtyp, acctyp> {
const int maxspecial, const double cell_size,
const double gpu_split, FILE *screen);
+  /// Send updated coeffs from host to device (to be compatible with fix adapt)
+  void reinit(const int ntypes, double **host_cutsq,
+              double **host_rhoinv, double **host_buck1, double **host_buck2,
+              double **host_a, double **host_c, double **host_offset);
/// Clear all host and device data
/** \note This is called at the beginning of the init() routine **/
void clear();

View File

@@ -89,6 +89,31 @@ int buck_gpu_init(const int ntypes, double **cutsq, double **host_rhoinv,
return init_ok;
}
+// ---------------------------------------------------------------------------
+// Copy updated coeffs from host to device
+// ---------------------------------------------------------------------------
+void buck_gpu_reinit(const int ntypes, double **cutsq, double **host_rhoinv,
+                     double **host_buck1, double **host_buck2,
+                     double **host_a, double **host_c, double **offset) {
+  int world_me=BUCKMF.device->world_me();
+  int gpu_rank=BUCKMF.device->gpu_rank();
+  int procs_per_gpu=BUCKMF.device->procs_per_gpu();
+  if (world_me==0)
+    BUCKMF.reinit(ntypes, cutsq, host_rhoinv, host_buck1, host_buck2,
+                  host_a, host_c, offset);
+  BUCKMF.device->world_barrier();
+  for (int i=0; i<procs_per_gpu; i++) {
+    if (gpu_rank==i && world_me!=0)
+      BUCKMF.reinit(ntypes, cutsq, host_rhoinv, host_buck1, host_buck2,
+                    host_a, host_c, offset);
+    BUCKMF.device->gpu_barrier();
+  }
+}
void buck_gpu_clear() {
BUCKMF.clear();
}

View File

@@ -43,21 +43,19 @@ int CoulLongT::bytes_per_atom(const int max_nbors) const {
}
template <class numtyp, class acctyp>
-int CoulLongT::init(const int nlocal, const int nall, const int max_nbors,
-                    const int maxspecial, const double cell_size,
-                    const double gpu_split, FILE *_screen,
-                    const double host_cut_coulsq, double *host_special_coul,
-                    const double qqrd2e, const double g_ewald) {
+int CoulLongT::init(const int ntypes, double **host_scale,
+                    const int nlocal, const int nall, const int max_nbors,
+                    const int maxspecial, const double cell_size,
+                    const double gpu_split, FILE *_screen,
+                    const double host_cut_coulsq, double *host_special_coul,
+                    const double qqrd2e, const double g_ewald) {
int success;
success=this->init_atomic(nlocal,nall,max_nbors,maxspecial,cell_size,
gpu_split,_screen,coul_long,"k_coul_long");
if (success!=0)
return success;
-  // we don't have atom types for coulomb only,
-  // but go with the minimum so that we can use
-  // the same infrastructure as lj/cut/coul/long/gpu.
-  int lj_types=1;
+  int lj_types=ntypes;
shared_types=false;
int max_shared_types=this->device->max_shared_types();
if (lj_types<=max_shared_types && this->_block_size>=max_shared_types) {
@@ -69,13 +67,16 @@ int CoulLongT::init(const int nlocal, const int nall, const int max_nbors,
// Allocate a host write buffer for data initialization
UCL_H_Vec<numtyp> host_write(lj_types*lj_types*32,*(this->ucl_device),
UCL_WRITE_ONLY);
for (int i=0; i<lj_types*lj_types; i++)
host_write[i]=0.0;
lj1.alloc(lj_types*lj_types,*(this->ucl_device),UCL_READ_ONLY);
lj3.alloc(lj_types*lj_types,*(this->ucl_device),UCL_READ_ONLY);
+  scale.alloc(lj_types*lj_types,*(this->ucl_device),UCL_READ_ONLY);
+  this->atom->type_pack1(ntypes,lj_types,scale,host_write,host_scale);
sp_cl.alloc(4,*(this->ucl_device),UCL_READ_ONLY);
for (int i=0; i<4; i++) {
host_write[i]=host_special_coul[i];
@@ -87,10 +88,18 @@ int CoulLongT::init(const int nlocal, const int nall, const int max_nbors,
_g_ewald=g_ewald;
_allocated=true;
-  this->_max_bytes=lj1.row_bytes()+lj3.row_bytes()+sp_cl.row_bytes();
+  this->_max_bytes=lj1.row_bytes()+lj3.row_bytes()+scale.row_bytes()+
+                   sp_cl.row_bytes();
return 0;
}
+template <class numtyp, class acctyp>
+void CoulLongT::reinit(const int ntypes, double **host_scale) {
+  UCL_H_Vec<numtyp> hscale(_lj_types*_lj_types,*(this->ucl_device),
+                           UCL_WRITE_ONLY);
+  this->atom->type_pack1(ntypes,_lj_types,scale,hscale,host_scale);
+}
template <class numtyp, class acctyp>
void CoulLongT::clear() {
if (!_allocated)
@@ -99,6 +108,7 @@ void CoulLongT::clear() {
lj1.clear();
lj3.clear();
+  scale.clear();
sp_cl.clear();
this->clear_atomic();
}
@@ -134,7 +144,7 @@ void CoulLongT::loop(const bool _eflag, const bool _vflag) {
this->time_pair.start();
if (shared_types) {
this->k_pair_fast.set_size(GX,BX);
-    this->k_pair_fast.run(&this->atom->x, &lj1, &lj3, &sp_cl,
+    this->k_pair_fast.run(&this->atom->x, &scale, &sp_cl,
&this->nbor->dev_nbor, &this->_nbor_data->begin(),
&this->ans->force, &this->ans->engv,
&eflag, &vflag, &ainum, &nbor_pitch,
@@ -142,7 +152,7 @@ void CoulLongT::loop(const bool _eflag, const bool _vflag) {
&this->_threads_per_atom);
} else {
this->k_pair.set_size(GX,BX);
-    this->k_pair.run(&this->atom->x, &lj1, &lj3, &_lj_types, &sp_cl,
+    this->k_pair.run(&this->atom->x, &scale, &_lj_types, &sp_cl,
&this->nbor->dev_nbor, &this->_nbor_data->begin(),
&this->ans->force, &this->ans->engv, &eflag, &vflag,
&ainum, &nbor_pitch, &this->atom->q, &_cut_coulsq,

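Note: unlike the four-table styles above, CoulLongT::reinit repacks only the single scale table, via type_pack1. A sketch of the assumed behavior, in the same spirit as the type_pack4 sketch earlier (the indexing convention is again an assumption):

#include <vector>

// Sketch only: flatten one ntypes x ntypes host table into the
// (m_size x m_size) layout the kernels read as scale[itype*m_size+jtype].
std::vector<double> type_pack1_sketch(int ntypes, int m_size, double **one) {
  std::vector<double> packed(m_size * m_size, 0.0);
  for (int i = 1; i < ntypes; i++)
    for (int j = 1; j < ntypes; j++)
      packed[i * m_size + j] = one[i][j];
  return packed;  // real helper stages through a UCL host buffer
}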
View File

@@ -124,8 +124,7 @@ texture<int2> q_tex;
#endif
__kernel void k_coul_long(const __global numtyp4 *restrict x_,
-                          const __global numtyp4 *restrict lj1,
-                          const __global numtyp4 *restrict lj3,
+                          const __global numtyp *restrict scale,
const int lj_types,
const __global numtyp *restrict sp_cl_in,
const __global int *dev_nbor,
@@ -161,6 +160,7 @@ __kernel void k_coul_long(const __global numtyp4 *restrict x_,
n_stride,list_end,nbor);
numtyp4 ix; fetch4(ix,i,pos_tex); //x_[i];
+    int itype=ix.w;
numtyp qtmp; fetch(qtmp,i,q_tex);
for ( ; nbor<list_end; nbor+=n_stride) {
@@ -171,24 +171,26 @@ __kernel void k_coul_long(const __global numtyp4 *restrict x_,
j &= NEIGHMASK;
numtyp4 jx; fetch4(jx,j,pos_tex); //x_[j];
+      int jtype=jx.w;
// Compute r12
numtyp delx = ix.x-jx.x;
numtyp dely = ix.y-jx.y;
numtyp delz = ix.z-jx.z;
numtyp rsq = delx*delx+dely*dely+delz*delz;
+      int mtype=itype*lj_types+jtype;
if (rsq < cut_coulsq) {
numtyp r2inv=ucl_recip(rsq);
numtyp force, prefactor, _erfc;
numtyp r = ucl_rsqrt(r2inv);
numtyp grij = g_ewald * r;
numtyp expm2 = ucl_exp(-grij*grij);
numtyp t = ucl_recip((numtyp)1.0 + EWALD_P*grij);
_erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2;
fetch(prefactor,j,q_tex);
-        prefactor *= qqrd2e * qtmp/r;
+        prefactor *= qqrd2e * scale[mtype] * qtmp/r;
force = prefactor * (_erfc + EWALD_F*grij*expm2-factor_coul) * r2inv;
f.x+=delx*force;
@@ -196,7 +198,7 @@ __kernel void k_coul_long(const __global numtyp4 *restrict x_,
f.z+=delz*force;
if (eflag>0) {
-        e_coul += prefactor*(_erfc-factor_coul);
+        e_coul += prefactor*(_erfc-factor_coul);
}
if (vflag>0) {
virial[0] += delx*delx*force;
@@ -215,8 +217,7 @@ __kernel void k_coul_long(const __global numtyp4 *restrict x_,
}
__kernel void k_coul_long_fast(const __global numtyp4 *restrict x_,
-                               const __global numtyp4 *restrict lj1_in,
-                               const __global numtyp4 *restrict lj3_in,
+                               const __global numtyp *restrict scale_in,
const __global numtyp *restrict sp_cl_in,
const __global int *dev_nbor,
const __global int *dev_packed,
@@ -230,10 +231,14 @@ __kernel void k_coul_long_fast(const __global numtyp4 *restrict x_,
int tid, ii, offset;
atom_info(t_per_atom,ii,tid,offset);
+  __local numtyp scale[MAX_SHARED_TYPES*MAX_SHARED_TYPES];
__local numtyp sp_cl[4];
if (tid<4)
sp_cl[tid]=sp_cl_in[tid];
+  if (tid<MAX_SHARED_TYPES*MAX_SHARED_TYPES) {
+    scale[tid]=scale_in[tid];
+  }
acctyp e_coul=(acctyp)0;
acctyp4 f;
f.x=(acctyp)0; f.y=(acctyp)0; f.z=(acctyp)0;
@@ -252,7 +257,9 @@ __kernel void k_coul_long_fast(const __global numtyp4 *restrict x_,
numtyp4 ix; fetch4(ix,i,pos_tex); //x_[i];
numtyp qtmp; fetch(qtmp,i,q_tex);
int iw = ix.w;
+    int itype=fast_mul((int)MAX_SHARED_TYPES,iw);
for ( ; nbor<list_end; nbor+=n_stride) {
int j=*nbor;
@@ -261,7 +268,8 @@ __kernel void k_coul_long_fast(const __global numtyp4 *restrict x_,
j &= NEIGHMASK;
numtyp4 jx; fetch4(jx,j,pos_tex); //x_[j];
+      int mtype=itype+jx.w;
// Compute r12
numtyp delx = ix.x-jx.x;
numtyp dely = ix.y-jx.y;
@@ -272,13 +280,13 @@ __kernel void k_coul_long_fast(const __global numtyp4 *restrict x_,
numtyp r2inv=ucl_recip(rsq);
numtyp force, prefactor, _erfc;
-        numtyp r = ucl_rsqrt(r2inv);
+        numtyp r = ucl_sqrt(rsq);
numtyp grij = g_ewald * r;
numtyp expm2 = ucl_exp(-grij*grij);
numtyp t = ucl_recip((numtyp)1.0 + EWALD_P*grij);
_erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2;
fetch(prefactor,j,q_tex);
-        prefactor *= qqrd2e * qtmp/r;
+        prefactor *= qqrd2e * scale[mtype] * qtmp/r;
force = prefactor * (_erfc + EWALD_F*grij*expm2-factor_coul) * r2inv;
f.x+=delx*force;
@@ -286,7 +294,7 @@ __kernel void k_coul_long_fast(const __global numtyp4 *restrict x_,
f.z+=delz*force;
if (eflag>0) {
-        e_coul += prefactor*(_erfc-factor_coul);
+        e_coul += prefactor*(_erfc-factor_coul);
}
if (vflag>0) {
virial[0] += delx*delx*force;

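Note: the only functional change in the kernels above is the per-pair scale[mtype] factor folded into the Coulomb prefactor, plus the type bookkeeping needed to index it. Written out (P is the kernel's prefactor variable, s = scale[mtype], C = qqrd2e, g = g_ewald, f_c = factor_coul), each neighbor inside the cutoff contributes:

% Real-space Ewald pair term as accumulated by k_coul_long / k_coul_long_fast
P = \frac{C\,s\,q_i q_j}{r}, \qquad
E_{\mathrm{coul}} \mathrel{+}= P\bigl(\operatorname{erfc}(gr) - f_c\bigr), \qquad
F_{\mathrm{pair}} = \frac{P}{r^{2}}\Bigl(\operatorname{erfc}(gr)
    + \tfrac{2}{\sqrt{\pi}}\,gr\,e^{-g^{2}r^{2}} - f_c\Bigr)
% with the Cartesian force f_i += F_pair * (dx, dy, dz), and erfc(gr)
% evaluated by the Abramowitz-Stegun fit the kernel uses:
%   t = 1/(1 + EWALD_P\,gr),
%   erfc(gr) \approx t\,(A_1 + t(A_2 + t(A_3 + t(A_4 + t\,A_5))))\,e^{-g^2 r^2}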
View File

@@ -37,12 +37,16 @@ class CoulLong : public BaseCharge<numtyp, acctyp> {
* - -3 if there is an out of memory error
* - -4 if the GPU library was not compiled for GPU
* - -5 Double precision is not supported on card **/
-  int init(const int nlocal, const int nall, const int max_nbors,
+  int init(const int ntypes, double **scale,
+           const int nlocal, const int nall, const int max_nbors,
const int maxspecial, const double cell_size,
-           const double gpu_split, FILE *screen,
-           const double host_cut_coulsq, double *host_special_coul,
-           const double qqrd2e, const double g_ewald);
+           const double gpu_split, FILE *screen,
+           const double host_cut_coulsq, double *host_special_coul,
+           const double qqrd2e, const double g_ewald);
+  /// Send updated coeffs from host to device (to be compatible with fix adapt)
+  void reinit(const int ntypes, double **scale);
/// Clear all host and device data
/** \note This is called at the beginning of the init() routine **/
void clear();
@@ -59,6 +63,8 @@ class CoulLong : public BaseCharge<numtyp, acctyp> {
UCL_D_Vec<numtyp4> lj1;
/// lj3 dummy
UCL_D_Vec<numtyp4> lj3;
+  /// scale
+  UCL_D_Vec<numtyp> scale;
/// Special Coul values [0-3]
UCL_D_Vec<numtyp> sp_cl;

View File

@@ -27,10 +27,11 @@ static CoulLong<PRECISION,ACC_PRECISION> CLMF;
// ---------------------------------------------------------------------------
// Allocate memory on host and device and copy constants to device
// ---------------------------------------------------------------------------
-int cl_gpu_init(const int inum, const int nall, const int max_nbors,
-                const int maxspecial, const double cell_size, int &gpu_mode,
-                FILE *screen, double host_cut_coulsq, double *host_special_coul,
-                const double qqrd2e, const double g_ewald) {
+int cl_gpu_init(const int ntypes, double **host_scale,
+                const int inum, const int nall, const int max_nbors,
+                const int maxspecial, const double cell_size, int &gpu_mode,
+                FILE *screen, double host_cut_coulsq, double *host_special_coul,
+                const double qqrd2e, const double g_ewald) {
CLMF.clear();
gpu_mode=CLMF.device->gpu_mode();
double gpu_split=CLMF.device->particle_split();
@@ -53,9 +54,9 @@ int cl_gpu_init(const int inum, const int nall, const int max_nbors,
int init_ok=0;
if (world_me==0)
-    init_ok=CLMF.init(inum, nall, 300, maxspecial, cell_size, gpu_split,
-                      screen, host_cut_coulsq, host_special_coul, qqrd2e,
-                      g_ewald);
+    init_ok=CLMF.init(ntypes, host_scale, inum, nall, 300, maxspecial,
+                      cell_size, gpu_split, screen, host_cut_coulsq,
+                      host_special_coul, qqrd2e, g_ewald);
CLMF.device->world_barrier();
if (message)
@@ -71,9 +72,9 @@ int cl_gpu_init(const int inum, const int nall, const int max_nbors,
fflush(screen);
}
if (gpu_rank==i && world_me!=0)
-      init_ok=CLMF.init(inum, nall, 300, maxspecial, cell_size, gpu_split,
-                        screen, host_cut_coulsq, host_special_coul,
-                        qqrd2e, g_ewald);
+      init_ok=CLMF.init(ntypes, host_scale, inum, nall, 300, maxspecial,
+                        cell_size, gpu_split, screen, host_cut_coulsq,
+                        host_special_coul, qqrd2e, g_ewald);
CLMF.device->gpu_barrier();
if (message)
@@ -87,6 +88,27 @@ int cl_gpu_init(const int inum, const int nall, const int max_nbors,
return init_ok;
}
+// ---------------------------------------------------------------------------
+// Copy updated coeffs from host to device
+// ---------------------------------------------------------------------------
+void cl_gpu_reinit(const int ntypes, double **host_scale) {
+  int world_me=CLMF.device->world_me();
+  int gpu_rank=CLMF.device->gpu_rank();
+  int procs_per_gpu=CLMF.device->procs_per_gpu();
+  if (world_me==0)
+    CLMF.reinit(ntypes, host_scale);
+  CLMF.device->world_barrier();
+  for (int i=0; i<procs_per_gpu; i++) {
+    if (gpu_rank==i && world_me!=0)
+      CLMF.reinit(ntypes, host_scale);
+    CLMF.device->gpu_barrier();
+  }
+}
void cl_gpu_clear() {
CLMF.clear();
}

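Note: this diff does not show the CPU side, but the intended caller of cl_gpu_reinit() is the host pair style's reinit() hook, fired after fix adapt rewrites the host scale table. A hedged C++ sketch of that flow; every name except cl_gpu_reinit() is an assumption about the host-side class:

// Assumed host-side flow: fix adapt updates the per-pair scale table,
// the pair style's reinit() hook fires, and the new values go to the GPU.
void cl_gpu_reinit(const int ntypes, double **host_scale);  // added above

struct HostPairStyleSketch {
  int ntypes;       // number of atom types (host side)
  double **scale;   // per-type-pair scale table maintained by fix adapt

  void reinit() {
    // ... recompute any derived host coefficients here ...
    cl_gpu_reinit(ntypes + 1, scale);  // +1 assumed: LAMMPS types are 1-based
  }
};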
View File

@@ -87,6 +87,21 @@ int GaussT::init(const int ntypes,
return 0;
}
+template <class numtyp, class acctyp>
+void GaussT::reinit(const int ntypes, double **host_cutsq, double **host_a,
+                    double **host_b, double **host_offset) {
+  // Allocate a host write buffer for data initialization
+  UCL_H_Vec<numtyp> host_write(_lj_types*_lj_types*32,*(this->ucl_device),
+                               UCL_WRITE_ONLY);
+  for (int i=0; i<_lj_types*_lj_types; i++)
+    host_write[i]=0.0;
+  this->atom->type_pack4(ntypes,_lj_types,gauss1,host_write,host_a,host_b,
+                         host_cutsq,host_offset);
+}
template <class numtyp, class acctyp>
void GaussT::clear() {
if (!_allocated)

View File

@@ -14,7 +14,7 @@
***************************************************************************/
#ifndef LAL_GAUSS_H
-#define LAL_GAYSS_H
+#define LAL_GAUSS_H
#include "lal_base_atomic.h"
@@ -43,7 +43,11 @@ class Gauss : public BaseAtomic<numtyp, acctyp> {
const int nlocal, const int nall, const int max_nbors,
const int maxspecial, const double cell_size,
const double gpu_split, FILE *screen);
+  /// Send updated coeffs from host to device (to be compatible with fix adapt)
+  void reinit(const int ntypes, double **host_cutsq,
+              double **host_a, double **host_b, double **host_offset);
/// Clear all host and device data
/** \note This is called at the beginning of the init() routine **/
void clear();

View File

@@ -88,6 +88,28 @@ int gauss_gpu_init(const int ntypes, double **cutsq, double **host_a,
return init_ok;
}
+// ---------------------------------------------------------------------------
+// Copy updated coeffs from host to device
+// ---------------------------------------------------------------------------
+void gauss_gpu_reinit(const int ntypes, double **cutsq, double **host_a,
+                      double **host_b, double **offset) {
+  int world_me=GLMF.device->world_me();
+  int gpu_rank=GLMF.device->gpu_rank();
+  int procs_per_gpu=GLMF.device->procs_per_gpu();
+  if (world_me==0)
+    GLMF.reinit(ntypes, cutsq, host_a, host_b, offset);
+  GLMF.device->world_barrier();
+  for (int i=0; i<procs_per_gpu; i++) {
+    if (gpu_rank==i && world_me!=0)
+      GLMF.reinit(ntypes, cutsq, host_a, host_b, offset);
+    GLMF.device->gpu_barrier();
+  }
+}
void gauss_gpu_clear() {
GLMF.clear();
}

View File

@@ -92,6 +92,23 @@ int LJT::init(const int ntypes,
return 0;
}
+template <class numtyp, class acctyp>
+void LJT::reinit(const int ntypes, double **host_cutsq, double **host_lj1,
+                 double **host_lj2, double **host_lj3,
+                 double **host_lj4, double **host_offset) {
+  // Allocate a host write buffer for data initialization
+  UCL_H_Vec<numtyp> host_write(_lj_types*_lj_types*32,*(this->ucl_device),
+                               UCL_WRITE_ONLY);
+  for (int i=0; i<_lj_types*_lj_types; i++)
+    host_write[i]=0.0;
+  this->atom->type_pack4(ntypes,_lj_types,lj1,host_write,host_lj1,host_lj2,
+                         host_cutsq);
+  this->atom->type_pack4(ntypes,_lj_types,lj3,host_write,host_lj3,host_lj4,
+                         host_offset);
+}
template <class numtyp, class acctyp>
void LJT::clear() {
if (!_allocated)

View File

@@ -43,7 +43,12 @@ class LJ : public BaseAtomic<numtyp, acctyp> {
const int nlocal, const int nall, const int max_nbors,
const int maxspecial, const double cell_size,
const double gpu_split, FILE *screen);
+  /// Send updated coeffs from host to device (to be compatible with fix adapt)
+  void reinit(const int ntypes, double **host_cutsq,
+              double **host_lj1, double **host_lj2, double **host_lj3,
+              double **host_lj4, double **host_offset);
/// Clear all host and device data
/** \note This is called at the beginning of the init() routine **/
void clear();

View File

@@ -92,6 +92,26 @@ int LJExpandT::init(const int ntypes, double **host_cutsq,
return 0;
}
+template <class numtyp, class acctyp>
+void LJExpandT::reinit(const int ntypes, double **host_cutsq,
+                       double **host_lj1, double **host_lj2,
+                       double **host_lj3, double **host_lj4,
+                       double **host_offset, double **host_shift) {
+  // Allocate a host write buffer for data initialization
+  UCL_H_Vec<numtyp> host_write(_lj_types*_lj_types*32,*(this->ucl_device),
+                               UCL_WRITE_ONLY);
+  for (int i=0; i<_lj_types*_lj_types; i++)
+    host_write[i]=0.0;
+  this->atom->type_pack4(ntypes,_lj_types,lj1,host_write,host_lj1,host_lj2,
+                         host_cutsq, host_shift);
+  this->atom->type_pack4(ntypes,_lj_types,lj3,host_write,host_lj3,host_lj4,
+                         host_offset);
+}
template <class numtyp, class acctyp>
void LJExpandT::clear() {
if (!_allocated)

View File

@@ -43,7 +43,12 @@ class LJExpand : public BaseAtomic<numtyp, acctyp> {
const int nlocal, const int nall, const int max_nbors,
const int maxspecial, const double cell_size,
const double gpu_split, FILE *screen);
+  /// Send updated coeffs from host to device (to be compatible with fix adapt)
+  void reinit(const int ntypes, double **host_cutsq,
+              double **host_lj1, double **host_lj2, double **host_lj3,
+              double **host_lj4, double **host_offset, double **host_shift);
/// Clear all host and device data
/** \note This is called at the beginning of the init() routine **/
void clear();

View File

@@ -89,6 +89,29 @@ int lje_gpu_init(const int ntypes, double **cutsq, double **host_lj1,
return init_ok;
}
+// ---------------------------------------------------------------------------
+// Copy updated coeffs from host to device
+// ---------------------------------------------------------------------------
+void lje_gpu_reinit(const int ntypes, double **cutsq, double **host_lj1,
+                    double **host_lj2, double **host_lj3, double **host_lj4,
+                    double **offset, double **shift) {
+  int world_me=LJEMF.device->world_me();
+  int gpu_rank=LJEMF.device->gpu_rank();
+  int procs_per_gpu=LJEMF.device->procs_per_gpu();
+  if (world_me==0)
+    LJEMF.reinit(ntypes, cutsq, host_lj1, host_lj2, host_lj3, host_lj4,
+                 offset, shift);
+  LJEMF.device->world_barrier();
+  for (int i=0; i<procs_per_gpu; i++) {
+    if (gpu_rank==i && world_me!=0)
+      LJEMF.reinit(ntypes, cutsq, host_lj1, host_lj2, host_lj3, host_lj4,
+                   offset, shift);
+    LJEMF.device->gpu_barrier();
+  }
+}
void lje_gpu_clear() {
LJEMF.clear();
}

View File

@@ -88,6 +88,27 @@ int ljl_gpu_init(const int ntypes, double **cutsq, double **host_lj1,
return init_ok;
}
+// ---------------------------------------------------------------------------
+// Copy updated coeffs from host to device
+// ---------------------------------------------------------------------------
+void ljl_gpu_reinit(const int ntypes, double **cutsq, double **host_lj1,
+                    double **host_lj2, double **host_lj3, double **host_lj4,
+                    double **offset) {
+  int world_me=LJLMF.device->world_me();
+  int gpu_rank=LJLMF.device->gpu_rank();
+  int procs_per_gpu=LJLMF.device->procs_per_gpu();
+  if (world_me==0)
+    LJLMF.reinit(ntypes, cutsq, host_lj1, host_lj2, host_lj3, host_lj4, offset);
+  LJLMF.device->world_barrier();
+  for (int i=0; i<procs_per_gpu; i++) {
+    if (gpu_rank==i && world_me!=0)
+      LJLMF.reinit(ntypes, cutsq, host_lj1, host_lj2, host_lj3, host_lj4, offset);
+    LJLMF.device->gpu_barrier();
+  }
+}
void ljl_gpu_clear() {
LJLMF.clear();
}

View File

@@ -86,6 +86,21 @@ int SoftT::init(const int ntypes, double **host_cutsq,
return 0;
}
+template <class numtyp, class acctyp>
+void SoftT::reinit(const int ntypes, double **host_cutsq,
+                   double **host_prefactor, double **host_cut) {
+  // Allocate a host write buffer for data initialization
+  UCL_H_Vec<numtyp> host_write(_lj_types*_lj_types*32,*(this->ucl_device),
+                               UCL_WRITE_ONLY);
+  for (int i=0; i<_lj_types*_lj_types; i++)
+    host_write[i]=0.0;
+  this->atom->type_pack4(ntypes,_lj_types,coeff,host_write,host_prefactor,
+                         host_cut,host_cutsq);
+}
template <class numtyp, class acctyp>
void SoftT::clear() {
if (!_allocated)

View File

@@ -13,8 +13,8 @@
email : nguyentd@ornl.gov
***************************************************************************/
-#ifndef LAL_GAUSS_H
-#define LAL_GAYSS_H
+#ifndef LAL_SOFT_H
+#define LAL_SOFT_H
#include "lal_base_atomic.h"
@@ -44,6 +44,10 @@ class Soft : public BaseAtomic<numtyp, acctyp> {
const int maxspecial, const double cell_size,
const double gpu_split, FILE *screen);
+  /// Send updated coeffs from host to device (to be compatible with fix adapt)
+  void reinit(const int ntypes, double **host_cutsq,
+              double **host_prefactor, double **host_cut);
/// Clear all host and device data
/** \note This is called at the beginning of the init() routine **/
void clear();

View File

@@ -88,6 +88,28 @@ int soft_gpu_init(const int ntypes, double **cutsq, double **host_prefactor,
return init_ok;
}
+// ---------------------------------------------------------------------------
+// Copy updated constants to device
+// ---------------------------------------------------------------------------
+void soft_gpu_reinit(const int ntypes, double **cutsq, double **host_prefactor,
+                     double **host_cut) {
+  int world_me=SLMF.device->world_me();
+  int gpu_rank=SLMF.device->gpu_rank();
+  int procs_per_gpu=SLMF.device->procs_per_gpu();
+  if (world_me==0)
+    SLMF.reinit(ntypes, cutsq, host_prefactor, host_cut);
+  SLMF.device->world_barrier();
+  for (int i=0; i<procs_per_gpu; i++) {
+    if (gpu_rank==i && world_me!=0)
+      SLMF.reinit(ntypes, cutsq, host_prefactor, host_cut);
+    SLMF.device->gpu_barrier();
+  }
+}
void soft_gpu_clear() {
SLMF.clear();
}