forked from lijiext/lammps
git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@12146 f3b2605a-c512-4ea7-a41b-209d697bcdaa
This commit is contained in:
parent
f745f24bcd
commit
67ae64329e
|
@ -67,7 +67,9 @@ OBJS = $(OBJ_DIR)/lal_atom.o $(OBJ_DIR)/lal_ans.o \
|
|||
$(OBJ_DIR)/lal_soft.o $(OBJ_DIR)/lal_soft_ext.o \
|
||||
$(OBJ_DIR)/lal_lj_coul_msm.o $(OBJ_DIR)/lal_lj_coul_msm_ext.o \
|
||||
$(OBJ_DIR)/lal_lj_gromacs.o $(OBJ_DIR)/lal_lj_gromacs_ext.o \
|
||||
$(OBJ_DIR)/lal_dpd.o $(OBJ_DIR)/lal_dpd_ext.o
|
||||
$(OBJ_DIR)/lal_dpd.o $(OBJ_DIR)/lal_dpd_ext.o \
|
||||
$(OBJ_DIR)/lal_coul.o $(OBJ_DIR)/lal_coul_ext.o \
|
||||
$(OBJ_DIR)/lal_coul_debye.o $(OBJ_DIR)/lal_coul_debye_ext.o
|
||||
|
||||
CBNS = $(OBJ_DIR)/device.cubin $(OBJ_DIR)/device_cubin.h \
|
||||
$(OBJ_DIR)/atom.cubin $(OBJ_DIR)/atom_cubin.h \
|
||||
|
@ -114,7 +116,9 @@ CBNS = $(OBJ_DIR)/device.cubin $(OBJ_DIR)/device_cubin.h \
|
|||
$(OBJ_DIR)/soft.cubin $(OBJ_DIR)/soft_cubin.h \
|
||||
$(OBJ_DIR)/lj_coul_msm.cubin $(OBJ_DIR)/lj_coul_msm_cubin.h \
|
||||
$(OBJ_DIR)/lj_gromacs.cubin $(OBJ_DIR)/lj_gromacs_cubin.h \
|
||||
$(OBJ_DIR)/dpd.cubin $(OBJ_DIR)/dpd_cubin.h
|
||||
$(OBJ_DIR)/dpd.cubin $(OBJ_DIR)/dpd_cubin.h \
|
||||
$(OBJ_DIR)/coul.cubin $(OBJ_DIR)/coul_cubin.h \
|
||||
$(OBJ_DIR)/coul_debye.cubin $(OBJ_DIR)/coul_debye_cubin.h
|
||||
|
||||
all: $(OBJ_DIR) $(GPU_LIB) $(EXECS)
|
||||
|
||||
|
@ -676,6 +680,30 @@ $(OBJ_DIR)/lal_dpd.o: $(ALL_H) lal_dpd.h lal_dpd.cpp $(OBJ_DIR)/dpd_cubin.h $(OB
|
|||
$(OBJ_DIR)/lal_dpd_ext.o: $(ALL_H) lal_dpd.h lal_dpd_ext.cpp lal_base_dpd.h
|
||||
$(CUDR) -o $@ -c lal_dpd_ext.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/coul.cubin: lal_coul.cu lal_precision.h lal_preprocessor.h
|
||||
$(CUDA) --cubin -DNV_KERNEL -o $@ lal_coul.cu
|
||||
|
||||
$(OBJ_DIR)/coul_cubin.h: $(OBJ_DIR)/coul.cubin $(OBJ_DIR)/coul.cubin
|
||||
$(BIN2C) -c -n coul $(OBJ_DIR)/coul.cubin > $(OBJ_DIR)/coul_cubin.h
|
||||
|
||||
$(OBJ_DIR)/lal_coul.o: $(ALL_H) lal_coul.h lal_coul.cpp $(OBJ_DIR)/coul_cubin.h $(OBJ_DIR)/lal_base_charge.o
|
||||
$(CUDR) -o $@ -c lal_coul.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/lal_coul_ext.o: $(ALL_H) lal_coul.h lal_coul_ext.cpp lal_base_charge.h
|
||||
$(CUDR) -o $@ -c lal_coul_ext.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/coul_debye.cubin: lal_coul_debye.cu lal_precision.h lal_preprocessor.h
|
||||
$(CUDA) --cubin -DNV_KERNEL -o $@ lal_coul_debye.cu
|
||||
|
||||
$(OBJ_DIR)/coul_debye_cubin.h: $(OBJ_DIR)/coul_debye.cubin $(OBJ_DIR)/coul_debye.cubin
|
||||
$(BIN2C) -c -n coul_debye $(OBJ_DIR)/coul_debye.cubin > $(OBJ_DIR)/coul_debye_cubin.h
|
||||
|
||||
$(OBJ_DIR)/lal_coul_debye.o: $(ALL_H) lal_coul_debye.h lal_coul_debye.cpp $(OBJ_DIR)/coul_debye_cubin.h $(OBJ_DIR)/lal_base_charge.o
|
||||
$(CUDR) -o $@ -c lal_coul_debye.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/lal_coul_debye_ext.o: $(ALL_H) lal_coul_debye.h lal_coul_debye_ext.cpp lal_base_charge.h
|
||||
$(CUDR) -o $@ -c lal_coul_debye_ext.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(BIN_DIR)/nvc_get_devices: ./geryon/ucl_get_devices.cpp $(NVD_H)
|
||||
$(CUDR) -o $@ ./geryon/ucl_get_devices.cpp -DUCL_CUDADR $(CUDA_LIB) -lcuda
|
||||
|
||||
|
|
|
@ -56,7 +56,9 @@ OBJS = $(OBJ_DIR)/lal_atom.o $(OBJ_DIR)/lal_answer.o \
|
|||
$(OBJ_DIR)/lal_soft.o $(OBJ_DIR)/lal_soft_ext.o \
|
||||
$(OBJ_DIR)/lal_lj_coul_msm.o $(OBJ_DIR)/lal_lj_coul_msm_ext.o \
|
||||
$(OBJ_DIR)/lal_lj_gromacs.o $(OBJ_DIR)/lal_lj_gromacs_ext.o \
|
||||
$(OBJ_DIR)/lal_dpd.o $(OBJ_DIR)/lal_dpd_ext.o
|
||||
$(OBJ_DIR)/lal_dpd.o $(OBJ_DIR)/lal_dpd_ext.o \
|
||||
$(OBJ_DIR)/lal_coul.o $(OBJ_DIR)/lal_coul_ext.o \
|
||||
$(OBJ_DIR)/lal_coul_debye.o $(OBJ_DIR)/lal_coul_debye_ext.o
|
||||
|
||||
KERS = $(OBJ_DIR)/device_cl.h $(OBJ_DIR)/atom_cl.h \
|
||||
$(OBJ_DIR)/neighbor_cpu_cl.h $(OBJ_DIR)/pppm_cl.h \
|
||||
|
@ -79,7 +81,8 @@ KERS = $(OBJ_DIR)/device_cl.h $(OBJ_DIR)/atom_cl.h \
|
|||
$(OBJ_DIR)/lj_coul_debye_cl.h $(OBJ_DIR)/coul_dsf_cl.h \
|
||||
$(OBJ_DIR)/sw_cl.h $(OBJ_DIR)/beck_cl.h $(OBJ_DIR)/mie_cl.h \
|
||||
$(OBJ_DIR)/soft_cl.h $(OBJ_DIR)/lj_coul_msm_cl.h \
|
||||
$(OBJ_DIR)/lj_gromacs_cl.h $(OBJ_DIR)/dpd_cl.h
|
||||
$(OBJ_DIR)/lj_gromacs_cl.h $(OBJ_DIR)/dpd_cl.h \
|
||||
$(OBJ_DIR)/coul_cl.h $(OBJ_DIR)/coul_debye_cl.h
|
||||
|
||||
|
||||
OCL_EXECS = $(BIN_DIR)/ocl_get_devices
|
||||
|
@ -485,6 +488,24 @@ $(OBJ_DIR)/lal_dpd.o: $(ALL_H) lal_dpd.h lal_dpd.cpp $(OBJ_DIR)/dpd_cl.h $(OBJ_
|
|||
$(OBJ_DIR)/lal_dpd_ext.o: $(ALL_H) lal_dpd.h lal_dpd_ext.cpp lal_base_dpd.h
|
||||
$(OCL) -o $@ -c lal_dpd_ext.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/coul_cl.h: lal_coul.cu $(PRE1_H)
|
||||
$(BSH) ./geryon/file_to_cstr.sh coul $(PRE1_H) lal_coul.cu $(OBJ_DIR)/coul_cl.h;
|
||||
|
||||
$(OBJ_DIR)/lal_coul.o: $(ALL_H) lal_coul.h lal_coul.cpp $(OBJ_DIR)/coul_cl.h $(OBJ_DIR)/coul_cl.h $(OBJ_DIR)/lal_base_charge.o
|
||||
$(OCL) -o $@ -c lal_coul.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/lal_coul_ext.o: $(ALL_H) lal_coul.h lal_coul_ext.cpp lal_base_charge.h
|
||||
$(OCL) -o $@ -c lal_coul_ext.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/coul_debye_cl.h: lal_coul_debye.cu $(PRE1_H)
|
||||
$(BSH) ./geryon/file_to_cstr.sh coul_debye $(PRE1_H) lal_coul_debye.cu $(OBJ_DIR)/coul_debye_cl.h;
|
||||
|
||||
$(OBJ_DIR)/lal_coul_debye.o: $(ALL_H) lal_coul_debye.h lal_coul_debye.cpp $(OBJ_DIR)/coul_debye_cl.h $(OBJ_DIR)/coul_debye_cl.h $(OBJ_DIR)/lal_base_charge.o
|
||||
$(OCL) -o $@ -c lal_coul_debye.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/lal_coul_debye_ext.o: $(ALL_H) lal_coul_debye.h lal_coul_debye_ext.cpp lal_base_charge.h
|
||||
$(OCL) -o $@ -c lal_coul_debye_ext.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(BIN_DIR)/ocl_get_devices: ./geryon/ucl_get_devices.cpp
|
||||
$(OCL) -o $@ ./geryon/ucl_get_devices.cpp -DUCL_OPENCL $(OCL_LINK)
|
||||
|
||||
|
|
|
@ -97,6 +97,25 @@ int BornT::init(const int ntypes, double **host_cutsq,
|
|||
return 0;
|
||||
}
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
void BornT::reinit(const int ntypes, double **host_rhoinv,
|
||||
double **host_born1, double **host_born2,
|
||||
double **host_born3, double **host_a, double **host_c,
|
||||
double **host_d, double **host_offset) {
|
||||
|
||||
// Allocate a host write buffer for data initialization
|
||||
UCL_H_Vec<numtyp> host_write(_lj_types*_lj_types*32,*(this->ucl_device),
|
||||
UCL_WRITE_ONLY);
|
||||
|
||||
for (int i=0; i<_lj_types*_lj_types; i++)
|
||||
host_write[i]=0.0;
|
||||
|
||||
this->atom->type_pack4(ntypes,_lj_types,coeff1,host_write,host_rhoinv,
|
||||
host_born1,host_born2,host_born3);
|
||||
this->atom->type_pack4(ntypes,_lj_types,coeff2,host_write,host_a,host_c,
|
||||
host_d,host_offset);
|
||||
}
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
void BornT::clear() {
|
||||
if (!_allocated)
|
||||
|
|
|
@ -45,7 +45,13 @@ class Born : public BaseAtomic<numtyp, acctyp> {
|
|||
const int nlocal, const int nall, const int max_nbors,
|
||||
const int maxspecial, const double cell_size,
|
||||
const double gpu_split, FILE *screen);
|
||||
|
||||
|
||||
/// Send updated coeffs from host to device (to be compatible with fix adapt)
|
||||
void reinit(const int ntypes, double **host_rhoinv,
|
||||
double **host_born1, double **host_born2,
|
||||
double **host_born3, double **host_a, double **host_c,
|
||||
double **host_d, double **host_offset);
|
||||
|
||||
/// Clear all host and device data
|
||||
/** \note This is called at the beginning of the init() routine **/
|
||||
void clear();
|
||||
|
|
|
@ -92,6 +92,32 @@ int born_gpu_init(const int ntypes, double **cutsq, double **host_rhoinv,
|
|||
return init_ok;
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Copy updated coeffs from host to device
|
||||
// ---------------------------------------------------------------------------
|
||||
void born_gpu_reinit(const int ntypes, double **host_rhoinv,
|
||||
double **host_born1, double **host_born2,
|
||||
double **host_born3, double **host_a, double **host_c,
|
||||
double **host_d, double **offset) {
|
||||
int world_me=BORNMF.device->world_me();
|
||||
int gpu_rank=BORNMF.device->gpu_rank();
|
||||
int procs_per_gpu=BORNMF.device->procs_per_gpu();
|
||||
|
||||
if (world_me==0)
|
||||
BORNMF.reinit(ntypes, host_rhoinv, host_born1, host_born2,
|
||||
host_born3, host_a, host_c, host_d, offset);
|
||||
|
||||
BORNMF.device->world_barrier();
|
||||
|
||||
for (int i=0; i<procs_per_gpu; i++) {
|
||||
if (gpu_rank==i && world_me!=0)
|
||||
BORNMF.reinit(ntypes, host_rhoinv, host_born1, host_born2,
|
||||
host_born3, host_a, host_c, host_d, offset);
|
||||
|
||||
BORNMF.device->gpu_barrier();
|
||||
}
|
||||
}
|
||||
|
||||
void born_gpu_clear() {
|
||||
BORNMF.clear();
|
||||
}
|
||||
|
|
|
@ -91,6 +91,24 @@ int BuckT::init(const int ntypes, double **host_cutsq,
|
|||
return 0;
|
||||
}
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
void BuckT::reinit(const int ntypes, double **host_cutsq,
|
||||
double **host_rhoinv, double **host_buck1, double **host_buck2,
|
||||
double **host_a, double **host_c, double **host_offset) {
|
||||
|
||||
// Allocate a host write buffer for data initialization
|
||||
UCL_H_Vec<numtyp> host_write(_lj_types*_lj_types*32,*(this->ucl_device),
|
||||
UCL_WRITE_ONLY);
|
||||
|
||||
for (int i=0; i<_lj_types*_lj_types; i++)
|
||||
host_write[i]=0.0;
|
||||
|
||||
this->atom->type_pack4(ntypes,_lj_types,coeff1,host_write,host_rhoinv,
|
||||
host_buck1,host_buck2,host_cutsq);
|
||||
this->atom->type_pack4(ntypes,_lj_types,coeff2,host_write,host_a,host_c,
|
||||
host_offset);
|
||||
}
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
void BuckT::clear() {
|
||||
if (!_allocated)
|
||||
|
|
|
@ -45,6 +45,11 @@ class Buck : public BaseAtomic<numtyp, acctyp> {
|
|||
const int maxspecial, const double cell_size,
|
||||
const double gpu_split, FILE *screen);
|
||||
|
||||
/// Send updated coeffs from host to device (to be compatible with fix adapt)
|
||||
void reinit(const int ntypes, double **host_cutsq,
|
||||
double **host_rhoinv, double **host_buck1, double **host_buck2,
|
||||
double **host_a, double **host_c, double **host_offset);
|
||||
|
||||
/// Clear all host and device data
|
||||
/** \note This is called at the beginning of the init() routine **/
|
||||
void clear();
|
||||
|
|
|
@ -89,6 +89,31 @@ int buck_gpu_init(const int ntypes, double **cutsq, double **host_rhoinv,
|
|||
return init_ok;
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Copy updated coeffs from host to device
|
||||
// ---------------------------------------------------------------------------
|
||||
void buck_gpu_reinit(const int ntypes, double **cutsq, double **host_rhoinv,
|
||||
double **host_buck1, double **host_buck2,
|
||||
double **host_a, double **host_c, double **offset) {
|
||||
int world_me=BUCKMF.device->world_me();
|
||||
int gpu_rank=BUCKMF.device->gpu_rank();
|
||||
int procs_per_gpu=BUCKMF.device->procs_per_gpu();
|
||||
|
||||
if (world_me==0)
|
||||
BUCKMF.reinit(ntypes, cutsq, host_rhoinv, host_buck1, host_buck2,
|
||||
host_a, host_c, offset);
|
||||
|
||||
BUCKMF.device->world_barrier();
|
||||
|
||||
for (int i=0; i<procs_per_gpu; i++) {
|
||||
if (gpu_rank==i && world_me!=0)
|
||||
BUCKMF.reinit(ntypes, cutsq, host_rhoinv, host_buck1, host_buck2,
|
||||
host_a, host_c, offset);
|
||||
|
||||
BUCKMF.device->gpu_barrier();
|
||||
}
|
||||
}
|
||||
|
||||
void buck_gpu_clear() {
|
||||
BUCKMF.clear();
|
||||
}
|
||||
|
|
|
@ -43,21 +43,19 @@ int CoulLongT::bytes_per_atom(const int max_nbors) const {
|
|||
}
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
int CoulLongT::init(const int nlocal, const int nall, const int max_nbors,
|
||||
const int maxspecial, const double cell_size,
|
||||
const double gpu_split, FILE *_screen,
|
||||
const double host_cut_coulsq, double *host_special_coul,
|
||||
const double qqrd2e, const double g_ewald) {
|
||||
int CoulLongT::init(const int ntypes, double **host_scale,
|
||||
const int nlocal, const int nall, const int max_nbors,
|
||||
const int maxspecial, const double cell_size,
|
||||
const double gpu_split, FILE *_screen,
|
||||
const double host_cut_coulsq, double *host_special_coul,
|
||||
const double qqrd2e, const double g_ewald) {
|
||||
int success;
|
||||
success=this->init_atomic(nlocal,nall,max_nbors,maxspecial,cell_size,
|
||||
gpu_split,_screen,coul_long,"k_coul_long");
|
||||
if (success!=0)
|
||||
return success;
|
||||
|
||||
// we don't have atom types for coulomb only,
|
||||
// but go with the minimum so that we can use
|
||||
// the same infrastructure as lj/cut/coul/long/gpu.
|
||||
int lj_types=1;
|
||||
int lj_types=ntypes;
|
||||
shared_types=false;
|
||||
int max_shared_types=this->device->max_shared_types();
|
||||
if (lj_types<=max_shared_types && this->_block_size>=max_shared_types) {
|
||||
|
@ -69,13 +67,16 @@ int CoulLongT::init(const int nlocal, const int nall, const int max_nbors,
|
|||
// Allocate a host write buffer for data initialization
|
||||
UCL_H_Vec<numtyp> host_write(lj_types*lj_types*32,*(this->ucl_device),
|
||||
UCL_WRITE_ONLY);
|
||||
|
||||
|
||||
for (int i=0; i<lj_types*lj_types; i++)
|
||||
host_write[i]=0.0;
|
||||
|
||||
lj1.alloc(lj_types*lj_types,*(this->ucl_device),UCL_READ_ONLY);
|
||||
lj3.alloc(lj_types*lj_types,*(this->ucl_device),UCL_READ_ONLY);
|
||||
|
||||
|
||||
scale.alloc(lj_types*lj_types,*(this->ucl_device),UCL_READ_ONLY);
|
||||
this->atom->type_pack1(ntypes,lj_types,scale,host_write,host_scale);
|
||||
|
||||
sp_cl.alloc(4,*(this->ucl_device),UCL_READ_ONLY);
|
||||
for (int i=0; i<4; i++) {
|
||||
host_write[i]=host_special_coul[i];
|
||||
|
@ -87,10 +88,18 @@ int CoulLongT::init(const int nlocal, const int nall, const int max_nbors,
|
|||
_g_ewald=g_ewald;
|
||||
|
||||
_allocated=true;
|
||||
this->_max_bytes=lj1.row_bytes()+lj3.row_bytes()+sp_cl.row_bytes();
|
||||
this->_max_bytes=lj1.row_bytes()+lj3.row_bytes()+scale.row_bytes()+
|
||||
sp_cl.row_bytes();
|
||||
return 0;
|
||||
}
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
void CoulLongT::reinit(const int ntypes, double **host_scale) {
|
||||
UCL_H_Vec<numtyp> hscale(_lj_types*_lj_types,*(this->ucl_device),
|
||||
UCL_WRITE_ONLY);
|
||||
this->atom->type_pack1(ntypes,_lj_types,scale,hscale,host_scale);
|
||||
}
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
void CoulLongT::clear() {
|
||||
if (!_allocated)
|
||||
|
@ -99,6 +108,7 @@ void CoulLongT::clear() {
|
|||
|
||||
lj1.clear();
|
||||
lj3.clear();
|
||||
scale.clear();
|
||||
sp_cl.clear();
|
||||
this->clear_atomic();
|
||||
}
|
||||
|
@ -134,7 +144,7 @@ void CoulLongT::loop(const bool _eflag, const bool _vflag) {
|
|||
this->time_pair.start();
|
||||
if (shared_types) {
|
||||
this->k_pair_fast.set_size(GX,BX);
|
||||
this->k_pair_fast.run(&this->atom->x, &lj1, &lj3, &sp_cl,
|
||||
this->k_pair_fast.run(&this->atom->x, &scale, &sp_cl,
|
||||
&this->nbor->dev_nbor, &this->_nbor_data->begin(),
|
||||
&this->ans->force, &this->ans->engv,
|
||||
&eflag, &vflag, &ainum, &nbor_pitch,
|
||||
|
@ -142,7 +152,7 @@ void CoulLongT::loop(const bool _eflag, const bool _vflag) {
|
|||
&this->_threads_per_atom);
|
||||
} else {
|
||||
this->k_pair.set_size(GX,BX);
|
||||
this->k_pair.run(&this->atom->x, &lj1, &lj3, &_lj_types, &sp_cl,
|
||||
this->k_pair.run(&this->atom->x, &scale, &_lj_types, &sp_cl,
|
||||
&this->nbor->dev_nbor, &this->_nbor_data->begin(),
|
||||
&this->ans->force, &this->ans->engv, &eflag, &vflag,
|
||||
&ainum, &nbor_pitch, &this->atom->q, &_cut_coulsq,
|
||||
|
|
|
@ -124,8 +124,7 @@ texture<int2> q_tex;
|
|||
#endif
|
||||
|
||||
__kernel void k_coul_long(const __global numtyp4 *restrict x_,
|
||||
const __global numtyp4 *restrict lj1,
|
||||
const __global numtyp4 *restrict lj3,
|
||||
const __global numtyp *restrict scale,
|
||||
const int lj_types,
|
||||
const __global numtyp *restrict sp_cl_in,
|
||||
const __global int *dev_nbor,
|
||||
|
@ -161,6 +160,7 @@ __kernel void k_coul_long(const __global numtyp4 *restrict x_,
|
|||
n_stride,list_end,nbor);
|
||||
|
||||
numtyp4 ix; fetch4(ix,i,pos_tex); //x_[i];
|
||||
int itype=ix.w;
|
||||
numtyp qtmp; fetch(qtmp,i,q_tex);
|
||||
|
||||
for ( ; nbor<list_end; nbor+=n_stride) {
|
||||
|
@ -171,24 +171,26 @@ __kernel void k_coul_long(const __global numtyp4 *restrict x_,
|
|||
j &= NEIGHMASK;
|
||||
|
||||
numtyp4 jx; fetch4(jx,j,pos_tex); //x_[j];
|
||||
|
||||
int jtype=jx.w;
|
||||
|
||||
// Compute r12
|
||||
numtyp delx = ix.x-jx.x;
|
||||
numtyp dely = ix.y-jx.y;
|
||||
numtyp delz = ix.z-jx.z;
|
||||
numtyp rsq = delx*delx+dely*dely+delz*delz;
|
||||
|
||||
|
||||
int mtype=itype*lj_types+jtype;
|
||||
if (rsq < cut_coulsq) {
|
||||
numtyp r2inv=ucl_recip(rsq);
|
||||
numtyp force, prefactor, _erfc;
|
||||
|
||||
|
||||
numtyp r = ucl_rsqrt(r2inv);
|
||||
numtyp grij = g_ewald * r;
|
||||
numtyp expm2 = ucl_exp(-grij*grij);
|
||||
numtyp t = ucl_recip((numtyp)1.0 + EWALD_P*grij);
|
||||
_erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2;
|
||||
fetch(prefactor,j,q_tex);
|
||||
prefactor *= qqrd2e * qtmp/r;
|
||||
prefactor *= qqrd2e * scale[mtype] * qtmp/r;
|
||||
force = prefactor * (_erfc + EWALD_F*grij*expm2-factor_coul) * r2inv;
|
||||
|
||||
f.x+=delx*force;
|
||||
|
@ -196,7 +198,7 @@ __kernel void k_coul_long(const __global numtyp4 *restrict x_,
|
|||
f.z+=delz*force;
|
||||
|
||||
if (eflag>0) {
|
||||
e_coul += prefactor*(_erfc-factor_coul);
|
||||
e_coul += prefactor*(_erfc-factor_coul);
|
||||
}
|
||||
if (vflag>0) {
|
||||
virial[0] += delx*delx*force;
|
||||
|
@ -215,8 +217,7 @@ __kernel void k_coul_long(const __global numtyp4 *restrict x_,
|
|||
}
|
||||
|
||||
__kernel void k_coul_long_fast(const __global numtyp4 *restrict x_,
|
||||
const __global numtyp4 *restrict lj1_in,
|
||||
const __global numtyp4 *restrict lj3_in,
|
||||
const __global numtyp *restrict scale_in,
|
||||
const __global numtyp *restrict sp_cl_in,
|
||||
const __global int *dev_nbor,
|
||||
const __global int *dev_packed,
|
||||
|
@ -230,10 +231,14 @@ __kernel void k_coul_long_fast(const __global numtyp4 *restrict x_,
|
|||
int tid, ii, offset;
|
||||
atom_info(t_per_atom,ii,tid,offset);
|
||||
|
||||
__local numtyp scale[MAX_SHARED_TYPES*MAX_SHARED_TYPES];
|
||||
__local numtyp sp_cl[4];
|
||||
if (tid<4)
|
||||
sp_cl[tid]=sp_cl_in[tid];
|
||||
|
||||
if (tid<MAX_SHARED_TYPES*MAX_SHARED_TYPES) {
|
||||
scale[tid]=scale_in[tid];
|
||||
}
|
||||
|
||||
acctyp e_coul=(acctyp)0;
|
||||
acctyp4 f;
|
||||
f.x=(acctyp)0; f.y=(acctyp)0; f.z=(acctyp)0;
|
||||
|
@ -252,7 +257,9 @@ __kernel void k_coul_long_fast(const __global numtyp4 *restrict x_,
|
|||
|
||||
numtyp4 ix; fetch4(ix,i,pos_tex); //x_[i];
|
||||
numtyp qtmp; fetch(qtmp,i,q_tex);
|
||||
|
||||
int iw = ix.w;
|
||||
int itype=fast_mul((int)MAX_SHARED_TYPES,iw);
|
||||
|
||||
for ( ; nbor<list_end; nbor+=n_stride) {
|
||||
int j=*nbor;
|
||||
|
||||
|
@ -261,7 +268,8 @@ __kernel void k_coul_long_fast(const __global numtyp4 *restrict x_,
|
|||
j &= NEIGHMASK;
|
||||
|
||||
numtyp4 jx; fetch4(jx,j,pos_tex); //x_[j];
|
||||
|
||||
int mtype=itype+jx.w;
|
||||
|
||||
// Compute r12
|
||||
numtyp delx = ix.x-jx.x;
|
||||
numtyp dely = ix.y-jx.y;
|
||||
|
@ -272,13 +280,13 @@ __kernel void k_coul_long_fast(const __global numtyp4 *restrict x_,
|
|||
numtyp r2inv=ucl_recip(rsq);
|
||||
numtyp force, prefactor, _erfc;
|
||||
|
||||
numtyp r = ucl_rsqrt(r2inv);
|
||||
numtyp r = ucl_sqrt(rsq);
|
||||
numtyp grij = g_ewald * r;
|
||||
numtyp expm2 = ucl_exp(-grij*grij);
|
||||
numtyp t = ucl_recip((numtyp)1.0 + EWALD_P*grij);
|
||||
_erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2;
|
||||
fetch(prefactor,j,q_tex);
|
||||
prefactor *= qqrd2e * qtmp/r;
|
||||
prefactor *= qqrd2e * scale[mtype] * qtmp/r;
|
||||
force = prefactor * (_erfc + EWALD_F*grij*expm2-factor_coul) * r2inv;
|
||||
|
||||
f.x+=delx*force;
|
||||
|
@ -286,7 +294,7 @@ __kernel void k_coul_long_fast(const __global numtyp4 *restrict x_,
|
|||
f.z+=delz*force;
|
||||
|
||||
if (eflag>0) {
|
||||
e_coul += prefactor*(_erfc-factor_coul);
|
||||
e_coul += prefactor*(_erfc-factor_coul);
|
||||
}
|
||||
if (vflag>0) {
|
||||
virial[0] += delx*delx*force;
|
||||
|
|
|
@ -37,12 +37,16 @@ class CoulLong : public BaseCharge<numtyp, acctyp> {
|
|||
* - -3 if there is an out of memory error
|
||||
* - -4 if the GPU library was not compiled for GPU
|
||||
* - -5 Double precision is not supported on card **/
|
||||
int init(const int nlocal, const int nall, const int max_nbors,
|
||||
int init(const int ntypes, double **scale,
|
||||
const int nlocal, const int nall, const int max_nbors,
|
||||
const int maxspecial, const double cell_size,
|
||||
const double gpu_split, FILE *screen,
|
||||
const double host_cut_coulsq, double *host_special_coul,
|
||||
const double qqrd2e, const double g_ewald);
|
||||
|
||||
const double gpu_split, FILE *screen,
|
||||
const double host_cut_coulsq, double *host_special_coul,
|
||||
const double qqrd2e, const double g_ewald);
|
||||
|
||||
/// Send updated coeffs from host to device (to be compatible with fix adapt)
|
||||
void reinit(const int ntypes, double **scale);
|
||||
|
||||
/// Clear all host and device data
|
||||
/** \note This is called at the beginning of the init() routine **/
|
||||
void clear();
|
||||
|
@ -59,6 +63,8 @@ class CoulLong : public BaseCharge<numtyp, acctyp> {
|
|||
UCL_D_Vec<numtyp4> lj1;
|
||||
/// lj3 dummy
|
||||
UCL_D_Vec<numtyp4> lj3;
|
||||
/// scale
|
||||
UCL_D_Vec<numtyp> scale;
|
||||
/// Special Coul values [0-3]
|
||||
UCL_D_Vec<numtyp> sp_cl;
|
||||
|
||||
|
|
|
@ -27,10 +27,11 @@ static CoulLong<PRECISION,ACC_PRECISION> CLMF;
|
|||
// ---------------------------------------------------------------------------
|
||||
// Allocate memory on host and device and copy constants to device
|
||||
// ---------------------------------------------------------------------------
|
||||
int cl_gpu_init(const int inum, const int nall, const int max_nbors,
|
||||
const int maxspecial, const double cell_size, int &gpu_mode,
|
||||
FILE *screen, double host_cut_coulsq, double *host_special_coul,
|
||||
const double qqrd2e, const double g_ewald) {
|
||||
int cl_gpu_init(const int ntypes, double **host_scale,
|
||||
const int inum, const int nall, const int max_nbors,
|
||||
const int maxspecial, const double cell_size, int &gpu_mode,
|
||||
FILE *screen, double host_cut_coulsq, double *host_special_coul,
|
||||
const double qqrd2e, const double g_ewald) {
|
||||
CLMF.clear();
|
||||
gpu_mode=CLMF.device->gpu_mode();
|
||||
double gpu_split=CLMF.device->particle_split();
|
||||
|
@ -53,9 +54,9 @@ int cl_gpu_init(const int inum, const int nall, const int max_nbors,
|
|||
|
||||
int init_ok=0;
|
||||
if (world_me==0)
|
||||
init_ok=CLMF.init(inum, nall, 300, maxspecial, cell_size, gpu_split,
|
||||
screen, host_cut_coulsq, host_special_coul, qqrd2e,
|
||||
g_ewald);
|
||||
init_ok=CLMF.init(ntypes, host_scale, inum, nall, 300, maxspecial,
|
||||
cell_size, gpu_split, screen, host_cut_coulsq,
|
||||
host_special_coul, qqrd2e, g_ewald);
|
||||
|
||||
CLMF.device->world_barrier();
|
||||
if (message)
|
||||
|
@ -71,9 +72,9 @@ int cl_gpu_init(const int inum, const int nall, const int max_nbors,
|
|||
fflush(screen);
|
||||
}
|
||||
if (gpu_rank==i && world_me!=0)
|
||||
init_ok=CLMF.init(inum, nall, 300, maxspecial, cell_size, gpu_split,
|
||||
screen, host_cut_coulsq, host_special_coul,
|
||||
qqrd2e, g_ewald);
|
||||
init_ok=CLMF.init(ntypes, host_scale, inum, nall, 300, maxspecial,
|
||||
cell_size, gpu_split, screen, host_cut_coulsq,
|
||||
host_special_coul, qqrd2e, g_ewald);
|
||||
|
||||
CLMF.device->gpu_barrier();
|
||||
if (message)
|
||||
|
@ -87,6 +88,27 @@ int cl_gpu_init(const int inum, const int nall, const int max_nbors,
|
|||
return init_ok;
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Copy updated coeffs from host to device
|
||||
// ---------------------------------------------------------------------------
|
||||
void cl_gpu_reinit(const int ntypes, double **host_scale) {
|
||||
int world_me=CLMF.device->world_me();
|
||||
int gpu_rank=CLMF.device->gpu_rank();
|
||||
int procs_per_gpu=CLMF.device->procs_per_gpu();
|
||||
|
||||
if (world_me==0)
|
||||
CLMF.reinit(ntypes, host_scale);
|
||||
|
||||
CLMF.device->world_barrier();
|
||||
|
||||
for (int i=0; i<procs_per_gpu; i++) {
|
||||
if (gpu_rank==i && world_me!=0)
|
||||
CLMF.reinit(ntypes, host_scale);
|
||||
|
||||
CLMF.device->gpu_barrier();
|
||||
}
|
||||
}
|
||||
|
||||
void cl_gpu_clear() {
|
||||
CLMF.clear();
|
||||
}
|
||||
|
|
|
@ -87,6 +87,21 @@ int GaussT::init(const int ntypes,
|
|||
return 0;
|
||||
}
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
void GaussT::reinit(const int ntypes, double **host_cutsq, double **host_a,
|
||||
double **host_b, double **host_offset) {
|
||||
|
||||
// Allocate a host write buffer for data initialization
|
||||
UCL_H_Vec<numtyp> host_write(_lj_types*_lj_types*32,*(this->ucl_device),
|
||||
UCL_WRITE_ONLY);
|
||||
|
||||
for (int i=0; i<_lj_types*_lj_types; i++)
|
||||
host_write[i]=0.0;
|
||||
|
||||
this->atom->type_pack4(ntypes,_lj_types,gauss1,host_write,host_a,host_b,
|
||||
host_cutsq,host_offset);
|
||||
}
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
void GaussT::clear() {
|
||||
if (!_allocated)
|
||||
|
|
|
@ -14,7 +14,7 @@
|
|||
***************************************************************************/
|
||||
|
||||
#ifndef LAL_GAUSS_H
|
||||
#define LAL_GAYSS_H
|
||||
#define LAL_GAUSS_H
|
||||
|
||||
#include "lal_base_atomic.h"
|
||||
|
||||
|
@ -43,7 +43,11 @@ class Gauss : public BaseAtomic<numtyp, acctyp> {
|
|||
const int nlocal, const int nall, const int max_nbors,
|
||||
const int maxspecial, const double cell_size,
|
||||
const double gpu_split, FILE *screen);
|
||||
|
||||
|
||||
/// Send updated coeffs from host to device (to be compatible with fix adapt)
|
||||
void reinit(const int ntypes, double **host_cutsq,
|
||||
double **host_a, double **host_b, double **host_offset);
|
||||
|
||||
/// Clear all host and device data
|
||||
/** \note This is called at the beginning of the init() routine **/
|
||||
void clear();
|
||||
|
|
|
@ -88,6 +88,28 @@ int gauss_gpu_init(const int ntypes, double **cutsq, double **host_a,
|
|||
return init_ok;
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Copy updated coeffs from host to device
|
||||
// ---------------------------------------------------------------------------
|
||||
void gauss_gpu_reinit(const int ntypes, double **cutsq, double **host_a,
|
||||
double **host_b, double **offset) {
|
||||
int world_me=GLMF.device->world_me();
|
||||
int gpu_rank=GLMF.device->gpu_rank();
|
||||
int procs_per_gpu=GLMF.device->procs_per_gpu();
|
||||
|
||||
if (world_me==0)
|
||||
GLMF.reinit(ntypes, cutsq, host_a, host_b, offset);
|
||||
|
||||
GLMF.device->world_barrier();
|
||||
|
||||
for (int i=0; i<procs_per_gpu; i++) {
|
||||
if (gpu_rank==i && world_me!=0)
|
||||
GLMF.reinit(ntypes, cutsq, host_a, host_b, offset);
|
||||
|
||||
GLMF.device->gpu_barrier();
|
||||
}
|
||||
}
|
||||
|
||||
void gauss_gpu_clear() {
|
||||
GLMF.clear();
|
||||
}
|
||||
|
|
|
@ -92,6 +92,23 @@ int LJT::init(const int ntypes,
|
|||
return 0;
|
||||
}
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
void LJT::reinit(const int ntypes, double **host_cutsq, double **host_lj1,
|
||||
double **host_lj2, double **host_lj3,
|
||||
double **host_lj4, double **host_offset) {
|
||||
// Allocate a host write buffer for data initialization
|
||||
UCL_H_Vec<numtyp> host_write(_lj_types*_lj_types*32,*(this->ucl_device),
|
||||
UCL_WRITE_ONLY);
|
||||
|
||||
for (int i=0; i<_lj_types*_lj_types; i++)
|
||||
host_write[i]=0.0;
|
||||
|
||||
this->atom->type_pack4(ntypes,_lj_types,lj1,host_write,host_lj1,host_lj2,
|
||||
host_cutsq);
|
||||
this->atom->type_pack4(ntypes,_lj_types,lj3,host_write,host_lj3,host_lj4,
|
||||
host_offset);
|
||||
}
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
void LJT::clear() {
|
||||
if (!_allocated)
|
||||
|
|
|
@ -43,7 +43,12 @@ class LJ : public BaseAtomic<numtyp, acctyp> {
|
|||
const int nlocal, const int nall, const int max_nbors,
|
||||
const int maxspecial, const double cell_size,
|
||||
const double gpu_split, FILE *screen);
|
||||
|
||||
|
||||
/// Send updated coeffs from host to device (to be compatible with fix adapt)
|
||||
void reinit(const int ntypes, double **host_cutsq,
|
||||
double **host_lj1, double **host_lj2, double **host_lj3,
|
||||
double **host_lj4, double **host_offset);
|
||||
|
||||
/// Clear all host and device data
|
||||
/** \note This is called at the beginning of the init() routine **/
|
||||
void clear();
|
||||
|
|
|
@ -92,6 +92,26 @@ int LJExpandT::init(const int ntypes, double **host_cutsq,
|
|||
return 0;
|
||||
}
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
void LJExpandT::reinit(const int ntypes, double **host_cutsq,
|
||||
double **host_lj1, double **host_lj2,
|
||||
double **host_lj3, double **host_lj4,
|
||||
double **host_offset, double **host_shift) {
|
||||
|
||||
// Allocate a host write buffer for data initialization
|
||||
UCL_H_Vec<numtyp> host_write(_lj_types*_lj_types*32,*(this->ucl_device),
|
||||
UCL_WRITE_ONLY);
|
||||
|
||||
for (int i=0; i<_lj_types*_lj_types; i++)
|
||||
host_write[i]=0.0;
|
||||
|
||||
this->atom->type_pack4(ntypes,_lj_types,lj1,host_write,host_lj1,host_lj2,
|
||||
host_cutsq, host_shift);
|
||||
|
||||
this->atom->type_pack4(ntypes,_lj_types,lj3,host_write,host_lj3,host_lj4,
|
||||
host_offset);
|
||||
}
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
void LJExpandT::clear() {
|
||||
if (!_allocated)
|
||||
|
|
|
@ -43,7 +43,12 @@ class LJExpand : public BaseAtomic<numtyp, acctyp> {
|
|||
const int nlocal, const int nall, const int max_nbors,
|
||||
const int maxspecial, const double cell_size,
|
||||
const double gpu_split, FILE *screen);
|
||||
|
||||
|
||||
/// Send updated coeffs from host to device (to be compatible with fix adapt)
|
||||
void reinit(const int ntypes, double **host_cutsq,
|
||||
double **host_lj1, double **host_lj2, double **host_lj3,
|
||||
double **host_lj4, double **host_offset, double **host_shift);
|
||||
|
||||
/// Clear all host and device data
|
||||
/** \note This is called at the beginning of the init() routine **/
|
||||
void clear();
|
||||
|
|
|
@ -89,6 +89,29 @@ int lje_gpu_init(const int ntypes, double **cutsq, double **host_lj1,
|
|||
return init_ok;
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Copy updated coeffs from host to device
|
||||
// ---------------------------------------------------------------------------
|
||||
int lje_gpu_reinit(const int ntypes, double **cutsq, double **host_lj1,
|
||||
double **host_lj2, double **host_lj3, double **host_lj4,
|
||||
double **offset, double **shift) {
|
||||
int world_me=LJEMF.device->world_me();
|
||||
int gpu_rank=LJEMF.device->gpu_rank();
|
||||
int procs_per_gpu=LJEMF.device->procs_per_gpu();
|
||||
|
||||
if (world_me==0)
|
||||
LJEMF.reinit(ntypes, cutsq, host_lj1, host_lj2, host_lj3, host_lj4,
|
||||
offset, shift);
|
||||
LJEMF.device->world_barrier();
|
||||
|
||||
for (int i=0; i<procs_per_gpu; i++) {
|
||||
if (gpu_rank==i && world_me!=0)
|
||||
LJEMF.reinit(ntypes, cutsq, host_lj1, host_lj2, host_lj3, host_lj4,
|
||||
offset, shift);
|
||||
LJEMF.device->gpu_barrier();
|
||||
}
|
||||
}
|
||||
|
||||
void lje_gpu_clear() {
|
||||
LJEMF.clear();
|
||||
}
|
||||
|
|
|
@ -88,6 +88,27 @@ int ljl_gpu_init(const int ntypes, double **cutsq, double **host_lj1,
|
|||
return init_ok;
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Copy updated coeffs from host to device
|
||||
// ---------------------------------------------------------------------------
|
||||
void ljl_gpu_reinit(const int ntypes, double **cutsq, double **host_lj1,
|
||||
double **host_lj2, double **host_lj3, double **host_lj4,
|
||||
double **offset) {
|
||||
int world_me=LJLMF.device->world_me();
|
||||
int gpu_rank=LJLMF.device->gpu_rank();
|
||||
int procs_per_gpu=LJLMF.device->procs_per_gpu();
|
||||
|
||||
if (world_me==0)
|
||||
LJLMF.reinit(ntypes, cutsq, host_lj1, host_lj2, host_lj3, host_lj4, offset);
|
||||
LJLMF.device->world_barrier();
|
||||
|
||||
for (int i=0; i<procs_per_gpu; i++) {
|
||||
if (gpu_rank==i && world_me!=0)
|
||||
LJLMF.reinit(ntypes, cutsq, host_lj1, host_lj2, host_lj3, host_lj4, offset);
|
||||
LJLMF.device->gpu_barrier();
|
||||
}
|
||||
}
|
||||
|
||||
void ljl_gpu_clear() {
|
||||
LJLMF.clear();
|
||||
}
|
||||
|
|
|
@ -86,6 +86,21 @@ int SoftT::init(const int ntypes, double **host_cutsq,
|
|||
return 0;
|
||||
}
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
void SoftT::reinit(const int ntypes, double **host_cutsq,
|
||||
double **host_prefactor, double **host_cut) {
|
||||
|
||||
// Allocate a host write buffer for data initialization
|
||||
UCL_H_Vec<numtyp> host_write(_lj_types*_lj_types*32,*(this->ucl_device),
|
||||
UCL_WRITE_ONLY);
|
||||
|
||||
for (int i=0; i<_lj_types*_lj_types; i++)
|
||||
host_write[i]=0.0;
|
||||
|
||||
this->atom->type_pack4(ntypes,_lj_types,coeff,host_write,host_prefactor,
|
||||
host_cut,host_cutsq);
|
||||
}
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
void SoftT::clear() {
|
||||
if (!_allocated)
|
||||
|
|
|
@ -13,8 +13,8 @@
|
|||
email : nguyentd@ornl.gov
|
||||
***************************************************************************/
|
||||
|
||||
#ifndef LAL_GAUSS_H
|
||||
#define LAL_GAYSS_H
|
||||
#ifndef LAL_SOFT_H
|
||||
#define LAL_SOFT_H
|
||||
|
||||
#include "lal_base_atomic.h"
|
||||
|
||||
|
@ -44,6 +44,10 @@ class Soft : public BaseAtomic<numtyp, acctyp> {
|
|||
const int maxspecial, const double cell_size,
|
||||
const double gpu_split, FILE *screen);
|
||||
|
||||
/// Send updated coeffs from host to device (to be compatible with fix adapt)
|
||||
void reinit(const int ntypes, double **host_cutsq,
|
||||
double **host_prefactor, double **host_cut);
|
||||
|
||||
/// Clear all host and device data
|
||||
/** \note This is called at the beginning of the init() routine **/
|
||||
void clear();
|
||||
|
|
|
@ -88,6 +88,28 @@ int soft_gpu_init(const int ntypes, double **cutsq, double **host_prefactor,
|
|||
return init_ok;
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Copy updated constants to device
|
||||
// ---------------------------------------------------------------------------
|
||||
void soft_gpu_reinit(const int ntypes, double **cutsq, double **host_prefactor,
|
||||
double **host_cut) {
|
||||
int world_me=SLMF.device->world_me();
|
||||
int gpu_rank=SLMF.device->gpu_rank();
|
||||
int procs_per_gpu=SLMF.device->procs_per_gpu();
|
||||
|
||||
if (world_me==0)
|
||||
SLMF.reinit(ntypes, cutsq, host_prefactor, host_cut);
|
||||
|
||||
SLMF.device->world_barrier();
|
||||
|
||||
for (int i=0; i<procs_per_gpu; i++) {
|
||||
if (gpu_rank==i && world_me!=0)
|
||||
SLMF.reinit(ntypes, cutsq, host_prefactor, host_cut);
|
||||
|
||||
SLMF.device->gpu_barrier();
|
||||
}
|
||||
}
|
||||
|
||||
void soft_gpu_clear() {
|
||||
SLMF.clear();
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue