forked from lijiext/lammps
git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@8810 f3b2605a-c512-4ea7-a41b-209d697bcdaa
This commit is contained in:
parent
8bcf139b73
commit
209999068c
|
@ -87,7 +87,7 @@ CBNS = $(OBJ_DIR)/device.cubin $(OBJ_DIR)/device_cubin.h \
|
|||
$(OBJ_DIR)/eam.cubin $(OBJ_DIR)/eam_cubin.h \
|
||||
$(OBJ_DIR)/buck.cubin $(OBJ_DIR)/buck_cubin.h \
|
||||
$(OBJ_DIR)/buck_coul_long.cubin $(OBJ_DIR)/buck_coul_long_cubin.h \
|
||||
$(OBJ_DIR)/buck_coul_wolf.cubin $(OBJ_DIR)/buck_coul_wolf_cubin.h \
|
||||
$(OBJ_DIR)/buck_coul.cubin $(OBJ_DIR)/buck_coul_cubin.h \
|
||||
$(OBJ_DIR)/table.cubin $(OBJ_DIR)/table_cubin.h \
|
||||
$(OBJ_DIR)/yukawa.cubin $(OBJ_DIR)/yukawa_cubin.h \
|
||||
$(OBJ_DIR)/born.cubin $(OBJ_DIR)/born_cubin.h \
|
||||
|
|
|
@ -64,16 +64,11 @@ KERS = $(OBJ_DIR)/device_cl.h $(OBJ_DIR)/atom_cl.h \
|
|||
$(OBJ_DIR)/eam_cl.h $(OBJ_DIR)/buck_cl.h \
|
||||
$(OBJ_DIR)/buck_coul_cl.h $(OBJ_DIR)/buck_coul_long_cl.h \
|
||||
$(OBJ_DIR)/table_cl.h $(OBJ_DIR)/yukawa_cl.h \
|
||||
$(OBJ_DIR)/born.cubin $(OBJ_DIR)/born_cubin.h \
|
||||
$(OBJ_DIR)/born_coul_wolf.cubin $(OBJ_DIR)/born_coul_wolf_cubin.h \
|
||||
$(OBJ_DIR)/born_coul_long.cubin $(OBJ_DIR)/born_coul_long_cubin.h \
|
||||
$(OBJ_DIR)/dipole_lj.cubin $(OBJ_DIR)/dipole_lj_cubin.h \
|
||||
$(OBJ_DIR)/dipole_lj_sf.cubin $(OBJ_DIR)/dipole_lj_sf_cubin.h \
|
||||
$(OBJ_DIR)/colloid.cubin $(OBJ_DIR)/colloid_cubin.h \
|
||||
$(OBJ_DIR)/gauss.cubin $(OBJ_DIR)/gauss_cubin.h \
|
||||
$(OBJ_DIR)/yukawa_colloid.cubin $(OBJ_DIR)/yukawa_colloid_cubin.h \
|
||||
$(OBJ_DIR)/lj_coul_debye.cubin $(OBJ_DIR)/lj_coul_debye_cubin.h \
|
||||
$(OBJ_DIR)/coul_dsf.cubin $(OBJ_DIR)/coul_dsf_cubin.h
|
||||
$(OBJ_DIR)/born_cl.h $(OBJ_DIR)/born_coul_wolf_cl.h \
|
||||
$(OBJ_DIR)/born_coul_long_cl.h $(OBJ_DIR)/dipole_lj_cl.h \
|
||||
$(OBJ_DIR)/dipole_lj_sf_cl.h $(OBJ_DIR)/colloid_cl.h \
|
||||
$(OBJ_DIR)/gauss_cl.h $(OBJ_DIR)/yukawa_colloid_cl.h \
|
||||
$(OBJ_DIR)/lj_coul_debye_cl.h $(OBJ_DIR)/coul_dsf_cl.h
|
||||
|
||||
|
||||
OCL_EXECS = $(BIN_DIR)/ocl_get_devices
|
||||
|
@ -131,11 +126,11 @@ $(OBJ_DIR)/lal_pppm_ext.o: $(ALL_H) lal_pppm.h lal_pppm_ext.cpp
|
|||
$(OBJ_DIR)/ellipsoid_nbor_cl.h: lal_ellipsoid_nbor.cu lal_preprocessor.h
|
||||
$(BSH) ./geryon/file_to_cstr.sh ellipsoid_nbor lal_preprocessor.h lal_ellipsoid_nbor.cu $(OBJ_DIR)/ellipsoid_nbor_cl.h
|
||||
|
||||
$(OBJ_DIR)/gayberne_cl.h: lal_gayberne.cu lal_ellipsoid_extra.h lal_preprocessor.h
|
||||
$(BSH) ./geryon/file_to_cstr.sh gayberne lal_preprocessor.h lal_ellipsoid_extra.h lal_gayberne.cu $(OBJ_DIR)/gayberne_cl.h;
|
||||
$(OBJ_DIR)/gayberne_cl.h: lal_gayberne.cu lal_ellipsoid_extra.h lal_aux_fun1.h lal_preprocessor.h
|
||||
$(BSH) ./geryon/file_to_cstr.sh gayberne lal_preprocessor.h lal_aux_fun1.h lal_ellipsoid_extra.h lal_gayberne.cu $(OBJ_DIR)/gayberne_cl.h;
|
||||
|
||||
$(OBJ_DIR)/gayberne_lj_cl.h: lal_gayberne_lj.cu lal_ellipsoid_extra.h lal_preprocessor.h
|
||||
$(BSH) ./geryon/file_to_cstr.sh gayberne_lj lal_preprocessor.h lal_ellipsoid_extra.h lal_gayberne_lj.cu $(OBJ_DIR)/gayberne_lj_cl.h;
|
||||
$(OBJ_DIR)/gayberne_lj_cl.h: lal_gayberne_lj.cu lal_ellipsoid_extra.h lal_aux_fun1.h lal_preprocessor.h
|
||||
$(BSH) ./geryon/file_to_cstr.sh gayberne_lj lal_preprocessor.h lal_aux_fun1.h lal_ellipsoid_extra.h lal_gayberne_lj.cu $(OBJ_DIR)/gayberne_lj_cl.h;
|
||||
|
||||
$(OBJ_DIR)/lal_gayberne.o: $(ALL_H) lal_gayberne.h lal_gayberne.cpp $(OBJ_DIR)/gayberne_cl.h $(OBJ_DIR)/gayberne_lj_cl.h $(OBJ_DIR)/lal_base_ellipsoid.o
|
||||
$(OCL) -o $@ -c lal_gayberne.cpp -I$(OBJ_DIR)
|
||||
|
@ -143,11 +138,11 @@ $(OBJ_DIR)/lal_gayberne.o: $(ALL_H) lal_gayberne.h lal_gayberne.cpp $(OBJ_DIR)/g
|
|||
$(OBJ_DIR)/lal_gayberne_ext.o: $(ALL_H) $(OBJ_DIR)/lal_gayberne.o lal_gayberne_ext.cpp
|
||||
$(OCL) -o $@ -c lal_gayberne_ext.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/re_squared_cl.h: lal_re_squared.cu lal_ellipsoid_extra.h lal_preprocessor.h
|
||||
$(BSH) ./geryon/file_to_cstr.sh re_squared lal_preprocessor.h lal_ellipsoid_extra.h lal_re_squared.cu $(OBJ_DIR)/re_squared_cl.h;
|
||||
$(OBJ_DIR)/re_squared_cl.h: lal_re_squared.cu lal_ellipsoid_extra.h lal_aux_fun1.h lal_preprocessor.h
|
||||
$(BSH) ./geryon/file_to_cstr.sh re_squared lal_preprocessor.h lal_aux_fun1.h lal_ellipsoid_extra.h lal_re_squared.cu $(OBJ_DIR)/re_squared_cl.h;
|
||||
|
||||
$(OBJ_DIR)/re_squared_lj_cl.h: lal_re_squared_lj.cu lal_ellipsoid_extra.h lal_preprocessor.h
|
||||
$(BSH) ./geryon/file_to_cstr.sh re_squared_lj lal_preprocessor.h lal_ellipsoid_extra.h lal_re_squared_lj.cu $(OBJ_DIR)/re_squared_lj_cl.h;
|
||||
$(OBJ_DIR)/re_squared_lj_cl.h: lal_re_squared_lj.cu lal_ellipsoid_extra.h lal_aux_fun1.h lal_preprocessor.h
|
||||
$(BSH) ./geryon/file_to_cstr.sh re_squared_lj lal_preprocessor.h lal_aux_fun1.h lal_ellipsoid_extra.h lal_re_squared_lj.cu $(OBJ_DIR)/re_squared_lj_cl.h;
|
||||
|
||||
$(OBJ_DIR)/lal_re_squared.o: $(ALL_H) lal_re_squared.h lal_re_squared.cpp $(OBJ_DIR)/re_squared_cl.h $(OBJ_DIR)/re_squared_lj_cl.h $(OBJ_DIR)/lal_base_ellipsoid.o
|
||||
$(OCL) -o $@ -c lal_re_squared.cpp -I$(OBJ_DIR)
|
||||
|
|
|
@ -17,8 +17,10 @@
|
|||
#include "lal_preprocessor.h"
|
||||
#endif
|
||||
|
||||
__kernel void kernel_cast_x(__global numtyp4 *x_type, __global double *x,
|
||||
__global int *type, const int nall) {
|
||||
__kernel void kernel_cast_x(__global numtyp4 *restrict x_type,
|
||||
const __global double *restrict x,
|
||||
const __global int *restrict type,
|
||||
const int nall) {
|
||||
int ii=GLOBAL_ID_X;
|
||||
|
||||
if (ii<nall) {
|
||||
|
|
|
@ -41,6 +41,8 @@
|
|||
nbor+=offset; \
|
||||
}
|
||||
|
||||
#if (ARCH < 300)
|
||||
|
||||
#define store_answers(f, energy, virial, ii, inum, tid, t_per_atom, offset, \
|
||||
eflag, vflag, ans, engv) \
|
||||
if (t_per_atom>1) { \
|
||||
|
@ -137,3 +139,72 @@
|
|||
ans[ii]=f; \
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
#define store_answers(f, energy, virial, ii, inum, tid, t_per_atom, offset, \
|
||||
eflag, vflag, ans, engv) \
|
||||
if (t_per_atom>1) { \
|
||||
for (unsigned int s=t_per_atom/2; s>0; s>>=1) { \
|
||||
f.x += shfl_xor(f.x, s, t_per_atom); \
|
||||
f.y += shfl_xor(f.y, s, t_per_atom); \
|
||||
f.z += shfl_xor(f.z, s, t_per_atom); \
|
||||
energy += shfl_xor(energy, s, t_per_atom); \
|
||||
} \
|
||||
if (vflag>0) { \
|
||||
for (unsigned int s=t_per_atom/2; s>0; s>>=1) { \
|
||||
for (int r=0; r<6; r++) \
|
||||
virial[r] += shfl_xor(virial[r], s, t_per_atom); \
|
||||
} \
|
||||
} \
|
||||
} \
|
||||
if (offset==0) { \
|
||||
engv+=ii; \
|
||||
if (eflag>0) { \
|
||||
*engv=energy; \
|
||||
engv+=inum; \
|
||||
} \
|
||||
if (vflag>0) { \
|
||||
for (int i=0; i<6; i++) { \
|
||||
*engv=virial[i]; \
|
||||
engv+=inum; \
|
||||
} \
|
||||
} \
|
||||
ans[ii]=f; \
|
||||
}
|
||||
|
||||
#define store_answers_q(f, energy, e_coul, virial, ii, inum, tid, \
|
||||
t_per_atom, offset, eflag, vflag, ans, engv) \
|
||||
if (t_per_atom>1) { \
|
||||
for (unsigned int s=t_per_atom/2; s>0; s>>=1) { \
|
||||
f.x += shfl_xor(f.x, s, t_per_atom); \
|
||||
f.y += shfl_xor(f.y, s, t_per_atom); \
|
||||
f.z += shfl_xor(f.z, s, t_per_atom); \
|
||||
energy += shfl_xor(energy, s, t_per_atom); \
|
||||
e_coul += shfl_xor(e_coul, s, t_per_atom); \
|
||||
} \
|
||||
if (vflag>0) { \
|
||||
for (unsigned int s=t_per_atom/2; s>0; s>>=1) { \
|
||||
for (int r=0; r<6; r++) \
|
||||
virial[r] += shfl_xor(virial[r], s, t_per_atom); \
|
||||
} \
|
||||
} \
|
||||
} \
|
||||
if (offset==0) { \
|
||||
engv+=ii; \
|
||||
if (eflag>0) { \
|
||||
*engv=energy; \
|
||||
engv+=inum; \
|
||||
*engv=e_coul; \
|
||||
engv+=inum; \
|
||||
} \
|
||||
if (vflag>0) { \
|
||||
for (int i=0; i<6; i++) { \
|
||||
*engv=virial[i]; \
|
||||
engv+=inum; \
|
||||
} \
|
||||
} \
|
||||
ans[ii]=f; \
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
|
|
|
@ -132,7 +132,7 @@ int * BaseAtomicT::reset_nbors(const int nall, const int inum, int *ilist,
|
|||
resize_atom(inum,nall,success);
|
||||
resize_local(inum,mn,success);
|
||||
if (!success)
|
||||
return false;
|
||||
return NULL;
|
||||
|
||||
nbor->get_host(inum,ilist,numj,firstneigh,block_size());
|
||||
|
||||
|
|
|
@ -135,7 +135,7 @@ int * BaseChargeT::reset_nbors(const int nall, const int inum, int *ilist,
|
|||
resize_atom(inum,nall,success);
|
||||
resize_local(inum,mn,success);
|
||||
if (!success)
|
||||
return false;
|
||||
return NULL;
|
||||
|
||||
nbor->get_host(inum,ilist,numj,firstneigh,block_size());
|
||||
|
||||
|
|
|
@ -137,7 +137,7 @@ int * BaseDipoleT::reset_nbors(const int nall, const int inum, int *ilist,
|
|||
resize_atom(inum,nall,success);
|
||||
resize_local(inum,mn,success);
|
||||
if (!success)
|
||||
return false;
|
||||
return NULL;
|
||||
|
||||
nbor->get_host(inum,ilist,numj,firstneigh,block_size());
|
||||
|
||||
|
|
|
@ -80,7 +80,7 @@ int BaseEllipsoidT::init_base(const int nlocal, const int nall,
|
|||
ucl_device=device->gpu;
|
||||
atom=&device->atom;
|
||||
|
||||
_block_size=device->pair_block_size();
|
||||
_block_size=device->block_ellipse();
|
||||
compile_kernels(*ucl_device,ellipsoid_program,lj_program,k_name,ellip_sphere);
|
||||
|
||||
// Initialize host-device load balancer
|
||||
|
@ -118,9 +118,8 @@ int BaseEllipsoidT::init_base(const int nlocal, const int nall,
|
|||
ans->force.zero();
|
||||
|
||||
// Memory for ilist ordered by particle type
|
||||
if (host_olist.alloc(nbor->max_atoms(),*ucl_device)==UCL_SUCCESS)
|
||||
return 0;
|
||||
else return -3;
|
||||
if (host_olist.alloc(nbor->max_atoms(),*ucl_device)!=UCL_SUCCESS)
|
||||
return -3;
|
||||
|
||||
_max_an_bytes=ans->gpu_bytes()+nbor->gpu_bytes();
|
||||
|
||||
|
|
|
@ -24,14 +24,18 @@ texture<int4,1> pos_tex;
|
|||
#define pos_tex x_
|
||||
#endif
|
||||
|
||||
__kernel void k_born(__global numtyp4 *x_, __global numtyp4 *coeff1,
|
||||
__global numtyp4* coeff2,
|
||||
__global numtyp2 *cutsq_sigma,
|
||||
const int lj_types, __global numtyp *sp_lj_in,
|
||||
__global int *dev_nbor, __global int *dev_packed,
|
||||
__global acctyp4 *ans, __global acctyp *engv,
|
||||
const int eflag, const int vflag, const int inum,
|
||||
const int nbor_pitch, const int t_per_atom) {
|
||||
__kernel void k_born(const __global numtyp4 *restrict x_,
|
||||
const __global numtyp4 *restrict coeff1,
|
||||
const __global numtyp4 *restrict coeff2,
|
||||
const __global numtyp2 *restrict cutsq_sigma,
|
||||
const int lj_types,
|
||||
const __global numtyp *restrict sp_lj_in,
|
||||
const __global int *dev_nbor,
|
||||
const __global int *dev_packed,
|
||||
__global acctyp4 *restrict ans,
|
||||
__global acctyp *restrict engv,
|
||||
const int eflag, const int vflag, const int inum,
|
||||
const int nbor_pitch, const int t_per_atom) {
|
||||
int tid, ii, offset;
|
||||
atom_info(t_per_atom,ii,tid,offset);
|
||||
|
||||
|
@ -49,7 +53,7 @@ __kernel void k_born(__global numtyp4 *x_, __global numtyp4 *coeff1,
|
|||
virial[i]=(acctyp)0;
|
||||
|
||||
if (ii<inum) {
|
||||
__global int *nbor, *list_end;
|
||||
const __global int *nbor, *list_end;
|
||||
int i, numj, n_stride;
|
||||
nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset,i,numj,
|
||||
n_stride,list_end,nbor);
|
||||
|
@ -108,14 +112,17 @@ __kernel void k_born(__global numtyp4 *x_, __global numtyp4 *coeff1,
|
|||
} // if ii
|
||||
}
|
||||
|
||||
__kernel void k_born_fast(__global numtyp4 *x_, __global numtyp4 *coeff1_in,
|
||||
__global numtyp4* coeff2_in,
|
||||
__global numtyp2 *cutsq_sigma,
|
||||
__global numtyp* sp_lj_in,
|
||||
__global int *dev_nbor, __global int *dev_packed,
|
||||
__global acctyp4 *ans, __global acctyp *engv,
|
||||
const int eflag, const int vflag, const int inum,
|
||||
const int nbor_pitch, const int t_per_atom) {
|
||||
__kernel void k_born_fast(const __global numtyp4 *restrict x_,
|
||||
const __global numtyp4 *restrict coeff1_in,
|
||||
const __global numtyp4 *restrict coeff2_in,
|
||||
const __global numtyp2 *restrict cutsq_sigma,
|
||||
const __global numtyp *restrict sp_lj_in,
|
||||
const __global int *dev_nbor,
|
||||
const __global int *dev_packed,
|
||||
__global acctyp4 *restrict ans,
|
||||
__global acctyp *restrict engv,
|
||||
const int eflag, const int vflag, const int inum,
|
||||
const int nbor_pitch, const int t_per_atom) {
|
||||
int tid, ii, offset;
|
||||
atom_info(t_per_atom,ii,tid,offset);
|
||||
|
||||
|
@ -140,7 +147,7 @@ __kernel void k_born_fast(__global numtyp4 *x_, __global numtyp4 *coeff1_in,
|
|||
__syncthreads();
|
||||
|
||||
if (ii<inum) {
|
||||
__global int *nbor, *list_end;
|
||||
const __global int *nbor, *list_end;
|
||||
int i, numj, n_stride;
|
||||
nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset,i,numj,
|
||||
n_stride,list_end,nbor);
|
||||
|
|
|
@ -29,14 +29,19 @@ texture<int2> q_tex;
|
|||
#define q_tex q_
|
||||
#endif
|
||||
|
||||
__kernel void k_born_long(__global numtyp4 *x_, __global numtyp4 *coeff1,
|
||||
__global numtyp4* coeff2, const int lj_types,
|
||||
__global numtyp *sp_lj_in, __global int *dev_nbor,
|
||||
__global int *dev_packed, __global acctyp4 *ans,
|
||||
__global acctyp *engv, const int eflag,
|
||||
const int vflag, const int inum,
|
||||
const int nbor_pitch, __global numtyp *q_,
|
||||
__global numtyp4 *cutsq_sigma,
|
||||
__kernel void k_born_long(const __global numtyp4 *restrict x_,
|
||||
const __global numtyp4 *restrict coeff1,
|
||||
const __global numtyp4 *restrict coeff2,
|
||||
const int lj_types,
|
||||
const __global numtyp *restrict sp_lj_in,
|
||||
const __global int *dev_nbor,
|
||||
const __global int *dev_packed,
|
||||
__global acctyp4 *restrict ans,
|
||||
__global acctyp *restrict engv,
|
||||
const int eflag, const int vflag, const int inum,
|
||||
const int nbor_pitch,
|
||||
const __global numtyp *restrict q_,
|
||||
const __global numtyp4 *restrict cutsq_sigma,
|
||||
const numtyp cut_coulsq, const numtyp qqrd2e,
|
||||
const numtyp g_ewald, const int t_per_atom) {
|
||||
int tid, ii, offset;
|
||||
|
@ -61,7 +66,7 @@ __kernel void k_born_long(__global numtyp4 *x_, __global numtyp4 *coeff1,
|
|||
virial[i]=(acctyp)0;
|
||||
|
||||
if (ii<inum) {
|
||||
__global int *nbor, *list_end;
|
||||
const __global int *nbor, *list_end;
|
||||
int i, numj, n_stride;
|
||||
nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset,i,numj,
|
||||
n_stride,list_end,nbor);
|
||||
|
@ -143,14 +148,18 @@ __kernel void k_born_long(__global numtyp4 *x_, __global numtyp4 *coeff1,
|
|||
} // if ii
|
||||
}
|
||||
|
||||
__kernel void k_born_long_fast(__global numtyp4 *x_, __global numtyp4 *coeff1_in,
|
||||
__global numtyp4* coeff2_in,
|
||||
__global numtyp* sp_lj_in,
|
||||
__global int *dev_nbor, __global int *dev_packed,
|
||||
__global acctyp4 *ans, __global acctyp *engv,
|
||||
__kernel void k_born_long_fast(const __global numtyp4 *restrict x_,
|
||||
const __global numtyp4 *restrict coeff1_in,
|
||||
const __global numtyp4 *restrict coeff2_in,
|
||||
const __global numtyp *restrict sp_lj_in,
|
||||
const __global int *dev_nbor,
|
||||
const __global int *dev_packed,
|
||||
__global acctyp4 *restrict ans,
|
||||
__global acctyp *restrict engv,
|
||||
const int eflag, const int vflag, const int inum,
|
||||
const int nbor_pitch, __global numtyp *q_,
|
||||
__global numtyp4 *cutsq_sigma,
|
||||
const int nbor_pitch,
|
||||
const __global numtyp *restrict q_,
|
||||
const __global numtyp4 *restrict cutsq_sigma,
|
||||
const numtyp cut_coulsq, const numtyp qqrd2e,
|
||||
const numtyp g_ewald, const int t_per_atom) {
|
||||
int tid, ii, offset;
|
||||
|
@ -178,7 +187,7 @@ __kernel void k_born_long_fast(__global numtyp4 *x_, __global numtyp4 *coeff1_in
|
|||
__syncthreads();
|
||||
|
||||
if (ii<inum) {
|
||||
__global int *nbor, *list_end;
|
||||
const __global int *nbor, *list_end;
|
||||
int i, numj, n_stride;
|
||||
nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset,i,numj,
|
||||
n_stride,list_end,nbor);
|
||||
|
|
|
@ -31,14 +31,19 @@ texture<int2> q_tex;
|
|||
|
||||
#define MY_PIS (acctyp)1.77245385090551602729
|
||||
|
||||
__kernel void k_born_wolf(__global numtyp4 *x_, __global numtyp4 *coeff1,
|
||||
__global numtyp4* coeff2, const int lj_types,
|
||||
__global numtyp *sp_lj_in, __global int *dev_nbor,
|
||||
__global int *dev_packed, __global acctyp4 *ans,
|
||||
__global acctyp *engv, const int eflag,
|
||||
const int vflag, const int inum,
|
||||
const int nbor_pitch, __global numtyp *q_,
|
||||
__global numtyp4 *cutsq_sigma,
|
||||
__kernel void k_born_wolf(const __global numtyp4 *restrict x_,
|
||||
const __global numtyp4 *restrict coeff1,
|
||||
const __global numtyp4 *restrict coeff2,
|
||||
const int lj_types,
|
||||
const __global numtyp *restrict sp_lj_in,
|
||||
const __global int *dev_nbor,
|
||||
const __global int *dev_packed,
|
||||
__global acctyp4 *restrict ans,
|
||||
__global acctyp *restrict engv,
|
||||
const int eflag, const int vflag, const int inum,
|
||||
const int nbor_pitch,
|
||||
const __global numtyp *restrict q_,
|
||||
const __global numtyp4 *restrict cutsq_sigma,
|
||||
const numtyp cut_coulsq, const numtyp qqrd2e,
|
||||
const numtyp alf, const numtyp e_shift,
|
||||
const numtyp f_shift, const int t_per_atom) {
|
||||
|
@ -64,7 +69,7 @@ __kernel void k_born_wolf(__global numtyp4 *x_, __global numtyp4 *coeff1,
|
|||
virial[i]=(acctyp)0;
|
||||
|
||||
if (ii<inum) {
|
||||
__global int *nbor, *list_end;
|
||||
const __global int *nbor, *list_end;
|
||||
int i, numj, n_stride;
|
||||
nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset,i,numj,
|
||||
n_stride,list_end,nbor);
|
||||
|
@ -74,7 +79,8 @@ __kernel void k_born_wolf(__global numtyp4 *x_, __global numtyp4 *coeff1,
|
|||
int itype=ix.w;
|
||||
|
||||
if (eflag>0) {
|
||||
acctyp e_self = -((acctyp)0.5*e_shift + alf/MY_PIS) * qtmp*qtmp*qqrd2e/(acctyp)t_per_atom;
|
||||
acctyp e_self = -((acctyp)0.5*e_shift + alf/MY_PIS) *
|
||||
qtmp*qtmp*qqrd2e/(acctyp)t_per_atom;
|
||||
e_coul += (acctyp)2.0*e_self;
|
||||
}
|
||||
|
||||
|
@ -83,7 +89,7 @@ __kernel void k_born_wolf(__global numtyp4 *x_, __global numtyp4 *coeff1,
|
|||
|
||||
numtyp factor_lj, factor_coul;
|
||||
factor_lj = sp_lj[sbmask(j)];
|
||||
factor_coul = (numtyp)1.0-sp_lj[sbmask(j)+4];
|
||||
factor_coul = sp_lj[sbmask(j)+4];
|
||||
j &= NEIGHMASK;
|
||||
|
||||
numtyp4 jx; fetch4(jx,j,pos_tex); //x_[j];
|
||||
|
@ -98,7 +104,7 @@ __kernel void k_born_wolf(__global numtyp4 *x_, __global numtyp4 *coeff1,
|
|||
int mtype=itype*lj_types+jtype;
|
||||
if (rsq<cutsq_sigma[mtype].x) { // cutsq
|
||||
numtyp r2inv = ucl_recip(rsq);
|
||||
numtyp forcecoul, forceborn, force, r6inv, prefactor, _erfc;
|
||||
numtyp forcecoul, forceborn, force, r6inv, prefactor, erfcc;
|
||||
numtyp v_sh = (numtyp)0.0;
|
||||
numtyp rexp = (numtyp)0.0;
|
||||
|
||||
|
@ -113,14 +119,14 @@ __kernel void k_born_wolf(__global numtyp4 *x_, __global numtyp4 *coeff1,
|
|||
if (rsq < cut_coulsq) {
|
||||
numtyp r=ucl_rsqrt(r2inv);
|
||||
numtyp arij = alf * r;
|
||||
numtyp expm2 = ucl_exp(-arij*arij);
|
||||
numtyp t = ucl_recip((numtyp)1.0 + EWALD_P*arij);
|
||||
_erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2;
|
||||
numtyp erfcd = ucl_exp(-arij*arij);
|
||||
fetch(prefactor,j,q_tex);
|
||||
prefactor *= qqrd2e * qtmp/r;
|
||||
v_sh = (_erfc - e_shift*r)*prefactor;
|
||||
numtyp dvdrr = (_erfc/rsq + EWALD_F*alf*expm2/r) + f_shift;
|
||||
forcecoul = prefactor * (dvdrr*rsq-factor_coul);
|
||||
|
||||
erfcc = erfc(arij);
|
||||
v_sh = (erfcc - e_shift*r)*prefactor;
|
||||
numtyp dvdrr = (erfcc/rsq + (numtyp)2.0*alf/MY_PIS * erfcd/r) + f_shift;
|
||||
forcecoul = prefactor * dvdrr*rsq*factor_coul;
|
||||
} else forcecoul = (numtyp)0.0;
|
||||
|
||||
force = (forceborn + forcecoul) * r2inv;
|
||||
|
@ -131,8 +137,8 @@ __kernel void k_born_wolf(__global numtyp4 *x_, __global numtyp4 *coeff1,
|
|||
|
||||
if (eflag>0) {
|
||||
if (rsq < cut_coulsq)
|
||||
e_coul += prefactor*(v_sh-factor_coul);
|
||||
if (rsq < coeff1[mtype].w) {
|
||||
e_coul += v_sh*factor_coul;
|
||||
if (rsq < cutsq_sigma[mtype].y) {
|
||||
numtyp e=coeff2[mtype].x*rexp - coeff2[mtype].y*r6inv
|
||||
+ coeff2[mtype].z*r2inv*r6inv;
|
||||
energy+=factor_lj*(e-coeff2[mtype].w);
|
||||
|
@ -154,14 +160,18 @@ __kernel void k_born_wolf(__global numtyp4 *x_, __global numtyp4 *coeff1,
|
|||
} // if ii
|
||||
}
|
||||
|
||||
__kernel void k_born_wolf_fast(__global numtyp4 *x_, __global numtyp4 *coeff1_in,
|
||||
__global numtyp4* coeff2_in,
|
||||
__global numtyp* sp_lj_in,
|
||||
__global int *dev_nbor, __global int *dev_packed,
|
||||
__global acctyp4 *ans, __global acctyp *engv,
|
||||
__kernel void k_born_wolf_fast(const __global numtyp4 *restrict x_,
|
||||
const __global numtyp4 *restrict coeff1_in,
|
||||
const __global numtyp4 *restrict coeff2_in,
|
||||
const __global numtyp *restrict sp_lj_in,
|
||||
const __global int *dev_nbor,
|
||||
const __global int *dev_packed,
|
||||
__global acctyp4 *restrict ans,
|
||||
__global acctyp *restrict engv,
|
||||
const int eflag, const int vflag, const int inum,
|
||||
const int nbor_pitch, __global numtyp *q_,
|
||||
__global numtyp4 *cutsq_sigma,
|
||||
const int nbor_pitch,
|
||||
const __global numtyp *restrict q_,
|
||||
const __global numtyp4 *restrict cutsq_sigma,
|
||||
const numtyp cut_coulsq, const numtyp qqrd2e,
|
||||
const numtyp alf, const numtyp e_shift,
|
||||
const numtyp f_shift, const int t_per_atom) {
|
||||
|
@ -190,7 +200,7 @@ __kernel void k_born_wolf_fast(__global numtyp4 *x_, __global numtyp4 *coeff1_in
|
|||
__syncthreads();
|
||||
|
||||
if (ii<inum) {
|
||||
__global int *nbor, *list_end;
|
||||
const __global int *nbor, *list_end;
|
||||
int i, numj, n_stride;
|
||||
nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset,i,numj,
|
||||
n_stride,list_end,nbor);
|
||||
|
@ -201,7 +211,8 @@ __kernel void k_born_wolf_fast(__global numtyp4 *x_, __global numtyp4 *coeff1_in
|
|||
int itype=fast_mul((int)MAX_SHARED_TYPES,iw);
|
||||
|
||||
if (eflag>0) {
|
||||
acctyp e_self = -((acctyp)0.5*e_shift + alf/MY_PIS) * qtmp*qtmp*qqrd2e/(acctyp)t_per_atom;
|
||||
acctyp e_self = -((acctyp)0.5*e_shift + alf/MY_PIS) *
|
||||
qtmp*qtmp*qqrd2e/(acctyp)t_per_atom;
|
||||
e_coul += (acctyp)2.0*e_self;
|
||||
}
|
||||
|
||||
|
@ -210,7 +221,7 @@ __kernel void k_born_wolf_fast(__global numtyp4 *x_, __global numtyp4 *coeff1_in
|
|||
|
||||
numtyp factor_lj, factor_coul;
|
||||
factor_lj = sp_lj[sbmask(j)];
|
||||
factor_coul = (numtyp)1.0-sp_lj[sbmask(j)+4];
|
||||
factor_coul = sp_lj[sbmask(j)+4];
|
||||
j &= NEIGHMASK;
|
||||
|
||||
numtyp4 jx; fetch4(jx,j,pos_tex); //x_[j];
|
||||
|
@ -224,11 +235,11 @@ __kernel void k_born_wolf_fast(__global numtyp4 *x_, __global numtyp4 *coeff1_in
|
|||
|
||||
if (rsq<cutsq_sigma[mtype].x) {
|
||||
numtyp r2inv=ucl_recip(rsq);
|
||||
numtyp forcecoul, forceborn, force, r6inv, prefactor, _erfc;
|
||||
numtyp forcecoul, forceborn, force, r6inv, prefactor, erfcc;
|
||||
numtyp v_sh = (numtyp)0.0;
|
||||
numtyp rexp = (numtyp)0.0;
|
||||
|
||||
if (rsq < coeff1[mtype].w) {
|
||||
if (rsq < cutsq_sigma[mtype].y) {
|
||||
numtyp r = ucl_sqrt(rsq);
|
||||
rexp = ucl_exp((cutsq_sigma[mtype].z-r)*coeff1[mtype].x);
|
||||
r6inv = r2inv*r2inv*r2inv;
|
||||
|
@ -237,16 +248,16 @@ __kernel void k_born_wolf_fast(__global numtyp4 *x_, __global numtyp4 *coeff1_in
|
|||
} else forceborn = (numtyp)0.0;
|
||||
|
||||
if (rsq < cut_coulsq) {
|
||||
numtyp r=ucl_rsqrt(r2inv);
|
||||
numtyp r=ucl_sqrt(rsq);
|
||||
numtyp arij = alf * r;
|
||||
numtyp expm2 = ucl_exp(-arij*arij);
|
||||
numtyp t = ucl_recip((numtyp)1.0 + EWALD_P*arij);
|
||||
_erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2;
|
||||
numtyp erfcd = ucl_exp(-arij*arij);
|
||||
fetch(prefactor,j,q_tex);
|
||||
prefactor *= qqrd2e * qtmp/r;
|
||||
v_sh = (_erfc - e_shift*r)*prefactor;
|
||||
numtyp dvdrr = (_erfc/rsq + EWALD_F*alf*expm2/r) + f_shift;
|
||||
forcecoul = prefactor * (dvdrr*rsq-factor_coul);
|
||||
|
||||
erfcc = erfc(arij);
|
||||
v_sh = (erfcc - e_shift*r)*prefactor;
|
||||
numtyp dvdrr = (erfcc/rsq + (numtyp)2.0*alf/MY_PIS * erfcd/r) + f_shift;
|
||||
forcecoul = prefactor * dvdrr*rsq*factor_coul;
|
||||
} else forcecoul = (numtyp)0.0;
|
||||
|
||||
force = (forceborn + forcecoul) * r2inv;
|
||||
|
@ -257,8 +268,8 @@ __kernel void k_born_wolf_fast(__global numtyp4 *x_, __global numtyp4 *coeff1_in
|
|||
|
||||
if (eflag>0) {
|
||||
if (rsq < cut_coulsq)
|
||||
e_coul += prefactor*(v_sh-factor_coul);
|
||||
if (rsq < coeff1[mtype].w) {
|
||||
e_coul += v_sh*factor_coul;
|
||||
if (rsq < cutsq_sigma[mtype].y) {
|
||||
numtyp e=coeff2[mtype].x*rexp - coeff2[mtype].y*r6inv
|
||||
+ coeff2[mtype].z*r2inv*r6inv;
|
||||
energy+=factor_lj*(e-coeff2[mtype].w);
|
||||
|
|
|
@ -24,13 +24,17 @@ texture<int4,1> pos_tex;
|
|||
#define pos_tex x_
|
||||
#endif
|
||||
|
||||
__kernel void k_buck(__global numtyp4 *x_, __global numtyp4 *coeff1,
|
||||
__global numtyp4* coeff2, const int lj_types,
|
||||
__global numtyp *sp_lj_in, __global int *dev_nbor,
|
||||
__global int *dev_packed, __global acctyp4 *ans,
|
||||
__global acctyp *engv, const int eflag,
|
||||
const int vflag, const int inum,
|
||||
const int nbor_pitch, const int t_per_atom) {
|
||||
__kernel void k_buck(const __global numtyp4 *restrict x_,
|
||||
const __global numtyp4 *restrict coeff1,
|
||||
const __global numtyp4 *restrict coeff2,
|
||||
const int lj_types,
|
||||
const __global numtyp *restrict sp_lj_in,
|
||||
const __global int *dev_nbor,
|
||||
const __global int *dev_packed,
|
||||
__global acctyp4 *restrict ans,
|
||||
__global acctyp *restrict engv,
|
||||
const int eflag, const int vflag, const int inum,
|
||||
const int nbor_pitch, const int t_per_atom) {
|
||||
int tid, ii, offset;
|
||||
atom_info(t_per_atom,ii,tid,offset);
|
||||
|
||||
|
@ -48,7 +52,7 @@ __kernel void k_buck(__global numtyp4 *x_, __global numtyp4 *coeff1,
|
|||
virial[i]=(acctyp)0;
|
||||
|
||||
if (ii<inum) {
|
||||
__global int *nbor, *list_end;
|
||||
const __global int *nbor, *list_end;
|
||||
int i, numj, n_stride;
|
||||
nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset,i,numj,
|
||||
n_stride,list_end,nbor);
|
||||
|
@ -106,13 +110,16 @@ __kernel void k_buck(__global numtyp4 *x_, __global numtyp4 *coeff1,
|
|||
} // if ii
|
||||
}
|
||||
|
||||
__kernel void k_buck_fast(__global numtyp4 *x_, __global numtyp4 *coeff1_in,
|
||||
__global numtyp4* coeff2_in,
|
||||
__global numtyp* sp_lj_in,
|
||||
__global int *dev_nbor, __global int *dev_packed,
|
||||
__global acctyp4 *ans, __global acctyp *engv,
|
||||
const int eflag, const int vflag, const int inum,
|
||||
const int nbor_pitch, const int t_per_atom) {
|
||||
__kernel void k_buck_fast(const __global numtyp4 *restrict x_,
|
||||
const __global numtyp4 *restrict coeff1_in,
|
||||
const __global numtyp4 *restrict coeff2_in,
|
||||
const __global numtyp *restrict sp_lj_in,
|
||||
const __global int *dev_nbor,
|
||||
const __global int *dev_packed,
|
||||
__global acctyp4 *restrict ans,
|
||||
__global acctyp *restrict engv,
|
||||
const int eflag, const int vflag, const int inum,
|
||||
const int nbor_pitch, const int t_per_atom) {
|
||||
int tid, ii, offset;
|
||||
atom_info(t_per_atom,ii,tid,offset);
|
||||
|
||||
|
@ -137,7 +144,7 @@ __kernel void k_buck_fast(__global numtyp4 *x_, __global numtyp4 *coeff1_in,
|
|||
__syncthreads();
|
||||
|
||||
if (ii<inum) {
|
||||
__global int *nbor, *list_end;
|
||||
const __global int *nbor, *list_end;
|
||||
int i, numj, n_stride;
|
||||
nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset,i,numj,
|
||||
n_stride,list_end,nbor);
|
||||
|
|
|
@ -29,15 +29,20 @@ texture<int2> q_tex;
|
|||
#define q_tex q_
|
||||
#endif
|
||||
|
||||
__kernel void k_buck_coul(__global numtyp4 *x_, __global numtyp4 *coeff1,
|
||||
__global numtyp4* coeff2, const int lj_types,
|
||||
__global numtyp *sp_lj_in, __global int *dev_nbor,
|
||||
__global int *dev_packed, __global acctyp4 *ans,
|
||||
__global acctyp *engv, const int eflag,
|
||||
const int vflag, const int inum,
|
||||
const int nbor_pitch, __global numtyp *q_ ,
|
||||
__global numtyp4 *cutsq, const numtyp qqrd2e,
|
||||
const int t_per_atom) {
|
||||
__kernel void k_buck_coul(const __global numtyp4 *restrict x_,
|
||||
const __global numtyp4 *restrict coeff1,
|
||||
const __global numtyp4 *restrict coeff2,
|
||||
const int lj_types,
|
||||
const __global numtyp *restrict sp_lj_in,
|
||||
const __global int *dev_nbor,
|
||||
const __global int *dev_packed,
|
||||
__global acctyp4 *restrict ans,
|
||||
__global acctyp *restrict engv,
|
||||
const int eflag, const int vflag, const int inum,
|
||||
const int nbor_pitch,
|
||||
const __global numtyp *restrict q_ ,
|
||||
const __global numtyp4 *restrict cutsq,
|
||||
const numtyp qqrd2e, const int t_per_atom) {
|
||||
int tid, ii, offset;
|
||||
atom_info(t_per_atom,ii,tid,offset);
|
||||
|
||||
|
@ -60,7 +65,7 @@ __kernel void k_buck_coul(__global numtyp4 *x_, __global numtyp4 *coeff1,
|
|||
virial[i]=(acctyp)0;
|
||||
|
||||
if (ii<inum) {
|
||||
__global int *nbor, *list_end;
|
||||
const __global int *nbor, *list_end;
|
||||
int i, numj, n_stride;
|
||||
nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset,i,numj,
|
||||
n_stride,list_end,nbor);
|
||||
|
@ -136,15 +141,19 @@ __kernel void k_buck_coul(__global numtyp4 *x_, __global numtyp4 *coeff1,
|
|||
} // if ii
|
||||
}
|
||||
|
||||
__kernel void k_buck_coul_fast(__global numtyp4 *x_, __global numtyp4 *coeff1_in,
|
||||
__global numtyp4* coeff2_in,
|
||||
__global numtyp* sp_lj_in,
|
||||
__global int *dev_nbor, __global int *dev_packed,
|
||||
__global acctyp4 *ans, __global acctyp *engv,
|
||||
__kernel void k_buck_coul_fast(const __global numtyp4 *restrict x_,
|
||||
const __global numtyp4 *restrict coeff1_in,
|
||||
const __global numtyp4 *restrict coeff2_in,
|
||||
const __global numtyp *restrict sp_lj_in,
|
||||
const __global int *dev_nbor,
|
||||
const __global int *dev_packed,
|
||||
__global acctyp4 *restrict ans,
|
||||
__global acctyp *restrict engv,
|
||||
const int eflag, const int vflag, const int inum,
|
||||
const int nbor_pitch, __global numtyp *q_,
|
||||
__global numtyp4 *_cutsq, const numtyp qqrd2e,
|
||||
const int t_per_atom) {
|
||||
const int nbor_pitch,
|
||||
const __global numtyp *restrict q_,
|
||||
const __global numtyp4 *restrict _cutsq,
|
||||
const numtyp qqrd2e, const int t_per_atom) {
|
||||
int tid, ii, offset;
|
||||
atom_info(t_per_atom,ii,tid,offset);
|
||||
|
||||
|
@ -172,7 +181,7 @@ __kernel void k_buck_coul_fast(__global numtyp4 *x_, __global numtyp4 *coeff1_in
|
|||
__syncthreads();
|
||||
|
||||
if (ii<inum) {
|
||||
__global int *nbor, *list_end;
|
||||
const __global int *nbor, *list_end;
|
||||
int i, numj, n_stride;
|
||||
nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset,i,numj,
|
||||
n_stride,list_end,nbor);
|
||||
|
|
|
@ -29,16 +29,21 @@ texture<int2> q_tex;
|
|||
#define q_tex q_
|
||||
#endif
|
||||
|
||||
__kernel void k_buck_coul_long(__global numtyp4 *x_, __global numtyp4 *coeff1,
|
||||
__global numtyp4* coeff2, const int lj_types,
|
||||
__global numtyp *sp_lj_in, __global int *dev_nbor,
|
||||
__global int *dev_packed, __global acctyp4 *ans,
|
||||
__global acctyp *engv, const int eflag,
|
||||
const int vflag, const int inum,
|
||||
const int nbor_pitch, __global numtyp *q_,
|
||||
__global numtyp *cutsq,
|
||||
const numtyp cut_coulsq, const numtyp qqrd2e,
|
||||
const numtyp g_ewald, const int t_per_atom) {
|
||||
__kernel void k_buck_coul_long(const __global numtyp4 *restrict x_,
|
||||
const __global numtyp4 *restrict coeff1,
|
||||
const __global numtyp4 *restrict coeff2,
|
||||
const int lj_types,
|
||||
const __global numtyp *restrict sp_lj_in,
|
||||
const __global int *dev_nbor,
|
||||
const __global int *dev_packed,
|
||||
__global acctyp4 *restrict ans,
|
||||
__global acctyp *restrict engv,
|
||||
const int eflag, const int vflag, const int inum,
|
||||
const int nbor_pitch,
|
||||
const __global numtyp *restrict q_,
|
||||
const __global numtyp *restrict cutsq,
|
||||
const numtyp cut_coulsq, const numtyp qqrd2e,
|
||||
const numtyp g_ewald, const int t_per_atom) {
|
||||
int tid, ii, offset;
|
||||
atom_info(t_per_atom,ii,tid,offset);
|
||||
|
||||
|
@ -61,7 +66,7 @@ __kernel void k_buck_coul_long(__global numtyp4 *x_, __global numtyp4 *coeff1,
|
|||
virial[i]=(acctyp)0;
|
||||
|
||||
if (ii<inum) {
|
||||
__global int *nbor, *list_end;
|
||||
const __global int *nbor, *list_end;
|
||||
int i, numj, n_stride;
|
||||
nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset,i,numj,
|
||||
n_stride,list_end,nbor);
|
||||
|
@ -144,16 +149,21 @@ __kernel void k_buck_coul_long(__global numtyp4 *x_, __global numtyp4 *coeff1,
|
|||
} // if ii
|
||||
}
|
||||
|
||||
__kernel void k_buck_coul_long_fast(__global numtyp4 *x_, __global numtyp4 *coeff1_in,
|
||||
__global numtyp4* coeff2_in,
|
||||
__global numtyp* sp_lj_in,
|
||||
__global int *dev_nbor, __global int *dev_packed,
|
||||
__global acctyp4 *ans, __global acctyp *engv,
|
||||
const int eflag, const int vflag, const int inum,
|
||||
const int nbor_pitch, __global numtyp *q_,
|
||||
__global numtyp *cutsq,
|
||||
const numtyp cut_coulsq, const numtyp qqrd2e,
|
||||
const numtyp g_ewald, const int t_per_atom) {
|
||||
__kernel void k_buck_coul_long_fast(const __global numtyp4 *restrict x_,
|
||||
const __global numtyp4 *restrict coeff1_in,
|
||||
const __global numtyp4 *restrict coeff2_in,
|
||||
const __global numtyp *restrict sp_lj_in,
|
||||
const __global int *dev_nbor,
|
||||
const __global int *dev_packed,
|
||||
__global acctyp4 *restrict ans,
|
||||
__global acctyp *restrict engv,
|
||||
const int eflag, const int vflag,
|
||||
const int inum, const int nbor_pitch,
|
||||
const __global numtyp *restrict q_,
|
||||
const __global numtyp *restrict cutsq,
|
||||
const numtyp cut_coulsq,
|
||||
const numtyp qqrd2e, const numtyp g_ewald,
|
||||
const int t_per_atom) {
|
||||
int tid, ii, offset;
|
||||
atom_info(t_per_atom,ii,tid,offset);
|
||||
|
||||
|
@ -179,7 +189,7 @@ __kernel void k_buck_coul_long_fast(__global numtyp4 *x_, __global numtyp4 *coef
|
|||
__syncthreads();
|
||||
|
||||
if (ii<inum) {
|
||||
__global int *nbor, *list_end;
|
||||
const __global int *nbor, *list_end;
|
||||
int i, numj, n_stride;
|
||||
nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset,i,numj,
|
||||
n_stride,list_end,nbor);
|
||||
|
|
|
@ -24,13 +24,17 @@ texture<int4,1> pos_tex;
|
|||
#define pos_tex x_
|
||||
#endif
|
||||
|
||||
__kernel void k_cg_cmm(__global numtyp4 *x_, __global numtyp4 *lj1,
|
||||
__global numtyp4* lj3, const int lj_types,
|
||||
__global numtyp *sp_lj_in, __global int *dev_nbor,
|
||||
__global int *dev_packed, __global acctyp4 *ans,
|
||||
__global acctyp *engv, const int eflag,
|
||||
const int vflag, const int inum,
|
||||
const int nbor_pitch, const int t_per_atom) {
|
||||
__kernel void k_cg_cmm(const __global numtyp4 *restrict x_,
|
||||
const __global numtyp4 *restrict lj1,
|
||||
const __global numtyp4 *restrict lj3,
|
||||
const int lj_types,
|
||||
const __global numtyp *restrict sp_lj_in,
|
||||
const __global int *dev_nbor,
|
||||
const __global int *dev_packed,
|
||||
__global acctyp4 *restrict ans,
|
||||
__global acctyp *restrict engv,
|
||||
const int eflag, const int vflag, const int inum,
|
||||
const int nbor_pitch, const int t_per_atom) {
|
||||
int tid, ii, offset;
|
||||
atom_info(t_per_atom,ii,tid,offset);
|
||||
|
||||
|
@ -48,7 +52,7 @@ __kernel void k_cg_cmm(__global numtyp4 *x_, __global numtyp4 *lj1,
|
|||
virial[i]=(acctyp)0;
|
||||
|
||||
if (ii<inum) {
|
||||
__global int *nbor, *list_end;
|
||||
const __global int *nbor, *list_end;
|
||||
int i, numj, n_stride;
|
||||
nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset,i,numj,
|
||||
n_stride,list_end,nbor);
|
||||
|
@ -111,13 +115,16 @@ __kernel void k_cg_cmm(__global numtyp4 *x_, __global numtyp4 *lj1,
|
|||
} // if ii
|
||||
}
|
||||
|
||||
__kernel void k_cg_cmm_fast(__global numtyp4 *x_, __global numtyp4 *lj1_in,
|
||||
__global numtyp4* lj3_in,
|
||||
__global numtyp* sp_lj_in,__global int *dev_nbor,
|
||||
__global int *dev_packed, __global acctyp4 *ans,
|
||||
__global acctyp *engv, const int eflag,
|
||||
const int vflag, const int inum,
|
||||
const int nbor_pitch, const int t_per_atom) {
|
||||
__kernel void k_cg_cmm_fast(const __global numtyp4 *restrict x_,
|
||||
const __global numtyp4 *restrict lj1_in,
|
||||
const __global numtyp4 *restrict lj3_in,
|
||||
const __global numtyp *restrict sp_lj_in,
|
||||
const __global int *dev_nbor,
|
||||
const __global int *dev_packed,
|
||||
__global acctyp4 *restrict ans,
|
||||
__global acctyp *restrict engv,
|
||||
const int eflag, const int vflag, const int inum,
|
||||
const int nbor_pitch, const int t_per_atom) {
|
||||
int tid, ii, offset;
|
||||
atom_info(t_per_atom,ii,tid,offset);
|
||||
|
||||
|
@ -142,7 +149,7 @@ __kernel void k_cg_cmm_fast(__global numtyp4 *x_, __global numtyp4 *lj1_in,
|
|||
__syncthreads();
|
||||
|
||||
if (ii<inum) {
|
||||
__global int *nbor, *list_end;
|
||||
const __global int *nbor, *list_end;
|
||||
int i, numj, n_stride;
|
||||
nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset,i,numj,
|
||||
n_stride,list_end,nbor);
|
||||
|
|
|
@ -29,15 +29,20 @@ texture<int2> q_tex;
|
|||
#define q_tex q_
|
||||
#endif
|
||||
|
||||
__kernel void k_cg_cmm_long(__global numtyp4 *x_, __global numtyp4 *lj1,
|
||||
__global numtyp4* lj3, const int lj_types,
|
||||
__global numtyp *sp_lj_in, __global int *dev_nbor,
|
||||
__global int *dev_packed, __global acctyp4 *ans,
|
||||
__global acctyp *engv, const int eflag,
|
||||
const int vflag, const int inum,
|
||||
const int nbor_pitch, __global numtyp *q_ ,
|
||||
const numtyp cut_coulsq, const numtyp qqrd2e,
|
||||
const numtyp g_ewald, const int t_per_atom) {
|
||||
__kernel void k_cg_cmm_long(const __global numtyp4 *restrict x_,
|
||||
const __global numtyp4 *restrict lj1,
|
||||
const __global numtyp4 *restrict lj3,
|
||||
const int lj_types,
|
||||
const __global numtyp *restrict sp_lj_in,
|
||||
const __global int *dev_nbor,
|
||||
const __global int *dev_packed,
|
||||
__global acctyp4 *restrict ans,
|
||||
__global acctyp *restrict engv,
|
||||
const int eflag, const int vflag, const int inum,
|
||||
const int nbor_pitch,
|
||||
const __global numtyp *restrict q_ ,
|
||||
const numtyp cut_coulsq, const numtyp qqrd2e,
|
||||
const numtyp g_ewald, const int t_per_atom) {
|
||||
int tid, ii, offset;
|
||||
atom_info(t_per_atom,ii,tid,offset);
|
||||
|
||||
|
@ -60,7 +65,7 @@ __kernel void k_cg_cmm_long(__global numtyp4 *x_, __global numtyp4 *lj1,
|
|||
virial[i]=(acctyp)0;
|
||||
|
||||
if (ii<inum) {
|
||||
__global int *nbor, *list_end;
|
||||
const __global int *nbor, *list_end;
|
||||
int i, numj, n_stride;
|
||||
nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset,i,numj,
|
||||
n_stride,list_end,nbor);
|
||||
|
@ -148,15 +153,19 @@ __kernel void k_cg_cmm_long(__global numtyp4 *x_, __global numtyp4 *lj1,
|
|||
} // if ii
|
||||
}
|
||||
|
||||
__kernel void k_cg_cmm_long_fast(__global numtyp4 *x_, __global numtyp4 *lj1_in,
|
||||
__global numtyp4* lj3_in,
|
||||
__global numtyp* sp_lj_in,
|
||||
__global int *dev_nbor, __global int *dev_packed,
|
||||
__global acctyp4 *ans, __global acctyp *engv,
|
||||
const int eflag, const int vflag, const int inum,
|
||||
const int nbor_pitch, __global numtyp *q_,
|
||||
const numtyp cut_coulsq, const numtyp qqrd2e,
|
||||
const numtyp g_ewald, const int t_per_atom) {
|
||||
__kernel void k_cg_cmm_long_fast(const __global numtyp4 *restrict x_,
|
||||
const __global numtyp4 *restrict lj1_in,
|
||||
const __global numtyp4 *restrict lj3_in,
|
||||
const __global numtyp *restrict sp_lj_in,
|
||||
const __global int *dev_nbor,
|
||||
const __global int *dev_packed,
|
||||
__global acctyp4 *restrict ans,
|
||||
__global acctyp *restrict engv,
|
||||
const int eflag, const int vflag,
|
||||
const int inum, const int nbor_pitch,
|
||||
const __global numtyp *restrict q_,
|
||||
const numtyp cut_coulsq, const numtyp qqrd2e,
|
||||
const numtyp g_ewald, const int t_per_atom) {
|
||||
int tid, ii, offset;
|
||||
atom_info(t_per_atom,ii,tid,offset);
|
||||
|
||||
|
@ -181,7 +190,7 @@ __kernel void k_cg_cmm_long_fast(__global numtyp4 *x_, __global numtyp4 *lj1_in,
|
|||
__syncthreads();
|
||||
|
||||
if (ii<inum) {
|
||||
__global int *nbor, *list_end;
|
||||
const __global int *nbor, *list_end;
|
||||
int i, numj, n_stride;
|
||||
nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset,i,numj,
|
||||
n_stride,list_end,nbor);
|
||||
|
|
|
@ -29,16 +29,21 @@ texture<int2> q_tex;
|
|||
#define q_tex q_
|
||||
#endif
|
||||
|
||||
__kernel void k_charmm_long(__global numtyp4 *x_, __global numtyp4 *lj1,
|
||||
const int lj_types, __global numtyp *sp_lj_in,
|
||||
__global int *dev_nbor, __global int *dev_packed,
|
||||
__global acctyp4 *ans, __global acctyp *engv,
|
||||
const int eflag, const int vflag, const int inum,
|
||||
const int nbor_pitch, __global numtyp *q_,
|
||||
const numtyp cut_coulsq, const numtyp qqrd2e,
|
||||
const numtyp g_ewald, const numtyp denom_lj,
|
||||
const numtyp cut_bothsq, const numtyp cut_ljsq,
|
||||
const numtyp cut_lj_innersq, const int t_per_atom) {
|
||||
__kernel void k_charmm_long(const __global numtyp4 *restrict x_,
|
||||
const __global numtyp4 *restrict lj1,
|
||||
const int lj_types,
|
||||
const __global numtyp *restrict sp_lj_in,
|
||||
const __global int *dev_nbor,
|
||||
const __global int *dev_packed,
|
||||
__global acctyp4 *restrict ans,
|
||||
__global acctyp *restrict engv,
|
||||
const int eflag, const int vflag, const int inum,
|
||||
const int nbor_pitch,
|
||||
const __global numtyp *restrict q_,
|
||||
const numtyp cut_coulsq, const numtyp qqrd2e,
|
||||
const numtyp g_ewald, const numtyp denom_lj,
|
||||
const numtyp cut_bothsq, const numtyp cut_ljsq,
|
||||
const numtyp cut_lj_innersq, const int t_per_atom) {
|
||||
int tid, ii, offset;
|
||||
atom_info(t_per_atom,ii,tid,offset);
|
||||
|
||||
|
@ -61,7 +66,7 @@ __kernel void k_charmm_long(__global numtyp4 *x_, __global numtyp4 *lj1,
|
|||
virial[i]=(acctyp)0;
|
||||
|
||||
if (ii<inum) {
|
||||
__global int *nbor, *list_end;
|
||||
const __global int *nbor, *list_end;
|
||||
int i, numj, n_stride;
|
||||
nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset,i,numj,
|
||||
n_stride,list_end,nbor);
|
||||
|
@ -152,17 +157,21 @@ __kernel void k_charmm_long(__global numtyp4 *x_, __global numtyp4 *lj1,
|
|||
} // if ii
|
||||
}
|
||||
|
||||
__kernel void k_charmm_long_fast(__global numtyp4 *x_, __global numtyp2 *ljd_in,
|
||||
__global numtyp* sp_lj_in, __global int *dev_nbor,
|
||||
__global int *dev_packed, __global acctyp4 *ans,
|
||||
__global acctyp *engv, const int eflag,
|
||||
const int vflag, const int inum,
|
||||
const int nbor_pitch, __global numtyp *q_,
|
||||
const numtyp cut_coulsq, const numtyp qqrd2e,
|
||||
const numtyp g_ewald, const numtyp denom_lj,
|
||||
const numtyp cut_bothsq, const numtyp cut_ljsq,
|
||||
const numtyp cut_lj_innersq,
|
||||
const int t_per_atom) {
|
||||
__kernel void k_charmm_long_fast(const __global numtyp4 *restrict x_,
|
||||
const __global numtyp2 *restrict ljd_in,
|
||||
const __global numtyp *restrict sp_lj_in,
|
||||
const __global int *dev_nbor,
|
||||
const __global int *dev_packed,
|
||||
__global acctyp4 *restrict ans,
|
||||
__global acctyp *restrict engv,
|
||||
const int eflag, const int vflag,
|
||||
const int inum, const int nbor_pitch,
|
||||
const __global numtyp *restrict q_,
|
||||
const numtyp cut_coulsq, const numtyp qqrd2e,
|
||||
const numtyp g_ewald, const numtyp denom_lj,
|
||||
const numtyp cut_bothsq, const numtyp cut_ljsq,
|
||||
const numtyp cut_lj_innersq,
|
||||
const int t_per_atom) {
|
||||
int tid, ii, offset;
|
||||
atom_info(t_per_atom,ii,tid,offset);
|
||||
|
||||
|
@ -170,7 +179,8 @@ __kernel void k_charmm_long_fast(__global numtyp4 *x_, __global numtyp2 *ljd_in,
|
|||
__local numtyp sp_lj[8];
|
||||
if (tid<8)
|
||||
sp_lj[tid]=sp_lj_in[tid];
|
||||
ljd[tid]=ljd_in[tid];
|
||||
if (tid<MAX_BIO_SHARED_TYPES)
|
||||
ljd[tid]=ljd_in[tid];
|
||||
if (tid+BLOCK_BIO_PAIR<MAX_BIO_SHARED_TYPES)
|
||||
ljd[tid+BLOCK_BIO_PAIR]=ljd_in[tid+BLOCK_BIO_PAIR];
|
||||
|
||||
|
@ -185,7 +195,7 @@ __kernel void k_charmm_long_fast(__global numtyp4 *x_, __global numtyp2 *ljd_in,
|
|||
__syncthreads();
|
||||
|
||||
if (ii<inum) {
|
||||
__global int *nbor, *list_end;
|
||||
const __global int *nbor, *list_end;
|
||||
int i, numj, n_stride;
|
||||
nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset,i,numj,
|
||||
n_stride,list_end,nbor);
|
||||
|
|
|
@ -24,16 +24,19 @@ texture<int4,1> pos_tex;
|
|||
#define pos_tex x_
|
||||
#endif
|
||||
|
||||
__kernel void k_colloid(__global numtyp4 *x_, __global numtyp4 *lj1,
|
||||
__global numtyp4* lj3, const int lj_types,
|
||||
__global numtyp *sp_lj_in,
|
||||
__global numtyp4* colloid1,
|
||||
__global numtyp4* colloid2,
|
||||
__global int *form,
|
||||
__global int *dev_nbor,
|
||||
__global int *dev_packed, __global acctyp4 *ans,
|
||||
__global acctyp *engv, const int eflag,
|
||||
const int vflag, const int inum,
|
||||
__kernel void k_colloid(const __global numtyp4 *restrict x_,
|
||||
const __global numtyp4 *restrict lj1,
|
||||
const __global numtyp4 *restrict lj3,
|
||||
const int lj_types,
|
||||
const __global numtyp *restrict sp_lj_in,
|
||||
const __global numtyp4 *restrict colloid1,
|
||||
const __global numtyp4 *restrict colloid2,
|
||||
const __global int *form,
|
||||
const __global int *dev_nbor,
|
||||
const __global int *dev_packed,
|
||||
__global acctyp4 *restrict ans,
|
||||
__global acctyp *restrict engv,
|
||||
const int eflag, const int vflag, const int inum,
|
||||
const int nbor_pitch, const int t_per_atom) {
|
||||
int tid, ii, offset;
|
||||
atom_info(t_per_atom,ii,tid,offset);
|
||||
|
@ -52,7 +55,7 @@ __kernel void k_colloid(__global numtyp4 *x_, __global numtyp4 *lj1,
|
|||
virial[i]=(acctyp)0;
|
||||
|
||||
if (ii<inum) {
|
||||
__global int *nbor, *list_end;
|
||||
const __global int *nbor, *list_end;
|
||||
int i, numj, n_stride;
|
||||
nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset,i,numj,
|
||||
n_stride,list_end,nbor);
|
||||
|
@ -98,9 +101,10 @@ __kernel void k_colloid(__global numtyp4 *x_, __global numtyp4 *lj1,
|
|||
K[3] *= K[3]*K[3];
|
||||
K[6] = K[3]*K[3];
|
||||
fR = colloid2[mtype].z*colloid1[mtype].x*c2*K[1]/K[3];
|
||||
force = 4.0/15.0*fR *
|
||||
(2.0*(K[1]+K[2]) * (K[1]*(5.0*K[1]+22.0*K[2])+5.0*K[4]) *
|
||||
colloid2[mtype].w/K[6]-5.0) / K[0];
|
||||
force = (numtyp)4.0/(numtyp)15.0*fR *
|
||||
((numtyp)2.0*(K[1]+K[2]) *
|
||||
(K[1]*((numtyp)5.0*K[1]+(numtyp)22.0*K[2])+(numtyp)5.0*K[4]) *
|
||||
colloid2[mtype].w/K[6]-(numtyp)5.0) / K[0];
|
||||
force*=factor_lj;
|
||||
} else if (form[mtype]==2) { // LARGE_LARGE
|
||||
r = ucl_sqrt(rsq);
|
||||
|
@ -131,8 +135,9 @@ __kernel void k_colloid(__global numtyp4 *x_, __global numtyp4 *lj1,
|
|||
fR = colloid1[mtype].x*colloid2[mtype].w/r/(numtyp)37800.0;
|
||||
evdwl = fR * (h[0]-h[1]-h[2]+h[3]);
|
||||
numtyp dUR = evdwl/r + (numtyp)5.0*fR*(g[0]+g[1]-g[2]-g[3]);
|
||||
numtyp dUA = -colloid1[mtype].x/3.0*r*(((numtyp)2.0*K[0]*K[7]+(numtyp)1.0)*K[7] +
|
||||
((numtyp)2.0*K[0]*K[8]-(numtyp)1.0)*K[8]);
|
||||
numtyp dUA = -colloid1[mtype].x/(numtyp)3.0*r*
|
||||
(((numtyp)2.0*K[0]*K[7]+(numtyp)1.0)*K[7] +
|
||||
((numtyp)2.0*K[0]*K[8]-(numtyp)1.0)*K[8]);
|
||||
force = factor_lj * (dUR+dUA)/r;
|
||||
}
|
||||
|
||||
|
@ -149,7 +154,8 @@ __kernel void k_colloid(__global numtyp4 *x_, __global numtyp4 *lj1,
|
|||
((numtyp)1.0-(K[1]*(K[1]*(K[1]/(numtyp)3.0+(numtyp)3.0*K[2]) +
|
||||
(numtyp)4.2*K[4])+K[2]*K[4]) * colloid2[mtype].w/K[6]);
|
||||
} else if (form[mtype]==2) {
|
||||
e=evdwl+colloid1[mtype].x/(numtyp)6.0 * ((numtyp)2.0*K[0]*(K[7]+K[8])-log(K[8]/K[7]));
|
||||
e=evdwl+colloid1[mtype].x/(numtyp)6.0 *
|
||||
((numtyp)2.0*K[0]*(K[7]+K[8])-log(K[8]/K[7]));
|
||||
}
|
||||
energy+=factor_lj*(e-lj3[mtype].z);
|
||||
}
|
||||
|
@ -169,15 +175,17 @@ __kernel void k_colloid(__global numtyp4 *x_, __global numtyp4 *lj1,
|
|||
} // if ii
|
||||
}
|
||||
|
||||
__kernel void k_colloid_fast(__global numtyp4 *x_,
|
||||
__global numtyp4 *lj1_in,
|
||||
__global numtyp4 *lj3_in,
|
||||
__global numtyp *sp_lj_in,
|
||||
__global numtyp4 *colloid1_in,
|
||||
__global numtyp4 *colloid2_in,
|
||||
__global int *form_in,
|
||||
__global int *dev_nbor, __global int *dev_packed,
|
||||
__global acctyp4 *ans, __global acctyp *engv,
|
||||
__kernel void k_colloid_fast(const __global numtyp4 *restrict x_,
|
||||
const __global numtyp4 *restrict lj1_in,
|
||||
const __global numtyp4 *restrict lj3_in,
|
||||
const __global numtyp *restrict sp_lj_in,
|
||||
const __global numtyp4 *restrict colloid1_in,
|
||||
const __global numtyp4 *restrict colloid2_in,
|
||||
const __global int *form_in,
|
||||
const __global int *dev_nbor,
|
||||
const __global int *dev_packed,
|
||||
__global acctyp4 *restrict ans,
|
||||
__global acctyp *restrict engv,
|
||||
const int eflag, const int vflag, const int inum,
|
||||
const int nbor_pitch, const int t_per_atom) {
|
||||
int tid, ii, offset;
|
||||
|
@ -210,7 +218,7 @@ __kernel void k_colloid_fast(__global numtyp4 *x_,
|
|||
__syncthreads();
|
||||
|
||||
if (ii<inum) {
|
||||
__global int *nbor, *list_end;
|
||||
const __global int *nbor, *list_end;
|
||||
int i, numj, n_stride;
|
||||
nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset,i,numj,
|
||||
n_stride,list_end,nbor);
|
||||
|
@ -257,7 +265,8 @@ __kernel void k_colloid_fast(__global numtyp4 *x_,
|
|||
K[6] = K[3]*K[3];
|
||||
fR = colloid2[mtype].z*colloid1[mtype].x*c2*K[1]/K[3];
|
||||
force = (numtyp)4.0/(numtyp)15.0*fR *
|
||||
((numtyp)2.0*(K[1]+K[2]) * (K[1]*((numtyp)5.0*K[1]+(numtyp)22.0*K[2])+(numtyp)5.0*K[4]) *
|
||||
((numtyp)2.0*(K[1]+K[2]) *
|
||||
(K[1]*((numtyp)5.0*K[1]+(numtyp)22.0*K[2])+(numtyp)5.0*K[4]) *
|
||||
colloid2[mtype].w/K[6]-(numtyp)5.0) / K[0];
|
||||
force*=factor_lj;
|
||||
} else if (form[mtype]==2) { // LARGE_LARGE
|
||||
|
@ -289,7 +298,8 @@ __kernel void k_colloid_fast(__global numtyp4 *x_,
|
|||
fR = colloid1[mtype].x*colloid2[mtype].w/r/(numtyp)37800.0;
|
||||
evdwl = fR * (h[0]-h[1]-h[2]+h[3]);
|
||||
numtyp dUR = evdwl/r + (numtyp)5.0*fR*(g[0]+g[1]-g[2]-g[3]);
|
||||
numtyp dUA = -colloid1[mtype].x/(numtyp)3.0*r*(((numtyp)2.0*K[0]*K[7]+(numtyp)1.0)*K[7] +
|
||||
numtyp dUA = -colloid1[mtype].x/(numtyp)3.0*r*
|
||||
(((numtyp)2.0*K[0]*K[7]+(numtyp)1.0)*K[7] +
|
||||
((numtyp)2.0*K[0]*K[8]-(numtyp)1.0)*K[8]);
|
||||
force = factor_lj * (dUR+dUA)/r;
|
||||
} else force = (numtyp)0.0;
|
||||
|
@ -304,10 +314,12 @@ __kernel void k_colloid_fast(__global numtyp4 *x_,
|
|||
e=r6inv*(lj3[mtype].x*r6inv-lj3[mtype].y);
|
||||
} else if (form[mtype]==1) {
|
||||
e=(numtyp)2.0/(numtyp)9.0*fR *
|
||||
((numtyp)1.0-(K[1]*(K[1]*(K[1]/(numtyp)3.0+(numtyp)3.0*K[2])+(numtyp)4.2*K[4])+K[2]*K[4])*
|
||||
colloid2[mtype].w/K[6]);
|
||||
((numtyp)1.0-(K[1]*(K[1]*(K[1]/(numtyp)3.0+
|
||||
(numtyp)3.0*K[2])+(numtyp)4.2*K[4])+K[2]*K[4])*
|
||||
colloid2[mtype].w/K[6]);
|
||||
} else if (form[mtype]==2) {
|
||||
e=evdwl+colloid1[mtype].x/(numtyp)6.0 * ((numtyp)2.0*K[0]*(K[7]+K[8])-log(K[8]/K[7]));
|
||||
e=evdwl+colloid1[mtype].x/(numtyp)6.0 *
|
||||
((numtyp)2.0*K[0]*(K[7]+K[8])-log(K[8]/K[7]));
|
||||
}
|
||||
energy+=factor_lj*(e-lj3[mtype].z);
|
||||
}
|
||||
|
|
|
@ -31,12 +31,16 @@ texture<int2> q_tex;
|
|||
|
||||
#define MY_PIS (acctyp)1.77245385090551602729
|
||||
|
||||
__kernel void k_coul_dsf(__global numtyp4 *x_, const int lj_types,
|
||||
__global numtyp *sp_lj_in, __global int *dev_nbor,
|
||||
__global int *dev_packed, __global acctyp4 *ans,
|
||||
__global acctyp *engv, const int eflag,
|
||||
const int vflag, const int inum,
|
||||
const int nbor_pitch, __global numtyp *q_ ,
|
||||
__kernel void k_coul_dsf(const __global numtyp4 *restrict x_,
|
||||
const int lj_types,
|
||||
const __global numtyp *restrict sp_lj_in,
|
||||
const __global int *dev_nbor,
|
||||
const __global int *dev_packed,
|
||||
__global acctyp4 *restrict ans,
|
||||
__global acctyp *restrict engv,
|
||||
const int eflag, const int vflag, const int inum,
|
||||
const int nbor_pitch,
|
||||
const __global numtyp *restrict q_ ,
|
||||
const numtyp cut_coulsq, const numtyp qqrd2e,
|
||||
const numtyp e_shift, const numtyp f_shift,
|
||||
const numtyp alpha, const int t_per_atom) {
|
||||
|
@ -58,7 +62,7 @@ __kernel void k_coul_dsf(__global numtyp4 *x_, const int lj_types,
|
|||
virial[i]=(acctyp)0;
|
||||
|
||||
if (ii<inum) {
|
||||
__global int *nbor, *list_end;
|
||||
const __global int *nbor, *list_end;
|
||||
int i, numj, n_stride;
|
||||
nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset,i,numj,
|
||||
n_stride,list_end,nbor);
|
||||
|
@ -66,6 +70,12 @@ __kernel void k_coul_dsf(__global numtyp4 *x_, const int lj_types,
|
|||
numtyp4 ix; fetch4(ix,i,pos_tex); //x_[i];
|
||||
numtyp qtmp; fetch(qtmp,i,q_tex);
|
||||
|
||||
if (eflag>0) {
|
||||
acctyp e_self = -((acctyp)0.5*e_shift + alpha/MY_PIS) *
|
||||
qtmp*qtmp*qqrd2e/(acctyp)t_per_atom;
|
||||
e_coul += (acctyp)2.0*e_self;
|
||||
}
|
||||
|
||||
for ( ; nbor<list_end; nbor+=n_stride) {
|
||||
int j=*nbor;
|
||||
|
||||
|
@ -91,8 +101,8 @@ __kernel void k_coul_dsf(__global numtyp4 *x_, const int lj_types,
|
|||
numtyp erfcd = ucl_exp(-alpha*alpha*rsq);
|
||||
numtyp t = ucl_recip((numtyp)1.0 + EWALD_P*alpha*r);
|
||||
erfcc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * erfcd;
|
||||
forcecoul = prefactor * (erfcc + 2.0*alpha/MY_PIS*r*erfcd +
|
||||
rsq*f_shift);
|
||||
forcecoul = prefactor * (erfcc + (numtyp)2.0*alpha/MY_PIS*r*erfcd +
|
||||
rsq*f_shift);
|
||||
|
||||
force = forcecoul * r2inv;
|
||||
|
||||
|
@ -122,11 +132,15 @@ __kernel void k_coul_dsf(__global numtyp4 *x_, const int lj_types,
|
|||
} // if ii
|
||||
}
|
||||
|
||||
__kernel void k_coul_dsf_fast(__global numtyp4 *x_, __global numtyp* sp_lj_in,
|
||||
__global int *dev_nbor, __global int *dev_packed,
|
||||
__global acctyp4 *ans, __global acctyp *engv,
|
||||
__kernel void k_coul_dsf_fast(const __global numtyp4 *restrict x_,
|
||||
const __global numtyp *restrict sp_lj_in,
|
||||
const __global int *dev_nbor,
|
||||
const __global int *dev_packed,
|
||||
__global acctyp4 *restrict ans,
|
||||
__global acctyp *restrict engv,
|
||||
const int eflag, const int vflag, const int inum,
|
||||
const int nbor_pitch, __global numtyp *q_,
|
||||
const int nbor_pitch,
|
||||
const __global numtyp *restrict q_,
|
||||
const numtyp cut_coulsq, const numtyp qqrd2e,
|
||||
const numtyp e_shift, const numtyp f_shift,
|
||||
const numtyp alpha, const int t_per_atom) {
|
||||
|
@ -148,13 +162,19 @@ __kernel void k_coul_dsf_fast(__global numtyp4 *x_, __global numtyp* sp_lj_in,
|
|||
__syncthreads();
|
||||
|
||||
if (ii<inum) {
|
||||
__global int *nbor, *list_end;
|
||||
const __global int *nbor, *list_end;
|
||||
int i, numj, n_stride;
|
||||
nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset,i,numj,
|
||||
n_stride,list_end,nbor);
|
||||
|
||||
numtyp4 ix; fetch4(ix,i,pos_tex); //x_[i];
|
||||
numtyp qtmp; fetch(qtmp,i,q_tex);
|
||||
|
||||
if (eflag>0) {
|
||||
acctyp e_self = -((acctyp)0.5*e_shift + alpha/MY_PIS) *
|
||||
qtmp*qtmp*qqrd2e/(acctyp)t_per_atom;
|
||||
e_coul += (acctyp)2.0*e_self;
|
||||
}
|
||||
|
||||
for ( ; nbor<list_end; nbor+=n_stride) {
|
||||
int j=*nbor;
|
||||
|
@ -181,8 +201,8 @@ __kernel void k_coul_dsf_fast(__global numtyp4 *x_, __global numtyp* sp_lj_in,
|
|||
numtyp erfcd = ucl_exp(-alpha*alpha*rsq);
|
||||
numtyp t = ucl_recip((numtyp)1.0 + EWALD_P*alpha*r);
|
||||
erfcc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * erfcd;
|
||||
forcecoul = prefactor * (erfcc + 2.0*alpha/MY_PIS*r*erfcd +
|
||||
rsq*f_shift);
|
||||
forcecoul = prefactor * (erfcc + (numtyp)2.0*alpha/MY_PIS*r*erfcd +
|
||||
rsq*f_shift);
|
||||
|
||||
force = forcecoul * r2inv;
|
||||
|
||||
|
|
|
@ -29,13 +29,112 @@ texture<int2> q_tex;
|
|||
#define q_tex q_
|
||||
#endif
|
||||
|
||||
__kernel void k_coul_long(__global numtyp4 *x_, __global numtyp4 *lj1,
|
||||
__global numtyp4* lj3, const int lj_types,
|
||||
__global numtyp *sp_cl_in, __global int *dev_nbor,
|
||||
__global int *dev_packed, __global acctyp4 *ans,
|
||||
__global acctyp *engv, const int eflag,
|
||||
const int vflag, const int inum,
|
||||
const int nbor_pitch, __global numtyp *q_,
|
||||
#if (ARCH < 300)
|
||||
|
||||
#define store_answers_lq(f, e_coul, virial, ii, inum, tid, \
|
||||
t_per_atom, offset, eflag, vflag, ans, engv) \
|
||||
if (t_per_atom>1) { \
|
||||
__local acctyp red_acc[6][BLOCK_PAIR]; \
|
||||
\
|
||||
red_acc[0][tid]=f.x; \
|
||||
red_acc[1][tid]=f.y; \
|
||||
red_acc[2][tid]=f.z; \
|
||||
red_acc[3][tid]=e_coul; \
|
||||
\
|
||||
for (unsigned int s=t_per_atom/2; s>0; s>>=1) { \
|
||||
if (offset < s) { \
|
||||
for (int r=0; r<4; r++) \
|
||||
red_acc[r][tid] += red_acc[r][tid+s]; \
|
||||
} \
|
||||
} \
|
||||
\
|
||||
f.x=red_acc[0][tid]; \
|
||||
f.y=red_acc[1][tid]; \
|
||||
f.z=red_acc[2][tid]; \
|
||||
e_coul=red_acc[3][tid]; \
|
||||
\
|
||||
if (vflag>0) { \
|
||||
for (int r=0; r<6; r++) \
|
||||
red_acc[r][tid]=virial[r]; \
|
||||
\
|
||||
for (unsigned int s=t_per_atom/2; s>0; s>>=1) { \
|
||||
if (offset < s) { \
|
||||
for (int r=0; r<6; r++) \
|
||||
red_acc[r][tid] += red_acc[r][tid+s]; \
|
||||
} \
|
||||
} \
|
||||
\
|
||||
for (int r=0; r<6; r++) \
|
||||
virial[r]=red_acc[r][tid]; \
|
||||
} \
|
||||
} \
|
||||
\
|
||||
if (offset==0) { \
|
||||
__global acctyp *ap1=engv+ii; \
|
||||
if (eflag>0) { \
|
||||
*ap1=(acctyp)0; \
|
||||
ap1+=inum; \
|
||||
*ap1=e_coul; \
|
||||
ap1+=inum; \
|
||||
} \
|
||||
if (vflag>0) { \
|
||||
for (int i=0; i<6; i++) { \
|
||||
*ap1=virial[i]; \
|
||||
ap1+=inum; \
|
||||
} \
|
||||
} \
|
||||
ans[ii]=f; \
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
#define store_answers_lq(f, e_coul, virial, ii, inum, tid, \
|
||||
t_per_atom, offset, eflag, vflag, ans, engv) \
|
||||
if (t_per_atom>1) { \
|
||||
for (unsigned int s=t_per_atom/2; s>0; s>>=1) { \
|
||||
f.x += shfl_xor(f.x, s, t_per_atom); \
|
||||
f.y += shfl_xor(f.y, s, t_per_atom); \
|
||||
f.z += shfl_xor(f.z, s, t_per_atom); \
|
||||
e_coul += shfl_xor(e_coul, s, t_per_atom); \
|
||||
} \
|
||||
if (vflag>0) { \
|
||||
for (unsigned int s=t_per_atom/2; s>0; s>>=1) { \
|
||||
for (int r=0; r<6; r++) \
|
||||
virial[r] += shfl_xor(virial[r], s, t_per_atom); \
|
||||
} \
|
||||
} \
|
||||
} \
|
||||
if (offset==0) { \
|
||||
__global acctyp *ap1=engv+ii; \
|
||||
if (eflag>0) { \
|
||||
*ap1=(acctyp)0; \
|
||||
ap1+=inum; \
|
||||
*ap1=e_coul; \
|
||||
ap1+=inum; \
|
||||
} \
|
||||
if (vflag>0) { \
|
||||
for (int i=0; i<6; i++) { \
|
||||
*ap1=virial[i]; \
|
||||
ap1+=inum; \
|
||||
} \
|
||||
} \
|
||||
ans[ii]=f; \
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
__kernel void k_coul_long(const __global numtyp4 *restrict x_,
|
||||
const __global numtyp4 *restrict lj1,
|
||||
const __global numtyp4 *restrict lj3,
|
||||
const int lj_types,
|
||||
const __global numtyp *restrict sp_cl_in,
|
||||
const __global int *dev_nbor,
|
||||
const __global int *dev_packed,
|
||||
__global acctyp4 *restrict ans,
|
||||
__global acctyp *restrict engv,
|
||||
const int eflag, const int vflag, const int inum,
|
||||
const int nbor_pitch,
|
||||
const __global numtyp *restrict q_,
|
||||
const numtyp cut_coulsq, const numtyp qqrd2e,
|
||||
const numtyp g_ewald, const int t_per_atom) {
|
||||
int tid, ii, offset;
|
||||
|
@ -55,7 +154,7 @@ __kernel void k_coul_long(__global numtyp4 *x_, __global numtyp4 *lj1,
|
|||
virial[i]=(acctyp)0;
|
||||
|
||||
if (ii<inum) {
|
||||
__global int *nbor, *list_end;
|
||||
const __global int *nbor, *list_end;
|
||||
int i, numj, n_stride;
|
||||
nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset,i,numj,
|
||||
n_stride,list_end,nbor);
|
||||
|
@ -109,71 +208,22 @@ __kernel void k_coul_long(__global numtyp4 *x_, __global numtyp4 *lj1,
|
|||
}
|
||||
|
||||
} // for nbor
|
||||
|
||||
// Reduce answers
|
||||
if (t_per_atom>1) {
|
||||
__local acctyp red_acc[6][BLOCK_PAIR];
|
||||
|
||||
red_acc[0][tid]=f.x;
|
||||
red_acc[1][tid]=f.y;
|
||||
red_acc[2][tid]=f.z;
|
||||
red_acc[3][tid]=e_coul;
|
||||
|
||||
for (unsigned int s=t_per_atom/2; s>0; s>>=1) {
|
||||
if (offset < s) {
|
||||
for (int r=0; r<4; r++)
|
||||
red_acc[r][tid] += red_acc[r][tid+s];
|
||||
}
|
||||
}
|
||||
|
||||
f.x=red_acc[0][tid];
|
||||
f.y=red_acc[1][tid];
|
||||
f.z=red_acc[2][tid];
|
||||
e_coul=red_acc[3][tid];
|
||||
|
||||
if (vflag>0) {
|
||||
for (int r=0; r<6; r++)
|
||||
red_acc[r][tid]=virial[r];
|
||||
|
||||
for (unsigned int s=t_per_atom/2; s>0; s>>=1) {
|
||||
if (offset < s) {
|
||||
for (int r=0; r<6; r++)
|
||||
red_acc[r][tid] += red_acc[r][tid+s];
|
||||
}
|
||||
}
|
||||
|
||||
for (int r=0; r<6; r++)
|
||||
virial[r]=red_acc[r][tid];
|
||||
}
|
||||
}
|
||||
|
||||
// Store answers
|
||||
if (offset==0) {
|
||||
__global acctyp *ap1=engv+ii;
|
||||
if (eflag>0) {
|
||||
*ap1=(acctyp)0;
|
||||
ap1+=inum;
|
||||
*ap1=e_coul;
|
||||
ap1+=inum;
|
||||
}
|
||||
if (vflag>0) {
|
||||
for (int i=0; i<6; i++) {
|
||||
*ap1=virial[i];
|
||||
ap1+=inum;
|
||||
}
|
||||
}
|
||||
ans[ii]=f;
|
||||
}
|
||||
store_answers_lq(f,e_coul,virial,ii,inum,tid,t_per_atom,offset,eflag,
|
||||
vflag,ans,engv);
|
||||
} // if ii
|
||||
}
|
||||
|
||||
__kernel void k_coul_long_fast(__global numtyp4 *x_, __global numtyp4 *lj1_in,
|
||||
__global numtyp4* lj3_in,
|
||||
__global numtyp* sp_cl_in,
|
||||
__global int *dev_nbor, __global int *dev_packed,
|
||||
__global acctyp4 *ans, __global acctyp *engv,
|
||||
__kernel void k_coul_long_fast(const __global numtyp4 *restrict x_,
|
||||
const __global numtyp4 *restrict lj1_in,
|
||||
const __global numtyp4 *restrict lj3_in,
|
||||
const __global numtyp *restrict sp_cl_in,
|
||||
const __global int *dev_nbor,
|
||||
const __global int *dev_packed,
|
||||
__global acctyp4 *restrict ans,
|
||||
__global acctyp *restrict engv,
|
||||
const int eflag, const int vflag, const int inum,
|
||||
const int nbor_pitch, __global numtyp *q_,
|
||||
const int nbor_pitch,
|
||||
const __global numtyp *restrict q_,
|
||||
const numtyp cut_coulsq, const numtyp qqrd2e,
|
||||
const numtyp g_ewald, const int t_per_atom) {
|
||||
int tid, ii, offset;
|
||||
|
@ -193,7 +243,7 @@ __kernel void k_coul_long_fast(__global numtyp4 *x_, __global numtyp4 *lj1_in,
|
|||
__syncthreads();
|
||||
|
||||
if (ii<inum) {
|
||||
__global int *nbor, *list_end;
|
||||
const __global int *nbor, *list_end;
|
||||
int i, numj, n_stride;
|
||||
nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset,i,numj,
|
||||
n_stride,list_end,nbor);
|
||||
|
@ -247,61 +297,8 @@ __kernel void k_coul_long_fast(__global numtyp4 *x_, __global numtyp4 *lj1_in,
|
|||
}
|
||||
|
||||
} // for nbor
|
||||
|
||||
// Reduce answers
|
||||
if (t_per_atom>1) {
|
||||
__local acctyp red_acc[6][BLOCK_PAIR];
|
||||
|
||||
red_acc[0][tid]=f.x;
|
||||
red_acc[1][tid]=f.y;
|
||||
red_acc[2][tid]=f.z;
|
||||
red_acc[3][tid]=e_coul;
|
||||
|
||||
for (unsigned int s=t_per_atom/2; s>0; s>>=1) {
|
||||
if (offset < s) {
|
||||
for (int r=0; r<4; r++)
|
||||
red_acc[r][tid] += red_acc[r][tid+s];
|
||||
}
|
||||
}
|
||||
|
||||
f.x=red_acc[0][tid];
|
||||
f.y=red_acc[1][tid];
|
||||
f.z=red_acc[2][tid];
|
||||
e_coul=red_acc[3][tid];
|
||||
|
||||
if (vflag>0) {
|
||||
for (int r=0; r<6; r++)
|
||||
red_acc[r][tid]=virial[r];
|
||||
|
||||
for (unsigned int s=t_per_atom/2; s>0; s>>=1) {
|
||||
if (offset < s) {
|
||||
for (int r=0; r<6; r++)
|
||||
red_acc[r][tid] += red_acc[r][tid+s];
|
||||
}
|
||||
}
|
||||
|
||||
for (int r=0; r<6; r++)
|
||||
virial[r]=red_acc[r][tid];
|
||||
}
|
||||
}
|
||||
|
||||
// Store answers
|
||||
if (offset==0) {
|
||||
__global acctyp *ap1=engv+ii;
|
||||
if (eflag>0) {
|
||||
*ap1=(acctyp)0;
|
||||
ap1+=inum;
|
||||
*ap1=e_coul;
|
||||
ap1+=inum;
|
||||
}
|
||||
if (vflag>0) {
|
||||
for (int i=0; i<6; i++) {
|
||||
*ap1=virial[i];
|
||||
ap1+=inum;
|
||||
}
|
||||
}
|
||||
ans[ii]=f;
|
||||
}
|
||||
store_answers_lq(f,e_coul,virial,ii,inum,tid,t_per_atom,offset,eflag,
|
||||
vflag,ans,engv);
|
||||
} // if ii
|
||||
}
|
||||
|
||||
|
|
|
@ -580,14 +580,14 @@ int DeviceT::compile_kernels() {
|
|||
k_info.set_function(*dev_program,"kernel_info");
|
||||
_compiled=true;
|
||||
|
||||
UCL_Vector<int,int> gpu_lib_data(14,*gpu,UCL_NOT_PINNED);
|
||||
UCL_Vector<int,int> gpu_lib_data(15,*gpu,UCL_NOT_PINNED);
|
||||
k_info.set_size(1,1);
|
||||
k_info.run(&gpu_lib_data);
|
||||
gpu_lib_data.update_host(false);
|
||||
|
||||
_ptx_arch=static_cast<double>(gpu_lib_data[0])/100.0;
|
||||
#ifndef USE_OPENCL
|
||||
if (_ptx_arch>gpu->arch())
|
||||
if (_ptx_arch>gpu->arch() || floor(_ptx_arch)<floor(gpu->arch()))
|
||||
return -4;
|
||||
#endif
|
||||
|
||||
|
@ -606,6 +606,7 @@ int DeviceT::compile_kernels() {
|
|||
_block_nbor_build=gpu_lib_data[10];
|
||||
_block_bio_pair=gpu_lib_data[11];
|
||||
_max_bio_shared_types=gpu_lib_data[12];
|
||||
_block_ellipse=gpu_lib_data[14];
|
||||
|
||||
if (static_cast<size_t>(_block_pair)>gpu->group_size())
|
||||
_block_pair=gpu->group_size();
|
||||
|
|
|
@ -17,7 +17,8 @@
|
|||
#include "lal_preprocessor.h"
|
||||
#endif
|
||||
|
||||
__kernel void kernel_zero(__global int *mem, int numel) {
|
||||
__kernel void kernel_zero(__global int *restrict mem,
|
||||
int numel) {
|
||||
int ii=GLOBAL_ID_X;
|
||||
|
||||
if (ii<numel)
|
||||
|
@ -39,4 +40,5 @@ __kernel void kernel_info(__global int *info) {
|
|||
info[11]=BLOCK_BIO_PAIR;
|
||||
info[12]=MAX_BIO_SHARED_TYPES;
|
||||
info[13]=THREADS_PER_CHARGE;
|
||||
info[14]=BLOCK_ELLIPSE;
|
||||
}
|
||||
|
|
|
@ -228,6 +228,8 @@ class Device {
|
|||
inline int block_nbor_build() const { return _block_nbor_build; }
|
||||
/// Return the block size for "bio" pair styles
|
||||
inline int block_bio_pair() const { return _block_bio_pair; }
|
||||
/// Return the block size for "ellipse" pair styles
|
||||
inline int block_ellipse() const { return _block_ellipse; }
|
||||
/// Return the maximum number of atom types for shared mem with "bio" styles
|
||||
inline int max_bio_shared_types() const { return _max_bio_shared_types; }
|
||||
/// Architecture gpu code compiled for (returns 0 for OpenCL)
|
||||
|
@ -292,7 +294,7 @@ class Device {
|
|||
|
||||
int _num_mem_threads, _warp_size, _threads_per_atom, _threads_per_charge;
|
||||
int _pppm_max_spline, _pppm_block;
|
||||
int _block_pair, _max_shared_types;
|
||||
int _block_pair, _block_ellipse, _max_shared_types;
|
||||
int _block_cell_2d, _block_cell_id, _block_nbor_build;
|
||||
int _block_bio_pair, _max_bio_shared_types;
|
||||
|
||||
|
|
|
@ -15,6 +15,23 @@
|
|||
|
||||
#ifdef NV_KERNEL
|
||||
#include "lal_aux_fun1.h"
|
||||
#ifndef _DOUBLE_DOUBLE
|
||||
texture<float4> pos_tex;
|
||||
texture<float> q_tex;
|
||||
texture<float4> mu_tex;
|
||||
#else
|
||||
texture<int4,1> pos_tex;
|
||||
texture<int2> q_tex;
|
||||
texture<int4,1> mu_tex;
|
||||
#endif
|
||||
|
||||
#else
|
||||
#define pos_tex x_
|
||||
#define q_tex q_
|
||||
#define mu_tex mu_
|
||||
#endif
|
||||
|
||||
#if (ARCH < 300)
|
||||
|
||||
#define store_answers_tq(f, tor, energy, ecoul, virial, ii, inum, tid, \
|
||||
t_per_atom, offset, eflag, vflag, ans, engv) \
|
||||
|
@ -73,32 +90,63 @@
|
|||
ans[ii+inum]=tor; \
|
||||
}
|
||||
|
||||
#ifndef _DOUBLE_DOUBLE
|
||||
texture<float4> pos_tex;
|
||||
texture<float> q_tex;
|
||||
texture<float4> mu_tex;
|
||||
#else
|
||||
texture<int4,1> pos_tex;
|
||||
texture<int2> q_tex;
|
||||
texture<int4,1> mu_tex;
|
||||
|
||||
#define store_answers_tq(f, tor, energy, e_coul, virial, ii, inum, tid, \
|
||||
t_per_atom, offset, eflag, vflag, ans, engv) \
|
||||
if (t_per_atom>1) { \
|
||||
for (unsigned int s=t_per_atom/2; s>0; s>>=1) { \
|
||||
f.x += shfl_xor(f.x, s, t_per_atom); \
|
||||
f.y += shfl_xor(f.y, s, t_per_atom); \
|
||||
f.z += shfl_xor(f.z, s, t_per_atom); \
|
||||
tor.x += shfl_xor(tor.x, s, t_per_atom); \
|
||||
tor.y += shfl_xor(tor.y, s, t_per_atom); \
|
||||
tor.z += shfl_xor(tor.z, s, t_per_atom); \
|
||||
energy += shfl_xor(energy, s, t_per_atom); \
|
||||
e_coul += shfl_xor(e_coul, s, t_per_atom); \
|
||||
} \
|
||||
if (vflag>0) { \
|
||||
for (unsigned int s=t_per_atom/2; s>0; s>>=1) { \
|
||||
for (int r=0; r<6; r++) \
|
||||
virial[r] += shfl_xor(virial[r], s, t_per_atom); \
|
||||
} \
|
||||
} \
|
||||
} \
|
||||
if (offset==0) { \
|
||||
engv+=ii; \
|
||||
if (eflag>0) { \
|
||||
*engv=energy; \
|
||||
engv+=inum; \
|
||||
*engv=e_coul; \
|
||||
engv+=inum; \
|
||||
} \
|
||||
if (vflag>0) { \
|
||||
for (int i=0; i<6; i++) { \
|
||||
*engv=virial[i]; \
|
||||
engv+=inum; \
|
||||
} \
|
||||
} \
|
||||
ans[ii]=f; \
|
||||
ans[ii+inum]=tor; \
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#else
|
||||
#define pos_tex x_
|
||||
#define q_tex q_
|
||||
#define mu_tex mu_
|
||||
#endif
|
||||
|
||||
__kernel void k_dipole_lj(__global numtyp4 *x_, __global numtyp4 *lj1,
|
||||
__global numtyp4* lj3, const int lj_types,
|
||||
__global numtyp *sp_lj_in, __global int *dev_nbor,
|
||||
__global int *dev_packed, __global acctyp4 *ans,
|
||||
__global acctyp *engv, const int eflag,
|
||||
const int vflag, const int inum,
|
||||
const int nbor_pitch, __global numtyp *q_ ,
|
||||
__global numtyp4 *mu_,
|
||||
__global numtyp *cutsq, const numtyp qqrd2e,
|
||||
const int t_per_atom) {
|
||||
__kernel void k_dipole_lj(const __global numtyp4 *restrict x_,
|
||||
const __global numtyp4 *restrict lj1,
|
||||
const __global numtyp4 *restrict lj3,
|
||||
const int lj_types,
|
||||
const __global numtyp *restrict sp_lj_in,
|
||||
const __global int *dev_nbor,
|
||||
const __global int *dev_packed,
|
||||
__global acctyp4 *restrict ans,
|
||||
__global acctyp *restrict engv,
|
||||
const int eflag, const int vflag, const int inum,
|
||||
const int nbor_pitch,
|
||||
const __global numtyp *restrict q_,
|
||||
const __global numtyp4 *restrict mu_,
|
||||
const __global numtyp *restrict cutsq,
|
||||
const numtyp qqrd2e, const int t_per_atom) {
|
||||
int tid, ii, offset;
|
||||
atom_info(t_per_atom,ii,tid,offset);
|
||||
|
||||
|
@ -125,7 +173,7 @@ __kernel void k_dipole_lj(__global numtyp4 *x_, __global numtyp4 *lj1,
|
|||
virial[i]=(acctyp)0;
|
||||
|
||||
if (ii<inum) {
|
||||
__global int *nbor, *list_end;
|
||||
const __global int *nbor, *list_end;
|
||||
int i, numj, n_stride;
|
||||
nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset,i,numj,
|
||||
n_stride,list_end,nbor);
|
||||
|
@ -291,16 +339,20 @@ __kernel void k_dipole_lj(__global numtyp4 *x_, __global numtyp4 *lj1,
|
|||
} // if ii
|
||||
}
|
||||
|
||||
__kernel void k_dipole_lj_fast(__global numtyp4 *x_, __global numtyp4 *lj1_in,
|
||||
__global numtyp4* lj3_in,
|
||||
__global numtyp* sp_lj_in,
|
||||
__global int *dev_nbor, __global int *dev_packed,
|
||||
__global acctyp4 *ans, __global acctyp *engv,
|
||||
__kernel void k_dipole_lj_fast(const __global numtyp4 *restrict x_,
|
||||
const __global numtyp4 *restrict lj1_in,
|
||||
const __global numtyp4 *restrict lj3_in,
|
||||
const __global numtyp *restrict sp_lj_in,
|
||||
const __global int *dev_nbor,
|
||||
const __global int *dev_packed,
|
||||
__global acctyp4 *restrict ans,
|
||||
__global acctyp *restrict engv,
|
||||
const int eflag, const int vflag, const int inum,
|
||||
const int nbor_pitch, __global numtyp *q_,
|
||||
__global numtyp4 *mu_,
|
||||
__global numtyp *_cutsq, const numtyp qqrd2e,
|
||||
const int t_per_atom) {
|
||||
const int nbor_pitch,
|
||||
const __global numtyp *restrict q_,
|
||||
const __global numtyp4 *restrict mu_,
|
||||
const __global numtyp *restrict _cutsq,
|
||||
const numtyp qqrd2e, const int t_per_atom) {
|
||||
int tid, ii, offset;
|
||||
atom_info(t_per_atom,ii,tid,offset);
|
||||
|
||||
|
@ -332,7 +384,7 @@ __kernel void k_dipole_lj_fast(__global numtyp4 *x_, __global numtyp4 *lj1_in,
|
|||
__syncthreads();
|
||||
|
||||
if (ii<inum) {
|
||||
__global int *nbor, *list_end;
|
||||
const __global int *nbor, *list_end;
|
||||
int i, numj, n_stride;
|
||||
nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset,i,numj,
|
||||
n_stride,list_end,nbor);
|
||||
|
|
|
@ -16,6 +16,24 @@
|
|||
#ifdef NV_KERNEL
|
||||
#include "lal_aux_fun1.h"
|
||||
|
||||
#ifndef _DOUBLE_DOUBLE
|
||||
texture<float4> pos_tex;
|
||||
texture<float> q_tex;
|
||||
texture<float4> mu_tex;
|
||||
#else
|
||||
texture<int4,1> pos_tex;
|
||||
texture<int2> q_tex;
|
||||
texture<int4,1> mu_tex;
|
||||
#endif
|
||||
|
||||
#else
|
||||
#define pos_tex x_
|
||||
#define q_tex q_
|
||||
#define mu_tex mu_
|
||||
#endif
|
||||
|
||||
#if (ARCH < 300)
|
||||
|
||||
#define store_answers_tq(f, tor, energy, ecoul, virial, ii, inum, tid, \
|
||||
t_per_atom, offset, eflag, vflag, ans, engv) \
|
||||
if (t_per_atom>1) { \
|
||||
|
@ -73,32 +91,63 @@
|
|||
ans[ii+inum]=tor; \
|
||||
}
|
||||
|
||||
#ifndef _DOUBLE_DOUBLE
|
||||
texture<float4> pos_tex;
|
||||
texture<float> q_tex;
|
||||
texture<float4> mu_tex;
|
||||
#else
|
||||
texture<int4,1> pos_tex;
|
||||
texture<int2> q_tex;
|
||||
texture<int4,1> mu_tex;
|
||||
|
||||
#define store_answers_tq(f, tor, energy, e_coul, virial, ii, inum, tid, \
|
||||
t_per_atom, offset, eflag, vflag, ans, engv) \
|
||||
if (t_per_atom>1) { \
|
||||
for (unsigned int s=t_per_atom/2; s>0; s>>=1) { \
|
||||
f.x += shfl_xor(f.x, s, t_per_atom); \
|
||||
f.y += shfl_xor(f.y, s, t_per_atom); \
|
||||
f.z += shfl_xor(f.z, s, t_per_atom); \
|
||||
tor.x += shfl_xor(tor.x, s, t_per_atom); \
|
||||
tor.y += shfl_xor(tor.y, s, t_per_atom); \
|
||||
tor.z += shfl_xor(tor.z, s, t_per_atom); \
|
||||
energy += shfl_xor(energy, s, t_per_atom); \
|
||||
e_coul += shfl_xor(e_coul, s, t_per_atom); \
|
||||
} \
|
||||
if (vflag>0) { \
|
||||
for (unsigned int s=t_per_atom/2; s>0; s>>=1) { \
|
||||
for (int r=0; r<6; r++) \
|
||||
virial[r] += shfl_xor(virial[r], s, t_per_atom); \
|
||||
} \
|
||||
} \
|
||||
} \
|
||||
if (offset==0) { \
|
||||
engv+=ii; \
|
||||
if (eflag>0) { \
|
||||
*engv=energy; \
|
||||
engv+=inum; \
|
||||
*engv=e_coul; \
|
||||
engv+=inum; \
|
||||
} \
|
||||
if (vflag>0) { \
|
||||
for (int i=0; i<6; i++) { \
|
||||
*engv=virial[i]; \
|
||||
engv+=inum; \
|
||||
} \
|
||||
} \
|
||||
ans[ii]=f; \
|
||||
ans[ii+inum]=tor; \
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#else
|
||||
#define pos_tex x_
|
||||
#define q_tex q_
|
||||
#define mu_tex mu_
|
||||
#endif
|
||||
|
||||
__kernel void k_dipole_lj_sf(__global numtyp4 *x_, __global numtyp4 *lj1,
|
||||
__global numtyp4* lj3, const int lj_types,
|
||||
__global numtyp *sp_lj_in, __global int *dev_nbor,
|
||||
__global int *dev_packed, __global acctyp4 *ans,
|
||||
__global acctyp *engv, const int eflag,
|
||||
const int vflag, const int inum,
|
||||
const int nbor_pitch, __global numtyp *q_ ,
|
||||
__global numtyp4 *mu_,
|
||||
__global numtyp *cutsq, const numtyp qqrd2e,
|
||||
const int t_per_atom) {
|
||||
__kernel void k_dipole_lj_sf(const __global numtyp4 *restrict x_,
|
||||
const __global numtyp4 *restrict lj1,
|
||||
const __global numtyp4 *restrict lj3,
|
||||
const int lj_types,
|
||||
const __global numtyp *restrict sp_lj_in,
|
||||
const __global int *dev_nbor,
|
||||
const __global int *dev_packed,
|
||||
__global acctyp4 *restrict ans,
|
||||
__global acctyp *restrict engv,
|
||||
const int eflag, const int vflag, const int inum,
|
||||
const int nbor_pitch,
|
||||
const __global numtyp *restrict q_ ,
|
||||
const __global numtyp4 *restrict mu_,
|
||||
const __global numtyp *restrict cutsq,
|
||||
const numtyp qqrd2e, const int t_per_atom) {
|
||||
int tid, ii, offset;
|
||||
atom_info(t_per_atom,ii,tid,offset);
|
||||
|
||||
|
@ -125,7 +174,7 @@ __kernel void k_dipole_lj_sf(__global numtyp4 *x_, __global numtyp4 *lj1,
|
|||
virial[i]=(acctyp)0;
|
||||
|
||||
if (ii<inum) {
|
||||
__global int *nbor, *list_end;
|
||||
const __global int *nbor, *list_end;
|
||||
int i, numj, n_stride;
|
||||
nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset,i,numj,
|
||||
n_stride,list_end,nbor);
|
||||
|
@ -322,15 +371,20 @@ __kernel void k_dipole_lj_sf(__global numtyp4 *x_, __global numtyp4 *lj1,
|
|||
} // if ii
|
||||
}
|
||||
|
||||
__kernel void k_dipole_lj_sf_fast(__global numtyp4 *x_, __global numtyp4 *lj1_in,
|
||||
__global numtyp4* lj3_in,
|
||||
__global numtyp* sp_lj_in,
|
||||
__global int *dev_nbor, __global int *dev_packed,
|
||||
__global acctyp4 *ans, __global acctyp *engv,
|
||||
const int eflag, const int vflag, const int inum,
|
||||
const int nbor_pitch, __global numtyp *q_,
|
||||
__global numtyp4 *mu_,
|
||||
__global numtyp *_cutsq, const numtyp qqrd2e,
|
||||
__kernel void k_dipole_lj_sf_fast(const __global numtyp4 *restrict x_,
|
||||
const __global numtyp4 *restrict lj1_in,
|
||||
const __global numtyp4 *restrict lj3_in,
|
||||
const __global numtyp *restrict sp_lj_in,
|
||||
const __global int *dev_nbor,
|
||||
const __global int *dev_packed,
|
||||
__global acctyp4 *restrict ans,
|
||||
__global acctyp *restrict engv,
|
||||
const int eflag, const int vflag,
|
||||
const int inum, const int nbor_pitch,
|
||||
const __global numtyp *restrict q_,
|
||||
const __global numtyp4 *restrict mu_,
|
||||
const __global numtyp *restrict _cutsq,
|
||||
const numtyp qqrd2e,
|
||||
const int t_per_atom) {
|
||||
int tid, ii, offset;
|
||||
atom_info(t_per_atom,ii,tid,offset);
|
||||
|
@ -363,7 +417,7 @@ __kernel void k_dipole_lj_sf_fast(__global numtyp4 *x_, __global numtyp4 *lj1_in
|
|||
__syncthreads();
|
||||
|
||||
if (ii<inum) {
|
||||
__global int *nbor, *list_end;
|
||||
const __global int *nbor, *list_end;
|
||||
int i, numj, n_stride;
|
||||
nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset,i,numj,
|
||||
n_stride,list_end,nbor);
|
||||
|
|
|
@ -46,10 +46,11 @@ template <class numtyp, class acctyp>
|
|||
int EAMT::init(const int ntypes, double host_cutforcesq, int **host_type2rhor,
|
||||
int **host_type2z2r, int *host_type2frho,
|
||||
double ***host_rhor_spline, double ***host_z2r_spline,
|
||||
double ***host_frho_spline, double rdr, double rdrho, int nrhor,
|
||||
int nrho, int nz2r, int nfrho, int nr, const int nlocal,
|
||||
const int nall, const int max_nbors, const int maxspecial,
|
||||
const double cell_size, const double gpu_split, FILE *_screen)
|
||||
double ***host_frho_spline, double rdr, double rdrho,
|
||||
double rhomax, int nrhor, int nrho, int nz2r, int nfrho, int nr,
|
||||
const int nlocal, const int nall, const int max_nbors,
|
||||
const int maxspecial, const double cell_size,
|
||||
const double gpu_split, FILE *_screen)
|
||||
{
|
||||
int success;
|
||||
success=this->init_atomic(nlocal,nall,max_nbors,maxspecial,cell_size,
|
||||
|
@ -97,6 +98,7 @@ int EAMT::init(const int ntypes, double host_cutforcesq, int **host_type2rhor,
|
|||
_cutforcesq=host_cutforcesq;
|
||||
_rdr=rdr;
|
||||
_rdrho = rdrho;
|
||||
_rhomax=rhomax;
|
||||
_nrhor=nrhor;
|
||||
_nrho=nrho;
|
||||
_nz2r=nz2r;
|
||||
|
@ -468,15 +470,15 @@ void EAMT::loop(const bool _eflag, const bool _vflag) {
|
|||
&this->nbor->dev_nbor, &this->_nbor_data->begin(),
|
||||
&_fp, &this->ans->engv, &eflag, &ainum,
|
||||
&nbor_pitch, &_ntypes, &_cutforcesq, &_rdr, &_rdrho,
|
||||
&_nrho, &_nr, &this->_threads_per_atom);
|
||||
&_rhomax, &_nrho, &_nr, &this->_threads_per_atom);
|
||||
} else {
|
||||
this->k_energy.set_size(GX,BX);
|
||||
this->k_energy.run(&this->atom->x, &type2rhor_z2r, &type2frho,
|
||||
&rhor_spline2, &frho_spline1, &frho_spline2,
|
||||
&this->nbor->dev_nbor, &this->_nbor_data->begin(), &_fp,
|
||||
&this->ans->engv,&eflag, &ainum, &nbor_pitch,
|
||||
&_ntypes, &_cutforcesq, &_rdr, &_rdrho, &_nrho, &_nr,
|
||||
&this->_threads_per_atom);
|
||||
&_ntypes, &_cutforcesq, &_rdr, &_rdrho, &_rhomax, &_nrho,
|
||||
&_nr, &this->_threads_per_atom);
|
||||
}
|
||||
|
||||
this->time_pair.stop();
|
||||
|
|
|
@ -52,8 +52,10 @@ texture<int4> z2r_sp2_tex;
|
|||
#define MIN(A,B) ((A) < (B) ? (A) : (B))
|
||||
#define MAX(A,B) ((A) > (B) ? (A) : (B))
|
||||
|
||||
#if (ARCH < 300)
|
||||
|
||||
#define store_energy_fp(rho,energy,ii,inum,tid,t_per_atom,offset, \
|
||||
eflag,vflag,engv,rdrho,nrho,i) \
|
||||
eflag,vflag,engv,rdrho,nrho,i,rhomax) \
|
||||
if (t_per_atom>1) { \
|
||||
__local acctyp red_acc[BLOCK_PAIR]; \
|
||||
red_acc[tid]=rho; \
|
||||
|
@ -76,10 +78,11 @@ texture<int4> z2r_sp2_tex;
|
|||
if (eflag>0) { \
|
||||
fetch4(coeff,index,frho_sp2_tex); \
|
||||
energy = ((coeff.x*p + coeff.y)*p + coeff.z)*p + coeff.w; \
|
||||
if (rho > rhomax) energy += fp*(rho-rhomax); \
|
||||
engv[ii]=(acctyp)2.0*energy; \
|
||||
} \
|
||||
}
|
||||
|
||||
|
||||
#define store_answers_eam(f, energy, virial, ii, inum, tid, t_per_atom, \
|
||||
offset, elag, vflag, ans, engv) \
|
||||
if (t_per_atom>1) { \
|
||||
|
@ -125,18 +128,80 @@ texture<int4> z2r_sp2_tex;
|
|||
ans[ii]=f; \
|
||||
}
|
||||
|
||||
__kernel void k_energy(__global numtyp4 *x_, __global int2 *type2rhor_z2r,
|
||||
__global int *type2frho,
|
||||
__global numtyp4 *rhor_spline2,
|
||||
__global numtyp4 *frho_spline1,
|
||||
__global numtyp4 *frho_spline2,
|
||||
__global int *dev_nbor, __global int *dev_packed,
|
||||
__global numtyp *fp_, __global acctyp *engv,
|
||||
const int eflag, const int inum,
|
||||
const int nbor_pitch, const int ntypes,
|
||||
const numtyp cutforcesq, const numtyp rdr,
|
||||
const numtyp rdrho, const int nrho, const int nr,
|
||||
const int t_per_atom) {
|
||||
#else
|
||||
|
||||
#define store_energy_fp(rho,energy,ii,inum,tid,t_per_atom,offset, \
|
||||
eflag,vflag,engv,rdrho,nrho,i,rhomax) \
|
||||
if (t_per_atom>1) { \
|
||||
for (unsigned int s=t_per_atom/2; s>0; s>>=1) \
|
||||
rho += shfl_xor(rho, s, t_per_atom); \
|
||||
} \
|
||||
if (offset==0) { \
|
||||
numtyp p = rho*rdrho + (numtyp)1.0; \
|
||||
int m=p; \
|
||||
m = MAX(1,MIN(m,nrho-1)); \
|
||||
p -= m; \
|
||||
p = MIN(p,(numtyp)1.0); \
|
||||
int index = type2frho[itype]*(nrho+1)+m; \
|
||||
numtyp4 coeff; fetch4(coeff,index,frho_sp1_tex); \
|
||||
numtyp fp = (coeff.x*p + coeff.y)*p + coeff.z; \
|
||||
fp_[i]=fp; \
|
||||
if (eflag>0) { \
|
||||
fetch4(coeff,index,frho_sp2_tex); \
|
||||
energy = ((coeff.x*p + coeff.y)*p + coeff.z)*p + coeff.w; \
|
||||
if (rho > rhomax) energy += fp*(rho-rhomax); \
|
||||
engv[ii]=(acctyp)2.0*energy; \
|
||||
} \
|
||||
}
|
||||
|
||||
#define store_answers_eam(f, energy, virial, ii, inum, tid, t_per_atom, \
|
||||
offset, eflag, vflag, ans, engv) \
|
||||
if (t_per_atom>1) { \
|
||||
for (unsigned int s=t_per_atom/2; s>0; s>>=1) { \
|
||||
f.x += shfl_xor(f.x, s, t_per_atom); \
|
||||
f.y += shfl_xor(f.y, s, t_per_atom); \
|
||||
f.z += shfl_xor(f.z, s, t_per_atom); \
|
||||
energy += shfl_xor(energy, s, t_per_atom); \
|
||||
} \
|
||||
if (vflag>0) { \
|
||||
for (unsigned int s=t_per_atom/2; s>0; s>>=1) { \
|
||||
for (int r=0; r<6; r++) \
|
||||
virial[r] += shfl_xor(virial[r], s, t_per_atom); \
|
||||
} \
|
||||
} \
|
||||
} \
|
||||
if (offset==0) { \
|
||||
engv+=ii; \
|
||||
if (eflag>0) { \
|
||||
*engv+=energy; \
|
||||
engv+=inum; \
|
||||
} \
|
||||
if (vflag>0) { \
|
||||
for (int i=0; i<6; i++) { \
|
||||
*engv=virial[i]; \
|
||||
engv+=inum; \
|
||||
} \
|
||||
} \
|
||||
ans[ii]=f; \
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
__kernel void k_energy(const __global numtyp4 *restrict x_,
|
||||
const __global int2 *restrict type2rhor_z2r,
|
||||
const __global int *restrict type2frho,
|
||||
const __global numtyp4 *restrict rhor_spline2,
|
||||
const __global numtyp4 *restrict frho_spline1,
|
||||
const __global numtyp4 *restrict frho_spline2,
|
||||
const __global int *dev_nbor,
|
||||
const __global int *dev_packed,
|
||||
__global numtyp *restrict fp_,
|
||||
__global acctyp *restrict engv,
|
||||
const int eflag, const int inum, const int nbor_pitch,
|
||||
const int ntypes, const numtyp cutforcesq,
|
||||
const numtyp rdr, const numtyp rdrho,
|
||||
const numtyp rhomax, const int nrho,
|
||||
const int nr, const int t_per_atom) {
|
||||
int tid, ii, offset;
|
||||
atom_info(t_per_atom,ii,tid,offset);
|
||||
|
||||
|
@ -144,7 +209,7 @@ __kernel void k_energy(__global numtyp4 *x_, __global int2 *type2rhor_z2r,
|
|||
acctyp energy = (acctyp)0;
|
||||
|
||||
if (ii<inum) {
|
||||
__global int *nbor, *list_end;
|
||||
const __global int *nbor, *list_end;
|
||||
int i, numj, n_stride;
|
||||
nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset,i,numj,
|
||||
n_stride,list_end,nbor);
|
||||
|
@ -180,24 +245,26 @@ __kernel void k_energy(__global numtyp4 *x_, __global int2 *type2rhor_z2r,
|
|||
} // for nbor
|
||||
|
||||
store_energy_fp(rho,energy,ii,inum,tid,t_per_atom,offset,
|
||||
eflag,vflag,engv,rdrho,nrho,i);
|
||||
eflag,vflag,engv,rdrho,nrho,i,rhomax);
|
||||
} // if ii
|
||||
}
|
||||
|
||||
__kernel void k_energy_fast(__global numtyp4 *x_,
|
||||
__global int2 *type2rhor_z2r_in,
|
||||
__global int *type2frho_in,
|
||||
__global numtyp4 *rhor_spline2,
|
||||
__global numtyp4 *frho_spline1,
|
||||
__global numtyp4 *frho_spline2,
|
||||
__global int *dev_nbor,
|
||||
__global int *dev_packed, __global numtyp *fp_,
|
||||
__global acctyp *engv, const int eflag,
|
||||
const int inum, const int nbor_pitch,
|
||||
const int ntypes, const numtyp cutforcesq,
|
||||
const numtyp rdr, const numtyp rdrho,
|
||||
const int nrho, const int nr,
|
||||
const int t_per_atom) {
|
||||
__kernel void k_energy_fast(const __global numtyp4 *restrict x_,
|
||||
const __global int2 *restrict type2rhor_z2r_in,
|
||||
const __global int *restrict type2frho_in,
|
||||
const __global numtyp4 *restrict rhor_spline2,
|
||||
const __global numtyp4 *restrict frho_spline1,
|
||||
const __global numtyp4 *restrict frho_spline2,
|
||||
const __global int *dev_nbor,
|
||||
const __global int *dev_packed,
|
||||
__global numtyp *restrict fp_,
|
||||
__global acctyp *restrict engv,
|
||||
const int eflag, const int inum,
|
||||
const int nbor_pitch, const int ntypes,
|
||||
const numtyp cutforcesq, const numtyp rdr,
|
||||
const numtyp rdrho, const numtyp rhomax,
|
||||
const int nrho, const int nr,
|
||||
const int t_per_atom) {
|
||||
int tid, ii, offset;
|
||||
atom_info(t_per_atom,ii,tid,offset);
|
||||
|
||||
|
@ -218,7 +285,7 @@ __kernel void k_energy_fast(__global numtyp4 *x_,
|
|||
__syncthreads();
|
||||
|
||||
if (ii<inum) {
|
||||
__global int *nbor, *list_end;
|
||||
const __global int *nbor, *list_end;
|
||||
int i, numj, n_stride;
|
||||
nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset,i,numj,
|
||||
n_stride,list_end,nbor);
|
||||
|
@ -254,22 +321,24 @@ __kernel void k_energy_fast(__global numtyp4 *x_,
|
|||
} // for nbor
|
||||
|
||||
store_energy_fp(rho,energy,ii,inum,tid,t_per_atom,offset,
|
||||
eflag,vflag,engv,rdrho,nrho,i);
|
||||
eflag,vflag,engv,rdrho,nrho,i,rhomax);
|
||||
} // if ii
|
||||
}
|
||||
|
||||
__kernel void k_eam(__global numtyp4 *x_, __global numtyp *fp_,
|
||||
__global int2 *type2rhor_z2r,
|
||||
__global numtyp4 *rhor_spline1,
|
||||
__global numtyp4 *z2r_spline1,
|
||||
__global numtyp4 *z2r_spline2,
|
||||
__global int *dev_nbor, __global int *dev_packed,
|
||||
__global acctyp4 *ans, __global acctyp *engv,
|
||||
const int eflag, const int vflag,
|
||||
const int inum, const int nbor_pitch,
|
||||
const int ntypes, const numtyp cutforcesq,
|
||||
const numtyp rdr, const int nr,
|
||||
const int t_per_atom) {
|
||||
__kernel void k_eam(const __global numtyp4 *restrict x_,
|
||||
const __global numtyp *fp_,
|
||||
const __global int2 *type2rhor_z2r,
|
||||
const __global numtyp4 *rhor_spline1,
|
||||
const __global numtyp4 *z2r_spline1,
|
||||
const __global numtyp4 *z2r_spline2,
|
||||
const __global int *dev_nbor,
|
||||
const __global int *dev_packed,
|
||||
__global acctyp4 *ans,
|
||||
__global acctyp *engv,
|
||||
const int eflag, const int vflag, const int inum,
|
||||
const int nbor_pitch, const int ntypes,
|
||||
const numtyp cutforcesq, const numtyp rdr, const int nr,
|
||||
const int t_per_atom) {
|
||||
int tid, ii, offset;
|
||||
atom_info(t_per_atom,ii,tid,offset);
|
||||
|
||||
|
@ -283,7 +352,7 @@ __kernel void k_eam(__global numtyp4 *x_, __global numtyp *fp_,
|
|||
virial[i]=(acctyp)0;
|
||||
|
||||
if (ii<inum) {
|
||||
__global int *nbor, *list_end;
|
||||
const __global int *nbor, *list_end;
|
||||
int i, numj, n_stride;
|
||||
nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset,i,numj,
|
||||
n_stride,list_end,nbor);
|
||||
|
@ -364,18 +433,19 @@ __kernel void k_eam(__global numtyp4 *x_, __global numtyp *fp_,
|
|||
|
||||
}
|
||||
|
||||
__kernel void k_eam_fast(__global numtyp4 *x_, __global numtyp *fp_,
|
||||
__global int2 *type2rhor_z2r_in,
|
||||
__global numtyp4 *rhor_spline1,
|
||||
__global numtyp4 *z2r_spline1,
|
||||
__global numtyp4 *z2r_spline2,
|
||||
__global int *dev_nbor, __global int *dev_packed,
|
||||
__global acctyp4 *ans, __global acctyp *engv,
|
||||
const int eflag, const int vflag, const int inum,
|
||||
const int nbor_pitch,
|
||||
const numtyp cutforcesq,
|
||||
const numtyp rdr, const int nr,
|
||||
const int t_per_atom) {
|
||||
__kernel void k_eam_fast(const __global numtyp4 *x_,
|
||||
const __global numtyp *fp_,
|
||||
const __global int2 *type2rhor_z2r_in,
|
||||
const __global numtyp4 *rhor_spline1,
|
||||
const __global numtyp4 *z2r_spline1,
|
||||
const __global numtyp4 *z2r_spline2,
|
||||
const __global int *dev_nbor,
|
||||
const __global int *dev_packed,
|
||||
__global acctyp4 *ans,
|
||||
__global acctyp *engv,
|
||||
const int eflag, const int vflag, const int inum,
|
||||
const int nbor_pitch, const numtyp cutforcesq,
|
||||
const numtyp rdr, const int nr, const int t_per_atom) {
|
||||
int tid, ii, offset;
|
||||
atom_info(t_per_atom,ii,tid,offset);
|
||||
|
||||
|
@ -395,7 +465,7 @@ __kernel void k_eam_fast(__global numtyp4 *x_, __global numtyp *fp_,
|
|||
__syncthreads();
|
||||
|
||||
if (ii<inum) {
|
||||
__global int *nbor, *list_end;
|
||||
const __global int *nbor, *list_end;
|
||||
int i, numj, n_stride;
|
||||
nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset,i,numj,
|
||||
n_stride,list_end,nbor);
|
||||
|
|
|
@ -41,10 +41,10 @@ class EAM : public BaseAtomic<numtyp, acctyp> {
|
|||
int init(const int ntypes, double host_cutforcesq, int **host_type2rhor,
|
||||
int **host_type2z2r, int *host_type2frho, double ***host_rhor_spline,
|
||||
double ***host_z2r_spline, double ***host_frho_spline, double rdr,
|
||||
double rdrho, int nrhor, int nrho, int nz2r, int nfrho, int nr,
|
||||
const int nlocal, const int nall, const int max_nbors,
|
||||
const int maxspecial, const double cell_size, const double gpu_split,
|
||||
FILE *_screen);
|
||||
double rdrho, double rhomax, int nrhor, int nrho, int nz2r,
|
||||
int nfrho, int nr, const int nlocal, const int nall,
|
||||
const int max_nbors, const int maxspecial, const double cell_size,
|
||||
const double gpu_split, FILE *_screen);
|
||||
|
||||
// Copy charges to device asynchronously
|
||||
inline void add_fp_data() {
|
||||
|
@ -112,7 +112,7 @@ class EAM : public BaseAtomic<numtyp, acctyp> {
|
|||
UCL_D_Vec<numtyp4> frho_spline1, frho_spline2;
|
||||
UCL_D_Vec<numtyp4> rhor_spline1, rhor_spline2;
|
||||
|
||||
numtyp _cutforcesq,_rdr,_rdrho;
|
||||
numtyp _cutforcesq,_rdr,_rdrho, _rhomax;
|
||||
|
||||
int _nfrho,_nrhor,_nrho,_nz2r,_nr;
|
||||
|
||||
|
|
|
@ -31,7 +31,7 @@ int eam_gpu_init(const int ntypes, double host_cutforcesq,
|
|||
int **host_type2rhor, int **host_type2z2r, int *host_type2frho,
|
||||
double ***host_rhor_spline, double ***host_z2r_spline,
|
||||
double ***host_frho_spline,
|
||||
double rdr, double rdrho, int nrhor,
|
||||
double rdr, double rdrho, double rhomax, int nrhor,
|
||||
int nrho, int nz2r, int nfrho, int nr,
|
||||
const int nlocal, const int nall, const int max_nbors,
|
||||
const int maxspecial, const double cell_size,
|
||||
|
@ -66,9 +66,9 @@ int eam_gpu_init(const int ntypes, double host_cutforcesq,
|
|||
if (world_me==0)
|
||||
init_ok=EAMMF.init(ntypes, host_cutforcesq, host_type2rhor, host_type2z2r,
|
||||
host_type2frho, host_rhor_spline, host_z2r_spline,
|
||||
host_frho_spline, rdr, rdrho, nrhor, nrho, nz2r, nfrho,
|
||||
nr, nlocal, nall, 300, maxspecial, cell_size, gpu_split,
|
||||
screen);
|
||||
host_frho_spline, rdr, rdrho, rhomax, nrhor, nrho, nz2r,
|
||||
nfrho, nr, nlocal, nall, 300, maxspecial, cell_size,
|
||||
gpu_split, screen);
|
||||
|
||||
EAMMF.device->world_barrier();
|
||||
if (message)
|
||||
|
@ -86,9 +86,9 @@ int eam_gpu_init(const int ntypes, double host_cutforcesq,
|
|||
if (gpu_rank==i && world_me!=0)
|
||||
init_ok=EAMMF.init(ntypes, host_cutforcesq, host_type2rhor, host_type2z2r,
|
||||
host_type2frho, host_rhor_spline, host_z2r_spline,
|
||||
host_frho_spline, rdr, rdrho, nrhor, nrho, nz2r, nfrho,
|
||||
nr, nlocal, nall, 300, maxspecial, cell_size,
|
||||
gpu_split, screen);
|
||||
host_frho_spline, rdr, rdrho, rhomax, nrhor, nrho,
|
||||
nz2r, nfrho, nr, nlocal, nall, 300, maxspecial,
|
||||
cell_size, gpu_split, screen);
|
||||
|
||||
EAMMF.device->gpu_barrier();
|
||||
if (message)
|
||||
|
|
|
@ -19,7 +19,7 @@
|
|||
enum{SPHERE_SPHERE,SPHERE_ELLIPSE,ELLIPSE_SPHERE,ELLIPSE_ELLIPSE};
|
||||
|
||||
#ifdef NV_KERNEL
|
||||
#include "lal_preprocessor.h"
|
||||
#include "lal_aux_fun1.h"
|
||||
#ifndef _DOUBLE_DOUBLE
|
||||
texture<float4> pos_tex, quat_tex;
|
||||
#else
|
||||
|
@ -30,11 +30,6 @@ texture<int4,1> pos_tex, quat_tex;
|
|||
#define quat_tex qif
|
||||
#endif
|
||||
|
||||
#define atom_info(t_per_atom, ii, tid, offset) \
|
||||
tid=THREAD_ID_X; \
|
||||
offset=tid & (t_per_atom-1); \
|
||||
ii=fast_mul((int)BLOCK_ID_X,(int)(BLOCK_SIZE_X)/t_per_atom)+tid/t_per_atom;
|
||||
|
||||
#define nbor_info_e(nbor_mem, nbor_stride, t_per_atom, ii, offset, \
|
||||
i, numj, stride, list_end, nbor) \
|
||||
nbor=nbor_mem+ii; \
|
||||
|
@ -42,55 +37,11 @@ texture<int4,1> pos_tex, quat_tex;
|
|||
nbor+=nbor_stride; \
|
||||
numj=*nbor; \
|
||||
nbor+=nbor_stride; \
|
||||
list_end=nbor+fast_mul(nbor_stride,numj); \
|
||||
nbor+=fast_mul(offset,nbor_stride); \
|
||||
list_end=nbor+fast_mul(nbor_stride,numj); \
|
||||
nbor+=fast_mul(offset,nbor_stride); \
|
||||
stride=fast_mul(t_per_atom,nbor_stride);
|
||||
|
||||
#define store_answers(f, energy, virial, ii, inum, tid, t_per_atom, offset, \
|
||||
eflag, vflag, ans, engv) \
|
||||
if (t_per_atom>1) { \
|
||||
__local acctyp red_acc[6][BLOCK_PAIR]; \
|
||||
red_acc[0][tid]=f.x; \
|
||||
red_acc[1][tid]=f.y; \
|
||||
red_acc[2][tid]=f.z; \
|
||||
red_acc[3][tid]=energy; \
|
||||
for (unsigned int s=t_per_atom/2; s>0; s>>=1) { \
|
||||
if (offset < s) { \
|
||||
for (int r=0; r<4; r++) \
|
||||
red_acc[r][tid] += red_acc[r][tid+s]; \
|
||||
} \
|
||||
} \
|
||||
f.x=red_acc[0][tid]; \
|
||||
f.y=red_acc[1][tid]; \
|
||||
f.z=red_acc[2][tid]; \
|
||||
energy=red_acc[3][tid]; \
|
||||
if (vflag>0) { \
|
||||
for (int r=0; r<6; r++) \
|
||||
red_acc[r][tid]=virial[r]; \
|
||||
for (unsigned int s=t_per_atom/2; s>0; s>>=1) { \
|
||||
if (offset < s) { \
|
||||
for (int r=0; r<6; r++) \
|
||||
red_acc[r][tid] += red_acc[r][tid+s]; \
|
||||
} \
|
||||
} \
|
||||
for (int r=0; r<6; r++) \
|
||||
virial[r]=red_acc[r][tid]; \
|
||||
} \
|
||||
} \
|
||||
if (offset==0) { \
|
||||
engv+=ii; \
|
||||
if (eflag>0) { \
|
||||
*engv=energy; \
|
||||
engv+=inum; \
|
||||
} \
|
||||
if (vflag>0) { \
|
||||
for (int i=0; i<6; i++) { \
|
||||
*engv=virial[i]; \
|
||||
engv+=inum; \
|
||||
} \
|
||||
} \
|
||||
ans[ii]=f; \
|
||||
}
|
||||
#if (ARCH < 300)
|
||||
|
||||
#define store_answers_t(f, tor, energy, virial, ii, astride, tid, \
|
||||
t_per_atom, offset, eflag, vflag, ans, engv) \
|
||||
|
@ -195,6 +146,80 @@ texture<int4,1> pos_tex, quat_tex;
|
|||
ans[ii]=old; \
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
#define store_answers_t(f, tor, energy, virial, ii, astride, tid, \
|
||||
t_per_atom, offset, eflag, vflag, ans, engv) \
|
||||
if (t_per_atom>1) { \
|
||||
for (unsigned int s=t_per_atom/2; s>0; s>>=1) { \
|
||||
f.x += shfl_xor(f.x, s, t_per_atom); \
|
||||
f.y += shfl_xor(f.y, s, t_per_atom); \
|
||||
f.z += shfl_xor(f.z, s, t_per_atom); \
|
||||
tor.x += shfl_xor(tor.x, s, t_per_atom); \
|
||||
tor.y += shfl_xor(tor.y, s, t_per_atom); \
|
||||
tor.z += shfl_xor(tor.z, s, t_per_atom); \
|
||||
energy += shfl_xor(energy, s, t_per_atom); \
|
||||
} \
|
||||
if (vflag>0) { \
|
||||
for (unsigned int s=t_per_atom/2; s>0; s>>=1) { \
|
||||
for (int r=0; r<6; r++) \
|
||||
virial[r] += shfl_xor(virial[r], s, t_per_atom); \
|
||||
} \
|
||||
} \
|
||||
} \
|
||||
if (offset==0) { \
|
||||
__global acctyp *ap1=engv+ii; \
|
||||
if (eflag>0) { \
|
||||
*ap1=energy; \
|
||||
ap1+=astride; \
|
||||
} \
|
||||
if (vflag>0) { \
|
||||
for (int i=0; i<6; i++) { \
|
||||
*ap1=virial[i]; \
|
||||
ap1+=astride; \
|
||||
} \
|
||||
} \
|
||||
ans[ii]=f; \
|
||||
ans[ii+astride]=tor; \
|
||||
}
|
||||
|
||||
#define acc_answers(f, energy, virial, ii, inum, tid, t_per_atom, offset, \
|
||||
eflag, vflag, ans, engv) \
|
||||
if (t_per_atom>1) { \
|
||||
for (unsigned int s=t_per_atom/2; s>0; s>>=1) { \
|
||||
f.x += shfl_xor(f.x, s, t_per_atom); \
|
||||
f.y += shfl_xor(f.y, s, t_per_atom); \
|
||||
f.z += shfl_xor(f.z, s, t_per_atom); \
|
||||
energy += shfl_xor(energy, s, t_per_atom); \
|
||||
} \
|
||||
if (vflag>0) { \
|
||||
for (unsigned int s=t_per_atom/2; s>0; s>>=1) { \
|
||||
for (int r=0; r<6; r++) \
|
||||
virial[r] += shfl_xor(virial[r], s, t_per_atom); \
|
||||
} \
|
||||
} \
|
||||
} \
|
||||
if (offset==0) { \
|
||||
engv+=ii; \
|
||||
if (eflag>0) { \
|
||||
*engv+=energy; \
|
||||
engv+=inum; \
|
||||
} \
|
||||
if (vflag>0) { \
|
||||
for (int i=0; i<6; i++) { \
|
||||
*engv+=virial[i]; \
|
||||
engv+=inum; \
|
||||
} \
|
||||
} \
|
||||
acctyp4 old=ans[ii]; \
|
||||
old.x+=f.x; \
|
||||
old.y+=f.y; \
|
||||
old.z+=f.z; \
|
||||
ans[ii]=old; \
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
dot product of 2 vectors
|
||||
------------------------------------------------------------------------- */
|
||||
|
|
|
@ -29,22 +29,24 @@ texture<int4,1> pos_tex;
|
|||
// -- Only unpack neighbors matching the specified inclusive range of forms
|
||||
// -- Only unpack neighbors within cutoff
|
||||
// ---------------------------------------------------------------------------
|
||||
__kernel void kernel_nbor(__global numtyp4 *x_, __global numtyp2 *cut_form,
|
||||
const int ntypes, __global int *dev_nbor,
|
||||
__kernel void kernel_nbor(const __global numtyp4 *restrict x_,
|
||||
const __global numtyp2 *restrict cut_form,
|
||||
const int ntypes,
|
||||
__global int *dev_nbor,
|
||||
const int nbor_pitch, const int start, const int inum,
|
||||
__global int *dev_ij, const int form_low,
|
||||
const int form_high) {
|
||||
const __global int *dev_ij,
|
||||
const int form_low, const int form_high) {
|
||||
|
||||
// ii indexes the two interacting particles in gi
|
||||
int ii=GLOBAL_ID_X+start;
|
||||
|
||||
if (ii<inum) {
|
||||
__global int *nbor=dev_ij+ii;
|
||||
const __global int *nbor=dev_ij+ii;
|
||||
int i=*nbor;
|
||||
nbor+=nbor_pitch;
|
||||
int numj=*nbor;
|
||||
nbor+=nbor_pitch;
|
||||
__global int *list_end=nbor+fast_mul(numj,nbor_pitch);
|
||||
const __global int *list_end=nbor+fast_mul(numj,nbor_pitch);
|
||||
__global int *packed=dev_nbor+ii+nbor_pitch+nbor_pitch;
|
||||
|
||||
numtyp4 ix; fetch4(ix,i,pos_tex); //x_[i];
|
||||
|
@ -84,11 +86,13 @@ __kernel void kernel_nbor(__global numtyp4 *x_, __global numtyp2 *cut_form,
|
|||
// -- Only unpack neighbors within cutoff
|
||||
// -- Fast version of routine that uses shared memory for LJ constants
|
||||
// ---------------------------------------------------------------------------
|
||||
__kernel void kernel_nbor_fast(__global numtyp4 *x_, __global numtyp2 *cut_form,
|
||||
__global int *dev_nbor, const int nbor_pitch,
|
||||
const int start, const int inum,
|
||||
__global int *dev_ij, const int form_low,
|
||||
const int form_high) {
|
||||
__kernel void kernel_nbor_fast(const __global numtyp4 *restrict x_,
|
||||
const __global numtyp2 *restrict cut_form,
|
||||
__global int *dev_nbor,
|
||||
const int nbor_pitch, const int start,
|
||||
const int inum,
|
||||
const __global int *dev_ij,
|
||||
const int form_low, const int form_high) {
|
||||
|
||||
int ii=THREAD_ID_X;
|
||||
__local int form[MAX_SHARED_TYPES*MAX_SHARED_TYPES];
|
||||
|
@ -101,12 +105,12 @@ __kernel void kernel_nbor_fast(__global numtyp4 *x_, __global numtyp2 *cut_form,
|
|||
__syncthreads();
|
||||
|
||||
if (ii<inum) {
|
||||
__global int *nbor=dev_ij+ii;
|
||||
const __global int *nbor=dev_ij+ii;
|
||||
int i=*nbor;
|
||||
nbor+=nbor_pitch;
|
||||
int numj=*nbor;
|
||||
nbor+=nbor_pitch;
|
||||
__global int *list_end=nbor+fast_mul(numj,nbor_pitch);
|
||||
const __global int *list_end=nbor+fast_mul(numj,nbor_pitch);
|
||||
__global int *packed=dev_nbor+ii+nbor_pitch+nbor_pitch;
|
||||
|
||||
numtyp4 ix; fetch4(ix,i,pos_tex); //x_[i];
|
||||
|
|
|
@ -24,12 +24,15 @@ texture<int4,1> pos_tex;
|
|||
#define pos_tex x_
|
||||
#endif
|
||||
|
||||
__kernel void k_gauss(__global numtyp4 *x_, __global numtyp4 *gauss1,
|
||||
__kernel void k_gauss(const __global numtyp4 *restrict x_,
|
||||
const __global numtyp4 *restrict gauss1,
|
||||
const int lj_types,
|
||||
__global numtyp *sp_lj_in, __global int *dev_nbor,
|
||||
__global int *dev_packed, __global acctyp4 *ans,
|
||||
__global acctyp *engv, const int eflag,
|
||||
const int vflag, const int inum,
|
||||
const __global numtyp *restrict sp_lj_in,
|
||||
const __global int *dev_nbor,
|
||||
const __global int *dev_packed,
|
||||
__global acctyp4 *restrict ans,
|
||||
__global acctyp *restrict engv,
|
||||
const int eflag, const int vflag, const int inum,
|
||||
const int nbor_pitch, const int t_per_atom) {
|
||||
int tid, ii, offset;
|
||||
atom_info(t_per_atom,ii,tid,offset);
|
||||
|
@ -48,7 +51,7 @@ __kernel void k_gauss(__global numtyp4 *x_, __global numtyp4 *gauss1,
|
|||
virial[i]=(acctyp)0;
|
||||
|
||||
if (ii<inum) {
|
||||
__global int *nbor, *list_end;
|
||||
const __global int *nbor, *list_end;
|
||||
int i, numj, n_stride;
|
||||
nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset,i,numj,
|
||||
n_stride,list_end,nbor);
|
||||
|
@ -104,10 +107,13 @@ __kernel void k_gauss(__global numtyp4 *x_, __global numtyp4 *gauss1,
|
|||
} // if ii
|
||||
}
|
||||
|
||||
__kernel void k_gauss_fast(__global numtyp4 *x_, __global numtyp4 *gauss1_in,
|
||||
__global numtyp* sp_lj_in,
|
||||
__global int *dev_nbor, __global int *dev_packed,
|
||||
__global acctyp4 *ans, __global acctyp *engv,
|
||||
__kernel void k_gauss_fast(const __global numtyp4 *restrict x_,
|
||||
const __global numtyp4 *restrict gauss1_in,
|
||||
const __global numtyp *restrict sp_lj_in,
|
||||
const __global int *dev_nbor,
|
||||
const __global int *dev_packed,
|
||||
__global acctyp4 *restrict ans,
|
||||
__global acctyp *restrict engv,
|
||||
const int eflag, const int vflag, const int inum,
|
||||
const int nbor_pitch, const int t_per_atom) {
|
||||
int tid, ii, offset;
|
||||
|
@ -131,7 +137,7 @@ __kernel void k_gauss_fast(__global numtyp4 *x_, __global numtyp4 *gauss1_in,
|
|||
__syncthreads();
|
||||
|
||||
if (ii<inum) {
|
||||
__global int *nbor, *list_end;
|
||||
const __global int *nbor, *list_end;
|
||||
int i, numj, n_stride;
|
||||
nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset,i,numj,
|
||||
n_stride,list_end,nbor);
|
||||
|
|
|
@ -80,13 +80,20 @@ ucl_inline void compute_eta_torque(numtyp m[9],numtyp m2[9], const numtyp4 shape
|
|||
m[6]*m[1]*m2[7]-(numtyp)2.0*m2[8]*m[3]*m[1])*den;
|
||||
}
|
||||
|
||||
__kernel void k_gayberne(__global numtyp4* x_,__global numtyp4 *q,
|
||||
__global numtyp4* shape, __global numtyp4* well,
|
||||
__global numtyp *gum, __global numtyp2* sig_eps,
|
||||
const int ntypes, __global numtyp *lshape,
|
||||
__global int *dev_nbor, const int stride,
|
||||
__global acctyp4 *ans, const int astride,
|
||||
__global acctyp *engv, __global int *err_flag,
|
||||
__kernel void k_gayberne(const __global numtyp4 *restrict x_,
|
||||
const __global numtyp4 *restrict q,
|
||||
const __global numtyp4 *restrict shape,
|
||||
const __global numtyp4 *restrict well,
|
||||
const __global numtyp *restrict gum,
|
||||
const __global numtyp2 *restrict sig_eps,
|
||||
const int ntypes,
|
||||
const __global numtyp *restrict lshape,
|
||||
const __global int *dev_nbor,
|
||||
const int stride,
|
||||
__global acctyp4 *restrict ans,
|
||||
const int astride,
|
||||
__global acctyp *restrict engv,
|
||||
__global int *restrict err_flag,
|
||||
const int eflag, const int vflag, const int inum,
|
||||
const int t_per_atom) {
|
||||
int tid, ii, offset;
|
||||
|
@ -112,7 +119,7 @@ __kernel void k_gayberne(__global numtyp4* x_,__global numtyp4 *q,
|
|||
virial[i]=(acctyp)0;
|
||||
|
||||
if (ii<inum) {
|
||||
__global int *nbor, *nbor_end;
|
||||
const __global int *nbor, *nbor_end;
|
||||
int i, numj, n_stride;
|
||||
nbor_info_e(dev_nbor,stride,t_per_atom,ii,offset,i,numj,
|
||||
n_stride,nbor_end,nbor);
|
||||
|
|
|
@ -17,15 +17,22 @@
|
|||
#include "lal_ellipsoid_extra.h"
|
||||
#endif
|
||||
|
||||
__kernel void k_gayberne_sphere_ellipsoid(__global numtyp4 *x_,
|
||||
__global numtyp4 *q, __global numtyp4* shape,
|
||||
__global numtyp4* well, __global numtyp *gum,
|
||||
__global numtyp2* sig_eps, const int ntypes,
|
||||
__global numtyp *lshape, __global int *dev_nbor,
|
||||
const int stride, __global acctyp4 *ans,
|
||||
__global acctyp *engv, __global int *err_flag,
|
||||
const int eflag, const int vflag,const int start,
|
||||
const int inum, const int t_per_atom) {
|
||||
__kernel void k_gayberne_sphere_ellipsoid(const __global numtyp4 *restrict x_,
|
||||
const __global numtyp4 *restrict q,
|
||||
const __global numtyp4 *restrict shape,
|
||||
const __global numtyp4 *restrict well,
|
||||
const __global numtyp *restrict gum,
|
||||
const __global numtyp2 *restrict sig_eps,
|
||||
const int ntypes,
|
||||
const __global numtyp *restrict lshape,
|
||||
const __global int *dev_nbor,
|
||||
const int stride,
|
||||
__global acctyp4 *restrict ans,
|
||||
__global acctyp *restrict engv,
|
||||
__global int *restrict err_flag,
|
||||
const int eflag, const int vflag,
|
||||
const int start, const int inum,
|
||||
const int t_per_atom) {
|
||||
int tid, ii, offset;
|
||||
atom_info(t_per_atom,ii,tid,offset);
|
||||
ii+=start;
|
||||
|
@ -46,7 +53,7 @@ __kernel void k_gayberne_sphere_ellipsoid(__global numtyp4 *x_,
|
|||
virial[i]=(acctyp)0;
|
||||
|
||||
if (ii<inum) {
|
||||
__global int *nbor, *nbor_end;
|
||||
const __global int *nbor, *nbor_end;
|
||||
int i, numj, n_stride;
|
||||
nbor_info_e(dev_nbor,stride,t_per_atom,ii,offset,i,numj,
|
||||
n_stride,nbor_end,nbor);
|
||||
|
@ -236,11 +243,16 @@ __kernel void k_gayberne_sphere_ellipsoid(__global numtyp4 *x_,
|
|||
} // if ii
|
||||
}
|
||||
|
||||
__kernel void k_gayberne_lj(__global numtyp4 *x_, __global numtyp4 *lj1,
|
||||
__global numtyp4* lj3, const int lj_types,
|
||||
__global numtyp *gum, const int stride,
|
||||
__global int *dev_ij, __global acctyp4 *ans,
|
||||
__global acctyp *engv, __global int *err_flag,
|
||||
__kernel void k_gayberne_lj(const __global numtyp4 *restrict x_,
|
||||
const __global numtyp4 *restrict lj1,
|
||||
const __global numtyp4 *restrict lj3,
|
||||
const int lj_types,
|
||||
const __global numtyp *restrict gum,
|
||||
const int stride,
|
||||
const __global int *dev_ij,
|
||||
__global acctyp4 *restrict ans,
|
||||
__global acctyp *restrict engv,
|
||||
__global int *restrict err_flag,
|
||||
const int eflag, const int vflag, const int start,
|
||||
const int inum, const int t_per_atom) {
|
||||
int tid, ii, offset;
|
||||
|
@ -263,7 +275,7 @@ __kernel void k_gayberne_lj(__global numtyp4 *x_, __global numtyp4 *lj1,
|
|||
virial[i]=(acctyp)0;
|
||||
|
||||
if (ii<inum) {
|
||||
__global int *nbor, *list_end;
|
||||
const __global int *nbor, *list_end;
|
||||
int i, numj, n_stride;
|
||||
nbor_info_e(dev_ij,stride,t_per_atom,ii,offset,i,numj,
|
||||
n_stride,list_end,nbor);
|
||||
|
@ -318,13 +330,18 @@ __kernel void k_gayberne_lj(__global numtyp4 *x_, __global numtyp4 *lj1,
|
|||
} // if ii
|
||||
}
|
||||
|
||||
__kernel void k_gayberne_lj_fast(__global numtyp4 *x_, __global numtyp4 *lj1_in,
|
||||
__global numtyp4* lj3_in, __global numtyp *gum,
|
||||
const int stride, __global int *dev_ij,
|
||||
__global acctyp4 *ans, __global acctyp *engv,
|
||||
__global int *err_flag, const int eflag,
|
||||
const int vflag, const int start,
|
||||
const int inum, const int t_per_atom) {
|
||||
__kernel void k_gayberne_lj_fast(const __global numtyp4 *restrict x_,
|
||||
const __global numtyp4 *restrict lj1_in,
|
||||
const __global numtyp4 *restrict lj3_in,
|
||||
const __global numtyp *restrict gum,
|
||||
const int stride,
|
||||
const __global int *dev_ij,
|
||||
__global acctyp4 *restrict ans,
|
||||
__global acctyp *restrict engv,
|
||||
__global int *restrict err_flag,
|
||||
const int eflag, const int vflag,
|
||||
const int start, const int inum,
|
||||
const int t_per_atom) {
|
||||
int tid, ii, offset;
|
||||
atom_info(t_per_atom,ii,tid,offset);
|
||||
ii+=start;
|
||||
|
@ -352,7 +369,7 @@ __kernel void k_gayberne_lj_fast(__global numtyp4 *x_, __global numtyp4 *lj1_in,
|
|||
__syncthreads();
|
||||
|
||||
if (ii<inum) {
|
||||
__global int *nbor, *list_end;
|
||||
const __global int *nbor, *list_end;
|
||||
int i, numj, n_stride;
|
||||
nbor_info_e(dev_ij,stride,t_per_atom,ii,offset,i,numj,
|
||||
n_stride,list_end,nbor);
|
||||
|
|
|
@ -24,13 +24,17 @@ texture<int4,1> pos_tex;
|
|||
#define pos_tex x_
|
||||
#endif
|
||||
|
||||
__kernel void k_lj(__global numtyp4 *x_, __global numtyp4 *lj1,
|
||||
__global numtyp4* lj3, const int lj_types,
|
||||
__global numtyp *sp_lj_in, __global int *dev_nbor,
|
||||
__global int *dev_packed, __global acctyp4 *ans,
|
||||
__global acctyp *engv, const int eflag,
|
||||
const int vflag, const int inum,
|
||||
const int nbor_pitch, const int t_per_atom) {
|
||||
__kernel void k_lj(const __global numtyp4 *restrict x_,
|
||||
const __global numtyp4 *restrict lj1,
|
||||
const __global numtyp4 *restrict lj3,
|
||||
const int lj_types,
|
||||
const __global numtyp *restrict sp_lj_in,
|
||||
const __global int * dev_nbor,
|
||||
const __global int * dev_packed,
|
||||
__global acctyp4 *restrict ans,
|
||||
__global acctyp *restrict engv,
|
||||
const int eflag, const int vflag, const int inum,
|
||||
const int nbor_pitch, const int t_per_atom) {
|
||||
int tid, ii, offset;
|
||||
atom_info(t_per_atom,ii,tid,offset);
|
||||
|
||||
|
@ -48,7 +52,7 @@ __kernel void k_lj(__global numtyp4 *x_, __global numtyp4 *lj1,
|
|||
virial[i]=(acctyp)0;
|
||||
|
||||
if (ii<inum) {
|
||||
__global int *nbor, *list_end;
|
||||
const __global int *nbor, *list_end;
|
||||
int i, numj, n_stride;
|
||||
nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset,i,numj,
|
||||
n_stride,list_end,nbor);
|
||||
|
@ -103,13 +107,16 @@ __kernel void k_lj(__global numtyp4 *x_, __global numtyp4 *lj1,
|
|||
} // if ii
|
||||
}
|
||||
|
||||
__kernel void k_lj_fast(__global numtyp4 *x_, __global numtyp4 *lj1_in,
|
||||
__global numtyp4* lj3_in,
|
||||
__global numtyp* sp_lj_in,
|
||||
__global int *dev_nbor, __global int *dev_packed,
|
||||
__global acctyp4 *ans, __global acctyp *engv,
|
||||
const int eflag, const int vflag, const int inum,
|
||||
const int nbor_pitch, const int t_per_atom) {
|
||||
__kernel void k_lj_fast(const __global numtyp4 *restrict x_,
|
||||
const __global numtyp4 *restrict lj1_in,
|
||||
const __global numtyp4 *restrict lj3_in,
|
||||
const __global numtyp *restrict sp_lj_in,
|
||||
const __global int * dev_nbor,
|
||||
const __global int * dev_packed,
|
||||
__global acctyp4 *restrict ans,
|
||||
__global acctyp *restrict engv,
|
||||
const int eflag, const int vflag, const int inum,
|
||||
const int nbor_pitch, const int t_per_atom) {
|
||||
int tid, ii, offset;
|
||||
atom_info(t_per_atom,ii,tid,offset);
|
||||
|
||||
|
@ -134,7 +141,7 @@ __kernel void k_lj_fast(__global numtyp4 *x_, __global numtyp4 *lj1_in,
|
|||
__syncthreads();
|
||||
|
||||
if (ii<inum) {
|
||||
__global int *nbor, *list_end;
|
||||
const __global int *nbor, *list_end;
|
||||
int i, numj, n_stride;
|
||||
nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset,i,numj,
|
||||
n_stride,list_end,nbor);
|
||||
|
|
|
@ -24,13 +24,17 @@ texture<int4,1> pos_tex;
|
|||
#define pos_tex x_
|
||||
#endif
|
||||
|
||||
__kernel void k_lj96(__global numtyp4 *x_, __global numtyp4 *lj1,
|
||||
__global numtyp4* lj3, const int lj_types,
|
||||
__global numtyp *sp_lj_in, __global int *dev_nbor,
|
||||
__global int *dev_packed, __global acctyp4 *ans,
|
||||
__global acctyp *engv, const int eflag,
|
||||
const int vflag, const int inum,
|
||||
const int nbor_pitch, const int t_per_atom) {
|
||||
__kernel void k_lj96(const __global numtyp4 *restrict x_,
|
||||
const __global numtyp4 *restrict lj1,
|
||||
const __global numtyp4 *restrict lj3,
|
||||
const int lj_types,
|
||||
const __global numtyp *restrict sp_lj_in,
|
||||
const __global int *dev_nbor,
|
||||
const __global int *dev_packed,
|
||||
__global acctyp4 *restrict ans,
|
||||
__global acctyp *restrict engv,
|
||||
const int eflag, const int vflag, const int inum,
|
||||
const int nbor_pitch, const int t_per_atom) {
|
||||
int tid, ii, offset;
|
||||
atom_info(t_per_atom,ii,tid,offset);
|
||||
|
||||
|
@ -48,7 +52,7 @@ __kernel void k_lj96(__global numtyp4 *x_, __global numtyp4 *lj1,
|
|||
virial[i]=(acctyp)0;
|
||||
|
||||
if (ii<inum) {
|
||||
__global int *nbor, *list_end;
|
||||
const __global int *nbor, *list_end;
|
||||
int i, numj, n_stride;
|
||||
nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset,i,numj,
|
||||
n_stride,list_end,nbor);
|
||||
|
@ -104,13 +108,16 @@ __kernel void k_lj96(__global numtyp4 *x_, __global numtyp4 *lj1,
|
|||
} // if ii
|
||||
}
|
||||
|
||||
__kernel void k_lj96_fast(__global numtyp4 *x_, __global numtyp4 *lj1_in,
|
||||
__global numtyp4* lj3_in,
|
||||
__global numtyp* sp_lj_in,
|
||||
__global int *dev_nbor, __global int *dev_packed,
|
||||
__global acctyp4 *ans, __global acctyp *engv,
|
||||
const int eflag, const int vflag, const int inum,
|
||||
const int nbor_pitch, const int t_per_atom) {
|
||||
__kernel void k_lj96_fast(const __global numtyp4 *restrict x_,
|
||||
const __global numtyp4 *restrict lj1_in,
|
||||
const __global numtyp4 *restrict lj3_in,
|
||||
const __global numtyp *restrict sp_lj_in,
|
||||
const __global int *dev_nbor,
|
||||
const __global int *dev_packed,
|
||||
__global acctyp4 *restrict ans,
|
||||
__global acctyp *restrict engv,
|
||||
const int eflag, const int vflag, const int inum,
|
||||
const int nbor_pitch, const int t_per_atom) {
|
||||
int tid, ii, offset;
|
||||
atom_info(t_per_atom,ii,tid,offset);
|
||||
|
||||
|
@ -135,7 +142,7 @@ __kernel void k_lj96_fast(__global numtyp4 *x_, __global numtyp4 *lj1_in,
|
|||
__syncthreads();
|
||||
|
||||
if (ii<inum) {
|
||||
__global int *nbor, *list_end;
|
||||
const __global int *nbor, *list_end;
|
||||
int i, numj, n_stride;
|
||||
nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset,i,numj,
|
||||
n_stride,list_end,nbor);
|
||||
|
|
|
@ -29,15 +29,20 @@ texture<int2> q_tex;
|
|||
#define q_tex q_
|
||||
#endif
|
||||
|
||||
__kernel void k_lj_class2_long(__global numtyp4 *x_, __global numtyp4 *lj1,
|
||||
__global numtyp4* lj3, const int lj_types,
|
||||
__global numtyp *sp_lj_in, __global int *dev_nbor,
|
||||
__global int *dev_packed, __global acctyp4 *ans,
|
||||
__global acctyp *engv, const int eflag,
|
||||
const int vflag, const int inum,
|
||||
const int nbor_pitch, __global numtyp *q_,
|
||||
const numtyp cut_coulsq, const numtyp qqrd2e,
|
||||
const numtyp g_ewald, const int t_per_atom) {
|
||||
__kernel void k_lj_class2_long(const __global numtyp4 *restrict x_,
|
||||
const __global numtyp4 *restrict lj1,
|
||||
const __global numtyp4 *restrict lj3,
|
||||
const int lj_types,
|
||||
const __global numtyp *restrict sp_lj_in,
|
||||
const __global int *dev_nbor,
|
||||
const __global int *dev_packed,
|
||||
__global acctyp4 *restrict ans,
|
||||
__global acctyp *restrict engv,
|
||||
const int eflag, const int vflag,
|
||||
const int inum, const int nbor_pitch,
|
||||
const __global numtyp *restrict q_,
|
||||
const numtyp cut_coulsq, const numtyp qqrd2e,
|
||||
const numtyp g_ewald, const int t_per_atom) {
|
||||
int tid, ii, offset;
|
||||
atom_info(t_per_atom,ii,tid,offset);
|
||||
|
||||
|
@ -60,7 +65,7 @@ __kernel void k_lj_class2_long(__global numtyp4 *x_, __global numtyp4 *lj1,
|
|||
virial[i]=(acctyp)0;
|
||||
|
||||
if (ii<inum) {
|
||||
__global int *nbor, *list_end;
|
||||
const __global int *nbor, *list_end;
|
||||
int i, numj, n_stride;
|
||||
nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset,i,numj,
|
||||
n_stride,list_end,nbor);
|
||||
|
@ -141,15 +146,21 @@ __kernel void k_lj_class2_long(__global numtyp4 *x_, __global numtyp4 *lj1,
|
|||
} // if ii
|
||||
}
|
||||
|
||||
__kernel void k_lj_class2_long_fast(__global numtyp4 *x_, __global numtyp4 *lj1_in,
|
||||
__global numtyp4* lj3_in,
|
||||
__global numtyp* sp_lj_in,
|
||||
__global int *dev_nbor, __global int *dev_packed,
|
||||
__global acctyp4 *ans, __global acctyp *engv,
|
||||
const int eflag, const int vflag, const int inum,
|
||||
const int nbor_pitch, __global numtyp *q_,
|
||||
const numtyp cut_coulsq, const numtyp qqrd2e,
|
||||
const numtyp g_ewald, const int t_per_atom) {
|
||||
__kernel void k_lj_class2_long_fast(const __global numtyp4 *restrict x_,
|
||||
const __global numtyp4 *restrict lj1_in,
|
||||
const __global numtyp4 *restrict lj3_in,
|
||||
const __global numtyp *restrict sp_lj_in,
|
||||
const __global int *dev_nbor,
|
||||
const __global int *dev_packed,
|
||||
__global acctyp4 *restrict ans,
|
||||
__global acctyp *restrict engv,
|
||||
const int eflag, const int vflag,
|
||||
const int inum, const int nbor_pitch,
|
||||
const __global numtyp *restrict q_,
|
||||
const numtyp cut_coulsq,
|
||||
const numtyp qqrd2e,
|
||||
const numtyp g_ewald,
|
||||
const int t_per_atom) {
|
||||
int tid, ii, offset;
|
||||
atom_info(t_per_atom,ii,tid,offset);
|
||||
|
||||
|
@ -175,7 +186,7 @@ __kernel void k_lj_class2_long_fast(__global numtyp4 *x_, __global numtyp4 *lj1_
|
|||
__syncthreads();
|
||||
|
||||
if (ii<inum) {
|
||||
__global int *nbor, *list_end;
|
||||
const __global int *nbor, *list_end;
|
||||
int i, numj, n_stride;
|
||||
nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset,i,numj,
|
||||
n_stride,list_end,nbor);
|
||||
|
|
|
@ -29,15 +29,20 @@ texture<int2> q_tex;
|
|||
#define q_tex q_
|
||||
#endif
|
||||
|
||||
__kernel void k_lj_coul(__global numtyp4 *x_, __global numtyp4 *lj1,
|
||||
__global numtyp4* lj3, const int lj_types,
|
||||
__global numtyp *sp_lj_in, __global int *dev_nbor,
|
||||
__global int *dev_packed, __global acctyp4 *ans,
|
||||
__global acctyp *engv, const int eflag,
|
||||
const int vflag, const int inum,
|
||||
const int nbor_pitch, __global numtyp *q_ ,
|
||||
__global numtyp *cutsq, const numtyp qqrd2e,
|
||||
const int t_per_atom) {
|
||||
__kernel void k_lj_coul(const __global numtyp4 *restrict x_,
|
||||
const __global numtyp4 *restrict lj1,
|
||||
const __global numtyp4 *restrict lj3,
|
||||
const int lj_types,
|
||||
const __global numtyp *restrict sp_lj_in,
|
||||
const __global int *dev_nbor,
|
||||
const __global int *dev_packed,
|
||||
__global acctyp4 *restrict ans,
|
||||
__global acctyp *restrict engv,
|
||||
const int eflag, const int vflag, const int inum,
|
||||
const int nbor_pitch,
|
||||
const __global numtyp *restrict q_,
|
||||
const __global numtyp *restrict cutsq,
|
||||
const numtyp qqrd2e, const int t_per_atom) {
|
||||
int tid, ii, offset;
|
||||
atom_info(t_per_atom,ii,tid,offset);
|
||||
|
||||
|
@ -60,7 +65,7 @@ __kernel void k_lj_coul(__global numtyp4 *x_, __global numtyp4 *lj1,
|
|||
virial[i]=(acctyp)0;
|
||||
|
||||
if (ii<inum) {
|
||||
__global int *nbor, *list_end;
|
||||
const __global int *nbor, *list_end;
|
||||
int i, numj, n_stride;
|
||||
nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset,i,numj,
|
||||
n_stride,list_end,nbor);
|
||||
|
@ -132,15 +137,19 @@ __kernel void k_lj_coul(__global numtyp4 *x_, __global numtyp4 *lj1,
|
|||
} // if ii
|
||||
}
|
||||
|
||||
__kernel void k_lj_coul_fast(__global numtyp4 *x_, __global numtyp4 *lj1_in,
|
||||
__global numtyp4* lj3_in,
|
||||
__global numtyp* sp_lj_in,
|
||||
__global int *dev_nbor, __global int *dev_packed,
|
||||
__global acctyp4 *ans, __global acctyp *engv,
|
||||
const int eflag, const int vflag, const int inum,
|
||||
const int nbor_pitch, __global numtyp *q_,
|
||||
__global numtyp *_cutsq, const numtyp qqrd2e,
|
||||
const int t_per_atom) {
|
||||
__kernel void k_lj_coul_fast(const __global numtyp4 *restrict x_,
|
||||
const __global numtyp4 *restrict lj1_in,
|
||||
const __global numtyp4 *restrict lj3_in,
|
||||
const __global numtyp *restrict sp_lj_in,
|
||||
const __global int *dev_nbor,
|
||||
const __global int *dev_packed,
|
||||
__global acctyp4 *restrict ans,
|
||||
__global acctyp *restrict engv,
|
||||
const int eflag, const int vflag, const int inum,
|
||||
const int nbor_pitch,
|
||||
const __global numtyp *restrict q_,
|
||||
const __global numtyp *restrict _cutsq,
|
||||
const numtyp qqrd2e, const int t_per_atom) {
|
||||
int tid, ii, offset;
|
||||
atom_info(t_per_atom,ii,tid,offset);
|
||||
|
||||
|
@ -168,7 +177,7 @@ __kernel void k_lj_coul_fast(__global numtyp4 *x_, __global numtyp4 *lj1_in,
|
|||
__syncthreads();
|
||||
|
||||
if (ii<inum) {
|
||||
__global int *nbor, *list_end;
|
||||
const __global int *nbor, *list_end;
|
||||
int i, numj, n_stride;
|
||||
nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset,i,numj,
|
||||
n_stride,list_end,nbor);
|
||||
|
|
|
@ -29,16 +29,21 @@ texture<int2> q_tex;
|
|||
#define q_tex q_
|
||||
#endif
|
||||
|
||||
__kernel void k_lj_debye_pair(__global numtyp4 *x_, __global numtyp4 *lj1,
|
||||
__global numtyp4* lj3, const int lj_types,
|
||||
__global numtyp *sp_lj_in, __global int *dev_nbor,
|
||||
__global int *dev_packed, __global acctyp4 *ans,
|
||||
__global acctyp *engv, const int eflag,
|
||||
const int vflag, const int inum,
|
||||
const int nbor_pitch, __global numtyp *q_ ,
|
||||
__global numtyp *cutsq, const numtyp qqrd2e,
|
||||
const numtyp kappa,
|
||||
const int t_per_atom) {
|
||||
__kernel void k_lj_debye(const __global numtyp4 *restrict x_,
|
||||
const __global numtyp4 *restrict lj1,
|
||||
const __global numtyp4 *restrict lj3,
|
||||
const int lj_types,
|
||||
const __global numtyp *restrict sp_lj_in,
|
||||
const __global int *dev_nbor,
|
||||
const __global int *dev_packed,
|
||||
__global acctyp4 *restrict ans,
|
||||
__global acctyp *restrict engv,
|
||||
const int eflag, const int vflag, const int inum,
|
||||
const int nbor_pitch,
|
||||
const __global numtyp *restrict q_ ,
|
||||
const __global numtyp *restrict cutsq,
|
||||
const numtyp qqrd2e, const numtyp kappa,
|
||||
const int t_per_atom) {
|
||||
int tid, ii, offset;
|
||||
atom_info(t_per_atom,ii,tid,offset);
|
||||
|
||||
|
@ -61,7 +66,7 @@ __kernel void k_lj_debye_pair(__global numtyp4 *x_, __global numtyp4 *lj1,
|
|||
virial[i]=(acctyp)0;
|
||||
|
||||
if (ii<inum) {
|
||||
__global int *nbor, *list_end;
|
||||
const __global int *nbor, *list_end;
|
||||
int i, numj, n_stride;
|
||||
nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset,i,numj,
|
||||
n_stride,list_end,nbor);
|
||||
|
@ -139,16 +144,20 @@ __kernel void k_lj_debye_pair(__global numtyp4 *x_, __global numtyp4 *lj1,
|
|||
} // if ii
|
||||
}
|
||||
|
||||
__kernel void k_lj_debye_fast(__global numtyp4 *x_, __global numtyp4 *lj1_in,
|
||||
__global numtyp4* lj3_in,
|
||||
__global numtyp* sp_lj_in,
|
||||
__global int *dev_nbor, __global int *dev_packed,
|
||||
__global acctyp4 *ans, __global acctyp *engv,
|
||||
const int eflag, const int vflag, const int inum,
|
||||
const int nbor_pitch, __global numtyp *q_,
|
||||
__global numtyp *_cutsq, const numtyp qqrd2e,
|
||||
const numtyp kappa,
|
||||
const int t_per_atom) {
|
||||
__kernel void k_lj_debye_fast(const __global numtyp4 *restrict x_,
|
||||
const __global numtyp4 *restrict lj1_in,
|
||||
const __global numtyp4 *restrict lj3_in,
|
||||
const __global numtyp *restrict sp_lj_in,
|
||||
const __global int *dev_nbor,
|
||||
const __global int *dev_packed,
|
||||
__global acctyp4 *restrict ans,
|
||||
__global acctyp *restrict engv,
|
||||
const int eflag, const int vflag, const int inum,
|
||||
const int nbor_pitch,
|
||||
const __global numtyp *restrict q_,
|
||||
const __global numtyp *restrict _cutsq,
|
||||
const numtyp qqrd2e, const numtyp kappa,
|
||||
const int t_per_atom) {
|
||||
int tid, ii, offset;
|
||||
atom_info(t_per_atom,ii,tid,offset);
|
||||
|
||||
|
@ -176,7 +185,7 @@ __kernel void k_lj_debye_fast(__global numtyp4 *x_, __global numtyp4 *lj1_in,
|
|||
__syncthreads();
|
||||
|
||||
if (ii<inum) {
|
||||
__global int *nbor, *list_end;
|
||||
const __global int *nbor, *list_end;
|
||||
int i, numj, n_stride;
|
||||
nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset,i,numj,
|
||||
n_stride,list_end,nbor);
|
||||
|
|
|
@ -29,15 +29,20 @@ texture<int2> q_tex;
|
|||
#define q_tex q_
|
||||
#endif
|
||||
|
||||
__kernel void k_lj_coul_long(__global numtyp4 *x_, __global numtyp4 *lj1,
|
||||
__global numtyp4* lj3, const int lj_types,
|
||||
__global numtyp *sp_lj_in, __global int *dev_nbor,
|
||||
__global int *dev_packed, __global acctyp4 *ans,
|
||||
__global acctyp *engv, const int eflag,
|
||||
const int vflag, const int inum,
|
||||
const int nbor_pitch, __global numtyp *q_,
|
||||
const numtyp cut_coulsq, const numtyp qqrd2e,
|
||||
const numtyp g_ewald, const int t_per_atom) {
|
||||
__kernel void k_lj_coul_long(const __global numtyp4 *restrict x_,
|
||||
const __global numtyp4 *restrict lj1,
|
||||
const __global numtyp4 *restrict lj3,
|
||||
const int lj_types,
|
||||
const __global numtyp *restrict sp_lj_in,
|
||||
const __global int *dev_nbor,
|
||||
const __global int *dev_packed,
|
||||
__global acctyp4 *restrict ans,
|
||||
__global acctyp *restrict engv,
|
||||
const int eflag, const int vflag, const int inum,
|
||||
const int nbor_pitch,
|
||||
const __global numtyp *restrict q_,
|
||||
const numtyp cut_coulsq, const numtyp qqrd2e,
|
||||
const numtyp g_ewald, const int t_per_atom) {
|
||||
int tid, ii, offset;
|
||||
atom_info(t_per_atom,ii,tid,offset);
|
||||
|
||||
|
@ -60,7 +65,7 @@ __kernel void k_lj_coul_long(__global numtyp4 *x_, __global numtyp4 *lj1,
|
|||
virial[i]=(acctyp)0;
|
||||
|
||||
if (ii<inum) {
|
||||
__global int *nbor, *list_end;
|
||||
const __global int *nbor, *list_end;
|
||||
int i, numj, n_stride;
|
||||
nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset,i,numj,
|
||||
n_stride,list_end,nbor);
|
||||
|
@ -139,15 +144,19 @@ __kernel void k_lj_coul_long(__global numtyp4 *x_, __global numtyp4 *lj1,
|
|||
} // if ii
|
||||
}
|
||||
|
||||
__kernel void k_lj_coul_long_fast(__global numtyp4 *x_, __global numtyp4 *lj1_in,
|
||||
__global numtyp4* lj3_in,
|
||||
__global numtyp* sp_lj_in,
|
||||
__global int *dev_nbor, __global int *dev_packed,
|
||||
__global acctyp4 *ans, __global acctyp *engv,
|
||||
const int eflag, const int vflag, const int inum,
|
||||
const int nbor_pitch, __global numtyp *q_,
|
||||
const numtyp cut_coulsq, const numtyp qqrd2e,
|
||||
const numtyp g_ewald, const int t_per_atom) {
|
||||
__kernel void k_lj_coul_long_fast(const __global numtyp4 *restrict x_,
|
||||
const __global numtyp4 *restrict lj1_in,
|
||||
const __global numtyp4 *restrict lj3_in,
|
||||
const __global numtyp *restrict sp_lj_in,
|
||||
const __global int *dev_nbor,
|
||||
const __global int *dev_packed,
|
||||
__global acctyp4 *restrict ans,
|
||||
__global acctyp *restrict engv,
|
||||
const int eflag, const int vflag,
|
||||
const int inum, const int nbor_pitch,
|
||||
const __global numtyp *restrict q_,
|
||||
const numtyp cut_coulsq, const numtyp qqrd2e,
|
||||
const numtyp g_ewald, const int t_per_atom) {
|
||||
int tid, ii, offset;
|
||||
atom_info(t_per_atom,ii,tid,offset);
|
||||
|
||||
|
@ -173,7 +182,7 @@ __kernel void k_lj_coul_long_fast(__global numtyp4 *x_, __global numtyp4 *lj1_in
|
|||
__syncthreads();
|
||||
|
||||
if (ii<inum) {
|
||||
__global int *nbor, *list_end;
|
||||
const __global int *nbor, *list_end;
|
||||
int i, numj, n_stride;
|
||||
nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset,i,numj,
|
||||
n_stride,list_end,nbor);
|
||||
|
|
|
@ -31,13 +31,18 @@ texture<int2> q_tex;
|
|||
|
||||
#define MY_PIS (acctyp)1.77245385090551602729
|
||||
|
||||
__kernel void k_lj_dsf(__global numtyp4 *x_, __global numtyp4 *lj1,
|
||||
__global numtyp4* lj3, const int lj_types,
|
||||
__global numtyp *sp_lj_in, __global int *dev_nbor,
|
||||
__global int *dev_packed, __global acctyp4 *ans,
|
||||
__global acctyp *engv, const int eflag,
|
||||
const int vflag, const int inum,
|
||||
const int nbor_pitch, __global numtyp *q_ ,
|
||||
__kernel void k_lj_dsf(const __global numtyp4 *restrict x_,
|
||||
const __global numtyp4 *restrict lj1,
|
||||
const __global numtyp4 *restrict lj3,
|
||||
const int lj_types,
|
||||
const __global numtyp *restrict sp_lj_in,
|
||||
const __global int *dev_nbor,
|
||||
const __global int *dev_packed,
|
||||
__global acctyp4 *restrict ans,
|
||||
__global acctyp *restrict engv,
|
||||
const int eflag, const int vflag, const int inum,
|
||||
const int nbor_pitch,
|
||||
const __global numtyp *restrict q_ ,
|
||||
const numtyp cut_coulsq, const numtyp qqrd2e,
|
||||
const numtyp e_shift, const numtyp f_shift,
|
||||
const numtyp alpha, const int t_per_atom) {
|
||||
|
@ -63,7 +68,7 @@ __kernel void k_lj_dsf(__global numtyp4 *x_, __global numtyp4 *lj1,
|
|||
virial[i]=(acctyp)0;
|
||||
|
||||
if (ii<inum) {
|
||||
__global int *nbor, *list_end;
|
||||
const __global int *nbor, *list_end;
|
||||
int i, numj, n_stride;
|
||||
nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset,i,numj,
|
||||
n_stride,list_end,nbor);
|
||||
|
@ -72,6 +77,12 @@ __kernel void k_lj_dsf(__global numtyp4 *x_, __global numtyp4 *lj1,
|
|||
numtyp qtmp; fetch(qtmp,i,q_tex);
|
||||
int itype=ix.w;
|
||||
|
||||
if (eflag>0) {
|
||||
acctyp e_self = -((acctyp)0.5*e_shift + alpha/MY_PIS) *
|
||||
qtmp*qtmp*qqrd2e/(acctyp)t_per_atom;
|
||||
e_coul += (acctyp)2.0*e_self;
|
||||
}
|
||||
|
||||
for ( ; nbor<list_end; nbor+=n_stride) {
|
||||
int j=*nbor;
|
||||
|
||||
|
@ -107,8 +118,8 @@ __kernel void k_lj_dsf(__global numtyp4 *x_, __global numtyp4 *lj1,
|
|||
numtyp erfcd = ucl_exp(-alpha*alpha*rsq);
|
||||
numtyp t = ucl_recip((numtyp)1.0 + EWALD_P*alpha*r);
|
||||
erfcc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * erfcd;
|
||||
forcecoul = prefactor * (erfcc + 2.0*alpha/MY_PIS*r*erfcd +
|
||||
rsq*f_shift);
|
||||
forcecoul = prefactor * (erfcc + (numtyp)2.0*alpha/MY_PIS*r*erfcd +
|
||||
rsq*f_shift);
|
||||
} else
|
||||
forcecoul = (numtyp)0.0;
|
||||
|
||||
|
@ -144,12 +155,17 @@ __kernel void k_lj_dsf(__global numtyp4 *x_, __global numtyp4 *lj1,
|
|||
} // if ii
|
||||
}
|
||||
|
||||
__kernel void k_lj_dsf_fast(__global numtyp4 *x_, __global numtyp4 *lj1_in,
|
||||
__global numtyp4* lj3_in, __global numtyp* sp_lj_in,
|
||||
__global int *dev_nbor, __global int *dev_packed,
|
||||
__global acctyp4 *ans, __global acctyp *engv,
|
||||
__kernel void k_lj_dsf_fast(const __global numtyp4 *restrict x_,
|
||||
const __global numtyp4 *restrict lj1_in,
|
||||
const __global numtyp4 *restrict lj3_in,
|
||||
const __global numtyp *restrict sp_lj_in,
|
||||
const __global int *dev_nbor,
|
||||
const __global int *dev_packed,
|
||||
__global acctyp4 *restrict ans,
|
||||
__global acctyp *restrict engv,
|
||||
const int eflag, const int vflag, const int inum,
|
||||
const int nbor_pitch, __global numtyp *q_,
|
||||
const int nbor_pitch,
|
||||
const __global numtyp *restrict q_,
|
||||
const numtyp cut_coulsq, const numtyp qqrd2e,
|
||||
const numtyp e_shift, const numtyp f_shift,
|
||||
const numtyp alpha, const int t_per_atom) {
|
||||
|
@ -178,7 +194,7 @@ __kernel void k_lj_dsf_fast(__global numtyp4 *x_, __global numtyp4 *lj1_in,
|
|||
__syncthreads();
|
||||
|
||||
if (ii<inum) {
|
||||
__global int *nbor, *list_end;
|
||||
const __global int *nbor, *list_end;
|
||||
int i, numj, n_stride;
|
||||
nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset,i,numj,
|
||||
n_stride,list_end,nbor);
|
||||
|
@ -188,6 +204,12 @@ __kernel void k_lj_dsf_fast(__global numtyp4 *x_, __global numtyp4 *lj1_in,
|
|||
int iw=ix.w;
|
||||
int itype=fast_mul((int)MAX_SHARED_TYPES,iw);
|
||||
|
||||
if (eflag>0) {
|
||||
acctyp e_self = -((acctyp)0.5*e_shift + alpha/MY_PIS) *
|
||||
qtmp*qtmp*qqrd2e/(acctyp)t_per_atom;
|
||||
e_coul += (acctyp)2.0*e_self;
|
||||
}
|
||||
|
||||
for ( ; nbor<list_end; nbor+=n_stride) {
|
||||
int j=*nbor;
|
||||
|
||||
|
@ -222,8 +244,8 @@ __kernel void k_lj_dsf_fast(__global numtyp4 *x_, __global numtyp4 *lj1_in,
|
|||
numtyp erfcd = ucl_exp(-alpha*alpha*rsq);
|
||||
numtyp t = ucl_recip((numtyp)1.0 + EWALD_P*alpha*r);
|
||||
erfcc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * erfcd;
|
||||
forcecoul = prefactor * (erfcc + 2.0*alpha/MY_PIS*r*erfcd +
|
||||
rsq*f_shift);
|
||||
forcecoul = prefactor * (erfcc + (numtyp)2.0*alpha/MY_PIS*r*erfcd +
|
||||
rsq*f_shift);
|
||||
} else
|
||||
forcecoul = (numtyp)0.0;
|
||||
|
||||
|
|
|
@ -26,12 +26,16 @@ texture<int4,1> pos_tex;
|
|||
#define pos_tex x_
|
||||
#endif
|
||||
|
||||
__kernel void k_lj_expand(__global numtyp4 *x_, __global numtyp4 *lj1,
|
||||
__global numtyp4* lj3, const int lj_types,
|
||||
__global numtyp *sp_lj_in, __global int *dev_nbor,
|
||||
__global int *dev_packed, __global acctyp4 *ans,
|
||||
__global acctyp *engv, const int eflag,
|
||||
const int vflag, const int inum,
|
||||
__kernel void k_lj_expand(const __global numtyp4 *restrict x_,
|
||||
const __global numtyp4 *restrict lj1,
|
||||
const __global numtyp4 *restrict lj3,
|
||||
const int lj_types,
|
||||
const __global numtyp *restrict sp_lj_in,
|
||||
const __global int *dev_nbor,
|
||||
const __global int *dev_packed,
|
||||
__global acctyp4 *restrict ans,
|
||||
__global acctyp *restrict engv,
|
||||
const int eflag, const int vflag, const int inum,
|
||||
const int nbor_pitch, const int t_per_atom) {
|
||||
int tid, ii, offset;
|
||||
atom_info(t_per_atom,ii,tid,offset);
|
||||
|
@ -50,7 +54,7 @@ __kernel void k_lj_expand(__global numtyp4 *x_, __global numtyp4 *lj1,
|
|||
virial[i]=(acctyp)0;
|
||||
|
||||
if (ii<inum) {
|
||||
__global int *nbor, *list_end;
|
||||
const __global int *nbor, *list_end;
|
||||
int i, numj, n_stride;
|
||||
nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset,i,numj,
|
||||
n_stride,list_end,nbor);
|
||||
|
@ -108,11 +112,14 @@ __kernel void k_lj_expand(__global numtyp4 *x_, __global numtyp4 *lj1,
|
|||
} // if ii
|
||||
}
|
||||
|
||||
__kernel void k_lj_expand_fast(__global numtyp4 *x_, __global numtyp4 *lj1_in,
|
||||
__global numtyp4* lj3_in,
|
||||
__global numtyp* sp_lj_in,
|
||||
__global int *dev_nbor, __global int *dev_packed,
|
||||
__global acctyp4 *ans, __global acctyp *engv,
|
||||
__kernel void k_lj_expand_fast(const __global numtyp4 *restrict x_,
|
||||
const __global numtyp4 *restrict lj1_in,
|
||||
const __global numtyp4 *restrict lj3_in,
|
||||
const __global numtyp *restrict sp_lj_in,
|
||||
const __global int *dev_nbor,
|
||||
const __global int *dev_packed,
|
||||
__global acctyp4 *restrict ans,
|
||||
__global acctyp *restrict engv,
|
||||
const int eflag, const int vflag, const int inum,
|
||||
const int nbor_pitch, const int t_per_atom) {
|
||||
int tid, ii, offset;
|
||||
|
@ -139,7 +146,7 @@ __kernel void k_lj_expand_fast(__global numtyp4 *x_, __global numtyp4 *lj1_in,
|
|||
__syncthreads();
|
||||
|
||||
if (ii<inum) {
|
||||
__global int *nbor, *list_end;
|
||||
const __global int *nbor, *list_end;
|
||||
int i, numj, n_stride;
|
||||
nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset,i,numj,
|
||||
n_stride,list_end,nbor);
|
||||
|
|
|
@ -26,13 +26,17 @@ texture<int4,1> pos_tex;
|
|||
#define pos_tex x_
|
||||
#endif
|
||||
|
||||
__kernel void k_morse(__global numtyp4 *x_, __global numtyp4 *mor1,
|
||||
__global numtyp2* mor2, const int lj_types,
|
||||
__global numtyp *sp_lj_in, __global int *dev_nbor,
|
||||
__global int *dev_packed, __global acctyp4 *ans,
|
||||
__global acctyp *engv, const int eflag,
|
||||
const int vflag, const int inum,
|
||||
const int nbor_pitch, const int t_per_atom) {
|
||||
__kernel void k_morse(const __global numtyp4 *restrict x_,
|
||||
const __global numtyp4 *restrict mor1,
|
||||
const __global numtyp2 *restrict mor2,
|
||||
const int lj_types,
|
||||
const __global numtyp *restrict sp_lj_in,
|
||||
const __global int *dev_nbor,
|
||||
const __global int *dev_packed,
|
||||
__global acctyp4 *restrict ans,
|
||||
__global acctyp *restrict engv,
|
||||
const int eflag, const int vflag, const int inum,
|
||||
const int nbor_pitch, const int t_per_atom) {
|
||||
int tid, ii, offset;
|
||||
atom_info(t_per_atom,ii,tid,offset);
|
||||
|
||||
|
@ -50,7 +54,7 @@ __kernel void k_morse(__global numtyp4 *x_, __global numtyp4 *mor1,
|
|||
virial[i]=(acctyp)0;
|
||||
|
||||
if (ii<inum) {
|
||||
__global int *nbor, *list_end;
|
||||
const __global int *nbor, *list_end;
|
||||
int i, numj, n_stride;
|
||||
nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset,i,numj,
|
||||
n_stride,list_end,nbor);
|
||||
|
@ -106,13 +110,16 @@ __kernel void k_morse(__global numtyp4 *x_, __global numtyp4 *mor1,
|
|||
} // if ii
|
||||
}
|
||||
|
||||
__kernel void k_morse_fast(__global numtyp4 *x_, __global numtyp4 *mor1_in,
|
||||
__global numtyp2* mor2_in,
|
||||
__global numtyp* sp_lj_in,
|
||||
__global int *dev_nbor, __global int *dev_packed,
|
||||
__global acctyp4 *ans, __global acctyp *engv,
|
||||
const int eflag, const int vflag, const int inum,
|
||||
const int nbor_pitch, const int t_per_atom) {
|
||||
__kernel void k_morse_fast(const __global numtyp4 *restrict x_,
|
||||
const __global numtyp4 *restrict mor1_in,
|
||||
const __global numtyp2 *restrict mor2_in,
|
||||
const __global numtyp *restrict sp_lj_in,
|
||||
const __global int *dev_nbor,
|
||||
const __global int *dev_packed,
|
||||
__global acctyp4 *restrict ans,
|
||||
__global acctyp *restrict engv,
|
||||
const int eflag, const int vflag, const int inum,
|
||||
const int nbor_pitch, const int t_per_atom) {
|
||||
int tid, ii, offset;
|
||||
atom_info(t_per_atom,ii,tid,offset);
|
||||
|
||||
|
@ -137,7 +144,7 @@ __kernel void k_morse_fast(__global numtyp4 *x_, __global numtyp4 *mor1_in,
|
|||
__syncthreads();
|
||||
|
||||
if (ii<inum) {
|
||||
__global int *nbor, *list_end;
|
||||
const __global int *nbor, *list_end;
|
||||
int i, numj, n_stride;
|
||||
nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset,i,numj,
|
||||
n_stride,list_end,nbor);
|
||||
|
|
|
@ -17,7 +17,8 @@
|
|||
#include "lal_preprocessor.h"
|
||||
#endif
|
||||
|
||||
__kernel void kernel_unpack(__global int *dev_nbor, __global int *dev_ij,
|
||||
__kernel void kernel_unpack(__global int *dev_nbor,
|
||||
const __global int *dev_ij,
|
||||
const int inum, const int t_per_atom) {
|
||||
int tid=THREAD_ID_X;
|
||||
int offset=tid & (t_per_atom-1);
|
||||
|
@ -27,8 +28,8 @@ __kernel void kernel_unpack(__global int *dev_nbor, __global int *dev_ij,
|
|||
__global int *nbor=dev_nbor+ii+inum;
|
||||
int numj=*nbor;
|
||||
nbor+=inum;
|
||||
__global int *list=dev_ij+*nbor;
|
||||
__global int *list_end=list+numj;
|
||||
const __global int *list=dev_ij+*nbor;
|
||||
const __global int *list_end=list+numj;
|
||||
list+=offset;
|
||||
nbor+=fast_mul(ii,t_per_atom-1)+offset;
|
||||
int stride=fast_mul(t_per_atom,inum);
|
||||
|
|
|
@ -22,7 +22,9 @@ texture<float4> pos_tex;
|
|||
texture<int4,1> pos_tex;
|
||||
#endif
|
||||
|
||||
__kernel void calc_cell_id(numtyp4 *pos, unsigned *cell_id, int *particle_id,
|
||||
__kernel void calc_cell_id(const numtyp4 *restrict pos,
|
||||
unsigned *restrict cell_id,
|
||||
int *restrict particle_id,
|
||||
numtyp boxlo0, numtyp boxlo1, numtyp boxlo2,
|
||||
numtyp i_cell_size, int ncellx, int ncelly,
|
||||
int ncellz, int inum, int nall,
|
||||
|
@ -62,8 +64,9 @@ __kernel void calc_cell_id(numtyp4 *pos, unsigned *cell_id, int *particle_id,
|
|||
}
|
||||
}
|
||||
|
||||
__kernel void kernel_calc_cell_counts(unsigned *cell_id,
|
||||
int *cell_counts, int nall, int ncell) {
|
||||
__kernel void kernel_calc_cell_counts(const unsigned *restrict cell_id,
|
||||
int *restrict cell_counts,
|
||||
int nall, int ncell) {
|
||||
int idx = threadIdx.x + blockIdx.x * blockDim.x;
|
||||
if (idx < nall) {
|
||||
int id = cell_id[idx];
|
||||
|
@ -94,8 +97,9 @@ __kernel void kernel_calc_cell_counts(unsigned *cell_id,
|
|||
|
||||
|
||||
|
||||
__kernel void transpose(__global int *out, __global int *in, int columns_in,
|
||||
int rows_in)
|
||||
__kernel void transpose(__global int *restrict out,
|
||||
const __global int *restrict in,
|
||||
int columns_in, int rows_in)
|
||||
{
|
||||
__local int block[BLOCK_CELL_2D][BLOCK_CELL_2D+1];
|
||||
|
||||
|
@ -117,9 +121,9 @@ __kernel void transpose(__global int *out, __global int *in, int columns_in,
|
|||
out[j*rows_in+i] = block[ti][tj];
|
||||
}
|
||||
|
||||
__kernel void calc_neigh_list_cell(__global numtyp4 *x_,
|
||||
__global int *cell_particle_id,
|
||||
__global int *cell_counts,
|
||||
__kernel void calc_neigh_list_cell(const __global numtyp4 *restrict x_,
|
||||
const __global int *restrict cell_particle_id,
|
||||
const __global int *restrict cell_counts,
|
||||
__global int *nbor_list,
|
||||
__global int *host_nbor_list,
|
||||
__global int *host_numj,
|
||||
|
@ -234,8 +238,10 @@ __kernel void calc_neigh_list_cell(__global numtyp4 *x_,
|
|||
|
||||
__kernel void kernel_special(__global int *dev_nbor,
|
||||
__global int *host_nbor_list,
|
||||
__global int *host_numj, __global int *tag,
|
||||
__global int *nspecial, __global int *special,
|
||||
const __global int *host_numj,
|
||||
const __global int *restrict tag,
|
||||
const __global int *restrict nspecial,
|
||||
const __global int *restrict special,
|
||||
int inum, int nt, int max_nbors, int t_per_atom) {
|
||||
int tid=THREAD_ID_X;
|
||||
int ii=fast_mul((int)BLOCK_ID_X,(int)(BLOCK_SIZE_X)/t_per_atom);
|
||||
|
|
|
@ -41,15 +41,18 @@ texture<int2> q_tex;
|
|||
// Number of pencils per block for charge spread
|
||||
#define BLOCK_PENCILS (PPPM_BLOCK_1D/PENCIL_SIZE)
|
||||
|
||||
__kernel void particle_map(__global numtyp4 *x_, __global numtyp *q_,
|
||||
__kernel void particle_map(const __global numtyp4 *restrict x_,
|
||||
const __global numtyp *restrict q_,
|
||||
const grdtyp delvolinv, const int nlocal,
|
||||
__global int *counts, __global grdtyp4 *ans,
|
||||
__global int *restrict counts,
|
||||
__global grdtyp4 *restrict ans,
|
||||
const grdtyp b_lo_x, const grdtyp b_lo_y,
|
||||
const grdtyp b_lo_z, const grdtyp delxinv,
|
||||
const grdtyp delyinv, const grdtyp delzinv,
|
||||
const int nlocal_x, const int nlocal_y,
|
||||
const int nlocal_z, const int atom_stride,
|
||||
const int max_atoms, __global int *error) {
|
||||
const int max_atoms,
|
||||
__global int *restrict error) {
|
||||
// ii indexes the two interacting particles in gi
|
||||
int ii=GLOBAL_ID_X;
|
||||
|
||||
|
@ -97,8 +100,10 @@ __kernel void particle_map(__global numtyp4 *x_, __global numtyp *q_,
|
|||
|
||||
/* --------------------------- */
|
||||
|
||||
__kernel void make_rho(__global int *counts, __global grdtyp4 *atoms,
|
||||
__global grdtyp *brick, __global grdtyp *_rho_coeff,
|
||||
__kernel void make_rho(const __global int *restrict counts,
|
||||
const __global grdtyp4 *restrict atoms,
|
||||
__global grdtyp *restrict brick,
|
||||
const __global grdtyp *restrict _rho_coeff,
|
||||
const int atom_stride, const int npts_x,
|
||||
const int npts_y, const int npts_z, const int nlocal_x,
|
||||
const int nlocal_y, const int nlocal_z,
|
||||
|
@ -192,15 +197,17 @@ __kernel void make_rho(__global int *counts, __global grdtyp4 *atoms,
|
|||
}
|
||||
}
|
||||
|
||||
__kernel void interp(__global numtyp4 *x_, __global numtyp *q_,
|
||||
const int nlocal, __global grdtyp4 *brick,
|
||||
__global grdtyp *_rho_coeff, const int npts_x,
|
||||
const int npts_yx, const grdtyp b_lo_x,
|
||||
__kernel void interp(const __global numtyp4 *restrict x_,
|
||||
const __global numtyp *restrict q_,
|
||||
const int nlocal,
|
||||
const __global grdtyp4 *restrict brick,
|
||||
const __global grdtyp *restrict _rho_coeff,
|
||||
const int npts_x, const int npts_yx, const grdtyp b_lo_x,
|
||||
const grdtyp b_lo_y, const grdtyp b_lo_z,
|
||||
const grdtyp delxinv, const grdtyp delyinv,
|
||||
const grdtyp delzinv, const int order,
|
||||
const int order2, const grdtyp qqrd2e_scale,
|
||||
__global acctyp4 *ans) {
|
||||
__global acctyp4 *restrict ans) {
|
||||
__local grdtyp rho_coeff[PPPM_MAX_SPLINE*PPPM_MAX_SPLINE];
|
||||
__local grdtyp rho1d_0[PPPM_MAX_SPLINE][PPPM_BLOCK_1D];
|
||||
__local grdtyp rho1d_1[PPPM_MAX_SPLINE][PPPM_BLOCK_1D];
|
||||
|
|
|
@ -56,8 +56,7 @@
|
|||
// Definition: Default thread block size for "bio" pair styles
|
||||
// MAX_BIO_SHARED_TYPES
|
||||
// Definition: Max # of atom type params can be stored in shared memory
|
||||
// Restrictions: MAX_BIO_SHARED_TYPES<=BLOCK_BIO_PAIR*2 &&
|
||||
// MAX_BIO_SHARED_TYPES>=BLOCK_BIO_PAIR
|
||||
// Restrictions: MAX_BIO_SHARED_TYPES<=BLOCK_BIO_PAIR*2
|
||||
//
|
||||
//*************************************************************************/
|
||||
|
||||
|
@ -80,6 +79,7 @@
|
|||
#define __kernel extern "C" __global__
|
||||
#define __local __shared__
|
||||
#define __global
|
||||
#define restrict __restrict__
|
||||
#define atom_add atomicAdd
|
||||
#define ucl_inline static __inline__ __device__
|
||||
|
||||
|
@ -116,8 +116,22 @@
|
|||
#define BLOCK_NBOR_BUILD 128
|
||||
#define BLOCK_PAIR 512
|
||||
#define BLOCK_BIO_PAIR 512
|
||||
#define BLOCK_ELLIPSE 256
|
||||
#define MAX_SHARED_TYPES 11
|
||||
|
||||
#ifdef _SINGLE_SINGLE
|
||||
#define shfl_xor __shfl_xor
|
||||
#else
|
||||
ucl_inline double shfl_xor(double var, int laneMask, int width) {
|
||||
int2 tmp;
|
||||
tmp.x = __double2hiint(var);
|
||||
tmp.y = __double2loint(var);
|
||||
tmp.x = __shfl_xor(tmp.x,laneMask,width);
|
||||
tmp.y = __shfl_xor(tmp.y,laneMask,width);
|
||||
return __hiloint2double(tmp.x,tmp.y);
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
@ -380,3 +394,7 @@ typedef struct _double4 double4;
|
|||
#define NEIGHMASK 0x3FFFFFFF
|
||||
ucl_inline int sbmask(int j) { return j >> SBBITS & 3; };
|
||||
|
||||
#ifndef BLOCK_ELLIPSE
|
||||
#define BLOCK_ELLIPSE BLOCK_PAIR
|
||||
#endif
|
||||
|
||||
|
|
|
@ -32,14 +32,20 @@ ucl_inline numtyp det_prime(const numtyp m[9], const numtyp m2[9])
|
|||
return ans;
|
||||
}
|
||||
|
||||
__kernel void k_resquared(__global numtyp4* x_,__global numtyp4 *q,
|
||||
__global numtyp4* shape, __global numtyp4* well,
|
||||
__global numtyp *splj, __global numtyp2* sig_eps,
|
||||
const int ntypes, __global int *dev_nbor,
|
||||
const int stride, __global acctyp4 *ans,
|
||||
const int astride, __global acctyp *engv,
|
||||
__global int *err_flag, const int eflag,
|
||||
const int vflag, const int inum,
|
||||
__kernel void k_resquared(const __global numtyp4 *restrict x_,
|
||||
const __global numtyp4 *restrict q,
|
||||
const __global numtyp4 *restrict shape,
|
||||
const __global numtyp4 *restrict well,
|
||||
const __global numtyp *restrict splj,
|
||||
const __global numtyp2 *restrict sig_eps,
|
||||
const int ntypes,
|
||||
const __global int *dev_nbor,
|
||||
const int stride,
|
||||
__global acctyp4 *restrict ans,
|
||||
const int astride,
|
||||
__global acctyp *restrict engv,
|
||||
__global int *restrict err_flag,
|
||||
const int eflag, const int vflag, const int inum,
|
||||
const int t_per_atom) {
|
||||
int tid, ii, offset;
|
||||
atom_info(t_per_atom,ii,tid,offset);
|
||||
|
@ -68,7 +74,7 @@ __kernel void k_resquared(__global numtyp4* x_,__global numtyp4 *q,
|
|||
virial[i]=(acctyp)0;
|
||||
|
||||
if (ii<inum) {
|
||||
__global int *nbor, *nbor_end;
|
||||
const __global int *nbor, *nbor_end;
|
||||
int i, numj, n_stride;
|
||||
nbor_info_e(dev_nbor,stride,t_per_atom,ii,offset,i,numj,
|
||||
n_stride,nbor_end,nbor);
|
||||
|
|
|
@ -17,15 +17,132 @@
|
|||
#include "lal_ellipsoid_extra.h"
|
||||
#endif
|
||||
|
||||
__kernel void k_resquared_ellipsoid_sphere(__global numtyp4* x_,
|
||||
__global numtyp4 *q, __global numtyp4* shape,
|
||||
__global numtyp4* well, __global numtyp *splj,
|
||||
__global numtyp2* sig_eps, const int ntypes,
|
||||
__global int *dev_nbor, const int stride,
|
||||
__global acctyp4 *ans, const int astride,
|
||||
__global acctyp *engv, __global int *err_flag,
|
||||
const int eflag, const int vflag, const int inum,
|
||||
const int t_per_atom) {
|
||||
#if (ARCH < 300)
|
||||
|
||||
#define store_answers_rt(f, tor, energy, virial, ii, astride, tid, \
|
||||
t_per_atom, offset, eflag, vflag, ans, engv) \
|
||||
if (t_per_atom>1) { \
|
||||
__local acctyp red_acc[7][BLOCK_PAIR]; \
|
||||
red_acc[0][tid]=f.x; \
|
||||
red_acc[1][tid]=f.y; \
|
||||
red_acc[2][tid]=f.z; \
|
||||
red_acc[3][tid]=tor.x; \
|
||||
red_acc[4][tid]=tor.y; \
|
||||
red_acc[5][tid]=tor.z; \
|
||||
for (unsigned int s=t_per_atom/2; s>0; s>>=1) { \
|
||||
if (offset < s) { \
|
||||
for (int r=0; r<6; r++) \
|
||||
red_acc[r][tid] += red_acc[r][tid+s]; \
|
||||
} \
|
||||
} \
|
||||
f.x=red_acc[0][tid]; \
|
||||
f.y=red_acc[1][tid]; \
|
||||
f.z=red_acc[2][tid]; \
|
||||
tor.x=red_acc[3][tid]; \
|
||||
tor.y=red_acc[4][tid]; \
|
||||
tor.z=red_acc[5][tid]; \
|
||||
if (eflag>0 || vflag>0) { \
|
||||
for (int r=0; r<6; r++) \
|
||||
red_acc[r][tid]=virial[r]; \
|
||||
red_acc[6][tid]=energy; \
|
||||
for (unsigned int s=t_per_atom/2; s>0; s>>=1) { \
|
||||
if (offset < s) { \
|
||||
for (int r=0; r<7; r++) \
|
||||
red_acc[r][tid] += red_acc[r][tid+s]; \
|
||||
} \
|
||||
} \
|
||||
for (int r=0; r<6; r++) \
|
||||
virial[r]=red_acc[r][tid]; \
|
||||
energy=red_acc[6][tid]; \
|
||||
} \
|
||||
} \
|
||||
if (offset==0) { \
|
||||
__global acctyp *ap1=engv+ii; \
|
||||
if (eflag>0) { \
|
||||
*ap1+=energy; \
|
||||
ap1+=astride; \
|
||||
} \
|
||||
if (vflag>0) { \
|
||||
for (int i=0; i<6; i++) { \
|
||||
*ap1+=virial[i]; \
|
||||
ap1+=astride; \
|
||||
} \
|
||||
} \
|
||||
acctyp4 old=ans[ii]; \
|
||||
old.x+=f.x; \
|
||||
old.y+=f.y; \
|
||||
old.z+=f.z; \
|
||||
ans[ii]=old; \
|
||||
old=ans[ii+astride]; \
|
||||
old.x+=tor.x; \
|
||||
old.y+=tor.y; \
|
||||
old.z+=tor.z; \
|
||||
ans[ii+astride]=old; \
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
#define store_answers_rt(f, tor, energy, virial, ii, astride, tid, \
|
||||
t_per_atom, offset, eflag, vflag, ans, engv) \
|
||||
if (t_per_atom>1) { \
|
||||
for (unsigned int s=t_per_atom/2; s>0; s>>=1) { \
|
||||
f.x += shfl_xor(f.x, s, t_per_atom); \
|
||||
f.y += shfl_xor(f.y, s, t_per_atom); \
|
||||
f.z += shfl_xor(f.z, s, t_per_atom); \
|
||||
tor.x += shfl_xor(tor.x, s, t_per_atom); \
|
||||
tor.y += shfl_xor(tor.y, s, t_per_atom); \
|
||||
tor.z += shfl_xor(tor.z, s, t_per_atom); \
|
||||
energy += shfl_xor(energy, s, t_per_atom); \
|
||||
} \
|
||||
if (vflag>0) { \
|
||||
for (unsigned int s=t_per_atom/2; s>0; s>>=1) { \
|
||||
for (int r=0; r<6; r++) \
|
||||
virial[r] += shfl_xor(virial[r], s, t_per_atom); \
|
||||
} \
|
||||
} \
|
||||
} \
|
||||
if (offset==0) { \
|
||||
__global acctyp *ap1=engv+ii; \
|
||||
if (eflag>0) { \
|
||||
*ap1+=energy; \
|
||||
ap1+=astride; \
|
||||
} \
|
||||
if (vflag>0) { \
|
||||
for (int i=0; i<6; i++) { \
|
||||
*ap1+=virial[i]; \
|
||||
ap1+=astride; \
|
||||
} \
|
||||
} \
|
||||
acctyp4 old=ans[ii]; \
|
||||
old.x+=f.x; \
|
||||
old.y+=f.y; \
|
||||
old.z+=f.z; \
|
||||
ans[ii]=old; \
|
||||
old=ans[ii+astride]; \
|
||||
old.x+=tor.x; \
|
||||
old.y+=tor.y; \
|
||||
old.z+=tor.z; \
|
||||
ans[ii+astride]=old; \
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
__kernel void k_resquared_ellipsoid_sphere(const __global numtyp4 *restrict x_,
|
||||
const __global numtyp4 *restrict q,
|
||||
const __global numtyp4 *restrict shape,
|
||||
const __global numtyp4 *restrict well,
|
||||
const __global numtyp *restrict splj,
|
||||
const __global numtyp2 *restrict sig_eps,
|
||||
const int ntypes,
|
||||
const __global int *dev_nbor,
|
||||
const int stride,
|
||||
__global acctyp4 *restrict ans,
|
||||
const int astride,
|
||||
__global acctyp *restrict engv,
|
||||
__global int *restrict err_flag,
|
||||
const int eflag, const int vflag,
|
||||
const int inum,
|
||||
const int t_per_atom) {
|
||||
int tid, ii, offset;
|
||||
atom_info(t_per_atom,ii,tid,offset);
|
||||
|
||||
|
@ -55,7 +172,7 @@ __kernel void k_resquared_ellipsoid_sphere(__global numtyp4* x_,
|
|||
virial[i]=(acctyp)0;
|
||||
|
||||
if (ii<inum) {
|
||||
__global int *nbor, *nbor_end;
|
||||
const __global int *nbor, *nbor_end;
|
||||
int i, numj, n_stride;
|
||||
nbor_info_e(dev_nbor,stride,t_per_atom,ii,offset,i,numj,
|
||||
n_stride,nbor_end,nbor);
|
||||
|
@ -260,86 +377,26 @@ __kernel void k_resquared_ellipsoid_sphere(__global numtyp4* x_,
|
|||
}
|
||||
|
||||
} // for nbor
|
||||
|
||||
// Reduce answers
|
||||
if (t_per_atom>1) {
|
||||
__local acctyp red_acc[7][BLOCK_PAIR];
|
||||
|
||||
red_acc[0][tid]=f.x;
|
||||
red_acc[1][tid]=f.y;
|
||||
red_acc[2][tid]=f.z;
|
||||
red_acc[3][tid]=tor.x;
|
||||
red_acc[4][tid]=tor.y;
|
||||
red_acc[5][tid]=tor.z;
|
||||
|
||||
for (unsigned int s=t_per_atom/2; s>0; s>>=1) {
|
||||
if (offset < s) {
|
||||
for (int r=0; r<6; r++)
|
||||
red_acc[r][tid] += red_acc[r][tid+s];
|
||||
}
|
||||
}
|
||||
|
||||
f.x=red_acc[0][tid];
|
||||
f.y=red_acc[1][tid];
|
||||
f.z=red_acc[2][tid];
|
||||
tor.x=red_acc[3][tid];
|
||||
tor.y=red_acc[4][tid];
|
||||
tor.z=red_acc[5][tid];
|
||||
|
||||
if (eflag>0 || vflag>0) {
|
||||
for (int r=0; r<6; r++)
|
||||
red_acc[r][tid]=virial[r];
|
||||
red_acc[6][tid]=energy;
|
||||
|
||||
for (unsigned int s=t_per_atom/2; s>0; s>>=1) {
|
||||
if (offset < s) {
|
||||
for (int r=0; r<7; r++)
|
||||
red_acc[r][tid] += red_acc[r][tid+s];
|
||||
}
|
||||
}
|
||||
|
||||
for (int r=0; r<6; r++)
|
||||
virial[r]=red_acc[r][tid];
|
||||
energy=red_acc[6][tid];
|
||||
}
|
||||
}
|
||||
|
||||
// Store answers
|
||||
if (offset==0) {
|
||||
__global acctyp *ap1=engv+ii;
|
||||
if (eflag>0) {
|
||||
*ap1+=energy;
|
||||
ap1+=astride;
|
||||
}
|
||||
if (vflag>0) {
|
||||
for (int i=0; i<6; i++) {
|
||||
*ap1+=virial[i];
|
||||
ap1+=astride;
|
||||
}
|
||||
}
|
||||
acctyp4 old=ans[ii];
|
||||
old.x+=f.x;
|
||||
old.y+=f.y;
|
||||
old.z+=f.z;
|
||||
ans[ii]=old;
|
||||
|
||||
old=ans[ii+astride];
|
||||
old.x+=tor.x;
|
||||
old.y+=tor.y;
|
||||
old.z+=tor.z;
|
||||
ans[ii+astride]=old;
|
||||
}
|
||||
store_answers_rt(f,tor,energy,virial,ii,astride,tid,t_per_atom,offset,eflag,
|
||||
vflag,ans,engv);
|
||||
} // if ii
|
||||
}
|
||||
|
||||
__kernel void k_resquared_sphere_ellipsoid(__global numtyp4 *x_,
|
||||
__global numtyp4 *q, __global numtyp4* shape,
|
||||
__global numtyp4* well, __global numtyp *splj,
|
||||
__global numtyp2* sig_eps, const int ntypes,
|
||||
__global int *dev_nbor, const int stride,
|
||||
__global acctyp4 *ans, __global acctyp *engv,
|
||||
__global int *err_flag, const int eflag, const int vflag,
|
||||
const int start, const int inum, const int t_per_atom) {
|
||||
__kernel void k_resquared_sphere_ellipsoid(const __global numtyp4 *restrict x_,
|
||||
const __global numtyp4 *restrict q,
|
||||
const __global numtyp4 *restrict shape,
|
||||
const __global numtyp4 *restrict well,
|
||||
const __global numtyp *restrict splj,
|
||||
const __global numtyp2 *restrict sig_eps,
|
||||
const int ntypes,
|
||||
const __global int *dev_nbor,
|
||||
const int stride,
|
||||
__global acctyp4 *restrict ans,
|
||||
__global acctyp *restrict engv,
|
||||
__global int *restrict err_flag,
|
||||
const int eflag, const int vflag,
|
||||
const int start, const int inum,
|
||||
const int t_per_atom) {
|
||||
int tid, ii, offset;
|
||||
atom_info(t_per_atom,ii,tid,offset);
|
||||
ii+=start;
|
||||
|
@ -366,7 +423,7 @@ __kernel void k_resquared_sphere_ellipsoid(__global numtyp4 *x_,
|
|||
virial[i]=(acctyp)0;
|
||||
|
||||
if (ii<inum) {
|
||||
__global int *nbor, *nbor_end;
|
||||
const __global int *nbor, *nbor_end;
|
||||
int j, numj, n_stride;
|
||||
nbor_info_e(dev_nbor,stride,t_per_atom,ii,offset,j,numj,
|
||||
n_stride,nbor_end,nbor);
|
||||
|
@ -525,11 +582,16 @@ __kernel void k_resquared_sphere_ellipsoid(__global numtyp4 *x_,
|
|||
} // if ii
|
||||
}
|
||||
|
||||
__kernel void k_resquared_lj(__global numtyp4 *x_, __global numtyp4 *lj1,
|
||||
__global numtyp4* lj3, const int lj_types,
|
||||
__global numtyp *gum, const int stride,
|
||||
__global int *dev_ij, __global acctyp4 *ans,
|
||||
__global acctyp *engv, __global int *err_flag,
|
||||
__kernel void k_resquared_lj(const __global numtyp4 *restrict x_,
|
||||
const __global numtyp4 *restrict lj1,
|
||||
const __global numtyp4 *restrict lj3,
|
||||
const int lj_types,
|
||||
const __global numtyp *restrict gum,
|
||||
const int stride,
|
||||
const __global int *dev_ij,
|
||||
__global acctyp4 *restrict ans,
|
||||
__global acctyp *restrict engv,
|
||||
__global int *restrict err_flag,
|
||||
const int eflag, const int vflag, const int start,
|
||||
const int inum, const int t_per_atom) {
|
||||
int tid, ii, offset;
|
||||
|
@ -552,7 +614,7 @@ __kernel void k_resquared_lj(__global numtyp4 *x_, __global numtyp4 *lj1,
|
|||
virial[i]=(acctyp)0;
|
||||
|
||||
if (ii<inum) {
|
||||
__global int *nbor, *list_end;
|
||||
const __global int *nbor, *list_end;
|
||||
int i, numj, n_stride;
|
||||
nbor_info_e(dev_ij,stride,t_per_atom,ii,offset,i,numj,
|
||||
n_stride,list_end,nbor);
|
||||
|
@ -606,12 +668,18 @@ __kernel void k_resquared_lj(__global numtyp4 *x_, __global numtyp4 *lj1,
|
|||
} // if ii
|
||||
}
|
||||
|
||||
__kernel void k_resquared_lj_fast(__global numtyp4 *x_,
|
||||
__global numtyp4 *lj1_in, __global numtyp4* lj3_in,
|
||||
__global numtyp *gum, const int stride, __global int *dev_ij,
|
||||
__global acctyp4 *ans, __global acctyp *engv,
|
||||
__global int *err_flag, const int eflag, const int vflag,
|
||||
const int start, const int inum, const int t_per_atom) {
|
||||
__kernel void k_resquared_lj_fast(const __global numtyp4 *restrict x_,
|
||||
const __global numtyp4 *restrict lj1_in,
|
||||
const __global numtyp4 *restrict lj3_in,
|
||||
const __global numtyp *restrict gum,
|
||||
const int stride,
|
||||
const __global int *dev_ij,
|
||||
__global acctyp4 *restrict ans,
|
||||
__global acctyp *restrict engv,
|
||||
__global int *restrict err_flag,
|
||||
const int eflag, const int vflag,
|
||||
const int start, const int inum,
|
||||
const int t_per_atom) {
|
||||
int tid, ii, offset;
|
||||
atom_info(t_per_atom,ii,tid,offset);
|
||||
ii+=start;
|
||||
|
@ -639,7 +707,7 @@ __kernel void k_resquared_lj_fast(__global numtyp4 *x_,
|
|||
__syncthreads();
|
||||
|
||||
if (ii<inum) {
|
||||
__global int *nbor, *list_end;
|
||||
const __global int *nbor, *list_end;
|
||||
int i, numj, n_stride;
|
||||
nbor_info_e(dev_ij,stride,t_per_atom,ii,offset,i,numj,
|
||||
n_stride,list_end,nbor);
|
||||
|
|
|
@ -39,18 +39,21 @@ typedef union {
|
|||
|
||||
/// ---------------- LOOKUP -------------------------------------------------
|
||||
|
||||
__kernel void k_table(__global numtyp4 *x_, __global int *tabindex,
|
||||
__global numtyp4* coeff2,
|
||||
__global numtyp4 *coeff3,
|
||||
__global numtyp4 *coeff4,
|
||||
const int lj_types,
|
||||
__global numtyp *cutsq,
|
||||
__global numtyp* sp_lj_in,
|
||||
__global int *dev_nbor, __global int *dev_packed,
|
||||
__global acctyp4 *ans, __global acctyp *engv,
|
||||
const int eflag, const int vflag, const int inum,
|
||||
const int nbor_pitch, const int t_per_atom,
|
||||
int tablength) {
|
||||
__kernel void k_table(const __global numtyp4 *restrict x_,
|
||||
const __global int *restrict tabindex,
|
||||
const __global numtyp4 *restrict coeff2,
|
||||
const __global numtyp4 *restrict coeff3,
|
||||
const __global numtyp4 *restrict coeff4,
|
||||
const int lj_types,
|
||||
const __global numtyp *restrict cutsq,
|
||||
const __global numtyp *restrict sp_lj_in,
|
||||
const __global int *dev_nbor,
|
||||
const __global int *dev_packed,
|
||||
__global acctyp4 *restrict ans,
|
||||
__global acctyp *restrict engv,
|
||||
const int eflag, const int vflag, const int inum,
|
||||
const int nbor_pitch, const int t_per_atom,
|
||||
int tablength) {
|
||||
int tid, ii, offset;
|
||||
atom_info(t_per_atom,ii,tid,offset);
|
||||
|
||||
|
@ -70,7 +73,7 @@ __kernel void k_table(__global numtyp4 *x_, __global int *tabindex,
|
|||
int tlm1 = tablength - 1;
|
||||
|
||||
if (ii<inum) {
|
||||
__global int *nbor, *list_end;
|
||||
const __global int *nbor, *list_end;
|
||||
int i, numj, n_stride;
|
||||
nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset,i,numj,
|
||||
n_stride,list_end,nbor);
|
||||
|
@ -130,17 +133,20 @@ __kernel void k_table(__global numtyp4 *x_, __global int *tabindex,
|
|||
} // if ii
|
||||
}
|
||||
|
||||
__kernel void k_table_fast(__global numtyp4 *x_, __global int *tabindex,
|
||||
__global numtyp4* coeff2,
|
||||
__global numtyp4 *coeff3,
|
||||
__global numtyp4 *coeff4,
|
||||
__global numtyp *cutsq_in,
|
||||
__global numtyp* sp_lj_in,
|
||||
__global int *dev_nbor, __global int *dev_packed,
|
||||
__global acctyp4 *ans, __global acctyp *engv,
|
||||
const int eflag, const int vflag, const int inum,
|
||||
const int nbor_pitch, const int t_per_atom,
|
||||
int tablength) {
|
||||
__kernel void k_table_fast(const __global numtyp4 *restrict x_,
|
||||
const __global int *restrict tabindex,
|
||||
const __global numtyp4 *restrict coeff2,
|
||||
const __global numtyp4 *restrict coeff3,
|
||||
const __global numtyp4 *restrict coeff4,
|
||||
const __global numtyp *restrict cutsq_in,
|
||||
const __global numtyp *restrict sp_lj_in,
|
||||
const __global int *dev_nbor,
|
||||
const __global int *dev_packed,
|
||||
__global acctyp4 *restrict ans,
|
||||
__global acctyp *restrict engv,
|
||||
const int eflag, const int vflag, const int inum,
|
||||
const int nbor_pitch, const int t_per_atom,
|
||||
int tablength) {
|
||||
int tid, ii, offset;
|
||||
atom_info(t_per_atom,ii,tid,offset);
|
||||
|
||||
|
@ -164,7 +170,7 @@ __kernel void k_table_fast(__global numtyp4 *x_, __global int *tabindex,
|
|||
int tlm1 = tablength - 1;
|
||||
|
||||
if (ii<inum) {
|
||||
__global int *nbor, *list_end;
|
||||
const __global int *nbor, *list_end;
|
||||
int i, numj, n_stride;
|
||||
nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset,i,numj,
|
||||
n_stride,list_end,nbor);
|
||||
|
@ -227,18 +233,21 @@ __kernel void k_table_fast(__global numtyp4 *x_, __global int *tabindex,
|
|||
|
||||
/// ---------------- LINEAR -------------------------------------------------
|
||||
|
||||
__kernel void k_table_linear(__global numtyp4 *x_, __global int *tabindex,
|
||||
__global numtyp4* coeff2,
|
||||
__global numtyp4 *coeff3,
|
||||
__global numtyp4 *coeff4,
|
||||
const int lj_types,
|
||||
__global numtyp *cutsq,
|
||||
__global numtyp* sp_lj_in,
|
||||
__global int *dev_nbor, __global int *dev_packed,
|
||||
__global acctyp4 *ans, __global acctyp *engv,
|
||||
const int eflag, const int vflag, const int inum,
|
||||
const int nbor_pitch, const int t_per_atom,
|
||||
int tablength) {
|
||||
__kernel void k_table_linear(const __global numtyp4 *restrict x_,
|
||||
const __global int *restrict tabindex,
|
||||
const __global numtyp4 *restrict coeff2,
|
||||
const __global numtyp4 *restrict coeff3,
|
||||
const __global numtyp4 *restrict coeff4,
|
||||
const int lj_types,
|
||||
const __global numtyp *restrict cutsq,
|
||||
const __global numtyp *restrict sp_lj_in,
|
||||
const __global int *dev_nbor,
|
||||
const __global int *dev_packed,
|
||||
__global acctyp4 *restrict ans,
|
||||
__global acctyp *restrict engv,
|
||||
const int eflag, const int vflag, const int inum,
|
||||
const int nbor_pitch, const int t_per_atom,
|
||||
int tablength) {
|
||||
int tid, ii, offset;
|
||||
atom_info(t_per_atom,ii,tid,offset);
|
||||
|
||||
|
@ -258,7 +267,7 @@ __kernel void k_table_linear(__global numtyp4 *x_, __global int *tabindex,
|
|||
int tlm1 = tablength - 1;
|
||||
|
||||
if (ii<inum) {
|
||||
__global int *nbor, *list_end;
|
||||
const __global int *nbor, *list_end;
|
||||
int i, numj, n_stride;
|
||||
nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset,i,numj,
|
||||
n_stride,list_end,nbor);
|
||||
|
@ -322,17 +331,20 @@ __kernel void k_table_linear(__global numtyp4 *x_, __global int *tabindex,
|
|||
} // if ii
|
||||
}
|
||||
|
||||
__kernel void k_table_linear_fast(__global numtyp4 *x_, __global int *tabindex,
|
||||
__global numtyp4* coeff2,
|
||||
__global numtyp4 *coeff3,
|
||||
__global numtyp4 *coeff4,
|
||||
__global numtyp *cutsq_in,
|
||||
__global numtyp* sp_lj_in,
|
||||
__global int *dev_nbor, __global int *dev_packed,
|
||||
__global acctyp4 *ans, __global acctyp *engv,
|
||||
const int eflag, const int vflag, const int inum,
|
||||
const int nbor_pitch, const int t_per_atom,
|
||||
int tablength) {
|
||||
__kernel void k_table_linear_fast(const __global numtyp4 *restrict x_,
|
||||
const __global int *restrict tabindex,
|
||||
const __global numtyp4 *restrict coeff2,
|
||||
const __global numtyp4 *restrict coeff3,
|
||||
const __global numtyp4 *restrict coeff4,
|
||||
const __global numtyp *restrict cutsq_in,
|
||||
const __global numtyp *restrict sp_lj_in,
|
||||
const __global int *dev_nbor,
|
||||
const __global int *dev_packed,
|
||||
__global acctyp4 *restrict ans,
|
||||
__global acctyp *restrict engv,
|
||||
const int eflag, const int vflag,
|
||||
const int inum, const int nbor_pitch,
|
||||
const int t_per_atom, int tablength) {
|
||||
int tid, ii, offset;
|
||||
atom_info(t_per_atom,ii,tid,offset);
|
||||
|
||||
|
@ -356,7 +368,7 @@ __kernel void k_table_linear_fast(__global numtyp4 *x_, __global int *tabindex,
|
|||
int tlm1 = tablength - 1;
|
||||
|
||||
if (ii<inum) {
|
||||
__global int *nbor, *list_end;
|
||||
const __global int *nbor, *list_end;
|
||||
int i, numj, n_stride;
|
||||
nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset,i,numj,
|
||||
n_stride,list_end,nbor);
|
||||
|
@ -423,18 +435,21 @@ __kernel void k_table_linear_fast(__global numtyp4 *x_, __global int *tabindex,
|
|||
|
||||
/// ---------------- SPLINE -------------------------------------------------
|
||||
|
||||
__kernel void k_table_spline(__global numtyp4 *x_, __global int *tabindex,
|
||||
__global numtyp4* coeff2,
|
||||
__global numtyp4 *coeff3,
|
||||
__global numtyp4 *coeff4,
|
||||
const int lj_types,
|
||||
__global numtyp *cutsq,
|
||||
__global numtyp* sp_lj_in,
|
||||
__global int *dev_nbor, __global int *dev_packed,
|
||||
__global acctyp4 *ans, __global acctyp *engv,
|
||||
const int eflag, const int vflag, const int inum,
|
||||
const int nbor_pitch, const int t_per_atom,
|
||||
int tablength) {
|
||||
__kernel void k_table_spline(const __global numtyp4 *restrict x_,
|
||||
const __global int *restrict tabindex,
|
||||
const __global numtyp4 *restrict coeff2,
|
||||
const __global numtyp4 *restrict coeff3,
|
||||
const __global numtyp4 *restrict coeff4,
|
||||
const int lj_types,
|
||||
const __global numtyp *restrict cutsq,
|
||||
const __global numtyp *restrict sp_lj_in,
|
||||
const __global int *dev_nbor,
|
||||
const __global int *dev_packed,
|
||||
__global acctyp4 *restrict ans,
|
||||
__global acctyp *restrict engv,
|
||||
const int eflag, const int vflag, const int inum,
|
||||
const int nbor_pitch, const int t_per_atom,
|
||||
int tablength) {
|
||||
int tid, ii, offset;
|
||||
atom_info(t_per_atom,ii,tid,offset);
|
||||
|
||||
|
@ -454,7 +469,7 @@ __kernel void k_table_spline(__global numtyp4 *x_, __global int *tabindex,
|
|||
int tlm1 = tablength - 1;
|
||||
|
||||
if (ii<inum) {
|
||||
__global int *nbor, *list_end;
|
||||
const __global int *nbor, *list_end;
|
||||
int i, numj, n_stride;
|
||||
nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset,i,numj,
|
||||
n_stride,list_end,nbor);
|
||||
|
@ -525,17 +540,20 @@ __kernel void k_table_spline(__global numtyp4 *x_, __global int *tabindex,
|
|||
} // if ii
|
||||
}
|
||||
|
||||
__kernel void k_table_spline_fast(__global numtyp4 *x_, __global int *tabindex,
|
||||
__global numtyp4* coeff2,
|
||||
__global numtyp4 *coeff3,
|
||||
__global numtyp4 *coeff4,
|
||||
__global numtyp *cutsq_in,
|
||||
__global numtyp* sp_lj_in,
|
||||
__global int *dev_nbor, __global int *dev_packed,
|
||||
__global acctyp4 *ans, __global acctyp *engv,
|
||||
const int eflag, const int vflag, const int inum,
|
||||
const int nbor_pitch, const int t_per_atom,
|
||||
int tablength) {
|
||||
__kernel void k_table_spline_fast(const __global numtyp4 *x_,
|
||||
const __global int *tabindex,
|
||||
const __global numtyp4* coeff2,
|
||||
const __global numtyp4 *coeff3,
|
||||
const __global numtyp4 *coeff4,
|
||||
const __global numtyp *cutsq_in,
|
||||
const __global numtyp* sp_lj_in,
|
||||
const __global int *dev_nbor,
|
||||
const __global int *dev_packed,
|
||||
__global acctyp4 *ans,
|
||||
__global acctyp *engv,
|
||||
const int eflag, const int vflag,
|
||||
const int inum, const int nbor_pitch,
|
||||
const int t_per_atom, int tablength) {
|
||||
int tid, ii, offset;
|
||||
atom_info(t_per_atom,ii,tid,offset);
|
||||
|
||||
|
@ -559,7 +577,7 @@ __kernel void k_table_spline_fast(__global numtyp4 *x_, __global int *tabindex,
|
|||
int tlm1 = tablength - 1;
|
||||
|
||||
if (ii<inum) {
|
||||
__global int *nbor, *list_end;
|
||||
const __global int *nbor, *list_end;
|
||||
int i, numj, n_stride;
|
||||
nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset,i,numj,
|
||||
n_stride,list_end,nbor);
|
||||
|
@ -633,19 +651,23 @@ __kernel void k_table_spline_fast(__global numtyp4 *x_, __global int *tabindex,
|
|||
|
||||
/// ---------------- BITMAP -------------------------------------------------
|
||||
|
||||
__kernel void k_table_bitmap(__global numtyp4 *x_, __global int *tabindex,
|
||||
__global int *nshiftbits, __global int *nmask,
|
||||
__global numtyp4* coeff2,
|
||||
__global numtyp4 *coeff3,
|
||||
__global numtyp4 *coeff4,
|
||||
const int lj_types,
|
||||
__global numtyp *cutsq,
|
||||
__global numtyp* sp_lj_in,
|
||||
__global int *dev_nbor, __global int *dev_packed,
|
||||
__global acctyp4 *ans, __global acctyp *engv,
|
||||
const int eflag, const int vflag, const int inum,
|
||||
const int nbor_pitch, const int t_per_atom,
|
||||
int tablength) {
|
||||
__kernel void k_table_bitmap(const __global numtyp4 *x_,
|
||||
const __global int *tabindex,
|
||||
const __global int *nshiftbits,
|
||||
const __global int *nmask,
|
||||
const __global numtyp4* coeff2,
|
||||
const __global numtyp4 *coeff3,
|
||||
const __global numtyp4 *coeff4,
|
||||
const int lj_types,
|
||||
const __global numtyp *cutsq,
|
||||
const __global numtyp* sp_lj_in,
|
||||
const __global int *dev_nbor,
|
||||
const __global int *dev_packed,
|
||||
__global acctyp4 *ans,
|
||||
__global acctyp *engv,
|
||||
const int eflag, const int vflag, const int inum,
|
||||
const int nbor_pitch, const int t_per_atom,
|
||||
int tablength) {
|
||||
int tid, ii, offset;
|
||||
atom_info(t_per_atom,ii,tid,offset);
|
||||
|
||||
|
@ -665,7 +687,7 @@ __kernel void k_table_bitmap(__global numtyp4 *x_, __global int *tabindex,
|
|||
int tlm1 = tablength - 1;
|
||||
|
||||
if (ii<inum) {
|
||||
__global int *nbor, *list_end;
|
||||
const __global int *nbor, *list_end;
|
||||
int i, numj, n_stride;
|
||||
nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset,i,numj,
|
||||
n_stride,list_end,nbor);
|
||||
|
@ -732,18 +754,22 @@ __kernel void k_table_bitmap(__global numtyp4 *x_, __global int *tabindex,
|
|||
} // if ii
|
||||
}
|
||||
|
||||
__kernel void k_table_bitmap_fast(__global numtyp4 *x_, __global int *tabindex,
|
||||
__global int *nshiftbits, __global int *nmask,
|
||||
__global numtyp4* coeff2,
|
||||
__global numtyp4 *coeff3,
|
||||
__global numtyp4 *coeff4,
|
||||
__global numtyp *cutsq_in,
|
||||
__global numtyp* sp_lj_in,
|
||||
__global int *dev_nbor, __global int *dev_packed,
|
||||
__global acctyp4 *ans, __global acctyp *engv,
|
||||
const int eflag, const int vflag, const int inum,
|
||||
const int nbor_pitch, const int t_per_atom,
|
||||
int tablength) {
|
||||
__kernel void k_table_bitmap_fast(const __global numtyp4 *x_,
|
||||
const __global int *tabindex,
|
||||
const __global int *nshiftbits,
|
||||
const __global int *nmask,
|
||||
const __global numtyp4* coeff2,
|
||||
const __global numtyp4 *coeff3,
|
||||
const __global numtyp4 *coeff4,
|
||||
const __global numtyp *cutsq_in,
|
||||
const __global numtyp* sp_lj_in,
|
||||
const __global int *dev_nbor,
|
||||
const __global int *dev_packed,
|
||||
__global acctyp4 *ans,
|
||||
__global acctyp *engv,
|
||||
const int eflag, const int vflag,
|
||||
const int inum, const int nbor_pitch,
|
||||
const int t_per_atom, int tablength) {
|
||||
int tid, ii, offset;
|
||||
atom_info(t_per_atom,ii,tid,offset);
|
||||
|
||||
|
@ -767,7 +793,7 @@ __kernel void k_table_bitmap_fast(__global numtyp4 *x_, __global int *tabindex,
|
|||
int tlm1 = tablength - 1;
|
||||
|
||||
if (ii<inum) {
|
||||
__global int *nbor, *list_end;
|
||||
const __global int *nbor, *list_end;
|
||||
int i, numj, n_stride;
|
||||
nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset,i,numj,
|
||||
n_stride,list_end,nbor);
|
||||
|
|
|
@ -24,13 +24,16 @@ texture<int4,1> pos_tex;
|
|||
#define pos_tex x_
|
||||
#endif
|
||||
|
||||
__kernel void k_yukawa(__global numtyp4 *x_, __global numtyp4 *coeff,
|
||||
const numtyp kappa, const int lj_types,
|
||||
__global numtyp *sp_lj_in, __global int *dev_nbor,
|
||||
__global int *dev_packed, __global acctyp4 *ans,
|
||||
__global acctyp *engv, const int eflag,
|
||||
const int vflag, const int inum,
|
||||
const int nbor_pitch, const int t_per_atom) {
|
||||
__kernel void k_yukawa(const __global numtyp4 *restrict x_,
|
||||
const __global numtyp4 *restrict coeff,
|
||||
const numtyp kappa, const int lj_types,
|
||||
const __global numtyp *restrict sp_lj_in,
|
||||
const __global int *dev_nbor,
|
||||
const __global int *dev_packed,
|
||||
__global acctyp4 *restrict ans,
|
||||
__global acctyp *restrict engv,
|
||||
const int eflag, const int vflag, const int inum,
|
||||
const int nbor_pitch, const int t_per_atom) {
|
||||
int tid, ii, offset;
|
||||
atom_info(t_per_atom,ii,tid,offset);
|
||||
|
||||
|
@ -48,7 +51,7 @@ __kernel void k_yukawa(__global numtyp4 *x_, __global numtyp4 *coeff,
|
|||
virial[i]=(acctyp)0;
|
||||
|
||||
if (ii<inum) {
|
||||
__global int *nbor, *list_end;
|
||||
const __global int *nbor, *list_end;
|
||||
int i, numj, n_stride;
|
||||
nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset,i,numj,
|
||||
n_stride,list_end,nbor);
|
||||
|
@ -105,12 +108,16 @@ __kernel void k_yukawa(__global numtyp4 *x_, __global numtyp4 *coeff,
|
|||
} // if ii
|
||||
}
|
||||
|
||||
__kernel void k_yukawa_fast(__global numtyp4 *x_, __global numtyp4 *coeff_in,
|
||||
const numtyp kappa, __global numtyp* sp_lj_in,
|
||||
__global int *dev_nbor, __global int *dev_packed,
|
||||
__global acctyp4 *ans, __global acctyp *engv,
|
||||
const int eflag, const int vflag, const int inum,
|
||||
const int nbor_pitch, const int t_per_atom) {
|
||||
__kernel void k_yukawa_fast(const __global numtyp4 *restrict x_,
|
||||
const __global numtyp4 *restrict coeff_in,
|
||||
const numtyp kappa,
|
||||
const __global numtyp *restrict sp_lj_in,
|
||||
const __global int *dev_nbor,
|
||||
const __global int *dev_packed,
|
||||
__global acctyp4 *restrict ans,
|
||||
__global acctyp *restrict engv,
|
||||
const int eflag, const int vflag, const int inum,
|
||||
const int nbor_pitch, const int t_per_atom) {
|
||||
int tid, ii, offset;
|
||||
atom_info(t_per_atom,ii,tid,offset);
|
||||
|
||||
|
@ -132,7 +139,7 @@ __kernel void k_yukawa_fast(__global numtyp4 *x_, __global numtyp4 *coeff_in,
|
|||
__syncthreads();
|
||||
|
||||
if (ii<inum) {
|
||||
__global int *nbor, *list_end;
|
||||
const __global int *nbor, *list_end;
|
||||
int i, numj, n_stride;
|
||||
nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset,i,numj,
|
||||
n_stride,list_end,nbor);
|
||||
|
|
|
@ -30,7 +30,7 @@ extern Device<PRECISION,ACC_PRECISION> device;
|
|||
|
||||
template <class numtyp, class acctyp>
|
||||
YukawaColloidT::YukawaColloid() : BaseAtomic<numtyp,acctyp>(),
|
||||
_allocated(false), _max_rad_size(0) {
|
||||
_max_rad_size(0), _allocated(false) {
|
||||
}
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
|
|
|
@ -29,12 +29,16 @@ texture<int2> rad_tex;
|
|||
#define rad_tex rad_
|
||||
#endif
|
||||
|
||||
__kernel void k_yukawa_colloid(__global numtyp4 *x_, __global numtyp *rad_,
|
||||
__global numtyp4 *coeff, const int lj_types,
|
||||
__global numtyp *sp_lj_in, __global int *dev_nbor,
|
||||
__global int *dev_packed, __global acctyp4 *ans,
|
||||
__global acctyp *engv, const int eflag,
|
||||
const int vflag, const int inum,
|
||||
__kernel void k_yukawa_colloid(const __global numtyp4 *restrict x_,
|
||||
const __global numtyp *restrict rad_,
|
||||
const __global numtyp4 *restrict coeff,
|
||||
const int lj_types,
|
||||
const __global numtyp *restrict sp_lj_in,
|
||||
const __global int *dev_nbor,
|
||||
const __global int *dev_packed,
|
||||
__global acctyp4 *restrict ans,
|
||||
__global acctyp *restrict engv,
|
||||
const int eflag, const int vflag, const int inum,
|
||||
const int nbor_pitch, const int t_per_atom,
|
||||
const numtyp kappa) {
|
||||
int tid, ii, offset;
|
||||
|
@ -54,7 +58,7 @@ __kernel void k_yukawa_colloid(__global numtyp4 *x_, __global numtyp *rad_,
|
|||
virial[i]=(acctyp)0;
|
||||
|
||||
if (ii<inum) {
|
||||
__global int *nbor, *list_end;
|
||||
const __global int *nbor, *list_end;
|
||||
int i, numj, n_stride;
|
||||
nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset,i,numj,
|
||||
n_stride,list_end,nbor);
|
||||
|
@ -113,13 +117,17 @@ __kernel void k_yukawa_colloid(__global numtyp4 *x_, __global numtyp *rad_,
|
|||
} // if ii
|
||||
}
|
||||
|
||||
__kernel void k_yukawa_colloid_fast(__global numtyp4 *x_, __global numtyp *rad_,
|
||||
__global numtyp4 *coeff_in, __global numtyp *sp_lj_in,
|
||||
__global int *dev_nbor, __global int *dev_packed,
|
||||
__global acctyp4 *ans, __global acctyp *engv,
|
||||
const int eflag, const int vflag, const int inum,
|
||||
const int nbor_pitch, const int t_per_atom,
|
||||
const numtyp kappa) {
|
||||
__kernel void k_yukawa_colloid_fast(const __global numtyp4 *restrict x_,
|
||||
const __global numtyp *restrict rad_,
|
||||
const __global numtyp4 *restrict coeff_in,
|
||||
const __global numtyp *restrict sp_lj_in,
|
||||
const __global int *dev_nbor,
|
||||
const __global int *dev_packed,
|
||||
__global acctyp4 *restrict ans,
|
||||
__global acctyp *restrict engv,
|
||||
const int eflag, const int vflag,
|
||||
const int inum, const int nbor_pitch,
|
||||
const int t_per_atom, const numtyp kappa) {
|
||||
int tid, ii, offset;
|
||||
atom_info(t_per_atom,ii,tid,offset);
|
||||
|
||||
|
@ -141,7 +149,7 @@ __kernel void k_yukawa_colloid_fast(__global numtyp4 *x_, __global numtyp *rad_,
|
|||
__syncthreads();
|
||||
|
||||
if (ii<inum) {
|
||||
__global int *nbor, *list_end;
|
||||
const __global int *nbor, *list_end;
|
||||
int i, numj, n_stride;
|
||||
nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset,i,numj,
|
||||
n_stride,list_end,nbor);
|
||||
|
|
Loading…
Reference in New Issue