forked from lijiext/lammps
Fixed a bug in tersoff/gpu when building the short neighbor list
This commit is contained in:
parent
558d2eb84f
commit
6de020805f
|
@ -250,10 +250,9 @@ void TersoffT::loop(const bool _eflag, const bool _vflag, const int evatom) {
|
|||
(BX/this->_threads_per_atom)));
|
||||
|
||||
this->k_short_nbor.set_size(GX,BX);
|
||||
this->k_short_nbor.run(&this->atom->x, &cutsq, &map,
|
||||
&elem2param, &_nelements, &_nparams,
|
||||
this->k_short_nbor.run(&this->atom->x, &elem2param, &_nelements, &_nparams,
|
||||
&this->nbor->dev_nbor, &this->_nbor_data->begin(),
|
||||
&this->dev_short_nbor, &ainum,
|
||||
&this->dev_short_nbor, &_cutshortsq, &ainum,
|
||||
&nbor_pitch, &this->_threads_per_atom);
|
||||
|
||||
// re-allocate zetaij if necessary
|
||||
|
|
|
@ -165,13 +165,12 @@ _texture( ts5_tex,int4);
|
|||
#endif
|
||||
|
||||
__kernel void k_tersoff_short_nbor(const __global numtyp4 *restrict x_,
|
||||
const __global numtyp *restrict cutsq,
|
||||
const __global int *restrict map,
|
||||
const __global int *restrict elem2param,
|
||||
const int nelements, const int nparams,
|
||||
const __global int * dev_nbor,
|
||||
const __global int * dev_packed,
|
||||
__global int * dev_short_nbor,
|
||||
const double _cutshortsq,
|
||||
const int inum, const int nbor_pitch,
|
||||
const int t_per_atom) {
|
||||
__local int n_stride;
|
||||
|
@ -186,7 +185,6 @@ __kernel void k_tersoff_short_nbor(const __global numtyp4 *restrict x_,
|
|||
|
||||
numtyp4 ix; fetch4(ix,i,pos_tex); //x_[i];
|
||||
int itype=ix.w;
|
||||
itype=map[itype];
|
||||
|
||||
int ncount = 0;
|
||||
int m = nbor;
|
||||
|
@ -201,8 +199,6 @@ __kernel void k_tersoff_short_nbor(const __global numtyp4 *restrict x_,
|
|||
|
||||
numtyp4 jx; fetch4(jx,j,pos_tex); //x_[j];
|
||||
int jtype=jx.w;
|
||||
jtype=map[jtype];
|
||||
int ijparam=elem2param[itype*nelements*nelements+jtype*nelements+jtype];
|
||||
|
||||
// Compute r12
|
||||
numtyp delx = ix.x-jx.x;
|
||||
|
@ -210,7 +206,7 @@ __kernel void k_tersoff_short_nbor(const __global numtyp4 *restrict x_,
|
|||
numtyp delz = ix.z-jx.z;
|
||||
numtyp rsq = delx*delx+dely*dely+delz*delz;
|
||||
|
||||
if (rsq<cutsq[ijparam]) {
|
||||
if (rsq<_cutshortsq) {
|
||||
dev_short_nbor[nbor_short] = nj;
|
||||
nbor_short += n_stride;
|
||||
ncount++;
|
||||
|
@ -307,6 +303,7 @@ __kernel void k_tersoff_zeta(const __global numtyp4 *restrict x_,
|
|||
delr1.y = jx.y-ix.y;
|
||||
delr1.z = jx.z-ix.z;
|
||||
numtyp rsq1 = delr1.x*delr1.x+delr1.y*delr1.y+delr1.z*delr1.z;
|
||||
//if (rsq1 >= cutsq[ijparam]) continue;
|
||||
|
||||
// compute zeta_ij
|
||||
z = (acctyp)0;
|
||||
|
@ -460,7 +457,8 @@ __kernel void k_tersoff_repulsive(const __global numtyp4 *restrict x_,
|
|||
numtyp delz = ix.z-jx.z;
|
||||
numtyp rsq = delx*delx+dely*dely+delz*delz;
|
||||
|
||||
// rsq<cutsq[ijparam]
|
||||
if (rsq >= cutsq[ijparam]) continue;
|
||||
|
||||
numtyp feng[2];
|
||||
numtyp ijparam_lam1 = ts1[ijparam].x;
|
||||
numtyp4 ts2_ijparam = ts2[ijparam];
|
||||
|
@ -574,6 +572,7 @@ __kernel void k_tersoff_three_center(const __global numtyp4 *restrict x_,
|
|||
delr1[1] = jx.y-ix.y;
|
||||
delr1[2] = jx.z-ix.z;
|
||||
numtyp rsq1 = delr1[0]*delr1[0] + delr1[1]*delr1[1] + delr1[2]*delr1[2];
|
||||
if (rsq1 >= cutsq[ijparam]) continue;
|
||||
|
||||
numtyp r1 = ucl_sqrt(rsq1);
|
||||
numtyp r1inv = ucl_rsqrt(rsq1);
|
||||
|
@ -757,6 +756,7 @@ __kernel void k_tersoff_three_end(const __global numtyp4 *restrict x_,
|
|||
delr1[1] = jx.y-ix.y;
|
||||
delr1[2] = jx.z-ix.z;
|
||||
numtyp rsq1 = delr1[0]*delr1[0] + delr1[1]*delr1[1] + delr1[2]*delr1[2];
|
||||
//if (rsq1 >= cutsq[ijparam]) continue;
|
||||
|
||||
numtyp mdelr1[3];
|
||||
mdelr1[0] = -delr1[0];
|
||||
|
@ -853,7 +853,6 @@ __kernel void k_tersoff_three_end(const __global numtyp4 *restrict x_,
|
|||
delr2[2] = kx.z-jx.z;
|
||||
numtyp rsq2 = delr2[0]*delr2[0] + delr2[1]*delr2[1] + delr2[2]*delr2[2];
|
||||
|
||||
if (rsq2 > cutsq[jikparam]) continue;
|
||||
numtyp r2 = ucl_sqrt(rsq2);
|
||||
numtyp r2inv = ucl_rsqrt(rsq2);
|
||||
numtyp4 ts1_param, ts2_param, ts4_param;
|
||||
|
@ -995,6 +994,7 @@ __kernel void k_tersoff_three_end_vatom(const __global numtyp4 *restrict x_,
|
|||
delr1[1] = jx.y-ix.y;
|
||||
delr1[2] = jx.z-ix.z;
|
||||
numtyp rsq1 = delr1[0]*delr1[0] + delr1[1]*delr1[1] + delr1[2]*delr1[2];
|
||||
//if (rsq1 >= cutsq[ijparam]) continue;
|
||||
|
||||
numtyp mdelr1[3];
|
||||
mdelr1[0] = -delr1[0];
|
||||
|
|
Loading…
Reference in New Issue