forked from lijiext/lammps
Renamed dev_acc to dev_ilist for better description and updated the 3-body styles accordingly; also fixed bugs with accessing dev_packed from the three_end kernel of tersoff/mod and tersoff/zbl gpu styles for OpenCL builds
This commit is contained in:
parent
94da4be922
commit
d71e037c23
|
@ -127,10 +127,11 @@ void Neighbor::alloc(bool &success) {
|
|||
dev_packed.clear();
|
||||
success=success && (dev_packed.alloc((_max_nbors+2)*_max_atoms,*dev,
|
||||
_packed_permissions)==UCL_SUCCESS);
|
||||
dev_acc.clear();
|
||||
success=success && (dev_acc.alloc(_max_atoms,*dev,
|
||||
dev_ilist.clear();
|
||||
success=success && (dev_ilist.alloc(_max_atoms,*dev,
|
||||
UCL_READ_WRITE)==UCL_SUCCESS);
|
||||
_c_bytes+=dev_packed.row_bytes()+dev_acc.row_bytes();
|
||||
dev_ilist.clear();
|
||||
_c_bytes+=dev_packed.row_bytes()+dev_ilist.row_bytes();
|
||||
}
|
||||
if (_max_host>0) {
|
||||
nbor_host.clear();
|
||||
|
@ -197,7 +198,7 @@ void Neighbor::clear() {
|
|||
|
||||
host_packed.clear();
|
||||
host_acc.clear();
|
||||
dev_acc.clear();
|
||||
dev_ilist.clear();
|
||||
dev_nbor.clear();
|
||||
nbor_host.clear();
|
||||
dev_packed.clear();
|
||||
|
@ -289,7 +290,7 @@ void Neighbor::get_host(const int inum, int *ilist, int *numj,
|
|||
int i=ilist[ii];
|
||||
host_view[i] = ii;
|
||||
}
|
||||
ucl_copy(dev_acc,host_view,true);
|
||||
ucl_copy(dev_ilist,host_view,true);
|
||||
|
||||
time_nbor.stop();
|
||||
|
||||
|
|
|
@ -200,7 +200,7 @@ class Neighbor {
|
|||
/// Host storage for nbor counts (row 1) & accumulated neighbor counts (row2)
|
||||
UCL_H_Vec<int> host_acc;
|
||||
/// Device storage for accessing atom indices from the neighbor list (3-body)
|
||||
UCL_D_Vec<int> dev_acc;
|
||||
UCL_D_Vec<int> dev_ilist;
|
||||
|
||||
// ----------------- Data for GPU Neighbor Calculation ---------------
|
||||
|
||||
|
|
|
@ -243,7 +243,7 @@ void SWT::loop(const bool _eflag, const bool _vflag, const int evatom) {
|
|||
this->k_three_end_vatom.run(&this->atom->x, &sw1, &sw2, &sw3,
|
||||
&map, &elem2param, &_nelements,
|
||||
&this->nbor->dev_nbor, &this->_nbor_data->begin(),
|
||||
&this->nbor->dev_acc, &this->dev_short_nbor,
|
||||
&this->nbor->dev_ilist, &this->dev_short_nbor,
|
||||
&end_ans->force, &end_ans->engv, &eflag, &vflag, &ainum,
|
||||
&nbor_pitch, &this->_threads_per_atom, &this->_gpu_nbor);
|
||||
|
||||
|
@ -252,7 +252,7 @@ void SWT::loop(const bool _eflag, const bool _vflag, const int evatom) {
|
|||
this->k_three_end.run(&this->atom->x, &sw1, &sw2, &sw3,
|
||||
&map, &elem2param, &_nelements,
|
||||
&this->nbor->dev_nbor, &this->_nbor_data->begin(),
|
||||
&this->nbor->dev_acc, &this->dev_short_nbor,
|
||||
&this->nbor->dev_ilist, &this->dev_short_nbor,
|
||||
&end_ans->force, &end_ans->engv, &eflag, &vflag, &ainum,
|
||||
&nbor_pitch, &this->_threads_per_atom, &this->_gpu_nbor);
|
||||
|
||||
|
|
|
@ -544,7 +544,7 @@ __kernel void k_sw_three_end(const __global numtyp4 *restrict x_,
|
|||
const int nelements,
|
||||
const __global int * dev_nbor,
|
||||
const __global int * dev_packed,
|
||||
const __global int * dev_acc,
|
||||
const __global int * dev_ilist,
|
||||
const __global int * dev_short_nbor,
|
||||
__global acctyp4 *restrict ans,
|
||||
__global acctyp *restrict engv,
|
||||
|
@ -614,13 +614,13 @@ __kernel void k_sw_three_end(const __global numtyp4 *restrict x_,
|
|||
int nbor_k,numk;
|
||||
if (dev_nbor==dev_packed) {
|
||||
if (gpu_nbor) nbor_k=j+nbor_pitch;
|
||||
else nbor_k=dev_acc[j]+nbor_pitch;
|
||||
else nbor_k=dev_ilist[j]+nbor_pitch;
|
||||
numk=dev_nbor[nbor_k];
|
||||
nbor_k+=nbor_pitch+fast_mul(j,t_per_atom-1);
|
||||
k_end=nbor_k+fast_mul(numk/t_per_atom,n_stride)+(numk & (t_per_atom-1));
|
||||
nbor_k+=offset_k;
|
||||
} else {
|
||||
nbor_k=dev_acc[j]+nbor_pitch;
|
||||
nbor_k=dev_ilist[j]+nbor_pitch;
|
||||
numk=dev_nbor[nbor_k];
|
||||
nbor_k+=nbor_pitch;
|
||||
nbor_k=dev_nbor[nbor_k];
|
||||
|
@ -698,7 +698,7 @@ __kernel void k_sw_three_end_vatom(const __global numtyp4 *restrict x_,
|
|||
const int nelements,
|
||||
const __global int * dev_nbor,
|
||||
const __global int * dev_packed,
|
||||
const __global int * dev_acc,
|
||||
const __global int * dev_ilist,
|
||||
const __global int * dev_short_nbor,
|
||||
__global acctyp4 *restrict ans,
|
||||
__global acctyp *restrict engv,
|
||||
|
@ -768,13 +768,13 @@ __kernel void k_sw_three_end_vatom(const __global numtyp4 *restrict x_,
|
|||
int nbor_k,numk;
|
||||
if (dev_nbor==dev_packed) {
|
||||
if (gpu_nbor) nbor_k=j+nbor_pitch;
|
||||
else nbor_k=dev_acc[j]+nbor_pitch;
|
||||
else nbor_k=dev_ilist[j]+nbor_pitch;
|
||||
numk=dev_nbor[nbor_k];
|
||||
nbor_k+=nbor_pitch+fast_mul(j,t_per_atom-1);
|
||||
k_end=nbor_k+fast_mul(numk/t_per_atom,n_stride)+(numk & (t_per_atom-1));
|
||||
nbor_k+=offset_k;
|
||||
} else {
|
||||
nbor_k=dev_acc[j]+nbor_pitch;
|
||||
nbor_k=dev_ilist[j]+nbor_pitch;
|
||||
numk=dev_nbor[nbor_k];
|
||||
nbor_k+=nbor_pitch;
|
||||
nbor_k=dev_nbor[nbor_k];
|
||||
|
|
|
@ -311,7 +311,7 @@ void TersoffT::loop(const bool _eflag, const bool _vflag, const int evatom) {
|
|||
this->k_three_end_vatom.run(&this->atom->x, &ts1, &ts2, &ts4, &cutsq,
|
||||
&map, &elem2param, &_nelements, &_nparams, &_zetaij,
|
||||
&this->nbor->dev_nbor, &this->_nbor_data->begin(),
|
||||
&this->nbor->dev_acc, &this->dev_short_nbor,
|
||||
&this->nbor->dev_ilist, &this->dev_short_nbor,
|
||||
&end_ans->force, &end_ans->engv, &eflag, &vflag, &ainum,
|
||||
&nbor_pitch, &this->_threads_per_atom, &this->_gpu_nbor);
|
||||
|
||||
|
@ -320,7 +320,7 @@ void TersoffT::loop(const bool _eflag, const bool _vflag, const int evatom) {
|
|||
this->k_three_end.run(&this->atom->x, &ts1, &ts2, &ts4, &cutsq,
|
||||
&map, &elem2param, &_nelements, &_nparams, &_zetaij,
|
||||
&this->nbor->dev_nbor, &this->_nbor_data->begin(),
|
||||
&this->nbor->dev_acc, &this->dev_short_nbor,
|
||||
&this->nbor->dev_ilist, &this->dev_short_nbor,
|
||||
&end_ans->force, &end_ans->engv, &eflag, &vflag, &ainum,
|
||||
&nbor_pitch, &this->_threads_per_atom, &this->_gpu_nbor);
|
||||
}
|
||||
|
|
|
@ -696,7 +696,7 @@ __kernel void k_tersoff_three_end(const __global numtyp4 *restrict x_,
|
|||
const __global acctyp4 *restrict zetaij,
|
||||
const __global int * dev_nbor,
|
||||
const __global int * dev_packed,
|
||||
const __global int * dev_acc,
|
||||
const __global int * dev_ilist,
|
||||
const __global int * dev_short_nbor,
|
||||
__global acctyp4 *restrict ans,
|
||||
__global acctyp *restrict engv,
|
||||
|
@ -777,13 +777,13 @@ __kernel void k_tersoff_three_end(const __global numtyp4 *restrict x_,
|
|||
int nbor_k,numk;
|
||||
if (dev_nbor==dev_packed) {
|
||||
if (gpu_nbor) nbor_k=j+nbor_pitch;
|
||||
else nbor_k=dev_acc[j]+nbor_pitch;
|
||||
else nbor_k=dev_ilist[j]+nbor_pitch;
|
||||
numk=dev_nbor[nbor_k];
|
||||
nbor_k+=nbor_pitch+fast_mul(j,t_per_atom-1);
|
||||
k_end=nbor_k+fast_mul(numk/t_per_atom,n_stride)+(numk & (t_per_atom-1));
|
||||
nbor_k+=offset_k;
|
||||
} else {
|
||||
nbor_k=dev_acc[j]+nbor_pitch;
|
||||
nbor_k=dev_ilist[j]+nbor_pitch;
|
||||
numk=dev_nbor[nbor_k];
|
||||
nbor_k+=nbor_pitch;
|
||||
nbor_k=dev_nbor[nbor_k];
|
||||
|
@ -941,7 +941,7 @@ __kernel void k_tersoff_three_end_vatom(const __global numtyp4 *restrict x_,
|
|||
const __global acctyp4 *restrict zetaij,
|
||||
const __global int * dev_nbor,
|
||||
const __global int * dev_packed,
|
||||
const __global int * dev_acc,
|
||||
const __global int * dev_ilist,
|
||||
const __global int * dev_short_nbor,
|
||||
__global acctyp4 *restrict ans,
|
||||
__global acctyp *restrict engv,
|
||||
|
@ -1022,13 +1022,13 @@ __kernel void k_tersoff_three_end_vatom(const __global numtyp4 *restrict x_,
|
|||
int nbor_k,numk;
|
||||
if (dev_nbor==dev_packed) {
|
||||
if (gpu_nbor) nbor_k=j+nbor_pitch;
|
||||
else nbor_k=dev_acc[j]+nbor_pitch;
|
||||
else nbor_k=dev_ilist[j]+nbor_pitch;
|
||||
numk=dev_nbor[nbor_k];
|
||||
nbor_k+=nbor_pitch+fast_mul(j,t_per_atom-1);
|
||||
k_end=nbor_k+fast_mul(numk/t_per_atom,n_stride)+(numk & (t_per_atom-1));
|
||||
nbor_k+=offset_k;
|
||||
} else {
|
||||
nbor_k=dev_acc[j]+nbor_pitch;
|
||||
nbor_k=dev_ilist[j]+nbor_pitch;
|
||||
numk=dev_nbor[nbor_k];
|
||||
nbor_k+=nbor_pitch;
|
||||
nbor_k=dev_nbor[nbor_k];
|
||||
|
|
|
@ -311,7 +311,7 @@ void TersoffMT::loop(const bool _eflag, const bool _vflag, const int evatom) {
|
|||
this->k_three_end_vatom.run(&this->atom->x, &ts1, &ts2, &ts4, &ts5, &cutsq,
|
||||
&map, &elem2param, &_nelements, &_nparams, &_zetaij,
|
||||
&this->nbor->dev_nbor, &this->_nbor_data->begin(),
|
||||
&this->nbor->dev_acc, &this->dev_short_nbor,
|
||||
&this->nbor->dev_ilist, &this->dev_short_nbor,
|
||||
&end_ans->force, &end_ans->engv, &eflag, &vflag, &ainum,
|
||||
&nbor_pitch, &this->_threads_per_atom, &this->_gpu_nbor);
|
||||
|
||||
|
@ -320,7 +320,7 @@ void TersoffMT::loop(const bool _eflag, const bool _vflag, const int evatom) {
|
|||
this->k_three_end.run(&this->atom->x, &ts1, &ts2, &ts4, &ts5, &cutsq,
|
||||
&map, &elem2param, &_nelements, &_nparams, &_zetaij,
|
||||
&this->nbor->dev_nbor, &this->_nbor_data->begin(),
|
||||
&this->nbor->dev_acc, &this->dev_short_nbor,
|
||||
&this->nbor->dev_ilist, &this->dev_short_nbor,
|
||||
&end_ans->force, &end_ans->engv, &eflag, &vflag, &ainum,
|
||||
&nbor_pitch, &this->_threads_per_atom, &this->_gpu_nbor);
|
||||
}
|
||||
|
|
|
@ -272,7 +272,7 @@ __kernel void k_tersoff_mod_zeta(const __global numtyp4 *restrict x_,
|
|||
|
||||
if (ii<inum) {
|
||||
int nbor_j, nbor_end, i, numj;
|
||||
const int* nbor_mem=dev_packed;
|
||||
const __global int* nbor_mem=dev_packed;
|
||||
int offset_j=offset/t_per_atom;
|
||||
nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset_j,i,numj,
|
||||
n_stride,nbor_end,nbor_j);
|
||||
|
@ -432,7 +432,7 @@ __kernel void k_tersoff_mod_repulsive(const __global numtyp4 *restrict x_,
|
|||
|
||||
if (ii<inum) {
|
||||
int nbor, nbor_end, i, numj;
|
||||
const int* nbor_mem=dev_packed;
|
||||
const __global int* nbor_mem=dev_packed;
|
||||
nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset,i,numj,
|
||||
n_stride,nbor_end,nbor);
|
||||
|
||||
|
@ -547,7 +547,7 @@ __kernel void k_tersoff_mod_three_center(const __global numtyp4 *restrict x_,
|
|||
|
||||
if (ii<inum) {
|
||||
int i, numj, nbor_j, nbor_end;
|
||||
const int* nbor_mem=dev_packed;
|
||||
const __global int* nbor_mem=dev_packed;
|
||||
int offset_j=offset/t_per_atom;
|
||||
nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset_j,i,numj,
|
||||
n_stride,nbor_end,nbor_j);
|
||||
|
@ -702,7 +702,7 @@ __kernel void k_tersoff_mod_three_end(const __global numtyp4 *restrict x_,
|
|||
const __global acctyp4 *restrict zetaij,
|
||||
const __global int * dev_nbor,
|
||||
const __global int * dev_packed,
|
||||
const __global int * dev_acc,
|
||||
const __global int * dev_ilist,
|
||||
const __global int * dev_short_nbor,
|
||||
__global acctyp4 *restrict ans,
|
||||
__global acctyp *restrict engv,
|
||||
|
@ -740,7 +740,7 @@ __kernel void k_tersoff_mod_three_end(const __global numtyp4 *restrict x_,
|
|||
|
||||
if (ii<inum) {
|
||||
int i, numj, nbor_j, nbor_end, k_end;
|
||||
const int* nbor_mem=dev_packed;
|
||||
const __global int* nbor_mem=dev_packed;
|
||||
int offset_j=offset/t_per_atom;
|
||||
nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset_j,i,numj,
|
||||
n_stride,nbor_end,nbor_j);
|
||||
|
@ -785,13 +785,13 @@ __kernel void k_tersoff_mod_three_end(const __global numtyp4 *restrict x_,
|
|||
int nbor_k,numk;
|
||||
if (dev_nbor==dev_packed) {
|
||||
if (gpu_nbor) nbor_k=j+nbor_pitch;
|
||||
else nbor_k=dev_acc[j]+nbor_pitch;
|
||||
else nbor_k=dev_ilist[j]+nbor_pitch;
|
||||
numk=dev_nbor[nbor_k];
|
||||
nbor_k+=nbor_pitch+fast_mul(j,t_per_atom-1);
|
||||
k_end=nbor_k+fast_mul(numk/t_per_atom,n_stride)+(numk & (t_per_atom-1));
|
||||
nbor_k+=offset_k;
|
||||
} else {
|
||||
nbor_k=dev_acc[j]+nbor_pitch;
|
||||
nbor_k=dev_ilist[j]+nbor_pitch;
|
||||
numk=dev_nbor[nbor_k];
|
||||
nbor_k+=nbor_pitch;
|
||||
nbor_k=dev_nbor[nbor_k];
|
||||
|
@ -956,7 +956,7 @@ __kernel void k_tersoff_mod_three_end_vatom(const __global numtyp4 *restrict x_,
|
|||
const __global acctyp4 *restrict zetaij,
|
||||
const __global int * dev_nbor,
|
||||
const __global int * dev_packed,
|
||||
const __global int * dev_acc,
|
||||
const __global int * dev_ilist,
|
||||
const __global int * dev_short_nbor,
|
||||
__global acctyp4 *restrict ans,
|
||||
__global acctyp *restrict engv,
|
||||
|
@ -994,7 +994,7 @@ __kernel void k_tersoff_mod_three_end_vatom(const __global numtyp4 *restrict x_,
|
|||
|
||||
if (ii<inum) {
|
||||
int i, numj, nbor_j, nbor_end, k_end;
|
||||
const int* nbor_mem = dev_packed;
|
||||
const __global int* nbor_mem = dev_packed;
|
||||
int offset_j=offset/t_per_atom;
|
||||
nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset_j,i,numj,
|
||||
n_stride,nbor_end,nbor_j);
|
||||
|
@ -1039,13 +1039,13 @@ __kernel void k_tersoff_mod_three_end_vatom(const __global numtyp4 *restrict x_,
|
|||
int nbor_k,numk;
|
||||
if (dev_nbor==dev_packed) {
|
||||
if (gpu_nbor) nbor_k=j+nbor_pitch;
|
||||
else nbor_k=dev_acc[j]+nbor_pitch;
|
||||
else nbor_k=dev_ilist[j]+nbor_pitch;
|
||||
numk=dev_nbor[nbor_k];
|
||||
nbor_k+=nbor_pitch+fast_mul(j,t_per_atom-1);
|
||||
k_end=nbor_k+fast_mul(numk/t_per_atom,n_stride)+(numk & (t_per_atom-1));
|
||||
nbor_k+=offset_k;
|
||||
} else {
|
||||
nbor_k=dev_acc[j]+nbor_pitch;
|
||||
nbor_k=dev_ilist[j]+nbor_pitch;
|
||||
numk=dev_nbor[nbor_k];
|
||||
nbor_k+=nbor_pitch;
|
||||
nbor_k=dev_nbor[nbor_k];
|
||||
|
|
|
@ -337,7 +337,7 @@ void TersoffZT::loop(const bool _eflag, const bool _vflag, const int evatom) {
|
|||
this->k_three_end_vatom.run(&this->atom->x, &ts1, &ts2, &ts4, &cutsq,
|
||||
&map, &elem2param, &_nelements, &_nparams, &_zetaij,
|
||||
&this->nbor->dev_nbor, &this->_nbor_data->begin(),
|
||||
&this->nbor->dev_acc, &this->dev_short_nbor,
|
||||
&this->nbor->dev_ilist, &this->dev_short_nbor,
|
||||
&end_ans->force, &end_ans->engv, &eflag, &vflag, &ainum,
|
||||
&nbor_pitch, &this->_threads_per_atom, &this->_gpu_nbor);
|
||||
|
||||
|
@ -346,7 +346,7 @@ void TersoffZT::loop(const bool _eflag, const bool _vflag, const int evatom) {
|
|||
this->k_three_end.run(&this->atom->x, &ts1, &ts2, &ts4, &cutsq,
|
||||
&map, &elem2param, &_nelements, &_nparams, &_zetaij,
|
||||
&this->nbor->dev_nbor, &this->_nbor_data->begin(),
|
||||
&this->nbor->dev_acc, &this->dev_short_nbor,
|
||||
&this->nbor->dev_ilist, &this->dev_short_nbor,
|
||||
&end_ans->force, &end_ans->engv, &eflag, &vflag, &ainum,
|
||||
&nbor_pitch, &this->_threads_per_atom, &this->_gpu_nbor);
|
||||
}
|
||||
|
|
|
@ -278,7 +278,7 @@ __kernel void k_tersoff_zbl_zeta(const __global numtyp4 *restrict x_,
|
|||
|
||||
if (ii<inum) {
|
||||
int nbor_j, nbor_end, i, numj;
|
||||
const int* nbor_mem=dev_packed;
|
||||
const __global int* nbor_mem=dev_packed;
|
||||
int offset_j=offset/t_per_atom;
|
||||
nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset_j,i,numj,
|
||||
n_stride,nbor_end,nbor_j);
|
||||
|
@ -445,7 +445,7 @@ __kernel void k_tersoff_zbl_repulsive(const __global numtyp4 *restrict x_,
|
|||
|
||||
if (ii<inum) {
|
||||
int nbor, nbor_end, i, numj;
|
||||
const int* nbor_mem=dev_packed;
|
||||
const __global int* nbor_mem=dev_packed;
|
||||
nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset,i,numj,
|
||||
n_stride,nbor_end,nbor);
|
||||
|
||||
|
@ -563,7 +563,7 @@ __kernel void k_tersoff_zbl_three_center(const __global numtyp4 *restrict x_,
|
|||
|
||||
if (ii<inum) {
|
||||
int i, numj, nbor_j, nbor_end;
|
||||
const int* nbor_mem=dev_packed;
|
||||
const __global int* nbor_mem=dev_packed;
|
||||
int offset_j=offset/t_per_atom;
|
||||
nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset_j,i,numj,
|
||||
n_stride,nbor_end,nbor_j);
|
||||
|
@ -714,7 +714,7 @@ __kernel void k_tersoff_zbl_three_end(const __global numtyp4 *restrict x_,
|
|||
const __global acctyp4 *restrict zetaij,
|
||||
const __global int * dev_nbor,
|
||||
const __global int * dev_packed,
|
||||
const __global int * dev_acc,
|
||||
const __global int * dev_ilist,
|
||||
const __global int * dev_short_nbor,
|
||||
__global acctyp4 *restrict ans,
|
||||
__global acctyp *restrict engv,
|
||||
|
@ -750,7 +750,7 @@ __kernel void k_tersoff_zbl_three_end(const __global numtyp4 *restrict x_,
|
|||
|
||||
if (ii<inum) {
|
||||
int i, numj, nbor_j, nbor_end, k_end;
|
||||
const int* nbor_mem=dev_packed;
|
||||
const __global int* nbor_mem=dev_packed;
|
||||
int offset_j=offset/t_per_atom;
|
||||
nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset_j,i,numj,
|
||||
n_stride,nbor_end,nbor_j);
|
||||
|
@ -795,13 +795,13 @@ __kernel void k_tersoff_zbl_three_end(const __global numtyp4 *restrict x_,
|
|||
int nbor_k,numk;
|
||||
if (dev_nbor==dev_packed) {
|
||||
if (gpu_nbor) nbor_k=j+nbor_pitch;
|
||||
else nbor_k=dev_acc[j]+nbor_pitch;
|
||||
else nbor_k=dev_ilist[j]+nbor_pitch;
|
||||
numk=dev_nbor[nbor_k];
|
||||
nbor_k+=nbor_pitch+fast_mul(j,t_per_atom-1);
|
||||
k_end=nbor_k+fast_mul(numk/t_per_atom,n_stride)+(numk & (t_per_atom-1));
|
||||
nbor_k+=offset_k;
|
||||
} else {
|
||||
nbor_k=dev_acc[j]+nbor_pitch;
|
||||
nbor_k=dev_ilist[j]+nbor_pitch;
|
||||
numk=dev_nbor[nbor_k];
|
||||
nbor_k+=nbor_pitch;
|
||||
nbor_k=dev_nbor[nbor_k];
|
||||
|
@ -959,7 +959,7 @@ __kernel void k_tersoff_zbl_three_end_vatom(const __global numtyp4 *restrict x_,
|
|||
const __global acctyp4 *restrict zetaij,
|
||||
const __global int * dev_nbor,
|
||||
const __global int * dev_packed,
|
||||
const __global int * dev_acc,
|
||||
const __global int * dev_ilist,
|
||||
const __global int * dev_short_nbor,
|
||||
__global acctyp4 *restrict ans,
|
||||
__global acctyp *restrict engv,
|
||||
|
@ -995,7 +995,7 @@ __kernel void k_tersoff_zbl_three_end_vatom(const __global numtyp4 *restrict x_,
|
|||
|
||||
if (ii<inum) {
|
||||
int i, numj, nbor_j, nbor_end, k_end;
|
||||
const int* nbor_mem = dev_packed;
|
||||
const __global int* nbor_mem = dev_packed;
|
||||
int offset_j=offset/t_per_atom;
|
||||
nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset_j,i,numj,
|
||||
n_stride,nbor_end,nbor_j);
|
||||
|
@ -1040,13 +1040,13 @@ __kernel void k_tersoff_zbl_three_end_vatom(const __global numtyp4 *restrict x_,
|
|||
int nbor_k,numk;
|
||||
if (dev_nbor==dev_packed) {
|
||||
if (gpu_nbor) nbor_k=j+nbor_pitch;
|
||||
else nbor_k=dev_acc[j]+nbor_pitch;
|
||||
else nbor_k=dev_ilist[j]+nbor_pitch;
|
||||
numk=dev_nbor[nbor_k];
|
||||
nbor_k+=nbor_pitch+fast_mul(j,t_per_atom-1);
|
||||
k_end=nbor_k+fast_mul(numk/t_per_atom,n_stride)+(numk & (t_per_atom-1));
|
||||
nbor_k+=offset_k;
|
||||
} else {
|
||||
nbor_k=dev_acc[j]+nbor_pitch;
|
||||
nbor_k=dev_ilist[j]+nbor_pitch;
|
||||
numk=dev_nbor[nbor_k];
|
||||
nbor_k+=nbor_pitch;
|
||||
nbor_k=dev_nbor[nbor_k];
|
||||
|
|
|
@ -278,7 +278,7 @@ void VashishtaT::loop(const bool _eflag, const bool _vflag, const int evatom) {
|
|||
this->k_three_end_vatom.run(&this->atom->x, ¶m1, ¶m2, ¶m3, ¶m4, ¶m5,
|
||||
&map, &elem2param, &_nelements,
|
||||
&this->nbor->dev_nbor, &this->_nbor_data->begin(),
|
||||
&this->nbor->dev_acc, &this->dev_short_nbor,
|
||||
&this->nbor->dev_ilist, &this->dev_short_nbor,
|
||||
&end_ans->force, &end_ans->engv, &eflag, &vflag, &ainum,
|
||||
&nbor_pitch, &this->_threads_per_atom, &this->_gpu_nbor);
|
||||
} else {
|
||||
|
@ -286,7 +286,7 @@ void VashishtaT::loop(const bool _eflag, const bool _vflag, const int evatom) {
|
|||
this->k_three_end.run(&this->atom->x, ¶m1, ¶m2, ¶m3, ¶m4, ¶m5,
|
||||
&map, &elem2param, &_nelements,
|
||||
&this->nbor->dev_nbor, &this->_nbor_data->begin(),
|
||||
&this->nbor->dev_acc, &this->dev_short_nbor,
|
||||
&this->nbor->dev_ilist, &this->dev_short_nbor,
|
||||
&end_ans->force, &end_ans->engv, &eflag, &vflag, &ainum,
|
||||
&nbor_pitch, &this->_threads_per_atom, &this->_gpu_nbor);
|
||||
}
|
||||
|
|
|
@ -554,7 +554,7 @@ __kernel void k_vashishta_three_end(const __global numtyp4 *restrict x_,
|
|||
const int nelements,
|
||||
const __global int * dev_nbor,
|
||||
const __global int * dev_packed,
|
||||
const __global int * dev_acc,
|
||||
const __global int * dev_ilist,
|
||||
const __global int * dev_short_nbor,
|
||||
__global acctyp4 *restrict ans,
|
||||
__global acctyp *restrict engv,
|
||||
|
@ -623,13 +623,13 @@ __kernel void k_vashishta_three_end(const __global numtyp4 *restrict x_,
|
|||
int nbor_k,numk;
|
||||
if (dev_nbor==dev_packed) {
|
||||
if (gpu_nbor) nbor_k=j+nbor_pitch;
|
||||
else nbor_k=dev_acc[j]+nbor_pitch;
|
||||
else nbor_k=dev_ilist[j]+nbor_pitch;
|
||||
numk=dev_nbor[nbor_k];
|
||||
nbor_k+=nbor_pitch+fast_mul(j,t_per_atom-1);
|
||||
k_end=nbor_k+fast_mul(numk/t_per_atom,n_stride)+(numk & (t_per_atom-1));
|
||||
nbor_k+=offset_k;
|
||||
} else {
|
||||
nbor_k=dev_acc[j]+nbor_pitch;
|
||||
nbor_k=dev_ilist[j]+nbor_pitch;
|
||||
numk=dev_nbor[nbor_k];
|
||||
nbor_k+=nbor_pitch;
|
||||
nbor_k=dev_nbor[nbor_k];
|
||||
|
@ -709,7 +709,7 @@ __kernel void k_vashishta_three_end_vatom(const __global numtyp4 *restrict x_,
|
|||
const int nelements,
|
||||
const __global int * dev_nbor,
|
||||
const __global int * dev_packed,
|
||||
const __global int * dev_acc,
|
||||
const __global int * dev_ilist,
|
||||
const __global int * dev_short_nbor,
|
||||
__global acctyp4 *restrict ans,
|
||||
__global acctyp *restrict engv,
|
||||
|
@ -778,13 +778,13 @@ __kernel void k_vashishta_three_end_vatom(const __global numtyp4 *restrict x_,
|
|||
int nbor_k,numk;
|
||||
if (dev_nbor==dev_packed) {
|
||||
if (gpu_nbor) nbor_k=j+nbor_pitch;
|
||||
else nbor_k=dev_acc[j]+nbor_pitch;
|
||||
else nbor_k=dev_ilist[j]+nbor_pitch;
|
||||
numk=dev_nbor[nbor_k];
|
||||
nbor_k+=nbor_pitch+fast_mul(j,t_per_atom-1);
|
||||
k_end=nbor_k+fast_mul(numk/t_per_atom,n_stride)+(numk & (t_per_atom-1));
|
||||
nbor_k+=offset_k;
|
||||
} else {
|
||||
nbor_k=dev_acc[j]+nbor_pitch;
|
||||
nbor_k=dev_ilist[j]+nbor_pitch;
|
||||
numk=dev_nbor[nbor_k];
|
||||
nbor_k+=nbor_pitch;
|
||||
nbor_k=dev_nbor[nbor_k];
|
||||
|
|
Loading…
Reference in New Issue