diff --git a/lib/cuda/cuda_pair.cu b/lib/cuda/cuda_pair.cu index 531db7e2b3..b7b2523529 100644 --- a/lib/cuda/cuda_pair.cu +++ b/lib/cuda/cuda_pair.cu @@ -36,6 +36,9 @@ enum COUL_FORCES {COUL_NONE,COUL_CHARMM,COUL_CHARMM_IMPLICIT,COUL_CUT,COUL_LONG, #define DATA_V_RADIUS 512 #define DATA_OMEGA_RMASS 1024 +#define SBBITS 30 +#define NEIGHMASK 0x3FFFFFFF + #define MY_PREFIX cuda_pair #define IncludeCommonNeigh #include "cuda_shared.h" @@ -858,6 +861,9 @@ void Cuda_Pair_PostKernel_AllStyles(cuda_shared_data* sdata, dim3& grid, int& sh #include "cuda_pair_kernel.cu" +#include "pair_manybody_const.h" +#include "pair_tersoff_cuda.cu" +#include "pair_sw_cuda.cu" void Cuda_Pair_UpdateNmax(cuda_shared_data* sdata) { diff --git a/lib/cuda/cuda_pair_kernel.cu b/lib/cuda/cuda_pair_kernel.cu index fe7a38a782..35a0ef1f1a 100644 --- a/lib/cuda/cuda_pair_kernel.cu +++ b/lib/cuda/cuda_pair_kernel.cu @@ -20,7 +20,6 @@ This software is distributed under the GNU General Public License. ------------------------------------------------------------------------- */ - #define EWALD_F 1.12837917 #define EWALD_P 0.3275911 #define A1 0.254829592 @@ -29,6 +28,10 @@ #define A4 -1.453152027 #define A5 1.061405429 +inline __device__ int sbmask(int j) { + return j >> SBBITS & 3; +} + template __global__ void Pair_Kernel_TpA(int eflag, int vflag,int eflag_atom,int vflag_atom) { @@ -88,8 +91,8 @@ __global__ void Pair_Kernel_TpA(int eflag, int vflag,int eflag_atom,int vflag_at fytmp = F_F(0.0); fztmp = F_F(0.0); - if(coul_type!=COUL_NONE) - qtmp = fetchQ(i); + if(coul_type!=COUL_NONE) + qtmp = fetchQ(i); jnum = _numneigh[i]; jlist = &_neighbors[i]; @@ -103,10 +106,10 @@ __global__ void Pair_Kernel_TpA(int eflag, int vflag,int eflag_atom,int vflag_at { fpair=F_F(0.0); j = jlist[jj*_nlocal]; - factor_lj = j<_nall ? F_F(1.0) : _special_lj[j/_nall]; - if(coul_type!=COUL_NONE) - factor_coul = j<_nall ? F_F(1.0) : _special_coul[j/_nall]; - j = j<_nall ? j : j % _nall; + factor_lj = _special_lj[sbmask(j)]; + if(coul_type!=COUL_NONE) + factor_coul = _special_coul[sbmask(j)]; + j &= NEIGHMASK; myxtype = fetchXType(j); delx = xtmp - myxtype.x; @@ -230,7 +233,6 @@ __global__ void Pair_Kernel_TpA(int eflag, int vflag,int eflag_atom,int vflag_at fpair += forcecoul*r2inv; } break; - } } in_cutoff=in_cutoff || in_coul_cutoff; @@ -388,12 +390,12 @@ template nex_mol|sneighlist->nex_group|sneighlist->nex_type; if(exclude) NeighborBuildFullBin_Kernel<1><<>> - (sneighlist->binned_id,sneighlist->bin_nmax,sneighlist->bin_dim[0],sneighlist->bin_dim[1],globcutoff,sdata->pair.use_block_per_atom); + (sneighlist->binned_id,sneighlist->bin_nmax,sneighlist->bin_dim[0],sneighlist->bin_dim[1],globcutoff,sdata->pair.use_block_per_atom,sdata->pair.neighall); else NeighborBuildFullBin_Kernel<0><<>> - (sneighlist->binned_id,sneighlist->bin_nmax,sneighlist->bin_dim[0],sneighlist->bin_dim[1],globcutoff,sdata->pair.use_block_per_atom); + (sneighlist->binned_id,sneighlist->bin_nmax,sneighlist->bin_dim[0],sneighlist->bin_dim[1],globcutoff,sdata->pair.use_block_per_atom,sdata->pair.neighall); } //NeighborBuildFullBin_Kernel_Restrict<<>> // (sneighlist->binned_id,sneighlist->bin_nmax,sneighlist->bin_dim[0],sneighlist->bin_dim[1],globcutoff); diff --git a/lib/cuda/neighbor_kernel.cu b/lib/cuda/neighbor_kernel.cu index ad1a6a8fe7..965aa2b1cf 100644 --- a/lib/cuda/neighbor_kernel.cu +++ b/lib/cuda/neighbor_kernel.cu @@ -21,6 +21,8 @@ This software is distributed under the GNU General Public License. ------------------------------------------------------------------------- */ +#define SBBITS 30 + __global__ void Binning_Kernel(int* binned_id,int bin_nmax,int bin_dim_x,int bin_dim_y,int bin_dim_z, CUDA_FLOAT rez_bin_size_x,CUDA_FLOAT rez_bin_size_y,CUDA_FLOAT rez_bin_size_z) { @@ -109,8 +111,9 @@ __device__ inline int find_special(int3 &n, int* list,int & tag,int3 flag) } template -__global__ void NeighborBuildFullBin_Kernel(int* binned_id,int bin_nmax,int bin_dim_x,int bin_dim_y,CUDA_FLOAT globcutoff,int block_style) +__global__ void NeighborBuildFullBin_Kernel(int* binned_id,int bin_nmax,int bin_dim_x,int bin_dim_y,CUDA_FLOAT globcutoff,int block_style, bool neighall) { + int natoms = neighall?_nall:_nlocal; //const bool domol=false; int bin_dim_z=gridDim.y; CUDA_FLOAT* binned_x=(CUDA_FLOAT*) _buffer; @@ -152,7 +155,7 @@ __global__ void NeighborBuildFullBin_Kernel(int* binned_id,int bin_nmax,int bin_ int jnum=0; int itype; - if(i<_nlocal) + if(i _maxneighbors) ((int*)_buffer)[0] = -jnum; - if(i<_nlocal) + if(i0) { if(block_style) - _neighbors[i*_maxneighbors+k]=j+which*_nall; + _neighbors[i*_maxneighbors+k]=j ^ (which << SBBITS); else - _neighbors[i+k*_nlocal]=j+which*_nall; + _neighbors[i+k*_nlocal]=j ^ (which << SBBITS); } else if(which<0) {