forked from lijiext/lammps
git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@7140 f3b2605a-c512-4ea7-a41b-209d697bcdaa
This commit is contained in:
parent
497290eadc
commit
1049b963d7
|
@ -36,6 +36,9 @@ enum COUL_FORCES {COUL_NONE,COUL_CHARMM,COUL_CHARMM_IMPLICIT,COUL_CUT,COUL_LONG,
|
|||
#define DATA_V_RADIUS 512
|
||||
#define DATA_OMEGA_RMASS 1024
|
||||
|
||||
#define SBBITS 30
|
||||
#define NEIGHMASK 0x3FFFFFFF
|
||||
|
||||
#define MY_PREFIX cuda_pair
|
||||
#define IncludeCommonNeigh
|
||||
#include "cuda_shared.h"
|
||||
|
@ -858,6 +861,9 @@ void Cuda_Pair_PostKernel_AllStyles(cuda_shared_data* sdata, dim3& grid, int& sh
|
|||
|
||||
#include "cuda_pair_kernel.cu"
|
||||
|
||||
#include "pair_manybody_const.h"
|
||||
#include "pair_tersoff_cuda.cu"
|
||||
#include "pair_sw_cuda.cu"
|
||||
|
||||
void Cuda_Pair_UpdateNmax(cuda_shared_data* sdata)
|
||||
{
|
||||
|
|
|
@ -20,7 +20,6 @@
|
|||
|
||||
This software is distributed under the GNU General Public License.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#define EWALD_F 1.12837917
|
||||
#define EWALD_P 0.3275911
|
||||
#define A1 0.254829592
|
||||
|
@ -29,6 +28,10 @@
|
|||
#define A4 -1.453152027
|
||||
#define A5 1.061405429
|
||||
|
||||
// Extracts the two-bit field stored at bit positions SBBITS and SBBITS+1 of
// the neighbor-list entry j. Because '>>' binds tighter than '&', the
// expression evaluates as (j >> SBBITS) & 3, so the result is in the range 0..3.
// The pair kernels shown here use this value as an index into _special_lj and
// _special_coul, and afterwards strip the field from j with NEIGHMASK.
inline __device__ int sbmask(int j) {
|
||||
return j >> SBBITS & 3;
|
||||
}
|
||||
|
||||
template <const PAIR_FORCES pair_type,const COUL_FORCES coul_type,const unsigned int extended_data>
|
||||
__global__ void Pair_Kernel_TpA(int eflag, int vflag,int eflag_atom,int vflag_atom)
|
||||
{
|
||||
|
@ -88,8 +91,8 @@ __global__ void Pair_Kernel_TpA(int eflag, int vflag,int eflag_atom,int vflag_at
|
|||
fytmp = F_F(0.0);
|
||||
fztmp = F_F(0.0);
|
||||
|
||||
if(coul_type!=COUL_NONE)
|
||||
qtmp = fetchQ(i);
|
||||
if(coul_type!=COUL_NONE)
|
||||
qtmp = fetchQ(i);
|
||||
|
||||
jnum = _numneigh[i];
|
||||
jlist = &_neighbors[i];
|
||||
|
@ -103,10 +106,10 @@ __global__ void Pair_Kernel_TpA(int eflag, int vflag,int eflag_atom,int vflag_at
|
|||
{
|
||||
fpair=F_F(0.0);
|
||||
j = jlist[jj*_nlocal];
|
||||
factor_lj = j<_nall ? F_F(1.0) : _special_lj[j/_nall];
|
||||
if(coul_type!=COUL_NONE)
|
||||
factor_coul = j<_nall ? F_F(1.0) : _special_coul[j/_nall];
|
||||
j = j<_nall ? j : j % _nall;
|
||||
factor_lj = _special_lj[sbmask(j)];
|
||||
if(coul_type!=COUL_NONE)
|
||||
factor_coul = _special_coul[sbmask(j)];
|
||||
j &= NEIGHMASK;
|
||||
|
||||
myxtype = fetchXType(j);
|
||||
delx = xtmp - myxtype.x;
|
||||
|
@ -230,7 +233,6 @@ __global__ void Pair_Kernel_TpA(int eflag, int vflag,int eflag_atom,int vflag_at
|
|||
fpair += forcecoul*r2inv;
|
||||
}
|
||||
break;
|
||||
|
||||
}
|
||||
}
|
||||
in_cutoff=in_cutoff || in_coul_cutoff;
|
||||
|
@ -388,12 +390,12 @@ template <const PAIR_FORCES pair_type,const COUL_FORCES coul_type,const unsigned
|
|||
{
|
||||
fpair=F_F(0.0);
|
||||
j = jlist[jj];
|
||||
factor_lj = j<_nall ? F_F(1.0) : _special_lj[j/_nall];
|
||||
if(coul_type!=COUL_NONE)
|
||||
factor_coul = j<_nall ? F_F(1.0) : _special_coul[j/_nall];
|
||||
j = j<_nall ? j : j % _nall;
|
||||
|
||||
myxtype = fetchXType(j);
|
||||
factor_lj = _special_lj[sbmask(j)];
|
||||
if(coul_type!=COUL_NONE)
|
||||
factor_coul = _special_coul[sbmask(j)];
|
||||
j &= NEIGHMASK;
|
||||
|
||||
myxtype = fetchXType(j);
|
||||
|
||||
delx = xtmp - myxtype.x;
|
||||
dely = ytmp - myxtype.y;
|
||||
|
|
|
@ -23,7 +23,7 @@
|
|||
|
||||
extern __shared__ ENERGY_FLOAT sharedmem[];
|
||||
|
||||
static inline __device__ void PairVirialCompute_A_Kernel(int &eflag,int &vflag,int coulflag=0)
|
||||
static inline __device__ void PairVirialCompute_A_Kernel(int eflag,int vflag,int coulflag=0)
|
||||
{
|
||||
__syncthreads();
|
||||
ENERGY_FLOAT* shared=sharedmem;
|
||||
|
|
|
@ -78,22 +78,37 @@
|
|||
//-----------PPPM-----------------
|
||||
//--------------------------------
|
||||
|
||||
#ifndef PPPM_PRECISION
|
||||
#define PPPM_PRECISION CUDA_PRECISION
|
||||
#endif
|
||||
|
||||
#ifdef PPPM_PRECISION
|
||||
#if PPPM_PRECISION == 1
|
||||
#define PPPM_FLOAT float
|
||||
#ifdef float3
|
||||
#define PPPM_FLOAT3 float3
|
||||
#else
|
||||
// Three-component (x, y, z) vector of PPPM_FLOAT for the PPPM_PRECISION == 1
// configuration, defined in the #else branch of the preceding `#ifdef float3`.
// NOTE(review): `#ifdef float3` tests for a preprocessor macro named float3;
// if float3 is only available as a type, that test is false and this struct
// is always the definition in effect -- confirm this is intended.
struct PPPM_FLOAT3
|
||||
{
|
||||
PPPM_FLOAT x;
|
||||
PPPM_FLOAT y;
|
||||
PPPM_FLOAT z;
|
||||
};
|
||||
#endif
|
||||
#define PPPM_F(x) x##f
|
||||
#endif
|
||||
#if PPPM_PRECISION == 2
|
||||
#define PPPM_FLOAT double
|
||||
// Three-component (x, y, z) vector of PPPM_FLOAT for the PPPM_PRECISION == 2
// configuration, in which PPPM_FLOAT is defined as double just above.
struct PPPM_FLOAT3
|
||||
{
|
||||
PPPM_FLOAT x;
|
||||
PPPM_FLOAT y;
|
||||
PPPM_FLOAT z;
|
||||
};
|
||||
#define PPPM_F(x) x
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifndef PPPM_PRECISION
|
||||
#define PPPM_FLOAT CUDA_FLOAT
|
||||
#define PPPM_F(x) CUDA_F(x)
|
||||
#define PPPM_PRECISION CUDA_PRECISION
|
||||
#endif
|
||||
|
||||
//--------------------------------
|
||||
//-----------FORCE-----------------
|
||||
|
|
|
@ -141,6 +141,7 @@ struct cuda_shared_pair // relevant data from pair class
|
|||
int collect_forces_later;
|
||||
int use_block_per_atom;
|
||||
int override_block_per_atom;
|
||||
bool neighall;
|
||||
|
||||
};
|
||||
|
||||
|
@ -217,7 +218,7 @@ struct cuda_shared_pppm
|
|||
int nlower;
|
||||
int nupper;
|
||||
PPPM_FLOAT shiftone;
|
||||
|
||||
PPPM_FLOAT3* fH;
|
||||
};
|
||||
|
||||
struct cuda_shared_comm
|
||||
|
|
|
@ -243,10 +243,10 @@ int Cuda_NeighborBuildFullBin(cuda_shared_data* sdata, cuda_shared_neighlist* sn
|
|||
int exclude=sneighlist->nex_mol|sneighlist->nex_group|sneighlist->nex_type;
|
||||
if(exclude)
|
||||
NeighborBuildFullBin_Kernel<1><<<grid,threads,shared_size>>>
|
||||
(sneighlist->binned_id,sneighlist->bin_nmax,sneighlist->bin_dim[0],sneighlist->bin_dim[1],globcutoff,sdata->pair.use_block_per_atom);
|
||||
(sneighlist->binned_id,sneighlist->bin_nmax,sneighlist->bin_dim[0],sneighlist->bin_dim[1],globcutoff,sdata->pair.use_block_per_atom,sdata->pair.neighall);
|
||||
else
|
||||
NeighborBuildFullBin_Kernel<0><<<grid,threads,shared_size>>>
|
||||
(sneighlist->binned_id,sneighlist->bin_nmax,sneighlist->bin_dim[0],sneighlist->bin_dim[1],globcutoff,sdata->pair.use_block_per_atom);
|
||||
(sneighlist->binned_id,sneighlist->bin_nmax,sneighlist->bin_dim[0],sneighlist->bin_dim[1],globcutoff,sdata->pair.use_block_per_atom,sdata->pair.neighall);
|
||||
}
|
||||
//NeighborBuildFullBin_Kernel_Restrict<<<grid,threads,(2*sizeof(int)+3*sizeof(X_FLOAT))*threads.x+sizeof(int)>>>
|
||||
// (sneighlist->binned_id,sneighlist->bin_nmax,sneighlist->bin_dim[0],sneighlist->bin_dim[1],globcutoff);
|
||||
|
|
|
@ -21,6 +21,8 @@
|
|||
This software is distributed under the GNU General Public License.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#define SBBITS 30
|
||||
|
||||
__global__ void Binning_Kernel(int* binned_id,int bin_nmax,int bin_dim_x,int bin_dim_y,int bin_dim_z,
|
||||
CUDA_FLOAT rez_bin_size_x,CUDA_FLOAT rez_bin_size_y,CUDA_FLOAT rez_bin_size_z)
|
||||
{
|
||||
|
@ -109,8 +111,9 @@ __device__ inline int find_special(int3 &n, int* list,int & tag,int3 flag)
|
|||
}
|
||||
|
||||
template <const unsigned int exclude>
|
||||
__global__ void NeighborBuildFullBin_Kernel(int* binned_id,int bin_nmax,int bin_dim_x,int bin_dim_y,CUDA_FLOAT globcutoff,int block_style)
|
||||
__global__ void NeighborBuildFullBin_Kernel(int* binned_id,int bin_nmax,int bin_dim_x,int bin_dim_y,CUDA_FLOAT globcutoff,int block_style, bool neighall)
|
||||
{
|
||||
int natoms = neighall?_nall:_nlocal;
|
||||
//const bool domol=false;
|
||||
int bin_dim_z=gridDim.y;
|
||||
CUDA_FLOAT* binned_x=(CUDA_FLOAT*) _buffer;
|
||||
|
@ -152,7 +155,7 @@ __global__ void NeighborBuildFullBin_Kernel(int* binned_id,int bin_nmax,int bin_
|
|||
int jnum=0;
|
||||
int itype;
|
||||
|
||||
if(i<_nlocal)
|
||||
if(i<natoms)
|
||||
{
|
||||
jnum = 0;
|
||||
_ilist[i]=i;
|
||||
|
@ -186,7 +189,7 @@ __global__ void NeighborBuildFullBin_Kernel(int* binned_id,int bin_nmax,int bin_
|
|||
int kk=threadIdx.x;
|
||||
for(int k = 0; k < MIN(bin_c-otherActOffset,blockDim.x); ++k)
|
||||
{
|
||||
if(i<_nlocal)
|
||||
if(i<natoms)
|
||||
{
|
||||
kk++;
|
||||
kk=kk<MIN(bin_c-otherActOffset,blockDim.x)?kk:0;
|
||||
|
@ -209,7 +212,7 @@ __global__ void NeighborBuildFullBin_Kernel(int* binned_id,int bin_nmax,int bin_
|
|||
if(block_style)
|
||||
_neighbors[i*_maxneighbors+jnum]= j;
|
||||
else
|
||||
_neighbors[i+jnum*_nlocal]= j;
|
||||
_neighbors[i+jnum*natoms]= j;
|
||||
}
|
||||
++jnum;
|
||||
}
|
||||
|
@ -244,7 +247,7 @@ __global__ void NeighborBuildFullBin_Kernel(int* binned_id,int bin_nmax,int bin_
|
|||
|
||||
for(int k = 0; k < MIN(blockDim.x,obin_c-otherActOffset); ++k)
|
||||
{
|
||||
if(i<_nlocal)
|
||||
if(i<natoms)
|
||||
{
|
||||
int j = other_id[k];
|
||||
if(exclude && exclusion(i,j,itype,_type[j])) continue;
|
||||
|
@ -266,7 +269,7 @@ __global__ void NeighborBuildFullBin_Kernel(int* binned_id,int bin_nmax,int bin_
|
|||
if(block_style)
|
||||
_neighbors[i*_maxneighbors+jnum]= j;
|
||||
else
|
||||
_neighbors[i+jnum*_nlocal]= j;
|
||||
_neighbors[i+jnum*natoms]= j;
|
||||
}
|
||||
++jnum;
|
||||
}
|
||||
|
@ -279,7 +282,7 @@ __global__ void NeighborBuildFullBin_Kernel(int* binned_id,int bin_nmax,int bin_
|
|||
|
||||
if(jnum > _maxneighbors) ((int*)_buffer)[0] = -jnum;
|
||||
|
||||
if(i<_nlocal)
|
||||
if(i<natoms)
|
||||
_numneigh[i] = jnum;
|
||||
}
|
||||
}
|
||||
|
@ -341,9 +344,9 @@ __global__ void FindSpecial(int block_style)
|
|||
if(which>0)
|
||||
{
|
||||
if(block_style)
|
||||
_neighbors[i*_maxneighbors+k]=j+which*_nall;
|
||||
_neighbors[i*_maxneighbors+k]=j ^ (which << SBBITS);
|
||||
else
|
||||
_neighbors[i+k*_nlocal]=j+which*_nall;
|
||||
_neighbors[i+k*_nlocal]=j ^ (which << SBBITS);
|
||||
}
|
||||
else if(which<0)
|
||||
{
|
||||
|
|
Loading…
Reference in New Issue