forked from lijiext/lammps
git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@7140 f3b2605a-c512-4ea7-a41b-209d697bcdaa
This commit is contained in:
parent
497290eadc
commit
1049b963d7
|
@ -36,6 +36,9 @@ enum COUL_FORCES {COUL_NONE,COUL_CHARMM,COUL_CHARMM_IMPLICIT,COUL_CUT,COUL_LONG,
|
||||||
#define DATA_V_RADIUS 512
|
#define DATA_V_RADIUS 512
|
||||||
#define DATA_OMEGA_RMASS 1024
|
#define DATA_OMEGA_RMASS 1024
|
||||||
|
|
||||||
|
#define SBBITS 30
|
||||||
|
#define NEIGHMASK 0x3FFFFFFF
|
||||||
|
|
||||||
#define MY_PREFIX cuda_pair
|
#define MY_PREFIX cuda_pair
|
||||||
#define IncludeCommonNeigh
|
#define IncludeCommonNeigh
|
||||||
#include "cuda_shared.h"
|
#include "cuda_shared.h"
|
||||||
|
@ -858,6 +861,9 @@ void Cuda_Pair_PostKernel_AllStyles(cuda_shared_data* sdata, dim3& grid, int& sh
|
||||||
|
|
||||||
#include "cuda_pair_kernel.cu"
|
#include "cuda_pair_kernel.cu"
|
||||||
|
|
||||||
|
#include "pair_manybody_const.h"
|
||||||
|
#include "pair_tersoff_cuda.cu"
|
||||||
|
#include "pair_sw_cuda.cu"
|
||||||
|
|
||||||
void Cuda_Pair_UpdateNmax(cuda_shared_data* sdata)
|
void Cuda_Pair_UpdateNmax(cuda_shared_data* sdata)
|
||||||
{
|
{
|
||||||
|
|
|
@ -20,7 +20,6 @@
|
||||||
|
|
||||||
This software is distributed under the GNU General Public License.
|
This software is distributed under the GNU General Public License.
|
||||||
------------------------------------------------------------------------- */
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
#define EWALD_F 1.12837917
|
#define EWALD_F 1.12837917
|
||||||
#define EWALD_P 0.3275911
|
#define EWALD_P 0.3275911
|
||||||
#define A1 0.254829592
|
#define A1 0.254829592
|
||||||
|
@ -29,6 +28,10 @@
|
||||||
#define A4 -1.453152027
|
#define A4 -1.453152027
|
||||||
#define A5 1.061405429
|
#define A5 1.061405429
|
||||||
|
|
||||||
|
inline __device__ int sbmask(int j) {
|
||||||
|
return j >> SBBITS & 3;
|
||||||
|
}
|
||||||
|
|
||||||
template <const PAIR_FORCES pair_type,const COUL_FORCES coul_type,const unsigned int extended_data>
|
template <const PAIR_FORCES pair_type,const COUL_FORCES coul_type,const unsigned int extended_data>
|
||||||
__global__ void Pair_Kernel_TpA(int eflag, int vflag,int eflag_atom,int vflag_atom)
|
__global__ void Pair_Kernel_TpA(int eflag, int vflag,int eflag_atom,int vflag_atom)
|
||||||
{
|
{
|
||||||
|
@ -88,8 +91,8 @@ __global__ void Pair_Kernel_TpA(int eflag, int vflag,int eflag_atom,int vflag_at
|
||||||
fytmp = F_F(0.0);
|
fytmp = F_F(0.0);
|
||||||
fztmp = F_F(0.0);
|
fztmp = F_F(0.0);
|
||||||
|
|
||||||
if(coul_type!=COUL_NONE)
|
if(coul_type!=COUL_NONE)
|
||||||
qtmp = fetchQ(i);
|
qtmp = fetchQ(i);
|
||||||
|
|
||||||
jnum = _numneigh[i];
|
jnum = _numneigh[i];
|
||||||
jlist = &_neighbors[i];
|
jlist = &_neighbors[i];
|
||||||
|
@ -103,10 +106,10 @@ __global__ void Pair_Kernel_TpA(int eflag, int vflag,int eflag_atom,int vflag_at
|
||||||
{
|
{
|
||||||
fpair=F_F(0.0);
|
fpair=F_F(0.0);
|
||||||
j = jlist[jj*_nlocal];
|
j = jlist[jj*_nlocal];
|
||||||
factor_lj = j<_nall ? F_F(1.0) : _special_lj[j/_nall];
|
factor_lj = _special_lj[sbmask(j)];
|
||||||
if(coul_type!=COUL_NONE)
|
if(coul_type!=COUL_NONE)
|
||||||
factor_coul = j<_nall ? F_F(1.0) : _special_coul[j/_nall];
|
factor_coul = _special_coul[sbmask(j)];
|
||||||
j = j<_nall ? j : j % _nall;
|
j &= NEIGHMASK;
|
||||||
|
|
||||||
myxtype = fetchXType(j);
|
myxtype = fetchXType(j);
|
||||||
delx = xtmp - myxtype.x;
|
delx = xtmp - myxtype.x;
|
||||||
|
@ -230,7 +233,6 @@ __global__ void Pair_Kernel_TpA(int eflag, int vflag,int eflag_atom,int vflag_at
|
||||||
fpair += forcecoul*r2inv;
|
fpair += forcecoul*r2inv;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
in_cutoff=in_cutoff || in_coul_cutoff;
|
in_cutoff=in_cutoff || in_coul_cutoff;
|
||||||
|
@ -388,12 +390,12 @@ template <const PAIR_FORCES pair_type,const COUL_FORCES coul_type,const unsigned
|
||||||
{
|
{
|
||||||
fpair=F_F(0.0);
|
fpair=F_F(0.0);
|
||||||
j = jlist[jj];
|
j = jlist[jj];
|
||||||
factor_lj = j<_nall ? F_F(1.0) : _special_lj[j/_nall];
|
factor_lj = _special_lj[sbmask(j)];
|
||||||
if(coul_type!=COUL_NONE)
|
if(coul_type!=COUL_NONE)
|
||||||
factor_coul = j<_nall ? F_F(1.0) : _special_coul[j/_nall];
|
factor_coul = _special_coul[sbmask(j)];
|
||||||
j = j<_nall ? j : j % _nall;
|
j &= NEIGHMASK;
|
||||||
|
|
||||||
myxtype = fetchXType(j);
|
myxtype = fetchXType(j);
|
||||||
|
|
||||||
delx = xtmp - myxtype.x;
|
delx = xtmp - myxtype.x;
|
||||||
dely = ytmp - myxtype.y;
|
dely = ytmp - myxtype.y;
|
||||||
|
|
|
@ -23,7 +23,7 @@
|
||||||
|
|
||||||
extern __shared__ ENERGY_FLOAT sharedmem[];
|
extern __shared__ ENERGY_FLOAT sharedmem[];
|
||||||
|
|
||||||
static inline __device__ void PairVirialCompute_A_Kernel(int &eflag,int &vflag,int coulflag=0)
|
static inline __device__ void PairVirialCompute_A_Kernel(int eflag,int vflag,int coulflag=0)
|
||||||
{
|
{
|
||||||
__syncthreads();
|
__syncthreads();
|
||||||
ENERGY_FLOAT* shared=sharedmem;
|
ENERGY_FLOAT* shared=sharedmem;
|
||||||
|
|
|
@ -78,22 +78,37 @@
|
||||||
//-----------PPPM-----------------
|
//-----------PPPM-----------------
|
||||||
//--------------------------------
|
//--------------------------------
|
||||||
|
|
||||||
|
#ifndef PPPM_PRECISION
|
||||||
|
#define PPPM_PRECISION CUDA_PRECISION
|
||||||
|
#endif
|
||||||
|
|
||||||
#ifdef PPPM_PRECISION
|
#ifdef PPPM_PRECISION
|
||||||
#if PPPM_PRECISION == 1
|
#if PPPM_PRECISION == 1
|
||||||
#define PPPM_FLOAT float
|
#define PPPM_FLOAT float
|
||||||
|
#ifdef float3
|
||||||
|
#define PPPM_FLOAT3 float3
|
||||||
|
#else
|
||||||
|
struct PPPM_FLOAT3
|
||||||
|
{
|
||||||
|
PPPM_FLOAT x;
|
||||||
|
PPPM_FLOAT y;
|
||||||
|
PPPM_FLOAT z;
|
||||||
|
};
|
||||||
|
#endif
|
||||||
#define PPPM_F(x) x##f
|
#define PPPM_F(x) x##f
|
||||||
#endif
|
#endif
|
||||||
#if PPPM_PRECISION == 2
|
#if PPPM_PRECISION == 2
|
||||||
#define PPPM_FLOAT double
|
#define PPPM_FLOAT double
|
||||||
|
struct PPPM_FLOAT3
|
||||||
|
{
|
||||||
|
PPPM_FLOAT x;
|
||||||
|
PPPM_FLOAT y;
|
||||||
|
PPPM_FLOAT z;
|
||||||
|
};
|
||||||
#define PPPM_F(x) x
|
#define PPPM_F(x) x
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifndef PPPM_PRECISION
|
|
||||||
#define PPPM_FLOAT CUDA_FLOAT
|
|
||||||
#define PPPM_F(x) CUDA_F(x)
|
|
||||||
#define PPPM_PRECISION CUDA_PRECISION
|
|
||||||
#endif
|
|
||||||
|
|
||||||
//--------------------------------
|
//--------------------------------
|
||||||
//-----------FORCE-----------------
|
//-----------FORCE-----------------
|
||||||
|
|
|
@ -141,6 +141,7 @@ struct cuda_shared_pair // relevent data from pair class
|
||||||
int collect_forces_later;
|
int collect_forces_later;
|
||||||
int use_block_per_atom;
|
int use_block_per_atom;
|
||||||
int override_block_per_atom;
|
int override_block_per_atom;
|
||||||
|
bool neighall;
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -217,7 +218,7 @@ struct cuda_shared_pppm
|
||||||
int nlower;
|
int nlower;
|
||||||
int nupper;
|
int nupper;
|
||||||
PPPM_FLOAT shiftone;
|
PPPM_FLOAT shiftone;
|
||||||
|
PPPM_FLOAT3* fH;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct cuda_shared_comm
|
struct cuda_shared_comm
|
||||||
|
|
|
@ -243,10 +243,10 @@ int Cuda_NeighborBuildFullBin(cuda_shared_data* sdata, cuda_shared_neighlist* sn
|
||||||
int exclude=sneighlist->nex_mol|sneighlist->nex_group|sneighlist->nex_type;
|
int exclude=sneighlist->nex_mol|sneighlist->nex_group|sneighlist->nex_type;
|
||||||
if(exclude)
|
if(exclude)
|
||||||
NeighborBuildFullBin_Kernel<1><<<grid,threads,shared_size>>>
|
NeighborBuildFullBin_Kernel<1><<<grid,threads,shared_size>>>
|
||||||
(sneighlist->binned_id,sneighlist->bin_nmax,sneighlist->bin_dim[0],sneighlist->bin_dim[1],globcutoff,sdata->pair.use_block_per_atom);
|
(sneighlist->binned_id,sneighlist->bin_nmax,sneighlist->bin_dim[0],sneighlist->bin_dim[1],globcutoff,sdata->pair.use_block_per_atom,sdata->pair.neighall);
|
||||||
else
|
else
|
||||||
NeighborBuildFullBin_Kernel<0><<<grid,threads,shared_size>>>
|
NeighborBuildFullBin_Kernel<0><<<grid,threads,shared_size>>>
|
||||||
(sneighlist->binned_id,sneighlist->bin_nmax,sneighlist->bin_dim[0],sneighlist->bin_dim[1],globcutoff,sdata->pair.use_block_per_atom);
|
(sneighlist->binned_id,sneighlist->bin_nmax,sneighlist->bin_dim[0],sneighlist->bin_dim[1],globcutoff,sdata->pair.use_block_per_atom,sdata->pair.neighall);
|
||||||
}
|
}
|
||||||
//NeighborBuildFullBin_Kernel_Restrict<<<grid,threads,(2*sizeof(int)+3*sizeof(X_FLOAT))*threads.x+sizeof(int)>>>
|
//NeighborBuildFullBin_Kernel_Restrict<<<grid,threads,(2*sizeof(int)+3*sizeof(X_FLOAT))*threads.x+sizeof(int)>>>
|
||||||
// (sneighlist->binned_id,sneighlist->bin_nmax,sneighlist->bin_dim[0],sneighlist->bin_dim[1],globcutoff);
|
// (sneighlist->binned_id,sneighlist->bin_nmax,sneighlist->bin_dim[0],sneighlist->bin_dim[1],globcutoff);
|
||||||
|
|
|
@ -21,6 +21,8 @@
|
||||||
This software is distributed under the GNU General Public License.
|
This software is distributed under the GNU General Public License.
|
||||||
------------------------------------------------------------------------- */
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
#define SBBITS 30
|
||||||
|
|
||||||
__global__ void Binning_Kernel(int* binned_id,int bin_nmax,int bin_dim_x,int bin_dim_y,int bin_dim_z,
|
__global__ void Binning_Kernel(int* binned_id,int bin_nmax,int bin_dim_x,int bin_dim_y,int bin_dim_z,
|
||||||
CUDA_FLOAT rez_bin_size_x,CUDA_FLOAT rez_bin_size_y,CUDA_FLOAT rez_bin_size_z)
|
CUDA_FLOAT rez_bin_size_x,CUDA_FLOAT rez_bin_size_y,CUDA_FLOAT rez_bin_size_z)
|
||||||
{
|
{
|
||||||
|
@ -109,8 +111,9 @@ __device__ inline int find_special(int3 &n, int* list,int & tag,int3 flag)
|
||||||
}
|
}
|
||||||
|
|
||||||
template <const unsigned int exclude>
|
template <const unsigned int exclude>
|
||||||
__global__ void NeighborBuildFullBin_Kernel(int* binned_id,int bin_nmax,int bin_dim_x,int bin_dim_y,CUDA_FLOAT globcutoff,int block_style)
|
__global__ void NeighborBuildFullBin_Kernel(int* binned_id,int bin_nmax,int bin_dim_x,int bin_dim_y,CUDA_FLOAT globcutoff,int block_style, bool neighall)
|
||||||
{
|
{
|
||||||
|
int natoms = neighall?_nall:_nlocal;
|
||||||
//const bool domol=false;
|
//const bool domol=false;
|
||||||
int bin_dim_z=gridDim.y;
|
int bin_dim_z=gridDim.y;
|
||||||
CUDA_FLOAT* binned_x=(CUDA_FLOAT*) _buffer;
|
CUDA_FLOAT* binned_x=(CUDA_FLOAT*) _buffer;
|
||||||
|
@ -152,7 +155,7 @@ __global__ void NeighborBuildFullBin_Kernel(int* binned_id,int bin_nmax,int bin_
|
||||||
int jnum=0;
|
int jnum=0;
|
||||||
int itype;
|
int itype;
|
||||||
|
|
||||||
if(i<_nlocal)
|
if(i<natoms)
|
||||||
{
|
{
|
||||||
jnum = 0;
|
jnum = 0;
|
||||||
_ilist[i]=i;
|
_ilist[i]=i;
|
||||||
|
@ -186,7 +189,7 @@ __global__ void NeighborBuildFullBin_Kernel(int* binned_id,int bin_nmax,int bin_
|
||||||
int kk=threadIdx.x;
|
int kk=threadIdx.x;
|
||||||
for(int k = 0; k < MIN(bin_c-otherActOffset,blockDim.x); ++k)
|
for(int k = 0; k < MIN(bin_c-otherActOffset,blockDim.x); ++k)
|
||||||
{
|
{
|
||||||
if(i<_nlocal)
|
if(i<natoms)
|
||||||
{
|
{
|
||||||
kk++;
|
kk++;
|
||||||
kk=kk<MIN(bin_c-otherActOffset,blockDim.x)?kk:0;
|
kk=kk<MIN(bin_c-otherActOffset,blockDim.x)?kk:0;
|
||||||
|
@ -209,7 +212,7 @@ __global__ void NeighborBuildFullBin_Kernel(int* binned_id,int bin_nmax,int bin_
|
||||||
if(block_style)
|
if(block_style)
|
||||||
_neighbors[i*_maxneighbors+jnum]= j;
|
_neighbors[i*_maxneighbors+jnum]= j;
|
||||||
else
|
else
|
||||||
_neighbors[i+jnum*_nlocal]= j;
|
_neighbors[i+jnum*natoms]= j;
|
||||||
}
|
}
|
||||||
++jnum;
|
++jnum;
|
||||||
}
|
}
|
||||||
|
@ -244,7 +247,7 @@ __global__ void NeighborBuildFullBin_Kernel(int* binned_id,int bin_nmax,int bin_
|
||||||
|
|
||||||
for(int k = 0; k < MIN(blockDim.x,obin_c-otherActOffset); ++k)
|
for(int k = 0; k < MIN(blockDim.x,obin_c-otherActOffset); ++k)
|
||||||
{
|
{
|
||||||
if(i<_nlocal)
|
if(i<natoms)
|
||||||
{
|
{
|
||||||
int j = other_id[k];
|
int j = other_id[k];
|
||||||
if(exclude && exclusion(i,j,itype,_type[j])) continue;
|
if(exclude && exclusion(i,j,itype,_type[j])) continue;
|
||||||
|
@ -266,7 +269,7 @@ __global__ void NeighborBuildFullBin_Kernel(int* binned_id,int bin_nmax,int bin_
|
||||||
if(block_style)
|
if(block_style)
|
||||||
_neighbors[i*_maxneighbors+jnum]= j;
|
_neighbors[i*_maxneighbors+jnum]= j;
|
||||||
else
|
else
|
||||||
_neighbors[i+jnum*_nlocal]= j;
|
_neighbors[i+jnum*natoms]= j;
|
||||||
}
|
}
|
||||||
++jnum;
|
++jnum;
|
||||||
}
|
}
|
||||||
|
@ -279,7 +282,7 @@ __global__ void NeighborBuildFullBin_Kernel(int* binned_id,int bin_nmax,int bin_
|
||||||
|
|
||||||
if(jnum > _maxneighbors) ((int*)_buffer)[0] = -jnum;
|
if(jnum > _maxneighbors) ((int*)_buffer)[0] = -jnum;
|
||||||
|
|
||||||
if(i<_nlocal)
|
if(i<natoms)
|
||||||
_numneigh[i] = jnum;
|
_numneigh[i] = jnum;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -341,9 +344,9 @@ __global__ void FindSpecial(int block_style)
|
||||||
if(which>0)
|
if(which>0)
|
||||||
{
|
{
|
||||||
if(block_style)
|
if(block_style)
|
||||||
_neighbors[i*_maxneighbors+k]=j+which*_nall;
|
_neighbors[i*_maxneighbors+k]=j ^ (which << SBBITS);
|
||||||
else
|
else
|
||||||
_neighbors[i+k*_nlocal]=j+which*_nall;
|
_neighbors[i+k*_nlocal]=j ^ (which << SBBITS);
|
||||||
}
|
}
|
||||||
else if(which<0)
|
else if(which<0)
|
||||||
{
|
{
|
||||||
|
|
Loading…
Reference in New Issue