git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@7140 f3b2605a-c512-4ea7-a41b-209d697bcdaa

This commit is contained in:
sjplimp 2011-10-20 14:32:41 +00:00
parent 497290eadc
commit 1049b963d7
7 changed files with 59 additions and 32 deletions

View File

@ -36,6 +36,9 @@ enum COUL_FORCES {COUL_NONE,COUL_CHARMM,COUL_CHARMM_IMPLICIT,COUL_CUT,COUL_LONG,
#define DATA_V_RADIUS 512
#define DATA_OMEGA_RMASS 1024
#define SBBITS 30
#define NEIGHMASK 0x3FFFFFFF
#define MY_PREFIX cuda_pair
#define IncludeCommonNeigh
#include "cuda_shared.h"
@ -858,6 +861,9 @@ void Cuda_Pair_PostKernel_AllStyles(cuda_shared_data* sdata, dim3& grid, int& sh
#include "cuda_pair_kernel.cu"
#include "pair_manybody_const.h"
#include "pair_tersoff_cuda.cu"
#include "pair_sw_cuda.cu"
void Cuda_Pair_UpdateNmax(cuda_shared_data* sdata)
{

View File

@ -20,7 +20,6 @@
This software is distributed under the GNU General Public License.
------------------------------------------------------------------------- */
#define EWALD_F 1.12837917
#define EWALD_P 0.3275911
#define A1 0.254829592
@ -29,6 +28,10 @@
#define A4 -1.453152027
#define A5 1.061405429
inline __device__ int sbmask(int j) {
return j >> SBBITS & 3;
}
template <const PAIR_FORCES pair_type,const COUL_FORCES coul_type,const unsigned int extended_data>
__global__ void Pair_Kernel_TpA(int eflag, int vflag,int eflag_atom,int vflag_atom)
{
@ -88,8 +91,8 @@ __global__ void Pair_Kernel_TpA(int eflag, int vflag,int eflag_atom,int vflag_at
fytmp = F_F(0.0);
fztmp = F_F(0.0);
if(coul_type!=COUL_NONE)
qtmp = fetchQ(i);
if(coul_type!=COUL_NONE)
qtmp = fetchQ(i);
jnum = _numneigh[i];
jlist = &_neighbors[i];
@ -103,10 +106,10 @@ __global__ void Pair_Kernel_TpA(int eflag, int vflag,int eflag_atom,int vflag_at
{
fpair=F_F(0.0);
j = jlist[jj*_nlocal];
factor_lj = j<_nall ? F_F(1.0) : _special_lj[j/_nall];
if(coul_type!=COUL_NONE)
factor_coul = j<_nall ? F_F(1.0) : _special_coul[j/_nall];
j = j<_nall ? j : j % _nall;
factor_lj = _special_lj[sbmask(j)];
if(coul_type!=COUL_NONE)
factor_coul = _special_coul[sbmask(j)];
j &= NEIGHMASK;
myxtype = fetchXType(j);
delx = xtmp - myxtype.x;
@ -230,7 +233,6 @@ __global__ void Pair_Kernel_TpA(int eflag, int vflag,int eflag_atom,int vflag_at
fpair += forcecoul*r2inv;
}
break;
}
}
in_cutoff=in_cutoff || in_coul_cutoff;
@ -388,12 +390,12 @@ template <const PAIR_FORCES pair_type,const COUL_FORCES coul_type,const unsigned
{
fpair=F_F(0.0);
j = jlist[jj];
factor_lj = j<_nall ? F_F(1.0) : _special_lj[j/_nall];
if(coul_type!=COUL_NONE)
factor_coul = j<_nall ? F_F(1.0) : _special_coul[j/_nall];
j = j<_nall ? j : j % _nall;
myxtype = fetchXType(j);
factor_lj = _special_lj[sbmask(j)];
if(coul_type!=COUL_NONE)
factor_coul = _special_coul[sbmask(j)];
j &= NEIGHMASK;
myxtype = fetchXType(j);
delx = xtmp - myxtype.x;
dely = ytmp - myxtype.y;

View File

@ -23,7 +23,7 @@
extern __shared__ ENERGY_FLOAT sharedmem[];
static inline __device__ void PairVirialCompute_A_Kernel(int &eflag,int &vflag,int coulflag=0)
static inline __device__ void PairVirialCompute_A_Kernel(int eflag,int vflag,int coulflag=0)
{
__syncthreads();
ENERGY_FLOAT* shared=sharedmem;

View File

@ -78,22 +78,37 @@
//-----------PPPM-----------------
//--------------------------------
#ifndef PPPM_PRECISION
#define PPPM_PRECISION CUDA_PRECISION
#endif
#ifdef PPPM_PRECISION
#if PPPM_PRECISION == 1
#define PPPM_FLOAT float
#ifdef float3
#define PPPM_FLOAT3 float3
#else
struct PPPM_FLOAT3
{
PPPM_FLOAT x;
PPPM_FLOAT y;
PPPM_FLOAT z;
};
#endif
#define PPPM_F(x) x##f
#endif
#if PPPM_PRECISION == 2
#define PPPM_FLOAT double
struct PPPM_FLOAT3
{
PPPM_FLOAT x;
PPPM_FLOAT y;
PPPM_FLOAT z;
};
#define PPPM_F(x) x
#endif
#endif
#ifndef PPPM_PRECISION
#define PPPM_FLOAT CUDA_FLOAT
#define PPPM_F(x) CUDA_F(x)
#define PPPM_PRECISION CUDA_PRECISION
#endif
//--------------------------------
//-----------FORCE-----------------

View File

@ -141,6 +141,7 @@ struct cuda_shared_pair // relevent data from pair class
int collect_forces_later;
int use_block_per_atom;
int override_block_per_atom;
bool neighall;
};
@ -217,7 +218,7 @@ struct cuda_shared_pppm
int nlower;
int nupper;
PPPM_FLOAT shiftone;
PPPM_FLOAT3* fH;
};
struct cuda_shared_comm

View File

@ -243,10 +243,10 @@ int Cuda_NeighborBuildFullBin(cuda_shared_data* sdata, cuda_shared_neighlist* sn
int exclude=sneighlist->nex_mol|sneighlist->nex_group|sneighlist->nex_type;
if(exclude)
NeighborBuildFullBin_Kernel<1><<<grid,threads,shared_size>>>
(sneighlist->binned_id,sneighlist->bin_nmax,sneighlist->bin_dim[0],sneighlist->bin_dim[1],globcutoff,sdata->pair.use_block_per_atom);
(sneighlist->binned_id,sneighlist->bin_nmax,sneighlist->bin_dim[0],sneighlist->bin_dim[1],globcutoff,sdata->pair.use_block_per_atom,sdata->pair.neighall);
else
NeighborBuildFullBin_Kernel<0><<<grid,threads,shared_size>>>
(sneighlist->binned_id,sneighlist->bin_nmax,sneighlist->bin_dim[0],sneighlist->bin_dim[1],globcutoff,sdata->pair.use_block_per_atom);
(sneighlist->binned_id,sneighlist->bin_nmax,sneighlist->bin_dim[0],sneighlist->bin_dim[1],globcutoff,sdata->pair.use_block_per_atom,sdata->pair.neighall);
}
//NeighborBuildFullBin_Kernel_Restrict<<<grid,threads,(2*sizeof(int)+3*sizeof(X_FLOAT))*threads.x+sizeof(int)>>>
// (sneighlist->binned_id,sneighlist->bin_nmax,sneighlist->bin_dim[0],sneighlist->bin_dim[1],globcutoff);

View File

@ -21,6 +21,8 @@
This software is distributed under the GNU General Public License.
------------------------------------------------------------------------- */
#define SBBITS 30
__global__ void Binning_Kernel(int* binned_id,int bin_nmax,int bin_dim_x,int bin_dim_y,int bin_dim_z,
CUDA_FLOAT rez_bin_size_x,CUDA_FLOAT rez_bin_size_y,CUDA_FLOAT rez_bin_size_z)
{
@ -109,8 +111,9 @@ __device__ inline int find_special(int3 &n, int* list,int & tag,int3 flag)
}
template <const unsigned int exclude>
__global__ void NeighborBuildFullBin_Kernel(int* binned_id,int bin_nmax,int bin_dim_x,int bin_dim_y,CUDA_FLOAT globcutoff,int block_style)
__global__ void NeighborBuildFullBin_Kernel(int* binned_id,int bin_nmax,int bin_dim_x,int bin_dim_y,CUDA_FLOAT globcutoff,int block_style, bool neighall)
{
int natoms = neighall?_nall:_nlocal;
//const bool domol=false;
int bin_dim_z=gridDim.y;
CUDA_FLOAT* binned_x=(CUDA_FLOAT*) _buffer;
@ -152,7 +155,7 @@ __global__ void NeighborBuildFullBin_Kernel(int* binned_id,int bin_nmax,int bin_
int jnum=0;
int itype;
if(i<_nlocal)
if(i<natoms)
{
jnum = 0;
_ilist[i]=i;
@ -186,7 +189,7 @@ __global__ void NeighborBuildFullBin_Kernel(int* binned_id,int bin_nmax,int bin_
int kk=threadIdx.x;
for(int k = 0; k < MIN(bin_c-otherActOffset,blockDim.x); ++k)
{
if(i<_nlocal)
if(i<natoms)
{
kk++;
kk=kk<MIN(bin_c-otherActOffset,blockDim.x)?kk:0;
@ -209,7 +212,7 @@ __global__ void NeighborBuildFullBin_Kernel(int* binned_id,int bin_nmax,int bin_
if(block_style)
_neighbors[i*_maxneighbors+jnum]= j;
else
_neighbors[i+jnum*_nlocal]= j;
_neighbors[i+jnum*natoms]= j;
}
++jnum;
}
@ -244,7 +247,7 @@ __global__ void NeighborBuildFullBin_Kernel(int* binned_id,int bin_nmax,int bin_
for(int k = 0; k < MIN(blockDim.x,obin_c-otherActOffset); ++k)
{
if(i<_nlocal)
if(i<natoms)
{
int j = other_id[k];
if(exclude && exclusion(i,j,itype,_type[j])) continue;
@ -266,7 +269,7 @@ __global__ void NeighborBuildFullBin_Kernel(int* binned_id,int bin_nmax,int bin_
if(block_style)
_neighbors[i*_maxneighbors+jnum]= j;
else
_neighbors[i+jnum*_nlocal]= j;
_neighbors[i+jnum*natoms]= j;
}
++jnum;
}
@ -279,7 +282,7 @@ __global__ void NeighborBuildFullBin_Kernel(int* binned_id,int bin_nmax,int bin_
if(jnum > _maxneighbors) ((int*)_buffer)[0] = -jnum;
if(i<_nlocal)
if(i<natoms)
_numneigh[i] = jnum;
}
}
@ -341,9 +344,9 @@ __global__ void FindSpecial(int block_style)
if(which>0)
{
if(block_style)
_neighbors[i*_maxneighbors+k]=j+which*_nall;
_neighbors[i*_maxneighbors+k]=j ^ (which << SBBITS);
else
_neighbors[i+k*_nlocal]=j+which*_nall;
_neighbors[i+k*_nlocal]=j ^ (which << SBBITS);
}
else if(which<0)
{