git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@7140 f3b2605a-c512-4ea7-a41b-209d697bcdaa

This commit is contained in:
sjplimp 2011-10-20 14:32:41 +00:00
parent 497290eadc
commit 1049b963d7
7 changed files with 59 additions and 32 deletions

View File

@ -36,6 +36,9 @@ enum COUL_FORCES {COUL_NONE,COUL_CHARMM,COUL_CHARMM_IMPLICIT,COUL_CUT,COUL_LONG,
#define DATA_V_RADIUS 512 #define DATA_V_RADIUS 512
#define DATA_OMEGA_RMASS 1024 #define DATA_OMEGA_RMASS 1024
#define SBBITS 30
#define NEIGHMASK 0x3FFFFFFF
#define MY_PREFIX cuda_pair #define MY_PREFIX cuda_pair
#define IncludeCommonNeigh #define IncludeCommonNeigh
#include "cuda_shared.h" #include "cuda_shared.h"
@ -858,6 +861,9 @@ void Cuda_Pair_PostKernel_AllStyles(cuda_shared_data* sdata, dim3& grid, int& sh
#include "cuda_pair_kernel.cu" #include "cuda_pair_kernel.cu"
#include "pair_manybody_const.h"
#include "pair_tersoff_cuda.cu"
#include "pair_sw_cuda.cu"
void Cuda_Pair_UpdateNmax(cuda_shared_data* sdata) void Cuda_Pair_UpdateNmax(cuda_shared_data* sdata)
{ {

View File

@ -20,7 +20,6 @@
This software is distributed under the GNU General Public License. This software is distributed under the GNU General Public License.
------------------------------------------------------------------------- */ ------------------------------------------------------------------------- */
#define EWALD_F 1.12837917 #define EWALD_F 1.12837917
#define EWALD_P 0.3275911 #define EWALD_P 0.3275911
#define A1 0.254829592 #define A1 0.254829592
@ -29,6 +28,10 @@
#define A4 -1.453152027 #define A4 -1.453152027
#define A5 1.061405429 #define A5 1.061405429
inline __device__ int sbmask(int j) {
return j >> SBBITS & 3;
}
template <const PAIR_FORCES pair_type,const COUL_FORCES coul_type,const unsigned int extended_data> template <const PAIR_FORCES pair_type,const COUL_FORCES coul_type,const unsigned int extended_data>
__global__ void Pair_Kernel_TpA(int eflag, int vflag,int eflag_atom,int vflag_atom) __global__ void Pair_Kernel_TpA(int eflag, int vflag,int eflag_atom,int vflag_atom)
{ {
@ -103,10 +106,10 @@ __global__ void Pair_Kernel_TpA(int eflag, int vflag,int eflag_atom,int vflag_at
{ {
fpair=F_F(0.0); fpair=F_F(0.0);
j = jlist[jj*_nlocal]; j = jlist[jj*_nlocal];
factor_lj = j<_nall ? F_F(1.0) : _special_lj[j/_nall]; factor_lj = _special_lj[sbmask(j)];
if(coul_type!=COUL_NONE) if(coul_type!=COUL_NONE)
factor_coul = j<_nall ? F_F(1.0) : _special_coul[j/_nall]; factor_coul = _special_coul[sbmask(j)];
j = j<_nall ? j : j % _nall; j &= NEIGHMASK;
myxtype = fetchXType(j); myxtype = fetchXType(j);
delx = xtmp - myxtype.x; delx = xtmp - myxtype.x;
@ -230,7 +233,6 @@ __global__ void Pair_Kernel_TpA(int eflag, int vflag,int eflag_atom,int vflag_at
fpair += forcecoul*r2inv; fpair += forcecoul*r2inv;
} }
break; break;
} }
} }
in_cutoff=in_cutoff || in_coul_cutoff; in_cutoff=in_cutoff || in_coul_cutoff;
@ -388,10 +390,10 @@ template <const PAIR_FORCES pair_type,const COUL_FORCES coul_type,const unsigned
{ {
fpair=F_F(0.0); fpair=F_F(0.0);
j = jlist[jj]; j = jlist[jj];
factor_lj = j<_nall ? F_F(1.0) : _special_lj[j/_nall]; factor_lj = _special_lj[sbmask(j)];
if(coul_type!=COUL_NONE) if(coul_type!=COUL_NONE)
factor_coul = j<_nall ? F_F(1.0) : _special_coul[j/_nall]; factor_coul = _special_coul[sbmask(j)];
j = j<_nall ? j : j % _nall; j &= NEIGHMASK;
myxtype = fetchXType(j); myxtype = fetchXType(j);

View File

@ -23,7 +23,7 @@
extern __shared__ ENERGY_FLOAT sharedmem[]; extern __shared__ ENERGY_FLOAT sharedmem[];
static inline __device__ void PairVirialCompute_A_Kernel(int &eflag,int &vflag,int coulflag=0) static inline __device__ void PairVirialCompute_A_Kernel(int eflag,int vflag,int coulflag=0)
{ {
__syncthreads(); __syncthreads();
ENERGY_FLOAT* shared=sharedmem; ENERGY_FLOAT* shared=sharedmem;

View File

@ -78,22 +78,37 @@
//-----------PPPM----------------- //-----------PPPM-----------------
//-------------------------------- //--------------------------------
#ifndef PPPM_PRECISION
#define PPPM_PRECISION CUDA_PRECISION
#endif
#ifdef PPPM_PRECISION #ifdef PPPM_PRECISION
#if PPPM_PRECISION == 1 #if PPPM_PRECISION == 1
#define PPPM_FLOAT float #define PPPM_FLOAT float
#ifdef float3
#define PPPM_FLOAT3 float3
#else
struct PPPM_FLOAT3
{
PPPM_FLOAT x;
PPPM_FLOAT y;
PPPM_FLOAT z;
};
#endif
#define PPPM_F(x) x##f #define PPPM_F(x) x##f
#endif #endif
#if PPPM_PRECISION == 2 #if PPPM_PRECISION == 2
#define PPPM_FLOAT double #define PPPM_FLOAT double
struct PPPM_FLOAT3
{
PPPM_FLOAT x;
PPPM_FLOAT y;
PPPM_FLOAT z;
};
#define PPPM_F(x) x #define PPPM_F(x) x
#endif #endif
#endif #endif
#ifndef PPPM_PRECISION
#define PPPM_FLOAT CUDA_FLOAT
#define PPPM_F(x) CUDA_F(x)
#define PPPM_PRECISION CUDA_PRECISION
#endif
//-------------------------------- //--------------------------------
//-----------FORCE----------------- //-----------FORCE-----------------

View File

@ -141,6 +141,7 @@ struct cuda_shared_pair // relevent data from pair class
int collect_forces_later; int collect_forces_later;
int use_block_per_atom; int use_block_per_atom;
int override_block_per_atom; int override_block_per_atom;
bool neighall;
}; };
@ -217,7 +218,7 @@ struct cuda_shared_pppm
int nlower; int nlower;
int nupper; int nupper;
PPPM_FLOAT shiftone; PPPM_FLOAT shiftone;
PPPM_FLOAT3* fH;
}; };
struct cuda_shared_comm struct cuda_shared_comm

View File

@ -243,10 +243,10 @@ int Cuda_NeighborBuildFullBin(cuda_shared_data* sdata, cuda_shared_neighlist* sn
int exclude=sneighlist->nex_mol|sneighlist->nex_group|sneighlist->nex_type; int exclude=sneighlist->nex_mol|sneighlist->nex_group|sneighlist->nex_type;
if(exclude) if(exclude)
NeighborBuildFullBin_Kernel<1><<<grid,threads,shared_size>>> NeighborBuildFullBin_Kernel<1><<<grid,threads,shared_size>>>
(sneighlist->binned_id,sneighlist->bin_nmax,sneighlist->bin_dim[0],sneighlist->bin_dim[1],globcutoff,sdata->pair.use_block_per_atom); (sneighlist->binned_id,sneighlist->bin_nmax,sneighlist->bin_dim[0],sneighlist->bin_dim[1],globcutoff,sdata->pair.use_block_per_atom,sdata->pair.neighall);
else else
NeighborBuildFullBin_Kernel<0><<<grid,threads,shared_size>>> NeighborBuildFullBin_Kernel<0><<<grid,threads,shared_size>>>
(sneighlist->binned_id,sneighlist->bin_nmax,sneighlist->bin_dim[0],sneighlist->bin_dim[1],globcutoff,sdata->pair.use_block_per_atom); (sneighlist->binned_id,sneighlist->bin_nmax,sneighlist->bin_dim[0],sneighlist->bin_dim[1],globcutoff,sdata->pair.use_block_per_atom,sdata->pair.neighall);
} }
//NeighborBuildFullBin_Kernel_Restrict<<<grid,threads,(2*sizeof(int)+3*sizeof(X_FLOAT))*threads.x+sizeof(int)>>> //NeighborBuildFullBin_Kernel_Restrict<<<grid,threads,(2*sizeof(int)+3*sizeof(X_FLOAT))*threads.x+sizeof(int)>>>
// (sneighlist->binned_id,sneighlist->bin_nmax,sneighlist->bin_dim[0],sneighlist->bin_dim[1],globcutoff); // (sneighlist->binned_id,sneighlist->bin_nmax,sneighlist->bin_dim[0],sneighlist->bin_dim[1],globcutoff);

View File

@ -21,6 +21,8 @@
This software is distributed under the GNU General Public License. This software is distributed under the GNU General Public License.
------------------------------------------------------------------------- */ ------------------------------------------------------------------------- */
#define SBBITS 30
__global__ void Binning_Kernel(int* binned_id,int bin_nmax,int bin_dim_x,int bin_dim_y,int bin_dim_z, __global__ void Binning_Kernel(int* binned_id,int bin_nmax,int bin_dim_x,int bin_dim_y,int bin_dim_z,
CUDA_FLOAT rez_bin_size_x,CUDA_FLOAT rez_bin_size_y,CUDA_FLOAT rez_bin_size_z) CUDA_FLOAT rez_bin_size_x,CUDA_FLOAT rez_bin_size_y,CUDA_FLOAT rez_bin_size_z)
{ {
@ -109,8 +111,9 @@ __device__ inline int find_special(int3 &n, int* list,int & tag,int3 flag)
} }
template <const unsigned int exclude> template <const unsigned int exclude>
__global__ void NeighborBuildFullBin_Kernel(int* binned_id,int bin_nmax,int bin_dim_x,int bin_dim_y,CUDA_FLOAT globcutoff,int block_style) __global__ void NeighborBuildFullBin_Kernel(int* binned_id,int bin_nmax,int bin_dim_x,int bin_dim_y,CUDA_FLOAT globcutoff,int block_style, bool neighall)
{ {
int natoms = neighall?_nall:_nlocal;
//const bool domol=false; //const bool domol=false;
int bin_dim_z=gridDim.y; int bin_dim_z=gridDim.y;
CUDA_FLOAT* binned_x=(CUDA_FLOAT*) _buffer; CUDA_FLOAT* binned_x=(CUDA_FLOAT*) _buffer;
@ -152,7 +155,7 @@ __global__ void NeighborBuildFullBin_Kernel(int* binned_id,int bin_nmax,int bin_
int jnum=0; int jnum=0;
int itype; int itype;
if(i<_nlocal) if(i<natoms)
{ {
jnum = 0; jnum = 0;
_ilist[i]=i; _ilist[i]=i;
@ -186,7 +189,7 @@ __global__ void NeighborBuildFullBin_Kernel(int* binned_id,int bin_nmax,int bin_
int kk=threadIdx.x; int kk=threadIdx.x;
for(int k = 0; k < MIN(bin_c-otherActOffset,blockDim.x); ++k) for(int k = 0; k < MIN(bin_c-otherActOffset,blockDim.x); ++k)
{ {
if(i<_nlocal) if(i<natoms)
{ {
kk++; kk++;
kk=kk<MIN(bin_c-otherActOffset,blockDim.x)?kk:0; kk=kk<MIN(bin_c-otherActOffset,blockDim.x)?kk:0;
@ -209,7 +212,7 @@ __global__ void NeighborBuildFullBin_Kernel(int* binned_id,int bin_nmax,int bin_
if(block_style) if(block_style)
_neighbors[i*_maxneighbors+jnum]= j; _neighbors[i*_maxneighbors+jnum]= j;
else else
_neighbors[i+jnum*_nlocal]= j; _neighbors[i+jnum*natoms]= j;
} }
++jnum; ++jnum;
} }
@ -244,7 +247,7 @@ __global__ void NeighborBuildFullBin_Kernel(int* binned_id,int bin_nmax,int bin_
for(int k = 0; k < MIN(blockDim.x,obin_c-otherActOffset); ++k) for(int k = 0; k < MIN(blockDim.x,obin_c-otherActOffset); ++k)
{ {
if(i<_nlocal) if(i<natoms)
{ {
int j = other_id[k]; int j = other_id[k];
if(exclude && exclusion(i,j,itype,_type[j])) continue; if(exclude && exclusion(i,j,itype,_type[j])) continue;
@ -266,7 +269,7 @@ __global__ void NeighborBuildFullBin_Kernel(int* binned_id,int bin_nmax,int bin_
if(block_style) if(block_style)
_neighbors[i*_maxneighbors+jnum]= j; _neighbors[i*_maxneighbors+jnum]= j;
else else
_neighbors[i+jnum*_nlocal]= j; _neighbors[i+jnum*natoms]= j;
} }
++jnum; ++jnum;
} }
@ -279,7 +282,7 @@ __global__ void NeighborBuildFullBin_Kernel(int* binned_id,int bin_nmax,int bin_
if(jnum > _maxneighbors) ((int*)_buffer)[0] = -jnum; if(jnum > _maxneighbors) ((int*)_buffer)[0] = -jnum;
if(i<_nlocal) if(i<natoms)
_numneigh[i] = jnum; _numneigh[i] = jnum;
} }
} }
@ -341,9 +344,9 @@ __global__ void FindSpecial(int block_style)
if(which>0) if(which>0)
{ {
if(block_style) if(block_style)
_neighbors[i*_maxneighbors+k]=j+which*_nall; _neighbors[i*_maxneighbors+k]=j ^ (which << SBBITS);
else else
_neighbors[i+k*_nlocal]=j+which*_nall; _neighbors[i+k*_nlocal]=j ^ (which << SBBITS);
} }
else if(which<0) else if(which<0)
{ {