lammps/lib/cuda/atom_vec_cuda_kernel.cu

/* ----------------------------------------------------------------------
   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator

   Original Version:
   http://lammps.sandia.gov, Sandia National Laboratories
   Steve Plimpton, sjplimp@sandia.gov

   See the README file in the top-level LAMMPS directory.

   -----------------------------------------------------------------------

   USER-CUDA Package and associated modifications:
   https://sourceforge.net/projects/lammpscuda/

   Christian Trott, christian.trott@tu-ilmenau.de
   Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
   Theoretical Physics II, University of Technology Ilmenau, Germany

   See the README file in the USER-CUDA directory.

   This software is distributed under the GNU General Public License.
------------------------------------------------------------------------- */

#define RIMLARGER 1.000001
#define RIMSMALLER 0.999999
#define SMALL 1e-5

// Dynamically sized shared memory, used by Cuda_AtomVecCuda_PackExchangeList_Kernel
// for its per-block scan; that kernel must be launched with
// (blockDim.x + 1) * sizeof(int) bytes of dynamic shared memory.
extern __shared__ int shared[];
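
/* ----------------------------------------------------------------------
   pack per-atom data (fields selected at compile time via data_mask) for
   forward communication into a send buffer; the buffer is laid out
   field-major (n consecutive values per field) so that writes from
   adjacent threads coalesce
------------------------------------------------------------------------- */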
template <const unsigned int data_mask>
__global__ void Cuda_AtomVecCuda_PackComm_Kernel(int* sendlist, int n, int maxlistlength, int iswap, X_FLOAT dx, X_FLOAT dy, X_FLOAT dz, void* buffer)
{
  int i = (blockIdx.x * gridDim.y + blockIdx.y) * blockDim.x + threadIdx.x;
  int* list = sendlist + iswap * maxlistlength;

  if(i < n) {
    int j = list[i];

    if(j >= _nmax) _flag[0] = 1;   // flag out-of-range list entries for the host

    int k = 0;

    if(data_mask & X_MASK) {
      ((X_FLOAT*) buffer)[i + k * n] = _x[j] + dx; k++;
      ((X_FLOAT*) buffer)[i + k * n] = _x[j + _nmax] + dy; k++;
      ((X_FLOAT*) buffer)[i + k * n] = _x[j + 2 * _nmax] + dz; k++;
    }

    if(data_mask & V_MASK) {
      ((X_FLOAT*) buffer)[i + k * n] = _v[j]; k++;
      ((X_FLOAT*) buffer)[i + k * n] = _v[j + _nmax]; k++;
      ((X_FLOAT*) buffer)[i + k * n] = _v[j + 2 * _nmax]; k++;
    }

    if(data_mask & OMEGA_MASK) {
      ((X_FLOAT*) buffer)[i + k * n] = _omega[j]; k++;
      ((X_FLOAT*) buffer)[i + k * n] = _omega[j + _nmax]; k++;
      ((X_FLOAT*) buffer)[i + k * n] = _omega[j + 2 * _nmax]; k++;
    }

    // k must only advance for fields that are actually packed, so the
    // increments belong inside the conditionals; the unpack kernel below
    // walks the buffer with the same mask.
    if(data_mask & RADIUS_MASK) { ((X_FLOAT*) buffer)[i + k * n] = _radius[j]; k++; }

    if(data_mask & RMASS_MASK) { ((X_FLOAT*) buffer)[i + k * n] = _rmass[j]; k++; }
  }
}
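
/* ----------------------------------------------------------------------
   forward communication when sender and receiver are the same process:
   copy the listed atoms directly into the ghost-atom slots starting at
   'first', skipping the intermediate buffer
------------------------------------------------------------------------- */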
template <const unsigned int data_mask>
__global__ void Cuda_AtomVecCuda_PackComm_Self_Kernel(int* sendlist, int n, int maxlistlength, int iswap, X_FLOAT dx, X_FLOAT dy, X_FLOAT dz, int first)
{
  int i = (blockIdx.x * gridDim.y + blockIdx.y) * blockDim.x + threadIdx.x;
  int* list = sendlist + iswap * maxlistlength;

  if(i < n) {
    int j = list[i];

    if(data_mask & X_MASK) {
      _x[i + first] = _x[j] + dx;
      _x[i + first + _nmax] = _x[j + _nmax] + dy;
      _x[i + first + 2 * _nmax] = _x[j + 2 * _nmax] + dz;
    }

    if(data_mask & V_MASK) {
      _v[i + first] = _v[j];
      _v[i + first + _nmax] = _v[j + _nmax];
      _v[i + first + 2 * _nmax] = _v[j + 2 * _nmax];
    }

    if(data_mask & OMEGA_MASK) {
      _omega[i + first] = _omega[j];
      _omega[i + first + _nmax] = _omega[j + _nmax];
      _omega[i + first + 2 * _nmax] = _omega[j + 2 * _nmax];
    }

    if(data_mask & RADIUS_MASK) _radius[i + first] = _radius[j];

    if(data_mask & RMASS_MASK) _rmass[i + first] = _rmass[j];
  }
}
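
/* ----------------------------------------------------------------------
   unpack forward-communication data from 'buffer' into the ghost-atom
   slots starting at 'first'; must walk the buffer with the same
   data_mask the pack kernel used
------------------------------------------------------------------------- */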
template <const unsigned int data_mask>
__global__ void Cuda_AtomVecCuda_UnpackComm_Kernel(int n, int first, void* buffer)
{
  int i = (blockIdx.x * gridDim.y + blockIdx.y) * blockDim.x + threadIdx.x;

  if(i < n) {
    int k = 0;

    if(data_mask & X_MASK) {
      _x[i + first] = ((X_FLOAT*) buffer)[i + k * n]; k++;
      _x[i + first + _nmax] = ((X_FLOAT*) buffer)[i + k * n]; k++;
      _x[i + first + 2 * _nmax] = ((X_FLOAT*) buffer)[i + k * n]; k++;
    }

    if(data_mask & V_MASK) {
      _v[i + first] = ((X_FLOAT*) buffer)[i + k * n]; k++;
      _v[i + first + _nmax] = ((X_FLOAT*) buffer)[i + k * n]; k++;
      _v[i + first + 2 * _nmax] = ((X_FLOAT*) buffer)[i + k * n]; k++;
    }

    if(data_mask & OMEGA_MASK) {
      _omega[i + first] = ((X_FLOAT*) buffer)[i + k * n]; k++;
      _omega[i + first + _nmax] = ((X_FLOAT*) buffer)[i + k * n]; k++;
      _omega[i + first + 2 * _nmax] = ((X_FLOAT*) buffer)[i + k * n]; k++;
    }

    // Increment k only for fields that were packed, mirroring the pack kernel.
    if(data_mask & RADIUS_MASK) { _radius[i + first] = ((X_FLOAT*) buffer)[i + k * n]; k++; }

    if(data_mask & RMASS_MASK) { _rmass[i + first] = ((X_FLOAT*) buffer)[i + k * n]; k++; }
  }
}
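
/* ----------------------------------------------------------------------
   build the list of atoms that left this subdomain along dimension 'dim':
   each block counts its departing atoms with a shared-memory scan, then
   reserves a contiguous range in _buffer with a single atomicAdd
------------------------------------------------------------------------- */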
__global__ void Cuda_AtomVecCuda_PackExchangeList_Kernel(int n, int dim)
{
  // _buffer[0] holds the running send count (accessed as an int); the
  // indices of departing atoms follow, starting at buf[1].
  double* buf = (double*) _buffer;
  buf = &buf[1];

  int i = (blockIdx.x * gridDim.y + blockIdx.y) * blockDim.x + threadIdx.x;
  bool add = false;

  if(i < _nlocal) {
    double xdim_tmp = static_cast<double>(_x[i + dim * _nmax]);

    if(xdim_tmp < _sublo[dim] || xdim_tmp >= _subhi[dim])
      add = true;
  }

  // Block-level scan: thread 0 turns the 0/1 flags into running counts
  // and reserves the block's range in the global buffer.
  shared[threadIdx.x] = add ? 1 : 0;
  __syncthreads();
  int nsend = 0;

  if(threadIdx.x == 0) {
    for(int k = 0; k < blockDim.x; k++) {
      if(shared[k]) { nsend++; shared[k] = nsend; }
    }

    shared[blockDim.x] = atomicAdd((int*) _buffer, nsend);
  }

  __syncthreads();
  nsend = shared[blockDim.x] + shared[threadIdx.x] - 1;

  if(add && nsend + 1 < n)   // silently drop entries that would overflow;
    buf[nsend] = i;          // the host detects this via the count in _buffer[0]
}
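
/* ----------------------------------------------------------------------
   pack the atoms listed by PackExchangeList into _buffer for exchange
   with a neighboring process, then compact the local arrays by copying
   atom copylist[k] into each vacated slot
------------------------------------------------------------------------- */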
template <const unsigned int data_mask>
__global__ void Cuda_AtomVecCuda_PackExchange_Kernel(int nsend, int* copylist)
{
  double* buf = (double*) _buffer;
  int k = (blockIdx.x * gridDim.y + blockIdx.y) * blockDim.x + threadIdx.x;

  if(k >= nsend) return;

  buf = &buf[1 + k];
  int i = static_cast<int>(buf[0]);   // index of the departing atom
  int j = copylist[k];                // atom that will fill its slot
  int m = 1;

  if(data_mask & X_MASK) {
    buf[(m++) * nsend] = static_cast<double>(_x[i]);
    buf[(m++) * nsend] = static_cast<double>(_x[i + _nmax]);
    buf[(m++) * nsend] = static_cast<double>(_x[i + 2 * _nmax]);
  }

  if(data_mask & V_MASK) {
    buf[(m++) * nsend] = _v[i];
    buf[(m++) * nsend] = _v[i + _nmax];
    buf[(m++) * nsend] = _v[i + 2 * _nmax];
  }

  if(data_mask & TAG_MASK)      buf[(m++) * nsend] = _tag[i];
  if(data_mask & TYPE_MASK)     buf[(m++) * nsend] = _type[i];
  if(data_mask & MASK_MASK)     buf[(m++) * nsend] = _mask[i];
  if(data_mask & IMAGE_MASK)    buf[(m++) * nsend] = _image[i];
  if(data_mask & Q_MASK)        buf[(m++) * nsend] = _q[i];
  if(data_mask & MOLECULE_MASK) buf[(m++) * nsend] = _molecule[i];
  if(data_mask & RADIUS_MASK)   buf[(m++) * nsend] = _radius[i];
  if(data_mask & DENSITY_MASK)  buf[(m++) * nsend] = _density[i];
  if(data_mask & RMASS_MASK)    buf[(m++) * nsend] = _rmass[i];

  if(data_mask & OMEGA_MASK) {
    buf[(m++) * nsend] = _omega[i];
    buf[(m++) * nsend] = _omega[i + _nmax];
    buf[(m++) * nsend] = _omega[i + 2 * _nmax];
  }

  /* NSPECIAL packing is currently disabled:
  if(data_mask & NSPECIAL_MASK) {
    buf[(m++) * nsend] = _nspecial[i];
    buf[(m++) * nsend] = _nspecial[i + _nmax];
    buf[(m++) * nsend] = _nspecial[i + 2 * _nmax];
  }*/

  // Compaction: fill the hole left by the departing atom with atom j
  // from copylist.
  if(i >= _nlocal) return;

  if(data_mask & X_MASK) {
    _x[i] = _x[j];
    _x[i + _nmax] = _x[j + _nmax];
    _x[i + 2 * _nmax] = _x[j + 2 * _nmax];
  }

  if(data_mask & V_MASK) {
    _v[i] = _v[j];
    _v[i + _nmax] = _v[j + _nmax];
    _v[i + 2 * _nmax] = _v[j + 2 * _nmax];
  }

  if(data_mask & TAG_MASK)      _tag[i] = _tag[j];
  if(data_mask & TYPE_MASK)     _type[i] = _type[j];
  if(data_mask & MASK_MASK)     _mask[i] = _mask[j];
  if(data_mask & IMAGE_MASK)    _image[i] = _image[j];
  if(data_mask & Q_MASK)        _q[i] = _q[j];
  if(data_mask & MOLECULE_MASK) _molecule[i] = _molecule[j];
  if(data_mask & RADIUS_MASK)   _radius[i] = _radius[j];
  if(data_mask & DENSITY_MASK)  _density[i] = _density[j];
  if(data_mask & RMASS_MASK)    _rmass[i] = _rmass[j];

  if(data_mask & OMEGA_MASK) {
    _omega[i] = _omega[j];
    _omega[i + _nmax] = _omega[j + _nmax];
    _omega[i + 2 * _nmax] = _omega[j + 2 * _nmax];
  }

  /* NSPECIAL copy is currently disabled:
  if(data_mask & NSPECIAL_MASK) {
    _nspecial[i] = _nspecial[j];
    _nspecial[i + _nmax] = _nspecial[j + _nmax];
    _nspecial[i + 2 * _nmax] = _nspecial[j + 2 * _nmax];
  }*/
}
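
/* ----------------------------------------------------------------------
   unpack exchanged atoms that landed in this subdomain; accepted atoms
   are appended after _nlocal, and copylist records where each buffer
   entry went (-1 if it was not accepted here)
------------------------------------------------------------------------- */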
template <const unsigned int data_mask>
__global__ void Cuda_AtomVecCuda_UnpackExchange_Kernel(int dim, int nsend, int* copylist)
{
  double* buf = (double*) _buffer;
  int k = (blockIdx.x * gridDim.y + blockIdx.y) * blockDim.x + threadIdx.x;

  if(k >= nsend) return;

  buf = &buf[1 + k];
  int i = -1;
  double xdim_tmp = buf[(1 + dim) * nsend];   // coordinate along the exchange dimension

  // Accept the atom only if it falls into this subdomain (with a small
  // tolerance); _flag counts the accepted atoms.
  if(xdim_tmp >= _sublo[dim] - SMALL && xdim_tmp < _subhi[dim] + SMALL) {
    i = atomicAdd(_flag, 1) + _nlocal;
    int m = 1;

    if(data_mask & X_MASK) {
      _x[i] = buf[(m++) * nsend];
      _x[i + _nmax] = buf[(m++) * nsend];
      _x[i + 2 * _nmax] = buf[(m++) * nsend];
    }

    if(data_mask & V_MASK) {
      _v[i] = buf[(m++) * nsend];
      _v[i + _nmax] = buf[(m++) * nsend];
      _v[i + 2 * _nmax] = buf[(m++) * nsend];
    }

    if(data_mask & TAG_MASK)      _tag[i] = buf[(m++) * nsend];
    if(data_mask & TYPE_MASK)     _type[i] = buf[(m++) * nsend];
    if(data_mask & MASK_MASK)     _mask[i] = buf[(m++) * nsend];
    if(data_mask & IMAGE_MASK)    _image[i] = buf[(m++) * nsend];
    if(data_mask & Q_MASK)        _q[i] = buf[(m++) * nsend];
    if(data_mask & MOLECULE_MASK) _molecule[i] = buf[(m++) * nsend];
    if(data_mask & RADIUS_MASK)   _radius[i] = buf[(m++) * nsend];
    if(data_mask & DENSITY_MASK)  _density[i] = buf[(m++) * nsend];
    if(data_mask & RMASS_MASK)    _rmass[i] = buf[(m++) * nsend];

    if(data_mask & OMEGA_MASK) {
      _omega[i] = buf[(m++) * nsend];
      _omega[i + _nmax] = buf[(m++) * nsend];
      _omega[i + 2 * _nmax] = buf[(m++) * nsend];
    }

    /* NSPECIAL unpacking is currently disabled:
    if(data_mask & NSPECIAL_MASK) {
      _nspecial[i] = buf[(m++) * nsend];
      _nspecial[i + _nmax] = buf[(m++) * nsend];
      _nspecial[i + 2 * _nmax] = buf[(m++) * nsend];
    }*/
  }

  copylist[k] = i;
}
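
/* ----------------------------------------------------------------------
   pack border-atom data into _buffer: positions shifted by the
   periodic-image offset (dx,dy,dz), plus any further per-atom fields
   selected by data_mask
------------------------------------------------------------------------- */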
template <const unsigned int data_mask>
__global__ void Cuda_AtomVecCuda_PackBorder_Kernel(int* sendlist, int n, int maxlistlength, int iswap, X_FLOAT dx, X_FLOAT dy, X_FLOAT dz)
{
  int i = (blockIdx.x * gridDim.y + blockIdx.y) * blockDim.x + threadIdx.x;
  int* list = sendlist + iswap * maxlistlength;

  if(i < n) {
    int j = list[i];
    int m = 0;

    if(data_mask & X_MASK) {
      ((X_FLOAT*) _buffer)[i + (m++) * n] = _x[j] + dx;
      ((X_FLOAT*) _buffer)[i + (m++) * n] = _x[j + _nmax] + dy;
      ((X_FLOAT*) _buffer)[i + (m++) * n] = _x[j + 2 * _nmax] + dz;
    }

    if(data_mask & V_MASK) {
      ((X_FLOAT*) _buffer)[i + (m++) * n] = _v[j];
      ((X_FLOAT*) _buffer)[i + (m++) * n] = _v[j + _nmax];
      ((X_FLOAT*) _buffer)[i + (m++) * n] = _v[j + 2 * _nmax];
    }

    if(data_mask & TAG_MASK)      ((X_FLOAT*) _buffer)[i + (m++) * n] = _tag[j];
    if(data_mask & TYPE_MASK)     ((X_FLOAT*) _buffer)[i + (m++) * n] = _type[j];
    if(data_mask & MASK_MASK)     ((X_FLOAT*) _buffer)[i + (m++) * n] = _mask[j];
    if(data_mask & Q_MASK)        ((X_FLOAT*) _buffer)[i + (m++) * n] = _q[j];
    if(data_mask & MOLECULE_MASK) ((X_FLOAT*) _buffer)[i + (m++) * n] = _molecule[j];

    // These must index the listed atom j, not the thread index i.
    if(data_mask & RADIUS_MASK)   ((X_FLOAT*) _buffer)[i + (m++) * n] = _radius[j];
    if(data_mask & DENSITY_MASK)  ((X_FLOAT*) _buffer)[i + (m++) * n] = _density[j];
    if(data_mask & RMASS_MASK)    ((X_FLOAT*) _buffer)[i + (m++) * n] = _rmass[j];

    if(data_mask & OMEGA_MASK) {
      ((X_FLOAT*) _buffer)[i + (m++) * n] = _omega[j];
      ((X_FLOAT*) _buffer)[i + (m++) * n] = _omega[j + _nmax];
      ((X_FLOAT*) _buffer)[i + (m++) * n] = _omega[j + 2 * _nmax];
    }
  }
}
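
/* ----------------------------------------------------------------------
   border communication within the same process: copy the listed atoms
   directly into the ghost slots starting at 'first'
------------------------------------------------------------------------- */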
template <const unsigned int data_mask>
__global__ void Cuda_AtomVecCuda_PackBorder_Self_Kernel(int* sendlist, int n, int maxlistlength, int iswap, X_FLOAT dx, X_FLOAT dy, X_FLOAT dz, int first)
{
  int i = (blockIdx.x * gridDim.y + blockIdx.y) * blockDim.x + threadIdx.x;
  int* list = sendlist + iswap * maxlistlength;

  if(i < n) {
    int j = list[i];

    if(data_mask & X_MASK) {
      _x[i + first] = _x[j] + dx;
      _x[i + first + _nmax] = _x[j + _nmax] + dy;
      _x[i + first + 2 * _nmax] = _x[j + 2 * _nmax] + dz;
    }

    if(data_mask & V_MASK) {
      _v[i + first] = _v[j];
      _v[i + first + _nmax] = _v[j + _nmax];
      _v[i + first + 2 * _nmax] = _v[j + 2 * _nmax];
    }

    if(data_mask & TAG_MASK)      _tag[i + first] = _tag[j];
    if(data_mask & TYPE_MASK)     _type[i + first] = _type[j];
    if(data_mask & MASK_MASK)     _mask[i + first] = _mask[j];
    if(data_mask & Q_MASK)        _q[i + first] = _q[j];
    if(data_mask & MOLECULE_MASK) _molecule[i + first] = _molecule[j];
    if(data_mask & RADIUS_MASK)   _radius[i + first] = _radius[j];
    if(data_mask & DENSITY_MASK)  _density[i + first] = _density[j];
    if(data_mask & RMASS_MASK)    _rmass[i + first] = _rmass[j];

    if(data_mask & OMEGA_MASK) {
      _omega[i + first] = _omega[j];
      _omega[i + first + _nmax] = _omega[j + _nmax];
      _omega[i + first + 2 * _nmax] = _omega[j + 2 * _nmax];
    }
  }
}
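
/* ----------------------------------------------------------------------
   unpack received border atoms into the slots starting at 'first';
   sets _flag[0] if the slots would run past the allocated per-atom
   arrays, so the host can grow them and retry
------------------------------------------------------------------------- */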
template <const unsigned int data_mask>
__global__ void Cuda_AtomVecCuda_UnpackBorder_Kernel(int n, int first)
{
  int i = (blockIdx.x * gridDim.y + blockIdx.y) * blockDim.x + threadIdx.x;

  if(i < n) {
    if(i + first < _nmax) {
      int m = 0;

      if(data_mask & X_MASK) {
        _x[i + first] = ((X_FLOAT*) _buffer)[i + (m++) * n];
        _x[i + first + _nmax] = ((X_FLOAT*) _buffer)[i + (m++) * n];
        _x[i + first + 2 * _nmax] = ((X_FLOAT*) _buffer)[i + (m++) * n];
      }

      if(data_mask & V_MASK) {
        _v[i + first] = ((X_FLOAT*) _buffer)[i + (m++) * n];
        _v[i + first + _nmax] = ((X_FLOAT*) _buffer)[i + (m++) * n];
        _v[i + first + 2 * _nmax] = ((X_FLOAT*) _buffer)[i + (m++) * n];
      }

      if(data_mask & TAG_MASK)      _tag[i + first] = static_cast<int>(((X_FLOAT*) _buffer)[i + (m++) * n]);
      if(data_mask & TYPE_MASK)     _type[i + first] = static_cast<int>(((X_FLOAT*) _buffer)[i + (m++) * n]);
      if(data_mask & MASK_MASK)     _mask[i + first] = static_cast<int>(((X_FLOAT*) _buffer)[i + (m++) * n]);
      if(data_mask & Q_MASK)        _q[i + first] = ((X_FLOAT*) _buffer)[i + (m++) * n];
      if(data_mask & MOLECULE_MASK) _molecule[i + first] = static_cast<int>(((X_FLOAT*) _buffer)[i + (m++) * n]);
      if(data_mask & RADIUS_MASK)   _radius[i + first] = ((X_FLOAT*) _buffer)[i + (m++) * n];
      if(data_mask & DENSITY_MASK)  _density[i + first] = ((X_FLOAT*) _buffer)[i + (m++) * n];
      if(data_mask & RMASS_MASK)    _rmass[i + first] = ((X_FLOAT*) _buffer)[i + (m++) * n];

      if(data_mask & OMEGA_MASK) {
        _omega[i + first] = ((X_FLOAT*) _buffer)[i + (m++) * n];
        _omega[i + first + _nmax] = ((X_FLOAT*) _buffer)[i + (m++) * n];
        _omega[i + first + 2 * _nmax] = ((X_FLOAT*) _buffer)[i + (m++) * n];
      }
    } else {
      _flag[0] = 1;
    }
  }
}
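
/* ----------------------------------------------------------------------
   Usage sketch (not part of this file): a minimal host-side launch of
   the forward-communication pack kernel, assuming the device __constant__
   symbols referenced above (_x, _v, _nmax, _flag, ...) have already been
   bound by the USER-CUDA host wrappers, and that 'sendlist' and 'buffer'
   are valid device pointers; variable names are illustrative only.

     int threadsPerBlock = 256;
     dim3 threads(threadsPerBlock);
     dim3 grid((n + threadsPerBlock - 1) / threadsPerBlock, 1, 1);

     // pack positions only; the template mask selects the fields at
     // compile time, so unused branches are removed entirely
     Cuda_AtomVecCuda_PackComm_Kernel<X_MASK><<<grid, threads>>>
       (sendlist, n, maxlistlength, iswap, dx, dy, dz, buffer);
     cudaDeviceSynchronize();
------------------------------------------------------------------------- */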