lammps/lib/cuda/cuda_data_kernel.cu

// Copy an nx (x ny (x nz)) array from the staging buffer into device data,
// converting double -> float.  copy_mode selects the layout: xx, xy, and
// xyz copy element-wise, yx transposes a 2D array, and xzy swaps the last
// two dimensions of a 3D array.
__global__ void CudaData_Upload_Kernel_DoubleFloat(double* buffer, float* dev_data,
    unsigned nx, unsigned ny, unsigned nz, copy_mode mode)
{
  if(mode == x) mode = xx;

  // total number of elements; ny and nz are 0 for unused dimensions
  unsigned length = nx;
  if(ny > 0) length *= ny;
  if(nz > 0) length *= nz;

  // one thread per element, mapped from a 2D grid of 1D blocks
  unsigned i = (blockIdx.x * gridDim.y + blockIdx.y) * blockDim.x + threadIdx.x, j, k, l;
  if(i >= length) return;

  switch(mode) {
    case xx:
      dev_data[i] = buffer[i];
      break;

    case xy:
      dev_data[i] = buffer[i];
      break;

    case yx:
      // transpose a 2D nx x ny array
      j = i / ny;
      k = i % ny;
      dev_data[k * nx + j] = buffer[j * ny + k];
      break;

    case xyz:
      dev_data[i] = buffer[i];
      break;

    case xzy:
      // swap the y and z dimensions of a 3D nx x ny x nz array
      j = i / (ny * nz);
      k = (i % (ny * nz)) / nz;
      l = i % nz;
      dev_data[j * ny * nz + l * ny + k] = buffer[j * ny * nz + k * nz + l];
      break;
  }
}
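
/* ------------------------------------------------------------------------
   Illustrative host-side launch sketch (not part of the original file).
   It assumes the usual pattern for these kernels: one thread per element,
   so that (blockIdx.x * gridDim.y + blockIdx.y) * blockDim.x + threadIdx.x
   enumerates all nx*ny elements (with gridDim.y == 1 the formula reduces
   to the familiar 1D mapping).  The helper name and block size are
   hypothetical; copy_mode and its values come from the surrounding package.
------------------------------------------------------------------------- */
#if 0   // example only, not compiled
static void example_upload_double_to_float(double* host_buf, float* dev_data,
    unsigned nx, unsigned ny)
{
  double* dev_buffer;
  size_t bytes = (size_t) nx * ny * sizeof(double);

  // stage the host data in device memory as doubles
  cudaMalloc((void**) &dev_buffer, bytes);
  cudaMemcpy(dev_buffer, host_buf, bytes, cudaMemcpyHostToDevice);

  // one thread per element; split the element count over 1D blocks
  unsigned threads = 256;
  unsigned blocks  = (nx * ny + threads - 1) / threads;
  dim3 grid(blocks, 1, 1);
  dim3 block(threads, 1, 1);

  // convert double -> float on the device, keeping the xy layout
  CudaData_Upload_Kernel_DoubleFloat<<<grid, block>>>(dev_buffer, dev_data,
      nx, ny, 0, xy);
  cudaDeviceSynchronize();

  cudaFree(dev_buffer);
}
#endif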
// Same upload as above, with no type conversion (double -> double).
__global__ void CudaData_Upload_Kernel_DoubleDouble(double* buffer, double* dev_data,
    unsigned nx, unsigned ny, unsigned nz, copy_mode mode)
{
  if(mode == x) mode = xx;

  unsigned length = nx;
  if(ny > 0) length *= ny;
  if(nz > 0) length *= nz;

  unsigned i = (blockIdx.x * gridDim.y + blockIdx.y) * blockDim.x + threadIdx.x, j, k, l;
  if(i >= length) return;

  switch(mode) {
    case xx:
      dev_data[i] = buffer[i];
      break;

    case xy:
      dev_data[i] = buffer[i];
      break;

    case yx:
      j = i / ny;
      k = i % ny;
      dev_data[k * nx + j] = buffer[j * ny + k];
      break;

    case xyz:
      dev_data[i] = buffer[i];
      break;

    case xzy:
      j = i / (ny * nz);
      k = (i % (ny * nz)) / nz;
      l = i % nz;
      dev_data[j * ny * nz + l * ny + k] = buffer[j * ny * nz + k * nz + l];
      break;
  }
}
// Same upload, converting float -> double.
__global__ void CudaData_Upload_Kernel_FloatDouble(float* buffer, double* dev_data,
    unsigned nx, unsigned ny, unsigned nz, copy_mode mode)
{
  if(mode == x) mode = xx;

  unsigned length = nx;
  if(ny > 0) length *= ny;
  if(nz > 0) length *= nz;

  unsigned i = (blockIdx.x * gridDim.y + blockIdx.y) * blockDim.x + threadIdx.x, j, k, l;
  if(i >= length) return;

  switch(mode) {
    case xx:
      dev_data[i] = buffer[i];
      break;

    case xy:
      dev_data[i] = buffer[i];
      break;

    case yx:
      j = i / ny;
      k = i % ny;
      dev_data[k * nx + j] = buffer[j * ny + k];
      break;

    case xyz:
      dev_data[i] = buffer[i];
      break;

    case xzy:
      j = i / (ny * nz);
      k = (i % (ny * nz)) / nz;
      l = i % nz;
      dev_data[j * ny * nz + l * ny + k] = buffer[j * ny * nz + k * nz + l];
      break;
  }
}
// Same upload, with no type conversion (float -> float).
__global__ void CudaData_Upload_Kernel_FloatFloat(float* buffer, float* dev_data,
    unsigned nx, unsigned ny, unsigned nz, copy_mode mode)
{
  if(mode == x) mode = xx;

  unsigned length = nx;
  if(ny > 0) length *= ny;
  if(nz > 0) length *= nz;

  unsigned i = (blockIdx.x * gridDim.y + blockIdx.y) * blockDim.x + threadIdx.x, j, k, l;
  if(i >= length) return;

  switch(mode) {
    case xx:
      dev_data[i] = buffer[i];
      break;

    case xy:
      dev_data[i] = buffer[i];
      break;

    case yx:
      j = i / ny;
      k = i % ny;
      dev_data[k * nx + j] = buffer[j * ny + k];
      break;

    case xyz:
      dev_data[i] = buffer[i];
      break;

    case xzy:
      j = i / (ny * nz);
      k = (i % (ny * nz)) / nz;
      l = i % nz;
      dev_data[j * ny * nz + l * ny + k] = buffer[j * ny * nz + k * nz + l];
      break;
  }
}
// Same upload, with no type conversion (int -> int).
__global__ void CudaData_Upload_Kernel_IntInt(int* buffer, int* dev_data,
    unsigned nx, unsigned ny, unsigned nz, copy_mode mode)
{
  if(mode == x) mode = xx;

  unsigned length = nx;
  if(ny > 0) length *= ny;
  if(nz > 0) length *= nz;

  unsigned i = (blockIdx.x * gridDim.y + blockIdx.y) * blockDim.x + threadIdx.x, j, k, l;
  if(i >= length) return;

  switch(mode) {
    case xx:
      dev_data[i] = buffer[i];
      break;

    case xy:
      dev_data[i] = buffer[i];
      break;

    case yx:
      j = i / ny;
      k = i % ny;
      dev_data[k * nx + j] = buffer[j * ny + k];
      break;

    case xyz:
      dev_data[i] = buffer[i];
      break;

    case xzy:
      j = i / (ny * nz);
      k = (i % (ny * nz)) / nz;
      l = i % nz;
      dev_data[j * ny * nz + l * ny + k] = buffer[j * ny * nz + k * nz + l];
      break;
  }
}
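
/* ------------------------------------------------------------------------
   Design note (sketch, not part of the original file): the five kernels
   above differ only in the source and destination element types.  A single
   templated kernel could express the copy/transpose logic once; the name
   below is hypothetical and the grouping of the element-wise modes is an
   assumption about equivalent behavior.
------------------------------------------------------------------------- */
#if 0   // sketch only, not compiled
template <typename Tsrc, typename Tdst>
__global__ void CudaData_Upload_Kernel(Tsrc* buffer, Tdst* dev_data,
    unsigned nx, unsigned ny, unsigned nz, copy_mode mode)
{
  if(mode == x) mode = xx;

  unsigned length = nx;
  if(ny > 0) length *= ny;
  if(nz > 0) length *= nz;

  unsigned i = (blockIdx.x * gridDim.y + blockIdx.y) * blockDim.x + threadIdx.x, j, k, l;
  if(i >= length) return;

  switch(mode) {
    case xx:
    case xy:
    case xyz:
      dev_data[i] = buffer[i];          // element-wise copy with type conversion
      break;

    case yx:                            // transpose a 2D nx x ny array
      j = i / ny;
      k = i % ny;
      dev_data[k * nx + j] = buffer[j * ny + k];
      break;

    case xzy:                           // swap the y and z dimensions of a 3D array
      j = i / (ny * nz);
      k = (i % (ny * nz)) / nz;
      l = i % nz;
      dev_data[j * ny * nz + l * ny + k] = buffer[j * ny * nz + k * nz + l];
      break;
  }
}
#endif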