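// Data-upload kernels from the LAMMPS USER-CUDA package (lib/cuda).
// This fragment does not define copy_mode itself; in the package it comes
// from cuda_shared.h, presumably along the lines of
//   enum copy_mode {x, xx, xy, yx, xyz, xzy};
// The value encodes how the staging buffer is laid out relative to the
// device array: a direct element-for-element copy, or a transposed copy.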
__global__ void CudaData_Upload_Kernel_DoubleFloat(double* buffer, float* dev_data,
    unsigned nx, unsigned ny, unsigned nz, copy_mode mode)
{
  // A plain 1D copy behaves exactly like the direct xx copy.
  if(mode == x) mode = xx;

  // Total element count; ny and nz are 0 for lower-dimensional data.
  unsigned length = nx;

  if(ny > 0) length *= ny;

  if(nz > 0) length *= nz;

  // Global thread index over a 2D grid of 1D blocks.
  unsigned i = (blockIdx.x * gridDim.y + blockIdx.y) * blockDim.x + threadIdx.x, j, k, l;

  if(i >= length) return;

  switch(mode) {
    case xx:   // direct copy, converting double to float
      dev_data[i] = buffer[i];
      break;

    case xy:   // 2D data with identical layout on both sides
      dev_data[i] = buffer[i];
      break;

    case yx:   // 2D transpose: buffer is nx x ny, dev_data is ny x nx
      j = i / ny;
      k = i % ny;
      dev_data[k * nx + j] = buffer[j * ny + k];
      break;

    case xyz:  // 3D data with identical layout on both sides
      dev_data[i] = buffer[i];
      break;

    case xzy:  // 3D copy that swaps the two innermost dimensions
      j = i / (ny * nz);
      k = (i % (ny * nz)) / nz;
      l = i % nz;
      dev_data[j * ny * nz + l * ny + k] = buffer[j * ny * nz + k * nz + l];
      break;
  }
}

// Same copy logic as above, for double -> double.
__global__ void CudaData_Upload_Kernel_DoubleDouble(double* buffer, double* dev_data,
    unsigned nx, unsigned ny, unsigned nz, copy_mode mode)
{
  if(mode == x) mode = xx;

  unsigned length = nx;

  if(ny > 0) length *= ny;

  if(nz > 0) length *= nz;

  unsigned i = (blockIdx.x * gridDim.y + blockIdx.y) * blockDim.x + threadIdx.x, j, k, l;

  if(i >= length) return;

  switch(mode) {
    case xx:
      dev_data[i] = buffer[i];
      break;

    case xy:
      dev_data[i] = buffer[i];
      break;

    case yx:
      j = i / ny;
      k = i % ny;
      dev_data[k * nx + j] = buffer[j * ny + k];
      break;

    case xyz:
      dev_data[i] = buffer[i];
      break;

    case xzy:
      j = i / (ny * nz);
      k = (i % (ny * nz)) / nz;
      l = i % nz;
      dev_data[j * ny * nz + l * ny + k] = buffer[j * ny * nz + k * nz + l];
      break;
  }
}

// Same copy logic, converting float -> double.
__global__ void CudaData_Upload_Kernel_FloatDouble(float* buffer, double* dev_data,
    unsigned nx, unsigned ny, unsigned nz, copy_mode mode)
{
  if(mode == x) mode = xx;

  unsigned length = nx;

  if(ny > 0) length *= ny;

  if(nz > 0) length *= nz;

  unsigned i = (blockIdx.x * gridDim.y + blockIdx.y) * blockDim.x + threadIdx.x, j, k, l;

  if(i >= length) return;

  switch(mode) {
    case xx:
      dev_data[i] = buffer[i];
      break;

    case xy:
      dev_data[i] = buffer[i];
      break;

    case yx:
      j = i / ny;
      k = i % ny;
      dev_data[k * nx + j] = buffer[j * ny + k];
      break;

    case xyz:
      dev_data[i] = buffer[i];
      break;

    case xzy:
      j = i / (ny * nz);
      k = (i % (ny * nz)) / nz;
      l = i % nz;
      dev_data[j * ny * nz + l * ny + k] = buffer[j * ny * nz + k * nz + l];
      break;
  }
}

// Same copy logic, for float -> float.
__global__ void CudaData_Upload_Kernel_FloatFloat(float* buffer, float* dev_data,
    unsigned nx, unsigned ny, unsigned nz, copy_mode mode)
{
  if(mode == x) mode = xx;

  unsigned length = nx;

  if(ny > 0) length *= ny;

  if(nz > 0) length *= nz;

  unsigned i = (blockIdx.x * gridDim.y + blockIdx.y) * blockDim.x + threadIdx.x, j, k, l;

  if(i >= length) return;

  switch(mode) {
    case xx:
      dev_data[i] = buffer[i];
      break;

    case xy:
      dev_data[i] = buffer[i];
      break;

    case yx:
      j = i / ny;
      k = i % ny;
      dev_data[k * nx + j] = buffer[j * ny + k];
      break;

    case xyz:
      dev_data[i] = buffer[i];
      break;

    case xzy:
      j = i / (ny * nz);
      k = (i % (ny * nz)) / nz;
      l = i % nz;
      dev_data[j * ny * nz + l * ny + k] = buffer[j * ny * nz + k * nz + l];
      break;
  }
}

// Same copy logic, for int -> int.
__global__ void CudaData_Upload_Kernel_IntInt(int* buffer, int* dev_data,
    unsigned nx, unsigned ny, unsigned nz, copy_mode mode)
{
  if(mode == x) mode = xx;

  unsigned length = nx;

  if(ny > 0) length *= ny;

  if(nz > 0) length *= nz;

  unsigned i = (blockIdx.x * gridDim.y + blockIdx.y) * blockDim.x + threadIdx.x, j, k, l;

  if(i >= length) return;

  switch(mode) {
    case xx:
      dev_data[i] = buffer[i];
      break;

    case xy:
      dev_data[i] = buffer[i];
      break;

    case yx:
      j = i / ny;
      k = i % ny;
      dev_data[k * nx + j] = buffer[j * ny + k];
      break;

    case xyz:
      dev_data[i] = buffer[i];
      break;

    case xzy:
      j = i / (ny * nz);
      k = (i % (ny * nz)) / nz;
      l = i % nz;
      dev_data[j * ny * nz + l * ny + k] = buffer[j * ny * nz + k * nz + l];
      break;
  }
}
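
// A minimal host-side launch sketch, not part of the original file. It shows
// one way these kernels might be invoked, assuming a grid shape consistent
// with the index computation used above,
//   i = (blockIdx.x * gridDim.y + blockIdx.y) * blockDim.x + threadIdx.x.
// The names upload_double_float and threads_per_block are illustrative, and
// buffer must point to device-accessible memory, since the kernels
// dereference it directly.
static void upload_double_float(double* buffer, float* dev_data,
                                unsigned nx, unsigned ny, unsigned nz,
                                copy_mode mode)
{
  const unsigned threads_per_block = 256;

  // Mirror the kernel's length computation to size the grid.
  unsigned length = nx;
  if(ny > 0) length *= ny;
  if(nz > 0) length *= nz;

  // With gridDim.y == 1 and blockIdx.y == 0, the kernel's index expression
  // reduces to the usual blockIdx.x * blockDim.x + threadIdx.x.
  dim3 grid((length + threads_per_block - 1) / threads_per_block, 1, 1);
  dim3 block(threads_per_block, 1, 1);

  CudaData_Upload_Kernel_DoubleFloat<<<grid, block>>>(buffer, dev_data,
                                                      nx, ny, nz, mode);
  cudaDeviceSynchronize();  // or whatever error handling the caller requires
}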