forked from lijiext/lammps
196 lines
3.9 KiB
Plaintext
196 lines
3.9 KiB
Plaintext
/* Copies an array of doubles from `buffer` into the float array `dev_data`,
 * optionally permuting the element layout as selected by `mode`.
 *
 * Launch layout: one thread per element. The flat element index is built from
 * a 2D grid (blockIdx.x, blockIdx.y) of 1D blocks (threadIdx.x); threads whose
 * index is past the element count return without touching memory.
 *
 * Parameters:
 *   buffer    source array (both pointers are dereferenced inside the kernel,
 *             so both must be device-accessible)
 *   dev_data  destination array; values are narrowed from double to float
 *   nx,ny,nz  extents; a zero ny or nz marks an absent dimension and is
 *             treated as extent 1
 *   mode      x, xx, xy, xyz : element-for-element copy
 *             yx             : buffer[j*ny + k]           -> dev_data[k*nx + j]
 *             xzy            : buffer[j*ny*nz + k*nz + l] -> dev_data[j*ny*nz + l*ny + k]
 *
 * Every case ends in `break`. Without it a plain copy would fall through into
 * the permuting cases, which divide by ny / nz and store to unrelated slots.
 */
__global__ void CudaData_Upload_Kernel_DoubleFloat(double* buffer, float* dev_data,
    unsigned nx, unsigned ny, unsigned nz, copy_mode mode)
{
  if(mode == x) mode = xx;

  // A zero extent means "dimension not used"; count it as 1 so the element
  // total and the index arithmetic below agree and never divide by zero.
  const unsigned ey = (ny > 0) ? ny : 1;
  const unsigned ez = (nz > 0) ? nz : 1;
  const unsigned length = nx * ey * ez;

  const unsigned i = (blockIdx.x * gridDim.y + blockIdx.y) * blockDim.x + threadIdx.x;

  if(i >= length) return;

  switch(mode) {
    case xx:
    case xy:
    case xyz:
      // Layout is unchanged: straight copy.
      dev_data[i] = static_cast<float>(buffer[i]);
      break;

    case yx: {
      // Swap the two dimensions of a 2D array.
      const unsigned j = i / ey;
      const unsigned k = i % ey;
      dev_data[k * nx + j] = static_cast<float>(buffer[j * ey + k]);
      break;
    }

    case xzy: {
      // Keep the outer index, swap the two inner dimensions.
      const unsigned plane = ey * ez;
      const unsigned j = i / plane;
      const unsigned k = (i % plane) / ez;
      const unsigned l = i % ez;
      dev_data[j * plane + l * ey + k] = static_cast<float>(buffer[j * plane + k * ez + l]);
      break;
    }

    default:
      break;
  }
}
|
|
|
|
/* Copies an array of doubles from `buffer` into the double array `dev_data`,
 * optionally permuting the element layout as selected by `mode`.
 *
 * Launch layout: one thread per element. The flat element index is built from
 * a 2D grid (blockIdx.x, blockIdx.y) of 1D blocks (threadIdx.x); threads whose
 * index is past the element count return without touching memory.
 *
 * Parameters:
 *   buffer    source array (both pointers are dereferenced inside the kernel,
 *             so both must be device-accessible)
 *   dev_data  destination array
 *   nx,ny,nz  extents; a zero ny or nz marks an absent dimension and is
 *             treated as extent 1
 *   mode      x, xx, xy, xyz : element-for-element copy
 *             yx             : buffer[j*ny + k]           -> dev_data[k*nx + j]
 *             xzy            : buffer[j*ny*nz + k*nz + l] -> dev_data[j*ny*nz + l*ny + k]
 *
 * Every case ends in `break`. Without it a plain copy would fall through into
 * the permuting cases, which divide by ny / nz and store to unrelated slots.
 */
__global__ void CudaData_Upload_Kernel_DoubleDouble(double* buffer, double* dev_data,
    unsigned nx, unsigned ny, unsigned nz, copy_mode mode)
{
  if(mode == x) mode = xx;

  // A zero extent means "dimension not used"; count it as 1 so the element
  // total and the index arithmetic below agree and never divide by zero.
  const unsigned ey = (ny > 0) ? ny : 1;
  const unsigned ez = (nz > 0) ? nz : 1;
  const unsigned length = nx * ey * ez;

  const unsigned i = (blockIdx.x * gridDim.y + blockIdx.y) * blockDim.x + threadIdx.x;

  if(i >= length) return;

  switch(mode) {
    case xx:
    case xy:
    case xyz:
      // Layout is unchanged: straight copy.
      dev_data[i] = buffer[i];
      break;

    case yx: {
      // Swap the two dimensions of a 2D array.
      const unsigned j = i / ey;
      const unsigned k = i % ey;
      dev_data[k * nx + j] = buffer[j * ey + k];
      break;
    }

    case xzy: {
      // Keep the outer index, swap the two inner dimensions.
      const unsigned plane = ey * ez;
      const unsigned j = i / plane;
      const unsigned k = (i % plane) / ez;
      const unsigned l = i % ez;
      dev_data[j * plane + l * ey + k] = buffer[j * plane + k * ez + l];
      break;
    }

    default:
      break;
  }
}
|
|
|
|
/* Copies an array of floats from `buffer` into the double array `dev_data`,
 * optionally permuting the element layout as selected by `mode`.
 *
 * Launch layout: one thread per element. The flat element index is built from
 * a 2D grid (blockIdx.x, blockIdx.y) of 1D blocks (threadIdx.x); threads whose
 * index is past the element count return without touching memory.
 *
 * Parameters:
 *   buffer    source array (both pointers are dereferenced inside the kernel,
 *             so both must be device-accessible)
 *   dev_data  destination array; values are widened from float to double
 *   nx,ny,nz  extents; a zero ny or nz marks an absent dimension and is
 *             treated as extent 1
 *   mode      x, xx, xy, xyz : element-for-element copy
 *             yx             : buffer[j*ny + k]           -> dev_data[k*nx + j]
 *             xzy            : buffer[j*ny*nz + k*nz + l] -> dev_data[j*ny*nz + l*ny + k]
 *
 * Every case ends in `break`. Without it a plain copy would fall through into
 * the permuting cases, which divide by ny / nz and store to unrelated slots.
 */
__global__ void CudaData_Upload_Kernel_FloatDouble(float* buffer, double* dev_data,
    unsigned nx, unsigned ny, unsigned nz, copy_mode mode)
{
  if(mode == x) mode = xx;

  // A zero extent means "dimension not used"; count it as 1 so the element
  // total and the index arithmetic below agree and never divide by zero.
  const unsigned ey = (ny > 0) ? ny : 1;
  const unsigned ez = (nz > 0) ? nz : 1;
  const unsigned length = nx * ey * ez;

  const unsigned i = (blockIdx.x * gridDim.y + blockIdx.y) * blockDim.x + threadIdx.x;

  if(i >= length) return;

  switch(mode) {
    case xx:
    case xy:
    case xyz:
      // Layout is unchanged: straight copy.
      dev_data[i] = buffer[i];
      break;

    case yx: {
      // Swap the two dimensions of a 2D array.
      const unsigned j = i / ey;
      const unsigned k = i % ey;
      dev_data[k * nx + j] = buffer[j * ey + k];
      break;
    }

    case xzy: {
      // Keep the outer index, swap the two inner dimensions.
      const unsigned plane = ey * ez;
      const unsigned j = i / plane;
      const unsigned k = (i % plane) / ez;
      const unsigned l = i % ez;
      dev_data[j * plane + l * ey + k] = buffer[j * plane + k * ez + l];
      break;
    }

    default:
      break;
  }
}
|
|
|
|
/* Copies an array of floats from `buffer` into the float array `dev_data`,
 * optionally permuting the element layout as selected by `mode`.
 *
 * Launch layout: one thread per element. The flat element index is built from
 * a 2D grid (blockIdx.x, blockIdx.y) of 1D blocks (threadIdx.x); threads whose
 * index is past the element count return without touching memory.
 *
 * Parameters:
 *   buffer    source array (both pointers are dereferenced inside the kernel,
 *             so both must be device-accessible)
 *   dev_data  destination array
 *   nx,ny,nz  extents; a zero ny or nz marks an absent dimension and is
 *             treated as extent 1
 *   mode      x, xx, xy, xyz : element-for-element copy
 *             yx             : buffer[j*ny + k]           -> dev_data[k*nx + j]
 *             xzy            : buffer[j*ny*nz + k*nz + l] -> dev_data[j*ny*nz + l*ny + k]
 *
 * Every case ends in `break`. Without it a plain copy would fall through into
 * the permuting cases, which divide by ny / nz and store to unrelated slots.
 */
__global__ void CudaData_Upload_Kernel_FloatFloat(float* buffer, float* dev_data,
    unsigned nx, unsigned ny, unsigned nz, copy_mode mode)
{
  if(mode == x) mode = xx;

  // A zero extent means "dimension not used"; count it as 1 so the element
  // total and the index arithmetic below agree and never divide by zero.
  const unsigned ey = (ny > 0) ? ny : 1;
  const unsigned ez = (nz > 0) ? nz : 1;
  const unsigned length = nx * ey * ez;

  const unsigned i = (blockIdx.x * gridDim.y + blockIdx.y) * blockDim.x + threadIdx.x;

  if(i >= length) return;

  switch(mode) {
    case xx:
    case xy:
    case xyz:
      // Layout is unchanged: straight copy.
      dev_data[i] = buffer[i];
      break;

    case yx: {
      // Swap the two dimensions of a 2D array.
      const unsigned j = i / ey;
      const unsigned k = i % ey;
      dev_data[k * nx + j] = buffer[j * ey + k];
      break;
    }

    case xzy: {
      // Keep the outer index, swap the two inner dimensions.
      const unsigned plane = ey * ez;
      const unsigned j = i / plane;
      const unsigned k = (i % plane) / ez;
      const unsigned l = i % ez;
      dev_data[j * plane + l * ey + k] = buffer[j * plane + k * ez + l];
      break;
    }

    default:
      break;
  }
}
|
|
|
|
/* Copies an array of ints from `buffer` into the int array `dev_data`,
 * optionally permuting the element layout as selected by `mode`.
 *
 * Launch layout: one thread per element. The flat element index is built from
 * a 2D grid (blockIdx.x, blockIdx.y) of 1D blocks (threadIdx.x); threads whose
 * index is past the element count return without touching memory.
 *
 * Parameters:
 *   buffer    source array (both pointers are dereferenced inside the kernel,
 *             so both must be device-accessible)
 *   dev_data  destination array
 *   nx,ny,nz  extents; a zero ny or nz marks an absent dimension and is
 *             treated as extent 1
 *   mode      x, xx, xy, xyz : element-for-element copy
 *             yx             : buffer[j*ny + k]           -> dev_data[k*nx + j]
 *             xzy            : buffer[j*ny*nz + k*nz + l] -> dev_data[j*ny*nz + l*ny + k]
 *
 * Every case ends in `break`. Without it a plain copy would fall through into
 * the permuting cases, which divide by ny / nz and store to unrelated slots.
 */
__global__ void CudaData_Upload_Kernel_IntInt(int* buffer, int* dev_data,
    unsigned nx, unsigned ny, unsigned nz, copy_mode mode)
{
  if(mode == x) mode = xx;

  // A zero extent means "dimension not used"; count it as 1 so the element
  // total and the index arithmetic below agree and never divide by zero.
  const unsigned ey = (ny > 0) ? ny : 1;
  const unsigned ez = (nz > 0) ? nz : 1;
  const unsigned length = nx * ey * ez;

  const unsigned i = (blockIdx.x * gridDim.y + blockIdx.y) * blockDim.x + threadIdx.x;

  if(i >= length) return;

  switch(mode) {
    case xx:
    case xy:
    case xyz:
      // Layout is unchanged: straight copy.
      dev_data[i] = buffer[i];
      break;

    case yx: {
      // Swap the two dimensions of a 2D array.
      const unsigned j = i / ey;
      const unsigned k = i % ey;
      dev_data[k * nx + j] = buffer[j * ey + k];
      break;
    }

    case xzy: {
      // Keep the outer index, swap the two inner dimensions.
      const unsigned plane = ey * ez;
      const unsigned j = i / plane;
      const unsigned k = (i % plane) / ez;
      const unsigned l = i % ez;
      dev_data[j * plane + l * ey + k] = buffer[j * plane + k * ez + l];
      break;
    }

    default:
      break;
  }
}
|