forked from lijiext/lammps
104 lines
2.6 KiB
Plaintext
104 lines
2.6 KiB
Plaintext
/* ----------------------------------------------------------------------
   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator

   Original Version:
   http://lammps.sandia.gov, Sandia National Laboratories
   Steve Plimpton, sjplimp@sandia.gov

   See the README file in the top-level LAMMPS directory.

   -----------------------------------------------------------------------

   USER-CUDA Package and associated modifications:
   https://sourceforge.net/projects/lammpscuda/

   Christian Trott, christian.trott@tu-ilmenau.de
   Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
   Theoretical Physics II, University of Technology Ilmenau, Germany

   See the README file in the USER-CUDA directory.

   This software is distributed under the GNU General Public License.
------------------------------------------------------------------------- */
|
|
|
|
//#define CUDA_PRECISION 1
|
|
#include "cuda_precision.h"
|
|
#include "cuda_common.h"
|
|
// One complex FFT element, stored as two consecutive FFT_FLOAT members.
// The permute wrappers in this file reinterpret FFT_DATA* as FFT_FLOAT*,
// so the member order (re first, im second) is part of the layout they use.
struct FFT_DATA {
  FFT_FLOAT re;  // real component
  FFT_FLOAT im;  // imaginary component
};
|
|
|
|
#include "fft3d_cuda_cu.h"
|
|
#include "fft3d_cuda_kernel.cu"
|
|
#include <stdio.h>
|
|
|
|
// Host-side launcher for initfftdata_kernel.
//
//   in     forwarded unchanged to the kernel as its first argument
//   out    forwarded unchanged to the kernel as its second argument
//   nfast  threads per block (block = nfast x 1 x 1)
//   nmid   grid extent in y
//   nslow  grid extent in x
//
// The launch uses no dynamic shared memory and the default stream.  The host
// blocks on the device both before and after the launch.
// NOTE(review): in/out are presumably device pointers, and nfast has to fit
// within the device's threads-per-block limit - confirm against the callers.
void initfftdata(double* in, FFT_FLOAT* out, int nfast, int nmid, int nslow)
{
  const dim3 blocksPerGrid(nslow, nmid, 1);
  const dim3 threadsPerBlock(nfast, 1, 1);

  // Drain previously queued device work before launching.
  cudaThreadSynchronize();

  initfftdata_kernel<<<blocksPerGrid, threadsPerBlock, 0>>>(in, out);

  // Wait for the kernel to finish before returning to the caller.
  cudaThreadSynchronize();

  // MYDBG is defined outside this file (presumably active in debug builds
  // only); when active, this prints the most recent CUDA error string.
  MYDBG(printf("ERROR-CUDA initfftdata_kernel: %s\n",cudaGetErrorString(cudaGetLastError())));
}
|
|
|
|
|
|
// Host-side launcher for permute_kernel.
//
//   in, out  complex buffers, handed to the kernel as flat FFT_FLOAT arrays
//   nfast    half the threads per block (block = 2*nfast x 1 x 1, i.e. one
//            thread per FFT_FLOAT of an nfast-long run of FFT_DATA)
//   nmid     grid extent in y
//   nslow    grid extent in x
//
// The launch uses no dynamic shared memory and the default stream; the host
// blocks until the kernel has finished.
// NOTE(review): 2*nfast has to fit within the device's threads-per-block
// limit - confirm against the callers.
void permute(FFT_DATA* in, FFT_DATA* out, int nfast, int nmid, int nslow)
{
  const dim3 blocksPerGrid(nslow, nmid, 1);
  const dim3 threadsPerBlock(nfast * 2, 1, 1);

  FFT_FLOAT* const src = (FFT_FLOAT*)in;
  FFT_FLOAT* const dst = (FFT_FLOAT*)out;

  permute_kernel<<<blocksPerGrid, threadsPerBlock, 0>>>(src, dst);

  // Wait for the kernel to finish before returning to the caller.
  cudaThreadSynchronize();

  // MYDBG is defined outside this file (presumably active in debug builds
  // only); when active, this prints the most recent CUDA error string.
  MYDBG(printf("ERROR-CUDA permute_kernel: %s\n",cudaGetErrorString(cudaGetLastError())));
}
|
|
|
|
// Host-side launcher used for the "permute and scale" step.
//
//   in, out  complex buffers, handed to the kernel as flat FFT_FLOAT arrays
//   nfast    half the threads per block (block = 2*nfast x 1 x 1)
//   nmid     grid extent in y
//   nslow    grid extent in x
//
// The launch uses no dynamic shared memory and the default stream; the host
// blocks until the kernel has finished.
//
// NOTE(review): despite its name, this wrapper launches permute_kernel with
// exactly the same configuration and arguments as permute(); no scaling is
// visible here.  Confirm whether a dedicated scaling kernel was intended.
// NOTE(review): 2*nfast has to fit within the device's threads-per-block
// limit - confirm against the callers.
void permute_scale(FFT_DATA* in, FFT_DATA* out, int nfast, int nmid, int nslow)
{
  dim3 grid;
  grid.x = nslow;
  grid.y = nmid;
  grid.z = 1;

  dim3 threads;
  threads.x = nfast * 2;
  threads.y = 1;
  threads.z = 1;

  permute_kernel<<<grid, threads, 0>>>((FFT_FLOAT*)in, (FFT_FLOAT*)out);

  // Wait for the kernel to finish before returning to the caller.
  cudaThreadSynchronize();

  // Report the launch/execution status the same way initfftdata() and
  // permute() do, instead of dropping it silently.  MYDBG is defined outside
  // this file (presumably active in debug builds only).
  MYDBG(printf("ERROR-CUDA permute_scale (permute_kernel): %s\n",cudaGetErrorString(cudaGetLastError())));
}
|
|
// Host-side launcher for permute_part_kernel, covering the inclusive index
// ranges [ilo,ihi], [jlo,jhi] and [klo,khi].
//
//   in, out            complex buffers, handed to the kernel as flat
//                      FFT_FLOAT arrays
//   nfast,nmid,nslow   forwarded unchanged to the kernel
//   ihi,ilo            inclusive range that sets the grid extent in x
//   jhi,jlo            inclusive range that sets the grid extent in y
//   khi,klo            inclusive range that sets the block size
//                      (2 threads per index)
//
// The launch uses no dynamic shared memory and the default stream; the host
// blocks on the device before returning.
// NOTE(review): 2*(khi-klo+1) has to fit within the device's
// threads-per-block limit - confirm against the callers.
void permute_part(FFT_DATA* in, FFT_DATA* out, int nfast, int nmid, int nslow, int ihi, int ilo, int jhi, int jlo, int khi, int klo)
{
  const int ni = ihi - ilo + 1;
  const int nj = jhi - jlo + 1;
  const int nk = khi - klo + 1;

  // An empty or inverted range would give a zero launch dimension (or, once
  // converted to unsigned, a huge one), which is an invalid launch
  // configuration.  There is nothing to permute in that case, so skip the
  // launch but keep the synchronization point callers get from this routine.
  if (ni <= 0 || nj <= 0 || nk <= 0) {
    cudaThreadSynchronize();
    return;
  }

  dim3 grid;
  grid.x = ni;
  grid.y = nj;
  grid.z = 1;

  dim3 threads;
  threads.x = nk * 2;
  threads.y = 1;
  threads.z = 1;

  permute_part_kernel<<<grid, threads, 0>>>((FFT_FLOAT*)in, (FFT_FLOAT*)out, nfast, nmid, nslow, ihi, ilo, jhi, jlo, khi, klo);

  // Wait for the kernel to finish before returning to the caller.
  cudaThreadSynchronize();

  // Report the launch/execution status the same way initfftdata() and
  // permute() do, instead of dropping it silently.  MYDBG is defined outside
  // this file (presumably active in debug builds only).
  MYDBG(printf("ERROR-CUDA permute_part_kernel: %s\n",cudaGetErrorString(cudaGetLastError())));
}
|
|
|
|
// Host-side synchronization point for the FFT code: blocks the calling host
// thread on cudaThreadSynchronize().  The call's return status is not
// inspected.
void FFTsyncthreads()
{
  (void)cudaThreadSynchronize();
}
|
|
|