From aa5882275c68a0266cd43df0eedd28fe92e98266 Mon Sep 17 00:00:00 2001 From: sjplimp Date: Mon, 8 Aug 2011 17:51:58 +0000 Subject: [PATCH] git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@6609 f3b2605a-c512-4ea7-a41b-209d697bcdaa --- src/KSPACE/Install.sh | 2 + src/KSPACE/fft3d.cpp | 455 ++++++++++++++++++++++++++++++-------- src/KSPACE/fft3d.h | 168 ++++++++++---- src/KSPACE/fft3d_wrap.cpp | 4 +- src/KSPACE/fft3d_wrap.h | 6 +- 5 files changed, 499 insertions(+), 136 deletions(-) diff --git a/src/KSPACE/Install.sh b/src/KSPACE/Install.sh index 8dba1dd8b8..51f70ad360 100644 --- a/src/KSPACE/Install.sh +++ b/src/KSPACE/Install.sh @@ -17,6 +17,7 @@ if (test $1 = 1) then cp remap_wrap.cpp .. cp ewald.h .. + cp kissfft.h .. cp pppm.h .. cp pppm_tip4p.h .. cp pair_born_coul_long.h .. @@ -47,6 +48,7 @@ elif (test $1 = 0) then rm -f ../remap_wrap.cpp rm -f ../ewald.h + rm -f ../kissfft.h rm -f ../pppm.h rm -f ../pppm_tip4p.h rm -f ../pair_born_coul_long.h diff --git a/src/KSPACE/fft3d.cpp b/src/KSPACE/fft3d.cpp index dd5cb9a9ff..fd44a42ef2 100644 --- a/src/KSPACE/fft3d.cpp +++ b/src/KSPACE/fft3d.cpp @@ -13,6 +13,9 @@ /* ---------------------------------------------------------------------- Contributing authors: Jim Shepherd (GA Tech) added SGI SCSL support + Axel Kohlmeyer (Temple U) added support for + FFTW3, KISSFFT, Dfti/MKL, and ACML. + Phil Blood (PSC) added single precision FFT. ------------------------------------------------------------------------- */ #include "mpi.h" @@ -22,6 +25,11 @@ #include "fft3d.h" #include "remap.h" +#ifdef FFT_KISSFFT +/* include kissfft implementation */ +#include "kissfft.h" +#endif + #define MIN(A,B) ((A) < (B)) ? (A) : (B) #define MAX(A,B) ((A) > (B)) ? (A) : (B) @@ -58,24 +66,28 @@ void fft_3d(FFT_DATA *in, FFT_DATA *out, int flag, struct fft_plan_3d *plan) { int i,total,length,offset,num; - double norm; + FFT_SCALAR norm, *out_ptr; FFT_DATA *data,*copy; // system specific constants -#ifdef FFT_SCSL +#if defined(FFT_SCSL) int isys = 0; FFT_PREC scalef = 1.0; -#endif -#ifdef FFT_DEC +#elif defined(FFT_DEC) char c = 'C'; char f = 'F'; char b = 'B'; int one = 1; -#endif -#ifdef FFT_T3E +#elif defined(FFT_T3E) int isys = 0; double scalef = 1.0; +#elif defined(FFT_ACML) + int info; +#elif defined(FFT_FFTW3) + FFTW_API(plan) theplan; +#else + // nothing to do for other FFTs. #endif // pre-remap to prepare for 1st FFTs if needed @@ -84,7 +96,7 @@ void fft_3d(FFT_DATA *in, FFT_DATA *out, int flag, struct fft_plan_3d *plan) if (plan->pre_plan) { if (plan->pre_target == 0) copy = out; else copy = plan->copy; - remap_3d((double *) in, (double *) copy, (double *) plan->scratch, + remap_3d((FFT_SCALAR *) in, (FFT_SCALAR *) copy, (FFT_SCALAR *) plan->scratch, plan->pre_plan); data = copy; } @@ -96,37 +108,53 @@ void fft_3d(FFT_DATA *in, FFT_DATA *out, int flag, struct fft_plan_3d *plan) total = plan->total1; length = plan->length1; -#ifdef FFT_SGI +#if defined(FFT_SGI) for (offset = 0; offset < total; offset += length) FFT_1D(flag,length,&data[offset],1,plan->coeff1); -#endif -#ifdef FFT_SCSL +#elif defined(FFT_SCSL) for (offset = 0; offset < total; offset += length) FFT_1D(flag,length,scalef,&data[offset],&data[offset],plan->coeff1, plan->work1,&isys); -#endif -#ifdef FFT_INTEL +#elif defined(FFT_ACML) + num=total/length; + FFT_1D(&flag,&num,&length,data,plan->coeff1,&info); +#elif defined(FFT_INTEL) for (offset = 0; offset < total; offset += length) FFT_1D(&data[offset],&length,&flag,plan->coeff1); -#endif -#ifdef FFT_DEC +#elif defined(FFT_MKL) + if (flag == -1) + DftiComputeForward(plan->handle_fast,data); + else + DftiComputeBackward(plan->handle_fast,data); +#elif defined(FFT_DEC) if (flag == -1) for (offset = 0; offset < total; offset += length) FFT_1D(&c,&c,&f,&data[offset],&data[offset],&length,&one); else for (offset = 0; offset < total; offset += length) FFT_1D(&c,&c,&b,&data[offset],&data[offset],&length,&one); -#endif -#ifdef FFT_T3E +#elif defined(FFT_T3E) for (offset = 0; offset < total; offset += length) FFT_1D(&flag,&length,&scalef,&data[offset],&data[offset],plan->coeff1, plan->work1,&isys); -#endif -#ifdef FFT_FFTW +#elif defined(FFT_FFTW2) if (flag == -1) fftw(plan->plan_fast_forward,total/length,data,1,length,NULL,0,0); else fftw(plan->plan_fast_backward,total/length,data,1,length,NULL,0,0); +#elif defined(FFT_FFTW3) + if (flag == -1) + theplan=plan->plan_fast_forward; + else + theplan=plan->plan_fast_backward; + FFTW_API(execute_dft)(theplan,data,data); +#else + if (flag == -1) + for (offset = 0; offset < total; offset += length) + kiss_fft(plan->cfg_fast_forward,&data[offset],&data[offset]); + else + for (offset = 0; offset < total; offset += length) + kiss_fft(plan->cfg_fast_backward,&data[offset],&data[offset]); #endif // 1st mid-remap to prepare for 2nd FFTs @@ -134,7 +162,7 @@ void fft_3d(FFT_DATA *in, FFT_DATA *out, int flag, struct fft_plan_3d *plan) if (plan->mid1_target == 0) copy = out; else copy = plan->copy; - remap_3d((double *) data, (double *) copy, (double *) plan->scratch, + remap_3d((FFT_SCALAR *) data, (FFT_SCALAR *) copy, (FFT_SCALAR *) plan->scratch, plan->mid1_plan); data = copy; @@ -143,37 +171,53 @@ void fft_3d(FFT_DATA *in, FFT_DATA *out, int flag, struct fft_plan_3d *plan) total = plan->total2; length = plan->length2; -#ifdef FFT_SGI +#if defined(FFT_SGI) for (offset = 0; offset < total; offset += length) FFT_1D(flag,length,&data[offset],1,plan->coeff2); -#endif -#ifdef FFT_SCSL +#elif defined(FFT_SCSL) for (offset = 0; offset < total; offset += length) FFT_1D(flag,length,scalef,&data[offset],&data[offset],plan->coeff2, plan->work2,&isys); -#endif -#ifdef FFT_INTEL +#elif defined(FFT_ACML) + num=total/length; + FFT_1D(&flag,&num,&length,data,plan->coeff2,&info); +#elif defined(FFT_INTEL) for (offset = 0; offset < total; offset += length) FFT_1D(&data[offset],&length,&flag,plan->coeff2); -#endif -#ifdef FFT_DEC +#elif defined(FFT_MKL) + if (flag == -1) + DftiComputeForward(plan->handle_mid,data); + else + DftiComputeBackward(plan->handle_mid,data); +#elif defined(FFT_DEC) if (flag == -1) for (offset = 0; offset < total; offset += length) FFT_1D(&c,&c,&f,&data[offset],&data[offset],&length,&one); else for (offset = 0; offset < total; offset += length) FFT_1D(&c,&c,&b,&data[offset],&data[offset],&length,&one); -#endif -#ifdef FFT_T3E +#elif defined(FFT_T3E) for (offset = 0; offset < total; offset += length) FFT_1D(&flag,&length,&scalef,&data[offset],&data[offset],plan->coeff2, plan->work2,&isys); -#endif -#ifdef FFT_FFTW +#elif defined(FFT_FFTW2) if (flag == -1) fftw(plan->plan_mid_forward,total/length,data,1,length,NULL,0,0); else fftw(plan->plan_mid_backward,total/length,data,1,length,NULL,0,0); +#elif defined(FFT_FFTW3) + if (flag == -1) + theplan=plan->plan_mid_forward; + else + theplan=plan->plan_mid_backward; + FFTW_API(execute_dft)(theplan,data,data); +#else + if (flag == -1) + for (offset = 0; offset < total; offset += length) + kiss_fft(plan->cfg_mid_forward,&data[offset],&data[offset]); + else + for (offset = 0; offset < total; offset += length) + kiss_fft(plan->cfg_mid_backward,&data[offset],&data[offset]); #endif // 2nd mid-remap to prepare for 3rd FFTs @@ -181,7 +225,7 @@ void fft_3d(FFT_DATA *in, FFT_DATA *out, int flag, struct fft_plan_3d *plan) if (plan->mid2_target == 0) copy = out; else copy = plan->copy; - remap_3d((double *) data, (double *) copy, (double *) plan->scratch, + remap_3d((FFT_SCALAR *) data, (FFT_SCALAR *) copy, (FFT_SCALAR *) plan->scratch, plan->mid2_plan); data = copy; @@ -190,55 +234,78 @@ void fft_3d(FFT_DATA *in, FFT_DATA *out, int flag, struct fft_plan_3d *plan) total = plan->total3; length = plan->length3; -#ifdef FFT_SGI +#if defined(FFT_SGI) for (offset = 0; offset < total; offset += length) FFT_1D(flag,length,&data[offset],1,plan->coeff3); -#endif -#ifdef FFT_SCSL +#elif defined(FFT_SCSL) for (offset = 0; offset < total; offset += length) FFT_1D(flag,length,scalef,&data[offset],&data[offset],plan->coeff3, plan->work3,&isys); -#endif -#ifdef FFT_INTEL +#elif defined(FFT_ACML) + num=total/length; + FFT_1D(&flag,&num,&length,data,plan->coeff3,&info); +#elif defined(FFT_INTEL) for (offset = 0; offset < total; offset += length) FFT_1D(&data[offset],&length,&flag,plan->coeff3); -#endif -#ifdef FFT_DEC +#elif defined(FFT_MKL) + if (flag == -1) + DftiComputeForward(plan->handle_slow,data); + else + DftiComputeBackward(plan->handle_slow,data); +#elif defined(FFT_DEC) if (flag == -1) for (offset = 0; offset < total; offset += length) FFT_1D(&c,&c,&f,&data[offset],&data[offset],&length,&one); else for (offset = 0; offset < total; offset += length) FFT_1D(&c,&c,&b,&data[offset],&data[offset],&length,&one); -#endif -#ifdef FFT_T3E +#elif defined(FFT_T3E) for (offset = 0; offset < total; offset += length) FFT_1D(&flag,&length,&scalef,&data[offset],&data[offset],plan->coeff3, plan->work3,&isys); -#endif -#ifdef FFT_FFTW +#elif defined(FFT_FFTW2) if (flag == -1) fftw(plan->plan_slow_forward,total/length,data,1,length,NULL,0,0); else fftw(plan->plan_slow_backward,total/length,data,1,length,NULL,0,0); +#elif defined(FFT_FFTW3) + if (flag == -1) + theplan=plan->plan_slow_forward; + else + theplan=plan->plan_slow_backward; + FFTW_API(execute_dft)(theplan,data,data); +#else + if (flag == -1) + for (offset = 0; offset < total; offset += length) + kiss_fft(plan->cfg_slow_forward,&data[offset],&data[offset]); + else + for (offset = 0; offset < total; offset += length) + kiss_fft(plan->cfg_slow_backward,&data[offset],&data[offset]); #endif // post-remap to put data in output format if needed // destination is always out if (plan->post_plan) - remap_3d((double *) data, (double *) out, (double *) plan->scratch, + remap_3d((FFT_SCALAR *) data, (FFT_SCALAR *) out, (FFT_SCALAR *) plan->scratch, plan->post_plan); // scaling if required - -#ifndef FFT_T3E +#if !defined(FFT_T3E) && !defined(FFT_ACML) if (flag == 1 && plan->scaled) { norm = plan->norm; num = plan->normnum; + out_ptr = (FFT_SCALAR *)out; for (i = 0; i < num; i++) { +#if defined(FFT_FFTW3) + *(out_ptr++) *= norm; + *(out_ptr++) *= norm; +#elif defined(FFT_MKL) + out[i] *= norm; +#else out[i].re *= norm; out[i].im *= norm; +#endif } } #endif @@ -250,6 +317,16 @@ void fft_3d(FFT_DATA *in, FFT_DATA *out, int flag, struct fft_plan_3d *plan) for (i = 0; i < num; i++) out[i] *= (norm,norm); } #endif + +#ifdef FFT_ACML + norm = plan->norm; + num = plan->normnum; + for (i = 0; i < num; i++) { + out[i].re *= norm; + out[i].im *= norm; + } +#endif + } /* ---------------------------------------------------------------------- @@ -312,13 +389,6 @@ struct fft_plan_3d *fft_3d_create_plan( MPI_Comm_rank(comm,&me); MPI_Comm_size(comm,&nprocs); -#ifdef FFT_NONE - if (me == 0) { - printf("ERROR: Cannot use FFTs with FFT_NONE set\n"); - return NULL; - } -#endif - // compute division of procs in 2 dimensions not on-processor bifactor(nprocs,&np1,&np2); @@ -529,7 +599,7 @@ struct fft_plan_3d *fft_3d_create_plan( // system specific pre-computation of 1d FFT coeffs // and scaling normalization -#ifdef FFT_SGI +#if defined(FFT_SGI) plan->coeff1 = (FFT_DATA *) malloc((nfast+15)*sizeof(FFT_DATA)); plan->coeff2 = (FFT_DATA *) malloc((nmid+15)*sizeof(FFT_DATA)); @@ -551,9 +621,7 @@ struct fft_plan_3d *fft_3d_create_plan( (out_khi-out_klo+1); } -#endif - -#ifdef FFT_SCSL +#elif defined(FFT_SCSL) plan->coeff1 = (FFT_PREC *) malloc((2*nfast+30)*sizeof(FFT_PREC)); plan->coeff2 = (FFT_PREC *) malloc((2*nmid+30)*sizeof(FFT_PREC)); @@ -586,9 +654,37 @@ struct fft_plan_3d *fft_3d_create_plan( (out_khi-out_klo+1); } -#endif +#elif defined(FFT_ACML) -#ifdef FFT_INTEL + plan->coeff1 = (FFT_DATA *) malloc((3*nfast+100)*sizeof(FFT_DATA)); + plan->coeff2 = (FFT_DATA *) malloc((3*nmid+100)*sizeof(FFT_DATA)); + plan->coeff3 = (FFT_DATA *) malloc((3*nslow+100)*sizeof(FFT_DATA)); + + if (plan->coeff1 == NULL || plan->coeff2 == NULL || + plan->coeff3 == NULL) return NULL; + + int isign = 100; + int isys = 1; + int info = 0; + FFT_DATA *dummy = NULL; + + FFT_1D(&isign,&isys,&nfast,dummy,plan->coeff1,&info); + FFT_1D(&isign,&isys,&nmid,dummy,plan->coeff2,&info); + FFT_1D(&isign,&isys,&nslow,dummy,plan->coeff3,&info); + + if (scaled == 0) { + plan->scaled = 0; + plan->norm = sqrt(nfast*nmid*nslow); + plan->normnum = (out_ihi-out_ilo+1) * (out_jhi-out_jlo+1) * + (out_khi-out_klo+1); + } else { + plan->scaled = 1; + plan->norm = sqrt(nfast*nmid*nslow); + plan->normnum = (out_ihi-out_ilo+1) * (out_jhi-out_jlo+1) * + (out_khi-out_klo+1); + } + +#elif defined(FFT_INTEL) flag = 0; @@ -632,9 +728,38 @@ struct fft_plan_3d *fft_3d_create_plan( else plan->scaled = 0; -#endif +#elif defined(FFT_MKL) + DftiCreateDescriptor( &(plan->handle_fast), FFT_MKL_PREC, DFTI_COMPLEX, 1, (MKL_LONG)nfast); + DftiSetValue(plan->handle_fast, DFTI_NUMBER_OF_TRANSFORMS, (MKL_LONG)plan->total1/nfast); + DftiSetValue(plan->handle_fast, DFTI_PLACEMENT,DFTI_INPLACE); + DftiSetValue(plan->handle_fast, DFTI_INPUT_DISTANCE, (MKL_LONG)nfast); + DftiSetValue(plan->handle_fast, DFTI_OUTPUT_DISTANCE, (MKL_LONG)nfast); + DftiCommitDescriptor(plan->handle_fast); -#ifdef FFT_DEC + DftiCreateDescriptor( &(plan->handle_mid), FFT_MKL_PREC, DFTI_COMPLEX, 1, (MKL_LONG)nmid); + DftiSetValue(plan->handle_mid, DFTI_NUMBER_OF_TRANSFORMS, (MKL_LONG)plan->total2/nmid); + DftiSetValue(plan->handle_mid, DFTI_PLACEMENT,DFTI_INPLACE); + DftiSetValue(plan->handle_mid, DFTI_INPUT_DISTANCE, (MKL_LONG)nmid); + DftiSetValue(plan->handle_mid, DFTI_OUTPUT_DISTANCE, (MKL_LONG)nmid); + DftiCommitDescriptor(plan->handle_mid); + + DftiCreateDescriptor( &(plan->handle_slow), FFT_MKL_PREC, DFTI_COMPLEX, 1, (MKL_LONG)nslow); + DftiSetValue(plan->handle_slow, DFTI_NUMBER_OF_TRANSFORMS, (MKL_LONG)plan->total3/nslow); + DftiSetValue(plan->handle_slow, DFTI_PLACEMENT,DFTI_INPLACE); + DftiSetValue(plan->handle_slow, DFTI_INPUT_DISTANCE, (MKL_LONG)nslow); + DftiSetValue(plan->handle_slow, DFTI_OUTPUT_DISTANCE, (MKL_LONG)nslow); + DftiCommitDescriptor(plan->handle_slow); + + if (scaled == 0) + plan->scaled = 0; + else { + plan->scaled = 1; + plan->norm = 1.0/(nfast*nmid*nslow); + plan->normnum = (out_ihi-out_ilo+1) * (out_jhi-out_jlo+1) * + (out_khi-out_klo+1); + } + +#elif defined(FFT_DEC) if (scaled == 0) { plan->scaled = 1; @@ -645,9 +770,7 @@ struct fft_plan_3d *fft_3d_create_plan( else plan->scaled = 0; -#endif - -#ifdef FFT_T3E +#elif defined(FFT_T3E) plan->coeff1 = (double *) malloc((12*nfast)*sizeof(double)); plan->coeff2 = (double *) malloc((12*nmid)*sizeof(double)); @@ -680,9 +803,7 @@ struct fft_plan_3d *fft_3d_create_plan( (out_khi-out_klo+1); } -#endif - -#ifdef FFT_FFTW +#elif defined(FFT_FFTW2) plan->plan_fast_forward = fftw_create_plan(nfast,FFTW_FORWARD,FFTW_ESTIMATE | FFTW_IN_PLACE); @@ -724,6 +845,81 @@ struct fft_plan_3d *fft_3d_create_plan( (out_khi-out_klo+1); } +#elif defined(FFT_FFTW3) + plan->plan_fast_forward = + FFTW_API(plan_many_dft)(1, &nfast,plan->total1/plan->length1, + NULL,&nfast,1,plan->length1, + NULL,&nfast,1,plan->length1, + FFTW_FORWARD,FFTW_ESTIMATE); + plan->plan_fast_backward = + FFTW_API(plan_many_dft)(1, &nfast,plan->total1/plan->length1, + NULL,&nfast,1,plan->length1, + NULL,&nfast,1,plan->length1, + FFTW_BACKWARD,FFTW_ESTIMATE); + plan->plan_mid_forward = + FFTW_API(plan_many_dft)(1, &nmid,plan->total2/plan->length2, + NULL,&nmid,1,plan->length2, + NULL,&nmid,1,plan->length2, + FFTW_FORWARD,FFTW_ESTIMATE); + plan->plan_mid_backward = + FFTW_API(plan_many_dft)(1, &nmid,plan->total2/plan->length2, + NULL,&nmid,1,plan->length2, + NULL,&nmid,1,plan->length2, + FFTW_BACKWARD,FFTW_ESTIMATE); + plan->plan_slow_forward = + FFTW_API(plan_many_dft)(1, &nslow,plan->total3/plan->length3, + NULL,&nslow,1,plan->length3, + NULL,&nslow,1,plan->length3, + FFTW_FORWARD,FFTW_ESTIMATE); + plan->plan_slow_backward = + FFTW_API(plan_many_dft)(1, &nslow,plan->total3/plan->length3, + NULL,&nslow,1,plan->length3, + NULL,&nslow,1,plan->length3, + FFTW_BACKWARD,FFTW_ESTIMATE); + + if (scaled == 0) + plan->scaled = 0; + else { + plan->scaled = 1; + plan->norm = 1.0/(nfast*nmid*nslow); + plan->normnum = (out_ihi-out_ilo+1) * (out_jhi-out_jlo+1) * + (out_khi-out_klo+1); + } +#else + plan->cfg_fast_forward = kiss_fft_alloc(nfast,0,NULL,NULL); + plan->cfg_fast_backward = kiss_fft_alloc(nfast,1,NULL,NULL); + + if (nmid == nfast) { + plan->cfg_mid_forward = plan->cfg_fast_forward; + plan->cfg_mid_backward = plan->cfg_fast_backward; + } + else { + plan->cfg_mid_forward = kiss_fft_alloc(nmid,0,NULL,NULL); + plan->cfg_mid_backward = kiss_fft_alloc(nmid,1,NULL,NULL); + } + + if (nslow == nfast) { + plan->cfg_slow_forward = plan->cfg_fast_forward; + plan->cfg_slow_backward = plan->cfg_fast_backward; + } + else if (nslow == nmid) { + plan->cfg_slow_forward = plan->cfg_mid_forward; + plan->cfg_slow_backward = plan->cfg_mid_backward; + } + else { + plan->cfg_slow_forward = kiss_fft_alloc(nslow,0,NULL,NULL); + plan->cfg_slow_backward = kiss_fft_alloc(nslow,1,NULL,NULL); + } + + if (scaled == 0) + plan->scaled = 0; + else { + plan->scaled = 1; + plan->norm = 1.0/(nfast*nmid*nslow); + plan->normnum = (out_ihi-out_ilo+1) * (out_jhi-out_jlo+1) * + (out_khi-out_klo+1); + } + #endif return plan; @@ -743,35 +939,39 @@ void fft_3d_destroy_plan(struct fft_plan_3d *plan) if (plan->copy) free(plan->copy); if (plan->scratch) free(plan->scratch); -#ifdef FFT_SGI +#if defined(FFT_SGI) free(plan->coeff1); free(plan->coeff2); free(plan->coeff3); -#endif -#ifdef FFT_SCSL +#elif defined(FFT_SCSL) free(plan->coeff1); free(plan->coeff2); free(plan->coeff3); free(plan->work1); free(plan->work2); free(plan->work3); -#endif -#ifdef FFT_INTEL +#elif defined(FFT_ACML) free(plan->coeff1); free(plan->coeff2); free(plan->coeff3); -#endif -#ifdef FFT_T3E +#elif defined(FFT_INTEL) + free(plan->coeff1); + free(plan->coeff2); + free(plan->coeff3); +#elif defined(FFT_MKL) + DftiFreeDescriptor(&(plan->handle_fast)); + DftiFreeDescriptor(&(plan->handle_mid)); + DftiFreeDescriptor(&(plan->handle_slow)); +#elif defined(FFT_T3E) free(plan->coeff1); free(plan->coeff2); free(plan->coeff3); free(plan->work1); free(plan->work2); free(plan->work3); -#endif -#ifdef FFT_FFTW - if (plan->plan_slow_forward != plan->plan_mid_forward && - plan->plan_slow_forward != plan->plan_fast_forward) { +#elif defined(FFT_FFTW2) + if (plan->plan_slow_forward != plan->plan_fast_forward && + plan->plan_slow_forward != plan->plan_mid_forward) { fftw_destroy_plan(plan->plan_slow_forward); fftw_destroy_plan(plan->plan_slow_backward); } @@ -781,6 +981,25 @@ void fft_3d_destroy_plan(struct fft_plan_3d *plan) } fftw_destroy_plan(plan->plan_fast_forward); fftw_destroy_plan(plan->plan_fast_backward); +#elif defined(FFT_FFTW3) + FFTW_API(destroy_plan)(plan->plan_slow_forward); + FFTW_API(destroy_plan)(plan->plan_slow_backward); + FFTW_API(destroy_plan)(plan->plan_mid_forward); + FFTW_API(destroy_plan)(plan->plan_mid_backward); + FFTW_API(destroy_plan)(plan->plan_fast_forward); + FFTW_API(destroy_plan)(plan->plan_fast_backward); +#else + if (plan->cfg_slow_forward != plan->cfg_fast_forward && + plan->cfg_slow_forward != plan->cfg_mid_forward) { + free(plan->cfg_slow_forward); + free(plan->cfg_slow_backward); + } + if (plan->cfg_mid_forward != plan->cfg_fast_forward) { + free(plan->cfg_mid_forward); + free(plan->cfg_mid_backward); + } + free(plan->cfg_fast_forward); + free(plan->cfg_fast_backward); #endif free(plan); @@ -866,7 +1085,7 @@ void bifactor(int n, int *factor1, int *factor2) void fft_1d_only(FFT_DATA *data, int nsize, int flag, struct fft_plan_3d *plan) { int i,total,length,offset,num; - double norm; + FFT_SCALAR norm, *data_ptr; // system specific constants @@ -897,6 +1116,12 @@ void fft_1d_only(FFT_DATA *data, int nsize, int flag, struct fft_plan_3d *plan) int total3 = plan->total3; int length3 = plan->length3; +// fftw3 and Dfti in MKL encode the number of transforms +// into the plan, so we cannot operate on a smaller data set. +#if defined(FFT_MKL) || defined(FFT_FFTW3) + if ((total1 > nsize) || (total2 > nsize) || (total3 > nsize)) + return; +#endif if (total1 > nsize) total1 = (nsize/length1) * length1; if (total2 > nsize) total2 = (nsize/length2) * length2; if (total3 > nsize) total3 = (nsize/length3) * length3; @@ -911,8 +1136,7 @@ void fft_1d_only(FFT_DATA *data, int nsize, int flag, struct fft_plan_3d *plan) FFT_1D(flag,length2,&data[offset],1,plan->coeff2); for (offset = 0; offset < total3; offset += length3) FFT_1D(flag,length3,&data[offset],1,plan->coeff3); -#endif -#ifdef FFT_SCSL +#elif defined(FFT_SCSL) for (offset = 0; offset < total1; offset += length1) FFT_1D(flag,length1,scalef,&data[offset],&data[offset],plan->coeff1, plan->work1,&isys); @@ -922,16 +1146,32 @@ void fft_1d_only(FFT_DATA *data, int nsize, int flag, struct fft_plan_3d *plan) for (offset = 0; offset < total3; offset += length3) FFT_1D(flag,length3,scalef,&data[offset],&data[offset],plan->coeff3, plan->work3,&isys); -#endif -#ifdef FFT_INTEL +#elif defined(FFT_ACML) + int info=0; + num=total1/length1; + FFT_1D(&flag,&num,&length1,data,plan->coeff1,&info); + num=total2/length2; + FFT_1D(&flag,&num,&length2,data,plan->coeff2,&info); + num=total3/length3; + FFT_1D(&flag,&num,&length3,data,plan->coeff3,&info); +#elif defined(FFT_INTEL) for (offset = 0; offset < total1; offset += length1) FFT_1D(&data[offset],&length1,&flag,plan->coeff1); for (offset = 0; offset < total2; offset += length2) FFT_1D(&data[offset],&length2,&flag,plan->coeff2); for (offset = 0; offset < total3; offset += length3) FFT_1D(&data[offset],&length3,&flag,plan->coeff3); -#endif -#ifdef FFT_DEC +#elif defined(FFT_MKL) + if (flag == -1) { + DftiComputeForward(plan->handle_fast,data); + DftiComputeForward(plan->handle_mid,data); + DftiComputeForward(plan->handle_slow,data); + } else { + DftiComputeBackward(plan->handle_fast,data); + DftiComputeBackward(plan->handle_mid,data); + DftiComputeBackward(plan->handle_slow,data); + } +#elif defined(FFT_DEC) if (flag == -1) { for (offset = 0; offset < total1; offset += length1) FFT_1D(&c,&c,&f,&data[offset],&data[offset],&length1,&one); @@ -947,8 +1187,7 @@ void fft_1d_only(FFT_DATA *data, int nsize, int flag, struct fft_plan_3d *plan) for (offset = 0; offset < total3; offset += length3) FFT_1D(&c,&c,&b,&data[offset],&data[offset],&length3,&one); } -#endif -#ifdef FFT_T3E +#elif defined(FFT_T3E) for (offset = 0; offset < total1; offset += length1) FFT_1D(&flag,&length1,&scalef,&data[offset],&data[offset],plan->coeff1, plan->work1,&isys); @@ -958,8 +1197,7 @@ void fft_1d_only(FFT_DATA *data, int nsize, int flag, struct fft_plan_3d *plan) for (offset = 0; offset < total3; offset += length3) FFT_1D(&flag,&length3,&scalef,&data[offset],&data[offset],plan->coeff3, plan->work3,&isys); -#endif -#ifdef FFT_FFTW +#elif defined(FFT_FFTW2) if (flag == -1) { fftw(plan->plan_fast_forward,total1/length1,data,1,0,NULL,0,0); fftw(plan->plan_mid_forward,total2/length2,data,1,0,NULL,0,0); @@ -969,6 +1207,39 @@ void fft_1d_only(FFT_DATA *data, int nsize, int flag, struct fft_plan_3d *plan) fftw(plan->plan_mid_backward,total2/length2,data,1,0,NULL,0,0); fftw(plan->plan_slow_backward,total3/length3,data,1,0,NULL,0,0); } +#elif defined(FFT_FFTW3) + FFTW_API(plan) theplan; + if (flag == -1) + theplan=plan->plan_fast_forward; + else + theplan=plan->plan_fast_backward; + FFTW_API(execute_dft)(theplan,data,data); + if (flag == -1) + theplan=plan->plan_mid_forward; + else + theplan=plan->plan_mid_backward; + FFTW_API(execute_dft)(theplan,data,data); + if (flag == -1) + theplan=plan->plan_slow_forward; + else + theplan=plan->plan_slow_backward; + FFTW_API(execute_dft)(theplan,data,data); +#else + if (flag == -1) { + for (offset = 0; offset < total1; offset += length1) + kiss_fft(plan->cfg_fast_forward,&data[offset],&data[offset]); + for (offset = 0; offset < total2; offset += length2) + kiss_fft(plan->cfg_mid_forward,&data[offset],&data[offset]); + for (offset = 0; offset < total3; offset += length3) + kiss_fft(plan->cfg_slow_forward,&data[offset],&data[offset]); + } else { + for (offset = 0; offset < total1; offset += length1) + kiss_fft(plan->cfg_fast_backward,&data[offset],&data[offset]); + for (offset = 0; offset < total2; offset += length2) + kiss_fft(plan->cfg_mid_backward,&data[offset],&data[offset]); + for (offset = 0; offset < total3; offset += length3) + kiss_fft(plan->cfg_slow_backward,&data[offset],&data[offset]); + } #endif // scaling if required @@ -978,9 +1249,17 @@ void fft_1d_only(FFT_DATA *data, int nsize, int flag, struct fft_plan_3d *plan) if (flag == 1 && plan->scaled) { norm = plan->norm; num = MIN(plan->normnum,nsize); + data_ptr = (FFT_SCALAR *)data; for (i = 0; i < num; i++) { +#if defined(FFT_FFTW3) + *(data_ptr++) *= norm; + *(data_ptr++) *= norm; +#elif defined(FFT_MKL) + data[i] *= norm; +#else data[i].re *= norm; data[i].im *= norm; +#endif } } #endif diff --git a/src/KSPACE/fft3d.h b/src/KSPACE/fft3d.h index 3c3c9adb93..842c4c43f2 100644 --- a/src/KSPACE/fft3d.h +++ b/src/KSPACE/fft3d.h @@ -1,4 +1,4 @@ -/* ---------------------------------------------------------------------- +/* -*- c++ -*- ---------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator http://lammps.sandia.gov, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov @@ -16,7 +16,19 @@ // FFT_PRECISION = 1 is single-precision complex (4-byte real, 4-byte imag) // FFT_PRECISION = 2 is double-precision complex (8-byte real, 8-byte imag) +#ifdef FFT_SINGLE +#define FFT_PRECISION 1 +typedef float FFT_SCALAR; +#else #define FFT_PRECISION 2 +typedef double FFT_SCALAR; +#endif + + +// set default fftw library. switch to FFT_FFTW3 when convenient. +#ifdef FFT_FFTW +#define FFT_FFTW2 +#endif // ------------------------------------------------------------------------- @@ -24,7 +36,7 @@ #if FFT_PRECISION == 1 -#ifdef FFT_SGI +#if defined(FFT_SGI) #include "fft.h" typedef complex FFT_DATA; #define FFT_1D cfft1d @@ -34,9 +46,7 @@ extern "C" { FFT_DATA *cfft1di(int, FFT_DATA *); } -#endif - -#ifdef FFT_SCSL +#elif defined(FFT_SCSL) #include typedef scsl_complex FFT_DATA; typedef float FFT_PREC; @@ -47,9 +57,17 @@ extern "C" { FFT_PREC *, FFT_PREC *, int *); } -#endif +#elif defined(FFT_ACML) +typedef struct { + float re; + float im; +} FFT_DATA; +#define FFT_1D cfft1m_ +extern "C" { + void cfft1m_(int *, int *, int *, FFT_DATA *, FFT_DATA *, int *); +} -#ifdef FFT_INTEL +#elif defined(FFT_INTEL) typedef struct { float re; float im; @@ -59,9 +77,13 @@ typedef struct { extern "C" { void cfft1d_(FFT_DATA *, int *, int *, FFT_DATA *); } -#endif -#ifdef FFT_DEC +#elif defined(FFT_MKL) +#include "mkl_dfti.h" +typedef float _Complex FFT_DATA; +#define FFT_MKL_PREC DFTI_SINGLE + +#elif defined(FFT_DEC) typedef struct { float re; float im; @@ -70,9 +92,8 @@ typedef struct { extern "C" { void cfft_(char *, char *, char *, FFT_DATA *, FFT_DATA *, int *, int *); } -#endif -#ifdef FFT_T3E +#elif defined(FFT_T3E) #include typedef complex single FFT_DATA; #define FFT_1D GGFFT @@ -81,29 +102,43 @@ extern "C" { void GGFFT(int *, int *, double *, FFT_DATA *, FFT_DATA *, double *, double *, int *); } -#endif -#ifdef FFT_FFTW +#elif defined(FFT_FFTW2) +#if defined(FFTW_SIZE) +#include "sfftw.h" +#else #include "fftw.h" +#endif typedef FFTW_COMPLEX FFT_DATA; -#endif -#ifdef FFT_NONE +#elif defined(FFT_FFTW3) +#include "fftw3.h" +typedef fftwf_complex FFT_DATA; +#define FFTW_API(function) fftwf_ ## function + +#else + +/* use a stripped down version of kiss fft as default fft */ +#ifndef FFT_KISSFFT +#define FFT_KISSFFT +#endif +#define kiss_fft_scalar float typedef struct { - float re; - float im; + kiss_fft_scalar re; + kiss_fft_scalar im; } FFT_DATA; -#endif +struct kiss_fft_state; +typedef struct kiss_fft_state* kiss_fft_cfg; #endif // ------------------------------------------------------------------------- // Data types for double-precision complex -#if FFT_PRECISION == 2 +#elif FFT_PRECISION == 2 -#ifdef FFT_SGI +#if defined(FFT_SGI) #include "fft.h" typedef zomplex FFT_DATA; #define FFT_1D zfft1d @@ -112,9 +147,8 @@ extern "C" { int zfft1d(int, int, FFT_DATA *, int, FFT_DATA *); FFT_DATA *zfft1di(int, FFT_DATA *); } -#endif -#ifdef FFT_SCSL +#elif defined(FFT_SCSL) #include typedef scsl_zomplex FFT_DATA; typedef double FFT_PREC; @@ -124,9 +158,18 @@ extern "C" { int zzfft(int, int, FFT_PREC, FFT_DATA *, FFT_DATA *, FFT_PREC *, FFT_PREC *, int *); } -#endif -#ifdef FFT_INTEL +#elif defined(FFT_ACML) +typedef struct { + double re; + double im; +} FFT_DATA; +#define FFT_1D zfft1m_ +extern "C" { + void zfft1m_(int *, int *, int *, FFT_DATA *, FFT_DATA *, int *); +} + +#elif defined(FFT_INTEL) typedef struct { double re; double im; @@ -136,9 +179,13 @@ typedef struct { extern "C" { void zfft1d_(FFT_DATA *, int *, int *, FFT_DATA *); } -#endif -#ifdef FFT_DEC +#elif defined(FFT_MKL) +#include "mkl_dfti.h" +typedef double _Complex FFT_DATA; +#define FFT_MKL_PREC DFTI_DOUBLE + +#elif defined(FFT_DEC) typedef struct { double re; double im; @@ -147,9 +194,8 @@ typedef struct { extern "C" { void zfft_(char *, char *, char *, FFT_DATA *, FFT_DATA *, int *, int *); } -#endif -#ifdef FFT_T3E +#elif defined(FFT_T3E) #include typedef complex double FFT_DATA; #define FFT_1D CCFFT @@ -158,20 +204,38 @@ extern "C" { void CCFFT(int *, int *, double *, FFT_DATA *, FFT_DATA *, double *, double *, int *); } -#endif -#ifdef FFT_FFTW +#elif defined(FFT_FFTW2) +#if defined(FFTW_SIZE) +#include "dfftw.h" +#else #include "fftw.h" +#endif typedef FFTW_COMPLEX FFT_DATA; -#endif -#ifdef FFT_NONE +#elif defined(FFT_FFTW3) +#include "fftw3.h" +typedef fftw_complex FFT_DATA; +#define FFTW_API(function) fftw_ ## function + +#else + +/* use a stripped down version of kiss fft as default fft */ +#ifndef FFT_KISSFFT +#define FFT_KISSFFT +#endif +#define kiss_fft_scalar double typedef struct { - double re; - double im; + kiss_fft_scalar re; + kiss_fft_scalar im; } FFT_DATA; + +struct kiss_fft_state; +typedef struct kiss_fft_state* kiss_fft_cfg; #endif +#else +#error "FFT_PRECISION needs to be either 1 (=single) or 2 (=double)" #endif // ------------------------------------------------------------------------- @@ -194,39 +258,57 @@ struct fft_plan_3d { double norm; // normalization factor for rescaling // system specific 1d FFT info -#ifdef FFT_SGI +#if defined(FFT_SGI) FFT_DATA *coeff1; FFT_DATA *coeff2; FFT_DATA *coeff3; -#endif -#ifdef FFT_SCSL +#elif defined(FFT_SCSL) FFT_PREC *coeff1; FFT_PREC *coeff2; FFT_PREC *coeff3; FFT_PREC *work1; FFT_PREC *work2; FFT_PREC *work3; -#endif -#ifdef FFT_INTEL +#elif defined(FFT_ACML) FFT_DATA *coeff1; FFT_DATA *coeff2; FFT_DATA *coeff3; -#endif -#ifdef FFT_T3E +#elif defined(FFT_INTEL) + FFT_DATA *coeff1; + FFT_DATA *coeff2; + FFT_DATA *coeff3; +#elif defined(FFT_MKL) + DFTI_DESCRIPTOR *handle_fast; + DFTI_DESCRIPTOR *handle_mid; + DFTI_DESCRIPTOR *handle_slow; +#elif defined(FFT_T3E) double *coeff1; double *coeff2; double *coeff3; double *work1; double *work2; double *work3; -#endif -#ifdef FFT_FFTW +#elif defined(FFT_FFTW2) fftw_plan plan_fast_forward; fftw_plan plan_fast_backward; fftw_plan plan_mid_forward; fftw_plan plan_mid_backward; fftw_plan plan_slow_forward; fftw_plan plan_slow_backward; +#elif defined(FFT_FFTW3) + FFTW_API(plan) plan_fast_forward; + FFTW_API(plan) plan_fast_backward; + FFTW_API(plan) plan_mid_forward; + FFTW_API(plan) plan_mid_backward; + FFTW_API(plan) plan_slow_forward; + FFTW_API(plan) plan_slow_backward; +#elif defined(FFT_KISSFFT) + kiss_fft_cfg cfg_fast_forward; + kiss_fft_cfg cfg_fast_backward; + kiss_fft_cfg cfg_mid_forward; + kiss_fft_cfg cfg_mid_backward; + kiss_fft_cfg cfg_slow_forward; + kiss_fft_cfg cfg_slow_backward; #endif }; diff --git a/src/KSPACE/fft3d_wrap.cpp b/src/KSPACE/fft3d_wrap.cpp index 4f15a87a76..fa0d232a10 100644 --- a/src/KSPACE/fft3d_wrap.cpp +++ b/src/KSPACE/fft3d_wrap.cpp @@ -42,14 +42,14 @@ FFT3d::~FFT3d() /* ---------------------------------------------------------------------- */ -void FFT3d::compute(double *in, double *out, int flag) +void FFT3d::compute(FFT_SCALAR *in, FFT_SCALAR *out, int flag) { fft_3d((FFT_DATA *) in,(FFT_DATA *) out,flag,plan); } /* ---------------------------------------------------------------------- */ -void FFT3d::timing1d(double *in, int nsize, int flag) +void FFT3d::timing1d(FFT_SCALAR *in, int nsize, int flag) { fft_1d_only((FFT_DATA *) in,nsize,flag,plan); } diff --git a/src/KSPACE/fft3d_wrap.h b/src/KSPACE/fft3d_wrap.h index 510dd44a92..a7a86bcf6e 100644 --- a/src/KSPACE/fft3d_wrap.h +++ b/src/KSPACE/fft3d_wrap.h @@ -1,4 +1,4 @@ -/* ---------------------------------------------------------------------- +/* -*- c++ -*- ---------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator http://lammps.sandia.gov, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov @@ -24,8 +24,8 @@ class FFT3d : protected Pointers { FFT3d(class LAMMPS *, MPI_Comm,int,int,int,int,int,int,int,int,int, int,int,int,int,int,int,int,int,int *); ~FFT3d(); - void compute(double *, double *, int); - void timing1d(double *, int, int); + void compute(FFT_SCALAR *, FFT_SCALAR *, int); + void timing1d(FFT_SCALAR *, int, int); private: struct fft_plan_3d *plan;