diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt index 0f99cc6274..0687b54fcb 100644 --- a/cmake/CMakeLists.txt +++ b/cmake/CMakeLists.txt @@ -679,4 +679,9 @@ if(PKG_KSPACE) else() message(STATUS "Using double precision FFTs") endif() + if(FFT_THREADS) + message(STATUS "Using threaded FFTs") + else() + message(STATUS "Using non-threaded FFTs") + endif() endif() diff --git a/cmake/Modules/FindFFTW3.cmake b/cmake/Modules/FindFFTW3.cmake index 552bcc4257..221e4beaad 100644 --- a/cmake/Modules/FindFFTW3.cmake +++ b/cmake/Modules/FindFFTW3.cmake @@ -1,20 +1,23 @@ # - Find fftw3 # Find the native FFTW3 headers and libraries. # -# FFTW3_INCLUDE_DIRS - where to find fftw3.h, etc. -# FFTW3_LIBRARIES - List of libraries when using fftw3. -# FFTW3_FOUND - True if fftw3 found. +# FFTW3_INCLUDE_DIRS - where to find fftw3.h, etc. +# FFTW3_LIBRARIES - List of libraries when using fftw3. +# FFTW3_OMP_LIBRARIES - List of libraries when using fftw3_omp. +# FFTW3_FOUND - True if fftw3 found. +# FFTW3_OMP_FOUND - True if fftw3_omp found. 
# find_package(PkgConfig) pkg_check_modules(PC_FFTW3 fftw3) find_path(FFTW3_INCLUDE_DIR fftw3.h HINTS ${PC_FFTW3_INCLUDE_DIRS}) - find_library(FFTW3_LIBRARY NAMES fftw3 HINTS ${PC_FFTW3_LIBRARY_DIRS}) +find_library(FFTW3_OMP_LIBRARY NAMES fftw3_omp HINTS ${PC_FFTW3_LIBRARY_DIRS}) -set(FFTW3_LIBRARIES ${FFTW3_LIBRARY}) set(FFTW3_INCLUDE_DIRS ${FFTW3_INCLUDE_DIR}) +set(FFTW3_LIBRARIES ${FFTW3_LIBRARY}) +set(FFTW3_OMP_LIBRARIES ${FFTW3_OMP_LIBRARY}) include(FindPackageHandleStandardArgs) # handle the QUIETLY and REQUIRED arguments and set FFTW3_FOUND to TRUE @@ -22,4 +25,4 @@ include(FindPackageHandleStandardArgs) find_package_handle_standard_args(FFTW3 DEFAULT_MSG FFTW3_LIBRARY FFTW3_INCLUDE_DIR) -mark_as_advanced(FFTW3_INCLUDE_DIR FFTW3_LIBRARY ) +mark_as_advanced(FFTW3_INCLUDE_DIR FFTW3_LIBRARY FFTW3_OMP_LIBRARY) diff --git a/cmake/Modules/FindFFTW3F.cmake b/cmake/Modules/FindFFTW3F.cmake index 92d1e85e79..c67aa5faf1 100644 --- a/cmake/Modules/FindFFTW3F.cmake +++ b/cmake/Modules/FindFFTW3F.cmake @@ -1,8 +1,8 @@ -# - Find fftw3f -# Find the native FFTW3F headers and libraries. +# Find the native single precision FFTW3 headers and libraries. # # FFTW3F_INCLUDE_DIRS - where to find fftw3f.h, etc. # FFTW3F_LIBRARIES - List of libraries when using fftw3f. +# FFTW3F_OMP_LIBRARIES - List of libraries when using fftw3f_omp. # FFTW3F_FOUND - True if fftw3f found. 
# @@ -10,11 +10,12 @@ find_package(PkgConfig) pkg_check_modules(PC_FFTW3F fftw3f) find_path(FFTW3F_INCLUDE_DIR fftw3.h HINTS ${PC_FFTW3F_INCLUDE_DIRS}) - find_library(FFTW3F_LIBRARY NAMES fftw3f HINTS ${PC_FFTW3F_LIBRARY_DIRS}) +find_library(FFTW3F_OMP_LIBRARY NAMES fftw3f_omp HINTS ${PC_FFTW3F_LIBRARY_DIRS}) -set(FFTW3F_LIBRARIES ${FFTW3F_LIBRARY}) set(FFTW3F_INCLUDE_DIRS ${FFTW3F_INCLUDE_DIR}) +set(FFTW3F_LIBRARIES ${FFTW3F_LIBRARY}) +set(FFTW3F_OMP_LIBRARIES ${FFTW3F_OMP_LIBRARY}) include(FindPackageHandleStandardArgs) # handle the QUIETLY and REQUIRED arguments and set FFTW3F_FOUND to TRUE @@ -22,4 +23,4 @@ include(FindPackageHandleStandardArgs) find_package_handle_standard_args(FFTW3F DEFAULT_MSG FFTW3F_LIBRARY FFTW3F_INCLUDE_DIR) -mark_as_advanced(FFTW3F_INCLUDE_DIR FFTW3F_LIBRARY ) +mark_as_advanced(FFTW3F_INCLUDE_DIR FFTW3F_LIBRARY FFTW3F_OMP_LIBRARY) diff --git a/cmake/Modules/Packages/KOKKOS.cmake b/cmake/Modules/Packages/KOKKOS.cmake index 2f7d401802..cd4dd21cd7 100644 --- a/cmake/Modules/Packages/KOKKOS.cmake +++ b/cmake/Modules/Packages/KOKKOS.cmake @@ -27,7 +27,9 @@ if(PKG_KOKKOS) ${KOKKOS_PKG_SOURCES_DIR}/modify_kokkos.cpp) if(PKG_KSPACE) - list(APPEND KOKKOS_PKG_SOURCES ${KOKKOS_PKG_SOURCES_DIR}/gridcomm_kokkos.cpp) + list(APPEND KOKKOS_PKG_SOURCES ${KOKKOS_PKG_SOURCES_DIR}/fft3d_kokkos.cpp + ${KOKKOS_PKG_SOURCES_DIR}/gridcomm_kokkos.cpp + ${KOKKOS_PKG_SOURCES_DIR}/remap_kokkos.cpp) endif() set_property(GLOBAL PROPERTY "KOKKOS_PKG_SOURCES" "${KOKKOS_PKG_SOURCES}") diff --git a/cmake/Modules/Packages/KSPACE.cmake b/cmake/Modules/Packages/KSPACE.cmake index 63c42baf2d..8451eef91c 100644 --- a/cmake/Modules/Packages/KSPACE.cmake +++ b/cmake/Modules/Packages/KSPACE.cmake @@ -21,14 +21,36 @@ if(PKG_KSPACE) add_definitions(-DFFT_FFTW3) include_directories(${${FFTW}_INCLUDE_DIRS}) list(APPEND LAMMPS_LINK_LIBS ${${FFTW}_LIBRARIES}) + if(${FFTW}_OMP_LIBRARY) # default FFT_THREADS to ON only when the OpenMP library of the selected FFTW flavor was found + option(FFT_THREADS "Use threaded FFT library" ON) + else()
+ option(FFT_THREADS "Use threaded FFT library" OFF) + endif() + + if(FFT_THREADS) + if(${FFTW}_OMP_LIBRARY) # must be the same flavor as the ${FFTW}_OMP_LIBRARIES list appended on the next lines + add_definitions(-DFFT_FFTW_THREADS) + list(APPEND LAMMPS_LINK_LIBS ${${FFTW}_OMP_LIBRARIES}) + else() + message(FATAL_ERROR "Need OpenMP enabled FFTW3 library for FFT_THREADS") + endif() + endif() elseif(FFT STREQUAL "MKL") find_package(MKL REQUIRED) + option(FFT_THREADS "Use threaded FFT library" OFF) add_definitions(-DFFT_MKL) include_directories(${MKL_INCLUDE_DIRS}) list(APPEND LAMMPS_LINK_LIBS ${MKL_LIBRARIES}) + if (FFT_THREADS) + message(FATAL_ERROR "FFT_THREADS not supported with FFT from MKL") + endif() else() # last option is KISSFFT + option(FFT_THREADS "Use threaded FFT library" OFF) add_definitions(-DFFT_KISS) + if (FFT_THREADS) + message(FATAL_ERROR "FFT_THREADS not supported with KISSFFT") + endif() endif() set(FFT_PACK "array" CACHE STRING "Optimization for FFT") diff --git a/doc/src/Build_settings.txt b/doc/src/Build_settings.txt index 578903774d..48a67a189b 100644 --- a/doc/src/Build_settings.txt +++ b/doc/src/Build_settings.txt @@ -33,8 +33,9 @@ LAMMPS can use them if they are available on your system. [CMake variables]: --D FFT=value # FFTW3 or MKL or KISS or CUFFT, default is FFTW3 if found, else KISS +-D FFT=value # FFTW3 or MKL or KISS, default is FFTW3 if found, else KISS -D FFT_SINGLE=value # yes or no (default), no = double precision +-D FFT_THREADS=value # yes or no, default is yes if FFT=FFTW3 and OpenMP FFT lib found, else no -D FFT_PACK=value # array (default) or pointer or memcpy :pre NOTE: The values for the FFT variable must be in upper-case. 
This is @@ -51,9 +52,10 @@ the FFT library, you can set these variables: [Makefile.machine settings]: -FFT_INC = -DFFT_FFTW3 # -DFFT_FFTW3, -DFFT_FFTW (same as -DFFT_FFTW3), -DFFT_MKL, -DFFT_FFTW_THREADS, -DFFT_CUFFT, or -DFFT_KISS +FFT_INC = -DFFT_FFTW3 # -DFFT_FFTW3, -DFFT_FFTW (same as -DFFT_FFTW3), -DFFT_MKL, or -DFFT_KISS # default is KISS if not specified FFT_INC = -DFFT_SINGLE # do not specify for double precision +FFT_INC = -DFFT_FFTW_THREADS # use threaded FFTW3. requires -DFFT_FFTW3 (or -DFFT_FFTW) FFT_INC = -DFFT_PACK_ARRAY # or -DFFT_PACK_POINTER or -DFFT_PACK_MEMCPY :pre # default is FFT_PACK_ARRAY if not specified @@ -61,7 +63,7 @@ FFT_INC = -I/usr/local/include FFT_PATH = -L/usr/local/lib FFT_LIB = -lfftw3 # FFTW3 double precision FFT_LIB = -lfftw3 -lfftw3f # FFTW3 single precision -FFT_LIB = -lfftw3 -lfftw3_omp # FFTW3 double precision, OpenMP threaded using the KOKKOS package +FFT_LIB = -lfftw3 -lfftw3_omp # FFTW3 double precision, OpenMP threaded FFT_LIB = -lmkl_intel_lp64 -lmkl_sequential -lmkl_core # MKL with Intel compiler FFT_LIB = -lmkl_gf_lp64 -lmkl_sequential -lmkl_core # MKL with GNU compiler FFT_LIB = -lcufft # cuFFT (for GPUs using the KOKKOS package) :pre @@ -119,6 +121,9 @@ library libfftw3f.a make clean ./configure --enable-single; make; make install :pre +When using -DFFT_FFTW_THREADS you need to also have the FFTW3 +OpenMP thread interface libraries available. + Performing 3d FFTs requires communication to transpose the 3d FFT grid. The data packing/unpacking for this can be done in one of 3 modes (ARRAY, POINTER, MEMCPY) as set by the FFT_PACK syntax above. 
diff --git a/src/KOKKOS/fft3d_kokkos.h b/src/KOKKOS/fft3d_kokkos.h index c68d78db56..8d12a3f952 100644 --- a/src/KOKKOS/fft3d_kokkos.h +++ b/src/KOKKOS/fft3d_kokkos.h @@ -26,7 +26,9 @@ #endif #ifdef FFT_FFTW_THREADS -#define FFT_FFTW3 +#if !defined(FFT_FFTW3) +#error "Must use -DFFT_FFTW3 with -DFFT_FFTW_THREADS" +#endif #endif #if defined(FFT_MKL) diff --git a/src/KSPACE/fft3d.cpp b/src/KSPACE/fft3d.cpp index 7d3c8c83f2..a0abd50c01 100644 --- a/src/KSPACE/fft3d.cpp +++ b/src/KSPACE/fft3d.cpp @@ -25,6 +25,9 @@ #include #include "fft3d.h" #include "remap.h" +#if defined(_OPENMP) +#include <omp.h> // declares omp_get_max_threads() used in fft_3d_create_plan +#endif #ifdef FFT_KISS /* include kissfft implementation */ @@ -266,7 +269,7 @@ struct fft_plan_3d *fft_3d_create_plan( int scaled, int permute, int *nbuf, int usecollective) { struct fft_plan_3d *plan; - int me,nprocs; + int me,nprocs,nthreads; int flag,remapflag; int first_ilo,first_ihi,first_jlo,first_jhi,first_klo,first_khi; int second_ilo,second_ihi,second_jlo,second_jhi,second_klo,second_khi; @@ -279,6 +282,14 @@ struct fft_plan_3d *fft_3d_create_plan( MPI_Comm_rank(comm,&me); MPI_Comm_size(comm,&nprocs); +#if defined(_OPENMP) + // query OpenMP info. 
+ // should have been initialized systemwide in Comm class constructor + nthreads = omp_get_max_threads(); +#else + nthreads = 1; +#endif + // compute division of procs in 2 dimensions not on-processor bifactor(nprocs,&np1,&np2); @@ -571,6 +582,13 @@ struct fft_plan_3d *fft_3d_create_plan( */ #elif defined(FFT_FFTW3) +#if defined(FFT_FFTW_THREADS) + if (nthreads > 1) { + if (FFTW_API(init_threads)() != 0) // init_threads returns 0 on failure; then keep single-threaded planning + FFTW_API(plan_with_nthreads)(nthreads); + } +#endif + plan->plan_fast_forward = FFTW_API(plan_many_dft)(1, &nfast,plan->total1/plan->length1, NULL,&nfast,1,plan->length1, @@ -689,6 +707,9 @@ void fft_3d_destroy_plan(struct fft_plan_3d *plan) FFTW_API(destroy_plan)(plan->plan_mid_backward); FFTW_API(destroy_plan)(plan->plan_fast_forward); FFTW_API(destroy_plan)(plan->plan_fast_backward); +#if defined(FFT_FFTW_THREADS) + FFTW_API(cleanup_threads)(); // NOTE(review): FFTW manual forbids using previously created plans after this call; confirm no other fft_plan_3d is still alive here +#endif #else if (plan->cfg_slow_forward != plan->cfg_fast_forward && plan->cfg_slow_forward != plan->cfg_mid_forward) {