make support for threaded FFTW3 consistent and incorporate it into CMake

This now also includes threaded FFTW3 for regular FFTs, not only KOKKOS.
This commit is contained in:
Axel Kohlmeyer 2019-07-19 16:46:37 -04:00
parent 8b134cb0c9
commit 6a65deeb65
No known key found for this signature in database
GPG Key ID: D9B44E93BF0C375A
8 changed files with 78 additions and 17 deletions

View File

@ -679,4 +679,9 @@ if(PKG_KSPACE)
else()
message(STATUS "Using double precision FFTs")
endif()
# Tell the user at configure time whether threaded FFT support is enabled.
if(NOT FFT_THREADS)
  message(STATUS "Using non-threaded FFTs")
else()
  message(STATUS "Using threaded FFTs")
endif()
endif()

View File

@ -1,20 +1,23 @@
# - Find fftw3
# Find the native FFTW3 headers and libraries.
#
# FFTW3_INCLUDE_DIRS - where to find fftw3.h, etc.
# FFTW3_LIBRARIES - List of libraries when using fftw3.
# FFTW3_FOUND - True if fftw3 found.
# FFTW3_INCLUDE_DIRS - where to find fftw3.h, etc.
# FFTW3_LIBRARIES - List of libraries when using fftw3.
# FFTW3_OMP_LIBRARIES - List of libraries when using fftw3_omp.
# FFTW3_FOUND - True if fftw3 found.
# FFTW3_OMP_FOUND - True if fftw3_omp found.
#
# Query pkg-config for fftw3; the PC_FFTW3_* results are only used as search HINTS below.
find_package(PkgConfig)
pkg_check_modules(PC_FFTW3 fftw3)
# Locate the fftw3.h header and the fftw3 library.
find_path(FFTW3_INCLUDE_DIR fftw3.h HINTS ${PC_FFTW3_INCLUDE_DIRS})
find_library(FFTW3_LIBRARY NAMES fftw3 HINTS ${PC_FFTW3_LIBRARY_DIRS})
# Also look for the fftw3_omp library; it is not among the variables that
# find_package_handle_standard_args() checks for this module.
find_library(FFTW3_OMP_LIBRARY NAMES fftw3_omp HINTS ${PC_FFTW3_LIBRARY_DIRS})
set(FFTW3_LIBRARIES ${FFTW3_LIBRARY})
# Export the result variables of this module.
set(FFTW3_INCLUDE_DIRS ${FFTW3_INCLUDE_DIR})
set(FFTW3_LIBRARIES ${FFTW3_LIBRARY})
set(FFTW3_OMP_LIBRARIES ${FFTW3_OMP_LIBRARY})
# FFTW3_OMP_FOUND is advertised as an output of this module, so it has to be
# set explicitly: find_package_handle_standard_args() only handles FFTW3_FOUND.
if(FFTW3_OMP_LIBRARY)
  set(FFTW3_OMP_FOUND TRUE)
else()
  set(FFTW3_OMP_FOUND FALSE)
endif()
include(FindPackageHandleStandardArgs)
# handle the QUIETLY and REQUIRED arguments and set FFTW3_FOUND to TRUE
@ -22,4 +25,4 @@ include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(FFTW3 DEFAULT_MSG FFTW3_LIBRARY FFTW3_INCLUDE_DIR)
mark_as_advanced(FFTW3_INCLUDE_DIR FFTW3_LIBRARY )
mark_as_advanced(FFTW3_INCLUDE_DIR FFTW3_LIBRARY FFTW3_OMP_LIBRARY)

View File

@ -1,8 +1,8 @@
# - Find fftw3f
# Find the native FFTW3F headers and libraries.
# Find the native single precision FFTW3 headers and libraries.
#
# FFTW3F_INCLUDE_DIRS - where to find fftw3.h, etc.
# FFTW3F_LIBRARIES - List of libraries when using fftw3f.
# FFTW3F_OMP_LIBRARIES - List of libraries when using fftw3f_omp.
# FFTW3F_FOUND - True if fftw3f found.
#
@ -10,11 +10,12 @@ find_package(PkgConfig)
# Query pkg-config for fftw3f; the PC_FFTW3F_* results are only used as search HINTS below.
pkg_check_modules(PC_FFTW3F fftw3f)
# Note: the header searched for is fftw3.h, not fftw3f.h.
find_path(FFTW3F_INCLUDE_DIR fftw3.h HINTS ${PC_FFTW3F_INCLUDE_DIRS})
find_library(FFTW3F_LIBRARY NAMES fftw3f HINTS ${PC_FFTW3F_LIBRARY_DIRS})
# Also look for the fftw3f_omp library.
find_library(FFTW3F_OMP_LIBRARY NAMES fftw3f_omp HINTS ${PC_FFTW3F_LIBRARY_DIRS})
set(FFTW3F_LIBRARIES ${FFTW3F_LIBRARY})
# Export the result variables of this module.
set(FFTW3F_INCLUDE_DIRS ${FFTW3F_INCLUDE_DIR})
set(FFTW3F_LIBRARIES ${FFTW3F_LIBRARY})
set(FFTW3F_OMP_LIBRARIES ${FFTW3F_OMP_LIBRARY})
include(FindPackageHandleStandardArgs)
# handle the QUIETLY and REQUIRED arguments and set FFTW3F_FOUND to TRUE
@ -22,4 +23,4 @@ include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(FFTW3F DEFAULT_MSG FFTW3F_LIBRARY FFTW3F_INCLUDE_DIR)
mark_as_advanced(FFTW3F_INCLUDE_DIR FFTW3F_LIBRARY )
# Hide every cache entry created by the find_path()/find_library() calls of
# this module, including the one for the fftw3f_omp library.
mark_as_advanced(FFTW3F_INCLUDE_DIR FFTW3F_LIBRARY FFTW3F_OMP_LIBRARY)

View File

@ -27,7 +27,9 @@ if(PKG_KOKKOS)
${KOKKOS_PKG_SOURCES_DIR}/modify_kokkos.cpp)
# These KOKKOS sources are only appended to KOKKOS_PKG_SOURCES when PKG_KSPACE is true.
if(PKG_KSPACE)
list(APPEND KOKKOS_PKG_SOURCES ${KOKKOS_PKG_SOURCES_DIR}/gridcomm_kokkos.cpp)
list(APPEND KOKKOS_PKG_SOURCES ${KOKKOS_PKG_SOURCES_DIR}/fft3d_kokkos.cpp
${KOKKOS_PKG_SOURCES_DIR}/gridcomm_kokkos.cpp
${KOKKOS_PKG_SOURCES_DIR}/remap_kokkos.cpp)
endif()
set_property(GLOBAL PROPERTY "KOKKOS_PKG_SOURCES" "${KOKKOS_PKG_SOURCES}")

View File

@ -21,14 +21,36 @@ if(PKG_KSPACE)
add_definitions(-DFFT_FFTW3)
include_directories(${${FFTW}_INCLUDE_DIRS})
list(APPEND LAMMPS_LINK_LIBS ${${FFTW}_LIBRARIES})
# Threaded FFTs need the OpenMP flavor of the FFTW3 library that matches the
# selected precision, i.e. ${FFTW}_OMP_LIBRARY (FFTW3_OMP_LIBRARY or
# FFTW3F_OMP_LIBRARY). Only that library is tested: accepting the library of
# the other precision as well could let a stale cache entry pass the check
# and then append a -NOTFOUND value to the link line.
if(${FFTW}_OMP_LIBRARY)
  # default to threaded FFTs when the matching OpenMP library is available
  option(FFT_THREADS "Use threaded FFT library" ON)
else()
  option(FFT_THREADS "Use threaded FFT library" OFF)
endif()
if(FFT_THREADS)
  if(${FFTW}_OMP_LIBRARY)
    add_definitions(-DFFT_FFTW_THREADS)
    list(APPEND LAMMPS_LINK_LIBS ${${FFTW}_OMP_LIBRARIES})
  else()
    # the user explicitly requested threads, but the library is missing
    message(FATAL_ERROR "Need OpenMP enabled FFTW3 library for FFT_THREADS")
  endif()
endif()
elseif(FFT STREQUAL "MKL")
find_package(MKL REQUIRED)
option(FFT_THREADS "Use threaded FFT library" OFF)
add_definitions(-DFFT_MKL)
include_directories(${MKL_INCLUDE_DIRS})
list(APPEND LAMMPS_LINK_LIBS ${MKL_LIBRARIES})
if (FFT_THREADS)
message(FATAL_ERROR "FFT_THREADS not supported with FFT from MKL")
endif()
else()
# last option is KISSFFT
option(FFT_THREADS "Use threaded FFT library" OFF)
add_definitions(-DFFT_KISS)
if (FFT_THREADS)
message(FATAL_ERROR "FFT_THREADS not supported with KISSFFT")
endif()
endif()
set(FFT_PACK "array" CACHE STRING "Optimization for FFT")

View File

@ -33,8 +33,9 @@ LAMMPS can use them if they are available on your system.
[CMake variables]:
-D FFT=value # FFTW3 or MKL or KISS or CUFFT, default is FFTW3 if found, else KISS
-D FFT=value # FFTW3 or MKL or KISS, default is FFTW3 if found, else KISS
-D FFT_SINGLE=value # yes or no (default), no = double precision
-D FFT_THREADS=value # yes or no, default is yes if FFT=FFTW3 and OpenMP FFT lib found, else no
-D FFT_PACK=value # array (default) or pointer or memcpy :pre
NOTE: The values for the FFT variable must be in upper-case. This is
@ -51,9 +52,10 @@ the FFT library, you can set these variables:
[Makefile.machine settings]:
FFT_INC = -DFFT_FFTW3 # -DFFT_FFTW3, -DFFT_FFTW (same as -DFFT_FFTW3), -DFFT_MKL, -DFFT_FFTW_THREADS, -DFFT_CUFFT, or -DFFT_KISS
FFT_INC = -DFFT_FFTW3 # -DFFT_FFTW3, -DFFT_FFTW (same as -DFFT_FFTW3), -DFFT_MKL, or -DFFT_KISS
# default is KISS if not specified
FFT_INC = -DFFT_SINGLE # do not specify for double precision
FFT_INC = -DFFT_FFTW_THREADS # use threaded FFTW3. requires -DFFT_FFTW3 (or -DFFT_FFTW)
FFT_INC = -DFFT_PACK_ARRAY # or -DFFT_PACK_POINTER or -DFFT_PACK_MEMCPY :pre
# default is FFT_PACK_ARRAY if not specified
@ -61,7 +63,7 @@ FFT_INC = -I/usr/local/include
FFT_PATH = -L/usr/local/lib
FFT_LIB = -lfftw3 # FFTW3 double precision
FFT_LIB = -lfftw3 -lfftw3f # FFTW3 single precision
FFT_LIB = -lfftw3 -lfftw3_omp # FFTW3 double precision, OpenMP threaded using the KOKKOS package
FFT_LIB = -lfftw3 -lfftw3_omp # FFTW3 double precision, OpenMP threaded
FFT_LIB = -lmkl_intel_lp64 -lmkl_sequential -lmkl_core # MKL with Intel compiler
FFT_LIB = -lmkl_gf_lp64 -lmkl_sequential -lmkl_core # MKL with GNU compiler
FFT_LIB = -lcufft # cuFFT (for GPUs using the KOKKOS package) :pre
@ -119,6 +121,9 @@ library libfftw3f.a
make clean
./configure --enable-single; make; make install :pre
When using -DFFT_FFTW_THREADS, the FFTW3 OpenMP thread interface library
(libfftw3_omp, or libfftw3f_omp for single precision) must also be available and listed in FFT_LIB.
Performing 3d FFTs requires communication to transpose the 3d FFT
grid. The data packing/unpacking for this can be done in one of 3
modes (ARRAY, POINTER, MEMCPY) as set by the FFT_PACK syntax above.

View File

@ -26,7 +26,9 @@
#endif
#ifdef FFT_FFTW_THREADS
#define FFT_FFTW3
#if !defined(FFT_FFTW3)
#error "Must use -DFFT_FFTW3 with -DFFT_FFTW_THREADS"
#endif
#endif
#if defined(FFT_MKL)

View File

@ -25,6 +25,9 @@
#include <cmath>
#include "fft3d.h"
#include "remap.h"
#if defined(_OPENMP)
#include <omp.h>
#endif
#ifdef FFT_KISS
/* include kissfft implementation */
@ -266,7 +269,7 @@ struct fft_plan_3d *fft_3d_create_plan(
int scaled, int permute, int *nbuf, int usecollective)
{
struct fft_plan_3d *plan;
int me,nprocs;
int me,nprocs,nthreads;
int flag,remapflag;
int first_ilo,first_ihi,first_jlo,first_jhi,first_klo,first_khi;
int second_ilo,second_ihi,second_jlo,second_jhi,second_klo,second_khi;
@ -279,6 +282,14 @@ struct fft_plan_3d *fft_3d_create_plan(
MPI_Comm_rank(comm,&me);
MPI_Comm_size(comm,&nprocs);
#if defined(_OPENMP)
// query OpenMP info.
// should have been initialized systemwide in Comm class constructor
nthreads = omp_get_max_threads();
#else
nthreads = 1;
#endif
// compute division of procs in 2 dimensions not on-processor
bifactor(nprocs,&np1,&np2);
@ -571,6 +582,13 @@ struct fft_plan_3d *fft_3d_create_plan(
*/
#elif defined(FFT_FFTW3)
#if defined(FFT_FFTW_THREADS)
if (nthreads > 1) {
FFTW_API(init_threads)();
FFTW_API(plan_with_nthreads)(nthreads);
}
#endif
plan->plan_fast_forward =
FFTW_API(plan_many_dft)(1, &nfast,plan->total1/plan->length1,
NULL,&nfast,1,plan->length1,
@ -689,6 +707,9 @@ void fft_3d_destroy_plan(struct fft_plan_3d *plan)
FFTW_API(destroy_plan)(plan->plan_mid_backward);
FFTW_API(destroy_plan)(plan->plan_fast_forward);
FFTW_API(destroy_plan)(plan->plan_fast_backward);
#if defined(FFT_FFTW_THREADS)
FFTW_API(cleanup_threads)();
#endif
#else
if (plan->cfg_slow_forward != plan->cfg_fast_forward &&
plan->cfg_slow_forward != plan->cfg_mid_forward) {