forked from lijiext/lammps
make support for threaded FFTW3 consistent and incorporate it into CMake
This now also includes threaded FFTW3 for regular FFTs, not only KOKKOS.
This commit is contained in:
parent
8b134cb0c9
commit
6a65deeb65
|
@ -679,4 +679,9 @@ if(PKG_KSPACE)
|
|||
else()
|
||||
message(STATUS "Using double precision FFTs")
|
||||
endif()
|
||||
if(FFT_THREADS)
|
||||
message(STATUS "Using threaded FFTs")
|
||||
else()
|
||||
message(STATUS "Using non-threaded FFTs")
|
||||
endif()
|
||||
endif()
|
||||
|
|
|
@ -1,20 +1,23 @@
|
|||
# - Find fftw3
|
||||
# Find the native FFTW3 headers and libraries.
|
||||
#
|
||||
# FFTW3_INCLUDE_DIRS - where to find fftw3.h, etc.
|
||||
# FFTW3_LIBRARIES - List of libraries when using fftw3.
|
||||
# FFTW3_FOUND - True if fftw3 found.
|
||||
# FFTW3_INCLUDE_DIRS - where to find fftw3.h, etc.
|
||||
# FFTW3_LIBRARIES - List of libraries when using fftw3.
|
||||
# FFTW3_OMP_LIBRARIES - List of libraries when using fftw3_omp.
|
||||
# FFTW3_FOUND - True if fftw3 found.
|
||||
# FFTW3_OMP_FOUND - True if fftw3_omp found.
|
||||
#
|
||||
|
||||
find_package(PkgConfig)
|
||||
|
||||
pkg_check_modules(PC_FFTW3 fftw3)
|
||||
find_path(FFTW3_INCLUDE_DIR fftw3.h HINTS ${PC_FFTW3_INCLUDE_DIRS})
|
||||
|
||||
find_library(FFTW3_LIBRARY NAMES fftw3 HINTS ${PC_FFTW3_LIBRARY_DIRS})
|
||||
find_library(FFTW3_OMP_LIBRARY NAMES fftw3_omp HINTS ${PC_FFTW3_LIBRARY_DIRS})
|
||||
|
||||
set(FFTW3_LIBRARIES ${FFTW3_LIBRARY})
|
||||
set(FFTW3_INCLUDE_DIRS ${FFTW3_INCLUDE_DIR})
|
||||
set(FFTW3_LIBRARIES ${FFTW3_LIBRARY})
|
||||
set(FFTW3_OMP_LIBRARIES ${FFTW3_OMP_LIBRARY})
|
||||
|
||||
include(FindPackageHandleStandardArgs)
|
||||
# handle the QUIETLY and REQUIRED arguments and set FFTW3_FOUND to TRUE
|
||||
|
@ -22,4 +25,4 @@ include(FindPackageHandleStandardArgs)
|
|||
|
||||
find_package_handle_standard_args(FFTW3 DEFAULT_MSG FFTW3_LIBRARY FFTW3_INCLUDE_DIR)
|
||||
|
||||
mark_as_advanced(FFTW3_INCLUDE_DIR FFTW3_LIBRARY )
|
||||
mark_as_advanced(FFTW3_INCLUDE_DIR FFTW3_LIBRARY FFTW3_OMP_LIBRARY)
|
||||
|
|
|
@ -1,8 +1,8 @@
|
|||
# - Find fftw3f
|
||||
# Find the native FFTW3F headers and libraries.
|
||||
# Find the native single precision FFTW3 headers and libraries.
|
||||
#
|
||||
# FFTW3F_INCLUDE_DIRS - where to find fftw3.h, etc.
|
||||
# FFTW3F_LIBRARIES - List of libraries when using fftw3f.
|
||||
# FFTW3F_OMP_LIBRARIES - List of libraries when using fftw3f_omp.
|
||||
# FFTW3F_FOUND - True if fftw3f found.
|
||||
#
|
||||
|
||||
|
@ -10,11 +10,12 @@ find_package(PkgConfig)
|
|||
|
||||
pkg_check_modules(PC_FFTW3F fftw3f)
|
||||
find_path(FFTW3F_INCLUDE_DIR fftw3.h HINTS ${PC_FFTW3F_INCLUDE_DIRS})
|
||||
|
||||
find_library(FFTW3F_LIBRARY NAMES fftw3f HINTS ${PC_FFTW3F_LIBRARY_DIRS})
|
||||
find_library(FFTW3F_OMP_LIBRARY NAMES fftw3f_omp HINTS ${PC_FFTW3F_LIBRARY_DIRS})
|
||||
|
||||
set(FFTW3F_LIBRARIES ${FFTW3F_LIBRARY})
|
||||
set(FFTW3F_INCLUDE_DIRS ${FFTW3F_INCLUDE_DIR})
|
||||
set(FFTW3F_LIBRARIES ${FFTW3F_LIBRARY})
|
||||
set(FFTW3F_OMP_LIBRARIES ${FFTW3F_OMP_LIBRARY})
|
||||
|
||||
include(FindPackageHandleStandardArgs)
|
||||
# handle the QUIETLY and REQUIRED arguments and set FFTW3F_FOUND to TRUE
|
||||
|
@ -22,4 +23,4 @@ include(FindPackageHandleStandardArgs)
|
|||
|
||||
find_package_handle_standard_args(FFTW3F DEFAULT_MSG FFTW3F_LIBRARY FFTW3F_INCLUDE_DIR)
|
||||
|
||||
mark_as_advanced(FFTW3F_INCLUDE_DIR FFTW3F_LIBRARY )
|
||||
# Hide all cache entries created by this module's find_path()/find_library()
# calls from the default cache view, including the OpenMP-threaded library.
mark_as_advanced(FFTW3F_INCLUDE_DIR FFTW3F_LIBRARY FFTW3F_OMP_LIBRARY)
|
||||
|
|
|
@ -27,7 +27,9 @@ if(PKG_KOKKOS)
|
|||
${KOKKOS_PKG_SOURCES_DIR}/modify_kokkos.cpp)
|
||||
|
||||
if(PKG_KSPACE)
|
||||
list(APPEND KOKKOS_PKG_SOURCES ${KOKKOS_PKG_SOURCES_DIR}/gridcomm_kokkos.cpp)
|
||||
list(APPEND KOKKOS_PKG_SOURCES ${KOKKOS_PKG_SOURCES_DIR}/fft3d_kokkos.cpp
|
||||
${KOKKOS_PKG_SOURCES_DIR}/gridcomm_kokkos.cpp
|
||||
${KOKKOS_PKG_SOURCES_DIR}/remap_kokkos.cpp)
|
||||
endif()
|
||||
|
||||
set_property(GLOBAL PROPERTY "KOKKOS_PKG_SOURCES" "${KOKKOS_PKG_SOURCES}")
|
||||
|
|
|
@ -21,14 +21,36 @@ if(PKG_KSPACE)
|
|||
add_definitions(-DFFT_FFTW3)
|
||||
include_directories(${${FFTW}_INCLUDE_DIRS})
|
||||
list(APPEND LAMMPS_LINK_LIBS ${${FFTW}_LIBRARIES})
|
||||
if(FFTW3_OMP_LIBRARY OR FFTW3F_OMP_LIBRARY)
|
||||
option(FFT_THREADS "Use threaded FFT library" ON)
|
||||
else()
|
||||
option(FFT_THREADS "Use threaded FFT library" OFF)
|
||||
endif()
|
||||
|
||||
if(FFT_THREADS)
|
||||
if(FFTW3_OMP_LIBRARY OR FFTW3F_OMP_LIBRARY)
|
||||
add_definitions(-DFFT_FFTW_THREADS)
|
||||
list(APPEND LAMMPS_LINK_LIBS ${${FFTW}_OMP_LIBRARIES})
|
||||
else()
|
||||
message(FATAL_ERROR "Need OpenMP enabled FFTW3 library for FFT_THREADS")
|
||||
endif()
|
||||
endif()
|
||||
elseif(FFT STREQUAL "MKL")
|
||||
find_package(MKL REQUIRED)
|
||||
option(FFT_THREADS "Use threaded FFT library" OFF)
|
||||
add_definitions(-DFFT_MKL)
|
||||
include_directories(${MKL_INCLUDE_DIRS})
|
||||
list(APPEND LAMMPS_LINK_LIBS ${MKL_LIBRARIES})
|
||||
if (FFT_THREADS)
|
||||
message(FATAL_ERROR "FFT_THREADS not supported with FFT from MKL")
|
||||
endif()
|
||||
else()
|
||||
# last option is KISSFFT
|
||||
option(FFT_THREADS "Use threaded FFT library" OFF)
|
||||
add_definitions(-DFFT_KISS)
|
||||
if (FFT_THREADS)
|
||||
message(FATAL_ERROR "FFT_THREADS not supported with KISSFFT")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
set(FFT_PACK "array" CACHE STRING "Optimization for FFT")
|
||||
|
|
|
@ -33,8 +33,9 @@ LAMMPS can use them if they are available on your system.
|
|||
|
||||
[CMake variables]:
|
||||
|
||||
-D FFT=value # FFTW3 or MKL or KISS or CUFFT, default is FFTW3 if found, else KISS
|
||||
-D FFT=value # FFTW3 or MKL or KISS, default is FFTW3 if found, else KISS
|
||||
-D FFT_SINGLE=value # yes or no (default), no = double precision
|
||||
-D FFT_THREADS=value # yes or no, default is yes if FFT=FFTW3 and OpenMP FFT lib found, else no
|
||||
-D FFT_PACK=value # array (default) or pointer or memcpy :pre
|
||||
|
||||
NOTE: The values for the FFT variable must be in upper-case. This is
|
||||
|
@ -51,9 +52,10 @@ the FFT library, you can set these variables:
|
|||
|
||||
[Makefile.machine settings]:
|
||||
|
||||
FFT_INC = -DFFT_FFTW3 # -DFFT_FFTW3, -DFFT_FFTW (same as -DFFT_FFTW3), -DFFT_MKL, -DFFT_FFTW_THREADS, -DFFT_CUFFT, or -DFFT_KISS
|
||||
FFT_INC = -DFFT_FFTW3 # -DFFT_FFTW3, -DFFT_FFTW (same as -DFFT_FFTW3), -DFFT_MKL, or -DFFT_KISS
|
||||
# default is KISS if not specified
|
||||
FFT_INC = -DFFT_SINGLE # do not specify for double precision
|
||||
FFT_INC = -DFFT_FFTW_THREADS # use threaded FFTW3. requires -DFFT_FFTW3 (or -DFFT_FFTW)
|
||||
FFT_INC = -DFFT_PACK_ARRAY # or -DFFT_PACK_POINTER or -DFFT_PACK_MEMCPY :pre
|
||||
# default is FFT_PACK_ARRAY if not specified
|
||||
|
||||
|
@ -61,7 +63,7 @@ FFT_INC = -I/usr/local/include
|
|||
FFT_PATH = -L/usr/local/lib
|
||||
FFT_LIB = -lfftw3 # FFTW3 double precision
|
||||
FFT_LIB = -lfftw3 -lfftw3f # FFTW3 single precision
|
||||
FFT_LIB = -lfftw3 -lfftw3_omp # FFTW3 double precision, OpenMP threaded using the KOKKOS package
|
||||
FFT_LIB = -lfftw3 -lfftw3_omp # FFTW3 double precision, OpenMP threaded
|
||||
FFT_LIB = -lmkl_intel_lp64 -lmkl_sequential -lmkl_core # MKL with Intel compiler
|
||||
FFT_LIB = -lmkl_gf_lp64 -lmkl_sequential -lmkl_core # MKL with GNU compiler
|
||||
FFT_LIB = -lcufft # cuFFT (for GPUs using the KOKKOS package) :pre
|
||||
|
@ -119,6 +121,9 @@ library libfftw3f.a
|
|||
make clean
|
||||
./configure --enable-single; make; make install :pre
|
||||
|
||||
When using -DFFT_FFTW_THREADS you need to also have the FFTW3
|
||||
OpenMP thread interface libraries available.
|
||||
|
||||
Performing 3d FFTs requires communication to transpose the 3d FFT
|
||||
grid. The data packing/unpacking for this can be done in one of 3
|
||||
modes (ARRAY, POINTER, MEMCPY) as set by the FFT_PACK syntax above.
|
||||
|
|
|
@ -26,7 +26,9 @@
|
|||
#endif
|
||||
|
||||
#ifdef FFT_FFTW_THREADS
|
||||
#define FFT_FFTW3
|
||||
#if !defined(FFT_FFTW3)
|
||||
#error "Must use -DFFT_FFTW3 with -DFFT_FFTW_THREADS"
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(FFT_MKL)
|
||||
|
|
|
@ -25,6 +25,9 @@
|
|||
#include <cmath>
|
||||
#include "fft3d.h"
|
||||
#include "remap.h"
|
||||
#if defined(_OPENMP)
|
||||
#include <omp.h>
|
||||
#endif
|
||||
|
||||
#ifdef FFT_KISS
|
||||
/* include kissfft implementation */
|
||||
|
@ -266,7 +269,7 @@ struct fft_plan_3d *fft_3d_create_plan(
|
|||
int scaled, int permute, int *nbuf, int usecollective)
|
||||
{
|
||||
struct fft_plan_3d *plan;
|
||||
int me,nprocs;
|
||||
int me,nprocs,nthreads;
|
||||
int flag,remapflag;
|
||||
int first_ilo,first_ihi,first_jlo,first_jhi,first_klo,first_khi;
|
||||
int second_ilo,second_ihi,second_jlo,second_jhi,second_klo,second_khi;
|
||||
|
@ -279,6 +282,14 @@ struct fft_plan_3d *fft_3d_create_plan(
|
|||
MPI_Comm_rank(comm,&me);
|
||||
MPI_Comm_size(comm,&nprocs);
|
||||
|
||||
#if defined(_OPENMP)
|
||||
// query OpenMP info.
|
||||
// should have been initialized systemwide in Comm class constructor
|
||||
nthreads = omp_get_max_threads();
|
||||
#else
|
||||
nthreads = 1;
|
||||
#endif
|
||||
|
||||
// compute division of procs in 2 dimensions not on-processor
|
||||
|
||||
bifactor(nprocs,&np1,&np2);
|
||||
|
@ -571,6 +582,13 @@ struct fft_plan_3d *fft_3d_create_plan(
|
|||
*/
|
||||
|
||||
#elif defined(FFT_FFTW3)
|
||||
#if defined(FFT_FFTW_THREADS)
|
||||
if (nthreads > 1) {
|
||||
FFTW_API(init_threads)();
|
||||
FFTW_API(plan_with_nthreads)(nthreads);
|
||||
}
|
||||
#endif
|
||||
|
||||
plan->plan_fast_forward =
|
||||
FFTW_API(plan_many_dft)(1, &nfast,plan->total1/plan->length1,
|
||||
NULL,&nfast,1,plan->length1,
|
||||
|
@ -689,6 +707,9 @@ void fft_3d_destroy_plan(struct fft_plan_3d *plan)
|
|||
FFTW_API(destroy_plan)(plan->plan_mid_backward);
|
||||
FFTW_API(destroy_plan)(plan->plan_fast_forward);
|
||||
FFTW_API(destroy_plan)(plan->plan_fast_backward);
|
||||
#if defined(FFT_FFTW_THREADS)
|
||||
FFTW_API(cleanup_threads)();
|
||||
#endif
|
||||
#else
|
||||
if (plan->cfg_slow_forward != plan->cfg_fast_forward &&
|
||||
plan->cfg_slow_forward != plan->cfg_mid_forward) {
|
||||
|
|
Loading…
Reference in New Issue