lammps/lib/cuda/Makefile.common

124 lines
3.7 KiB
Makefile

# Common command-line argument interpreter for compilation with lammpscuda (USER-CUDA) installed
# make options:
# emu=1 switch to cuda emulation mode (otherwise: use gpu)
# dbg=1 print a lot of debugging output during runtime
# verbose=1 output nvcc command line during compilation
# keep=1 do not delete temporary compilation files (.ii, .cubin, ...)
# cufft=1 use cuda's fast fourier transformation lib "cufft" where possible (otherwise: use cpu fftw)
# binning=1 create virtual particle grid (neighbor-lists otherwise); currently this is not supported
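# precision=1 single precision (global setting)
# precision=2 double precision (global setting; also used for any unrecognized value)
# precision=3 single precision globally, but X_PRECISION=2
# precision=4 single precision globally, but X_PRECISION=2 and V_PRECISION=2
# arch=13|20|21|30|35 GPU architecture to compile for (passed to nvcc as -arch sm_<arch>; any other value falls back to sm_13)
# prec_timer=0 define NO_PREC_TIMING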
# Shell used by make to execute commands.
SHELL = /bin/sh
# System-specific settings
# Root directory of the CUDA toolkit; the include directory and the lib/lib64
# search paths used further down in this file are derived from it.
CUDA_INSTALL_PATH = /usr/local/cuda
#CUDA_INSTALL_PATH = /home/crtrott/lib/cuda
# e.g. in Gentoo
# CUDA_INSTALL_PATH = /opt/cuda
#//////////////////////////////////////////////////////////////////////////////////////////////
# no need to change anything below this line
#//////////////////////////////////////////////////////////////////////////////////////////////
# FALLBACK_FFT = 1 keeps the CPU FFT in place when cufft=0 is requested;
# with any other value the FFT section below defines FFT_NONE instead.
FALLBACK_FFT = 1
# Load the default values of the compiler switches. If COMPILELIB is set the
# defaults are read from the current directory, otherwise from ../../lib/cuda.
ifndef COMPILELIB
  include ../../lib/cuda/Makefile.defaults
else
  include Makefile.defaults
endif
#shell echo "Compiling with precision = " ${precision} ", arch = " ${arch} ", cufft = " ${cufft} ", dbg = " ${dbg} ", prec_timer = " ${prec_timer}
# Starting values: CUDA header directory plus -DUNIX for the preprocessor,
# and both the lib and lib64 directories of the CUDA installation for the linker.
CUDA_FLAGS := -I$(CUDA_INSTALL_PATH)/include -DUNIX
CUDA_USRLIB_CONDITIONAL := -L$(CUDA_INSTALL_PATH)/lib -L$(CUDA_INSTALL_PATH)/lib64
# Choose between an optimized build and a debug build (dbg=1).
ifneq ($(strip $(dbg)),1)
  # Regular build: -O3 for nvcc, -fno-strict-aliasing forwarded through
  # nvcc's --compiler-options.
  NVCC_FLAGS += --compiler-options -fno-strict-aliasing -O3
else
  # dbg=1: compile with debug information and define _DEBUG.
  NVCC_FLAGS += -g -G
  CUDA_FLAGS += -D_DEBUG -g
endif
# prec_timer=0 defines NO_PREC_TIMING; on Mac and Windows this has to be
# requested by hand.
ifeq (0,$(strip $(prec_timer)))
  CUDA_FLAGS += -DNO_PREC_TIMING
endif
# Select the FFT backend.
ifneq ($(strip $(cufft)),0)
  # Anything other than cufft=0: build against cuFFT and link it.
  CUDA_FLAGS += -DFFT_CUFFT
  CUDA_USRLIB_CONDITIONAL += -lcufft
else ifneq ($(FALLBACK_FFT),1)
  # cufft=0 and the fallback is switched off: declare that no FFT is
  # available and clear the FFT path/library variables.
  FFT_INC = -DFFT_NONE
  FFT_PATH =
  FFT_LIB =
  CUDA_FLAGS += -DFFT_NONE
endif
# cufft=0 with FALLBACK_FFT equal to 1 changes nothing here.
# Translate the global precision switch into preprocessor defines.
ifeq ($(strip $(precision)),1)
  # single precision everywhere
  CUDA_FLAGS += -DCUDA_PRECISION=1
else ifeq ($(strip $(precision)),3)
  # single precision, X_PRECISION raised to 2
  CUDA_FLAGS += -DCUDA_PRECISION=1 -DX_PRECISION=2
else ifeq ($(strip $(precision)),4)
  # single precision, X_PRECISION and V_PRECISION raised to 2
  CUDA_FLAGS += -DCUDA_PRECISION=1 -DX_PRECISION=2 -DV_PRECISION=2
else
  # precision=2 and every other value: double precision
  CUDA_FLAGS += -DCUDA_PRECISION=2
endif
# Map the arch switch onto the CUDA_ARCH define and nvcc's -arch option.
# Note that arch 21, 30 and 35 all define CUDA_ARCH=20; only the sm_XX
# target handed to nvcc differs between them.
# For arch >= 20 the alternative setting
#   NVCC_FLAGS += -ftz=false -prec-div=true -prec-sqrt=true
# can be used in place of the -ftz=true -prec-div=false -prec-sqrt=false line.
ifeq ($(strip $(arch)),13)
  CUDA_FLAGS += -DCUDA_ARCH=13
  SMVERSIONFLAGS := -arch sm_13
else ifeq ($(strip $(arch)),20)
  CUDA_FLAGS += -DCUDA_ARCH=20
  NVCC_FLAGS += -ftz=true -prec-div=false -prec-sqrt=false
  SMVERSIONFLAGS := -arch sm_20
else ifeq ($(strip $(arch)),21)
  CUDA_FLAGS += -DCUDA_ARCH=20
  NVCC_FLAGS += -ftz=true -prec-div=false -prec-sqrt=false
  SMVERSIONFLAGS := -arch sm_21
else ifeq ($(strip $(arch)),30)
  CUDA_FLAGS += -DCUDA_ARCH=20
  NVCC_FLAGS += -ftz=true -prec-div=false -prec-sqrt=false
  SMVERSIONFLAGS := -arch sm_30
else ifeq ($(strip $(arch)),35)
  CUDA_FLAGS += -DCUDA_ARCH=20
  NVCC_FLAGS += -ftz=true -prec-div=false -prec-sqrt=false
  SMVERSIONFLAGS := -arch sm_35
else
  # Unrecognized or missing arch: flag it as 99 and compile for sm_13.
  CUDA_FLAGS += -DCUDA_ARCH=99
  SMVERSIONFLAGS := -arch sm_13
endif
# Extend the host compiler flags with all CUDA defines and the CUDA header
# directory. NOTE(review): CUDA_FLAGS is initialized in this file with the same
# -I option, so the trailing -I repeats it; kept as is, it is harmless.
CCFLAGS := $(CCFLAGS) $(CUDA_FLAGS) -I$(CUDA_INSTALL_PATH)/include