forked from lijiext/lammps
124 lines
3.7 KiB
Makefile
124 lines
3.7 KiB
Makefile
# Common command-line argument interpreter for compilation with lammpscuda (USER-CUDA) installed

# make options:
# emu=1 switch to CUDA emulation mode (otherwise: use the GPU)
# dbg=1 print a lot of debugging output during runtime
# verbose=1 output the nvcc command line during compilation
# keep=1 do not delete temporary compilation files (.ii, .cubin, ...)
# cufft=1 use CUDA's fast Fourier transform library "cufft" where possible (otherwise: use CPU fftw)
# binning=1 create a virtual particle grid (neighbor lists otherwise); currently this is not supported
# precision=1 single precision (global setting)
# precision=2 double precision (global setting)
|
|
|
|
# Shell used by make to run commands.
SHELL := /bin/sh

# ---- System-specific settings ----------------------------------------------
# Root directory of the CUDA installation.  The CUDA include directory and
# the lib / lib64 search paths used later in this file are built from it.
# Other locations that have been used:
#   /home/crtrott/lib/cuda
#   /opt/cuda                (e.g. in Gentoo)
CUDA_INSTALL_PATH := /usr/local/cuda
|
|
|
|
#//////////////////////////////////////////////////////////////////////////////////////////////
|
|
# no need to change anything below this line
|
|
#//////////////////////////////////////////////////////////////////////////////////////////////
|
|
|
|
# When cufft=0 is requested, keep using the CPU FFT.  The FFT selection
# further down only disables FFT support (-DFFT_NONE) if this is not 1.
FALLBACK_FFT := 1

# Default settings for the compiler switches.  With COMPILELIB set to a
# non-empty value the defaults file is read from the current directory;
# otherwise it is reached through the relative path ../../lib/cuda.
ifndef COMPILELIB
  include ../../lib/cuda/Makefile.defaults
else
  include Makefile.defaults
endif
|
|
|
|
#shell echo "Compiling with precision = " ${precision} ", arch = " ${arch} ", cufft = " ${cufft} ", dbg = " ${dbg} ", prec_timer = " ${prec_timer}
|
|
|
|
# Baseline preprocessor flags and library search paths.  Both are simply
# expanded (:=), so CUDA_INSTALL_PATH has to be set before this point.
CUDA_FLAGS              := -I$(CUDA_INSTALL_PATH)/include -DUNIX
CUDA_USRLIB_CONDITIONAL := -L$(CUDA_INSTALL_PATH)/lib -L$(CUDA_INSTALL_PATH)/lib64

# Debug setting: only dbg=1 selects the debug flags (-D_DEBUG, -g, -G);
# every other value, including an unset dbg, selects the optimized flags.
ifneq ($(strip $(dbg)), 1)
  NVCC_FLAGS += --compiler-options -fno-strict-aliasing -O3
else
  CUDA_FLAGS += -D_DEBUG -g
  NVCC_FLAGS += -g -G
endif
|
|
|
|
# Precision timing is skipped (-DNO_PREC_TIMING) only when prec_timer=0.
# Set that manually on Mac and Windows.
ifeq "$(strip $(prec_timer))" "0"
  CUDA_FLAGS += -DNO_PREC_TIMING
endif
|
|
|
|
# FFT routine selection.
#   cufft is anything but 0 (including unset):
#       compile with -DFFT_CUFFT and link against -lcufft.
#   cufft is 0:
#       the FFT_* variables are left as they are, unless FALLBACK_FFT is
#       not 1; in that case FFT support is switched off with -DFFT_NONE and
#       FFT_PATH / FFT_LIB are emptied.
ifneq ($(strip $(cufft)), 0)
  CUDA_FLAGS              += -DFFT_CUFFT
  CUDA_USRLIB_CONDITIONAL += -lcufft
else
  ifneq ($(FALLBACK_FFT), 1)
    CUDA_FLAGS += -DFFT_NONE
    FFT_INC  = -DFFT_NONE
    FFT_PATH =
    FFT_LIB  =
  endif
endif
|
|
|
|
# Global precision setting, selected by `precision`:
#   1           -> -DCUDA_PRECISION=1
#   3           -> -DCUDA_PRECISION=1 -DX_PRECISION=2
#   4           -> -DCUDA_PRECISION=1 -DX_PRECISION=2 -DV_PRECISION=2
#   otherwise   -> -DCUDA_PRECISION=2   (also used when precision is unset)
# The cases are mutually exclusive, so the order of the tests is irrelevant.
# The "else ifeq" form needs GNU Make 3.81 or newer.
ifeq ($(strip $(precision)), 4)
  CUDA_FLAGS += -DCUDA_PRECISION=1 -DX_PRECISION=2 -DV_PRECISION=2
else ifeq ($(strip $(precision)), 3)
  CUDA_FLAGS += -DCUDA_PRECISION=1 -DX_PRECISION=2
else ifeq ($(strip $(precision)), 1)
  CUDA_FLAGS += -DCUDA_PRECISION=1
else
  CUDA_FLAGS += -DCUDA_PRECISION=2
endif
|
|
|
|
# Architecture settings, selected by `arch`:
#   13               -> -DCUDA_ARCH=13, nvcc -arch sm_13
#   20, 21, 30, 35   -> -DCUDA_ARCH=20 for each of them, nvcc switches
#                       -ftz=true -prec-div=false -prec-sqrt=false, and
#                       -arch sm_20 / sm_21 / sm_30 / sm_35 respectively
#   anything else    -> -DCUDA_ARCH=99 together with -arch sm_13
# The alternative switch set "-ftz=false -prec-div=true -prec-sqrt=true" is
# not used in any branch.
# NOTE(review): CUDA_ARCH=20 for arch 21/30/35 and the 99 / sm_13 pairing in
# the fallback are reproduced exactly as found; presumably intentional, but
# confirm against the code that reads CUDA_ARCH.
# The "else ifeq" form needs GNU Make 3.81 or newer.
ifeq ($(strip $(arch)), 35)
  CUDA_FLAGS     += -DCUDA_ARCH=20
  NVCC_FLAGS     += -ftz=true -prec-div=false -prec-sqrt=false
  SMVERSIONFLAGS := -arch sm_35
else ifeq ($(strip $(arch)), 30)
  CUDA_FLAGS     += -DCUDA_ARCH=20
  NVCC_FLAGS     += -ftz=true -prec-div=false -prec-sqrt=false
  SMVERSIONFLAGS := -arch sm_30
else ifeq ($(strip $(arch)), 21)
  CUDA_FLAGS     += -DCUDA_ARCH=20
  NVCC_FLAGS     += -ftz=true -prec-div=false -prec-sqrt=false
  SMVERSIONFLAGS := -arch sm_21
else ifeq ($(strip $(arch)), 20)
  CUDA_FLAGS     += -DCUDA_ARCH=20
  NVCC_FLAGS     += -ftz=true -prec-div=false -prec-sqrt=false
  SMVERSIONFLAGS := -arch sm_20
else ifeq ($(strip $(arch)), 13)
  CUDA_FLAGS     += -DCUDA_ARCH=13
  SMVERSIONFLAGS := -arch sm_13
else
  CUDA_FLAGS     += -DCUDA_ARCH=99
  SMVERSIONFLAGS := -arch sm_13
endif
|
|
|
|
|
|
|
|
# Append CUDA_FLAGS and the CUDA include directory to whatever CCFLAGS
# already holds.  The assignment is simply expanded (:=), so the reference
# to the previous CCFLAGS value is resolved right here.
CCFLAGS := $(CCFLAGS) $(CUDA_FLAGS) -I$(CUDA_INSTALL_PATH)/include
|