# lammps/lib/cuda/Makefile.common

# Common command-line option handling for builds with the LAMMPS USER-CUDA package (lammpscuda) installed

# make options:
# emu=1        switch to cuda emulation mode (otherwise: use gpu)
# dbg=1        print a lot of debugging output during runtime
# verbose=1    output nvcc command line during compilation
# keep=1       do not delete temporary compilation files (.ii, .cubin, ...)
# cufft=1      use cuda's fast fourier transformation lib "cufft" where possible (cufft=0: use cpu fftw; any other value enables cufft)
# binning=1    create virtual particle grid (neighbor-lists otherwise); currently this is not supported
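# precision=1  single precision (global setting)
# precision=2  double precision (global setting; also used for any unrecognized value)
# precision=3  single precision with X_PRECISION=2 (positions in double precision)
# precision=4  single precision with X_PRECISION=2 and V_PRECISION=2 (positions and velocities in double precision)
# arch=NN      target GPU architecture, one of 13, 20, 21, 30, 35 (compiled with -arch sm_NN); any other value falls back to sm_13
# prec_timer=0 disable high-precision timing (defines NO_PREC_TIMING)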

# Shell that make uses to execute commands.
SHELL = /bin/sh

# System-specific settings

# Root directory of the CUDA toolkit. The include (-I) and library (-L) search
# paths set up further down are derived from it; edit this line (or pick one of
# the commented alternatives) if the toolkit is installed elsewhere.
CUDA_INSTALL_PATH = /usr/local/cuda
#CUDA_INSTALL_PATH = /home/crtrott/lib/cuda
# e.g. in Gentoo
# CUDA_INSTALL_PATH = /opt/cuda

#//////////////////////////////////////////////////////////////////////////////////////////////
# no need to change anything below this line
#//////////////////////////////////////////////////////////////////////////////////////////////

# With FALLBACK_FFT = 1, a cufft=0 request adds no FFT define in the FFT
# section below (the CPU FFT is used). Any other value makes that section
# select -DFFT_NONE instead.
FALLBACK_FFT = 1

# Load the default values of the compiler switches. When COMPILELIB is set,
# Makefile.defaults is read from the working directory; otherwise it is read
# through the ../../lib/cuda path relative to the working directory.
ifndef COMPILELIB
include ../../lib/cuda/Makefile.defaults
else
include Makefile.defaults
endif

#shell echo "Compiling with precision = " ${precision} ", arch = " ${arch} ", cufft = " ${cufft} ", dbg = " ${dbg} ", prec_timer = " ${prec_timer}

# Base preprocessor flags: the CUDA toolkit headers plus -DUNIX. The option
# sections that follow append to this list.
CUDA_FLAGS := -I$(CUDA_INSTALL_PATH)/include -DUNIX
# Linker search paths for the CUDA libraries; both the lib and lib64
# directories of the toolkit are listed.
CUDA_USRLIB_CONDITIONAL := -L$(CUDA_INSTALL_PATH)/lib -L$(CUDA_INSTALL_PATH)/lib64

# Debug vs. optimized build.
#   dbg=1          -> define _DEBUG and pass -g to the host compile, -g -G to nvcc
#   anything else  -> optimized nvcc build (-O3) with -fno-strict-aliasing
#                     forwarded to the host compiler
ifneq ($(strip $(dbg)), 1)
  NVCC_FLAGS += --compiler-options -fno-strict-aliasing -O3
else
  CUDA_FLAGS += -D_DEBUG -g
  NVCC_FLAGS += -g -G
endif

# prec_timer=0 defines NO_PREC_TIMING to skip the timing code; this has to be
# requested by hand when building on Mac or Windows.
ifeq "$(strip $(prec_timer))" "0"
  CUDA_FLAGS += -DNO_PREC_TIMING
endif

# FFT backend selection.
#   cufft != 0                     -> build against cufft (-DFFT_CUFFT, -lcufft)
#   cufft == 0, FALLBACK_FFT != 1  -> no FFT at all (-DFFT_NONE, FFT_* settings cleared)
#   cufft == 0, FALLBACK_FFT == 1  -> nothing is added here
ifneq ($(strip $(cufft)), 0)
  CUDA_FLAGS += -DFFT_CUFFT
  CUDA_USRLIB_CONDITIONAL += -lcufft
else ifneq ($(FALLBACK_FFT), 1)
  FFT_INC = -DFFT_NONE
  FFT_PATH =
  FFT_LIB =
  CUDA_FLAGS += -DFFT_NONE
endif

# Global precision setting, selected by `precision`:
#   1             -> -DCUDA_PRECISION=1
#   3             -> -DCUDA_PRECISION=1 -DX_PRECISION=2
#   4             -> -DCUDA_PRECISION=1 -DX_PRECISION=2 -DV_PRECISION=2
#   anything else -> -DCUDA_PRECISION=2 (this covers precision=2 and unset)
ifeq ($(strip $(precision)), 1)
  CUDA_FLAGS += -DCUDA_PRECISION=1
else ifeq ($(strip $(precision)), 3)
  CUDA_FLAGS += -DCUDA_PRECISION=1 -DX_PRECISION=2
else ifeq ($(strip $(precision)), 4)
  CUDA_FLAGS += -DCUDA_PRECISION=1 -DX_PRECISION=2 -DV_PRECISION=2
else
  CUDA_FLAGS += -DCUDA_PRECISION=2
endif

# GPU architecture settings, selected by `arch`:
#   13             -> CUDA_ARCH=13, compiled for sm_13
#   20/21/30/35    -> CUDA_ARCH=20 in every case, compiled for the matching
#                     sm_NN, with nvcc fast-math style flags
#                     (-ftz=true -prec-div=false -prec-sqrt=false)
#   anything else  -> CUDA_ARCH=99, compiled for sm_13
# The stricter alternative "-ftz=false -prec-div=true -prec-sqrt=true" is
# left disabled for all architectures.
ifeq ($(strip $(arch)), 13)
  CUDA_FLAGS += -DCUDA_ARCH=13
  SMVERSIONFLAGS := -arch sm_13
else ifeq ($(strip $(arch)), 20)
  CUDA_FLAGS += -DCUDA_ARCH=20
  NVCC_FLAGS += -ftz=true -prec-div=false -prec-sqrt=false
  SMVERSIONFLAGS := -arch sm_20
else ifeq ($(strip $(arch)), 21)
  CUDA_FLAGS += -DCUDA_ARCH=20
  NVCC_FLAGS += -ftz=true -prec-div=false -prec-sqrt=false
  SMVERSIONFLAGS := -arch sm_21
else ifeq ($(strip $(arch)), 30)
  CUDA_FLAGS += -DCUDA_ARCH=20
  NVCC_FLAGS += -ftz=true -prec-div=false -prec-sqrt=false
  SMVERSIONFLAGS := -arch sm_30
else ifeq ($(strip $(arch)), 35)
  CUDA_FLAGS += -DCUDA_ARCH=20
  NVCC_FLAGS += -ftz=true -prec-div=false -prec-sqrt=false
  SMVERSIONFLAGS := -arch sm_35
else
  CUDA_FLAGS += -DCUDA_ARCH=99
  SMVERSIONFLAGS := -arch sm_13
endif


# Extend the host compiler flags: keep whatever CCFLAGS already holds, then add
# the CUDA defines and the toolkit include directory. The explicit -I is kept
# in addition to CUDA_FLAGS so the include path is present even if CUDA_FLAGS
# is overridden from the command line.
CCFLAGS := ${CCFLAGS} ${CUDA_FLAGS} -I${CUDA_INSTALL_PATH}/include