forked from lijiext/lammps
git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@6573 f3b2605a-c512-4ea7-a41b-209d697bcdaa
This commit is contained in:
parent
ba76216025
commit
c80cc31a22
|
@ -14,7 +14,7 @@ SHELL = /bin/sh
|
|||
|
||||
# System-specific settings
|
||||
|
||||
CUDA_INSTALL_PATH = /usr/local/cuda
|
||||
CUDA_INSTALL_PATH = /usr/local/cuda-3.2
|
||||
# e.g. in Gentoo
|
||||
# CUDA_INSTALL_PATH = /opt/cuda
|
||||
|
||||
|
@ -27,11 +27,11 @@ CUDA_INSTALL_PATH = /usr/local/cuda
|
|||
FALLBACK_FFT = 1
|
||||
|
||||
#default settings for compiler switches
|
||||
#ifdef COMPILELIB
|
||||
#include Makefile.defaults
|
||||
#else
|
||||
ifdef COMPILELIB
|
||||
include Makefile.defaults
|
||||
else
|
||||
include ../../lib/cuda/Makefile.defaults
|
||||
#endif
|
||||
endif
|
||||
|
||||
#shell echo "Compiling with precision = " ${precision} ", arch = " ${arch} ", cufft = " ${cufft} ", dbg = " ${dbg} ", prec_timer = " ${prec_timer}
|
||||
|
||||
|
@ -39,7 +39,7 @@ CUDA_FLAGS := -DUNIX
|
|||
CUDA_USRLIB_CONDITIONAL :=
|
||||
|
||||
# debug setting
|
||||
ifeq ($(dbg), 1)
|
||||
ifeq ($(strip $(dbg)), 1)
|
||||
CUDA_FLAGS += -D_DEBUG -g
|
||||
NVCC_FLAGS += -g -G
|
||||
else
|
||||
|
@ -47,12 +47,12 @@ else
|
|||
endif
|
||||
|
||||
# disable precision timing when prec_timer is 0 (must be set manually on Mac and Windows)
|
||||
ifeq ($(prec_timer), 0)
|
||||
ifeq ($(strip $(prec_timer)), 0)
|
||||
CUDA_FLAGS += -DNO_PREC_TIMING
|
||||
endif
|
||||
|
||||
# set fft routine
|
||||
ifeq ($(cufft), 0)
|
||||
ifeq ($(strip $(cufft)), 0)
|
||||
ifneq ($(FALLBACK_FFT), 1)
|
||||
FFT_INC = -DFFT_NONE
|
||||
FFT_PATH =
|
||||
|
@ -65,13 +65,14 @@ else
|
|||
endif
|
||||
|
||||
# make global precision setting
|
||||
ifeq ($(precision), 1)
|
||||
|
||||
ifeq ($(strip $(precision)), 1)
|
||||
CUDA_FLAGS += -DCUDA_PRECISION=1
|
||||
else
|
||||
ifeq ($(precision), 3)
|
||||
ifeq ($(strip $(precision)), 3)
|
||||
CUDA_FLAGS += -DCUDA_PRECISION=1 -DX_PRECISION=2
|
||||
else
|
||||
ifeq ($(precision), 4)
|
||||
ifeq ($(strip $(precision)), 4)
|
||||
CUDA_FLAGS += -DCUDA_PRECISION=1 -DX_PRECISION=2 -DV_PRECISION=2
|
||||
else
|
||||
CUDA_FLAGS += -DCUDA_PRECISION=2
|
||||
|
@ -80,17 +81,17 @@ else
|
|||
endif
|
||||
|
||||
# make architecture settings
|
||||
ifeq ($(arch), 13)
|
||||
ifeq ($(strip $(arch)), 13)
|
||||
CUDA_FLAGS += -DCUDA_ARCH=13
|
||||
SMVERSIONFLAGS := -arch sm_13
|
||||
else
|
||||
ifeq ($(arch), 20)
|
||||
ifeq ($(strip $(arch)), 20)
|
||||
CUDA_FLAGS += -DCUDA_ARCH=20
|
||||
#NVCC_FLAGS += -ftz=false -prec-div=true -prec-sqrt=true
|
||||
NVCC_FLAGS += -ftz=true -prec-div=false -prec-sqrt=false
|
||||
SMVERSIONFLAGS := -arch sm_20
|
||||
else
|
||||
ifeq ($(arch), 21)
|
||||
ifeq ($(strip $(arch)), 21)
|
||||
CUDA_FLAGS += -DCUDA_ARCH=20
|
||||
#NVCC_FLAGS += -ftz=false -prec-div=true -prec-sqrt=true
|
||||
NVCC_FLAGS += -ftz=true -prec-div=false -prec-sqrt=false
|
||||
|
|
|
@ -1,15 +1,15 @@
|
|||
|
||||
#precision setting: 1 single, 2 double, 3 mixed (single with X_PRECISION=2), 4 mixed (single with X_PRECISION=2 and V_PRECISION=2)
|
||||
precision ?= 2
|
||||
precision ?= 1
|
||||
|
||||
#GPU architecture (compute capability): 13, 20, 21
|
||||
arch ?= 20
|
||||
|
||||
#Using cufft (should not be changed)
|
||||
cufft ?= 1
|
||||
cufft ?= 1
|
||||
|
||||
#Using dbg mode
|
||||
dbg ?= 0
|
||||
dbg ?= 0
|
||||
|
||||
#On Mac (and Windows) machines, set this to 0 to avoid using the Linux-specific precision timer
|
||||
prec_timer ?= 1
|
||||
|
|
|
@ -78,8 +78,11 @@ void CudaWrapper_Init(int argc, char** argv,int me,int ppn,int* devicelist)
|
|||
}
|
||||
|
||||
for(int i=0;i<deviceCount;i++)
|
||||
{
|
||||
if((deviceProp[dev_list[i]].computeMode==0)) sharedmode=true;
|
||||
|
||||
cudaSetDevice(i);
|
||||
cudaSetDeviceFlags(cudaDeviceMapHost);
|
||||
}
|
||||
if(sharedmode)
|
||||
{
|
||||
if(ppn&&(me%ppn+1)>deviceCount) {printf("Asking for more GPUs per node when there are. Reduce gpu/node setting.\n"); exit(0);}
|
||||
|
@ -97,7 +100,6 @@ void CudaWrapper_Init(int argc, char** argv,int me,int ppn,int* devicelist)
|
|||
{
|
||||
CUDA_SAFE_CALL( cudaSetValidDevices(dev_list,deviceCount) );
|
||||
}
|
||||
cudaSetDeviceFlags(cudaDeviceMapHost);
|
||||
cudaThreadSynchronize();
|
||||
|
||||
int dev;
|
||||
|
|
Loading…
Reference in New Issue