git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@7275 f3b2605a-c512-4ea7-a41b-209d697bcdaa

This commit is contained in:
sjplimp 2011-12-02 15:51:43 +00:00
parent a55728fa74
commit 37f098062b
17 changed files with 547 additions and 589 deletions

View File

@ -1,26 +1,8 @@
# /* ----------------------------------------------------------------------
# LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
# http://lammps.sandia.gov, Sandia National Laboratories
# Steve Plimpton, sjplimp@sandia.gov
#
# Copyright (2003) Sandia Corporation. Under the terms of Contract
# DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
# certain rights in this software. This software is distributed under
# the GNU General Public License.
#
# See the README file in the top-level LAMMPS directory.
# ------------------------------------------------------------------------- */
#
# /* ----------------------------------------------------------------------
# Contributing authors: Mike Brown (ORNL), brownw@ornl.gov
# Peng Wang (Nvidia), penwang@nvidia.com
# Paul Crozier (SNL), pscrozi@sandia.gov
# ------------------------------------------------------------------------- */
#CUDA_HOME = $(HOME)/cuda-4.0
CUDA_HOME = $(HOME)/cuda
NVCC = $(CUDA_HOME)/bin/nvcc
CUDA_ARCH = -arch=sm_13
CUDA_ARCH = -arch=sm_20
CUDA_PRECISION = -D_SINGLE_DOUBLE
CUDA_INCLUDE = -I$(CUDA_HOME)/include
CUDA_LIB = -L$(CUDA_HOME)/lib64 -Xlinker -rpath -Xlinker $(CUDA_HOME)/lib64
@ -29,9 +11,9 @@ CUDA_OPTS = -DUNIX -O3 -Xptxas -v --use_fast_math
CUDR_CPP = mpic++ -DMPI_GERYON -DUCL_NO_EXIT -I$(CUDA_HOME)/include
CUDR_OPTS = -O3 -ffast-math -funroll-loops -DMPI_GERYON
BIN_DIR = ./
OBJ_DIR = ./
LIB_DIR = ./
BIN_DIR = .
OBJ_DIR = obj
LIB_DIR = .
AR = ar
BSH = /bin/sh

View File

@ -1,22 +1,3 @@
# /* ----------------------------------------------------------------------
# LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
# http://lammps.sandia.gov, Sandia National Laboratories
# Steve Plimpton, sjplimp@sandia.gov
#
# Copyright (2003) Sandia Corporation. Under the terms of Contract
# DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
# certain rights in this software. This software is distributed under
# the GNU General Public License.
#
# See the README file in the top-level LAMMPS directory.
# ------------------------------------------------------------------------- */
#
# /* ----------------------------------------------------------------------
# Contributing authors: Mike Brown (ORNL), brownw@ornl.gov
# Peng Wang (Nvidia), penwang@nvidia.com
# Paul Crozier (SNL), pscrozi@sandia.gov
# ------------------------------------------------------------------------- */
CUDA_HOME = /sw/analysis-x64/cuda/3.2/centos5.5_binary/
NVCC = nvcc
@ -26,7 +7,7 @@ CUDA_INCLUDE = -I$(CUDA_HOME)/include
CUDA_LIB = -L$(CUDA_HOME)/lib64
CUDA_OPTS = -DUNIX -O3 -Xptxas -v --use_fast_math
CUDR_CPP = mpic++ -DMPI_GERYON -DUCL_NO_EXIT -openmp
CUDR_CPP = mpic++ -DMPI_GERYON -DUCL_NO_EXIT # -openmp
CUDR_OPTS = -O2 -xSSE2 -ip -use-intel-optimized-headers -fno-alias
BIN_DIR = ./

View File

@ -1,17 +1,4 @@
# /* ----------------------------------------------------------------------
# LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
# http://lammps.sandia.gov, Sandia National Laboratories
# Steve Plimpton, sjplimp@sandia.gov
#
# Copyright (2003) Sandia Corporation. Under the terms of Contract
# DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
# certain rights in this software. This software is distributed under
# the GNU General Public License.
#
# See the README file in the top-level LAMMPS directory.
# ------------------------------------------------------------------------- */
#
# /* ----------------------------------------------------------------------
# Makefile for NCSA's lincoln GPU cluster. Tested with "soft +cuda-2.3"
# ------------------------------------------------------------------------- */

View File

@ -1,20 +1,6 @@
# /* ----------------------------------------------------------------------
# LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
# http://lammps.sandia.gov, Sandia National Laboratories
# Steve Plimpton, sjplimp@sandia.gov
#
# Copyright (2003) Sandia Corporation. Under the terms of Contract
# DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
# certain rights in this software. This software is distributed under
# the GNU General Public License.
#
# See the README file in the top-level LAMMPS directory.
# ------------------------------------------------------------------------- */
#
# /* ----------------------------------------------------------------------
# Contributing authors: Mike Brown (ORNL), brownw@ornl.gov
# Peng Wang (Nvidia), penwang@nvidia.com
# Paul Crozier (SNL), pscrozi@sandia.gov
# Generic Linux Makefile for CUDA
# - Change CUDA_ARCH for your GPU
# ------------------------------------------------------------------------- */
CUDA_HOME = /usr/local/cuda

View File

@ -1,20 +1,5 @@
# /* ----------------------------------------------------------------------
# LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
# http://lammps.sandia.gov, Sandia National Laboratories
# Steve Plimpton, sjplimp@sandia.gov
#
# Copyright (2003) Sandia Corporation. Under the terms of Contract
# DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
# certain rights in this software. This software is distributed under
# the GNU General Public License.
#
# See the README file in the top-level LAMMPS directory.
# ------------------------------------------------------------------------- */
#
# /* ----------------------------------------------------------------------
# Contributing authors: Mike Brown (ORNL), brownw@ornl.gov
# Peng Wang (Nvidia), penwang@nvidia.com
# Paul Crozier (SNL), pscrozi@sandia.gov
# Generic Linux Makefile for OpenCL
# ------------------------------------------------------------------------- */
OCL_CPP = mpic++ -O3 -DMPI_GERYON -DUCL_NO_EXIT -DMPICH_IGNORE_CXX_SEEK

View File

@ -1,18 +1,6 @@
# /* ----------------------------------------------------------------------
# LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
# http://lammps.sandia.gov, Sandia National Laboratories
# Steve Plimpton, sjplimp@sandia.gov
#
# Copyright (2003) Sandia Corporation. Under the terms of Contract
# DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
# certain rights in this software. This software is distributed under
# the GNU General Public License.
#
# See the README file in the top-level LAMMPS directory.
# ------------------------------------------------------------------------- */
#
# /* ----------------------------------------------------------------------
# Makefile for the TACC longhorn cluster. Use "module load cuda".
# Makefile for the TACC longhorn cluster.
# - Use "module load cuda".
# ------------------------------------------------------------------------- */
CUDA_HOME = $(TACC_CUDA_DIR)

View File

@ -1,33 +1,20 @@
# /* ----------------------------------------------------------------------
# LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
# http://lammps.sandia.gov, Sandia National Laboratories
# Steve Plimpton, sjplimp@sandia.gov
#
# Copyright (2003) Sandia Corporation. Under the terms of Contract
# DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
# certain rights in this software. This software is distributed under
# the GNU General Public License.
#
# See the README file in the top-level LAMMPS directory.
# ------------------------------------------------------------------------- */
#
# /* ----------------------------------------------------------------------
# Contributing authors: Mike Brown (ORNL), brownw@ornl.gov
# Peng Wang (Nvidia), penwang@nvidia.com
# Paul Crozier (SNL), pscrozi@sandia.gov
# Generic Mac Makefile for CUDA
# - 32-bit (requires adding -m32 to LAMMPS Makefile)
# - Change CUDA_ARCH for your GPU
# ------------------------------------------------------------------------- */
CUDA_HOME = /usr/local/cuda
NVCC = nvcc
NVCC = nvcc -m64
CUDA_ARCH = -arch=sm_11
CUDA_PRECISION = -D_SINGLE_SINGLE
CUDA_INCLUDE = -I$(CUDA_HOME)/include
CUDA_LIB = -L$(CUDA_HOME)/lib
CUDA_OPTS = -DUNIX -DUCL_NO_EXIT -O3 -Xptxas -v --use_fast_math -m32
CUDA_OPTS = -DUNIX -DUCL_NO_EXIT -O3 -Xptxas -v --use_fast_math
CUDR_CPP = mpic++
CUDR_OPTS = -O2 -m32 -g
CUDR_CPP = mpic++ -m64
CUDR_OPTS = -O2 -g
BIN_DIR = ./
OBJ_DIR = ./

View File

@ -1,20 +1,5 @@
# /* ----------------------------------------------------------------------
# LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
# http://lammps.sandia.gov, Sandia National Laboratories
# Steve Plimpton, sjplimp@sandia.gov
#
# Copyright (2003) Sandia Corporation. Under the terms of Contract
# DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
# certain rights in this software. This software is distributed under
# the GNU General Public License.
#
# See the README file in the top-level LAMMPS directory.
# ------------------------------------------------------------------------- */
#
# /* ----------------------------------------------------------------------
# Contributing authors: Mike Brown (ORNL), brownw@ornl.gov
# Peng Wang (Nvidia), penwang@nvidia.com
# Paul Crozier (SNL), pscrozi@sandia.gov
# Generic Mac Makefile for OpenCL
# ------------------------------------------------------------------------- */
OCL_CPP = mpic++ -I./geryon/opencl_1_0 -O3 -DMPI_GERYON -DUCL_NO_EXIT

View File

@ -1,23 +1,3 @@
# /* ----------------------------------------------------------------------
# LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
# http://lammps.sandia.gov, Sandia National Laboratories
# Steve Plimpton, sjplimp@sandia.gov
#
# Copyright (2003) Sandia Corporation. Under the terms of Contract
# DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
# certain rights in this software. This software is distributed under
# the GNU General Public License.
#
# See the README file in the top-level LAMMPS directory.
# ------------------------------------------------------------------------- */
#
# /* ----------------------------------------------------------------------
# Contributing authors: Mike Brown (ORNL), brownw@ornl.gov
# Peng Wang (Nvidia), penwang@nvidia.com
# Inderaj Bains (NVIDIA), ibains@nvidia.com
# Paul Crozier (SNL), pscrozi@sandia.gov
# ------------------------------------------------------------------------- */
CUDA = $(NVCC) $(CUDA_INCLUDE) $(CUDA_OPTS) -Icudpp_mini $(CUDA_ARCH) \
$(CUDA_PRECISION)
CUDR = $(CUDR_CPP) $(CUDR_OPTS) $(CUDA_PRECISION) $(CUDA_INCLUDE) \
@ -29,11 +9,11 @@ GPU_LIB = $(LIB_DIR)/libgpu.a
# Headers for Geryon
UCL_H = $(wildcard ./geryon/ucl*.h)
NVC_H = $(wildcard ./geryon/nvc*.h) $(UCL_H)
NVD_H = $(wildcard ./geryon/nvd*.h) $(UCL_H) nv_kernel_def.h
NVD_H = $(wildcard ./geryon/nvd*.h) $(UCL_H) lal_preprocessor.h
# Headers for Pair Stuff
PAIR_H = pair_gpu_atom.h pair_gpu_ans.h pair_gpu_nbor_shared.h \
pair_gpu_nbor.h pair_gpu_precision.h pair_gpu_device.h \
pair_gpu_balance.h pppm_gpu_memory.h
PAIR_H = lal_atom.h lal_answer.h lal_neighbor_shared.h \
lal_neighbor.h lal_precision.h lal_device.h \
lal_balance.h lal_pppm.h
ALL_H = $(NVD_H) $(PAIR_H)
@ -41,48 +21,47 @@ EXECS = $(BIN_DIR)/nvc_get_devices
CUDPP = $(OBJ_DIR)/cudpp.o $(OBJ_DIR)/cudpp_plan.o \
$(OBJ_DIR)/cudpp_maximal_launch.o $(OBJ_DIR)/cudpp_plan_manager.o \
$(OBJ_DIR)/radixsort_app.cu_o $(OBJ_DIR)/scan_app.cu_o
OBJS = $(OBJ_DIR)/pair_gpu_atom.o $(OBJ_DIR)/pair_gpu_ans.o \
$(OBJ_DIR)/pair_gpu_nbor.o $(OBJ_DIR)/pair_gpu_nbor_shared.o \
$(OBJ_DIR)/pair_gpu_device.o $(OBJ_DIR)/atomic_gpu_memory.o \
$(OBJ_DIR)/charge_gpu_memory.o $(OBJ_DIR)/base_ellipsoid.o \
$(OBJ_DIR)/pppm_gpu_memory.o $(OBJ_DIR)/pppm_l_gpu.o \
$(OBJ_DIR)/gayberne.o $(OBJ_DIR)/gayberne_ext.o \
$(OBJ_DIR)/re_squared.o $(OBJ_DIR)/re_squared_ext.o \
$(OBJ_DIR)/lj_cut_gpu_memory.o $(OBJ_DIR)/lj_cut_gpu.o \
$(OBJ_DIR)/lj96_cut_gpu_memory.o $(OBJ_DIR)/lj96_cut_gpu.o \
$(OBJ_DIR)/lj_expand_gpu_memory.o $(OBJ_DIR)/lj_expand_gpu.o \
$(OBJ_DIR)/ljc_cut_gpu_memory.o $(OBJ_DIR)/ljc_cut_gpu.o \
$(OBJ_DIR)/ljcl_cut_gpu_memory.o $(OBJ_DIR)/ljcl_cut_gpu.o \
$(OBJ_DIR)/lj_class2_long.o $(OBJ_DIR)/lj_class2_long_ext.o \
$(OBJ_DIR)/coul_long_gpu_memory.o $(OBJ_DIR)/coul_long_gpu.o \
$(OBJ_DIR)/morse_gpu_memory.o $(OBJ_DIR)/morse_gpu.o \
$(OBJ_DIR)/crml_gpu_memory.o $(OBJ_DIR)/crml_gpu.o \
$(OBJ_DIR)/cmm_cut_gpu_memory.o $(OBJ_DIR)/cmm_cut_gpu.o \
$(OBJ_DIR)/cmmc_long_gpu_memory.o $(OBJ_DIR)/cmmc_long_gpu.o \
$(OBJ_DIR)/cmmc_msm_gpu_memory.o $(OBJ_DIR)/cmmc_msm_gpu.o \
OBJS = $(OBJ_DIR)/lal_atom.o $(OBJ_DIR)/lal_ans.o \
$(OBJ_DIR)/lal_neighbor.o $(OBJ_DIR)/lal_neighbor_shared.o \
$(OBJ_DIR)/lal_device.o $(OBJ_DIR)/lal_base_atomic.o \
$(OBJ_DIR)/lal_base_charge.o $(OBJ_DIR)/lal_base_ellipsoid.o \
$(OBJ_DIR)/lal_pppm.o $(OBJ_DIR)/lal_pppm_ext.o \
$(OBJ_DIR)/lal_gayberne.o $(OBJ_DIR)/lal_gayberne_ext.o \
$(OBJ_DIR)/lal_re_squared.o $(OBJ_DIR)/lal_re_squared_ext.o \
$(OBJ_DIR)/lal_lj.o $(OBJ_DIR)/lal_lj_ext.o \
$(OBJ_DIR)/lal_lj96.o $(OBJ_DIR)/lal_lj96_ext.o \
$(OBJ_DIR)/lal_lj_expand.o $(OBJ_DIR)/lal_lj_expand_ext.o \
$(OBJ_DIR)/lal_lj_coul.o $(OBJ_DIR)/lal_lj_coul_ext.o \
$(OBJ_DIR)/lal_lj_coul_long.o $(OBJ_DIR)/lal_lj_coul_long_ext.o \
$(OBJ_DIR)/lal_lj_class2_long.o $(OBJ_DIR)/lal_lj_class2_long_ext.o \
$(OBJ_DIR)/lal_coul_long.o $(OBJ_DIR)/lal_coul_long_ext.o \
$(OBJ_DIR)/lal_morse.o $(OBJ_DIR)/lal_morse_ext.o \
$(OBJ_DIR)/lal_charmm_long.o $(OBJ_DIR)/lal_charmm_long_ext.o \
$(OBJ_DIR)/lal_cg_cmm.o $(OBJ_DIR)/lal_cg_cmm_ext.o \
$(OBJ_DIR)/lal_cg_cmm_long.o $(OBJ_DIR)/lal_cg_cmm_long_ext.o \
$(CUDPP)
PTXS = $(OBJ_DIR)/pair_gpu_dev_kernel.ptx $(OBJ_DIR)/pair_gpu_dev_ptx.h \
$(OBJ_DIR)/pair_gpu_atom_kernel.ptx $(OBJ_DIR)/pair_gpu_atom_ptx.h \
$(OBJ_DIR)/pair_gpu_nbor_kernel.ptx $(OBJ_DIR)/pair_gpu_nbor_ptx.h \
$(OBJ_DIR)/pair_gpu_build_kernel.ptx $(OBJ_DIR)/pair_gpu_build_ptx.h \
$(OBJ_DIR)/pppm_f_gpu_kernel.ptx $(OBJ_DIR)/pppm_f_gpu_ptx.h \
$(OBJ_DIR)/pppm_d_gpu_kernel.ptx $(OBJ_DIR)/pppm_d_gpu_ptx.h \
PTXS = $(OBJ_DIR)/device.ptx $(OBJ_DIR)/device_ptx.h \
$(OBJ_DIR)/atom.ptx $(OBJ_DIR)/atom_ptx.h \
$(OBJ_DIR)/neighbor_cpu.ptx $(OBJ_DIR)/neighbor_cpu_ptx.h \
$(OBJ_DIR)/neighbor_gpu.ptx $(OBJ_DIR)/neighbor_gpu_ptx.h \
$(OBJ_DIR)/pppm_f.ptx $(OBJ_DIR)/pppm_f_ptx.h \
$(OBJ_DIR)/pppm_d.ptx $(OBJ_DIR)/pppm_d_ptx.h \
$(OBJ_DIR)/ellipsoid_nbor.ptx $(OBJ_DIR)/ellipsoid_nbor_ptx.h \
$(OBJ_DIR)/gayberne.ptx $(OBJ_DIR)/gayberne_lj.ptx \
$(OBJ_DIR)/gayberne_ptx.h $(OBJ_DIR)/re_squared.ptx \
$(OBJ_DIR)/re_squared_lj.ptx $(OBJ_DIR)/re_squared_ptx.h \
$(OBJ_DIR)/lj_cut_gpu_kernel.ptx $(OBJ_DIR)/lj_cut_gpu_ptx.h \
$(OBJ_DIR)/lj96_cut_gpu_kernel.ptx $(OBJ_DIR)/lj96_cut_gpu_ptx.h \
$(OBJ_DIR)/lj_expand_gpu_kernel.ptx $(OBJ_DIR)/lj_expand_gpu_ptx.h \
$(OBJ_DIR)/ljc_cut_gpu_kernel.ptx $(OBJ_DIR)/ljc_cut_gpu_ptx.h \
$(OBJ_DIR)/ljcl_cut_gpu_kernel.ptx $(OBJ_DIR)/ljcl_cut_gpu_ptx.h \
$(OBJ_DIR)/gayberne_ptx.h $(OBJ_DIR)/gayberne_lj_ptx.h \
$(OBJ_DIR)/re_squared.ptx $(OBJ_DIR)/re_squared_lj.ptx \
$(OBJ_DIR)/re_squared_ptx.h $(OBJ_DIR)/re_squared_lj_ptx.h \
$(OBJ_DIR)/lj.ptx $(OBJ_DIR)/lj_ptx.h \
$(OBJ_DIR)/lj96.ptx $(OBJ_DIR)/lj96_ptx.h \
$(OBJ_DIR)/lj_expand.ptx $(OBJ_DIR)/lj_expand_ptx.h \
$(OBJ_DIR)/lj_coul.ptx $(OBJ_DIR)/lj_coul_ptx.h \
$(OBJ_DIR)/lj_coul_long.ptx $(OBJ_DIR)/lj_coul_long_ptx.h \
$(OBJ_DIR)/lj_class2_long.ptx $(OBJ_DIR)/lj_class2_long_ptx.h \
$(OBJ_DIR)/coul_long_gpu_kernel.ptx $(OBJ_DIR)/coul_long_gpu_ptx.h \
$(OBJ_DIR)/morse_gpu_kernel.ptx $(OBJ_DIR)/morse_gpu_ptx.h \
$(OBJ_DIR)/crml_gpu_kernel.ptx $(OBJ_DIR)/crml_gpu_ptx.h \
$(OBJ_DIR)/cmm_cut_gpu_kernel.ptx $(OBJ_DIR)/cmm_cut_gpu_ptx.h \
$(OBJ_DIR)/cmmc_long_gpu_kernel.ptx $(OBJ_DIR)/cmmc_long_gpu_ptx.h \
$(OBJ_DIR)/cmmc_msm_gpu_kernel.ptx $(OBJ_DIR)/cmmc_msm_gpu_ptx.h
$(OBJ_DIR)/coul_long.ptx $(OBJ_DIR)/coul_long_ptx.h \
$(OBJ_DIR)/morse.ptx $(OBJ_DIR)/morse_ptx.h \
$(OBJ_DIR)/charmm_long.ptx $(OBJ_DIR)/charmm_long_ptx.h \
$(OBJ_DIR)/cg_cmm.ptx $(OBJ_DIR)/cg_cmm_ptx.h \
$(OBJ_DIR)/cg_cmm_long.ptx $(OBJ_DIR)/cg_cmm_long_ptx.h
all: $(GPU_LIB) $(EXECS)
@ -104,251 +83,245 @@ $(OBJ_DIR)/radixsort_app.cu_o: cudpp_mini/radixsort_app.cu
$(OBJ_DIR)/scan_app.cu_o: cudpp_mini/scan_app.cu
$(CUDA) -o $@ -c cudpp_mini/scan_app.cu
$(OBJ_DIR)/pair_gpu_atom_kernel.ptx: pair_gpu_atom_kernel.cu
$(CUDA) --ptx -DNV_KERNEL -o $@ pair_gpu_atom_kernel.cu
$(OBJ_DIR)/atom.ptx: lal_atom.cu lal_preprocessor.h
$(CUDA) --ptx -DNV_KERNEL -o $@ lal_atom.cu
$(OBJ_DIR)/pair_gpu_atom_ptx.h: $(OBJ_DIR)/pair_gpu_atom_kernel.ptx
$(BSH) ./geryon/file_to_cstr.sh $(OBJ_DIR)/pair_gpu_atom_kernel.ptx $(OBJ_DIR)/pair_gpu_atom_ptx.h
$(OBJ_DIR)/atom_ptx.h: $(OBJ_DIR)/atom.ptx
$(BSH) ./geryon/file_to_cstr.sh atom $(OBJ_DIR)/atom.ptx $(OBJ_DIR)/atom_ptx.h
$(OBJ_DIR)/pair_gpu_atom.o: pair_gpu_atom.cpp pair_gpu_atom.h $(NVD_H) $(OBJ_DIR)/pair_gpu_atom_ptx.h
$(CUDR) -o $@ -c pair_gpu_atom.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/lal_atom.o: lal_atom.cpp lal_atom.h $(NVD_H) $(OBJ_DIR)/atom_ptx.h
$(CUDR) -o $@ -c lal_atom.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/pair_gpu_ans.o: pair_gpu_ans.cpp pair_gpu_ans.h $(NVD_H)
$(CUDR) -o $@ -c pair_gpu_ans.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/lal_ans.o: lal_answer.cpp lal_answer.h $(NVD_H)
$(CUDR) -o $@ -c lal_answer.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/pair_gpu_nbor_kernel.ptx: pair_gpu_nbor_kernel.cu
$(CUDA) --ptx -DNV_KERNEL -o $@ pair_gpu_nbor_kernel.cu
$(OBJ_DIR)/neighbor_cpu.ptx: lal_neighbor_cpu.cu lal_preprocessor.h
$(CUDA) --ptx -DNV_KERNEL -o $@ lal_neighbor_cpu.cu
$(OBJ_DIR)/pair_gpu_nbor_ptx.h: $(OBJ_DIR)/pair_gpu_nbor_kernel.ptx
$(BSH) ./geryon/file_to_cstr.sh $(OBJ_DIR)/pair_gpu_nbor_kernel.ptx $(OBJ_DIR)/pair_gpu_nbor_ptx.h
$(OBJ_DIR)/neighbor_cpu_ptx.h: $(OBJ_DIR)/neighbor_cpu.ptx
$(BSH) ./geryon/file_to_cstr.sh neighbor_cpu $(OBJ_DIR)/neighbor_cpu.ptx $(OBJ_DIR)/neighbor_cpu_ptx.h
$(OBJ_DIR)/pair_gpu_build_kernel.ptx: pair_gpu_build_kernel.cu
$(CUDA) --ptx -DNV_KERNEL -o $@ pair_gpu_build_kernel.cu
$(OBJ_DIR)/neighbor_gpu.ptx: lal_neighbor_gpu.cu lal_preprocessor.h
$(CUDA) --ptx -DNV_KERNEL -o $@ lal_neighbor_gpu.cu
$(OBJ_DIR)/pair_gpu_build_ptx.h: $(OBJ_DIR)/pair_gpu_build_kernel.ptx
$(BSH) ./geryon/file_to_cstr.sh $(OBJ_DIR)/pair_gpu_build_kernel.ptx $(OBJ_DIR)/pair_gpu_build_ptx.h
$(OBJ_DIR)/neighbor_gpu_ptx.h: $(OBJ_DIR)/neighbor_gpu.ptx
$(BSH) ./geryon/file_to_cstr.sh neighbor_gpu $(OBJ_DIR)/neighbor_gpu.ptx $(OBJ_DIR)/neighbor_gpu_ptx.h
$(OBJ_DIR)/pair_gpu_nbor_shared.o: pair_gpu_nbor_shared.cpp pair_gpu_nbor_shared.h $(OBJ_DIR)/pair_gpu_nbor_ptx.h $(OBJ_DIR)/pair_gpu_build_ptx.h $(NVD_H)
$(CUDR) -o $@ -c pair_gpu_nbor_shared.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/lal_neighbor_shared.o: lal_neighbor_shared.cpp lal_neighbor_shared.h $(OBJ_DIR)/neighbor_cpu_ptx.h $(OBJ_DIR)/neighbor_gpu_ptx.h $(NVD_H)
$(CUDR) -o $@ -c lal_neighbor_shared.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/pair_gpu_nbor.o: pair_gpu_nbor.cpp pair_gpu_nbor.h pair_gpu_nbor_shared.h $(NVD_H)
$(CUDR) -o $@ -c pair_gpu_nbor.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/lal_neighbor.o: lal_neighbor.cpp lal_neighbor.h lal_neighbor_shared.h $(NVD_H)
$(CUDR) -o $@ -c lal_neighbor.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/pair_gpu_dev_kernel.ptx: pair_gpu_dev_kernel.cu
$(CUDA) --ptx -DNV_KERNEL -o $@ pair_gpu_dev_kernel.cu
$(OBJ_DIR)/device.ptx: lal_device.cu lal_preprocessor.h
$(CUDA) --ptx -DNV_KERNEL -o $@ lal_device.cu
$(OBJ_DIR)/pair_gpu_dev_ptx.h: $(OBJ_DIR)/pair_gpu_dev_kernel.ptx
$(BSH) ./geryon/file_to_cstr.sh $(OBJ_DIR)/pair_gpu_dev_kernel.ptx $(OBJ_DIR)/pair_gpu_dev_ptx.h
$(OBJ_DIR)/device_ptx.h: $(OBJ_DIR)/device.ptx
$(BSH) ./geryon/file_to_cstr.sh device $(OBJ_DIR)/device.ptx $(OBJ_DIR)/device_ptx.h
$(OBJ_DIR)/pair_gpu_device.o: pair_gpu_device.cpp pair_gpu_device.h $(ALL_H) $(OBJ_DIR)/pair_gpu_dev_ptx.h
$(CUDR) -o $@ -c pair_gpu_device.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/lal_device.o: lal_device.cpp lal_device.h $(ALL_H) $(OBJ_DIR)/device_ptx.h
$(CUDR) -o $@ -c lal_device.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/atomic_gpu_memory.o: $(ALL_H) atomic_gpu_memory.h atomic_gpu_memory.cpp
$(CUDR) -o $@ -c atomic_gpu_memory.cpp
$(OBJ_DIR)/lal_base_atomic.o: $(ALL_H) lal_base_atomic.h lal_base_atomic.cpp
$(CUDR) -o $@ -c lal_base_atomic.cpp
$(OBJ_DIR)/charge_gpu_memory.o: $(ALL_H) charge_gpu_memory.h charge_gpu_memory.cpp
$(CUDR) -o $@ -c charge_gpu_memory.cpp
$(OBJ_DIR)/lal_base_charge.o: $(ALL_H) lal_base_charge.h lal_base_charge.cpp
$(CUDR) -o $@ -c lal_base_charge.cpp
$(OBJ_DIR)/base_ellipsoid.o: $(ALL_H) base_ellipsoid.h base_ellipsoid.cpp $(OBJ_DIR)/ellipsoid_nbor_ptx.h
$(CUDR) -o $@ -c base_ellipsoid.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/lal_base_ellipsoid.o: $(ALL_H) lal_base_ellipsoid.h lal_base_ellipsoid.cpp $(OBJ_DIR)/ellipsoid_nbor_ptx.h
$(CUDR) -o $@ -c lal_base_ellipsoid.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/pppm_f_gpu_kernel.ptx: pppm_gpu_kernel.cu pair_gpu_precision.h
$(CUDA) --ptx -DNV_KERNEL -Dgrdtyp=float -Dgrdtyp4=float4 -o $@ pppm_gpu_kernel.cu
$(OBJ_DIR)/pppm_f.ptx: lal_pppm.cu lal_precision.h lal_preprocessor.h
$(CUDA) --ptx -DNV_KERNEL -Dgrdtyp=float -Dgrdtyp4=float4 -o $@ lal_pppm.cu
$(OBJ_DIR)/pppm_f_gpu_ptx.h: $(OBJ_DIR)/pppm_f_gpu_kernel.ptx
$(BSH) ./geryon/file_to_cstr.sh $(OBJ_DIR)/pppm_f_gpu_kernel.ptx $(OBJ_DIR)/pppm_f_gpu_ptx.h
$(OBJ_DIR)/pppm_f_ptx.h: $(OBJ_DIR)/pppm_f.ptx
$(BSH) ./geryon/file_to_cstr.sh pppm_f $(OBJ_DIR)/pppm_f.ptx $(OBJ_DIR)/pppm_f_ptx.h
$(OBJ_DIR)/pppm_d_gpu_kernel.ptx: pppm_gpu_kernel.cu pair_gpu_precision.h
$(CUDA) --ptx -DNV_KERNEL -Dgrdtyp=double -Dgrdtyp4=double4 -o $@ pppm_gpu_kernel.cu
$(OBJ_DIR)/pppm_d.ptx: lal_pppm.cu lal_precision.h lal_preprocessor.h
$(CUDA) --ptx -DNV_KERNEL -Dgrdtyp=double -Dgrdtyp4=double4 -o $@ lal_pppm.cu
$(OBJ_DIR)/pppm_d_gpu_ptx.h: $(OBJ_DIR)/pppm_d_gpu_kernel.ptx
$(BSH) ./geryon/file_to_cstr.sh $(OBJ_DIR)/pppm_d_gpu_kernel.ptx $(OBJ_DIR)/pppm_d_gpu_ptx.h
$(OBJ_DIR)/pppm_d_ptx.h: $(OBJ_DIR)/pppm_d.ptx
$(BSH) ./geryon/file_to_cstr.sh pppm_d $(OBJ_DIR)/pppm_d.ptx $(OBJ_DIR)/pppm_d_ptx.h
$(OBJ_DIR)/pppm_gpu_memory.o: $(ALL_H) pppm_gpu_memory.h pppm_gpu_memory.cpp $(OBJ_DIR)/pppm_f_gpu_ptx.h $(OBJ_DIR)/pppm_d_gpu_ptx.h
$(CUDR) -o $@ -c pppm_gpu_memory.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/lal_pppm.o: $(ALL_H) lal_pppm.h lal_pppm.cpp $(OBJ_DIR)/pppm_f_ptx.h $(OBJ_DIR)/pppm_d_ptx.h
$(CUDR) -o $@ -c lal_pppm.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/pppm_l_gpu.o: $(ALL_H) pppm_gpu_memory.h pppm_l_gpu.cpp
$(CUDR) -o $@ -c pppm_l_gpu.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/lal_pppm_ext.o: $(ALL_H) lal_pppm.h lal_pppm_ext.cpp
$(CUDR) -o $@ -c lal_pppm_ext.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/ellipsoid_nbor.ptx: ellipsoid_nbor.cu pair_gpu_precision.h
$(CUDA) --ptx -DNV_KERNEL -o $@ ellipsoid_nbor.cu
$(OBJ_DIR)/ellipsoid_nbor.ptx: lal_ellipsoid_nbor.cu lal_precision.h lal_preprocessor.h
$(CUDA) --ptx -DNV_KERNEL -o $@ lal_ellipsoid_nbor.cu
$(OBJ_DIR)/ellipsoid_nbor_ptx.h: $(OBJ_DIR)/ellipsoid_nbor.ptx
$(BSH) ./geryon/file_to_cstr.sh $(OBJ_DIR)/ellipsoid_nbor.ptx $(OBJ_DIR)/ellipsoid_nbor_ptx.h
$(BSH) ./geryon/file_to_cstr.sh ellipsoid_nbor $(OBJ_DIR)/ellipsoid_nbor.ptx $(OBJ_DIR)/ellipsoid_nbor_ptx.h
$(OBJ_DIR)/gayberne.ptx: gayberne.cu pair_gpu_precision.h ellipsoid_extra.h
$(CUDA) --ptx -DNV_KERNEL -o $@ gayberne.cu
$(OBJ_DIR)/gayberne.ptx: lal_gayberne.cu lal_precision.h lal_ellipsoid_extra.h lal_preprocessor.h
$(CUDA) --ptx -DNV_KERNEL -o $@ lal_gayberne.cu
$(OBJ_DIR)/gayberne_lj.ptx: gayberne_lj.cu pair_gpu_precision.h ellipsoid_extra.h
$(CUDA) --ptx -DNV_KERNEL -o $@ gayberne_lj.cu
$(OBJ_DIR)/gayberne_lj.ptx: lal_gayberne_lj.cu lal_precision.h lal_ellipsoid_extra.h lal_preprocessor.h
$(CUDA) --ptx -DNV_KERNEL -o $@ lal_gayberne_lj.cu
$(OBJ_DIR)/gayberne_ptx.h: $(OBJ_DIR)/gayberne.ptx $(OBJ_DIR)/gayberne_lj.ptx
$(BSH) ./geryon/file_to_cstr.sh $(OBJ_DIR)/gayberne.ptx $(OBJ_DIR)/gayberne_lj.ptx $(OBJ_DIR)/gayberne_ptx.h
$(OBJ_DIR)/gayberne_ptx.h: $(OBJ_DIR)/gayberne.ptx
$(BSH) ./geryon/file_to_cstr.sh gayberne $(OBJ_DIR)/gayberne.ptx $(OBJ_DIR)/gayberne_ptx.h
$(OBJ_DIR)/gayberne.o: $(ALL_H) gayberne.h gayberne.cpp $(OBJ_DIR)/gayberne_ptx.h $(OBJ_DIR)/base_ellipsoid.o
$(CUDR) -o $@ -c gayberne.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/gayberne_lj_ptx.h: $(OBJ_DIR)/gayberne_lj.ptx
$(BSH) ./geryon/file_to_cstr.sh gayberne_lj $(OBJ_DIR)/gayberne_lj.ptx $(OBJ_DIR)/gayberne_lj_ptx.h
$(OBJ_DIR)/gayberne_ext.o: $(ALL_H) $(OBJ_DIR)/gayberne.o gayberne_ext.cpp
$(CUDR) -o $@ -c gayberne_ext.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/lal_gayberne.o: $(ALL_H) lal_gayberne.h lal_gayberne.cpp $(OBJ_DIR)/gayberne_ptx.h $(OBJ_DIR)/gayberne_lj_ptx.h $(OBJ_DIR)/lal_base_ellipsoid.o
$(CUDR) -o $@ -c lal_gayberne.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/re_squared.ptx: re_squared.cu pair_gpu_precision.h ellipsoid_extra.h
$(CUDA) --ptx -DNV_KERNEL -o $@ re_squared.cu
$(OBJ_DIR)/lal_gayberne_ext.o: $(ALL_H) $(OBJ_DIR)/lal_gayberne.o lal_gayberne_ext.cpp
$(CUDR) -o $@ -c lal_gayberne_ext.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/re_squared_lj.ptx: re_squared_lj.cu pair_gpu_precision.h ellipsoid_extra.h
$(CUDA) --ptx -DNV_KERNEL -o $@ re_squared_lj.cu
$(OBJ_DIR)/re_squared.ptx: lal_re_squared.cu lal_precision.h lal_ellipsoid_extra.h lal_preprocessor.h
$(CUDA) --ptx -DNV_KERNEL -o $@ lal_re_squared.cu
$(OBJ_DIR)/re_squared_ptx.h: $(OBJ_DIR)/re_squared.ptx $(OBJ_DIR)/re_squared_lj.ptx
$(BSH) ./geryon/file_to_cstr.sh $(OBJ_DIR)/re_squared.ptx $(OBJ_DIR)/re_squared_lj.ptx $(OBJ_DIR)/re_squared_ptx.h
$(OBJ_DIR)/re_squared_lj.ptx: lal_re_squared_lj.cu lal_precision.h lal_ellipsoid_extra.h lal_preprocessor.h
$(CUDA) --ptx -DNV_KERNEL -o $@ lal_re_squared_lj.cu
$(OBJ_DIR)/re_squared.o: $(ALL_H) re_squared.h re_squared.cpp $(OBJ_DIR)/re_squared_ptx.h $(OBJ_DIR)/base_ellipsoid.o
$(CUDR) -o $@ -c re_squared.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/re_squared_ptx.h: $(OBJ_DIR)/re_squared.ptx
$(BSH) ./geryon/file_to_cstr.sh re_squared $(OBJ_DIR)/re_squared.ptx $(OBJ_DIR)/re_squared_ptx.h
$(OBJ_DIR)/re_squared_ext.o: $(ALL_H) $(OBJ_DIR)/re_squared.o re_squared_ext.cpp
$(CUDR) -o $@ -c re_squared_ext.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/re_squared_lj_ptx.h: $(OBJ_DIR)/re_squared_lj.ptx
$(BSH) ./geryon/file_to_cstr.sh re_squared_lj $(OBJ_DIR)/re_squared_lj.ptx $(OBJ_DIR)/re_squared_lj_ptx.h
$(OBJ_DIR)/lj_cut_gpu_kernel.ptx: lj_cut_gpu_kernel.cu pair_gpu_precision.h
$(CUDA) --ptx -DNV_KERNEL -o $@ lj_cut_gpu_kernel.cu
$(OBJ_DIR)/lal_re_squared.o: $(ALL_H) lal_re_squared.h lal_re_squared.cpp $(OBJ_DIR)/re_squared_ptx.h $(OBJ_DIR)/re_squared_lj_ptx.h $(OBJ_DIR)/lal_base_ellipsoid.o
$(CUDR) -o $@ -c lal_re_squared.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/lj_cut_gpu_ptx.h: $(OBJ_DIR)/lj_cut_gpu_kernel.ptx $(OBJ_DIR)/lj_cut_gpu_kernel.ptx
$(BSH) ./geryon/file_to_cstr.sh $(OBJ_DIR)/lj_cut_gpu_kernel.ptx $(OBJ_DIR)/lj_cut_gpu_ptx.h
$(OBJ_DIR)/lal_re_squared_ext.o: $(ALL_H) $(OBJ_DIR)/lal_re_squared.o lal_re_squared_ext.cpp
$(CUDR) -o $@ -c lal_re_squared_ext.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/lj_cut_gpu_memory.o: $(ALL_H) lj_cut_gpu_memory.h lj_cut_gpu_memory.cpp $(OBJ_DIR)/lj_cut_gpu_ptx.h $(OBJ_DIR)/atomic_gpu_memory.o
$(CUDR) -o $@ -c lj_cut_gpu_memory.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/lj.ptx: lal_lj.cu lal_precision.h lal_preprocessor.h
$(CUDA) --ptx -DNV_KERNEL -o $@ lal_lj.cu
$(OBJ_DIR)/lj_cut_gpu.o: $(ALL_H) lj_cut_gpu_memory.h lj_cut_gpu.cpp atomic_gpu_memory.h
$(CUDR) -o $@ -c lj_cut_gpu.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/lj_ptx.h: $(OBJ_DIR)/lj.ptx $(OBJ_DIR)/lj.ptx
$(BSH) ./geryon/file_to_cstr.sh lj $(OBJ_DIR)/lj.ptx $(OBJ_DIR)/lj_ptx.h
$(OBJ_DIR)/ljc_cut_gpu_kernel.ptx: ljc_cut_gpu_kernel.cu pair_gpu_precision.h
$(CUDA) --ptx -DNV_KERNEL -o $@ ljc_cut_gpu_kernel.cu
$(OBJ_DIR)/lal_lj.o: $(ALL_H) lal_lj.h lal_lj.cpp $(OBJ_DIR)/lj_ptx.h $(OBJ_DIR)/lal_base_atomic.o
$(CUDR) -o $@ -c lal_lj.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/ljc_cut_gpu_ptx.h: $(OBJ_DIR)/ljc_cut_gpu_kernel.ptx $(OBJ_DIR)/ljc_cut_gpu_kernel.ptx
$(BSH) ./geryon/file_to_cstr.sh $(OBJ_DIR)/ljc_cut_gpu_kernel.ptx $(OBJ_DIR)/ljc_cut_gpu_ptx.h
$(OBJ_DIR)/lal_lj_ext.o: $(ALL_H) lal_lj.h lal_lj_ext.cpp lal_base_atomic.h
$(CUDR) -o $@ -c lal_lj_ext.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/ljc_cut_gpu_memory.o: $(ALL_H) ljc_cut_gpu_memory.h ljc_cut_gpu_memory.cpp $(OBJ_DIR)/ljc_cut_gpu_ptx.h $(OBJ_DIR)/charge_gpu_memory.o
$(CUDR) -o $@ -c ljc_cut_gpu_memory.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/lj_coul.ptx: lal_lj_coul.cu lal_precision.h lal_preprocessor.h
$(CUDA) --ptx -DNV_KERNEL -o $@ lal_lj_coul.cu
$(OBJ_DIR)/ljc_cut_gpu.o: $(ALL_H) ljc_cut_gpu_memory.h ljc_cut_gpu.cpp charge_gpu_memory.h
$(CUDR) -o $@ -c ljc_cut_gpu.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/lj_coul_ptx.h: $(OBJ_DIR)/lj_coul.ptx $(OBJ_DIR)/lj_coul.ptx
$(BSH) ./geryon/file_to_cstr.sh lj_coul $(OBJ_DIR)/lj_coul.ptx $(OBJ_DIR)/lj_coul_ptx.h
$(OBJ_DIR)/lj_class2_long.ptx: lj_class2_long.cu pair_gpu_precision.h
$(CUDA) --ptx -DNV_KERNEL -o $@ lj_class2_long.cu
$(OBJ_DIR)/lal_lj_coul.o: $(ALL_H) lal_lj_coul.h lal_lj_coul.cpp $(OBJ_DIR)/lj_coul_ptx.h $(OBJ_DIR)/lal_base_charge.o
$(CUDR) -o $@ -c lal_lj_coul.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/lal_lj_coul_ext.o: $(ALL_H) lal_lj_coul.h lal_lj_coul_ext.cpp lal_base_charge.h
$(CUDR) -o $@ -c lal_lj_coul_ext.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/lj_class2_long.ptx: lal_lj_class2_long.cu lal_precision.h lal_preprocessor.h
$(CUDA) --ptx -DNV_KERNEL -o $@ lal_lj_class2_long.cu
$(OBJ_DIR)/lj_class2_long_ptx.h: $(OBJ_DIR)/lj_class2_long.ptx $(OBJ_DIR)/lj_class2_long.ptx
$(BSH) ./geryon/file_to_cstr.sh $(OBJ_DIR)/lj_class2_long.ptx $(OBJ_DIR)/lj_class2_long_ptx.h
$(BSH) ./geryon/file_to_cstr.sh lj_class2_long $(OBJ_DIR)/lj_class2_long.ptx $(OBJ_DIR)/lj_class2_long_ptx.h
$(OBJ_DIR)/lj_class2_long.o: $(ALL_H) lj_class2_long.h lj_class2_long.cpp $(OBJ_DIR)/lj_class2_long_ptx.h $(OBJ_DIR)/charge_gpu_memory.o
$(CUDR) -o $@ -c lj_class2_long.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/lal_lj_class2_long.o: $(ALL_H) lal_lj_class2_long.h lal_lj_class2_long.cpp $(OBJ_DIR)/lj_class2_long_ptx.h $(OBJ_DIR)/lal_base_charge.o
$(CUDR) -o $@ -c lal_lj_class2_long.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/lj_class2_long_ext.o: $(ALL_H) lj_class2_long.h lj_class2_long_ext.cpp charge_gpu_memory.h
$(CUDR) -o $@ -c lj_class2_long_ext.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/lal_lj_class2_long_ext.o: $(ALL_H) lal_lj_class2_long.h lal_lj_class2_long_ext.cpp lal_base_charge.h
$(CUDR) -o $@ -c lal_lj_class2_long_ext.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/ljcl_cut_gpu_kernel.ptx: ljcl_cut_gpu_kernel.cu pair_gpu_precision.h
$(CUDA) --ptx -DNV_KERNEL -o $@ ljcl_cut_gpu_kernel.cu
$(OBJ_DIR)/coul_long.ptx: lal_coul_long.cu lal_precision.h lal_preprocessor.h
$(CUDA) --ptx -DNV_KERNEL -o $@ lal_coul_long.cu
$(OBJ_DIR)/ljcl_cut_gpu_ptx.h: $(OBJ_DIR)/ljcl_cut_gpu_kernel.ptx $(OBJ_DIR)/ljcl_cut_gpu_kernel.ptx
$(BSH) ./geryon/file_to_cstr.sh $(OBJ_DIR)/ljcl_cut_gpu_kernel.ptx $(OBJ_DIR)/ljcl_cut_gpu_ptx.h
$(OBJ_DIR)/coul_long_ptx.h: $(OBJ_DIR)/coul_long.ptx $(OBJ_DIR)/coul_long.ptx
$(BSH) ./geryon/file_to_cstr.sh coul_long $(OBJ_DIR)/coul_long.ptx $(OBJ_DIR)/coul_long_ptx.h
$(OBJ_DIR)/ljcl_cut_gpu_memory.o: $(ALL_H) ljcl_cut_gpu_memory.h ljcl_cut_gpu_memory.cpp $(OBJ_DIR)/ljcl_cut_gpu_ptx.h $(OBJ_DIR)/charge_gpu_memory.o
$(CUDR) -o $@ -c ljcl_cut_gpu_memory.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/lal_coul_long.o: $(ALL_H) lal_coul_long.h lal_coul_long.cpp $(OBJ_DIR)/coul_long_ptx.h $(OBJ_DIR)/lal_base_charge.o
$(CUDR) -o $@ -c lal_coul_long.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/ljcl_cut_gpu.o: $(ALL_H) ljcl_cut_gpu_memory.h ljcl_cut_gpu.cpp charge_gpu_memory.h
$(CUDR) -o $@ -c ljcl_cut_gpu.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/lal_coul_long_ext.o: $(ALL_H) lal_coul_long.h lal_coul_long_ext.cpp lal_base_charge.h
$(CUDR) -o $@ -c lal_coul_long_ext.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/coul_long_gpu_kernel.ptx: coul_long_gpu_kernel.cu pair_gpu_precision.h
$(CUDA) --ptx -DNV_KERNEL -o $@ coul_long_gpu_kernel.cu
$(OBJ_DIR)/lj_coul_long.ptx: lal_lj_coul_long.cu lal_precision.h lal_preprocessor.h
$(CUDA) --ptx -DNV_KERNEL -o $@ lal_lj_coul_long.cu
$(OBJ_DIR)/coul_long_gpu_ptx.h: $(OBJ_DIR)/coul_long_gpu_kernel.ptx $(OBJ_DIR)/coul_long_gpu_kernel.ptx
$(BSH) ./geryon/file_to_cstr.sh $(OBJ_DIR)/coul_long_gpu_kernel.ptx $(OBJ_DIR)/coul_long_gpu_ptx.h
$(OBJ_DIR)/lj_coul_long_ptx.h: $(OBJ_DIR)/lj_coul_long.ptx $(OBJ_DIR)/lj_coul_long.ptx
$(BSH) ./geryon/file_to_cstr.sh lj_coul_long $(OBJ_DIR)/lj_coul_long.ptx $(OBJ_DIR)/lj_coul_long_ptx.h
$(OBJ_DIR)/coul_long_gpu_memory.o: $(ALL_H) coul_long_gpu_memory.h coul_long_gpu_memory.cpp $(OBJ_DIR)/coul_long_gpu_ptx.h $(OBJ_DIR)/charge_gpu_memory.o
$(CUDR) -o $@ -c coul_long_gpu_memory.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/lal_lj_coul_long.o: $(ALL_H) lal_lj_coul_long.h lal_lj_coul_long.cpp $(OBJ_DIR)/lj_coul_long_ptx.h $(OBJ_DIR)/lal_base_charge.o
$(CUDR) -o $@ -c lal_lj_coul_long.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/coul_long_gpu.o: $(ALL_H) coul_long_gpu_memory.h coul_long_gpu.cpp charge_gpu_memory.h
$(CUDR) -o $@ -c coul_long_gpu.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/lal_lj_coul_long_ext.o: $(ALL_H) lal_lj_coul_long.h lal_lj_coul_long_ext.cpp lal_base_charge.h
$(CUDR) -o $@ -c lal_lj_coul_long_ext.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/morse_gpu_kernel.ptx: morse_gpu_kernel.cu pair_gpu_precision.h
$(CUDA) --ptx -DNV_KERNEL -o $@ morse_gpu_kernel.cu
$(OBJ_DIR)/morse.ptx: lal_morse.cu lal_precision.h lal_preprocessor.h
$(CUDA) --ptx -DNV_KERNEL -o $@ lal_morse.cu
$(OBJ_DIR)/morse_gpu_ptx.h: $(OBJ_DIR)/morse_gpu_kernel.ptx $(OBJ_DIR)/morse_gpu_kernel.ptx
$(BSH) ./geryon/file_to_cstr.sh $(OBJ_DIR)/morse_gpu_kernel.ptx $(OBJ_DIR)/morse_gpu_ptx.h
$(OBJ_DIR)/morse_ptx.h: $(OBJ_DIR)/morse.ptx $(OBJ_DIR)/morse.ptx
$(BSH) ./geryon/file_to_cstr.sh morse $(OBJ_DIR)/morse.ptx $(OBJ_DIR)/morse_ptx.h
$(OBJ_DIR)/morse_gpu_memory.o: $(ALL_H) morse_gpu_memory.h morse_gpu_memory.cpp $(OBJ_DIR)/morse_gpu_ptx.h $(OBJ_DIR)/atomic_gpu_memory.o
$(CUDR) -o $@ -c morse_gpu_memory.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/lal_morse.o: $(ALL_H) lal_morse.h lal_morse.cpp $(OBJ_DIR)/morse_ptx.h $(OBJ_DIR)/lal_base_atomic.o
$(CUDR) -o $@ -c lal_morse.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/morse_gpu.o: $(ALL_H) morse_gpu_memory.h morse_gpu.cpp atomic_gpu_memory.h
$(CUDR) -o $@ -c morse_gpu.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/lal_morse_ext.o: $(ALL_H) lal_morse.h lal_morse_ext.cpp lal_base_atomic.h
$(CUDR) -o $@ -c lal_morse_ext.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/crml_gpu_kernel.ptx: crml_gpu_kernel.cu pair_gpu_precision.h
$(CUDA) --ptx -DNV_KERNEL -o $@ crml_gpu_kernel.cu
$(OBJ_DIR)/charmm_long.ptx: lal_charmm_long.cu lal_precision.h lal_preprocessor.h
$(CUDA) --ptx -DNV_KERNEL -o $@ lal_charmm_long.cu
$(OBJ_DIR)/crml_gpu_ptx.h: $(OBJ_DIR)/crml_gpu_kernel.ptx $(OBJ_DIR)/crml_gpu_kernel.ptx
$(BSH) ./geryon/file_to_cstr.sh $(OBJ_DIR)/crml_gpu_kernel.ptx $(OBJ_DIR)/crml_gpu_ptx.h
$(OBJ_DIR)/charmm_long_ptx.h: $(OBJ_DIR)/charmm_long.ptx $(OBJ_DIR)/charmm_long.ptx
$(BSH) ./geryon/file_to_cstr.sh charmm_long $(OBJ_DIR)/charmm_long.ptx $(OBJ_DIR)/charmm_long_ptx.h
$(OBJ_DIR)/crml_gpu_memory.o: $(ALL_H) crml_gpu_memory.h crml_gpu_memory.cpp $(OBJ_DIR)/crml_gpu_ptx.h $(OBJ_DIR)/charge_gpu_memory.o
$(CUDR) -o $@ -c crml_gpu_memory.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/lal_charmm_long.o: $(ALL_H) lal_charmm_long.h lal_charmm_long.cpp $(OBJ_DIR)/charmm_long_ptx.h $(OBJ_DIR)/lal_base_charge.o
$(CUDR) -o $@ -c lal_charmm_long.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/crml_gpu.o: $(ALL_H) crml_gpu_memory.h crml_gpu.cpp charge_gpu_memory.h
$(CUDR) -o $@ -c crml_gpu.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/lal_charmm_long_ext.o: $(ALL_H) lal_charmm_long.h lal_charmm_long_ext.cpp lal_base_charge.h
$(CUDR) -o $@ -c lal_charmm_long_ext.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/lj96_cut_gpu_kernel.ptx: lj96_cut_gpu_kernel.cu pair_gpu_precision.h
$(CUDA) --ptx -DNV_KERNEL -o $@ lj96_cut_gpu_kernel.cu
$(OBJ_DIR)/lj96.ptx: lal_lj96.cu lal_precision.h lal_preprocessor.h
$(CUDA) --ptx -DNV_KERNEL -o $@ lal_lj96.cu
$(OBJ_DIR)/lj96_cut_gpu_ptx.h: $(OBJ_DIR)/lj96_cut_gpu_kernel.ptx $(OBJ_DIR)/lj96_cut_gpu_kernel.ptx
$(BSH) ./geryon/file_to_cstr.sh $(OBJ_DIR)/lj96_cut_gpu_kernel.ptx $(OBJ_DIR)/lj96_cut_gpu_ptx.h
$(OBJ_DIR)/lj96_ptx.h: $(OBJ_DIR)/lj96.ptx $(OBJ_DIR)/lj96.ptx
$(BSH) ./geryon/file_to_cstr.sh lj96 $(OBJ_DIR)/lj96.ptx $(OBJ_DIR)/lj96_ptx.h
$(OBJ_DIR)/lj96_cut_gpu_memory.o: $(ALL_H) lj96_cut_gpu_memory.h lj96_cut_gpu_memory.cpp $(OBJ_DIR)/lj96_cut_gpu_ptx.h $(OBJ_DIR)/atomic_gpu_memory.o
$(CUDR) -o $@ -c lj96_cut_gpu_memory.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/lal_lj96.o: $(ALL_H) lal_lj96.h lal_lj96.cpp $(OBJ_DIR)/lj96_ptx.h $(OBJ_DIR)/lal_base_atomic.o
$(CUDR) -o $@ -c lal_lj96.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/lj96_cut_gpu.o: $(ALL_H) lj96_cut_gpu_memory.h lj96_cut_gpu.cpp atomic_gpu_memory.h
$(CUDR) -o $@ -c lj96_cut_gpu.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/lal_lj96_ext.o: $(ALL_H) lal_lj96.h lal_lj96_ext.cpp lal_base_atomic.h
$(CUDR) -o $@ -c lal_lj96_ext.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/lj_expand_gpu_kernel.ptx: lj_expand_gpu_kernel.cu pair_gpu_precision.h
$(CUDA) --ptx -DNV_KERNEL -o $@ lj_expand_gpu_kernel.cu
$(OBJ_DIR)/lj_expand.ptx: lal_lj_expand.cu lal_precision.h lal_preprocessor.h
$(CUDA) --ptx -DNV_KERNEL -o $@ lal_lj_expand.cu
$(OBJ_DIR)/lj_expand_gpu_ptx.h: $(OBJ_DIR)/lj_expand_gpu_kernel.ptx $(OBJ_DIR)/lj_expand_gpu_kernel.ptx
$(BSH) ./geryon/file_to_cstr.sh $(OBJ_DIR)/lj_expand_gpu_kernel.ptx $(OBJ_DIR)/lj_expand_gpu_ptx.h
$(OBJ_DIR)/lj_expand_ptx.h: $(OBJ_DIR)/lj_expand.ptx $(OBJ_DIR)/lj_expand.ptx
$(BSH) ./geryon/file_to_cstr.sh lj_expand $(OBJ_DIR)/lj_expand.ptx $(OBJ_DIR)/lj_expand_ptx.h
$(OBJ_DIR)/lj_expand_gpu_memory.o: $(ALL_H) lj_expand_gpu_memory.h lj_expand_gpu_memory.cpp $(OBJ_DIR)/lj_expand_gpu_ptx.h $(OBJ_DIR)/atomic_gpu_memory.o
$(CUDR) -o $@ -c lj_expand_gpu_memory.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/lal_lj_expand.o: $(ALL_H) lal_lj_expand.h lal_lj_expand.cpp $(OBJ_DIR)/lj_expand_ptx.h $(OBJ_DIR)/lal_base_atomic.o
$(CUDR) -o $@ -c lal_lj_expand.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/lj_expand_gpu.o: $(ALL_H) lj_expand_gpu_memory.h lj_expand_gpu.cpp atomic_gpu_memory.h
$(CUDR) -o $@ -c lj_expand_gpu.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/lal_lj_expand_ext.o: $(ALL_H) lal_lj_expand.h lal_lj_expand_ext.cpp lal_base_atomic.h
$(CUDR) -o $@ -c lal_lj_expand_ext.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/cmm_cut_gpu_kernel.ptx: cmm_cut_gpu_kernel.cu pair_gpu_precision.h
$(CUDA) --ptx -DNV_KERNEL -o $@ cmm_cut_gpu_kernel.cu
$(OBJ_DIR)/cg_cmm.ptx: lal_cg_cmm.cu lal_precision.h lal_preprocessor.h
$(CUDA) --ptx -DNV_KERNEL -o $@ lal_cg_cmm.cu
$(OBJ_DIR)/cmm_cut_gpu_ptx.h: $(OBJ_DIR)/cmm_cut_gpu_kernel.ptx $(OBJ_DIR)/cmm_cut_gpu_kernel.ptx
$(BSH) ./geryon/file_to_cstr.sh $(OBJ_DIR)/cmm_cut_gpu_kernel.ptx $(OBJ_DIR)/cmm_cut_gpu_ptx.h
$(OBJ_DIR)/cg_cmm_ptx.h: $(OBJ_DIR)/cg_cmm.ptx $(OBJ_DIR)/cg_cmm.ptx
$(BSH) ./geryon/file_to_cstr.sh cg_cmm $(OBJ_DIR)/cg_cmm.ptx $(OBJ_DIR)/cg_cmm_ptx.h
$(OBJ_DIR)/cmm_cut_gpu_memory.o: $(ALL_H) cmm_cut_gpu_memory.h cmm_cut_gpu_memory.cpp $(OBJ_DIR)/cmm_cut_gpu_ptx.h $(OBJ_DIR)/atomic_gpu_memory.o
$(CUDR) -o $@ -c cmm_cut_gpu_memory.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/lal_cg_cmm.o: $(ALL_H) lal_cg_cmm.h lal_cg_cmm.cpp $(OBJ_DIR)/cg_cmm_ptx.h $(OBJ_DIR)/lal_base_atomic.o
$(CUDR) -o $@ -c lal_cg_cmm.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/cmm_cut_gpu.o: $(ALL_H) cmm_cut_gpu_memory.h cmm_cut_gpu.cpp atomic_gpu_memory.h
$(CUDR) -o $@ -c cmm_cut_gpu.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/lal_cg_cmm_ext.o: $(ALL_H) lal_cg_cmm.h lal_cg_cmm_ext.cpp lal_base_atomic.h
$(CUDR) -o $@ -c lal_cg_cmm_ext.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/cmmc_long_gpu_kernel.ptx: cmmc_long_gpu_kernel.cu pair_gpu_precision.h
$(CUDA) --ptx -DNV_KERNEL -o $@ cmmc_long_gpu_kernel.cu
$(OBJ_DIR)/cg_cmm_long.ptx: lal_cg_cmm_long.cu lal_precision.h lal_preprocessor.h
$(CUDA) --ptx -DNV_KERNEL -o $@ lal_cg_cmm_long.cu
$(OBJ_DIR)/cmmc_long_gpu_ptx.h: $(OBJ_DIR)/cmmc_long_gpu_kernel.ptx $(OBJ_DIR)/cmmc_long_gpu_kernel.ptx
$(BSH) ./geryon/file_to_cstr.sh $(OBJ_DIR)/cmmc_long_gpu_kernel.ptx $(OBJ_DIR)/cmmc_long_gpu_ptx.h
$(OBJ_DIR)/cg_cmm_long_ptx.h: $(OBJ_DIR)/cg_cmm_long.ptx $(OBJ_DIR)/cg_cmm_long.ptx
$(BSH) ./geryon/file_to_cstr.sh cg_cmm_long $(OBJ_DIR)/cg_cmm_long.ptx $(OBJ_DIR)/cg_cmm_long_ptx.h
$(OBJ_DIR)/cmmc_long_gpu_memory.o: $(ALL_H) cmmc_long_gpu_memory.h cmmc_long_gpu_memory.cpp $(OBJ_DIR)/cmmc_long_gpu_ptx.h $(OBJ_DIR)/atomic_gpu_memory.o
$(CUDR) -o $@ -c cmmc_long_gpu_memory.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/lal_cg_cmm_long.o: $(ALL_H) lal_cg_cmm_long.h lal_cg_cmm_long.cpp $(OBJ_DIR)/cg_cmm_long_ptx.h $(OBJ_DIR)/lal_base_atomic.o
$(CUDR) -o $@ -c lal_cg_cmm_long.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/cmmc_long_gpu.o: $(ALL_H) cmmc_long_gpu_memory.h cmmc_long_gpu.cpp charge_gpu_memory.h
$(CUDR) -o $@ -c cmmc_long_gpu.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/cmmc_msm_gpu_kernel.ptx: cmmc_msm_gpu_kernel.cu pair_gpu_precision.h
$(CUDA) --ptx -DNV_KERNEL -o $@ cmmc_msm_gpu_kernel.cu
$(OBJ_DIR)/cmmc_msm_gpu_ptx.h: $(OBJ_DIR)/cmmc_msm_gpu_kernel.ptx $(OBJ_DIR)/cmmc_msm_gpu_kernel.ptx
$(BSH) ./geryon/file_to_cstr.sh $(OBJ_DIR)/cmmc_msm_gpu_kernel.ptx $(OBJ_DIR)/cmmc_msm_gpu_ptx.h
$(OBJ_DIR)/cmmc_msm_gpu_memory.o: $(ALL_H) cmmc_msm_gpu_memory.h cmmc_msm_gpu_memory.cpp $(OBJ_DIR)/cmmc_msm_gpu_ptx.h $(OBJ_DIR)/atomic_gpu_memory.o
$(CUDR) -o $@ -c cmmc_msm_gpu_memory.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/cmmc_msm_gpu.o: $(ALL_H) cmmc_msm_gpu_memory.h cmmc_msm_gpu.cpp charge_gpu_memory.h
$(CUDR) -o $@ -c cmmc_msm_gpu.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/lal_cg_cmm_long_ext.o: $(ALL_H) lal_cg_cmm_long.h lal_cg_cmm_long_ext.cpp lal_base_charge.h
$(CUDR) -o $@ -c lal_cg_cmm_long_ext.cpp -I$(OBJ_DIR)
$(BIN_DIR)/nvc_get_devices: ./geryon/ucl_get_devices.cpp $(NVC_H)
$(CUDR) -o $@ ./geryon/ucl_get_devices.cpp -DUCL_CUDART $(CUDA_LINK)
@ -361,3 +334,4 @@ clean:
veryclean: clean
rm -rf *~ *.linkinfo

View File

@ -1,226 +1,221 @@
# /* ----------------------------------------------------------------------
# LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
# http://lammps.sandia.gov, Sandia National Laboratories
# Steve Plimpton, sjplimp@sandia.gov
#
# Copyright (2003) Sandia Corporation. Under the terms of Contract
# DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
# certain rights in this software. This software is distributed under
# the GNU General Public License.
#
# See the README file in the top-level LAMMPS directory.
# ------------------------------------------------------------------------- */
#
# /* ----------------------------------------------------------------------
# Contributing authors: Mike Brown (ORNL), brownw@ornl.gov
# Peng Wang (Nvidia), penwang@nvidia.com
# Inderaj Bains (NVIDIA), ibains@nvidia.com
# Paul Crozier (SNL), pscrozi@sandia.gov
# ------------------------------------------------------------------------- */
OCL = $(OCL_CPP) $(OCL_PREC) -DUSE_OPENCL
OCL_LIB = $(LIB_DIR)/libgpu.a
# Headers for Geryon
UCL_H = $(wildcard ./geryon/ucl*.h)
OCL_H = $(wildcard ./geryon/ocl*.h) $(UCL_H)
# Headers for Pair Stuff
PAIR_H = pair_gpu_atom.h pair_gpu_ans.h pair_gpu_nbor_shared.h \
pair_gpu_nbor.h pair_gpu_precision.h pair_gpu_device.h \
pair_gpu_balance.h pppm_gpu_memory.h
PAIR_H = lal_atom.h lal_answer.h lal_neighbor_shared.h \
lal_neighbor.h lal_precision.h lal_device.h \
lal_balance.h lal_pppm.h
# Headers for Preprocessor/Auxiliary Functions
PRE1_H = lal_preprocessor.h lal_aux_fun1.h
ALL_H = $(OCL_H) $(PAIR_H)
EXECS = $(BIN_DIR)/ocl_get_devices
OBJS = $(OBJ_DIR)/pair_gpu_atom.o $(OBJ_DIR)/pair_gpu_ans.o \
$(OBJ_DIR)/pair_gpu_nbor_shared.o $(OBJ_DIR)/pair_gpu_nbor.o \
$(OBJ_DIR)/pair_gpu_device.o $(OBJ_DIR)/atomic_gpu_memory.o \
$(OBJ_DIR)/charge_gpu_memory.o $(OBJ_DIR)/base_ellipsoid.o \
$(OBJ_DIR)/pppm_gpu_memory.o $(OBJ_DIR)/pppm_l_gpu.o \
$(OBJ_DIR)/gayberne.o $(OBJ_DIR)/gayberne_ext.o \
$(OBJ_DIR)/re_squared.o $(OBJ_DIR)/re_squared_ext.o \
$(OBJ_DIR)/lj_cut_gpu_memory.o $(OBJ_DIR)/lj_cut_gpu.o \
$(OBJ_DIR)/lj96_cut_gpu_memory.o $(OBJ_DIR)/lj96_cut_gpu.o \
$(OBJ_DIR)/lj_expand_gpu_memory.o $(OBJ_DIR)/lj_expand_gpu.o \
$(OBJ_DIR)/ljc_cut_gpu_memory.o $(OBJ_DIR)/ljc_cut_gpu.o \
$(OBJ_DIR)/ljcl_cut_gpu_memory.o $(OBJ_DIR)/ljcl_cut_gpu.o \
$(OBJ_DIR)/lj_class2_long.o $(OBJ_DIR)/lj_class2_long_ext.o \
$(OBJ_DIR)/morse_gpu_memory.o $(OBJ_DIR)/morse_gpu.o \
$(OBJ_DIR)/crml_gpu_memory.o $(OBJ_DIR)/crml_gpu.o \
$(OBJ_DIR)/cmm_cut_gpu_memory.o $(OBJ_DIR)/cmm_cut_gpu.o \
$(OBJ_DIR)/cmmc_long_gpu_memory.o $(OBJ_DIR)/cmmc_long_gpu.o
KERS = $(OBJ_DIR)/pair_gpu_dev_cl.h $(OBJ_DIR)/pair_gpu_atom_cl.h \
$(OBJ_DIR)/pair_gpu_nbor_cl.h $(OBJ_DIR)/pppm_gpu_cl.h \
OBJS = $(OBJ_DIR)/lal_atom.o $(OBJ_DIR)/lal_answer.o \
$(OBJ_DIR)/lal_neighbor_shared.o $(OBJ_DIR)/lal_neighbor.o \
$(OBJ_DIR)/lal_device.o $(OBJ_DIR)/lal_base_atomic.o \
$(OBJ_DIR)/lal_base_charge.o $(OBJ_DIR)/lal_base_ellipsoid.o \
$(OBJ_DIR)/lal_pppm.o $(OBJ_DIR)/lal_pppm_ext.o \
$(OBJ_DIR)/lal_gayberne.o $(OBJ_DIR)/lal_gayberne_ext.o \
$(OBJ_DIR)/lal_re_squared.o $(OBJ_DIR)/lal_re_squared_ext.o \
$(OBJ_DIR)/lal_lj.o $(OBJ_DIR)/lal_lj_ext.o \
$(OBJ_DIR)/lal_lj96.o $(OBJ_DIR)/lal_lj96_ext.o \
$(OBJ_DIR)/lal_lj_expand.o $(OBJ_DIR)/lal_lj_expand_ext.o \
$(OBJ_DIR)/lal_lj_coul.o $(OBJ_DIR)/lal_lj_coul_ext.o \
$(OBJ_DIR)/lal_lj_coul_long.o $(OBJ_DIR)/lal_lj_coul_long_ext.o \
$(OBJ_DIR)/lal_lj_class2_long.o $(OBJ_DIR)/lal_lj_class2_long_ext.o \
$(OBJ_DIR)/lal_coul_long.o $(OBJ_DIR)/lal_coul_long_ext.o \
$(OBJ_DIR)/lal_morse.o $(OBJ_DIR)/lal_morse_ext.o \
$(OBJ_DIR)/lal_charmm_long.o $(OBJ_DIR)/lal_charmm_long_ext.o \
$(OBJ_DIR)/lal_cg_cmm.o $(OBJ_DIR)/lal_cg_cmm_ext.o \
$(OBJ_DIR)/lal_cg_cmm_long.o $(OBJ_DIR)/lal_cg_cmm_long_ext.o
KERS = $(OBJ_DIR)/device_cl.h $(OBJ_DIR)/atom_cl.h \
$(OBJ_DIR)/neighbor_cpu_cl.h $(OBJ_DIR)/pppm_cl.h \
$(OBJ_DIR)/ellipsoid_nbor_cl.h $(OBJ_DIR)/gayberne_cl.h \
$(OBJ_DIR)/re_squared_cl.h \
$(OBJ_DIR)/lj_cut_gpu_cl.h $(OBJ_DIR)/lj96_cut_gpu_cl.h \
$(OBJ_DIR)/lj_expand_gpu_cl.h $(OBJ_DIR)/ljc_cut_gpu_cl.h \
$(OBJ_DIR)/ljcl_cut_gpu_cl.h $(OBJ_DIR)/lj_class2_long_cl.h \
$(OBJ_DIR)/morse_gpu_cl.h \
$(OBJ_DIR)/crml_gpu_cl.h $(OBJ_DIR)/cmm_cut_gpu_cl.h \
$(OBJ_DIR)/cmmc_long_gpu_cl.h
$(OBJ_DIR)/gayberne_lj_cl.h $(OBJ_DIR)/re_squared_cl.h \
$(OBJ_DIR)/re_squared_lj_cl.h $(OBJ_DIR)/lj_cl.h $(OBJ_DIR)/lj96_cl.h \
$(OBJ_DIR)/lj_expand_cl.h $(OBJ_DIR)/lj_coul_cl.h \
$(OBJ_DIR)/lj_coul_long_cl.h $(OBJ_DIR)/lj_class2_long_cl.h \
$(OBJ_DIR)/coul_long_cl.h $(OBJ_DIR)/morse_cl.h \
$(OBJ_DIR)/charmm_long_cl.h $(OBJ_DIR)/cg_cmm_cl.h \
$(OBJ_DIR)/cg_cmm_long_cl.h $(OBJ_DIR)/neighbor_gpu_cl.h
OCL_EXECS = $(BIN_DIR)/ocl_get_devices
all: $(OCL_LIB) $(EXECS)
$(OBJ_DIR)/pair_gpu_atom_cl.h: pair_gpu_atom_kernel.cu
$(BSH) ./geryon/file_to_cstr.sh pair_gpu_atom_kernel.cu $(OBJ_DIR)/pair_gpu_atom_cl.h
$(OBJ_DIR)/atom_cl.h: lal_atom.cu lal_preprocessor.h
$(BSH) ./geryon/file_to_cstr.sh atom lal_preprocessor.h lal_atom.cu $(OBJ_DIR)/atom_cl.h
$(OBJ_DIR)/pair_gpu_atom.o: pair_gpu_atom.cpp pair_gpu_atom.h $(OCL_H) $(OBJ_DIR)/pair_gpu_atom_cl.h
$(OCL) -o $@ -c pair_gpu_atom.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/lal_atom.o: lal_atom.cpp lal_atom.h $(OCL_H) $(OBJ_DIR)/atom_cl.h
$(OCL) -o $@ -c lal_atom.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/pair_gpu_ans.o: pair_gpu_ans.cpp pair_gpu_ans.h $(OCL_H)
$(OCL) -o $@ -c pair_gpu_ans.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/lal_answer.o: lal_answer.cpp lal_answer.h $(OCL_H)
$(OCL) -o $@ -c lal_answer.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/pair_gpu_nbor_cl.h: pair_gpu_nbor_kernel.cu
$(BSH) ./geryon/file_to_cstr.sh pair_gpu_nbor_kernel.cu $(OBJ_DIR)/pair_gpu_nbor_cl.h
$(OBJ_DIR)/neighbor_cpu_cl.h: lal_neighbor_cpu.cu lal_preprocessor.h
$(BSH) ./geryon/file_to_cstr.sh neighbor_cpu lal_preprocessor.h lal_neighbor_cpu.cu $(OBJ_DIR)/neighbor_cpu_cl.h
$(OBJ_DIR)/pair_gpu_nbor_shared.o: pair_gpu_nbor_shared.cpp pair_gpu_nbor_shared.h $(OCL_H) $(OBJ_DIR)/pair_gpu_nbor_cl.h
$(OCL) -o $@ -c pair_gpu_nbor_shared.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/neighbor_gpu_cl.h: lal_neighbor_gpu.cu lal_preprocessor.h
$(BSH) ./geryon/file_to_cstr.sh neighbor_gpu lal_preprocessor.h lal_neighbor_gpu.cu $(OBJ_DIR)/neighbor_gpu_cl.h
$(OBJ_DIR)/pair_gpu_nbor.o: pair_gpu_nbor.cpp pair_gpu_nbor.h $(OCL_H) pair_gpu_nbor_shared.h
$(OCL) -o $@ -c pair_gpu_nbor.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/lal_neighbor_shared.o: lal_neighbor_shared.cpp lal_neighbor_shared.h $(OCL_H) $(OBJ_DIR)/neighbor_cpu_cl.h $(OBJ_DIR)/neighbor_gpu_cl.h
$(OCL) -o $@ -c lal_neighbor_shared.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/pair_gpu_dev_cl.h: pair_gpu_dev_kernel.cu
$(BSH) ./geryon/file_to_cstr.sh pair_gpu_dev_kernel.cu $(OBJ_DIR)/pair_gpu_dev_cl.h
$(OBJ_DIR)/lal_neighbor.o: lal_neighbor.cpp lal_neighbor.h $(OCL_H) lal_neighbor_shared.h
$(OCL) -o $@ -c lal_neighbor.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/pair_gpu_device.o: pair_gpu_device.cpp pair_gpu_device.h $(ALL_H) $(OBJ_DIR)/pair_gpu_dev_cl.h
$(OCL) -o $@ -c pair_gpu_device.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/device_cl.h: lal_device.cu lal_preprocessor.h
$(BSH) ./geryon/file_to_cstr.sh device lal_preprocessor.h lal_device.cu $(OBJ_DIR)/device_cl.h
$(OBJ_DIR)/atomic_gpu_memory.o: $(OCL_H) atomic_gpu_memory.h atomic_gpu_memory.cpp
$(OCL) -o $@ -c atomic_gpu_memory.cpp
$(OBJ_DIR)/lal_device.o: lal_device.cpp lal_device.h $(ALL_H) $(OBJ_DIR)/device_cl.h
$(OCL) -o $@ -c lal_device.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/charge_gpu_memory.o: $(OCL_H) charge_gpu_memory.h charge_gpu_memory.cpp
$(OCL) -o $@ -c charge_gpu_memory.cpp
$(OBJ_DIR)/lal_base_atomic.o: $(OCL_H) lal_base_atomic.h lal_base_atomic.cpp
$(OCL) -o $@ -c lal_base_atomic.cpp
$(OBJ_DIR)/base_ellipsoid.o: $(OCL_H) base_ellipsoid.h base_ellipsoid.cpp $(OBJ_DIR)/ellipsoid_nbor_cl.h
$(OCL) -o $@ -c base_ellipsoid.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/lal_base_charge.o: $(OCL_H) lal_base_charge.h lal_base_charge.cpp
$(OCL) -o $@ -c lal_base_charge.cpp
$(OBJ_DIR)/pppm_gpu_cl.h: pppm_gpu_kernel.cu
$(BSH) ./geryon/file_to_cstr.sh pppm_gpu_kernel.cu $(OBJ_DIR)/pppm_gpu_cl.h;
$(OBJ_DIR)/lal_base_ellipsoid.o: $(OCL_H) lal_base_ellipsoid.h lal_base_ellipsoid.cpp $(OBJ_DIR)/ellipsoid_nbor_cl.h
$(OCL) -o $@ -c lal_base_ellipsoid.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/pppm_gpu_memory.o: $(ALL_H) pppm_gpu_memory.h pppm_gpu_memory.cpp $(OBJ_DIR)/pppm_gpu_cl.h $(OBJ_DIR)/pppm_gpu_cl.h
$(OCL) -o $@ -c pppm_gpu_memory.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/pppm_cl.h: lal_pppm.cu lal_preprocessor.h
$(BSH) ./geryon/file_to_cstr.sh pppm lal_preprocessor.h lal_pppm.cu $(OBJ_DIR)/pppm_cl.h;
$(OBJ_DIR)/pppm_l_gpu.o: $(ALL_H) pppm_gpu_memory.h pppm_l_gpu.cpp
$(OCL) -o $@ -c pppm_l_gpu.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/lal_pppm.o: $(ALL_H) lal_pppm.h lal_pppm.cpp $(OBJ_DIR)/pppm_cl.h $(OBJ_DIR)/pppm_cl.h
$(OCL) -o $@ -c lal_pppm.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/ellipsoid_nbor_cl.h: ellipsoid_nbor.cu
$(BSH) ./geryon/file_to_cstr.sh ellipsoid_nbor.cu $(OBJ_DIR)/ellipsoid_nbor_cl.h
$(OBJ_DIR)/lal_pppm_ext.o: $(ALL_H) lal_pppm.h lal_pppm_ext.cpp
$(OCL) -o $@ -c lal_pppm_ext.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/gayberne_cl.h: gayberne.cu gayberne_lj.cu ellipsoid_extra.h
cat ellipsoid_extra.h gayberne.cu > $(OBJ_DIR)/gayberne.tar; \
cat ellipsoid_extra.h gayberne_lj.cu > $(OBJ_DIR)/gayberne_lj.tar; \
$(BSH) ./geryon/file_to_cstr.sh $(OBJ_DIR)/gayberne.tar $(OBJ_DIR)/gayberne_lj.tar $(OBJ_DIR)/gayberne_cl.h; \
rm -f $(OBJ_DIR)/gayberne.tar $(OBJ_DIR)/gayberne_lj.tar
$(OBJ_DIR)/ellipsoid_nbor_cl.h: lal_ellipsoid_nbor.cu lal_preprocessor.h
$(BSH) ./geryon/file_to_cstr.sh ellipsoid_nbor lal_preprocessor.h lal_ellipsoid_nbor.cu $(OBJ_DIR)/ellipsoid_nbor_cl.h
$(OBJ_DIR)/gayberne.o: $(ALL_H) gayberne.h gayberne.cpp $(OBJ_DIR)/gayberne_cl.h $(OBJ_DIR)/base_ellipsoid.o
$(OCL) -o $@ -c gayberne.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/gayberne_cl.h: lal_gayberne.cu lal_ellipsoid_extra.h lal_preprocessor.h
$(BSH) ./geryon/file_to_cstr.sh gayberne lal_preprocessor.h lal_ellipsoid_extra.h lal_gayberne.cu $(OBJ_DIR)/gayberne_cl.h;
$(OBJ_DIR)/gayberne_ext.o: $(ALL_H) $(OBJ_DIR)/gayberne.o gayberne_ext.cpp
$(OCL) -o $@ -c gayberne_ext.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/gayberne_lj_cl.h: lal_gayberne_lj.cu lal_ellipsoid_extra.h lal_preprocessor.h
$(BSH) ./geryon/file_to_cstr.sh gayberne_lj lal_preprocessor.h lal_ellipsoid_extra.h lal_gayberne_lj.cu $(OBJ_DIR)/gayberne_lj_cl.h;
$(OBJ_DIR)/re_squared_cl.h: re_squared.cu re_squared_lj.cu ellipsoid_extra.h
cat ellipsoid_extra.h re_squared.cu > $(OBJ_DIR)/re_squared.tar; \
cat ellipsoid_extra.h re_squared_lj.cu > $(OBJ_DIR)/re_squared_lj.tar; \
$(BSH) ./geryon/file_to_cstr.sh $(OBJ_DIR)/re_squared.tar $(OBJ_DIR)/re_squared_lj.tar $(OBJ_DIR)/re_squared_cl.h; \
rm -f $(OBJ_DIR)/re_squared.tar $(OBJ_DIR)/re_squared_lj.tar
$(OBJ_DIR)/lal_gayberne.o: $(ALL_H) lal_gayberne.h lal_gayberne.cpp $(OBJ_DIR)/gayberne_cl.h $(OBJ_DIR)/gayberne_lj_cl.h $(OBJ_DIR)/lal_base_ellipsoid.o
$(OCL) -o $@ -c lal_gayberne.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/re_squared.o: $(ALL_H) re_squared.h re_squared.cpp $(OBJ_DIR)/re_squared_cl.h $(OBJ_DIR)/base_ellipsoid.o
$(OCL) -o $@ -c re_squared.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/lal_gayberne_ext.o: $(ALL_H) $(OBJ_DIR)/lal_gayberne.o lal_gayberne_ext.cpp
$(OCL) -o $@ -c lal_gayberne_ext.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/re_squared_ext.o: $(ALL_H) $(OBJ_DIR)/re_squared.o re_squared_ext.cpp
$(OCL) -o $@ -c re_squared_ext.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/re_squared_cl.h: lal_re_squared.cu lal_ellipsoid_extra.h lal_preprocessor.h
$(BSH) ./geryon/file_to_cstr.sh re_squared lal_preprocessor.h lal_ellipsoid_extra.h lal_re_squared.cu $(OBJ_DIR)/re_squared_cl.h;
$(OBJ_DIR)/lj_cut_gpu_cl.h: lj_cut_gpu_kernel.cu
$(BSH) ./geryon/file_to_cstr.sh lj_cut_gpu_kernel.cu $(OBJ_DIR)/lj_cut_gpu_cl.h;
$(OBJ_DIR)/re_squared_lj_cl.h: lal_re_squared_lj.cu lal_ellipsoid_extra.h lal_preprocessor.h
$(BSH) ./geryon/file_to_cstr.sh re_squared_lj lal_preprocessor.h lal_ellipsoid_extra.h lal_re_squared_lj.cu $(OBJ_DIR)/re_squared_lj_cl.h;
$(OBJ_DIR)/lj_cut_gpu_memory.o: $(ALL_H) lj_cut_gpu_memory.h lj_cut_gpu_memory.cpp $(OBJ_DIR)/lj_cut_gpu_cl.h $(OBJ_DIR)/pair_gpu_nbor_cl.h $(OBJ_DIR)/lj_cut_gpu_cl.h $(OBJ_DIR)/atomic_gpu_memory.o
$(OCL) -o $@ -c lj_cut_gpu_memory.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/lal_re_squared.o: $(ALL_H) lal_re_squared.h lal_re_squared.cpp $(OBJ_DIR)/re_squared_cl.h $(OBJ_DIR)/re_squared_lj_cl.h $(OBJ_DIR)/lal_base_ellipsoid.o
$(OCL) -o $@ -c lal_re_squared.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/lj_cut_gpu.o: $(ALL_H) lj_cut_gpu_memory.h lj_cut_gpu.cpp atomic_gpu_memory.h
$(OCL) -o $@ -c lj_cut_gpu.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/lal_re_squared_ext.o: $(ALL_H) $(OBJ_DIR)/lal_re_squared.o lal_re_squared_ext.cpp
$(OCL) -o $@ -c lal_re_squared_ext.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/ljc_cut_gpu_cl.h: ljc_cut_gpu_kernel.cu
$(BSH) ./geryon/file_to_cstr.sh ljc_cut_gpu_kernel.cu $(OBJ_DIR)/ljc_cut_gpu_cl.h;
$(OBJ_DIR)/lj_cl.h: lal_lj.cu $(PRE1_H)
$(BSH) ./geryon/file_to_cstr.sh lj $(PRE1_H) lal_lj.cu $(OBJ_DIR)/lj_cl.h;
$(OBJ_DIR)/ljc_cut_gpu_memory.o: $(ALL_H) ljc_cut_gpu_memory.h ljc_cut_gpu_memory.cpp $(OBJ_DIR)/ljc_cut_gpu_cl.h $(OBJ_DIR)/pair_gpu_nbor_cl.h $(OBJ_DIR)/ljc_cut_gpu_cl.h $(OBJ_DIR)/charge_gpu_memory.o
$(OCL) -o $@ -c ljc_cut_gpu_memory.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/lal_lj.o: $(ALL_H) lal_lj.h lal_lj.cpp $(OBJ_DIR)/lj_cl.h $(OBJ_DIR)/lj_cl.h $(OBJ_DIR)/lal_base_atomic.o
$(OCL) -o $@ -c lal_lj.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/ljc_cut_gpu.o: $(ALL_H) ljc_cut_gpu_memory.h ljc_cut_gpu.cpp charge_gpu_memory.h
$(OCL) -o $@ -c ljc_cut_gpu.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/lal_lj_ext.o: $(ALL_H) lal_lj.h lal_lj_ext.cpp lal_base_atomic.h
$(OCL) -o $@ -c lal_lj_ext.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/ljcl_cut_gpu_cl.h: ljcl_cut_gpu_kernel.cu
$(BSH) ./geryon/file_to_cstr.sh ljcl_cut_gpu_kernel.cu $(OBJ_DIR)/ljcl_cut_gpu_cl.h;
$(OBJ_DIR)/lj_coul_cl.h: lal_lj_coul.cu $(PRE1_H)
$(BSH) ./geryon/file_to_cstr.sh lj_coul $(PRE1_H) lal_lj_coul.cu $(OBJ_DIR)/lj_coul_cl.h;
$(OBJ_DIR)/ljcl_cut_gpu_memory.o: $(ALL_H) ljcl_cut_gpu_memory.h ljcl_cut_gpu_memory.cpp $(OBJ_DIR)/ljcl_cut_gpu_cl.h $(OBJ_DIR)/pair_gpu_nbor_cl.h $(OBJ_DIR)/charge_gpu_memory.o
$(OCL) -o $@ -c ljcl_cut_gpu_memory.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/lal_lj_coul.o: $(ALL_H) lal_lj_coul.h lal_lj_coul.cpp $(OBJ_DIR)/lj_coul_cl.h $(OBJ_DIR)/lj_coul_cl.h $(OBJ_DIR)/lal_base_charge.o
$(OCL) -o $@ -c lal_lj_coul.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/ljcl_cut_gpu.o: $(ALL_H) ljcl_cut_gpu_memory.h ljcl_cut_gpu.cpp charge_gpu_memory.h
$(OCL) -o $@ -c ljcl_cut_gpu.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/lal_lj_coul_ext.o: $(ALL_H) lal_lj_coul.h lal_lj_coul_ext.cpp lal_base_charge.h
$(OCL) -o $@ -c lal_lj_coul_ext.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/lj_class2_long_cl.h: lj_class2_long.cu
$(BSH) ./geryon/file_to_cstr.sh lj_class2_long.cu $(OBJ_DIR)/lj_class2_long_cl.h;
$(OBJ_DIR)/lj_coul_long_cl.h: lal_lj_coul_long.cu $(PRE1_H)
$(BSH) ./geryon/file_to_cstr.sh lj_coul_long $(PRE1_H) lal_lj_coul_long.cu $(OBJ_DIR)/lj_coul_long_cl.h;
$(OBJ_DIR)/lj_class2_long.o: $(ALL_H) lj_class2_long.h lj_class2_long.cpp $(OBJ_DIR)/lj_class2_long_cl.h $(OBJ_DIR)/pair_gpu_nbor_cl.h $(OBJ_DIR)/charge_gpu_memory.o
$(OCL) -o $@ -c lj_class2_long.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/lal_lj_coul_long.o: $(ALL_H) lal_lj_coul_long.h lal_lj_coul_long.cpp $(OBJ_DIR)/lj_coul_long_cl.h $(OBJ_DIR)/lal_base_charge.o
$(OCL) -o $@ -c lal_lj_coul_long.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/lj_class2_long_ext.o: $(ALL_H) lj_class2_long.h lj_class2_long_ext.cpp charge_gpu_memory.h
$(OCL) -o $@ -c lj_class2_long_ext.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/lal_lj_coul_long_ext.o: $(ALL_H) lal_lj_coul_long.h lal_lj_coul_long_ext.cpp lal_base_charge.h
$(OCL) -o $@ -c lal_lj_coul_long_ext.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/morse_gpu_cl.h: morse_gpu_kernel.cu
$(BSH) ./geryon/file_to_cstr.sh morse_gpu_kernel.cu $(OBJ_DIR)/morse_gpu_cl.h;
$(OBJ_DIR)/lj_class2_long_cl.h: lal_lj_class2_long.cu $(PRE1_H)
$(BSH) ./geryon/file_to_cstr.sh lj_class2_long $(PRE1_H) lal_lj_class2_long.cu $(OBJ_DIR)/lj_class2_long_cl.h;
$(OBJ_DIR)/morse_gpu_memory.o: $(ALL_H) morse_gpu_memory.h morse_gpu_memory.cpp $(OBJ_DIR)/morse_gpu_cl.h $(OBJ_DIR)/pair_gpu_nbor_cl.h $(OBJ_DIR)/morse_gpu_cl.h $(OBJ_DIR)/atomic_gpu_memory.o
$(OCL) -o $@ -c morse_gpu_memory.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/lal_lj_class2_long.o: $(ALL_H) lal_lj_class2_long.h lal_lj_class2_long.cpp $(OBJ_DIR)/lj_class2_long_cl.h $(OBJ_DIR)/lal_base_charge.o
$(OCL) -o $@ -c lal_lj_class2_long.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/morse_gpu.o: $(ALL_H) morse_gpu_memory.h morse_gpu.cpp atomic_gpu_memory.h
$(OCL) -o $@ -c morse_gpu.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/lal_lj_class2_long_ext.o: $(ALL_H) lal_lj_class2_long.h lal_lj_class2_long_ext.cpp lal_base_charge.h
$(OCL) -o $@ -c lal_lj_class2_long_ext.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/crml_gpu_cl.h: crml_gpu_kernel.cu
$(BSH) ./geryon/file_to_cstr.sh crml_gpu_kernel.cu $(OBJ_DIR)/crml_gpu_cl.h;
$(OBJ_DIR)/coul_long_cl.h: lal_coul_long.cu $(PRE1_H)
$(BSH) ./geryon/file_to_cstr.sh coul_long $(PRE1_H) lal_coul_long.cu $(OBJ_DIR)/coul_long_cl.h;
$(OBJ_DIR)/crml_gpu_memory.o: $(ALL_H) crml_gpu_memory.h crml_gpu_memory.cpp $(OBJ_DIR)/crml_gpu_cl.h $(OBJ_DIR)/pair_gpu_nbor_cl.h $(OBJ_DIR)/crml_gpu_cl.h $(OBJ_DIR)/charge_gpu_memory.o
$(OCL) -o $@ -c crml_gpu_memory.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/lal_coul_long.o: $(ALL_H) lal_coul_long.h lal_coul_long.cpp $(OBJ_DIR)/coul_long_cl.h $(OBJ_DIR)/lal_base_charge.o
$(OCL) -o $@ -c lal_coul_long.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/crml_gpu.o: $(ALL_H) crml_gpu_memory.h crml_gpu.cpp charge_gpu_memory.h
$(OCL) -o $@ -c crml_gpu.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/lal_coul_long_ext.o: $(ALL_H) lal_coul_long.h lal_coul_long_ext.cpp lal_base_charge.h
$(OCL) -o $@ -c lal_coul_long_ext.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/lj96_cut_gpu_cl.h: lj96_cut_gpu_kernel.cu
$(BSH) ./geryon/file_to_cstr.sh lj96_cut_gpu_kernel.cu $(OBJ_DIR)/lj96_cut_gpu_cl.h;
$(OBJ_DIR)/morse_cl.h: lal_morse.cu $(PRE1_H)
$(BSH) ./geryon/file_to_cstr.sh morse $(PRE1_H) lal_morse.cu $(OBJ_DIR)/morse_cl.h;
$(OBJ_DIR)/lj96_cut_gpu_memory.o: $(ALL_H) lj96_cut_gpu_memory.h lj96_cut_gpu_memory.cpp $(OBJ_DIR)/lj96_cut_gpu_cl.h $(OBJ_DIR)/pair_gpu_nbor_cl.h $(OBJ_DIR)/lj96_cut_gpu_cl.h $(OBJ_DIR)/atomic_gpu_memory.o
$(OCL) -o $@ -c lj96_cut_gpu_memory.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/lal_morse.o: $(ALL_H) lal_morse.h lal_morse.cpp $(OBJ_DIR)/morse_cl.h $(OBJ_DIR)/morse_cl.h $(OBJ_DIR)/lal_base_atomic.o
$(OCL) -o $@ -c lal_morse.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/lj96_cut_gpu.o: $(ALL_H) lj96_cut_gpu_memory.h lj96_cut_gpu.cpp atomic_gpu_memory.h
$(OCL) -o $@ -c lj96_cut_gpu.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/lal_morse_ext.o: $(ALL_H) lal_morse.h lal_morse_ext.cpp lal_base_atomic.h
$(OCL) -o $@ -c lal_morse_ext.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/lj_expand_gpu_cl.h: lj_expand_gpu_kernel.cu
$(BSH) ./geryon/file_to_cstr.sh lj_expand_gpu_kernel.cu $(OBJ_DIR)/lj_expand_gpu_cl.h;
$(OBJ_DIR)/charmm_long_cl.h: lal_charmm_long.cu $(PRE1_H)
$(BSH) ./geryon/file_to_cstr.sh charmm_long $(PRE1_H) lal_charmm_long.cu $(OBJ_DIR)/charmm_long_cl.h;
$(OBJ_DIR)/lj_expand_gpu_memory.o: $(ALL_H) lj_expand_gpu_memory.h lj_expand_gpu_memory.cpp $(OBJ_DIR)/lj_expand_gpu_cl.h $(OBJ_DIR)/pair_gpu_nbor_cl.h $(OBJ_DIR)/lj_expand_gpu_cl.h $(OBJ_DIR)/atomic_gpu_memory.o
$(OCL) -o $@ -c lj_expand_gpu_memory.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/lal_charmm_long.o: $(ALL_H) lal_charmm_long.h lal_charmm_long.cpp $(OBJ_DIR)/charmm_long_cl.h $(OBJ_DIR)/charmm_long_cl.h $(OBJ_DIR)/lal_base_charge.o
$(OCL) -o $@ -c lal_charmm_long.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/lj_expand_gpu.o: $(ALL_H) lj_expand_gpu_memory.h lj_expand_gpu.cpp atomic_gpu_memory.h
$(OCL) -o $@ -c lj_expand_gpu.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/lal_charmm_long_ext.o: $(ALL_H) lal_charmm_long.h lal_charmm_long_ext.cpp lal_base_charge.h
$(OCL) -o $@ -c lal_charmm_long_ext.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/cmm_cut_gpu_cl.h: cmm_cut_gpu_kernel.cu
$(BSH) ./geryon/file_to_cstr.sh cmm_cut_gpu_kernel.cu $(OBJ_DIR)/cmm_cut_gpu_cl.h;
$(OBJ_DIR)/lj96_cl.h: lal_lj96.cu $(PRE1_H)
$(BSH) ./geryon/file_to_cstr.sh lj96 $(PRE1_H) lal_lj96.cu $(OBJ_DIR)/lj96_cl.h;
$(OBJ_DIR)/cmm_cut_gpu_memory.o: $(ALL_H) cmm_cut_gpu_memory.h cmm_cut_gpu_memory.cpp $(OBJ_DIR)/cmm_cut_gpu_cl.h $(OBJ_DIR)/pair_gpu_nbor_cl.h $(OBJ_DIR)/cmm_cut_gpu_cl.h $(OBJ_DIR)/atomic_gpu_memory.o
$(OCL) -o $@ -c cmm_cut_gpu_memory.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/lal_lj96.o: $(ALL_H) lal_lj96.h lal_lj96.cpp $(OBJ_DIR)/lj96_cl.h $(OBJ_DIR)/lj96_cl.h $(OBJ_DIR)/lal_base_atomic.o
$(OCL) -o $@ -c lal_lj96.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/cmm_cut_gpu.o: $(ALL_H) cmm_cut_gpu_memory.h cmm_cut_gpu.cpp atomic_gpu_memory.h
$(OCL) -o $@ -c cmm_cut_gpu.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/lal_lj96_ext.o: $(ALL_H) lal_lj96.h lal_lj96_ext.cpp lal_base_atomic.h
$(OCL) -o $@ -c lal_lj96_ext.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/cmmc_long_gpu_cl.h: cmmc_long_gpu_kernel.cu
$(BSH) ./geryon/file_to_cstr.sh cmmc_long_gpu_kernel.cu $(OBJ_DIR)/cmmc_long_gpu_cl.h;
$(OBJ_DIR)/lj_expand_cl.h: lal_lj_expand.cu $(PRE1_H)
$(BSH) ./geryon/file_to_cstr.sh lj_expand $(PRE1_H) lal_lj_expand.cu $(OBJ_DIR)/lj_expand_cl.h;
$(OBJ_DIR)/cmmc_long_gpu_memory.o: $(ALL_H) cmmc_long_gpu_memory.h cmmc_long_gpu_memory.cpp $(OBJ_DIR)/cmmc_long_gpu_cl.h $(OBJ_DIR)/pair_gpu_nbor_cl.h $(OBJ_DIR)/cmmc_long_gpu_cl.h $(OBJ_DIR)/atomic_gpu_memory.o
$(OCL) -o $@ -c cmmc_long_gpu_memory.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/lal_lj_expand.o: $(ALL_H) lal_lj_expand.h lal_lj_expand.cpp $(OBJ_DIR)/lj_expand_cl.h $(OBJ_DIR)/lj_expand_cl.h $(OBJ_DIR)/lal_base_atomic.o
$(OCL) -o $@ -c lal_lj_expand.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/cmmc_long_gpu.o: $(ALL_H) cmmc_long_gpu_memory.h cmmc_long_gpu.cpp charge_gpu_memory.h
$(OCL) -o $@ -c cmmc_long_gpu.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/lal_lj_expand_ext.o: $(ALL_H) lal_lj_expand.h lal_lj_expand_ext.cpp lal_base_atomic.h
$(OCL) -o $@ -c lal_lj_expand_ext.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/cg_cmm_cl.h: lal_cg_cmm.cu $(PRE1_H)
$(BSH) ./geryon/file_to_cstr.sh cg_cmm $(PRE1_H) lal_cg_cmm.cu $(OBJ_DIR)/cg_cmm_cl.h;
$(OBJ_DIR)/lal_cg_cmm.o: $(ALL_H) lal_cg_cmm.h lal_cg_cmm.cpp $(OBJ_DIR)/cg_cmm_cl.h $(OBJ_DIR)/cg_cmm_cl.h $(OBJ_DIR)/lal_base_atomic.o
$(OCL) -o $@ -c lal_cg_cmm.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/lal_cg_cmm_ext.o: $(ALL_H) lal_cg_cmm.h lal_cg_cmm_ext.cpp lal_base_atomic.h
$(OCL) -o $@ -c lal_cg_cmm_ext.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/cg_cmm_long_cl.h: lal_cg_cmm_long.cu $(PRE1_H)
$(BSH) ./geryon/file_to_cstr.sh cg_cmm_long $(PRE1_H) lal_cg_cmm_long.cu $(OBJ_DIR)/cg_cmm_long_cl.h;
$(OBJ_DIR)/lal_cg_cmm_long.o: $(ALL_H) lal_cg_cmm_long.h lal_cg_cmm_long.cpp $(OBJ_DIR)/cg_cmm_long_cl.h $(OBJ_DIR)/cg_cmm_long_cl.h $(OBJ_DIR)/lal_base_atomic.o
$(OCL) -o $@ -c lal_cg_cmm_long.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/lal_cg_cmm_long_ext.o: $(ALL_H) lal_cg_cmm_long.h lal_cg_cmm_long_ext.cpp lal_base_charge.h
$(OCL) -o $@ -c lal_cg_cmm_long_ext.cpp -I$(OBJ_DIR)
$(BIN_DIR)/ocl_get_devices: ./geryon/ucl_get_devices.cpp
$(OCL) -o $@ ./geryon/ucl_get_devices.cpp -DUCL_OPENCL $(OCL_LINK)

View File

@ -1,3 +1,16 @@
--------------------------------
LAMMPS ACCELERATOR LIBRARY
--------------------------------
W. Michael Brown (ORNL)
Peng Wang (NVIDIA)
Axel Kohlmeyer (Temple)
Steve Plimpton (SNL)
Inderaj Bains (NVIDIA)
-------------------------------------------------------------------
This directory has source files to build a library that LAMMPS
links against when using the GPU package.
@ -20,44 +33,56 @@ SYSPATH is the path(s) to where those libraries are
You must insure these settings are correct for your system, else
the LAMMPS build will likely fail.
-------------------------------------------------------------------------
-------------------------------------------------------------------
Contributing authors: Mike Brown (ORNL), brownw@ornl.gov
Peng Wang (Nvidia), penwang@nvidia.com
Inderaj Bains (NVIDIA), ibains@nvidia.com
Paul Crozier (SNL), pscrozi@sandia.gov
-------------------------------------------------------------------------
GENERAL NOTES
--------------------------------
This library, libgpu.a, provides routines for GPU acceleration
of LAMMPS pair styles. Compilation of this library requires
installing the CUDA GPU driver and CUDA toolkit for your operating
system. In addition to the LAMMPS library, the binary nvc_get_devices
will also be built. This can be used to query the names and
properties of GPU devices on your system. A Makefile for OpenCL
compilation is provided, but support for OpenCL use is not currently
provided by the developers.
of certain LAMMPS styles and neighbor list builds. Compilation of this
library requires installing the CUDA GPU driver and CUDA toolkit for
your operating system. Installation of the CUDA SDK is not necessary.
In addition to the LAMMPS library, the binary nvc_get_devices will also
be built. This can be used to query the names and properties of GPU
devices on your system. A Makefile for OpenCL compilation is provided,
but support for OpenCL use is not currently provided by the developers.
Details of the implementation are provided in:
Brown, W.M., Wang, P. Plimpton, S.J., Tharrington, A.N. Implementing
Molecular Dynamics on Hybrid High Performance Computers - Short Range
Forces. Computer Physics Communications. 2011. 182: p. 898-911.
and
Brown, W.M., Kohlmeyer, A. Plimpton, S.J., Tharrington, A.N. Implementing
Molecular Dynamics on Hybrid High Performance Computers - Particle-Particle
Particle-Mesh. Computer Physics Communications. 2011. In press.
NOTE: Installation of the CUDA SDK is not required.
Current pair styles supporting GPU acceleration:
Current styles supporting GPU acceleration:
1. lj/cut
2. lj96/cut
3. lj/expand
4. lj/cut/coul/cut
5. lj/cut/coul/long
6. lj/charmm/coul/long
7. lj/class2
8. lj/class2/coul/long
9. morse
10. cg/cmm
11. cg/cmm/coul/long
12. coul/long
13. gayberne
14. resquared
15. pppm
1. lj/cut
2. lj96/cut
3. lj/expand
4. lj/cut/coul/cut
5. lj/cut/coul/long
6. lj/charmm/coul/long
7. morse
8. cg/cmm
9. cg/cmm/coul/long
10. coul/long
11. gayberne
12. pppm
MULTIPLE LAMMPS PROCESSES
--------------------------------
Multiple LAMMPS MPI processes can share GPUs on the system, but multiple
GPUs cannot be utilized by a single MPI process. In many cases, the
@ -66,7 +91,9 @@ CPU cores available with the condition that the number of MPI processes
is an integer multiple of the number of GPUs being used. See the
LAMMPS user manual for details on running with GPU acceleration.
BUILDING AND PRECISION MODES
--------------------------------
To build, edit the CUDA_ARCH, CUDA_PRECISION, CUDA_HOME variables in one of
the Makefiles. CUDA_ARCH should be set based on the compute capability of
@ -97,7 +124,7 @@ NOTE: PPPM acceleration can only be run on GPUs with compute capability>=1.1.
NOTE: Double precision is only supported on certain GPUs (with
compute capability>=1.3). If you compile the GPU library for
a GPU with compute capability 1.1 and 1.2, then only single
precistion FFTs are supported, i.e. LAMMPS has to be compiled
precision FFTs are supported, i.e. LAMMPS has to be compiled
with -DFFT_SINGLE. For details on configuring FFT support in
LAMMPS, see http://lammps.sandia.gov/doc/Section_start.html#2_2_4
@ -121,6 +148,7 @@ NOTE: The lj/cut/coul/long/gpu, cg/cmm/coul/long/gpu, coul/long/gpu,
EXAMPLE BUILD PROCESS
--------------------------------
cd ~/lammps/lib/gpu
emacs Makefile.linux

View File

@ -9,39 +9,46 @@
num_args=$#
# we write to a scratch file, since
# we know the real file name only at
# the very end.
output=geryon.tmp.$$
: > $output
# Check command-line arguments
if [ $num_args -gt 9 ]; then
echo "$0 can only take 9 arguments; not $num_args"
exit 1
fi
if [ $num_args -lt 3 ]; then
echo "Not enough arguments."
echo "$0 name_for_string input_file1 input_file2 ... output"
exit 1
fi
# Name is first arg, output file is last argument
string_name=$1
eval output=\${$num_args}
shift
# remove temporary file in case we're interrupted.
cleanup () {
rm -f geryon.tmp.$$
rm -f $output
}
trap cleanup INT QUIT TERM
# loop over arguments and convert to
# string constants.
i=1
# string constant.
i=2
echo "const char * $string_name = " > $output
while [ $i -lt $num_args ]
do \
src=$1
krn=${src##*/}
krn=${krn%.*}
echo "Converting kernel $krn from $src to a c-style string"
echo "const char * $krn = " >> $output
echo "Converting $src to a c-style string"
sed -e 's/\\/\\\\/g' \
-e 's/"/\\"/g' \
-e 's/ *\/\/.*$//' \
-e '/\.file/D' \
-e '/^[ ]*$/D' \
-e 's/^\(.*\)$/"\1\\n"/' $src >> $output
echo ';' >> $output
shift
i=`expr $i + 1`
done
# $1 holds now the real output file name
mv $output $1
echo ';' >> $output

View File

@ -64,7 +64,9 @@ class UCL_Device {
inline int num_devices() { return _properties.size(); }
/// Set the CUDA device to the specified device number
void set(int num);
/** Returns UCL_SUCCESS if successful or UCL_ERROR if the device could not
* be allocated for use **/
int set(int num);
/// Get the current device number
inline int device_num() { return _device; }
@ -160,9 +162,17 @@ class UCL_Device {
/// Return the maximum memory pitch in bytes
inline size_t max_pitch(const int i) { return _properties[i].memPitch; }
/// Returns false if accelerator cannot be shared by multiple processes
/** If it cannot be determined, true is returned **/
inline bool sharing_supported() { return sharing_supported(_device); }
/// Returns false if accelerator cannot be shared by multiple processes
/** If it cannot be determined, true is returned **/
inline bool sharing_supported(const int i)
{ return (_properties[i].computeMode == cudaComputeModeDefault); }
/// List all devices along with all properties
void print_all(std::ostream &out);
private:
int _device, _num_devices;
std::vector<cudaDeviceProp> _properties;
@ -191,13 +201,22 @@ inline UCL_Device::~UCL_Device() {
}
// Set the CUDA device to the specified device number
inline void UCL_Device::set(int num) {
inline int UCL_Device::set(int num) {
if (_device==num)
return;
return UCL_SUCCESS;
for (int i=1; i<num_queues(); i++) pop_command_queue();
cudaThreadExit();
CUDA_SAFE_CALL_NS(cudaSetDevice(_device_ids[num]));
cudaError err=cudaSetDevice(_device_ids[num]);
if (err!=cudaSuccess) {
#ifndef UCL_NO_EXIT
std::cerr << "UCL Error: Could not access accelerator number " << num
<< " for use.\n";
exit(1);
#endif
return UCL_ERROR;
}
_device=num;
return UCL_SUCCESS;
}
// List all devices along with all properties
@ -289,6 +308,10 @@ inline void UCL_Device::print_all(std::ostream &out) {
out << "Exclusive\n"; // only thread can use device
else if (_properties[i].computeMode == cudaComputeModeProhibited)
out << "Prohibited\n"; // no thread can use device
#if CUDART_VERSION >= 4000
else if (_properties[i].computeMode == cudaComputeModeExclusiveProcess)
out << "Exclusive Process\n"; // multiple threads 1 process
#endif
else
out << "Unknown\n";
#endif

View File

@ -54,6 +54,7 @@ struct NVDProperties {
int canMapHostMemory;
int concurrentKernels;
int ECCEnabled;
int computeMode;
};
/// Class for looking at device properties
@ -78,8 +79,10 @@ class UCL_Device {
inline int num_devices() { return _properties.size(); }
/// Set the CUDA device to the specified device number
/** A context and default command queue will be created for the device **/
void set(int num);
/** A context and default command queue will be created for the device
* Returns UCL_SUCCESS if successful or UCL_ERROR if the device could not
* be allocated for use **/
int set(int num);
/// Get the current device number
inline int device_num() { return _device; }
@ -191,6 +194,14 @@ class UCL_Device {
/// Return the maximum memory pitch in bytes
inline size_t max_pitch(const int i) { return _properties[i].p.memPitch; }
/// Returns false if accelerator cannot be shared by multiple processes
/** If it cannot be determined, true is returned **/
inline bool sharing_supported() { return sharing_supported(_device); }
/// Returns false if accelerator cannot be shared by multiple processes
/** If it cannot be determined, true is returned **/
inline bool sharing_supported(const int i)
{ return (_properties[i].computeMode == CU_COMPUTEMODE_DEFAULT); }
/// List all devices along with all properties
void print_all(std::ostream &out);
@ -238,6 +249,8 @@ inline UCL_Device::UCL_Device() {
CU_SAFE_CALL_NS(cuDeviceGetAttribute(
&_properties.back().canMapHostMemory,
CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY, dev));
CU_SAFE_CALL_NS(cuDeviceGetAttribute(&_properties.back().computeMode,
CU_DEVICE_ATTRIBUTE_COMPUTE_MODE,dev));
#endif
#if CUDA_VERSION >= 3010
CU_SAFE_CALL_NS(cuDeviceGetAttribute(
@ -261,16 +274,25 @@ inline UCL_Device::~UCL_Device() {
}
// Set the CUDA device to the specified device number
inline void UCL_Device::set(int num) {
inline int UCL_Device::set(int num) {
if (_device==num)
return;
return UCL_SUCCESS;
if (_device>-1) {
CU_SAFE_CALL_NS(cuCtxDestroy(_context));
for (int i=1; i<num_queues(); i++) pop_command_queue();
}
_device=_properties[num].device_id;
CU_SAFE_CALL_NS(cuDeviceGet(&_cu_device,_device));
CU_SAFE_CALL_NS(cuCtxCreate(&_context,0,_cu_device));
CUresult err=cuCtxCreate(&_context,0,_cu_device);
if (err!=CUDA_SUCCESS) {
#ifndef UCL_NO_EXIT
std::cerr << "UCL Error: Could not access accelerator number " << num
<< " for use.\n";
exit(1);
#endif
return UCL_ERROR;
}
return UCL_SUCCESS;
}
// List all devices along with all properties
@ -344,6 +366,19 @@ inline void UCL_Device::print_all(std::ostream &out) {
out << "Yes\n";
else
out << "No\n";
out << " Compute mode: ";
if (_properties[i].computeMode == CU_COMPUTEMODE_DEFAULT)
out << "Default\n"; // multiple threads can use device
else if (_properties[i].computeMode == CU_COMPUTEMODE_EXCLUSIVE)
out << "Exclusive\n"; // only thread can use device
else if (_properties[i].computeMode == CU_COMPUTEMODE_PROHIBITED)
out << "Prohibited\n"; // no thread can use device
#if CUDART_VERSION >= 4000
else if (_properties[i].computeMode == CU_COMPUTEMODE_EXCLUSIVE_PROCESS)
out << "Exclusive Process\n"; // multiple threads 1 process
#endif
else
out << "Unknown\n";
#endif
#if CUDA_VERSION >= 3010
out << " Concurrent kernel execution: ";

View File

@ -82,8 +82,10 @@ class UCL_Device {
inline int num_devices() { return _num_devices; }
/// Set the OpenCL device to the specified device number
/** A context and default command queue will be created for the device **/
void set(int num);
/** A context and default command queue will be created for the device *
* Returns UCL_SUCCESS if successful or UCL_ERROR if the device could not
* be allocated for use **/
int set(int num);
/// Get the current device number
inline int device_num() { return _device; }
@ -200,6 +202,14 @@ class UCL_Device {
/// Return the maximum memory pitch in bytes
inline size_t max_pitch(const int i) { return 0; }
/// Returns false if accelerator cannot be shared by multiple processes
/** If it cannot be determined, true is returned **/
inline bool sharing_supported() { return sharing_supported(_device); }
/// Returns false if accelerator cannot be shared by multiple processes
/** If it cannot be determined, true is returned **/
inline bool sharing_supported(const int i)
{ return true; }
/// List all devices along with all properties
void print_all(std::ostream &out);
@ -219,7 +229,7 @@ class UCL_Device {
std::vector<OCLProperties> _properties; // Properties for each device
void add_properties(cl_device_id);
void create_context();
int create_context();
};
@ -272,7 +282,7 @@ inline UCL_Device::~UCL_Device() {
}
}
inline void UCL_Device::create_context() {
inline int UCL_Device::create_context() {
cl_int errorv;
cl_context_properties props[3];
props[0]=CL_CONTEXT_PLATFORM;
@ -280,10 +290,15 @@ inline void UCL_Device::create_context() {
props[2]=0;
_context=clCreateContext(0,1,&_cl_device,NULL,NULL,&errorv);
if (errorv!=CL_SUCCESS) {
std::cerr << "Could not create context on device: " << name() << std::endl;
#ifndef UCL_NO_EXIT
std::cerr << "UCL Error: Could not access accelerator number " << _device
<< " for use.\n";
exit(1);
#endif
return UCL_ERROR;
}
push_command_queue();
return UCL_SUCCESS;
}
inline void UCL_Device::add_properties(cl_device_id device_list) {
@ -375,9 +390,9 @@ inline int UCL_Device::device_type(const int i) {
}
// Set the CUDA device to the specified device number
inline void UCL_Device::set(int num) {
inline int UCL_Device::set(int num) {
if (_device==num)
return;
return UCL_SUCCESS;
if (_device>-1) {
for (size_t i=0; i<_cq.size(); i++) {
@ -394,7 +409,7 @@ inline void UCL_Device::set(int num) {
_device=num;
_cl_device=device_list[_device];
create_context();
return create_context();
}
// List all devices along with all properties

View File

@ -85,6 +85,8 @@ class UCL_Program {
_program=clCreateProgramWithSource(_context,1,&prog,NULL,&error_flag);
CL_CHECK_ERR(error_flag);
error_flag = clBuildProgram(_program,1,&_device,flags,NULL,NULL);
if (error_flag!=-11)
CL_CHECK_ERR(error_flag);
cl_build_status build_status;
CL_SAFE_CALL(clGetProgramBuildInfo(_program,_device,
CL_PROGRAM_BUILD_STATUS,
@ -106,7 +108,8 @@ class UCL_Program {
#ifndef UCL_NO_EXIT
std::cerr << std::endl
<< "----------------------------------------------------------\n"
<< " UCL Error: Error compiling OpenCL Program...\n"
<< " UCL Error: Error compiling OpenCL Program ("
<< build_status << ") ...\n"
<< "----------------------------------------------------------\n";
std::cerr << build_log << std::endl;
#endif

View File

@ -55,10 +55,7 @@ typedef struct _double4 double4;
#define __local __shared__
#define __global
#define atom_add atomicAdd
#ifndef __inline
#define __inline static __inline__ __device__
#endif
#define ucl_inline static __inline__ __device__
#endif