git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@6340 f3b2605a-c512-4ea7-a41b-209d697bcdaa

This commit is contained in:
sjplimp 2011-06-08 20:24:29 +00:00
parent 029f0ee874
commit 98ccbe3e69
24 changed files with 173 additions and 113 deletions

View File

@ -43,19 +43,22 @@ CUDPP = $(OBJ_DIR)/cudpp.o $(OBJ_DIR)/cudpp_plan.o \
$(OBJ_DIR)/radixsort_app.cu_o $(OBJ_DIR)/scan_app.cu_o
OBJS = $(OBJ_DIR)/pair_gpu_atom.o $(OBJ_DIR)/pair_gpu_ans.o \
$(OBJ_DIR)/pair_gpu_nbor.o $(OBJ_DIR)/pair_gpu_nbor_shared.o \
$(OBJ_DIR)/pair_gpu_device.o \
$(OBJ_DIR)/atomic_gpu_memory.o $(OBJ_DIR)/charge_gpu_memory.o \
$(OBJ_DIR)/pair_gpu_device.o $(OBJ_DIR)/atomic_gpu_memory.o \
$(OBJ_DIR)/charge_gpu_memory.o $(OBJ_DIR)/base_ellipsoid.o \
$(OBJ_DIR)/pppm_gpu_memory.o $(OBJ_DIR)/pppm_l_gpu.o \
$(OBJ_DIR)/gb_gpu_memory.o $(OBJ_DIR)/gb_gpu.o \
$(OBJ_DIR)/gayberne.o $(OBJ_DIR)/gayberne_ext.o \
$(OBJ_DIR)/re_squared.o $(OBJ_DIR)/re_squared_ext.o \
$(OBJ_DIR)/lj_cut_gpu_memory.o $(OBJ_DIR)/lj_cut_gpu.o \
$(OBJ_DIR)/lj96_cut_gpu_memory.o $(OBJ_DIR)/lj96_cut_gpu.o \
$(OBJ_DIR)/lj_expand_gpu_memory.o $(OBJ_DIR)/lj_expand_gpu.o \
$(OBJ_DIR)/ljc_cut_gpu_memory.o $(OBJ_DIR)/ljc_cut_gpu.o \
$(OBJ_DIR)/ljcl_cut_gpu_memory.o $(OBJ_DIR)/ljcl_cut_gpu.o \
$(OBJ_DIR)/lj_class2_long.o $(OBJ_DIR)/lj_class2_long_ext.o \
$(OBJ_DIR)/morse_gpu_memory.o $(OBJ_DIR)/morse_gpu.o \
$(OBJ_DIR)/crml_gpu_memory.o $(OBJ_DIR)/crml_gpu.o \
$(OBJ_DIR)/cmm_cut_gpu_memory.o $(OBJ_DIR)/cmm_cut_gpu.o \
$(OBJ_DIR)/cmmc_long_gpu_memory.o $(OBJ_DIR)/cmmc_long_gpu.o \
$(OBJ_DIR)/cmmc_msm_gpu_memory.o $(OBJ_DIR)/cmmc_msm_gpu.o \
$(CUDPP)
PTXS = $(OBJ_DIR)/pair_gpu_dev_kernel.ptx \
$(OBJ_DIR)/pair_gpu_atom_kernel.ptx $(OBJ_DIR)/pair_gpu_atom_ptx.h \
@ -63,17 +66,21 @@ PTXS = $(OBJ_DIR)/pair_gpu_dev_kernel.ptx \
$(OBJ_DIR)/pair_gpu_build_kernel.ptx $(OBJ_DIR)/pair_gpu_build_ptx.h \
$(OBJ_DIR)/pppm_f_gpu_kernel.ptx $(OBJ_DIR)/pppm_f_gpu_ptx.h \
$(OBJ_DIR)/pppm_d_gpu_kernel.ptx $(OBJ_DIR)/pppm_d_gpu_ptx.h \
$(OBJ_DIR)/gb_gpu_kernel_nbor.ptx $(OBJ_DIR)/gb_gpu_kernel.ptx \
$(OBJ_DIR)/gb_gpu_kernel_lj.ptx $(OBJ_DIR)/gb_gpu_ptx.h \
$(OBJ_DIR)/ellipsoid_nbor.ptx $(OBJ_DIR)/ellipsoid_nbor_ptx.h \
$(OBJ_DIR)/gayberne.ptx $(OBJ_DIR)/gayberne_lj.ptx \
$(OBJ_DIR)/gayberne_ptx.h $(OBJ_DIR)/re_squared.ptx \
$(OBJ_DIR)/re_squared_lj.ptx $(OBJ_DIR)/re_squared_ptx.h \
$(OBJ_DIR)/lj_cut_gpu_kernel.ptx $(OBJ_DIR)/lj_cut_gpu_ptx.h \
$(OBJ_DIR)/lj96_cut_gpu_kernel.ptx $(OBJ_DIR)/lj96_cut_gpu_ptx.h \
$(OBJ_DIR)/lj_expand_gpu_kernel.ptx $(OBJ_DIR)/lj_expand_gpu_ptx.h \
$(OBJ_DIR)/ljc_cut_gpu_kernel.ptx $(OBJ_DIR)/ljc_cut_gpu_ptx.h \
$(OBJ_DIR)/ljcl_cut_gpu_kernel.ptx $(OBJ_DIR)/ljcl_cut_gpu_ptx.h \
$(OBJ_DIR)/lj_class2_long.ptx $(OBJ_DIR)/lj_class2_long_ptx.h \
$(OBJ_DIR)/morse_gpu_kernel.ptx $(OBJ_DIR)/morse_gpu_ptx.h \
$(OBJ_DIR)/crml_gpu_kernel.ptx $(OBJ_DIR)/crml_gpu_ptx.h \
$(OBJ_DIR)/cmm_cut_gpu_kernel.ptx $(OBJ_DIR)/cmm_cut_gpu_ptx.h \
$(OBJ_DIR)/cmmc_long_gpu_kernel.ptx $(OBJ_DIR)/cmmc_long_gpu_ptx.h
$(OBJ_DIR)/cmmc_long_gpu_kernel.ptx $(OBJ_DIR)/cmmc_long_gpu_ptx.h \
$(OBJ_DIR)/cmmc_msm_gpu_kernel.ptx $(OBJ_DIR)/cmmc_msm_gpu_ptx.h
all: $(GPU_LIB) $(EXECS)
@ -140,6 +147,9 @@ $(OBJ_DIR)/atomic_gpu_memory.o: $(ALL_H) atomic_gpu_memory.h atomic_gpu_memory.c
$(OBJ_DIR)/charge_gpu_memory.o: $(ALL_H) charge_gpu_memory.h charge_gpu_memory.cpp
$(CUDR) -o $@ -c charge_gpu_memory.cpp
$(OBJ_DIR)/base_ellipsoid.o: $(ALL_H) base_ellipsoid.h base_ellipsoid.cpp $(OBJ_DIR)/ellipsoid_nbor_ptx.h
$(CUDR) -o $@ -c base_ellipsoid.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/pppm_f_gpu_kernel.ptx: pppm_gpu_kernel.cu pair_gpu_precision.h
$(CUDA) --ptx -DNV_KERNEL -Dgrdtyp=float -Dgrdtyp4=float4 -o $@ pppm_gpu_kernel.cu
@ -158,23 +168,41 @@ $(OBJ_DIR)/pppm_gpu_memory.o: $(ALL_H) pppm_gpu_memory.h pppm_gpu_memory.cpp $(O
$(OBJ_DIR)/pppm_l_gpu.o: $(ALL_H) pppm_gpu_memory.h pppm_l_gpu.cpp
$(CUDR) -o $@ -c pppm_l_gpu.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/gb_gpu_kernel.ptx: gb_gpu_kernel.cu pair_gpu_precision.h gb_gpu_extra.h
$(CUDA) --ptx -DNV_KERNEL -o $@ gb_gpu_kernel.cu
$(OBJ_DIR)/ellipsoid_nbor.ptx: ellipsoid_nbor.cu pair_gpu_precision.h
$(CUDA) --ptx -DNV_KERNEL -o $@ ellipsoid_nbor.cu
$(OBJ_DIR)/gb_gpu_kernel_lj.ptx: gb_gpu_kernel_lj.cu pair_gpu_precision.h gb_gpu_extra.h
$(CUDA) --ptx -DNV_KERNEL -o $@ gb_gpu_kernel_lj.cu
$(OBJ_DIR)/ellipsoid_nbor_ptx.h: $(OBJ_DIR)/ellipsoid_nbor.ptx
$(BSH) ./geryon/file_to_cstr.sh $(OBJ_DIR)/ellipsoid_nbor.ptx $(OBJ_DIR)/ellipsoid_nbor_ptx.h
$(OBJ_DIR)/gb_gpu_kernel_nbor.ptx: gb_gpu_kernel_nbor.cu pair_gpu_precision.h
$(CUDA) --ptx -DNV_KERNEL -o $@ gb_gpu_kernel_nbor.cu
$(OBJ_DIR)/gayberne.ptx: gayberne.cu pair_gpu_precision.h ellipsoid_extra.h
$(CUDA) --ptx -DNV_KERNEL -o $@ gayberne.cu
$(OBJ_DIR)/gb_gpu_ptx.h: $(OBJ_DIR)/gb_gpu_kernel_nbor.ptx $(OBJ_DIR)/gb_gpu_kernel.ptx $(OBJ_DIR)/gb_gpu_kernel_lj.ptx
$(BSH) ./geryon/file_to_cstr.sh $(OBJ_DIR)/gb_gpu_kernel_nbor.ptx $(OBJ_DIR)/gb_gpu_kernel.ptx $(OBJ_DIR)/gb_gpu_kernel_lj.ptx $(OBJ_DIR)/gb_gpu_ptx.h
$(OBJ_DIR)/gayberne_lj.ptx: gayberne_lj.cu pair_gpu_precision.h ellipsoid_extra.h
$(CUDA) --ptx -DNV_KERNEL -o $@ gayberne_lj.cu
$(OBJ_DIR)/gb_gpu_memory.o: $(ALL_H) gb_gpu_memory.h gb_gpu_memory.cpp $(OBJ_DIR)/gb_gpu_ptx.h
$(CUDR) -o $@ -c gb_gpu_memory.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/gayberne_ptx.h: $(OBJ_DIR)/gayberne.ptx $(OBJ_DIR)/gayberne_lj.ptx
$(BSH) ./geryon/file_to_cstr.sh $(OBJ_DIR)/gayberne.ptx $(OBJ_DIR)/gayberne_lj.ptx $(OBJ_DIR)/gayberne_ptx.h
$(OBJ_DIR)/gb_gpu.o: $(ALL_H) gb_gpu_memory.h gb_gpu.cpp
$(CUDR) -o $@ -c gb_gpu.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/gayberne.o: $(ALL_H) gayberne.h gayberne.cpp $(OBJ_DIR)/gayberne_ptx.h $(OBJ_DIR)/base_ellipsoid.o
$(CUDR) -o $@ -c gayberne.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/gayberne_ext.o: $(ALL_H) $(OBJ_DIR)/gayberne.o gayberne_ext.cpp
$(CUDR) -o $@ -c gayberne_ext.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/re_squared.ptx: re_squared.cu pair_gpu_precision.h ellipsoid_extra.h
$(CUDA) --ptx -DNV_KERNEL -o $@ re_squared.cu
$(OBJ_DIR)/re_squared_lj.ptx: re_squared_lj.cu pair_gpu_precision.h ellipsoid_extra.h
$(CUDA) --ptx -DNV_KERNEL -o $@ re_squared_lj.cu
$(OBJ_DIR)/re_squared_ptx.h: $(OBJ_DIR)/re_squared.ptx $(OBJ_DIR)/re_squared_lj.ptx
$(BSH) ./geryon/file_to_cstr.sh $(OBJ_DIR)/re_squared.ptx $(OBJ_DIR)/re_squared_lj.ptx $(OBJ_DIR)/re_squared_ptx.h
$(OBJ_DIR)/re_squared.o: $(ALL_H) re_squared.h re_squared.cpp $(OBJ_DIR)/re_squared_ptx.h $(OBJ_DIR)/base_ellipsoid.o
$(CUDR) -o $@ -c re_squared.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/re_squared_ext.o: $(ALL_H) $(OBJ_DIR)/re_squared.o re_squared_ext.cpp
$(CUDR) -o $@ -c re_squared_ext.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/lj_cut_gpu_kernel.ptx: lj_cut_gpu_kernel.cu pair_gpu_precision.h
$(CUDA) --ptx -DNV_KERNEL -o $@ lj_cut_gpu_kernel.cu
@ -200,6 +228,18 @@ $(OBJ_DIR)/ljc_cut_gpu_memory.o: $(ALL_H) ljc_cut_gpu_memory.h ljc_cut_gpu_memor
$(OBJ_DIR)/ljc_cut_gpu.o: $(ALL_H) ljc_cut_gpu_memory.h ljc_cut_gpu.cpp charge_gpu_memory.h
$(CUDR) -o $@ -c ljc_cut_gpu.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/lj_class2_long.ptx: lj_class2_long.cu pair_gpu_precision.h
$(CUDA) --ptx -DNV_KERNEL -o $@ lj_class2_long.cu
$(OBJ_DIR)/lj_class2_long_ptx.h: $(OBJ_DIR)/lj_class2_long.ptx $(OBJ_DIR)/lj_class2_long.ptx
$(BSH) ./geryon/file_to_cstr.sh $(OBJ_DIR)/lj_class2_long.ptx $(OBJ_DIR)/lj_class2_long_ptx.h
$(OBJ_DIR)/lj_class2_long.o: $(ALL_H) lj_class2_long.h lj_class2_long.cpp $(OBJ_DIR)/lj_class2_long_ptx.h $(OBJ_DIR)/charge_gpu_memory.o
$(CUDR) -o $@ -c lj_class2_long.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/lj_class2_long_ext.o: $(ALL_H) lj_class2_long.h lj_class2_long_ext.cpp charge_gpu_memory.h
$(CUDR) -o $@ -c lj_class2_long_ext.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/ljcl_cut_gpu_kernel.ptx: ljcl_cut_gpu_kernel.cu pair_gpu_precision.h
$(CUDA) --ptx -DNV_KERNEL -o $@ ljcl_cut_gpu_kernel.cu
@ -284,6 +324,18 @@ $(OBJ_DIR)/cmmc_long_gpu_memory.o: $(ALL_H) cmmc_long_gpu_memory.h cmmc_long_gpu
$(OBJ_DIR)/cmmc_long_gpu.o: $(ALL_H) cmmc_long_gpu_memory.h cmmc_long_gpu.cpp charge_gpu_memory.h
$(CUDR) -o $@ -c cmmc_long_gpu.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/cmmc_msm_gpu_kernel.ptx: cmmc_msm_gpu_kernel.cu pair_gpu_precision.h
$(CUDA) --ptx -DNV_KERNEL -o $@ cmmc_msm_gpu_kernel.cu
$(OBJ_DIR)/cmmc_msm_gpu_ptx.h: $(OBJ_DIR)/cmmc_msm_gpu_kernel.ptx $(OBJ_DIR)/cmmc_msm_gpu_kernel.ptx
$(BSH) ./geryon/file_to_cstr.sh $(OBJ_DIR)/cmmc_msm_gpu_kernel.ptx $(OBJ_DIR)/cmmc_msm_gpu_ptx.h
$(OBJ_DIR)/cmmc_msm_gpu_memory.o: $(ALL_H) cmmc_msm_gpu_memory.h cmmc_msm_gpu_memory.cpp $(OBJ_DIR)/cmmc_msm_gpu_ptx.h $(OBJ_DIR)/atomic_gpu_memory.o
$(CUDR) -o $@ -c cmmc_msm_gpu_memory.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/cmmc_msm_gpu.o: $(ALL_H) cmmc_msm_gpu_memory.h cmmc_msm_gpu.cpp charge_gpu_memory.h
$(CUDR) -o $@ -c cmmc_msm_gpu.cpp -I$(OBJ_DIR)
$(BIN_DIR)/nvc_get_devices: ./geryon/ucl_get_devices.cpp $(NVC_H)
$(CUDR) -o $@ ./geryon/ucl_get_devices.cpp -DUCL_CUDART $(CUDA_LINK)

View File

@ -33,25 +33,29 @@ ALL_H = $(OCL_H) $(PAIR_H)
EXECS = $(BIN_DIR)/ocl_get_devices
OBJS = $(OBJ_DIR)/pair_gpu_atom.o $(OBJ_DIR)/pair_gpu_ans.o \
$(OBJ_DIR)/pair_gpu_nbor_shared.o $(OBJ_DIR)/pair_gpu_nbor.o \
$(OBJ_DIR)/pair_gpu_device.o \
$(OBJ_DIR)/atomic_gpu_memory.o $(OBJ_DIR)/charge_gpu_memory.o \
$(OBJ_DIR)/pair_gpu_device.o $(OBJ_DIR)/atomic_gpu_memory.o \
$(OBJ_DIR)/charge_gpu_memory.o $(OBJ_DIR)/base_ellipsoid.o \
$(OBJ_DIR)/pppm_gpu_memory.o $(OBJ_DIR)/pppm_l_gpu.o \
$(OBJ_DIR)/gb_gpu_memory.o $(OBJ_DIR)/gb_gpu.o \
$(OBJ_DIR)/gayberne.o $(OBJ_DIR)/gayberne_ext.o \
$(OBJ_DIR)/re_squared.o $(OBJ_DIR)/re_squared_ext.o \
$(OBJ_DIR)/lj_cut_gpu_memory.o $(OBJ_DIR)/lj_cut_gpu.o \
$(OBJ_DIR)/lj96_cut_gpu_memory.o $(OBJ_DIR)/lj96_cut_gpu.o \
$(OBJ_DIR)/lj_expand_gpu_memory.o $(OBJ_DIR)/lj_expand_gpu.o \
$(OBJ_DIR)/ljc_cut_gpu_memory.o $(OBJ_DIR)/ljc_cut_gpu.o \
$(OBJ_DIR)/ljcl_cut_gpu_memory.o $(OBJ_DIR)/ljcl_cut_gpu.o \
$(OBJ_DIR)/lj_class2_long.o $(OBJ_DIR)/lj_class2_long_ext.o \
$(OBJ_DIR)/morse_gpu_memory.o $(OBJ_DIR)/morse_gpu.o \
$(OBJ_DIR)/crml_gpu_memory.o $(OBJ_DIR)/crml_gpu.o \
$(OBJ_DIR)/cmm_cut_gpu_memory.o $(OBJ_DIR)/cmm_cut_gpu.o \
$(OBJ_DIR)/cmmc_long_gpu_memory.o $(OBJ_DIR)/cmmc_long_gpu.o
KERS = $(OBJ_DIR)/pair_gpu_dev_cl.h $(OBJ_DIR)/pair_gpu_atom_cl.h \
$(OBJ_DIR)/pair_gpu_nbor_cl.h $(OBJ_DIR)/pppm_gpu_cl.h \
$(OBJ_DIR)/gb_gpu_nbor_cl.h $(OBJ_DIR)/gb_gpu_cl.h \
$(OBJ_DIR)/ellipsoid_nbor_cl.h $(OBJ_DIR)/gayberne_cl.h \
$(OBJ_DIR)/re_squared_cl.h \
$(OBJ_DIR)/lj_cut_gpu_cl.h $(OBJ_DIR)/lj96_cut_gpu_cl.h \
$(OBJ_DIR)/lj_expand_gpu_cl.h $(OBJ_DIR)/ljc_cut_gpu_cl.h \
$(OBJ_DIR)/ljcl_cut_gpu_cl.h $(OBJ_DIR)/morse_gpu_cl.h \
$(OBJ_DIR)/ljcl_cut_gpu_cl.h $(OBJ_DIR)/lj_class2_long_cl.h \
$(OBJ_DIR)/morse_gpu_cl.h \
$(OBJ_DIR)/crml_gpu_cl.h $(OBJ_DIR)/cmm_cut_gpu_cl.h \
$(OBJ_DIR)/cmmc_long_gpu_cl.h
@ -89,6 +93,9 @@ $(OBJ_DIR)/atomic_gpu_memory.o: $(OCL_H) atomic_gpu_memory.h atomic_gpu_memory.c
$(OBJ_DIR)/charge_gpu_memory.o: $(OCL_H) charge_gpu_memory.h charge_gpu_memory.cpp
$(OCL) -o $@ -c charge_gpu_memory.cpp
$(OBJ_DIR)/base_ellipsoid.o: $(OCL_H) base_ellipsoid.h base_ellipsoid.cpp $(OBJ_DIR)/ellipsoid_nbor_cl.h
$(OCL) -o $@ -c base_ellipsoid.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/pppm_gpu_cl.h: pppm_gpu_kernel.cu
$(BSH) ./geryon/file_to_cstr.sh pppm_gpu_kernel.cu $(OBJ_DIR)/pppm_gpu_cl.h;
@ -98,20 +105,32 @@ $(OBJ_DIR)/pppm_gpu_memory.o: $(ALL_H) pppm_gpu_memory.h pppm_gpu_memory.cpp $(
$(OBJ_DIR)/pppm_l_gpu.o: $(ALL_H) pppm_gpu_memory.h pppm_l_gpu.cpp
$(OCL) -o $@ -c pppm_l_gpu.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/gb_gpu_nbor_cl.h: gb_gpu_kernel_nbor.cu
$(BSH) ./geryon/file_to_cstr.sh gb_gpu_kernel_nbor.cu $(OBJ_DIR)/gb_gpu_nbor_cl.h
$(OBJ_DIR)/ellipsoid_nbor_cl.h: ellipsoid_nbor.cu
$(BSH) ./geryon/file_to_cstr.sh ellipsoid_nbor.cu $(OBJ_DIR)/ellipsoid_nbor_cl.h
$(OBJ_DIR)/gb_gpu_cl.h: gb_gpu_kernel.cu gb_gpu_kernel_lj.cu gb_gpu_extra.h
cat gb_gpu_extra.h gb_gpu_kernel.cu > $(OBJ_DIR)/gb_gpu_kernel.tar; \
cat gb_gpu_extra.h gb_gpu_kernel_lj.cu > $(OBJ_DIR)/gb_gpu_kernel_lj.tar; \
$(BSH) ./geryon/file_to_cstr.sh $(OBJ_DIR)/gb_gpu_kernel.tar $(OBJ_DIR)/gb_gpu_kernel_lj.tar $(OBJ_DIR)/gb_gpu_cl.h; \
rm -f $(OBJ_DIR)/gb_gpu_kernel.tar $(OBJ_DIR)/gb_gpu_kernel_lj.tar
$(OBJ_DIR)/gayberne_cl.h: gayberne.cu gayberne_lj.cu ellipsoid_extra.h
cat ellipsoid_extra.h gayberne.cu > $(OBJ_DIR)/gayberne.tar; \
cat ellipsoid_extra.h gayberne_lj.cu > $(OBJ_DIR)/gayberne_lj.tar; \
$(BSH) ./geryon/file_to_cstr.sh $(OBJ_DIR)/gayberne.tar $(OBJ_DIR)/gayberne_lj.tar $(OBJ_DIR)/gayberne_cl.h; \
rm -f $(OBJ_DIR)/gayberne.tar $(OBJ_DIR)/gayberne_lj.tar
$(OBJ_DIR)/gb_gpu_memory.o: $(ALL_H) gb_gpu_memory.h gb_gpu_memory.cpp $(OBJ_DIR)/gb_gpu_nbor_cl.h $(OBJ_DIR)/gb_gpu_cl.h
$(OCL) -o $@ -c gb_gpu_memory.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/gayberne.o: $(ALL_H) gayberne.h gayberne.cpp $(OBJ_DIR)/gayberne_cl.h $(OBJ_DIR)/base_ellipsoid.o
$(OCL) -o $@ -c gayberne.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/gb_gpu.o: $(ALL_H) gb_gpu_memory.h gb_gpu.cpp
$(OCL) -o $@ -c gb_gpu.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/gayberne_ext.o: $(ALL_H) $(OBJ_DIR)/gayberne.o gayberne_ext.cpp
$(OCL) -o $@ -c gayberne_ext.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/re_squared_cl.h: re_squared.cu re_squared_lj.cu ellipsoid_extra.h
cat ellipsoid_extra.h re_squared.cu > $(OBJ_DIR)/re_squared.tar; \
cat ellipsoid_extra.h re_squared_lj.cu > $(OBJ_DIR)/re_squared_lj.tar; \
$(BSH) ./geryon/file_to_cstr.sh $(OBJ_DIR)/re_squared.tar $(OBJ_DIR)/re_squared_lj.tar $(OBJ_DIR)/re_squared_cl.h; \
rm -f $(OBJ_DIR)/re_squared.tar $(OBJ_DIR)/re_squared_lj.tar
$(OBJ_DIR)/re_squared.o: $(ALL_H) re_squared.h re_squared.cpp $(OBJ_DIR)/re_squared_cl.h $(OBJ_DIR)/base_ellipsoid.o
$(OCL) -o $@ -c re_squared.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/re_squared_ext.o: $(ALL_H) $(OBJ_DIR)/re_squared.o re_squared_ext.cpp
$(OCL) -o $@ -c re_squared_ext.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/lj_cut_gpu_cl.h: lj_cut_gpu_kernel.cu
$(BSH) ./geryon/file_to_cstr.sh lj_cut_gpu_kernel.cu $(OBJ_DIR)/lj_cut_gpu_cl.h;
@ -134,12 +153,21 @@ $(OBJ_DIR)/ljc_cut_gpu.o: $(ALL_H) ljc_cut_gpu_memory.h ljc_cut_gpu.cpp charge_g
$(OBJ_DIR)/ljcl_cut_gpu_cl.h: ljcl_cut_gpu_kernel.cu
$(BSH) ./geryon/file_to_cstr.sh ljcl_cut_gpu_kernel.cu $(OBJ_DIR)/ljcl_cut_gpu_cl.h;
$(OBJ_DIR)/ljcl_cut_gpu_memory.o: $(ALL_H) ljcl_cut_gpu_memory.h ljcl_cut_gpu_memory.cpp $(OBJ_DIR)/ljcl_cut_gpu_cl.h $(OBJ_DIR)/pair_gpu_nbor_cl.h $(OBJ_DIR)/ljcl_cut_gpu_cl.h $(OBJ_DIR)/charge_gpu_memory.o
$(OBJ_DIR)/ljcl_cut_gpu_memory.o: $(ALL_H) ljcl_cut_gpu_memory.h ljcl_cut_gpu_memory.cpp $(OBJ_DIR)/ljcl_cut_gpu_cl.h $(OBJ_DIR)/pair_gpu_nbor_cl.h $(OBJ_DIR)/charge_gpu_memory.o
$(OCL) -o $@ -c ljcl_cut_gpu_memory.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/ljcl_cut_gpu.o: $(ALL_H) ljcl_cut_gpu_memory.h ljcl_cut_gpu.cpp charge_gpu_memory.h
$(OCL) -o $@ -c ljcl_cut_gpu.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/lj_class2_long_cl.h: lj_class2_long.cu
$(BSH) ./geryon/file_to_cstr.sh lj_class2_long.cu $(OBJ_DIR)/lj_class2_long_cl.h;
$(OBJ_DIR)/lj_class2_long.o: $(ALL_H) lj_class2_long.h lj_class2_long.cpp $(OBJ_DIR)/lj_class2_long_cl.h $(OBJ_DIR)/pair_gpu_nbor_cl.h $(OBJ_DIR)/charge_gpu_memory.o
$(OCL) -o $@ -c lj_class2_long.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/lj_class2_long_ext.o: $(ALL_H) lj_class2_long.h lj_class2_long_ext.cpp charge_gpu_memory.h
$(OCL) -o $@ -c lj_class2_long_ext.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/morse_gpu_cl.h: morse_gpu_kernel.cu
$(BSH) ./geryon/file_to_cstr.sh morse_gpu_kernel.cu $(OBJ_DIR)/morse_gpu_cl.h;

View File

@ -84,7 +84,7 @@ __kernel void kernel_pair(__global numtyp4 *x_, __global numtyp4 *lj1,
__global numtyp *sp_lj_in, __global int *dev_nbor,
__global int *dev_packed, __global acctyp4 *ans,
__global acctyp *engv, const int eflag,
const int vflag, const int inum, const int nall,
const int vflag, const int inum,
const int nbor_pitch, const int t_per_atom) {
int tid=THREAD_ID_X;
int ii=mul24((int)BLOCK_ID_X,(int)(BLOCK_SIZE_X)/t_per_atom);
@ -241,7 +241,7 @@ __kernel void kernel_pair_fast(__global numtyp4 *x_, __global numtyp4 *lj1_in,
__global numtyp* sp_lj_in,__global int *dev_nbor,
__global int *dev_packed, __global acctyp4 *ans,
__global acctyp *engv, const int eflag,
const int vflag, const int inum, const int nall,
const int vflag, const int inum,
const int nbor_pitch, const int t_per_atom) {
int tid=THREAD_ID_X;
int ii=mul24((int)BLOCK_ID_X,(int)(BLOCK_SIZE_X)/t_per_atom);

View File

@ -130,7 +130,6 @@ void CMM_GPU_MemoryT::loop(const bool _eflag, const bool _vflag) {
(BX/this->_threads_per_atom)));
int ainum=this->ans->inum();
int anall=this->atom->nall();
int nbor_pitch=this->nbor->nbor_pitch();
this->time_pair.start();
if (shared_types) {
@ -141,15 +140,14 @@ void CMM_GPU_MemoryT::loop(const bool _eflag, const bool _vflag) {
&this->_nbor_data->begin(),
&this->ans->dev_ans.begin(),
&this->ans->dev_engv.begin(), &eflag, &vflag,
&ainum, &anall, &nbor_pitch,
&this->_threads_per_atom);
&ainum, &nbor_pitch, &this->_threads_per_atom);
} else {
this->k_pair.set_size(GX,BX);
this->k_pair.run(&this->atom->dev_x.begin(), &lj1.begin(), &lj3.begin(),
&_cmm_types, &sp_lj.begin(), &this->nbor->dev_nbor.begin(),
&this->_nbor_data->begin(), &this->ans->dev_ans.begin(),
&this->ans->dev_engv.begin(), &eflag, &vflag, &ainum,
&anall, &nbor_pitch, &this->_threads_per_atom);
&nbor_pitch, &this->_threads_per_atom);
}
this->time_pair.stop();
}

View File

@ -102,7 +102,7 @@ __kernel void kernel_pair(__global numtyp4 *x_, __global numtyp4 *lj1,
__global numtyp *sp_lj_in, __global int *dev_nbor,
__global int *dev_packed, __global acctyp4 *ans,
__global acctyp *engv, const int eflag,
const int vflag, const int inum, const int nall,
const int vflag, const int inum,
const int nbor_pitch, __global numtyp *q_ ,
const numtyp cut_coulsq, const numtyp qqrd2e,
const numtyp g_ewald, const int t_per_atom) {
@ -295,10 +295,9 @@ __kernel void kernel_pair_fast(__global numtyp4 *x_, __global numtyp4 *lj1_in,
__global int *dev_nbor, __global int *dev_packed,
__global acctyp4 *ans, __global acctyp *engv,
const int eflag, const int vflag, const int inum,
const int nall, const int nbor_pitch,
__global numtyp *q_ , const numtyp cut_coulsq,
const numtyp qqrd2e, const numtyp g_ewald,
const int t_per_atom) {
const int nbor_pitch, __global numtyp *q_,
const numtyp cut_coulsq, const numtyp qqrd2e,
const numtyp g_ewald, const int t_per_atom) {
int tid=THREAD_ID_X;
int ii=mul24((int)BLOCK_ID_X,(int)(BLOCK_SIZE_X)/t_per_atom);
ii+=tid/t_per_atom;

View File

@ -141,7 +141,6 @@ void CMML_GPU_MemoryT::loop(const bool _eflag, const bool _vflag) {
(BX/this->_threads_per_atom)));
int ainum=this->ans->inum();
int anall=this->atom->nall();
int nbor_pitch=this->nbor->nbor_pitch();
this->time_pair.start();
if (shared_types) {
@ -152,7 +151,7 @@ void CMML_GPU_MemoryT::loop(const bool _eflag, const bool _vflag) {
&this->_nbor_data->begin(),
&this->ans->dev_ans.begin(),
&this->ans->dev_engv.begin(), &eflag, &vflag,
&ainum, &anall, &nbor_pitch,
&ainum, &nbor_pitch,
&this->atom->dev_q.begin(), &_cut_coulsq,
&_qqrd2e, &_g_ewald, &this->_threads_per_atom);
} else {
@ -161,7 +160,7 @@ void CMML_GPU_MemoryT::loop(const bool _eflag, const bool _vflag) {
&_lj_types, &sp_lj.begin(), &this->nbor->dev_nbor.begin(),
&this->_nbor_data->begin(), &this->ans->dev_ans.begin(),
&this->ans->dev_engv.begin(), &eflag, &vflag, &ainum,
&anall, &nbor_pitch, &this->atom->dev_q.begin(),
&nbor_pitch, &this->atom->dev_q.begin(),
&_cut_coulsq, &_qqrd2e, &_g_ewald,
&this->_threads_per_atom);
}

View File

@ -103,12 +103,11 @@ __kernel void kernel_pair(__global numtyp4 *x_, __global numtyp4 *lj1,
__global int *dev_nbor, __global int *dev_packed,
__global acctyp4 *ans, __global acctyp *engv,
const int eflag, const int vflag, const int inum,
const int nall, const int nbor_pitch,
__global numtyp *q_, const numtyp cut_coulsq,
const numtyp qqrd2e, const numtyp g_ewald,
const numtyp denom_lj, const numtyp cut_bothsq,
const numtyp cut_ljsq, const numtyp cut_lj_innersq,
const int t_per_atom) {
const int nbor_pitch, __global numtyp *q_,
const numtyp cut_coulsq, const numtyp qqrd2e,
const numtyp g_ewald, const numtyp denom_lj,
const numtyp cut_bothsq, const numtyp cut_ljsq,
const numtyp cut_lj_innersq, const int t_per_atom) {
int tid=THREAD_ID_X;
int ii=mul24((int)BLOCK_ID_X,(int)(BLOCK_SIZE_X)/t_per_atom);
@ -300,7 +299,7 @@ __kernel void kernel_pair_fast(__global numtyp4 *x_, __global numtyp2 *ljd_in,
__global numtyp* sp_lj_in, __global int *dev_nbor,
__global int *dev_packed, __global acctyp4 *ans,
__global acctyp *engv, const int eflag,
const int vflag, const int inum, const int nall,
const int vflag, const int inum,
const int nbor_pitch, __global numtyp *q_,
const numtyp cut_coulsq, const numtyp qqrd2e,
const numtyp g_ewald, const numtyp denom_lj,

View File

@ -145,7 +145,6 @@ void CRML_GPU_MemoryT::loop(const bool _eflag, const bool _vflag) {
(BX/this->_threads_per_atom)));
int ainum=this->ans->inum();
int anall=this->atom->nall();
int nbor_pitch=this->nbor->nbor_pitch();
this->time_pair.start();
if (shared_types) {
@ -155,10 +154,9 @@ void CRML_GPU_MemoryT::loop(const bool _eflag, const bool _vflag) {
&this->_nbor_data->begin(),
&this->ans->dev_ans.begin(),
&this->ans->dev_engv.begin(), &eflag, &vflag,
&ainum, &anall, &nbor_pitch,
&this->atom->dev_q.begin(), &_cut_coulsq,
&_qqrd2e, &_g_ewald, &_denom_lj, &_cut_bothsq,
&_cut_ljsq, &_cut_lj_innersq,
&ainum, &nbor_pitch, &this->atom->dev_q.begin(),
&_cut_coulsq, &_qqrd2e, &_g_ewald, &_denom_lj,
&_cut_bothsq, &_cut_ljsq, &_cut_lj_innersq,
&this->_threads_per_atom);
} else {
this->k_pair.set_size(GX,BX);
@ -166,7 +164,7 @@ void CRML_GPU_MemoryT::loop(const bool _eflag, const bool _vflag) {
&_lj_types, &sp_lj.begin(), &this->nbor->dev_nbor.begin(),
&this->_nbor_data->begin(), &this->ans->dev_ans.begin(),
&this->ans->dev_engv.begin(), &eflag, &vflag, &ainum,
&anall, &nbor_pitch, &this->atom->dev_q.begin(),
&nbor_pitch, &this->atom->dev_q.begin(),
&_cut_coulsq, &_qqrd2e, &_g_ewald, &_denom_lj,
&_cut_bothsq, &_cut_ljsq, &_cut_lj_innersq,
&this->_threads_per_atom);

View File

@ -54,8 +54,11 @@ typedef struct _double4 double4;
#define __kernel extern "C" __global__
#define __local __shared__
#define __global
#define __inline static __inline__ __device__
#define atom_add atomicAdd
#ifndef __inline
#define __inline static __inline__ __device__
#endif
#endif

View File

@ -84,7 +84,7 @@ __kernel void kernel_pair(__global numtyp4 *x_, __global numtyp4 *lj1,
__global numtyp *sp_lj_in, __global int *dev_nbor,
__global int *dev_packed, __global acctyp4 *ans,
__global acctyp *engv, const int eflag,
const int vflag, const int inum, const int nall,
const int vflag, const int inum,
const int nbor_pitch, const int t_per_atom) {
int tid=THREAD_ID_X;
int ii=mul24((int)BLOCK_ID_X,(int)(BLOCK_SIZE_X)/t_per_atom);
@ -234,8 +234,7 @@ __kernel void kernel_pair_fast(__global numtyp4 *x_, __global numtyp4 *lj1_in,
__global int *dev_nbor, __global int *dev_packed,
__global acctyp4 *ans, __global acctyp *engv,
const int eflag, const int vflag, const int inum,
const int nall, const int nbor_pitch,
const int t_per_atom) {
const int nbor_pitch, const int t_per_atom) {
int tid=THREAD_ID_X;
int ii=mul24((int)BLOCK_ID_X,(int)(BLOCK_SIZE_X)/t_per_atom);
ii+=tid/t_per_atom;

View File

@ -130,7 +130,6 @@ void LJ96_GPU_MemoryT::loop(const bool _eflag, const bool _vflag) {
(BX/this->_threads_per_atom)));
int ainum=this->ans->inum();
int anall=this->atom->nall();
int nbor_pitch=this->nbor->nbor_pitch();
this->time_pair.start();
if (shared_types) {
@ -141,15 +140,14 @@ void LJ96_GPU_MemoryT::loop(const bool _eflag, const bool _vflag) {
&this->_nbor_data->begin(),
&this->ans->dev_ans.begin(),
&this->ans->dev_engv.begin(), &eflag, &vflag,
&ainum, &anall, &nbor_pitch,
&this->_threads_per_atom);
&ainum, &nbor_pitch, &this->_threads_per_atom);
} else {
this->k_pair.set_size(GX,BX);
this->k_pair.run(&this->atom->dev_x.begin(), &lj1.begin(), &lj3.begin(),
&_lj_types, &sp_lj.begin(), &this->nbor->dev_nbor.begin(),
&this->_nbor_data->begin(), &this->ans->dev_ans.begin(),
&this->ans->dev_engv.begin(), &eflag, &vflag, &ainum,
&anall, &nbor_pitch, &this->_threads_per_atom);
&nbor_pitch, &this->_threads_per_atom);
}
this->time_pair.stop();
}

View File

@ -84,7 +84,7 @@ __kernel void kernel_pair(__global numtyp4 *x_, __global numtyp4 *lj1,
__global numtyp *sp_lj_in, __global int *dev_nbor,
__global int *dev_packed, __global acctyp4 *ans,
__global acctyp *engv, const int eflag,
const int vflag, const int inum, const int nall,
const int vflag, const int inum,
const int nbor_pitch, const int t_per_atom) {
int tid=THREAD_ID_X;
int ii=mul24((int)BLOCK_ID_X,(int)(BLOCK_SIZE_X)/t_per_atom);
@ -233,8 +233,7 @@ __kernel void kernel_pair_fast(__global numtyp4 *x_, __global numtyp4 *lj1_in,
__global int *dev_nbor, __global int *dev_packed,
__global acctyp4 *ans, __global acctyp *engv,
const int eflag, const int vflag, const int inum,
const int nall, const int nbor_pitch,
const int t_per_atom) {
const int nbor_pitch, const int t_per_atom) {
int tid=THREAD_ID_X;
int ii=mul24((int)BLOCK_ID_X,(int)(BLOCK_SIZE_X)/t_per_atom);
ii+=tid/t_per_atom;

View File

@ -130,7 +130,6 @@ void LJL_GPU_MemoryT::loop(const bool _eflag, const bool _vflag) {
(BX/this->_threads_per_atom)));
int ainum=this->ans->inum();
int anall=this->atom->nall();
int nbor_pitch=this->nbor->nbor_pitch();
this->time_pair.start();
if (shared_types) {
@ -141,15 +140,14 @@ void LJL_GPU_MemoryT::loop(const bool _eflag, const bool _vflag) {
&this->_nbor_data->begin(),
&this->ans->dev_ans.begin(),
&this->ans->dev_engv.begin(), &eflag, &vflag,
&ainum, &anall, &nbor_pitch,
&this->_threads_per_atom);
&ainum, &nbor_pitch, &this->_threads_per_atom);
} else {
this->k_pair.set_size(GX,BX);
this->k_pair.run(&this->atom->dev_x.begin(), &lj1.begin(), &lj3.begin(),
&_lj_types, &sp_lj.begin(), &this->nbor->dev_nbor.begin(),
&this->_nbor_data->begin(), &this->ans->dev_ans.begin(),
&this->ans->dev_engv.begin(), &eflag, &vflag, &ainum,
&anall, &nbor_pitch, &this->_threads_per_atom);
&nbor_pitch, &this->_threads_per_atom);
}
this->time_pair.stop();
}

View File

@ -84,7 +84,7 @@ __kernel void kernel_pair(__global numtyp4 *x_, __global numtyp4 *lj1,
__global numtyp *sp_lj_in, __global int *dev_nbor,
__global int *dev_packed, __global acctyp4 *ans,
__global acctyp *engv, const int eflag,
const int vflag, const int inum, const int nall,
const int vflag, const int inum,
const int nbor_pitch, const int t_per_atom) {
int tid=THREAD_ID_X;
int ii=mul24((int)BLOCK_ID_X,(int)(BLOCK_SIZE_X)/t_per_atom);
@ -236,8 +236,7 @@ __kernel void kernel_pair_fast(__global numtyp4 *x_, __global numtyp4 *lj1_in,
__global int *dev_nbor, __global int *dev_packed,
__global acctyp4 *ans, __global acctyp *engv,
const int eflag, const int vflag, const int inum,
const int nall, const int nbor_pitch,
const int t_per_atom) {
const int nbor_pitch, const int t_per_atom) {
int tid=THREAD_ID_X;
int ii=mul24((int)BLOCK_ID_X,(int)(BLOCK_SIZE_X)/t_per_atom);
ii+=tid/t_per_atom;

View File

@ -130,7 +130,6 @@ void LJE_GPU_MemoryT::loop(const bool _eflag, const bool _vflag) {
(BX/this->_threads_per_atom)));
int ainum=this->ans->inum();
int anall=this->atom->nall();
int nbor_pitch=this->nbor->nbor_pitch();
this->time_pair.start();
if (shared_types) {
@ -141,15 +140,14 @@ void LJE_GPU_MemoryT::loop(const bool _eflag, const bool _vflag) {
&this->_nbor_data->begin(),
&this->ans->dev_ans.begin(),
&this->ans->dev_engv.begin(), &eflag, &vflag,
&ainum, &anall, &nbor_pitch,
&this->_threads_per_atom);
&ainum, &nbor_pitch, &this->_threads_per_atom);
} else {
this->k_pair.set_size(GX,BX);
this->k_pair.run(&this->atom->dev_x.begin(), &lj1.begin(), &lj3.begin(),
&_lj_types, &sp_lj.begin(), &this->nbor->dev_nbor.begin(),
&this->_nbor_data->begin(), &this->ans->dev_ans.begin(),
&this->ans->dev_engv.begin(), &eflag, &vflag, &ainum,
&anall, &nbor_pitch, &this->_threads_per_atom);
&nbor_pitch, &this->_threads_per_atom);
}
this->time_pair.stop();
}

View File

@ -94,7 +94,7 @@ __kernel void kernel_pair(__global numtyp4 *x_, __global numtyp4 *lj1,
__global numtyp *sp_lj_in, __global int *dev_nbor,
__global int *dev_packed, __global acctyp4 *ans,
__global acctyp *engv, const int eflag,
const int vflag, const int inum, const int nall,
const int vflag, const int inum,
const int nbor_pitch, __global numtyp *q_ ,
__global numtyp *cutsq, const numtyp qqrd2e,
const int t_per_atom) {
@ -270,9 +270,9 @@ __kernel void kernel_pair_fast(__global numtyp4 *x_, __global numtyp4 *lj1_in,
__global int *dev_nbor, __global int *dev_packed,
__global acctyp4 *ans, __global acctyp *engv,
const int eflag, const int vflag, const int inum,
const int nall, const int nbor_pitch,
__global numtyp *q_ , __global numtyp *_cutsq,
const numtyp qqrd2e, const int t_per_atom) {
const int nbor_pitch, __global numtyp *q_,
__global numtyp *_cutsq, const numtyp qqrd2e,
const int t_per_atom) {
int tid=THREAD_ID_X;
int ii=mul24((int)BLOCK_ID_X,(int)(BLOCK_SIZE_X)/t_per_atom);
ii+=tid/t_per_atom;

View File

@ -142,7 +142,6 @@ void LJC_GPU_MemoryT::loop(const bool _eflag, const bool _vflag) {
(BX/this->_threads_per_atom)));
int ainum=this->ans->inum();
int anall=this->atom->nall();
int nbor_pitch=this->nbor->nbor_pitch();
this->time_pair.start();
if (shared_types) {
@ -153,7 +152,7 @@ void LJC_GPU_MemoryT::loop(const bool _eflag, const bool _vflag) {
&this->_nbor_data->begin(),
&this->ans->dev_ans.begin(),
&this->ans->dev_engv.begin(), &eflag, &vflag,
&ainum, &anall, &nbor_pitch,
&ainum, &nbor_pitch,
&this->atom->dev_q.begin(), &cutsq.begin(),
&_qqrd2e, &this->_threads_per_atom);
} else {
@ -162,7 +161,7 @@ void LJC_GPU_MemoryT::loop(const bool _eflag, const bool _vflag) {
&_lj_types, &sp_lj.begin(), &this->nbor->dev_nbor.begin(),
&this->_nbor_data->begin(), &this->ans->dev_ans.begin(),
&this->ans->dev_engv.begin(), &eflag, &vflag, &ainum,
&anall, &nbor_pitch, &this->atom->dev_q.begin(),
&nbor_pitch, &this->atom->dev_q.begin(),
&cutsq.begin(), &_qqrd2e, &this->_threads_per_atom);
}
this->time_pair.stop();

View File

@ -102,7 +102,7 @@ __kernel void kernel_pair(__global numtyp4 *x_, __global numtyp4 *lj1,
__global numtyp *sp_lj_in, __global int *dev_nbor,
__global int *dev_packed, __global acctyp4 *ans,
__global acctyp *engv, const int eflag,
const int vflag, const int inum, const int nall,
const int vflag, const int inum,
const int nbor_pitch, __global numtyp *q_,
const numtyp cut_coulsq, const numtyp qqrd2e,
const numtyp g_ewald, const int t_per_atom) {
@ -286,10 +286,9 @@ __kernel void kernel_pair_fast(__global numtyp4 *x_, __global numtyp4 *lj1_in,
__global int *dev_nbor, __global int *dev_packed,
__global acctyp4 *ans, __global acctyp *engv,
const int eflag, const int vflag, const int inum,
const int nall, const int nbor_pitch,
__global numtyp *q_ , const numtyp cut_coulsq,
const numtyp qqrd2e, const numtyp g_ewald,
const int t_per_atom) {
const int nbor_pitch, __global numtyp *q_,
const numtyp cut_coulsq, const numtyp qqrd2e,
const numtyp g_ewald, const int t_per_atom) {
int tid=THREAD_ID_X;
int ii=mul24((int)BLOCK_ID_X,(int)(BLOCK_SIZE_X)/t_per_atom);
ii+=tid/t_per_atom;

View File

@ -140,7 +140,6 @@ void LJCL_GPU_MemoryT::loop(const bool _eflag, const bool _vflag) {
(BX/this->_threads_per_atom)));
int ainum=this->ans->inum();
int anall=this->atom->nall();
int nbor_pitch=this->nbor->nbor_pitch();
this->time_pair.start();
if (shared_types) {
@ -151,18 +150,17 @@ void LJCL_GPU_MemoryT::loop(const bool _eflag, const bool _vflag) {
&this->_nbor_data->begin(),
&this->ans->dev_ans.begin(),
&this->ans->dev_engv.begin(), &eflag, &vflag,
&ainum, &anall, &nbor_pitch,
&this->atom->dev_q.begin(), &_cut_coulsq,
&_qqrd2e, &_g_ewald, &this->_threads_per_atom);
&ainum, &nbor_pitch, &this->atom->dev_q.begin(),
&_cut_coulsq, &_qqrd2e, &_g_ewald,
&this->_threads_per_atom);
} else {
this->k_pair.set_size(GX,BX);
this->k_pair.run(&this->atom->dev_x.begin(), &lj1.begin(), &lj3.begin(),
&_lj_types, &sp_lj.begin(), &this->nbor->dev_nbor.begin(),
&this->_nbor_data->begin(), &this->ans->dev_ans.begin(),
&this->ans->dev_engv.begin(), &eflag, &vflag, &ainum,
&anall, &nbor_pitch, &this->atom->dev_q.begin(),
&_cut_coulsq, &_qqrd2e, &_g_ewald,
&this->_threads_per_atom);
&nbor_pitch, &this->atom->dev_q.begin(), &_cut_coulsq,
&_qqrd2e, &_g_ewald, &this->_threads_per_atom);
}
this->time_pair.stop();
}

View File

@ -84,7 +84,7 @@ __kernel void kernel_pair(__global numtyp4 *x_, __global numtyp4 *mor1,
__global numtyp *sp_lj_in, __global int *dev_nbor,
__global int *dev_packed, __global acctyp4 *ans,
__global acctyp *engv, const int eflag,
const int vflag, const int inum, const int nall,
const int vflag, const int inum,
const int nbor_pitch, const int t_per_atom) {
int tid=THREAD_ID_X;
int ii=mul24((int)BLOCK_ID_X,(int)(BLOCK_SIZE_X)/t_per_atom);
@ -234,8 +234,7 @@ __kernel void kernel_pair_fast(__global numtyp4 *x_, __global numtyp4 *mor1_in,
__global int *dev_nbor, __global int *dev_packed,
__global acctyp4 *ans, __global acctyp *engv,
const int eflag, const int vflag, const int inum,
const int nall, const int nbor_pitch,
const int t_per_atom) {
const int nbor_pitch, const int t_per_atom) {
int tid=THREAD_ID_X;
int ii=mul24((int)BLOCK_ID_X,(int)(BLOCK_SIZE_X)/t_per_atom);
ii+=tid/t_per_atom;

View File

@ -129,7 +129,6 @@ void MOR_GPU_MemoryT::loop(const bool _eflag, const bool _vflag) {
(BX/this->_threads_per_atom)));
int ainum=this->ans->inum();
int anall=this->atom->nall();
int nbor_pitch=this->nbor->nbor_pitch();
this->time_pair.start();
if (shared_types) {
@ -140,15 +139,14 @@ void MOR_GPU_MemoryT::loop(const bool _eflag, const bool _vflag) {
&this->_nbor_data->begin(),
&this->ans->dev_ans.begin(),
&this->ans->dev_engv.begin(), &eflag, &vflag,
&ainum, &anall, &nbor_pitch,
&this->_threads_per_atom);
&ainum, &nbor_pitch, &this->_threads_per_atom);
} else {
this->k_pair.set_size(GX,BX);
this->k_pair.run(&this->atom->dev_x.begin(), &mor1.begin(), &mor2.begin(),
&_types, &sp_lj.begin(), &this->nbor->dev_nbor.begin(),
&this->_nbor_data->begin(), &this->ans->dev_ans.begin(),
&this->ans->dev_engv.begin(), &eflag, &vflag, &ainum,
&anall, &nbor_pitch, &this->_threads_per_atom);
&nbor_pitch, &this->_threads_per_atom);
}
this->time_pair.stop();
}

View File

@ -253,7 +253,7 @@ __kernel void kernel_special(__global int *dev_nbor,
__global int *host_nbor_list,
__global int *host_numj, __global int *tag,
__global int *nspecial, __global int *special,
int inum, int nt, int nall, int max_nbors) {
int inum, int nt, int max_nbors) {
// ii indexes the two interacting particles in gi
int ii=GLOBAL_ID_X;

View File

@ -389,7 +389,7 @@ void PairGPUNbor::build_nbor_list(const int inum, const int host_inum,
_shared->k_special.run(&dev_nbor.begin(), &dev_host_nbor.begin(),
&dev_host_numj.begin(), &atom.dev_tag.begin(),
&dev_nspecial.begin(), &dev_special.begin(),
&inum, &nt, &nall, &_max_nbors);
&inum, &nt, &_max_nbors);
}
time_kernel.stop();

View File

@ -49,7 +49,7 @@ __inline float fetch_q(const int& i, const float *q)
// Allow PPPM to compile without atomics for NVIDIA 1.0 cards, error
// generated at runtime with use of pppm/gpu
#if (__CUDA_ARCH__ < 110)
#define atom_add(x,y) 0
#define atomicAdd(x,y) *(x)+=0
#endif
#else