mirror of https://github.com/lammps/lammps.git
git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@6340 f3b2605a-c512-4ea7-a41b-209d697bcdaa
This commit is contained in:
parent
029f0ee874
commit
98ccbe3e69
|
@ -43,19 +43,22 @@ CUDPP = $(OBJ_DIR)/cudpp.o $(OBJ_DIR)/cudpp_plan.o \
|
|||
$(OBJ_DIR)/radixsort_app.cu_o $(OBJ_DIR)/scan_app.cu_o
|
||||
OBJS = $(OBJ_DIR)/pair_gpu_atom.o $(OBJ_DIR)/pair_gpu_ans.o \
|
||||
$(OBJ_DIR)/pair_gpu_nbor.o $(OBJ_DIR)/pair_gpu_nbor_shared.o \
|
||||
$(OBJ_DIR)/pair_gpu_device.o \
|
||||
$(OBJ_DIR)/atomic_gpu_memory.o $(OBJ_DIR)/charge_gpu_memory.o \
|
||||
$(OBJ_DIR)/pair_gpu_device.o $(OBJ_DIR)/atomic_gpu_memory.o \
|
||||
$(OBJ_DIR)/charge_gpu_memory.o $(OBJ_DIR)/base_ellipsoid.o \
|
||||
$(OBJ_DIR)/pppm_gpu_memory.o $(OBJ_DIR)/pppm_l_gpu.o \
|
||||
$(OBJ_DIR)/gb_gpu_memory.o $(OBJ_DIR)/gb_gpu.o \
|
||||
$(OBJ_DIR)/gayberne.o $(OBJ_DIR)/gayberne_ext.o \
|
||||
$(OBJ_DIR)/re_squared.o $(OBJ_DIR)/re_squared_ext.o \
|
||||
$(OBJ_DIR)/lj_cut_gpu_memory.o $(OBJ_DIR)/lj_cut_gpu.o \
|
||||
$(OBJ_DIR)/lj96_cut_gpu_memory.o $(OBJ_DIR)/lj96_cut_gpu.o \
|
||||
$(OBJ_DIR)/lj_expand_gpu_memory.o $(OBJ_DIR)/lj_expand_gpu.o \
|
||||
$(OBJ_DIR)/ljc_cut_gpu_memory.o $(OBJ_DIR)/ljc_cut_gpu.o \
|
||||
$(OBJ_DIR)/ljcl_cut_gpu_memory.o $(OBJ_DIR)/ljcl_cut_gpu.o \
|
||||
$(OBJ_DIR)/lj_class2_long.o $(OBJ_DIR)/lj_class2_long_ext.o \
|
||||
$(OBJ_DIR)/morse_gpu_memory.o $(OBJ_DIR)/morse_gpu.o \
|
||||
$(OBJ_DIR)/crml_gpu_memory.o $(OBJ_DIR)/crml_gpu.o \
|
||||
$(OBJ_DIR)/cmm_cut_gpu_memory.o $(OBJ_DIR)/cmm_cut_gpu.o \
|
||||
$(OBJ_DIR)/cmmc_long_gpu_memory.o $(OBJ_DIR)/cmmc_long_gpu.o \
|
||||
$(OBJ_DIR)/cmmc_msm_gpu_memory.o $(OBJ_DIR)/cmmc_msm_gpu.o \
|
||||
$(CUDPP)
|
||||
PTXS = $(OBJ_DIR)/pair_gpu_dev_kernel.ptx \
|
||||
$(OBJ_DIR)/pair_gpu_atom_kernel.ptx $(OBJ_DIR)/pair_gpu_atom_ptx.h \
|
||||
|
@ -63,17 +66,21 @@ PTXS = $(OBJ_DIR)/pair_gpu_dev_kernel.ptx \
|
|||
$(OBJ_DIR)/pair_gpu_build_kernel.ptx $(OBJ_DIR)/pair_gpu_build_ptx.h \
|
||||
$(OBJ_DIR)/pppm_f_gpu_kernel.ptx $(OBJ_DIR)/pppm_f_gpu_ptx.h \
|
||||
$(OBJ_DIR)/pppm_d_gpu_kernel.ptx $(OBJ_DIR)/pppm_d_gpu_ptx.h \
|
||||
$(OBJ_DIR)/gb_gpu_kernel_nbor.ptx $(OBJ_DIR)/gb_gpu_kernel.ptx \
|
||||
$(OBJ_DIR)/gb_gpu_kernel_lj.ptx $(OBJ_DIR)/gb_gpu_ptx.h \
|
||||
$(OBJ_DIR)/ellipsoid_nbor.ptx $(OBJ_DIR)/ellipsoid_nbor_ptx.h \
|
||||
$(OBJ_DIR)/gayberne.ptx $(OBJ_DIR)/gayberne_lj.ptx \
|
||||
$(OBJ_DIR)/gayberne_ptx.h $(OBJ_DIR)/re_squared.ptx \
|
||||
$(OBJ_DIR)/re_squared_lj.ptx $(OBJ_DIR)/re_squared_ptx.h \
|
||||
$(OBJ_DIR)/lj_cut_gpu_kernel.ptx $(OBJ_DIR)/lj_cut_gpu_ptx.h \
|
||||
$(OBJ_DIR)/lj96_cut_gpu_kernel.ptx $(OBJ_DIR)/lj96_cut_gpu_ptx.h \
|
||||
$(OBJ_DIR)/lj_expand_gpu_kernel.ptx $(OBJ_DIR)/lj_expand_gpu_ptx.h \
|
||||
$(OBJ_DIR)/ljc_cut_gpu_kernel.ptx $(OBJ_DIR)/ljc_cut_gpu_ptx.h \
|
||||
$(OBJ_DIR)/ljcl_cut_gpu_kernel.ptx $(OBJ_DIR)/ljcl_cut_gpu_ptx.h \
|
||||
$(OBJ_DIR)/lj_class2_long.ptx $(OBJ_DIR)/lj_class2_long_ptx.h \
|
||||
$(OBJ_DIR)/morse_gpu_kernel.ptx $(OBJ_DIR)/morse_gpu_ptx.h \
|
||||
$(OBJ_DIR)/crml_gpu_kernel.ptx $(OBJ_DIR)/crml_gpu_ptx.h \
|
||||
$(OBJ_DIR)/cmm_cut_gpu_kernel.ptx $(OBJ_DIR)/cmm_cut_gpu_ptx.h \
|
||||
$(OBJ_DIR)/cmmc_long_gpu_kernel.ptx $(OBJ_DIR)/cmmc_long_gpu_ptx.h
|
||||
$(OBJ_DIR)/cmmc_long_gpu_kernel.ptx $(OBJ_DIR)/cmmc_long_gpu_ptx.h \
|
||||
$(OBJ_DIR)/cmmc_msm_gpu_kernel.ptx $(OBJ_DIR)/cmmc_msm_gpu_ptx.h
|
||||
|
||||
all: $(GPU_LIB) $(EXECS)
|
||||
|
||||
|
@ -140,6 +147,9 @@ $(OBJ_DIR)/atomic_gpu_memory.o: $(ALL_H) atomic_gpu_memory.h atomic_gpu_memory.c
|
|||
$(OBJ_DIR)/charge_gpu_memory.o: $(ALL_H) charge_gpu_memory.h charge_gpu_memory.cpp
|
||||
$(CUDR) -o $@ -c charge_gpu_memory.cpp
|
||||
|
||||
$(OBJ_DIR)/base_ellipsoid.o: $(ALL_H) base_ellipsoid.h base_ellipsoid.cpp $(OBJ_DIR)/ellipsoid_nbor_ptx.h
|
||||
$(CUDR) -o $@ -c base_ellipsoid.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/pppm_f_gpu_kernel.ptx: pppm_gpu_kernel.cu pair_gpu_precision.h
|
||||
$(CUDA) --ptx -DNV_KERNEL -Dgrdtyp=float -Dgrdtyp4=float4 -o $@ pppm_gpu_kernel.cu
|
||||
|
||||
|
@ -158,23 +168,41 @@ $(OBJ_DIR)/pppm_gpu_memory.o: $(ALL_H) pppm_gpu_memory.h pppm_gpu_memory.cpp $(O
|
|||
$(OBJ_DIR)/pppm_l_gpu.o: $(ALL_H) pppm_gpu_memory.h pppm_l_gpu.cpp
|
||||
$(CUDR) -o $@ -c pppm_l_gpu.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/gb_gpu_kernel.ptx: gb_gpu_kernel.cu pair_gpu_precision.h gb_gpu_extra.h
|
||||
$(CUDA) --ptx -DNV_KERNEL -o $@ gb_gpu_kernel.cu
|
||||
$(OBJ_DIR)/ellipsoid_nbor.ptx: ellipsoid_nbor.cu pair_gpu_precision.h
|
||||
$(CUDA) --ptx -DNV_KERNEL -o $@ ellipsoid_nbor.cu
|
||||
|
||||
$(OBJ_DIR)/gb_gpu_kernel_lj.ptx: gb_gpu_kernel_lj.cu pair_gpu_precision.h gb_gpu_extra.h
|
||||
$(CUDA) --ptx -DNV_KERNEL -o $@ gb_gpu_kernel_lj.cu
|
||||
$(OBJ_DIR)/ellipsoid_nbor_ptx.h: $(OBJ_DIR)/ellipsoid_nbor.ptx
|
||||
$(BSH) ./geryon/file_to_cstr.sh $(OBJ_DIR)/ellipsoid_nbor.ptx $(OBJ_DIR)/ellipsoid_nbor_ptx.h
|
||||
|
||||
$(OBJ_DIR)/gb_gpu_kernel_nbor.ptx: gb_gpu_kernel_nbor.cu pair_gpu_precision.h
|
||||
$(CUDA) --ptx -DNV_KERNEL -o $@ gb_gpu_kernel_nbor.cu
|
||||
$(OBJ_DIR)/gayberne.ptx: gayberne.cu pair_gpu_precision.h ellipsoid_extra.h
|
||||
$(CUDA) --ptx -DNV_KERNEL -o $@ gayberne.cu
|
||||
|
||||
$(OBJ_DIR)/gb_gpu_ptx.h: $(OBJ_DIR)/gb_gpu_kernel_nbor.ptx $(OBJ_DIR)/gb_gpu_kernel.ptx $(OBJ_DIR)/gb_gpu_kernel_lj.ptx
|
||||
$(BSH) ./geryon/file_to_cstr.sh $(OBJ_DIR)/gb_gpu_kernel_nbor.ptx $(OBJ_DIR)/gb_gpu_kernel.ptx $(OBJ_DIR)/gb_gpu_kernel_lj.ptx $(OBJ_DIR)/gb_gpu_ptx.h
|
||||
$(OBJ_DIR)/gayberne_lj.ptx: gayberne_lj.cu pair_gpu_precision.h ellipsoid_extra.h
|
||||
$(CUDA) --ptx -DNV_KERNEL -o $@ gayberne_lj.cu
|
||||
|
||||
$(OBJ_DIR)/gb_gpu_memory.o: $(ALL_H) gb_gpu_memory.h gb_gpu_memory.cpp $(OBJ_DIR)/gb_gpu_ptx.h
|
||||
$(CUDR) -o $@ -c gb_gpu_memory.cpp -I$(OBJ_DIR)
|
||||
$(OBJ_DIR)/gayberne_ptx.h: $(OBJ_DIR)/gayberne.ptx $(OBJ_DIR)/gayberne_lj.ptx
|
||||
$(BSH) ./geryon/file_to_cstr.sh $(OBJ_DIR)/gayberne.ptx $(OBJ_DIR)/gayberne_lj.ptx $(OBJ_DIR)/gayberne_ptx.h
|
||||
|
||||
$(OBJ_DIR)/gb_gpu.o: $(ALL_H) gb_gpu_memory.h gb_gpu.cpp
|
||||
$(CUDR) -o $@ -c gb_gpu.cpp -I$(OBJ_DIR)
|
||||
$(OBJ_DIR)/gayberne.o: $(ALL_H) gayberne.h gayberne.cpp $(OBJ_DIR)/gayberne_ptx.h $(OBJ_DIR)/base_ellipsoid.o
|
||||
$(CUDR) -o $@ -c gayberne.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/gayberne_ext.o: $(ALL_H) $(OBJ_DIR)/gayberne.o gayberne_ext.cpp
|
||||
$(CUDR) -o $@ -c gayberne_ext.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/re_squared.ptx: re_squared.cu pair_gpu_precision.h ellipsoid_extra.h
|
||||
$(CUDA) --ptx -DNV_KERNEL -o $@ re_squared.cu
|
||||
|
||||
$(OBJ_DIR)/re_squared_lj.ptx: re_squared_lj.cu pair_gpu_precision.h ellipsoid_extra.h
|
||||
$(CUDA) --ptx -DNV_KERNEL -o $@ re_squared_lj.cu
|
||||
|
||||
$(OBJ_DIR)/re_squared_ptx.h: $(OBJ_DIR)/re_squared.ptx $(OBJ_DIR)/re_squared_lj.ptx
|
||||
$(BSH) ./geryon/file_to_cstr.sh $(OBJ_DIR)/re_squared.ptx $(OBJ_DIR)/re_squared_lj.ptx $(OBJ_DIR)/re_squared_ptx.h
|
||||
|
||||
$(OBJ_DIR)/re_squared.o: $(ALL_H) re_squared.h re_squared.cpp $(OBJ_DIR)/re_squared_ptx.h $(OBJ_DIR)/base_ellipsoid.o
|
||||
$(CUDR) -o $@ -c re_squared.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/re_squared_ext.o: $(ALL_H) $(OBJ_DIR)/re_squared.o re_squared_ext.cpp
|
||||
$(CUDR) -o $@ -c re_squared_ext.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/lj_cut_gpu_kernel.ptx: lj_cut_gpu_kernel.cu pair_gpu_precision.h
|
||||
$(CUDA) --ptx -DNV_KERNEL -o $@ lj_cut_gpu_kernel.cu
|
||||
|
@ -200,6 +228,18 @@ $(OBJ_DIR)/ljc_cut_gpu_memory.o: $(ALL_H) ljc_cut_gpu_memory.h ljc_cut_gpu_memor
|
|||
$(OBJ_DIR)/ljc_cut_gpu.o: $(ALL_H) ljc_cut_gpu_memory.h ljc_cut_gpu.cpp charge_gpu_memory.h
|
||||
$(CUDR) -o $@ -c ljc_cut_gpu.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/lj_class2_long.ptx: lj_class2_long.cu pair_gpu_precision.h
|
||||
$(CUDA) --ptx -DNV_KERNEL -o $@ lj_class2_long.cu
|
||||
|
||||
$(OBJ_DIR)/lj_class2_long_ptx.h: $(OBJ_DIR)/lj_class2_long.ptx $(OBJ_DIR)/lj_class2_long.ptx
|
||||
$(BSH) ./geryon/file_to_cstr.sh $(OBJ_DIR)/lj_class2_long.ptx $(OBJ_DIR)/lj_class2_long_ptx.h
|
||||
|
||||
$(OBJ_DIR)/lj_class2_long.o: $(ALL_H) lj_class2_long.h lj_class2_long.cpp $(OBJ_DIR)/lj_class2_long_ptx.h $(OBJ_DIR)/charge_gpu_memory.o
|
||||
$(CUDR) -o $@ -c lj_class2_long.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/lj_class2_long_ext.o: $(ALL_H) lj_class2_long.h lj_class2_long_ext.cpp charge_gpu_memory.h
|
||||
$(CUDR) -o $@ -c lj_class2_long_ext.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/ljcl_cut_gpu_kernel.ptx: ljcl_cut_gpu_kernel.cu pair_gpu_precision.h
|
||||
$(CUDA) --ptx -DNV_KERNEL -o $@ ljcl_cut_gpu_kernel.cu
|
||||
|
||||
|
@ -284,6 +324,18 @@ $(OBJ_DIR)/cmmc_long_gpu_memory.o: $(ALL_H) cmmc_long_gpu_memory.h cmmc_long_gpu
|
|||
$(OBJ_DIR)/cmmc_long_gpu.o: $(ALL_H) cmmc_long_gpu_memory.h cmmc_long_gpu.cpp charge_gpu_memory.h
|
||||
$(CUDR) -o $@ -c cmmc_long_gpu.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/cmmc_msm_gpu_kernel.ptx: cmmc_msm_gpu_kernel.cu pair_gpu_precision.h
|
||||
$(CUDA) --ptx -DNV_KERNEL -o $@ cmmc_msm_gpu_kernel.cu
|
||||
|
||||
$(OBJ_DIR)/cmmc_msm_gpu_ptx.h: $(OBJ_DIR)/cmmc_msm_gpu_kernel.ptx $(OBJ_DIR)/cmmc_msm_gpu_kernel.ptx
|
||||
$(BSH) ./geryon/file_to_cstr.sh $(OBJ_DIR)/cmmc_msm_gpu_kernel.ptx $(OBJ_DIR)/cmmc_msm_gpu_ptx.h
|
||||
|
||||
$(OBJ_DIR)/cmmc_msm_gpu_memory.o: $(ALL_H) cmmc_msm_gpu_memory.h cmmc_msm_gpu_memory.cpp $(OBJ_DIR)/cmmc_msm_gpu_ptx.h $(OBJ_DIR)/atomic_gpu_memory.o
|
||||
$(CUDR) -o $@ -c cmmc_msm_gpu_memory.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/cmmc_msm_gpu.o: $(ALL_H) cmmc_msm_gpu_memory.h cmmc_msm_gpu.cpp charge_gpu_memory.h
|
||||
$(CUDR) -o $@ -c cmmc_msm_gpu.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(BIN_DIR)/nvc_get_devices: ./geryon/ucl_get_devices.cpp $(NVC_H)
|
||||
$(CUDR) -o $@ ./geryon/ucl_get_devices.cpp -DUCL_CUDART $(CUDA_LINK)
|
||||
|
||||
|
|
|
@ -33,25 +33,29 @@ ALL_H = $(OCL_H) $(PAIR_H)
|
|||
EXECS = $(BIN_DIR)/ocl_get_devices
|
||||
OBJS = $(OBJ_DIR)/pair_gpu_atom.o $(OBJ_DIR)/pair_gpu_ans.o \
|
||||
$(OBJ_DIR)/pair_gpu_nbor_shared.o $(OBJ_DIR)/pair_gpu_nbor.o \
|
||||
$(OBJ_DIR)/pair_gpu_device.o \
|
||||
$(OBJ_DIR)/atomic_gpu_memory.o $(OBJ_DIR)/charge_gpu_memory.o \
|
||||
$(OBJ_DIR)/pair_gpu_device.o $(OBJ_DIR)/atomic_gpu_memory.o \
|
||||
$(OBJ_DIR)/charge_gpu_memory.o $(OBJ_DIR)/base_ellipsoid.o \
|
||||
$(OBJ_DIR)/pppm_gpu_memory.o $(OBJ_DIR)/pppm_l_gpu.o \
|
||||
$(OBJ_DIR)/gb_gpu_memory.o $(OBJ_DIR)/gb_gpu.o \
|
||||
$(OBJ_DIR)/gayberne.o $(OBJ_DIR)/gayberne_ext.o \
|
||||
$(OBJ_DIR)/re_squared.o $(OBJ_DIR)/re_squared_ext.o \
|
||||
$(OBJ_DIR)/lj_cut_gpu_memory.o $(OBJ_DIR)/lj_cut_gpu.o \
|
||||
$(OBJ_DIR)/lj96_cut_gpu_memory.o $(OBJ_DIR)/lj96_cut_gpu.o \
|
||||
$(OBJ_DIR)/lj_expand_gpu_memory.o $(OBJ_DIR)/lj_expand_gpu.o \
|
||||
$(OBJ_DIR)/ljc_cut_gpu_memory.o $(OBJ_DIR)/ljc_cut_gpu.o \
|
||||
$(OBJ_DIR)/ljcl_cut_gpu_memory.o $(OBJ_DIR)/ljcl_cut_gpu.o \
|
||||
$(OBJ_DIR)/lj_class2_long.o $(OBJ_DIR)/lj_class2_long_ext.o \
|
||||
$(OBJ_DIR)/morse_gpu_memory.o $(OBJ_DIR)/morse_gpu.o \
|
||||
$(OBJ_DIR)/crml_gpu_memory.o $(OBJ_DIR)/crml_gpu.o \
|
||||
$(OBJ_DIR)/cmm_cut_gpu_memory.o $(OBJ_DIR)/cmm_cut_gpu.o \
|
||||
$(OBJ_DIR)/cmmc_long_gpu_memory.o $(OBJ_DIR)/cmmc_long_gpu.o
|
||||
KERS = $(OBJ_DIR)/pair_gpu_dev_cl.h $(OBJ_DIR)/pair_gpu_atom_cl.h \
|
||||
$(OBJ_DIR)/pair_gpu_nbor_cl.h $(OBJ_DIR)/pppm_gpu_cl.h \
|
||||
$(OBJ_DIR)/gb_gpu_nbor_cl.h $(OBJ_DIR)/gb_gpu_cl.h \
|
||||
$(OBJ_DIR)/ellipsoid_nbor_cl.h $(OBJ_DIR)/gayberne_cl.h \
|
||||
$(OBJ_DIR)/re_squared_cl.h \
|
||||
$(OBJ_DIR)/lj_cut_gpu_cl.h $(OBJ_DIR)/lj96_cut_gpu_cl.h \
|
||||
$(OBJ_DIR)/lj_expand_gpu_cl.h $(OBJ_DIR)/ljc_cut_gpu_cl.h \
|
||||
$(OBJ_DIR)/ljcl_cut_gpu_cl.h $(OBJ_DIR)/morse_gpu_cl.h \
|
||||
$(OBJ_DIR)/ljcl_cut_gpu_cl.h $(OBJ_DIR)/lj_class2_long_cl.h \
|
||||
$(OBJ_DIR)/morse_gpu_cl.h \
|
||||
$(OBJ_DIR)/crml_gpu_cl.h $(OBJ_DIR)/cmm_cut_gpu_cl.h \
|
||||
$(OBJ_DIR)/cmmc_long_gpu_cl.h
|
||||
|
||||
|
@ -89,6 +93,9 @@ $(OBJ_DIR)/atomic_gpu_memory.o: $(OCL_H) atomic_gpu_memory.h atomic_gpu_memory.c
|
|||
$(OBJ_DIR)/charge_gpu_memory.o: $(OCL_H) charge_gpu_memory.h charge_gpu_memory.cpp
|
||||
$(OCL) -o $@ -c charge_gpu_memory.cpp
|
||||
|
||||
$(OBJ_DIR)/base_ellipsoid.o: $(OCL_H) base_ellipsoid.h base_ellipsoid.cpp $(OBJ_DIR)/ellipsoid_nbor_cl.h
|
||||
$(OCL) -o $@ -c base_ellipsoid.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/pppm_gpu_cl.h: pppm_gpu_kernel.cu
|
||||
$(BSH) ./geryon/file_to_cstr.sh pppm_gpu_kernel.cu $(OBJ_DIR)/pppm_gpu_cl.h;
|
||||
|
||||
|
@ -98,20 +105,32 @@ $(OBJ_DIR)/pppm_gpu_memory.o: $(ALL_H) pppm_gpu_memory.h pppm_gpu_memory.cpp $(
|
|||
$(OBJ_DIR)/pppm_l_gpu.o: $(ALL_H) pppm_gpu_memory.h pppm_l_gpu.cpp
|
||||
$(OCL) -o $@ -c pppm_l_gpu.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/gb_gpu_nbor_cl.h: gb_gpu_kernel_nbor.cu
|
||||
$(BSH) ./geryon/file_to_cstr.sh gb_gpu_kernel_nbor.cu $(OBJ_DIR)/gb_gpu_nbor_cl.h
|
||||
$(OBJ_DIR)/ellipsoid_nbor_cl.h: ellipsoid_nbor.cu
|
||||
$(BSH) ./geryon/file_to_cstr.sh ellipsoid_nbor.cu $(OBJ_DIR)/ellipsoid_nbor_cl.h
|
||||
|
||||
$(OBJ_DIR)/gb_gpu_cl.h: gb_gpu_kernel.cu gb_gpu_kernel_lj.cu gb_gpu_extra.h
|
||||
cat gb_gpu_extra.h gb_gpu_kernel.cu > $(OBJ_DIR)/gb_gpu_kernel.tar; \
|
||||
cat gb_gpu_extra.h gb_gpu_kernel_lj.cu > $(OBJ_DIR)/gb_gpu_kernel_lj.tar; \
|
||||
$(BSH) ./geryon/file_to_cstr.sh $(OBJ_DIR)/gb_gpu_kernel.tar $(OBJ_DIR)/gb_gpu_kernel_lj.tar $(OBJ_DIR)/gb_gpu_cl.h; \
|
||||
rm -f $(OBJ_DIR)/gb_gpu_kernel.tar $(OBJ_DIR)/gb_gpu_kernel_lj.tar
|
||||
$(OBJ_DIR)/gayberne_cl.h: gayberne.cu gayberne_lj.cu ellipsoid_extra.h
|
||||
cat ellipsoid_extra.h gayberne.cu > $(OBJ_DIR)/gayberne.tar; \
|
||||
cat ellipsoid_extra.h gayberne_lj.cu > $(OBJ_DIR)/gayberne_lj.tar; \
|
||||
$(BSH) ./geryon/file_to_cstr.sh $(OBJ_DIR)/gayberne.tar $(OBJ_DIR)/gayberne_lj.tar $(OBJ_DIR)/gayberne_cl.h; \
|
||||
rm -f $(OBJ_DIR)/gayberne.tar $(OBJ_DIR)/gayberne_lj.tar
|
||||
|
||||
$(OBJ_DIR)/gb_gpu_memory.o: $(ALL_H) gb_gpu_memory.h gb_gpu_memory.cpp $(OBJ_DIR)/gb_gpu_nbor_cl.h $(OBJ_DIR)/gb_gpu_cl.h
|
||||
$(OCL) -o $@ -c gb_gpu_memory.cpp -I$(OBJ_DIR)
|
||||
$(OBJ_DIR)/gayberne.o: $(ALL_H) gayberne.h gayberne.cpp $(OBJ_DIR)/gayberne_cl.h $(OBJ_DIR)/base_ellipsoid.o
|
||||
$(OCL) -o $@ -c gayberne.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/gb_gpu.o: $(ALL_H) gb_gpu_memory.h gb_gpu.cpp
|
||||
$(OCL) -o $@ -c gb_gpu.cpp -I$(OBJ_DIR)
|
||||
$(OBJ_DIR)/gayberne_ext.o: $(ALL_H) $(OBJ_DIR)/gayberne.o gayberne_ext.cpp
|
||||
$(OCL) -o $@ -c gayberne_ext.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/re_squared_cl.h: re_squared.cu re_squared_lj.cu ellipsoid_extra.h
|
||||
cat ellipsoid_extra.h re_squared.cu > $(OBJ_DIR)/re_squared.tar; \
|
||||
cat ellipsoid_extra.h re_squared_lj.cu > $(OBJ_DIR)/re_squared_lj.tar; \
|
||||
$(BSH) ./geryon/file_to_cstr.sh $(OBJ_DIR)/re_squared.tar $(OBJ_DIR)/re_squared_lj.tar $(OBJ_DIR)/re_squared_cl.h; \
|
||||
rm -f $(OBJ_DIR)/re_squared.tar $(OBJ_DIR)/re_squared_lj.tar
|
||||
|
||||
$(OBJ_DIR)/re_squared.o: $(ALL_H) re_squared.h re_squared.cpp $(OBJ_DIR)/re_squared_cl.h $(OBJ_DIR)/base_ellipsoid.o
|
||||
$(OCL) -o $@ -c re_squared.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/re_squared_ext.o: $(ALL_H) $(OBJ_DIR)/re_squared.o re_squared_ext.cpp
|
||||
$(OCL) -o $@ -c re_squared_ext.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/lj_cut_gpu_cl.h: lj_cut_gpu_kernel.cu
|
||||
$(BSH) ./geryon/file_to_cstr.sh lj_cut_gpu_kernel.cu $(OBJ_DIR)/lj_cut_gpu_cl.h;
|
||||
|
@ -134,12 +153,21 @@ $(OBJ_DIR)/ljc_cut_gpu.o: $(ALL_H) ljc_cut_gpu_memory.h ljc_cut_gpu.cpp charge_g
|
|||
$(OBJ_DIR)/ljcl_cut_gpu_cl.h: ljcl_cut_gpu_kernel.cu
|
||||
$(BSH) ./geryon/file_to_cstr.sh ljcl_cut_gpu_kernel.cu $(OBJ_DIR)/ljcl_cut_gpu_cl.h;
|
||||
|
||||
$(OBJ_DIR)/ljcl_cut_gpu_memory.o: $(ALL_H) ljcl_cut_gpu_memory.h ljcl_cut_gpu_memory.cpp $(OBJ_DIR)/ljcl_cut_gpu_cl.h $(OBJ_DIR)/pair_gpu_nbor_cl.h $(OBJ_DIR)/ljcl_cut_gpu_cl.h $(OBJ_DIR)/charge_gpu_memory.o
|
||||
$(OBJ_DIR)/ljcl_cut_gpu_memory.o: $(ALL_H) ljcl_cut_gpu_memory.h ljcl_cut_gpu_memory.cpp $(OBJ_DIR)/ljcl_cut_gpu_cl.h $(OBJ_DIR)/pair_gpu_nbor_cl.h $(OBJ_DIR)/charge_gpu_memory.o
|
||||
$(OCL) -o $@ -c ljcl_cut_gpu_memory.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/ljcl_cut_gpu.o: $(ALL_H) ljcl_cut_gpu_memory.h ljcl_cut_gpu.cpp charge_gpu_memory.h
|
||||
$(OCL) -o $@ -c ljcl_cut_gpu.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/lj_class2_long_cl.h: lj_class2_long.cu
|
||||
$(BSH) ./geryon/file_to_cstr.sh lj_class2_long.cu $(OBJ_DIR)/lj_class2_long_cl.h;
|
||||
|
||||
$(OBJ_DIR)/lj_class2_long.o: $(ALL_H) lj_class2_long.h lj_class2_long.cpp $(OBJ_DIR)/lj_class2_long_cl.h $(OBJ_DIR)/pair_gpu_nbor_cl.h $(OBJ_DIR)/charge_gpu_memory.o
|
||||
$(OCL) -o $@ -c lj_class2_long.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/lj_class2_long_ext.o: $(ALL_H) lj_class2_long.h lj_class2_long_ext.cpp charge_gpu_memory.h
|
||||
$(OCL) -o $@ -c lj_class2_long_ext.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/morse_gpu_cl.h: morse_gpu_kernel.cu
|
||||
$(BSH) ./geryon/file_to_cstr.sh morse_gpu_kernel.cu $(OBJ_DIR)/morse_gpu_cl.h;
|
||||
|
||||
|
|
|
@ -84,7 +84,7 @@ __kernel void kernel_pair(__global numtyp4 *x_, __global numtyp4 *lj1,
|
|||
__global numtyp *sp_lj_in, __global int *dev_nbor,
|
||||
__global int *dev_packed, __global acctyp4 *ans,
|
||||
__global acctyp *engv, const int eflag,
|
||||
const int vflag, const int inum, const int nall,
|
||||
const int vflag, const int inum,
|
||||
const int nbor_pitch, const int t_per_atom) {
|
||||
int tid=THREAD_ID_X;
|
||||
int ii=mul24((int)BLOCK_ID_X,(int)(BLOCK_SIZE_X)/t_per_atom);
|
||||
|
@ -241,7 +241,7 @@ __kernel void kernel_pair_fast(__global numtyp4 *x_, __global numtyp4 *lj1_in,
|
|||
__global numtyp* sp_lj_in,__global int *dev_nbor,
|
||||
__global int *dev_packed, __global acctyp4 *ans,
|
||||
__global acctyp *engv, const int eflag,
|
||||
const int vflag, const int inum, const int nall,
|
||||
const int vflag, const int inum,
|
||||
const int nbor_pitch, const int t_per_atom) {
|
||||
int tid=THREAD_ID_X;
|
||||
int ii=mul24((int)BLOCK_ID_X,(int)(BLOCK_SIZE_X)/t_per_atom);
|
||||
|
|
|
@ -130,7 +130,6 @@ void CMM_GPU_MemoryT::loop(const bool _eflag, const bool _vflag) {
|
|||
(BX/this->_threads_per_atom)));
|
||||
|
||||
int ainum=this->ans->inum();
|
||||
int anall=this->atom->nall();
|
||||
int nbor_pitch=this->nbor->nbor_pitch();
|
||||
this->time_pair.start();
|
||||
if (shared_types) {
|
||||
|
@ -141,15 +140,14 @@ void CMM_GPU_MemoryT::loop(const bool _eflag, const bool _vflag) {
|
|||
&this->_nbor_data->begin(),
|
||||
&this->ans->dev_ans.begin(),
|
||||
&this->ans->dev_engv.begin(), &eflag, &vflag,
|
||||
&ainum, &anall, &nbor_pitch,
|
||||
&this->_threads_per_atom);
|
||||
&ainum, &nbor_pitch, &this->_threads_per_atom);
|
||||
} else {
|
||||
this->k_pair.set_size(GX,BX);
|
||||
this->k_pair.run(&this->atom->dev_x.begin(), &lj1.begin(), &lj3.begin(),
|
||||
&_cmm_types, &sp_lj.begin(), &this->nbor->dev_nbor.begin(),
|
||||
&this->_nbor_data->begin(), &this->ans->dev_ans.begin(),
|
||||
&this->ans->dev_engv.begin(), &eflag, &vflag, &ainum,
|
||||
&anall, &nbor_pitch, &this->_threads_per_atom);
|
||||
&nbor_pitch, &this->_threads_per_atom);
|
||||
}
|
||||
this->time_pair.stop();
|
||||
}
|
||||
|
|
|
@ -102,7 +102,7 @@ __kernel void kernel_pair(__global numtyp4 *x_, __global numtyp4 *lj1,
|
|||
__global numtyp *sp_lj_in, __global int *dev_nbor,
|
||||
__global int *dev_packed, __global acctyp4 *ans,
|
||||
__global acctyp *engv, const int eflag,
|
||||
const int vflag, const int inum, const int nall,
|
||||
const int vflag, const int inum,
|
||||
const int nbor_pitch, __global numtyp *q_ ,
|
||||
const numtyp cut_coulsq, const numtyp qqrd2e,
|
||||
const numtyp g_ewald, const int t_per_atom) {
|
||||
|
@ -295,10 +295,9 @@ __kernel void kernel_pair_fast(__global numtyp4 *x_, __global numtyp4 *lj1_in,
|
|||
__global int *dev_nbor, __global int *dev_packed,
|
||||
__global acctyp4 *ans, __global acctyp *engv,
|
||||
const int eflag, const int vflag, const int inum,
|
||||
const int nall, const int nbor_pitch,
|
||||
__global numtyp *q_ , const numtyp cut_coulsq,
|
||||
const numtyp qqrd2e, const numtyp g_ewald,
|
||||
const int t_per_atom) {
|
||||
const int nbor_pitch, __global numtyp *q_,
|
||||
const numtyp cut_coulsq, const numtyp qqrd2e,
|
||||
const numtyp g_ewald, const int t_per_atom) {
|
||||
int tid=THREAD_ID_X;
|
||||
int ii=mul24((int)BLOCK_ID_X,(int)(BLOCK_SIZE_X)/t_per_atom);
|
||||
ii+=tid/t_per_atom;
|
||||
|
|
|
@ -141,7 +141,6 @@ void CMML_GPU_MemoryT::loop(const bool _eflag, const bool _vflag) {
|
|||
(BX/this->_threads_per_atom)));
|
||||
|
||||
int ainum=this->ans->inum();
|
||||
int anall=this->atom->nall();
|
||||
int nbor_pitch=this->nbor->nbor_pitch();
|
||||
this->time_pair.start();
|
||||
if (shared_types) {
|
||||
|
@ -152,7 +151,7 @@ void CMML_GPU_MemoryT::loop(const bool _eflag, const bool _vflag) {
|
|||
&this->_nbor_data->begin(),
|
||||
&this->ans->dev_ans.begin(),
|
||||
&this->ans->dev_engv.begin(), &eflag, &vflag,
|
||||
&ainum, &anall, &nbor_pitch,
|
||||
&ainum, &nbor_pitch,
|
||||
&this->atom->dev_q.begin(), &_cut_coulsq,
|
||||
&_qqrd2e, &_g_ewald, &this->_threads_per_atom);
|
||||
} else {
|
||||
|
@ -161,7 +160,7 @@ void CMML_GPU_MemoryT::loop(const bool _eflag, const bool _vflag) {
|
|||
&_lj_types, &sp_lj.begin(), &this->nbor->dev_nbor.begin(),
|
||||
&this->_nbor_data->begin(), &this->ans->dev_ans.begin(),
|
||||
&this->ans->dev_engv.begin(), &eflag, &vflag, &ainum,
|
||||
&anall, &nbor_pitch, &this->atom->dev_q.begin(),
|
||||
&nbor_pitch, &this->atom->dev_q.begin(),
|
||||
&_cut_coulsq, &_qqrd2e, &_g_ewald,
|
||||
&this->_threads_per_atom);
|
||||
}
|
||||
|
|
|
@ -103,12 +103,11 @@ __kernel void kernel_pair(__global numtyp4 *x_, __global numtyp4 *lj1,
|
|||
__global int *dev_nbor, __global int *dev_packed,
|
||||
__global acctyp4 *ans, __global acctyp *engv,
|
||||
const int eflag, const int vflag, const int inum,
|
||||
const int nall, const int nbor_pitch,
|
||||
__global numtyp *q_, const numtyp cut_coulsq,
|
||||
const numtyp qqrd2e, const numtyp g_ewald,
|
||||
const numtyp denom_lj, const numtyp cut_bothsq,
|
||||
const numtyp cut_ljsq, const numtyp cut_lj_innersq,
|
||||
const int t_per_atom) {
|
||||
const int nbor_pitch, __global numtyp *q_,
|
||||
const numtyp cut_coulsq, const numtyp qqrd2e,
|
||||
const numtyp g_ewald, const numtyp denom_lj,
|
||||
const numtyp cut_bothsq, const numtyp cut_ljsq,
|
||||
const numtyp cut_lj_innersq, const int t_per_atom) {
|
||||
|
||||
int tid=THREAD_ID_X;
|
||||
int ii=mul24((int)BLOCK_ID_X,(int)(BLOCK_SIZE_X)/t_per_atom);
|
||||
|
@ -300,7 +299,7 @@ __kernel void kernel_pair_fast(__global numtyp4 *x_, __global numtyp2 *ljd_in,
|
|||
__global numtyp* sp_lj_in, __global int *dev_nbor,
|
||||
__global int *dev_packed, __global acctyp4 *ans,
|
||||
__global acctyp *engv, const int eflag,
|
||||
const int vflag, const int inum, const int nall,
|
||||
const int vflag, const int inum,
|
||||
const int nbor_pitch, __global numtyp *q_,
|
||||
const numtyp cut_coulsq, const numtyp qqrd2e,
|
||||
const numtyp g_ewald, const numtyp denom_lj,
|
||||
|
|
|
@ -145,7 +145,6 @@ void CRML_GPU_MemoryT::loop(const bool _eflag, const bool _vflag) {
|
|||
(BX/this->_threads_per_atom)));
|
||||
|
||||
int ainum=this->ans->inum();
|
||||
int anall=this->atom->nall();
|
||||
int nbor_pitch=this->nbor->nbor_pitch();
|
||||
this->time_pair.start();
|
||||
if (shared_types) {
|
||||
|
@ -155,10 +154,9 @@ void CRML_GPU_MemoryT::loop(const bool _eflag, const bool _vflag) {
|
|||
&this->_nbor_data->begin(),
|
||||
&this->ans->dev_ans.begin(),
|
||||
&this->ans->dev_engv.begin(), &eflag, &vflag,
|
||||
&ainum, &anall, &nbor_pitch,
|
||||
&this->atom->dev_q.begin(), &_cut_coulsq,
|
||||
&_qqrd2e, &_g_ewald, &_denom_lj, &_cut_bothsq,
|
||||
&_cut_ljsq, &_cut_lj_innersq,
|
||||
&ainum, &nbor_pitch, &this->atom->dev_q.begin(),
|
||||
&_cut_coulsq, &_qqrd2e, &_g_ewald, &_denom_lj,
|
||||
&_cut_bothsq, &_cut_ljsq, &_cut_lj_innersq,
|
||||
&this->_threads_per_atom);
|
||||
} else {
|
||||
this->k_pair.set_size(GX,BX);
|
||||
|
@ -166,7 +164,7 @@ void CRML_GPU_MemoryT::loop(const bool _eflag, const bool _vflag) {
|
|||
&_lj_types, &sp_lj.begin(), &this->nbor->dev_nbor.begin(),
|
||||
&this->_nbor_data->begin(), &this->ans->dev_ans.begin(),
|
||||
&this->ans->dev_engv.begin(), &eflag, &vflag, &ainum,
|
||||
&anall, &nbor_pitch, &this->atom->dev_q.begin(),
|
||||
&nbor_pitch, &this->atom->dev_q.begin(),
|
||||
&_cut_coulsq, &_qqrd2e, &_g_ewald, &_denom_lj,
|
||||
&_cut_bothsq, &_cut_ljsq, &_cut_lj_innersq,
|
||||
&this->_threads_per_atom);
|
||||
|
|
|
@ -54,8 +54,11 @@ typedef struct _double4 double4;
|
|||
#define __kernel extern "C" __global__
|
||||
#define __local __shared__
|
||||
#define __global
|
||||
#define __inline static __inline__ __device__
|
||||
#define atom_add atomicAdd
|
||||
|
||||
#ifndef __inline
|
||||
#define __inline static __inline__ __device__
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
|
|
|
@ -84,7 +84,7 @@ __kernel void kernel_pair(__global numtyp4 *x_, __global numtyp4 *lj1,
|
|||
__global numtyp *sp_lj_in, __global int *dev_nbor,
|
||||
__global int *dev_packed, __global acctyp4 *ans,
|
||||
__global acctyp *engv, const int eflag,
|
||||
const int vflag, const int inum, const int nall,
|
||||
const int vflag, const int inum,
|
||||
const int nbor_pitch, const int t_per_atom) {
|
||||
int tid=THREAD_ID_X;
|
||||
int ii=mul24((int)BLOCK_ID_X,(int)(BLOCK_SIZE_X)/t_per_atom);
|
||||
|
@ -234,8 +234,7 @@ __kernel void kernel_pair_fast(__global numtyp4 *x_, __global numtyp4 *lj1_in,
|
|||
__global int *dev_nbor, __global int *dev_packed,
|
||||
__global acctyp4 *ans, __global acctyp *engv,
|
||||
const int eflag, const int vflag, const int inum,
|
||||
const int nall, const int nbor_pitch,
|
||||
const int t_per_atom) {
|
||||
const int nbor_pitch, const int t_per_atom) {
|
||||
int tid=THREAD_ID_X;
|
||||
int ii=mul24((int)BLOCK_ID_X,(int)(BLOCK_SIZE_X)/t_per_atom);
|
||||
ii+=tid/t_per_atom;
|
||||
|
|
|
@ -130,7 +130,6 @@ void LJ96_GPU_MemoryT::loop(const bool _eflag, const bool _vflag) {
|
|||
(BX/this->_threads_per_atom)));
|
||||
|
||||
int ainum=this->ans->inum();
|
||||
int anall=this->atom->nall();
|
||||
int nbor_pitch=this->nbor->nbor_pitch();
|
||||
this->time_pair.start();
|
||||
if (shared_types) {
|
||||
|
@ -141,15 +140,14 @@ void LJ96_GPU_MemoryT::loop(const bool _eflag, const bool _vflag) {
|
|||
&this->_nbor_data->begin(),
|
||||
&this->ans->dev_ans.begin(),
|
||||
&this->ans->dev_engv.begin(), &eflag, &vflag,
|
||||
&ainum, &anall, &nbor_pitch,
|
||||
&this->_threads_per_atom);
|
||||
&ainum, &nbor_pitch, &this->_threads_per_atom);
|
||||
} else {
|
||||
this->k_pair.set_size(GX,BX);
|
||||
this->k_pair.run(&this->atom->dev_x.begin(), &lj1.begin(), &lj3.begin(),
|
||||
&_lj_types, &sp_lj.begin(), &this->nbor->dev_nbor.begin(),
|
||||
&this->_nbor_data->begin(), &this->ans->dev_ans.begin(),
|
||||
&this->ans->dev_engv.begin(), &eflag, &vflag, &ainum,
|
||||
&anall, &nbor_pitch, &this->_threads_per_atom);
|
||||
&nbor_pitch, &this->_threads_per_atom);
|
||||
}
|
||||
this->time_pair.stop();
|
||||
}
|
||||
|
|
|
@ -84,7 +84,7 @@ __kernel void kernel_pair(__global numtyp4 *x_, __global numtyp4 *lj1,
|
|||
__global numtyp *sp_lj_in, __global int *dev_nbor,
|
||||
__global int *dev_packed, __global acctyp4 *ans,
|
||||
__global acctyp *engv, const int eflag,
|
||||
const int vflag, const int inum, const int nall,
|
||||
const int vflag, const int inum,
|
||||
const int nbor_pitch, const int t_per_atom) {
|
||||
int tid=THREAD_ID_X;
|
||||
int ii=mul24((int)BLOCK_ID_X,(int)(BLOCK_SIZE_X)/t_per_atom);
|
||||
|
@ -233,8 +233,7 @@ __kernel void kernel_pair_fast(__global numtyp4 *x_, __global numtyp4 *lj1_in,
|
|||
__global int *dev_nbor, __global int *dev_packed,
|
||||
__global acctyp4 *ans, __global acctyp *engv,
|
||||
const int eflag, const int vflag, const int inum,
|
||||
const int nall, const int nbor_pitch,
|
||||
const int t_per_atom) {
|
||||
const int nbor_pitch, const int t_per_atom) {
|
||||
int tid=THREAD_ID_X;
|
||||
int ii=mul24((int)BLOCK_ID_X,(int)(BLOCK_SIZE_X)/t_per_atom);
|
||||
ii+=tid/t_per_atom;
|
||||
|
|
|
@ -130,7 +130,6 @@ void LJL_GPU_MemoryT::loop(const bool _eflag, const bool _vflag) {
|
|||
(BX/this->_threads_per_atom)));
|
||||
|
||||
int ainum=this->ans->inum();
|
||||
int anall=this->atom->nall();
|
||||
int nbor_pitch=this->nbor->nbor_pitch();
|
||||
this->time_pair.start();
|
||||
if (shared_types) {
|
||||
|
@ -141,15 +140,14 @@ void LJL_GPU_MemoryT::loop(const bool _eflag, const bool _vflag) {
|
|||
&this->_nbor_data->begin(),
|
||||
&this->ans->dev_ans.begin(),
|
||||
&this->ans->dev_engv.begin(), &eflag, &vflag,
|
||||
&ainum, &anall, &nbor_pitch,
|
||||
&this->_threads_per_atom);
|
||||
&ainum, &nbor_pitch, &this->_threads_per_atom);
|
||||
} else {
|
||||
this->k_pair.set_size(GX,BX);
|
||||
this->k_pair.run(&this->atom->dev_x.begin(), &lj1.begin(), &lj3.begin(),
|
||||
&_lj_types, &sp_lj.begin(), &this->nbor->dev_nbor.begin(),
|
||||
&this->_nbor_data->begin(), &this->ans->dev_ans.begin(),
|
||||
&this->ans->dev_engv.begin(), &eflag, &vflag, &ainum,
|
||||
&anall, &nbor_pitch, &this->_threads_per_atom);
|
||||
&nbor_pitch, &this->_threads_per_atom);
|
||||
}
|
||||
this->time_pair.stop();
|
||||
}
|
||||
|
|
|
@ -84,7 +84,7 @@ __kernel void kernel_pair(__global numtyp4 *x_, __global numtyp4 *lj1,
|
|||
__global numtyp *sp_lj_in, __global int *dev_nbor,
|
||||
__global int *dev_packed, __global acctyp4 *ans,
|
||||
__global acctyp *engv, const int eflag,
|
||||
const int vflag, const int inum, const int nall,
|
||||
const int vflag, const int inum,
|
||||
const int nbor_pitch, const int t_per_atom) {
|
||||
int tid=THREAD_ID_X;
|
||||
int ii=mul24((int)BLOCK_ID_X,(int)(BLOCK_SIZE_X)/t_per_atom);
|
||||
|
@ -236,8 +236,7 @@ __kernel void kernel_pair_fast(__global numtyp4 *x_, __global numtyp4 *lj1_in,
|
|||
__global int *dev_nbor, __global int *dev_packed,
|
||||
__global acctyp4 *ans, __global acctyp *engv,
|
||||
const int eflag, const int vflag, const int inum,
|
||||
const int nall, const int nbor_pitch,
|
||||
const int t_per_atom) {
|
||||
const int nbor_pitch, const int t_per_atom) {
|
||||
int tid=THREAD_ID_X;
|
||||
int ii=mul24((int)BLOCK_ID_X,(int)(BLOCK_SIZE_X)/t_per_atom);
|
||||
ii+=tid/t_per_atom;
|
||||
|
|
|
@ -130,7 +130,6 @@ void LJE_GPU_MemoryT::loop(const bool _eflag, const bool _vflag) {
|
|||
(BX/this->_threads_per_atom)));
|
||||
|
||||
int ainum=this->ans->inum();
|
||||
int anall=this->atom->nall();
|
||||
int nbor_pitch=this->nbor->nbor_pitch();
|
||||
this->time_pair.start();
|
||||
if (shared_types) {
|
||||
|
@ -141,15 +140,14 @@ void LJE_GPU_MemoryT::loop(const bool _eflag, const bool _vflag) {
|
|||
&this->_nbor_data->begin(),
|
||||
&this->ans->dev_ans.begin(),
|
||||
&this->ans->dev_engv.begin(), &eflag, &vflag,
|
||||
&ainum, &anall, &nbor_pitch,
|
||||
&this->_threads_per_atom);
|
||||
&ainum, &nbor_pitch, &this->_threads_per_atom);
|
||||
} else {
|
||||
this->k_pair.set_size(GX,BX);
|
||||
this->k_pair.run(&this->atom->dev_x.begin(), &lj1.begin(), &lj3.begin(),
|
||||
&_lj_types, &sp_lj.begin(), &this->nbor->dev_nbor.begin(),
|
||||
&this->_nbor_data->begin(), &this->ans->dev_ans.begin(),
|
||||
&this->ans->dev_engv.begin(), &eflag, &vflag, &ainum,
|
||||
&anall, &nbor_pitch, &this->_threads_per_atom);
|
||||
&nbor_pitch, &this->_threads_per_atom);
|
||||
}
|
||||
this->time_pair.stop();
|
||||
}
|
||||
|
|
|
@ -94,7 +94,7 @@ __kernel void kernel_pair(__global numtyp4 *x_, __global numtyp4 *lj1,
|
|||
__global numtyp *sp_lj_in, __global int *dev_nbor,
|
||||
__global int *dev_packed, __global acctyp4 *ans,
|
||||
__global acctyp *engv, const int eflag,
|
||||
const int vflag, const int inum, const int nall,
|
||||
const int vflag, const int inum,
|
||||
const int nbor_pitch, __global numtyp *q_ ,
|
||||
__global numtyp *cutsq, const numtyp qqrd2e,
|
||||
const int t_per_atom) {
|
||||
|
@ -270,9 +270,9 @@ __kernel void kernel_pair_fast(__global numtyp4 *x_, __global numtyp4 *lj1_in,
|
|||
__global int *dev_nbor, __global int *dev_packed,
|
||||
__global acctyp4 *ans, __global acctyp *engv,
|
||||
const int eflag, const int vflag, const int inum,
|
||||
const int nall, const int nbor_pitch,
|
||||
__global numtyp *q_ , __global numtyp *_cutsq,
|
||||
const numtyp qqrd2e, const int t_per_atom) {
|
||||
const int nbor_pitch, __global numtyp *q_,
|
||||
__global numtyp *_cutsq, const numtyp qqrd2e,
|
||||
const int t_per_atom) {
|
||||
int tid=THREAD_ID_X;
|
||||
int ii=mul24((int)BLOCK_ID_X,(int)(BLOCK_SIZE_X)/t_per_atom);
|
||||
ii+=tid/t_per_atom;
|
||||
|
|
|
@ -142,7 +142,6 @@ void LJC_GPU_MemoryT::loop(const bool _eflag, const bool _vflag) {
|
|||
(BX/this->_threads_per_atom)));
|
||||
|
||||
int ainum=this->ans->inum();
|
||||
int anall=this->atom->nall();
|
||||
int nbor_pitch=this->nbor->nbor_pitch();
|
||||
this->time_pair.start();
|
||||
if (shared_types) {
|
||||
|
@ -153,7 +152,7 @@ void LJC_GPU_MemoryT::loop(const bool _eflag, const bool _vflag) {
|
|||
&this->_nbor_data->begin(),
|
||||
&this->ans->dev_ans.begin(),
|
||||
&this->ans->dev_engv.begin(), &eflag, &vflag,
|
||||
&ainum, &anall, &nbor_pitch,
|
||||
&ainum, &nbor_pitch,
|
||||
&this->atom->dev_q.begin(), &cutsq.begin(),
|
||||
&_qqrd2e, &this->_threads_per_atom);
|
||||
} else {
|
||||
|
@ -162,7 +161,7 @@ void LJC_GPU_MemoryT::loop(const bool _eflag, const bool _vflag) {
|
|||
&_lj_types, &sp_lj.begin(), &this->nbor->dev_nbor.begin(),
|
||||
&this->_nbor_data->begin(), &this->ans->dev_ans.begin(),
|
||||
&this->ans->dev_engv.begin(), &eflag, &vflag, &ainum,
|
||||
&anall, &nbor_pitch, &this->atom->dev_q.begin(),
|
||||
&nbor_pitch, &this->atom->dev_q.begin(),
|
||||
&cutsq.begin(), &_qqrd2e, &this->_threads_per_atom);
|
||||
}
|
||||
this->time_pair.stop();
|
||||
|
|
|
@ -102,7 +102,7 @@ __kernel void kernel_pair(__global numtyp4 *x_, __global numtyp4 *lj1,
|
|||
__global numtyp *sp_lj_in, __global int *dev_nbor,
|
||||
__global int *dev_packed, __global acctyp4 *ans,
|
||||
__global acctyp *engv, const int eflag,
|
||||
const int vflag, const int inum, const int nall,
|
||||
const int vflag, const int inum,
|
||||
const int nbor_pitch, __global numtyp *q_,
|
||||
const numtyp cut_coulsq, const numtyp qqrd2e,
|
||||
const numtyp g_ewald, const int t_per_atom) {
|
||||
|
@ -286,10 +286,9 @@ __kernel void kernel_pair_fast(__global numtyp4 *x_, __global numtyp4 *lj1_in,
|
|||
__global int *dev_nbor, __global int *dev_packed,
|
||||
__global acctyp4 *ans, __global acctyp *engv,
|
||||
const int eflag, const int vflag, const int inum,
|
||||
const int nall, const int nbor_pitch,
|
||||
__global numtyp *q_ , const numtyp cut_coulsq,
|
||||
const numtyp qqrd2e, const numtyp g_ewald,
|
||||
const int t_per_atom) {
|
||||
const int nbor_pitch, __global numtyp *q_,
|
||||
const numtyp cut_coulsq, const numtyp qqrd2e,
|
||||
const numtyp g_ewald, const int t_per_atom) {
|
||||
int tid=THREAD_ID_X;
|
||||
int ii=mul24((int)BLOCK_ID_X,(int)(BLOCK_SIZE_X)/t_per_atom);
|
||||
ii+=tid/t_per_atom;
|
||||
|
|
|
@ -140,7 +140,6 @@ void LJCL_GPU_MemoryT::loop(const bool _eflag, const bool _vflag) {
|
|||
(BX/this->_threads_per_atom)));
|
||||
|
||||
int ainum=this->ans->inum();
|
||||
int anall=this->atom->nall();
|
||||
int nbor_pitch=this->nbor->nbor_pitch();
|
||||
this->time_pair.start();
|
||||
if (shared_types) {
|
||||
|
@ -151,18 +150,17 @@ void LJCL_GPU_MemoryT::loop(const bool _eflag, const bool _vflag) {
|
|||
&this->_nbor_data->begin(),
|
||||
&this->ans->dev_ans.begin(),
|
||||
&this->ans->dev_engv.begin(), &eflag, &vflag,
|
||||
&ainum, &anall, &nbor_pitch,
|
||||
&this->atom->dev_q.begin(), &_cut_coulsq,
|
||||
&_qqrd2e, &_g_ewald, &this->_threads_per_atom);
|
||||
&ainum, &nbor_pitch, &this->atom->dev_q.begin(),
|
||||
&_cut_coulsq, &_qqrd2e, &_g_ewald,
|
||||
&this->_threads_per_atom);
|
||||
} else {
|
||||
this->k_pair.set_size(GX,BX);
|
||||
this->k_pair.run(&this->atom->dev_x.begin(), &lj1.begin(), &lj3.begin(),
|
||||
&_lj_types, &sp_lj.begin(), &this->nbor->dev_nbor.begin(),
|
||||
&this->_nbor_data->begin(), &this->ans->dev_ans.begin(),
|
||||
&this->ans->dev_engv.begin(), &eflag, &vflag, &ainum,
|
||||
&anall, &nbor_pitch, &this->atom->dev_q.begin(),
|
||||
&_cut_coulsq, &_qqrd2e, &_g_ewald,
|
||||
&this->_threads_per_atom);
|
||||
&nbor_pitch, &this->atom->dev_q.begin(), &_cut_coulsq,
|
||||
&_qqrd2e, &_g_ewald, &this->_threads_per_atom);
|
||||
}
|
||||
this->time_pair.stop();
|
||||
}
|
||||
|
|
|
@ -84,7 +84,7 @@ __kernel void kernel_pair(__global numtyp4 *x_, __global numtyp4 *mor1,
|
|||
__global numtyp *sp_lj_in, __global int *dev_nbor,
|
||||
__global int *dev_packed, __global acctyp4 *ans,
|
||||
__global acctyp *engv, const int eflag,
|
||||
const int vflag, const int inum, const int nall,
|
||||
const int vflag, const int inum,
|
||||
const int nbor_pitch, const int t_per_atom) {
|
||||
int tid=THREAD_ID_X;
|
||||
int ii=mul24((int)BLOCK_ID_X,(int)(BLOCK_SIZE_X)/t_per_atom);
|
||||
|
@ -234,8 +234,7 @@ __kernel void kernel_pair_fast(__global numtyp4 *x_, __global numtyp4 *mor1_in,
|
|||
__global int *dev_nbor, __global int *dev_packed,
|
||||
__global acctyp4 *ans, __global acctyp *engv,
|
||||
const int eflag, const int vflag, const int inum,
|
||||
const int nall, const int nbor_pitch,
|
||||
const int t_per_atom) {
|
||||
const int nbor_pitch, const int t_per_atom) {
|
||||
int tid=THREAD_ID_X;
|
||||
int ii=mul24((int)BLOCK_ID_X,(int)(BLOCK_SIZE_X)/t_per_atom);
|
||||
ii+=tid/t_per_atom;
|
||||
|
|
|
@ -129,7 +129,6 @@ void MOR_GPU_MemoryT::loop(const bool _eflag, const bool _vflag) {
|
|||
(BX/this->_threads_per_atom)));
|
||||
|
||||
int ainum=this->ans->inum();
|
||||
int anall=this->atom->nall();
|
||||
int nbor_pitch=this->nbor->nbor_pitch();
|
||||
this->time_pair.start();
|
||||
if (shared_types) {
|
||||
|
@ -140,15 +139,14 @@ void MOR_GPU_MemoryT::loop(const bool _eflag, const bool _vflag) {
|
|||
&this->_nbor_data->begin(),
|
||||
&this->ans->dev_ans.begin(),
|
||||
&this->ans->dev_engv.begin(), &eflag, &vflag,
|
||||
&ainum, &anall, &nbor_pitch,
|
||||
&this->_threads_per_atom);
|
||||
&ainum, &nbor_pitch, &this->_threads_per_atom);
|
||||
} else {
|
||||
this->k_pair.set_size(GX,BX);
|
||||
this->k_pair.run(&this->atom->dev_x.begin(), &mor1.begin(), &mor2.begin(),
|
||||
&_types, &sp_lj.begin(), &this->nbor->dev_nbor.begin(),
|
||||
&this->_nbor_data->begin(), &this->ans->dev_ans.begin(),
|
||||
&this->ans->dev_engv.begin(), &eflag, &vflag, &ainum,
|
||||
&anall, &nbor_pitch, &this->_threads_per_atom);
|
||||
&nbor_pitch, &this->_threads_per_atom);
|
||||
}
|
||||
this->time_pair.stop();
|
||||
}
|
||||
|
|
|
@ -253,7 +253,7 @@ __kernel void kernel_special(__global int *dev_nbor,
|
|||
__global int *host_nbor_list,
|
||||
__global int *host_numj, __global int *tag,
|
||||
__global int *nspecial, __global int *special,
|
||||
int inum, int nt, int nall, int max_nbors) {
|
||||
int inum, int nt, int max_nbors) {
|
||||
// ii indexes the two interacting particles in gi
|
||||
int ii=GLOBAL_ID_X;
|
||||
|
||||
|
|
|
@ -389,7 +389,7 @@ void PairGPUNbor::build_nbor_list(const int inum, const int host_inum,
|
|||
_shared->k_special.run(&dev_nbor.begin(), &dev_host_nbor.begin(),
|
||||
&dev_host_numj.begin(), &atom.dev_tag.begin(),
|
||||
&dev_nspecial.begin(), &dev_special.begin(),
|
||||
&inum, &nt, &nall, &_max_nbors);
|
||||
&inum, &nt, &_max_nbors);
|
||||
}
|
||||
time_kernel.stop();
|
||||
|
||||
|
|
|
@ -49,7 +49,7 @@ __inline float fetch_q(const int& i, const float *q)
|
|||
// Allow PPPM to compile without atomics for NVIDIA 1.0 cards, error
|
||||
// generated at runtime with use of pppm/gpu
|
||||
#if (__CUDA_ARCH__ < 110)
|
||||
#define atom_add(x,y) 0
|
||||
#define atomicAdd(x,y) *(x)+=0
|
||||
#endif
|
||||
|
||||
#else
|
||||
|
|
Loading…
Reference in New Issue