forked from lijiext/lammps
git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@11335 f3b2605a-c512-4ea7-a41b-209d697bcdaa
This commit is contained in:
parent
dc585528b2
commit
28194a14eb
|
@ -17,13 +17,16 @@ CUDA_ARCH = -arch=sm_21
|
|||
# older CUDA
|
||||
#CUDA_ARCH = -arch=sm_10 -DCUDA_PRE_THREE
|
||||
|
||||
CUDA_PRECISION = -D_SINGLE_SINGLE
|
||||
# system-specific settings, should match the integer-size setting used to build LAMMPS: LAMMPS_SMALLBIG (default), LAMMPS_BIGBIG or LAMMPS_SMALLSMALL
|
||||
LMP_INC = -DLAMMPS_BIGBIG
|
||||
|
||||
CUDA_PRECISION = -D_SINGLE_DOUBLE
|
||||
CUDA_INCLUDE = -I$(CUDA_HOME)/include
|
||||
CUDA_LIB = -L$(CUDA_HOME)/lib64
|
||||
CUDA_OPTS = -DUNIX -O3 -Xptxas -v --use_fast_math
|
||||
CUDA_OPTS = -DUNIX -O3 -Xptxas -v --use_fast_math $(LMP_INC)
|
||||
|
||||
CUDR_CPP = mpic++ -DMPI_GERYON -DUCL_NO_EXIT -DMPICH_IGNORE_CXX_SEEK
|
||||
CUDR_OPTS = -O2 # -xHost -no-prec-div -ansi-alias
|
||||
CUDR_OPTS = -O2 $(LMP_INC) # -xHost -no-prec-div -ansi-alias
|
||||
|
||||
BIN_DIR = ./
|
||||
OBJ_DIR = ./
|
||||
|
|
|
@ -11,9 +11,13 @@ OCL_TUNE = -DFERMI_OCL # -- Uncomment for NVIDIA Fermi
|
|||
# OCL_TUNE = -DCYPRESS_OCL # -- Uncomment for AMD Cypress
|
||||
# OCL_TUNE = -DGENERIC_OCL # -- Uncomment for generic device
|
||||
|
||||
OCL_CPP = mpic++ $(DEFAULT_DEVICE) -O3 -DMPI_GERYON -DUCL_NO_EXIT -DMPICH_IGNORE_CXX_SEEK
|
||||
# system-specific settings, should match the integer-size setting used to build LAMMPS: LAMMPS_SMALLBIG (default), LAMMPS_BIGBIG or LAMMPS_SMALLSMALL
|
||||
LMP_INC = #-DLAMMPS_BIGBIG
|
||||
|
||||
OCL_INC = -I/usr/local/cuda/include # Path to CL directory
|
||||
OCL_CPP = mpic++ $(DEFAULT_DEVICE) -O3 -DMPI_GERYON -DUCL_NO_EXIT -DMPICH_IGNORE_CXX_SEEK $(LMP_INC) $(OCL_INC)
|
||||
OCL_LINK = -lOpenCL
|
||||
OCL_PREC = -D_SINGLE_SINGLE
|
||||
OCL_PREC = -D_SINGLE_DOUBLE
|
||||
|
||||
BIN_DIR = ./
|
||||
OBJ_DIR = ./
|
||||
|
|
|
@ -82,7 +82,7 @@ and
|
|||
|
||||
Brown, W.M., Yamada, M. Implementing Molecular Dynamics on Hybrid High
|
||||
Performance Computers - Three-Body Potentials. Computer Physics Communications.
|
||||
2013. In press.
|
||||
2013. 184: p. 2785–2793.
|
||||
|
||||
----
|
||||
|
||||
|
@ -103,37 +103,33 @@ Current styles supporting GPU acceleration:
|
|||
11 eam/alloy
|
||||
12 eam/fs
|
||||
13 eam
|
||||
14 eam/lj
|
||||
15 gauss
|
||||
16 gayberne
|
||||
17 lj96/cut
|
||||
18 lj/charmm/coul/long
|
||||
19 lj/class2/coul/long
|
||||
20 lj/class2
|
||||
21 lj/cut/coul/cut
|
||||
22 lj/cut/coul/debye
|
||||
23 lj/cut/coul/dsf
|
||||
24 lj/cut/coul/long
|
||||
25 lj/cut/coul/msm
|
||||
26 lj/cut/coul/wolf/fsw
|
||||
27 lj/cut/dipole/cut
|
||||
28 lj/cut
|
||||
29 lj/cut/tgpu
|
||||
30 lj/expand
|
||||
31 lj/sdk/coul/long
|
||||
32 cg/cmm/coul/long
|
||||
33 lj/sdk
|
||||
34 cg/cmm
|
||||
35 lj/sf/dipole/sf
|
||||
36 mie/cut
|
||||
37 morse
|
||||
38 resquared
|
||||
39 soft
|
||||
40 sw
|
||||
41 table
|
||||
42 yukawa/colloid
|
||||
43 yukawa
|
||||
44 pppm
|
||||
14 gauss
|
||||
15 gayberne
|
||||
16 lj96/cut
|
||||
17 lj/charmm/coul/long
|
||||
18 lj/class2/coul/long
|
||||
19 lj/class2
|
||||
20 lj/cut/coul/cut
|
||||
21 lj/cut/coul/debye
|
||||
22 lj/cut/coul/dsf
|
||||
23 lj/cut/coul/long
|
||||
24 lj/cut/coul/msm
|
||||
25 lj/cut/dipole/cut
|
||||
26 lj/cut
|
||||
27 lj/expand
|
||||
28 lj/gromacs
|
||||
29 lj/sdk/coul/long
|
||||
30 lj/sdk
|
||||
31 lj/sf/dipole/sf
|
||||
32 mie/cut
|
||||
33 morse
|
||||
34 resquared
|
||||
35 soft
|
||||
36 sw
|
||||
37 table
|
||||
38 yukawa/colloid
|
||||
39 yukawa
|
||||
40 pppm
|
||||
|
||||
|
||||
MULTIPLE LAMMPS PROCESSES
|
||||
|
@ -200,6 +196,10 @@ NOTE: The lj/cut/coul/long/gpu, cg/cmm/coul/long/gpu, coul/long/gpu,
|
|||
lj/charmm/coul/long/gpu and pppm/gpu styles will only be installed
|
||||
if the KSPACE package has been installed.
|
||||
|
||||
NOTE: The system-specific setting LAMMPS_SMALLBIG (default), LAMMPS_BIGBIG,
|
||||
or LAMMPS_SMALLSMALL, if specified when building LAMMPS (e.g. in
|
||||
src/MAKE/Makefile.foo), should be consistent with the one specified
|
||||
when building libgpu.a (e.g. via LMP_INC in lib/gpu/Makefile.bar).
|
||||
|
||||
EXAMPLE BUILD PROCESS
|
||||
--------------------------------
|
||||
|
|
|
@ -373,7 +373,7 @@ class Atom {
|
|||
/// Cell list identifiers for device nbor builds
|
||||
UCL_D_Vec<int> dev_particle_id;
|
||||
/// Atom tag information for device nbor builds
|
||||
UCL_D_Vec<int> dev_tag;
|
||||
UCL_D_Vec<tagint> dev_tag;
|
||||
|
||||
/// Cell list identifiers for hybrid nbor builds
|
||||
UCL_H_Vec<int> host_cell_id;
|
||||
|
|
|
@ -150,8 +150,8 @@ template <class numtyp, class acctyp>
|
|||
inline void BaseAtomicT::build_nbor_list(const int inum, const int host_inum,
|
||||
const int nall, double **host_x,
|
||||
int *host_type, double *sublo,
|
||||
double *subhi, int *tag,
|
||||
int **nspecial, int **special,
|
||||
double *subhi, tagint *tag,
|
||||
int **nspecial, tagint **special,
|
||||
bool &success) {
|
||||
success=true;
|
||||
resize_atom(inum,nall,success);
|
||||
|
@ -216,8 +216,8 @@ void BaseAtomicT::compute(const int f_ago, const int inum_full,
|
|||
template <class numtyp, class acctyp>
|
||||
int ** BaseAtomicT::compute(const int ago, const int inum_full,
|
||||
const int nall, double **host_x, int *host_type,
|
||||
double *sublo, double *subhi, int *tag,
|
||||
int **nspecial, int **special, const bool eflag,
|
||||
double *sublo, double *subhi, tagint *tag,
|
||||
int **nspecial, tagint **special, const bool eflag,
|
||||
const bool vflag, const bool eatom,
|
||||
const bool vatom, int &host_start,
|
||||
int **ilist, int **jnum,
|
||||
|
|
|
@ -119,8 +119,8 @@ class BaseAtomic {
|
|||
/// Build neighbor list on device
|
||||
void build_nbor_list(const int inum, const int host_inum,
|
||||
const int nall, double **host_x, int *host_type,
|
||||
double *sublo, double *subhi, int *tag, int **nspecial,
|
||||
int **special, bool &success);
|
||||
double *sublo, double *subhi, tagint *tag, int **nspecial,
|
||||
tagint **special, bool &success);
|
||||
|
||||
/// Pair loop with host neighboring
|
||||
void compute(const int f_ago, const int inum_full,
|
||||
|
@ -132,16 +132,16 @@ class BaseAtomic {
|
|||
/// Pair loop with device neighboring
|
||||
int * compute(const int ago, const int inum_full,
|
||||
const int nall, double **host_x, int *host_type, double *sublo,
|
||||
double *subhi, int *tag, int **nspecial,
|
||||
int **special, const bool eflag, const bool vflag,
|
||||
double *subhi, tagint *tag, int **nspecial,
|
||||
tagint **special, const bool eflag, const bool vflag,
|
||||
const bool eatom, const bool vatom, int &host_start,
|
||||
const double cpu_time, bool &success);
|
||||
|
||||
/// Pair loop with device neighboring
|
||||
int ** compute(const int ago, const int inum_full,
|
||||
const int nall, double **host_x, int *host_type, double *sublo,
|
||||
double *subhi, int *tag, int **nspecial,
|
||||
int **special, const bool eflag, const bool vflag,
|
||||
double *subhi, tagint *tag, int **nspecial,
|
||||
tagint **special, const bool eflag, const bool vflag,
|
||||
const bool eatom, const bool vatom, int &host_start,
|
||||
int **ilist, int **numj, const double cpu_time, bool &success);
|
||||
|
||||
|
|
|
@ -153,8 +153,8 @@ template <class numtyp, class acctyp>
|
|||
inline void BaseChargeT::build_nbor_list(const int inum, const int host_inum,
|
||||
const int nall, double **host_x,
|
||||
int *host_type, double *sublo,
|
||||
double *subhi, int *tag,
|
||||
int **nspecial, int **special,
|
||||
double *subhi, tagint *tag,
|
||||
int **nspecial, tagint **special,
|
||||
bool &success) {
|
||||
success=true;
|
||||
resize_atom(inum,nall,success);
|
||||
|
@ -225,8 +225,8 @@ void BaseChargeT::compute(const int f_ago, const int inum_full,
|
|||
template <class numtyp, class acctyp>
|
||||
int** BaseChargeT::compute(const int ago, const int inum_full,
|
||||
const int nall, double **host_x, int *host_type,
|
||||
double *sublo, double *subhi, int *tag,
|
||||
int **nspecial, int **special, const bool eflag,
|
||||
double *sublo, double *subhi, tagint *tag,
|
||||
int **nspecial, tagint **special, const bool eflag,
|
||||
const bool vflag, const bool eatom,
|
||||
const bool vatom, int &host_start,
|
||||
int **ilist, int **jnum,
|
||||
|
|
|
@ -122,8 +122,8 @@ class BaseCharge {
|
|||
/// Build neighbor list on device
|
||||
void build_nbor_list(const int inum, const int host_inum,
|
||||
const int nall, double **host_x, int *host_type,
|
||||
double *sublo, double *subhi, int *tag, int **nspecial,
|
||||
int **special, bool &success);
|
||||
double *sublo, double *subhi, tagint *tag, int **nspecial,
|
||||
tagint **special, bool &success);
|
||||
|
||||
/// Pair loop with host neighboring
|
||||
void compute(const int f_ago, const int inum_full, const int nall,
|
||||
|
@ -136,8 +136,8 @@ class BaseCharge {
|
|||
/// Pair loop with device neighboring
|
||||
int** compute(const int ago, const int inum_full, const int nall,
|
||||
double **host_x, int *host_type, double *sublo,
|
||||
double *subhi, int *tag, int **nspecial,
|
||||
int **special, const bool eflag, const bool vflag,
|
||||
double *subhi, tagint *tag, int **nspecial,
|
||||
tagint **special, const bool eflag, const bool vflag,
|
||||
const bool eatom, const bool vatom, int &host_start,
|
||||
int **ilist, int **numj, const double cpu_time, bool &success,
|
||||
double *charge, double *boxlo, double *prd);
|
||||
|
|
|
@ -155,8 +155,8 @@ template <class numtyp, class acctyp>
|
|||
inline void BaseDipoleT::build_nbor_list(const int inum, const int host_inum,
|
||||
const int nall, double **host_x,
|
||||
int *host_type, double *sublo,
|
||||
double *subhi, int *tag,
|
||||
int **nspecial, int **special,
|
||||
double *subhi, tagint *tag,
|
||||
int **nspecial, tagint **special,
|
||||
bool &success) {
|
||||
success=true;
|
||||
resize_atom(inum,nall,success);
|
||||
|
@ -229,8 +229,8 @@ void BaseDipoleT::compute(const int f_ago, const int inum_full,
|
|||
template <class numtyp, class acctyp>
|
||||
int** BaseDipoleT::compute(const int ago, const int inum_full,
|
||||
const int nall, double **host_x, int *host_type,
|
||||
double *sublo, double *subhi, int *tag,
|
||||
int **nspecial, int **special, const bool eflag,
|
||||
double *sublo, double *subhi, tagint *tag,
|
||||
int **nspecial, tagint **special, const bool eflag,
|
||||
const bool vflag, const bool eatom,
|
||||
const bool vatom, int &host_start,
|
||||
int **ilist, int **jnum,
|
||||
|
|
|
@ -121,8 +121,8 @@ class BaseDipole {
|
|||
/// Build neighbor list on device
|
||||
void build_nbor_list(const int inum, const int host_inum,
|
||||
const int nall, double **host_x, int *host_type,
|
||||
double *sublo, double *subhi, int *tag, int **nspecial,
|
||||
int **special, bool &success);
|
||||
double *sublo, double *subhi, tagint *tag, int **nspecial,
|
||||
tagint **special, bool &success);
|
||||
|
||||
/// Pair loop with host neighboring
|
||||
void compute(const int f_ago, const int inum_full, const int nall,
|
||||
|
@ -135,8 +135,8 @@ class BaseDipole {
|
|||
/// Pair loop with device neighboring
|
||||
int** compute(const int ago, const int inum_full, const int nall,
|
||||
double **host_x, int *host_type, double *sublo,
|
||||
double *subhi, int *tag, int **nspecial,
|
||||
int **special, const bool eflag, const bool vflag,
|
||||
double *subhi, tagint *tag, int **nspecial,
|
||||
tagint **special, const bool eflag, const bool vflag,
|
||||
const bool eatom, const bool vatom, int &host_start,
|
||||
int **ilist, int **numj, const double cpu_time, bool &success,
|
||||
double *charge, double **mu, double *boxlo, double *prd);
|
||||
|
|
|
@ -313,8 +313,8 @@ template <class numtyp, class acctyp>
|
|||
inline void BaseEllipsoidT::build_nbor_list(const int inum, const int host_inum,
|
||||
const int nall, double **host_x,
|
||||
int *host_type, double *sublo,
|
||||
double *subhi, int *tag,
|
||||
int **nspecial, int **special,
|
||||
double *subhi, tagint *tag,
|
||||
int **nspecial, tagint **special,
|
||||
bool &success) {
|
||||
success=true;
|
||||
resize_atom(nall,success);
|
||||
|
@ -390,8 +390,8 @@ int* BaseEllipsoidT::compute(const int f_ago, const int inum_full,
|
|||
template <class numtyp, class acctyp>
|
||||
int** BaseEllipsoidT::compute(const int ago, const int inum_full, const int nall,
|
||||
double **host_x, int *host_type, double *sublo,
|
||||
double *subhi, int *tag, int **nspecial,
|
||||
int **special, const bool eflag, const bool vflag,
|
||||
double *subhi, tagint *tag, int **nspecial,
|
||||
tagint **special, const bool eflag, const bool vflag,
|
||||
const bool eatom, const bool vatom,
|
||||
int &host_start, int **ilist, int **jnum,
|
||||
const double cpu_time, bool &success,
|
||||
|
|
|
@ -160,8 +160,8 @@ class BaseEllipsoid {
|
|||
/// Build neighbor list on device
|
||||
void build_nbor_list(const int inum, const int host_inum,
|
||||
const int nall, double **host_x, int *host_type,
|
||||
double *sublo, double *subhi, int *tag, int **nspecial,
|
||||
int **special, bool &success);
|
||||
double *sublo, double *subhi, tagint *tag, int **nspecial,
|
||||
tagint **special, bool &success);
|
||||
|
||||
/// Pair loop with host neighboring
|
||||
int* compute(const int f_ago, const int inum_full, const int nall,
|
||||
|
@ -173,8 +173,8 @@ class BaseEllipsoid {
|
|||
/// Pair loop with device neighboring
|
||||
int** compute(const int ago, const int inum_full, const int nall,
|
||||
double **host_x, int *host_type, double *sublo,
|
||||
double *subhi, int *tag, int **nspecial,
|
||||
int **special, const bool eflag, const bool vflag,
|
||||
double *subhi, tagint *tag, int **nspecial,
|
||||
tagint **special, const bool eflag, const bool vflag,
|
||||
const bool eatom, const bool vatom, int &host_start,
|
||||
int **ilist, int **numj, const double cpu_time, bool &success,
|
||||
double **host_quat);
|
||||
|
|
|
@ -186,8 +186,8 @@ template <class numtyp, class acctyp>
|
|||
inline void BaseThreeT::build_nbor_list(const int inum, const int host_inum,
|
||||
const int nall, double **host_x,
|
||||
int *host_type, double *sublo,
|
||||
double *subhi, int *tag,
|
||||
int **nspecial, int **special,
|
||||
double *subhi, tagint *tag,
|
||||
int **nspecial, tagint **special,
|
||||
bool &success) {
|
||||
success=true;
|
||||
resize_atom(inum,nall,success);
|
||||
|
@ -267,8 +267,8 @@ void BaseThreeT::compute(const int f_ago, const int nlocal, const int nall,
|
|||
template <class numtyp, class acctyp>
|
||||
int ** BaseThreeT::compute(const int ago, const int inum_full,
|
||||
const int nall, double **host_x, int *host_type,
|
||||
double *sublo, double *subhi, int *tag,
|
||||
int **nspecial, int **special, const bool eflag,
|
||||
double *sublo, double *subhi, tagint *tag,
|
||||
int **nspecial, tagint **special, const bool eflag,
|
||||
const bool vflag, const bool eatom,
|
||||
const bool vatom, int &host_start,
|
||||
int **ilist, int **jnum,
|
||||
|
|
|
@ -131,8 +131,8 @@ class BaseThree {
|
|||
/// Build neighbor list on device
|
||||
void build_nbor_list(const int inum, const int host_inum,
|
||||
const int nall, double **host_x, int *host_type,
|
||||
double *sublo, double *subhi, int *tag, int **nspecial,
|
||||
int **special, bool &success);
|
||||
double *sublo, double *subhi, tagint *tag, int **nspecial,
|
||||
tagint **special, bool &success);
|
||||
|
||||
/// Pair loop with host neighboring
|
||||
void compute(const int f_ago, const int inum_full, const int nall,
|
||||
|
@ -144,16 +144,16 @@ class BaseThree {
|
|||
/// Pair loop with device neighboring
|
||||
int * compute(const int ago, const int inum_full, const int nall,
|
||||
double **host_x, int *host_type, double *sublo,
|
||||
double *subhi, int *tag, int **nspecial,
|
||||
int **special, const bool eflag, const bool vflag,
|
||||
double *subhi, tagint *tag, int **nspecial,
|
||||
tagint **special, const bool eflag, const bool vflag,
|
||||
const bool eatom, const bool vatom, int &host_start,
|
||||
const double cpu_time, bool &success);
|
||||
|
||||
/// Pair loop with device neighboring
|
||||
int ** compute(const int ago, const int inum_full,
|
||||
const int nall, double **host_x, int *host_type, double *sublo,
|
||||
double *subhi, int *tag, int **nspecial,
|
||||
int **special, const bool eflag, const bool vflag,
|
||||
double *subhi, tagint *tag, int **nspecial,
|
||||
tagint **special, const bool eflag, const bool vflag,
|
||||
const bool eatom, const bool vatom, int &host_start,
|
||||
int **ilist, int **numj, const double cpu_time, bool &success);
|
||||
|
||||
|
|
|
@ -94,8 +94,8 @@ void beck_gpu_clear() {
|
|||
|
||||
int ** beck_gpu_compute_n(const int ago, const int inum_full,
|
||||
const int nall, double **host_x, int *host_type,
|
||||
double *sublo, double *subhi, int *tag, int **nspecial,
|
||||
int **special, const bool eflag, const bool vflag,
|
||||
double *sublo, double *subhi, tagint *tag, int **nspecial,
|
||||
tagint **special, const bool eflag, const bool vflag,
|
||||
const bool eatom, const bool vatom, int &host_start,
|
||||
int **ilist, int **jnum, const double cpu_time,
|
||||
bool &success) {
|
||||
|
|
|
@ -102,8 +102,8 @@ void borncl_gpu_clear() {
|
|||
|
||||
int** borncl_gpu_compute_n(const int ago, const int inum_full,
|
||||
const int nall, double **host_x, int *host_type,
|
||||
double *sublo, double *subhi, int *tag, int **nspecial,
|
||||
int **special, const bool eflag, const bool vflag,
|
||||
double *sublo, double *subhi, tagint *tag, int **nspecial,
|
||||
tagint **special, const bool eflag, const bool vflag,
|
||||
const bool eatom, const bool vatom, int &host_start,
|
||||
int **ilist, int **jnum, const double cpu_time,
|
||||
bool &success, double *host_q, double *boxlo,
|
||||
|
|
|
@ -104,8 +104,8 @@ void borncw_gpu_clear() {
|
|||
|
||||
int** borncw_gpu_compute_n(const int ago, const int inum_full,
|
||||
const int nall, double **host_x, int *host_type,
|
||||
double *sublo, double *subhi, int *tag, int **nspecial,
|
||||
int **special, const bool eflag, const bool vflag,
|
||||
double *sublo, double *subhi, tagint *tag, int **nspecial,
|
||||
tagint **special, const bool eflag, const bool vflag,
|
||||
const bool eatom, const bool vatom, int &host_start,
|
||||
int **ilist, int **jnum, const double cpu_time,
|
||||
bool &success, double *host_q, double *boxlo,
|
||||
|
|
|
@ -98,8 +98,8 @@ void born_gpu_clear() {
|
|||
|
||||
int ** born_gpu_compute_n(const int ago, const int inum_full,
|
||||
const int nall, double **host_x, int *host_type,
|
||||
double *sublo, double *subhi, int *tag, int **nspecial,
|
||||
int **special, const bool eflag, const bool vflag,
|
||||
double *sublo, double *subhi, tagint *tag, int **nspecial,
|
||||
tagint **special, const bool eflag, const bool vflag,
|
||||
const bool eatom, const bool vatom, int &host_start,
|
||||
int **ilist, int **jnum, const double cpu_time,
|
||||
bool &success) {
|
||||
|
|
|
@ -101,8 +101,8 @@ void buckc_gpu_clear() {
|
|||
|
||||
int ** buckc_gpu_compute_n(const int ago, const int inum_full,
|
||||
const int nall, double **host_x, int *host_type,
|
||||
double *sublo, double *subhi, int *tag, int **nspecial,
|
||||
int **special, const bool eflag, const bool vflag,
|
||||
double *sublo, double *subhi, tagint *tag, int **nspecial,
|
||||
tagint **special, const bool eflag, const bool vflag,
|
||||
const bool eatom, const bool vatom, int &host_start,
|
||||
int **ilist, int **jnum, const double cpu_time,
|
||||
bool &success, double *host_q, double *boxlo,
|
||||
|
|
|
@ -100,8 +100,8 @@ void buckcl_gpu_clear() {
|
|||
|
||||
int** buckcl_gpu_compute_n(const int ago, const int inum_full,
|
||||
const int nall, double **host_x, int *host_type,
|
||||
double *sublo, double *subhi, int *tag, int **nspecial,
|
||||
int **special, const bool eflag, const bool vflag,
|
||||
double *sublo, double *subhi, tagint *tag, int **nspecial,
|
||||
tagint **special, const bool eflag, const bool vflag,
|
||||
const bool eatom, const bool vatom, int &host_start,
|
||||
int **ilist, int **jnum, const double cpu_time,
|
||||
bool &success, double *host_q, double *boxlo,
|
||||
|
|
|
@ -95,8 +95,8 @@ void buck_gpu_clear() {
|
|||
|
||||
int ** buck_gpu_compute_n(const int ago, const int inum_full,
|
||||
const int nall, double **host_x, int *host_type,
|
||||
double *sublo, double *subhi, int *tag, int **nspecial,
|
||||
int **special, const bool eflag, const bool vflag,
|
||||
double *sublo, double *subhi, tagint *tag, int **nspecial,
|
||||
tagint **special, const bool eflag, const bool vflag,
|
||||
const bool eatom, const bool vatom, int &host_start,
|
||||
int **ilist, int **jnum, const double cpu_time,
|
||||
bool &success) {
|
||||
|
|
|
@ -95,8 +95,8 @@ void cmm_gpu_clear() {
|
|||
|
||||
int** cmm_gpu_compute_n(const int ago, const int inum_full,
|
||||
const int nall, double **host_x, int *host_type,
|
||||
double *sublo, double *subhi, int *tag, int **nspecial,
|
||||
int **special, const bool eflag, const bool vflag,
|
||||
double *sublo, double *subhi, tagint *tag, int **nspecial,
|
||||
tagint **special, const bool eflag, const bool vflag,
|
||||
const bool eatom, const bool vatom, int &host_start,
|
||||
int **ilist, int **jnum, const double cpu_time,
|
||||
bool &success) {
|
||||
|
|
|
@ -99,8 +99,8 @@ void cmml_gpu_clear() {
|
|||
|
||||
int** cmml_gpu_compute_n(const int ago, const int inum_full,
|
||||
const int nall, double **host_x, int *host_type,
|
||||
double *sublo, double *subhi, int *tag, int **nspecial,
|
||||
int **special, const bool eflag, const bool vflag,
|
||||
double *sublo, double *subhi, tagint *tag, int **nspecial,
|
||||
tagint **special, const bool eflag, const bool vflag,
|
||||
const bool eatom, const bool vatom, int &host_start,
|
||||
int **ilist, int **jnum, const double cpu_time,
|
||||
bool &success, double *host_q, double *boxlo,
|
||||
|
|
|
@ -104,8 +104,8 @@ void crml_gpu_clear() {
|
|||
|
||||
int** crml_gpu_compute_n(const int ago, const int inum_full,
|
||||
const int nall, double **host_x, int *host_type,
|
||||
double *sublo, double *subhi, int *tag, int **nspecial,
|
||||
int **special, const bool eflag, const bool vflag,
|
||||
double *sublo, double *subhi, tagint *tag, int **nspecial,
|
||||
tagint **special, const bool eflag, const bool vflag,
|
||||
const bool eatom, const bool vatom, int &host_start,
|
||||
int **ilist, int **jnum, const double cpu_time,
|
||||
bool &success, double *host_q, double *boxlo,
|
||||
|
|
|
@ -101,8 +101,8 @@ void colloid_gpu_clear() {
|
|||
|
||||
int ** colloid_gpu_compute_n(const int ago, const int inum_full,
|
||||
const int nall, double **host_x, int *host_type,
|
||||
double *sublo, double *subhi, int *tag, int **nspecial,
|
||||
int **special, const bool eflag, const bool vflag,
|
||||
double *sublo, double *subhi, tagint *tag, int **nspecial,
|
||||
tagint **special, const bool eflag, const bool vflag,
|
||||
const bool eatom, const bool vatom, int &host_start,
|
||||
int **ilist, int **jnum, const double cpu_time,
|
||||
bool &success) {
|
||||
|
|
|
@ -95,8 +95,8 @@ void cdsf_gpu_clear() {
|
|||
|
||||
int** cdsf_gpu_compute_n(const int ago, const int inum_full,
|
||||
const int nall, double **host_x, int *host_type,
|
||||
double *sublo, double *subhi, int *tag, int **nspecial,
|
||||
int **special, const bool eflag, const bool vflag,
|
||||
double *sublo, double *subhi, tagint *tag, int **nspecial,
|
||||
tagint **special, const bool eflag, const bool vflag,
|
||||
const bool eatom, const bool vatom, int &host_start,
|
||||
int **ilist, int **jnum, const double cpu_time,
|
||||
bool &success, double *host_q, double *boxlo,
|
||||
|
|
|
@ -93,8 +93,8 @@ void cl_gpu_clear() {
|
|||
|
||||
int** cl_gpu_compute_n(const int ago, const int inum_full,
|
||||
const int nall, double **host_x, int *host_type,
|
||||
double *sublo, double *subhi, int *tag, int **nspecial,
|
||||
int **special, const bool eflag, const bool vflag,
|
||||
double *sublo, double *subhi, tagint *tag, int **nspecial,
|
||||
tagint **special, const bool eflag, const bool vflag,
|
||||
const bool eatom, const bool vatom, int &host_start,
|
||||
int **ilist, int **jnum, const double cpu_time,
|
||||
bool &success, double *host_q, double *boxlo,
|
||||
|
|
|
@ -205,7 +205,7 @@ int DeviceT::set_ocl_params(char *ocl_vendor) {
|
|||
" -DBLOCK_CELL_ID="+params[11]+
|
||||
" -DMAX_BIO_SHARED_TYPES="+params[12];
|
||||
}
|
||||
_ocl_compile_string="-cl-fast-relaxed-math -cl-mad-enable "+
|
||||
_ocl_compile_string="-cl-fast-relaxed-math -cl-mad-enable "+std::string(OCL_INT_TYPE)+" "+
|
||||
std::string(OCL_PRECISION_COMPILE)+" "+_ocl_vendor_string;
|
||||
#endif
|
||||
return 0;
|
||||
|
|
|
@ -98,8 +98,8 @@ void dpl_gpu_clear() {
|
|||
|
||||
int** dpl_gpu_compute_n(const int ago, const int inum_full,
|
||||
const int nall, double **host_x, int *host_type,
|
||||
double *sublo, double *subhi, int *tag, int **nspecial,
|
||||
int **special, const bool eflag, const bool vflag,
|
||||
double *sublo, double *subhi, tagint *tag, int **nspecial,
|
||||
tagint **special, const bool eflag, const bool vflag,
|
||||
const bool eatom, const bool vatom, int &host_start,
|
||||
int **ilist, int **jnum, const double cpu_time,
|
||||
bool &success, double *host_q, double **host_mu,
|
||||
|
|
|
@ -98,8 +98,8 @@ void dplsf_gpu_clear() {
|
|||
|
||||
int** dplsf_gpu_compute_n(const int ago, const int inum_full,
|
||||
const int nall, double **host_x, int *host_type,
|
||||
double *sublo, double *subhi, int *tag, int **nspecial,
|
||||
int **special, const bool eflag, const bool vflag,
|
||||
double *sublo, double *subhi, tagint *tag, int **nspecial,
|
||||
tagint **special, const bool eflag, const bool vflag,
|
||||
const bool eatom, const bool vatom, int &host_start,
|
||||
int **ilist, int **jnum, const double cpu_time,
|
||||
bool &success, double *host_q, double **host_mu,
|
||||
|
|
|
@ -346,7 +346,7 @@ void EAMT::compute(const int f_ago, const int inum_full, const int nlocal,
|
|||
template <class numtyp, class acctyp>
|
||||
int** EAMT::compute(const int ago, const int inum_full, const int nall,
|
||||
double **host_x, int *host_type, double *sublo,
|
||||
double *subhi, int *tag, int **nspecial, int **special,
|
||||
double *subhi, tagint *tag, int **nspecial, tagint **special,
|
||||
const bool eflag, const bool vflag, const bool eatom,
|
||||
const bool vatom, int &host_start, int **ilist, int **jnum,
|
||||
const double cpu_time, bool &success, int &inum,
|
||||
|
|
|
@ -79,8 +79,8 @@ class EAM : public BaseAtomic<numtyp, acctyp> {
|
|||
/// Pair loop with device neighboring
|
||||
int** compute(const int ago, const int inum_full, const int nall,
|
||||
double **host_x, int *host_type, double *sublo,
|
||||
double *subhi, int *tag, int **nspecial,
|
||||
int **special, const bool eflag, const bool vflag,
|
||||
double *subhi, tagint *tag, int **nspecial,
|
||||
tagint **special, const bool eflag, const bool vflag,
|
||||
const bool eatom, const bool vatom, int &host_start,
|
||||
int **ilist, int **numj, const double cpu_time, bool &success,
|
||||
int &inum, void **fp_ptr);
|
||||
|
|
|
@ -108,8 +108,8 @@ void eam_gpu_clear() {
|
|||
|
||||
int ** eam_gpu_compute_n(const int ago, const int inum_full,
|
||||
const int nall, double **host_x, int *host_type,
|
||||
double *sublo, double *subhi, int *tag, int **nspecial,
|
||||
int **special, const bool eflag, const bool vflag,
|
||||
double *sublo, double *subhi, tagint *tag, int **nspecial,
|
||||
tagint **special, const bool eflag, const bool vflag,
|
||||
const bool eatom, const bool vatom, int &host_start,
|
||||
int **ilist, int **jnum, const double cpu_time,
|
||||
bool &success, int &inum, void **fp_ptr) {
|
||||
|
|
|
@ -94,8 +94,8 @@ void gauss_gpu_clear() {
|
|||
|
||||
int ** gauss_gpu_compute_n(const int ago, const int inum_full,
|
||||
const int nall, double **host_x, int *host_type,
|
||||
double *sublo, double *subhi, int *tag, int **nspecial,
|
||||
int **special, const bool eflag, const bool vflag,
|
||||
double *sublo, double *subhi, tagint *tag, int **nspecial,
|
||||
tagint **special, const bool eflag, const bool vflag,
|
||||
const bool eatom, const bool vatom, int &host_start,
|
||||
int **ilist, int **jnum, const double cpu_time,
|
||||
bool &success) {
|
||||
|
|
|
@ -102,17 +102,17 @@ void gb_gpu_clear() {
|
|||
GBMF.clear();
|
||||
}
|
||||
|
||||
int** compute(const int ago, const int inum_full, const int nall,
|
||||
int** compute(const int ago, const int inum_full, const int nall,
|
||||
double **host_x, int *host_type, double *sublo,
|
||||
double *subhi, int *tag, int **nspecial,
|
||||
int **special, const bool eflag, const bool vflag,
|
||||
double *subhi, tagint *tag, int **nspecial,
|
||||
tagint **special, const bool eflag, const bool vflag,
|
||||
const bool eatom, const bool vatom, int &host_start,
|
||||
int **ilist, int **numj, const double cpu_time, bool &success,
|
||||
double **host_quat);
|
||||
|
||||
int** gb_gpu_compute_n(const int ago, const int inum_full, const int nall,
|
||||
double **host_x, int *host_type, double *sublo,
|
||||
double *subhi, int *tag, int **nspecial, int **special,
|
||||
double *subhi, tagint *tag, int **nspecial, tagint **special,
|
||||
const bool eflag, const bool vflag, const bool eatom,
|
||||
const bool vatom, int &host_start, int **ilist,
|
||||
int **jnum, const double cpu_time, bool &success,
|
||||
|
|
|
@ -94,8 +94,8 @@ void lj96_gpu_clear() {
|
|||
|
||||
int** lj96_gpu_compute_n(const int ago, const int inum_full,
|
||||
const int nall, double **host_x, int *host_type,
|
||||
double *sublo, double *subhi, int *tag, int **nspecial,
|
||||
int **special, const bool eflag, const bool vflag,
|
||||
double *sublo, double *subhi, tagint *tag, int **nspecial,
|
||||
tagint **special, const bool eflag, const bool vflag,
|
||||
const bool eatom, const bool vatom, int &host_start,
|
||||
int **ilist, int **jnum, const double cpu_time,
|
||||
bool &success) {
|
||||
|
|
|
@ -99,8 +99,8 @@ void c2cl_gpu_clear() {
|
|||
|
||||
int** c2cl_gpu_compute_n(const int ago, const int inum_full,
|
||||
const int nall, double **host_x, int *host_type,
|
||||
double *sublo, double *subhi, int *tag, int **nspecial,
|
||||
int **special, const bool eflag, const bool vflag,
|
||||
double *sublo, double *subhi, tagint *tag, int **nspecial,
|
||||
tagint **special, const bool eflag, const bool vflag,
|
||||
const bool eatom, const bool vatom, int &host_start,
|
||||
int **ilist, int **jnum, const double cpu_time,
|
||||
bool &success, double *host_q, double *boxlo,
|
||||
|
|
|
@ -99,8 +99,8 @@ void ljcd_gpu_clear() {
|
|||
|
||||
int** ljcd_gpu_compute_n(const int ago, const int inum_full,
|
||||
const int nall, double **host_x, int *host_type,
|
||||
double *sublo, double *subhi, int *tag, int **nspecial,
|
||||
int **special, const bool eflag, const bool vflag,
|
||||
double *sublo, double *subhi, tagint *tag, int **nspecial,
|
||||
tagint **special, const bool eflag, const bool vflag,
|
||||
const bool eatom, const bool vatom, int &host_start,
|
||||
int **ilist, int **jnum, const double cpu_time,
|
||||
bool &success, double *host_q, double *boxlo,
|
||||
|
|
|
@ -98,8 +98,8 @@ void ljc_gpu_clear() {
|
|||
|
||||
int** ljc_gpu_compute_n(const int ago, const int inum_full,
|
||||
const int nall, double **host_x, int *host_type,
|
||||
double *sublo, double *subhi, int *tag, int **nspecial,
|
||||
int **special, const bool eflag, const bool vflag,
|
||||
double *sublo, double *subhi, tagint *tag, int **nspecial,
|
||||
tagint **special, const bool eflag, const bool vflag,
|
||||
const bool eatom, const bool vatom, int &host_start,
|
||||
int **ilist, int **jnum, const double cpu_time,
|
||||
bool &success, double *host_q, double *boxlo,
|
||||
|
|
|
@ -99,8 +99,8 @@ void ljcl_gpu_clear() {
|
|||
|
||||
int** ljcl_gpu_compute_n(const int ago, const int inum_full,
|
||||
const int nall, double **host_x, int *host_type,
|
||||
double *sublo, double *subhi, int *tag, int **nspecial,
|
||||
int **special, const bool eflag, const bool vflag,
|
||||
double *sublo, double *subhi, tagint *tag, int **nspecial,
|
||||
tagint **special, const bool eflag, const bool vflag,
|
||||
const bool eatom, const bool vatom, int &host_start,
|
||||
int **ilist, int **jnum, const double cpu_time,
|
||||
bool &success, double *host_q, double *boxlo,
|
||||
|
|
|
@ -101,8 +101,8 @@ void ljcm_gpu_clear() {
|
|||
|
||||
int** ljcm_gpu_compute_n(const int ago, const int inum_full,
|
||||
const int nall, double **host_x, int *host_type,
|
||||
double *sublo, double *subhi, int *tag, int **nspecial,
|
||||
int **special, const bool eflag, const bool vflag,
|
||||
double *sublo, double *subhi, tagint *tag, int **nspecial,
|
||||
tagint **special, const bool eflag, const bool vflag,
|
||||
const bool eatom, const bool vatom, int &host_start,
|
||||
int **ilist, int **jnum, const double cpu_time,
|
||||
bool &success, double *host_q, double *boxlo,
|
||||
|
|
|
@ -102,8 +102,8 @@ void ljd_gpu_clear() {
|
|||
|
||||
int** ljd_gpu_compute_n(const int ago, const int inum_full,
|
||||
const int nall, double **host_x, int *host_type,
|
||||
double *sublo, double *subhi, int *tag, int **nspecial,
|
||||
int **special, const bool eflag, const bool vflag,
|
||||
double *sublo, double *subhi, tagint *tag, int **nspecial,
|
||||
tagint **special, const bool eflag, const bool vflag,
|
||||
const bool eatom, const bool vatom, int &host_start,
|
||||
int **ilist, int **jnum, const double cpu_time,
|
||||
bool &success, double *host_q, double *boxlo,
|
||||
|
|
|
@ -95,8 +95,8 @@ void lje_gpu_clear() {
|
|||
|
||||
int** lje_gpu_compute_n(const int ago, const int inum_full,
|
||||
const int nall, double **host_x, int *host_type,
|
||||
double *sublo, double *subhi, int *tag, int **nspecial,
|
||||
int **special, const bool eflag, const bool vflag,
|
||||
double *sublo, double *subhi, tagint *tag, int **nspecial,
|
||||
tagint **special, const bool eflag, const bool vflag,
|
||||
const bool eatom, const bool vatom, int &host_start,
|
||||
int **ilist, int **jnum, const double cpu_time,
|
||||
bool &success) {
|
||||
|
|
|
@ -94,8 +94,8 @@ void ljl_gpu_clear() {
|
|||
|
||||
int ** ljl_gpu_compute_n(const int ago, const int inum_full,
|
||||
const int nall, double **host_x, int *host_type,
|
||||
double *sublo, double *subhi, int *tag, int **nspecial,
|
||||
int **special, const bool eflag, const bool vflag,
|
||||
double *sublo, double *subhi, tagint *tag, int **nspecial,
|
||||
tagint **special, const bool eflag, const bool vflag,
|
||||
const bool eatom, const bool vatom, int &host_start,
|
||||
int **ilist, int **jnum, const double cpu_time,
|
||||
bool &success) {
|
||||
|
|
|
@ -99,8 +99,8 @@ void ljgrm_gpu_clear() {
|
|||
|
||||
int ** ljgrm_gpu_compute_n(const int ago, const int inum_full,
|
||||
const int nall, double **host_x, int *host_type,
|
||||
double *sublo, double *subhi, int *tag, int **nspecial,
|
||||
int **special, const bool eflag, const bool vflag,
|
||||
double *sublo, double *subhi, tagint *tag, int **nspecial,
|
||||
tagint **special, const bool eflag, const bool vflag,
|
||||
const bool eatom, const bool vatom, int &host_start,
|
||||
int **ilist, int **jnum, const double cpu_time,
|
||||
bool &success) {
|
||||
|
|
|
@ -98,8 +98,8 @@ void mie_gpu_clear() {
|
|||
|
||||
int ** mie_gpu_compute_n(const int ago, const int inum_full,
|
||||
const int nall, double **host_x, int *host_type,
|
||||
double *sublo, double *subhi, int *tag, int **nspecial,
|
||||
int **special, const bool eflag, const bool vflag,
|
||||
double *sublo, double *subhi, tagint *tag, int **nspecial,
|
||||
tagint **special, const bool eflag, const bool vflag,
|
||||
const bool eatom, const bool vatom, int &host_start,
|
||||
int **ilist, int **jnum, const double cpu_time,
|
||||
bool &success) {
|
||||
|
|
|
@ -95,8 +95,8 @@ void mor_gpu_clear() {
|
|||
|
||||
int** mor_gpu_compute_n(const int ago, const int inum_full,
|
||||
const int nall, double **host_x, int *host_type,
|
||||
double *sublo, double *subhi, int *tag, int **nspecial,
|
||||
int **special, const bool eflag, const bool vflag,
|
||||
double *sublo, double *subhi, tagint *tag, int **nspecial,
|
||||
tagint **special, const bool eflag, const bool vflag,
|
||||
const bool eatom, const bool vatom, int &host_start,
|
||||
int **ilist, int **jnum, const double cpu_time,
|
||||
bool &success) {
|
||||
|
|
|
@ -389,8 +389,8 @@ void Neighbor::resize_max_neighbors(const int maxn, bool &success) {
|
|||
template <class numtyp, class acctyp>
|
||||
void Neighbor::build_nbor_list(double **x, const int inum, const int host_inum,
|
||||
const int nall, Atom<numtyp,acctyp> &atom,
|
||||
double *sublo, double *subhi, int *tag,
|
||||
int **nspecial, int **special, bool &success,
|
||||
double *sublo, double *subhi, tagint *tag,
|
||||
int **nspecial, tagint **special, bool &success,
|
||||
int &mn) {
|
||||
_nbor_time_avail=true;
|
||||
const int nt=inum+host_inum;
|
||||
|
@ -423,7 +423,8 @@ void Neighbor::build_nbor_list(double **x, const int inum, const int host_inum,
|
|||
|
||||
if (_maxspecial>0) {
|
||||
time_nbor.start();
|
||||
UCL_H_Vec<int> view_nspecial, view_special, view_tag;
|
||||
UCL_H_Vec<int> view_nspecial;
|
||||
UCL_H_Vec<tagint> view_special, view_tag;
|
||||
view_nspecial.view(nspecial[0],nt*3,*dev);
|
||||
view_special.view(special[0],nt*_maxspecial,*dev);
|
||||
view_tag.view(tag,nall,*dev);
|
||||
|
@ -615,5 +616,5 @@ void Neighbor::build_nbor_list(double **x, const int inum, const int host_inum,
|
|||
template void Neighbor::build_nbor_list<PRECISION,ACC_PRECISION>
|
||||
(double **x, const int inum, const int host_inum, const int nall,
|
||||
Atom<PRECISION,ACC_PRECISION> &atom, double *sublo, double *subhi,
|
||||
int *, int **, int **, bool &success, int &mn);
|
||||
tagint *, int **, tagint **, bool &success, int &mn);
|
||||
|
||||
|
|
|
@ -173,7 +173,7 @@ class Neighbor {
|
|||
template <class numtyp, class acctyp>
|
||||
void build_nbor_list(double **x, const int inum, const int host_inum,
|
||||
const int nall, Atom<numtyp,acctyp> &atom, double *sublo,
|
||||
double *subhi, int *tag, int **nspecial, int **special,
|
||||
double *subhi, tagint *tag, int **nspecial, tagint **special,
|
||||
bool &success, int &max_nbors);
|
||||
|
||||
/// Return the number of bytes used on device
|
||||
|
@ -212,7 +212,7 @@ class Neighbor {
|
|||
/// Device storage for special neighbor counts
|
||||
UCL_D_Vec<int> dev_nspecial;
|
||||
/// Device storage for special neighbors
|
||||
UCL_D_Vec<int> dev_special, dev_special_t;
|
||||
UCL_D_Vec<tagint> dev_special, dev_special_t;
|
||||
/// Host/Device storage for number of particles per cell
|
||||
UCL_Vector<int,int> cell_counts;
|
||||
int *cell_iter;
|
||||
|
|
|
@ -16,6 +16,16 @@
|
|||
|
||||
#ifdef NV_KERNEL
|
||||
#include "lal_preprocessor.h"
|
||||
// Choose what the `tagint` macro expands to in this NV_KERNEL build, keyed on
// the LAMMPS integer-size macro: int64_t under LAMMPS_BIGBIG, int under
// LAMMPS_SMALLBIG and LAMMPS_SMALLSMALL. If none of the three is defined here,
// `tagint` is left undefined.
// NOTE(review): presumably this has to agree with the host-side tagint type so
// that tag/special buffers have the same element size on both sides - confirm.
#ifdef LAMMPS_SMALLBIG
|
||||
#define tagint int
|
||||
#endif
|
||||
#ifdef LAMMPS_BIGBIG
|
||||
// int64_t is expected to come from this header
#include "inttypes.h"
|
||||
#define tagint int64_t
|
||||
#endif
|
||||
#ifdef LAMMPS_SMALLSMALL
|
||||
#define tagint int
|
||||
#endif
|
||||
#ifndef _DOUBLE_DOUBLE
|
||||
texture<float4> pos_tex;
|
||||
#else
|
||||
|
@ -93,15 +103,22 @@ __kernel void kernel_calc_cell_counts(const unsigned *restrict cell_id,
|
|||
|
||||
#else
|
||||
#define pos_tex x_
|
||||
#ifdef LAMMPS_SMALLBIG
|
||||
#define tagint int
|
||||
#endif
|
||||
#ifdef LAMMPS_BIGBIG
|
||||
// Non-NV_KERNEL (OpenCL) path: OpenCL C treats `long long` as a reserved type
// name, while `long` is its 64-bit signed integer, so use `long` for 64-bit tags.
#define tagint long
|
||||
#endif
|
||||
#ifdef LAMMPS_SMALLSMALL
|
||||
#define tagint int
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
__kernel void transpose(__global int *restrict out,
|
||||
const __global int *restrict in,
|
||||
__kernel void transpose(__global tagint *restrict out,
|
||||
const __global tagint *restrict in,
|
||||
int columns_in, int rows_in)
|
||||
{
|
||||
__local int block[BLOCK_CELL_2D][BLOCK_CELL_2D+1];
|
||||
__local tagint block[BLOCK_CELL_2D][BLOCK_CELL_2D+1];
|
||||
|
||||
unsigned ti=THREAD_ID_X;
|
||||
unsigned tj=THREAD_ID_Y;
|
||||
|
@ -239,9 +256,9 @@ __kernel void calc_neigh_list_cell(const __global numtyp4 *restrict x_,
|
|||
__kernel void kernel_special(__global int *dev_nbor,
|
||||
__global int *host_nbor_list,
|
||||
const __global int *host_numj,
|
||||
const __global int *restrict tag,
|
||||
const __global tagint *restrict tag,
|
||||
const __global int *restrict nspecial,
|
||||
const __global int *restrict special,
|
||||
const __global tagint *restrict special,
|
||||
int inum, int nt, int max_nbors, int t_per_atom) {
|
||||
int tid=THREAD_ID_X;
|
||||
int ii=fast_mul((int)BLOCK_ID_X,(int)(BLOCK_SIZE_X)/t_per_atom);
|
||||
|
@ -275,7 +292,7 @@ __kernel void kernel_special(__global int *dev_nbor,
|
|||
|
||||
for ( ; list<list_end; list+=stride) {
|
||||
int nbor=*list;
|
||||
int jtag=tag[nbor];
|
||||
tagint jtag=tag[nbor];
|
||||
|
||||
int offset=ii;
|
||||
for (int i=0; i<n3; i++) {
|
||||
|
|
|
@ -119,5 +119,23 @@ enum{SPHERE_SPHERE,SPHERE_ELLIPSE,ELLIPSE_SPHERE,ELLIPSE_ELLIPSE};
|
|||
#define OCL_DEFAULT_VENDOR "none"
|
||||
#endif
|
||||
|
||||
// default to 32-bit smallint and other ints, 64-bit bigint: same as defined in src/lmptype.h
// If the build selected none of the three integer-size macros, fall back to
// LAMMPS_SMALLBIG so that exactly one of the branches below is taken.
|
||||
#if !defined(LAMMPS_SMALLSMALL) && !defined(LAMMPS_BIGBIG) && !defined(LAMMPS_SMALLBIG)
|
||||
#define LAMMPS_SMALLBIG
|
||||
#endif
|
||||
|
||||
// For each size model, define the tagint typedef (int, or int64_t for
// LAMMPS_BIGBIG) and OCL_INT_TYPE, a string holding the matching -D flag.
// NOTE(review): OCL_INT_TYPE is presumably added to the OpenCL kernel build
// options so device code sees the same size macro - confirm at its use site.
#ifdef LAMMPS_SMALLBIG
|
||||
typedef int tagint;
|
||||
#define OCL_INT_TYPE "-DLAMMPS_SMALLBIG"
|
||||
#endif
|
||||
#ifdef LAMMPS_BIGBIG
|
||||
// int64_t is expected to come from this header
#include "inttypes.h"
|
||||
typedef int64_t tagint;
|
||||
#define OCL_INT_TYPE "-DLAMMPS_BIGBIG"
|
||||
#endif
|
||||
#ifdef LAMMPS_SMALLSMALL
|
||||
typedef int tagint;
|
||||
#define OCL_INT_TYPE "-DLAMMPS_SMALLSMALL"
|
||||
#endif
|
||||
|
||||
#endif // LAL_PRECISION_H
|
||||
|
|
|
@ -475,3 +475,7 @@ ucl_inline int sbmask(int j) { return j >> SBBITS & 3; };
|
|||
#define BLOCK_ELLIPSE BLOCK_PAIR
|
||||
#endif
|
||||
|
||||
// default to 32-bit smallint and other ints, 64-bit bigint: same as defined in src/lmptype.h
// Applies only when none of LAMMPS_SMALLSMALL, LAMMPS_BIGBIG or LAMMPS_SMALLBIG
// was already defined; an explicit choice from the build is left untouched.
|
||||
#if !defined(LAMMPS_SMALLSMALL) && !defined(LAMMPS_BIGBIG) && !defined(LAMMPS_SMALLBIG)
|
||||
#define LAMMPS_SMALLBIG
|
||||
#endif
|
||||
|
|
|
@ -101,15 +101,15 @@ void re_gpu_clear() {
|
|||
|
||||
int** compute(const int ago, const int inum_full, const int nall,
|
||||
double **host_x, int *host_type, double *sublo,
|
||||
double *subhi, int *tag, int **nspecial,
|
||||
int **special, const bool eflag, const bool vflag,
|
||||
double *subhi, tagint *tag, int **nspecial,
|
||||
tagint **special, const bool eflag, const bool vflag,
|
||||
const bool eatom, const bool vatom, int &host_start,
|
||||
int **ilist, int **numj, const double cpu_time, bool &success,
|
||||
double **host_quat);
|
||||
|
||||
int** re_gpu_compute_n(const int ago, const int inum_full, const int nall,
|
||||
double **host_x, int *host_type, double *sublo,
|
||||
double *subhi, int *tag, int **nspecial, int **special,
|
||||
double *subhi, tagint *tag, int **nspecial, tagint **special,
|
||||
const bool eflag, const bool vflag, const bool eatom,
|
||||
const bool vatom, int &host_start, int **ilist,
|
||||
int **jnum, const double cpu_time, bool &success,
|
||||
|
|
|
@ -94,8 +94,8 @@ void soft_gpu_clear() {
|
|||
|
||||
int ** soft_gpu_compute_n(const int ago, const int inum_full,
|
||||
const int nall, double **host_x, int *host_type,
|
||||
double *sublo, double *subhi, int *tag, int **nspecial,
|
||||
int **special, const bool eflag, const bool vflag,
|
||||
double *sublo, double *subhi, tagint *tag, int **nspecial,
|
||||
tagint **special, const bool eflag, const bool vflag,
|
||||
const bool eatom, const bool vatom, int &host_start,
|
||||
int **ilist, int **jnum, const double cpu_time,
|
||||
bool &success) {
|
||||
|
|
|
@ -393,8 +393,6 @@ __kernel void k_sw_three_center(const __global numtyp4 *restrict x_,
|
|||
if (rsq1 > sw3_ijparam.y) continue;
|
||||
|
||||
numtyp4 sw1_ijparam; fetch4(sw1_ijparam,ijparam,sw1_tex);
|
||||
sw_sigma=sw1_ijparam.y;
|
||||
sw_gamma=sw1_ijparam.w;
|
||||
sw_sigma_gamma_ij=sw1_ijparam.y*sw1_ijparam.w; //sw_sigma*sw_gamma;
|
||||
sw_cut_ij=sw3_ijparam.x;
|
||||
|
||||
|
@ -418,15 +416,11 @@ __kernel void k_sw_three_center(const __global numtyp4 *restrict x_,
|
|||
numtyp rsq2 = delr2x*delr2x + delr2y*delr2y + delr2z*delr2z;
|
||||
if (rsq2 < sw3_ikparam.y) { // sw_cutsq=sw3[ikparam].y;
|
||||
numtyp4 sw1_ikparam; fetch4(sw1_ikparam,ikparam,sw1_tex);
|
||||
sw_sigma=sw1_ikparam.y;
|
||||
sw_gamma=sw1_ikparam.w;
|
||||
sw_sigma_gamma_ik=sw1_ikparam.y*sw1_ikparam.w; //sw_sigma*sw_gamma;
|
||||
sw_cut_ik=sw3_ikparam.x;
|
||||
|
||||
int ijkparam=elem2param[itype*nelements*nelements+jtype*nelements+ktype];
|
||||
numtyp4 sw1_ijkparam; fetch4(sw1_ijkparam,ijkparam,sw1_tex);
|
||||
sw_epsilon=sw1_ijkparam.x;
|
||||
sw_lambda=sw1_ijkparam.z;
|
||||
sw_lambda_epsilon_ijk=sw1_ijkparam.x*sw1_ijkparam.z; //sw_lambda*sw_epsilon;
|
||||
sw_lambda_epsilon2_ijk=(numtyp)2.0*sw_lambda_epsilon_ijk;
|
||||
numtyp4 sw3_ijkparam; fetch4(sw3_ijkparam,ijkparam,sw3_tex);
|
||||
|
@ -522,8 +516,6 @@ __kernel void k_sw_three_end(const __global numtyp4 *restrict x_,
|
|||
if (rsq1 > sw3_ijparam.y) continue;
|
||||
|
||||
numtyp4 sw1_ijparam; fetch4(sw1_ijparam,ijparam,sw1_tex);
|
||||
sw_sigma=sw1_ijparam.y;
|
||||
sw_gamma=sw1_ijparam.w;
|
||||
sw_sigma_gamma_ij=sw1_ijparam.y*sw1_ijparam.w; //sw_sigma*sw_gamma;
|
||||
sw_cut_ij=sw3_ijparam.x;
|
||||
|
||||
|
@ -559,15 +551,11 @@ __kernel void k_sw_three_end(const __global numtyp4 *restrict x_,
|
|||
|
||||
if (rsq2 < sw3_ikparam.y) {
|
||||
numtyp4 sw1_ikparam; fetch4(sw1_ikparam,ikparam,sw1_tex);
|
||||
sw_sigma=sw1_ikparam.y;
|
||||
sw_gamma=sw1_ikparam.w;
|
||||
sw_sigma_gamma_ik=sw1_ikparam.y*sw1_ikparam.w; //sw_sigma*sw_gamma;
|
||||
sw_cut_ik=sw3_ikparam.x;
|
||||
|
||||
int ijkparam=elem2param[itype*nelements*nelements+jtype*nelements+ktype];
|
||||
numtyp4 sw1_ijkparam; fetch4(sw1_ijkparam,ijkparam,sw1_tex);
|
||||
sw_epsilon=sw1_ijkparam.x;
|
||||
sw_lambda=sw1_ijkparam.z;
|
||||
sw_lambda_epsilon_ijk=sw1_ijkparam.x*sw1_ijkparam.z; //sw_lambda*sw_epsilon;
|
||||
sw_lambda_epsilon2_ijk=(numtyp)2.0*sw_lambda_epsilon_ijk;
|
||||
numtyp4 sw3_ijkparam; fetch4(sw3_ijkparam,ijkparam,sw3_tex);
|
||||
|
@ -663,8 +651,6 @@ __kernel void k_sw_three_end_vatom(const __global numtyp4 *restrict x_,
|
|||
if (rsq1 > sw3_ijparam.y) continue;
|
||||
|
||||
numtyp4 sw1_ijparam; fetch4(sw1_ijparam,ijparam,sw1_tex);
|
||||
sw_sigma=sw1_ijparam.y;
|
||||
sw_gamma=sw1_ijparam.w;
|
||||
sw_sigma_gamma_ij=sw1_ijparam.y*sw1_ijparam.w; //sw_sigma*sw_gamma;
|
||||
sw_cut_ij=sw3_ijparam.x;
|
||||
|
||||
|
@ -700,15 +686,11 @@ __kernel void k_sw_three_end_vatom(const __global numtyp4 *restrict x_,
|
|||
|
||||
if (rsq2 < sw3_ikparam.y) {
|
||||
numtyp4 sw1_ikparam; fetch4(sw1_ikparam,ikparam,sw1_tex);
|
||||
sw_sigma=sw1_ikparam.y;
|
||||
sw_gamma=sw1_ikparam.w;
|
||||
sw_sigma_gamma_ik=sw1_ikparam.y*sw1_ikparam.w; //sw_sigma*sw_gamma;
|
||||
sw_cut_ik=sw3_ikparam.x;
|
||||
|
||||
int ijkparam=elem2param[itype*nelements*nelements+jtype*nelements+ktype];
|
||||
numtyp4 sw1_ijkparam; fetch4(sw1_ijkparam,ijkparam,sw1_tex);
|
||||
sw_epsilon=sw1_ijkparam.x;
|
||||
sw_lambda=sw1_ijkparam.z;
|
||||
sw_lambda_epsilon_ijk=sw1_ijkparam.x*sw1_ijkparam.z; //sw_lambda*sw_epsilon;
|
||||
sw_lambda_epsilon2_ijk=(numtyp)2.0*sw_lambda_epsilon_ijk;
|
||||
numtyp4 sw3_ijkparam; fetch4(sw3_ijkparam,ijkparam,sw3_tex);
|
||||
|
|
|
@ -105,8 +105,8 @@ void sw_gpu_clear() {
|
|||
|
||||
int ** sw_gpu_compute_n(const int ago, const int inum_full,
|
||||
const int nall, double **host_x, int *host_type,
|
||||
double *sublo, double *subhi, int *tag, int **nspecial,
|
||||
int **special, const bool eflag, const bool vflag,
|
||||
double *sublo, double *subhi, tagint *tag, int **nspecial,
|
||||
tagint **special, const bool eflag, const bool vflag,
|
||||
const bool eatom, const bool vatom, int &host_start,
|
||||
int **ilist, int **jnum, const double cpu_time,
|
||||
bool &success) {
|
||||
|
|
|
@ -94,8 +94,8 @@ void table_gpu_clear() {
|
|||
|
||||
int ** table_gpu_compute_n(const int ago, const int inum_full,
|
||||
const int nall, double **host_x, int *host_type,
|
||||
double *sublo, double *subhi, int *tag, int **nspecial,
|
||||
int **special, const bool eflag, const bool vflag,
|
||||
double *sublo, double *subhi, tagint *tag, int **nspecial,
|
||||
tagint **special, const bool eflag, const bool vflag,
|
||||
const bool eatom, const bool vatom, int &host_start,
|
||||
int **ilist, int **jnum, const double cpu_time,
|
||||
bool &success) {
|
||||
|
|
|
@ -189,8 +189,8 @@ void YukawaColloidT::compute(const int f_ago, const int inum_full,
|
|||
template <class numtyp, class acctyp>
|
||||
int** YukawaColloidT::compute(const int ago, const int inum_full, const int nall,
|
||||
double **host_x, int *host_type, double *sublo,
|
||||
double *subhi, int *tag, int **nspecial,
|
||||
int **special, const bool eflag, const bool vflag,
|
||||
double *subhi, tagint *tag, int **nspecial,
|
||||
tagint **special, const bool eflag, const bool vflag,
|
||||
const bool eatom, const bool vatom, int &host_start,
|
||||
int **ilist, int **jnum, const double cpu_time, bool &success,
|
||||
double *rad) {
|
||||
|
|
|
@ -82,8 +82,8 @@ class YukawaColloid : public BaseAtomic<numtyp, acctyp> {
|
|||
/// Pair loop with device neighboring
|
||||
int** compute(const int ago, const int inum_full, const int nall,
|
||||
double **host_x, int *host_type, double *sublo,
|
||||
double *subhi, int *tag, int **nspecial,
|
||||
int **special, const bool eflag, const bool vflag,
|
||||
double *subhi, tagint *tag, int **nspecial,
|
||||
tagint **special, const bool eflag, const bool vflag,
|
||||
const bool eatom, const bool vatom, int &host_start,
|
||||
int **ilist, int **jnum, const double cpu_time,
|
||||
bool &success, double *rad);
|
||||
|
|
|
@ -94,8 +94,8 @@ void ykcolloid_gpu_clear() {
|
|||
|
||||
int ** ykcolloid_gpu_compute_n(const int ago, const int inum_full,
|
||||
const int nall, double **host_x, int *host_type,
|
||||
double *sublo, double *subhi, int *tag, int **nspecial,
|
||||
int **special, const bool eflag, const bool vflag,
|
||||
double *sublo, double *subhi, tagint *tag, int **nspecial,
|
||||
tagint **special, const bool eflag, const bool vflag,
|
||||
const bool eatom, const bool vatom, int &host_start,
|
||||
int **ilist, int **jnum, const double cpu_time,
|
||||
bool &success, double *host_rad) {
|
||||
|
|
|
@ -94,8 +94,8 @@ void yukawa_gpu_clear() {
|
|||
|
||||
int ** yukawa_gpu_compute_n(const int ago, const int inum_full,
|
||||
const int nall, double **host_x, int *host_type,
|
||||
double *sublo, double *subhi, int *tag, int **nspecial,
|
||||
int **special, const bool eflag, const bool vflag,
|
||||
double *sublo, double *subhi, tagint *tag, int **nspecial,
|
||||
tagint **special, const bool eflag, const bool vflag,
|
||||
const bool eatom, const bool vatom, int &host_start,
|
||||
int **ilist, int **jnum, const double cpu_time,
|
||||
bool &success) {
|
||||
|
|
Loading…
Reference in New Issue