forked from lijiext/lammps
git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@6136 f3b2605a-c512-4ea7-a41b-209d697bcdaa
This commit is contained in:
parent
38acfd9fad
commit
14a348470c
|
@ -20,7 +20,11 @@
|
|||
CUDA_HOME = /usr/local/cuda
|
||||
NVCC = nvcc
|
||||
|
||||
# newer CUDA
|
||||
CUDA_ARCH = -arch=sm_13
|
||||
# older CUDA
|
||||
#CUDA_ARCH = -arch=sm_10 -DCUDA_PRE_THREE
|
||||
|
||||
CUDA_PRECISION = -D_SINGLE_SINGLE
|
||||
CUDA_INCLUDE = -I$(CUDA_HOME)/include
|
||||
CUDA_LIB = -L$(CUDA_HOME)/lib64
|
||||
|
|
|
@ -33,13 +33,17 @@ NOTE: Installation of the CUDA SDK is not required.
|
|||
|
||||
Current pair styles supporting GPU acceleration:
|
||||
|
||||
1. lj/cut/gpu
|
||||
2. lj/cut/coul/cut/gpu
|
||||
3. lj/cut/coul/long/gpu
|
||||
4. lj96/cut/gpu
|
||||
5. gayberne/gpu
|
||||
6. cmm/cg/gpu
|
||||
7. cmm/cg/coul/long/gpu
|
||||
1. lj/cut
|
||||
2. lj96/cut
|
||||
3. lj/expand
|
||||
4. lj/cut/coul/cut
|
||||
5. lj/cut/coul/long
|
||||
6. lj/charmm/coul/long
|
||||
7. morse
|
||||
8. cg/cmm
|
||||
9. cg/cmm/coul/long
|
||||
10. gayberne
|
||||
11. pppm
|
||||
|
||||
MULTIPLE LAMMPS PROCESSES
|
||||
|
||||
|
@ -52,12 +56,12 @@ LAMMPS user manual for details on running with GPU acceleration.
|
|||
|
||||
BUILDING AND PRECISION MODES
|
||||
|
||||
To build, edit the CUDA_ARCH, CUDA_PRECISION, CUDA_HOME, NVCC, CUDA_INCLUD,
|
||||
CUDA_LIB and CUDA_OPTS variables in one of the Makefiles. CUDA_ARCH should
|
||||
be set based on the compute capability of your GPU. This can be verified by
|
||||
running the nvc_get_devices executable after the build is complete.
|
||||
Additionally, the GPU package must be installed and compiled for LAMMPS.
|
||||
This may require editing the gpu_SYSPATH variable in the LAMMPS makefile.
|
||||
To build, edit the CUDA_ARCH, CUDA_PRECISION, CUDA_HOME variables in one of
|
||||
the Makefiles. CUDA_ARCH should be set based on the compute capability of
|
||||
your GPU. This can be verified by running the nvc_get_devices executable after
|
||||
the build is complete. Additionally, the GPU package must be installed and
|
||||
compiled for LAMMPS. This may require editing the gpu_SYSPATH variable in the
|
||||
LAMMPS makefile.
|
||||
|
||||
Please note that the GPU library accesses the CUDA driver library directly,
|
||||
so it needs to be linked not only to the CUDA runtime library (libcudart.so)
|
||||
|
@ -74,6 +78,10 @@ the CUDA_PRECISION variable:
|
|||
CUDA_PREC = -D_DOUBLE_DOUBLE # Double precision for all calculations
|
||||
CUDA_PREC = -D_SINGLE_DOUBLE # Accumulation of forces, etc. in double
|
||||
|
||||
NOTE: PPPM acceleration can only be run on GPUs with compute capability>=1.1.
|
||||
You will get the error "GPU library not compiled for this accelerator."
|
||||
when attempting to run PPPM on a GPU with compute capability 1.0.
|
||||
|
||||
NOTE: Double precision is only supported on certain GPUs (with
|
||||
compute capability>=1.3).
|
||||
|
||||
|
@ -83,15 +91,17 @@ NOTE: For Tesla and other graphics cards with compute capability>=1.3,
|
|||
NOTE: For Fermi, make sure that -arch=sm_20 is set on the CUDA_ARCH line.
|
||||
|
||||
NOTE: The gayberne/gpu pair style will only be installed if the ASPHERE
|
||||
package has been installed before installing the GPU package in LAMMPS.
|
||||
package has been installed.
|
||||
|
||||
NOTE: The cg/cmm/gpu and cg/cmm/coul/long/gpu pair styles will only be
|
||||
installed if the USER-CG-CMM package has been installed before
|
||||
installing the GPU package in LAMMPS.
|
||||
installed if the USER-CG-CMM package has been installed.
|
||||
|
||||
NOTE: The lj/cut/coul/long/gpu and cg/cmm/coul/long/gpu style will only be
|
||||
installed if the KSPACE package has been installed before installing
|
||||
the GPU package in LAMMPS.
|
||||
NOTE: The lj/cut/coul/long/gpu, cg/cmm/coul/long/gpu, pppm/gpu/single, and
|
||||
pppm/gpu/double styles will only be installed if the KSPACE package has
|
||||
been installed.
|
||||
|
||||
NOTE: The lj/charmm/coul/long will only be installed if the MOLECULE package
|
||||
has been installed.
|
||||
|
||||
EXAMPLE BUILD PROCESS
|
||||
|
||||
|
@ -105,7 +115,3 @@ make yes-asphere
|
|||
make yes-kspace
|
||||
make yes-gpu
|
||||
make linux
|
||||
|
||||
------------------------------------------------------------------------
|
||||
Last merge with gpulammps: r561 on 2010-11-12
|
||||
------------------------------------------------------------------------
|
||||
|
|
|
@ -18,30 +18,6 @@
|
|||
#ifndef CMM_GPU_KERNEL
|
||||
#define CMM_GPU_KERNEL
|
||||
|
||||
#ifdef _DOUBLE_DOUBLE
|
||||
#define numtyp double
|
||||
#define numtyp2 double2
|
||||
#define numtyp4 double4
|
||||
#define acctyp double
|
||||
#define acctyp4 double4
|
||||
#endif
|
||||
|
||||
#ifdef _SINGLE_DOUBLE
|
||||
#define numtyp float
|
||||
#define numtyp2 float2
|
||||
#define numtyp4 float4
|
||||
#define acctyp double
|
||||
#define acctyp4 double4
|
||||
#endif
|
||||
|
||||
#ifndef numtyp
|
||||
#define numtyp float
|
||||
#define numtyp2 float2
|
||||
#define numtyp4 float4
|
||||
#define acctyp float
|
||||
#define acctyp4 float4
|
||||
#endif
|
||||
|
||||
#ifdef NV_KERNEL
|
||||
|
||||
#include "nv_kernel_def.h"
|
||||
|
@ -75,6 +51,30 @@ __inline float4 fetch_pos(const int& i, const float4 *pos)
|
|||
|
||||
#endif
|
||||
|
||||
#ifdef _DOUBLE_DOUBLE
|
||||
#define numtyp double
|
||||
#define numtyp2 double2
|
||||
#define numtyp4 double4
|
||||
#define acctyp double
|
||||
#define acctyp4 double4
|
||||
#endif
|
||||
|
||||
#ifdef _SINGLE_DOUBLE
|
||||
#define numtyp float
|
||||
#define numtyp2 float2
|
||||
#define numtyp4 float4
|
||||
#define acctyp double
|
||||
#define acctyp4 double4
|
||||
#endif
|
||||
|
||||
#ifndef numtyp
|
||||
#define numtyp float
|
||||
#define numtyp2 float2
|
||||
#define numtyp4 float4
|
||||
#define acctyp float
|
||||
#define acctyp4 float4
|
||||
#endif
|
||||
|
||||
#define SBBITS 30
|
||||
#define NEIGHMASK 0x3FFFFFFF
|
||||
__inline int sbmask(int j) { return j >> SBBITS & 3; }
|
||||
|
|
|
@ -18,38 +18,6 @@
|
|||
#ifndef CMML_GPU_KERNEL
|
||||
#define CMML_GPU_KERNEL
|
||||
|
||||
#ifdef _DOUBLE_DOUBLE
|
||||
#define numtyp double
|
||||
#define numtyp2 double2
|
||||
#define numtyp4 double4
|
||||
#define acctyp double
|
||||
#define acctyp4 double4
|
||||
#endif
|
||||
|
||||
#ifdef _SINGLE_DOUBLE
|
||||
#define numtyp float
|
||||
#define numtyp2 float2
|
||||
#define numtyp4 float4
|
||||
#define acctyp double
|
||||
#define acctyp4 double4
|
||||
#endif
|
||||
|
||||
#ifndef numtyp
|
||||
#define numtyp float
|
||||
#define numtyp2 float2
|
||||
#define numtyp4 float4
|
||||
#define acctyp float
|
||||
#define acctyp4 float4
|
||||
#endif
|
||||
|
||||
#define EWALD_F (numtyp)1.12837917
|
||||
#define EWALD_P (numtyp)0.3275911
|
||||
#define A1 (numtyp)0.254829592
|
||||
#define A2 (numtyp)-0.284496736
|
||||
#define A3 (numtyp)1.421413741
|
||||
#define A4 (numtyp)-1.453152027
|
||||
#define A5 (numtyp)1.061405429
|
||||
|
||||
#ifdef NV_KERNEL
|
||||
|
||||
#include "nv_kernel_def.h"
|
||||
|
@ -93,6 +61,38 @@ __inline float fetch_q(const int& i, const float *q)
|
|||
|
||||
#endif
|
||||
|
||||
#ifdef _DOUBLE_DOUBLE
|
||||
#define numtyp double
|
||||
#define numtyp2 double2
|
||||
#define numtyp4 double4
|
||||
#define acctyp double
|
||||
#define acctyp4 double4
|
||||
#endif
|
||||
|
||||
#ifdef _SINGLE_DOUBLE
|
||||
#define numtyp float
|
||||
#define numtyp2 float2
|
||||
#define numtyp4 float4
|
||||
#define acctyp double
|
||||
#define acctyp4 double4
|
||||
#endif
|
||||
|
||||
#ifndef numtyp
|
||||
#define numtyp float
|
||||
#define numtyp2 float2
|
||||
#define numtyp4 float4
|
||||
#define acctyp float
|
||||
#define acctyp4 float4
|
||||
#endif
|
||||
|
||||
#define EWALD_F (numtyp)1.12837917
|
||||
#define EWALD_P (numtyp)0.3275911
|
||||
#define A1 (numtyp)0.254829592
|
||||
#define A2 (numtyp)-0.284496736
|
||||
#define A3 (numtyp)1.421413741
|
||||
#define A4 (numtyp)-1.453152027
|
||||
#define A5 (numtyp)1.061405429
|
||||
|
||||
#define SBBITS 30
|
||||
#define NEIGHMASK 0x3FFFFFFF
|
||||
__inline int sbmask(int j) { return j >> SBBITS & 3; }
|
||||
|
|
|
@ -18,40 +18,6 @@
|
|||
#ifndef CRML_GPU_KERNEL
|
||||
#define CRML_GPU_KERNEL
|
||||
|
||||
#define MAX_BIO_SHARED_TYPES 128
|
||||
|
||||
#ifdef _DOUBLE_DOUBLE
|
||||
#define numtyp double
|
||||
#define numtyp2 double2
|
||||
#define numtyp4 double4
|
||||
#define acctyp double
|
||||
#define acctyp4 double4
|
||||
#endif
|
||||
|
||||
#ifdef _SINGLE_DOUBLE
|
||||
#define numtyp float
|
||||
#define numtyp2 float2
|
||||
#define numtyp4 float4
|
||||
#define acctyp double
|
||||
#define acctyp4 double4
|
||||
#endif
|
||||
|
||||
#ifndef numtyp
|
||||
#define numtyp float
|
||||
#define numtyp2 float2
|
||||
#define numtyp4 float4
|
||||
#define acctyp float
|
||||
#define acctyp4 float4
|
||||
#endif
|
||||
|
||||
#define EWALD_F (numtyp)1.12837917
|
||||
#define EWALD_P (numtyp)0.3275911
|
||||
#define A1 (numtyp)0.254829592
|
||||
#define A2 (numtyp)-0.284496736
|
||||
#define A3 (numtyp)1.421413741
|
||||
#define A4 (numtyp)-1.453152027
|
||||
#define A5 (numtyp)1.061405429
|
||||
|
||||
#ifdef NV_KERNEL
|
||||
|
||||
#include "nv_kernel_def.h"
|
||||
|
@ -94,6 +60,40 @@ __inline float fetch_q(const int& i, const float *q)
|
|||
|
||||
#endif
|
||||
|
||||
#define MAX_BIO_SHARED_TYPES 128
|
||||
|
||||
#ifdef _DOUBLE_DOUBLE
|
||||
#define numtyp double
|
||||
#define numtyp2 double2
|
||||
#define numtyp4 double4
|
||||
#define acctyp double
|
||||
#define acctyp4 double4
|
||||
#endif
|
||||
|
||||
#ifdef _SINGLE_DOUBLE
|
||||
#define numtyp float
|
||||
#define numtyp2 float2
|
||||
#define numtyp4 float4
|
||||
#define acctyp double
|
||||
#define acctyp4 double4
|
||||
#endif
|
||||
|
||||
#ifndef numtyp
|
||||
#define numtyp float
|
||||
#define numtyp2 float2
|
||||
#define numtyp4 float4
|
||||
#define acctyp float
|
||||
#define acctyp4 float4
|
||||
#endif
|
||||
|
||||
#define EWALD_F (numtyp)1.12837917
|
||||
#define EWALD_P (numtyp)0.3275911
|
||||
#define A1 (numtyp)0.254829592
|
||||
#define A2 (numtyp)-0.284496736
|
||||
#define A3 (numtyp)1.421413741
|
||||
#define A4 (numtyp)-1.453152027
|
||||
#define A5 (numtyp)1.061405429
|
||||
|
||||
#define SBBITS 30
|
||||
#define NEIGHMASK 0x3FFFFFFF
|
||||
__inline int sbmask(int j) { return j >> SBBITS & 3; }
|
||||
|
|
|
@ -18,16 +18,6 @@
|
|||
#ifndef PAIR_GPU_KERNEL_H
|
||||
#define PAIR_GPU_KERNEL_H
|
||||
|
||||
#ifdef _DOUBLE_DOUBLE
|
||||
#define numtyp double
|
||||
#define numtyp2 double2
|
||||
#define numtyp4 double4
|
||||
#else
|
||||
#define numtyp float
|
||||
#define numtyp2 float2
|
||||
#define numtyp4 float4
|
||||
#endif
|
||||
|
||||
#ifdef NV_KERNEL
|
||||
|
||||
#include "nv_kernel_def.h"
|
||||
|
@ -44,6 +34,16 @@
|
|||
|
||||
#endif
|
||||
|
||||
#ifdef _DOUBLE_DOUBLE
|
||||
#define numtyp double
|
||||
#define numtyp2 double2
|
||||
#define numtyp4 double4
|
||||
#else
|
||||
#define numtyp float
|
||||
#define numtyp2 float2
|
||||
#define numtyp4 float4
|
||||
#endif
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Unpack neighbors from dev_ij array into dev_nbor matrix for coalesced access
|
||||
// -- Only unpack neighbors matching the specified inclusive range of forms
|
||||
|
|
|
@ -33,6 +33,14 @@
|
|||
#define MEM_THREADS 32
|
||||
#endif
|
||||
|
||||
#ifdef CUDA_PRE_THREE
|
||||
struct __builtin_align__(16) _double4
|
||||
{
|
||||
double x, y, z, w;
|
||||
};
|
||||
typedef struct _double4 double4;
|
||||
#endif
|
||||
|
||||
#define GLOBAL_ID_X threadIdx.x+mul24(blockIdx.x,blockDim.x)
|
||||
#define GLOBAL_ID_Y threadIdx.y+mul24(blockIdx.y,blockDim.y)
|
||||
#define GLOBAL_SIZE_X mul24(gridDim.x,blockDim.x);
|
||||
|
|
|
@ -18,30 +18,6 @@
|
|||
#ifndef LJ96_GPU_KERNEL
|
||||
#define LJ96_GPU_KERNEL
|
||||
|
||||
#ifdef _DOUBLE_DOUBLE
|
||||
#define numtyp double
|
||||
#define numtyp2 double2
|
||||
#define numtyp4 double4
|
||||
#define acctyp double
|
||||
#define acctyp4 double4
|
||||
#endif
|
||||
|
||||
#ifdef _SINGLE_DOUBLE
|
||||
#define numtyp float
|
||||
#define numtyp2 float2
|
||||
#define numtyp4 float4
|
||||
#define acctyp double
|
||||
#define acctyp4 double4
|
||||
#endif
|
||||
|
||||
#ifndef numtyp
|
||||
#define numtyp float
|
||||
#define numtyp2 float2
|
||||
#define numtyp4 float4
|
||||
#define acctyp float
|
||||
#define acctyp4 float4
|
||||
#endif
|
||||
|
||||
#ifdef NV_KERNEL
|
||||
|
||||
#include "nv_kernel_def.h"
|
||||
|
@ -75,6 +51,30 @@ __inline float4 fetch_pos(const int& i, const float4 *pos)
|
|||
|
||||
#endif
|
||||
|
||||
#ifdef _DOUBLE_DOUBLE
|
||||
#define numtyp double
|
||||
#define numtyp2 double2
|
||||
#define numtyp4 double4
|
||||
#define acctyp double
|
||||
#define acctyp4 double4
|
||||
#endif
|
||||
|
||||
#ifdef _SINGLE_DOUBLE
|
||||
#define numtyp float
|
||||
#define numtyp2 float2
|
||||
#define numtyp4 float4
|
||||
#define acctyp double
|
||||
#define acctyp4 double4
|
||||
#endif
|
||||
|
||||
#ifndef numtyp
|
||||
#define numtyp float
|
||||
#define numtyp2 float2
|
||||
#define numtyp4 float4
|
||||
#define acctyp float
|
||||
#define acctyp4 float4
|
||||
#endif
|
||||
|
||||
#define SBBITS 30
|
||||
#define NEIGHMASK 0x3FFFFFFF
|
||||
__inline int sbmask(int j) { return j >> SBBITS & 3; }
|
||||
|
|
|
@ -18,30 +18,6 @@
|
|||
#ifndef LJ_GPU_KERNEL
|
||||
#define LJ_GPU_KERNEL
|
||||
|
||||
#ifdef _DOUBLE_DOUBLE
|
||||
#define numtyp double
|
||||
#define numtyp2 double2
|
||||
#define numtyp4 double4
|
||||
#define acctyp double
|
||||
#define acctyp4 double4
|
||||
#endif
|
||||
|
||||
#ifdef _SINGLE_DOUBLE
|
||||
#define numtyp float
|
||||
#define numtyp2 float2
|
||||
#define numtyp4 float4
|
||||
#define acctyp double
|
||||
#define acctyp4 double4
|
||||
#endif
|
||||
|
||||
#ifndef numtyp
|
||||
#define numtyp float
|
||||
#define numtyp2 float2
|
||||
#define numtyp4 float4
|
||||
#define acctyp float
|
||||
#define acctyp4 float4
|
||||
#endif
|
||||
|
||||
#ifdef NV_KERNEL
|
||||
|
||||
#include "nv_kernel_def.h"
|
||||
|
@ -75,6 +51,30 @@ __inline float4 fetch_pos(const int& i, const float4 *pos)
|
|||
|
||||
#endif
|
||||
|
||||
#ifdef _DOUBLE_DOUBLE
|
||||
#define numtyp double
|
||||
#define numtyp2 double2
|
||||
#define numtyp4 double4
|
||||
#define acctyp double
|
||||
#define acctyp4 double4
|
||||
#endif
|
||||
|
||||
#ifdef _SINGLE_DOUBLE
|
||||
#define numtyp float
|
||||
#define numtyp2 float2
|
||||
#define numtyp4 float4
|
||||
#define acctyp double
|
||||
#define acctyp4 double4
|
||||
#endif
|
||||
|
||||
#ifndef numtyp
|
||||
#define numtyp float
|
||||
#define numtyp2 float2
|
||||
#define numtyp4 float4
|
||||
#define acctyp float
|
||||
#define acctyp4 float4
|
||||
#endif
|
||||
|
||||
#define SBBITS 30
|
||||
#define NEIGHMASK 0x3FFFFFFF
|
||||
__inline int sbmask(int j) { return j >> SBBITS & 3; }
|
||||
|
|
|
@ -18,30 +18,6 @@
|
|||
#ifndef LJE_GPU_KERNEL
|
||||
#define LJE_GPU_KERNEL
|
||||
|
||||
#ifdef _DOUBLE_DOUBLE
|
||||
#define numtyp double
|
||||
#define numtyp2 double2
|
||||
#define numtyp4 double4
|
||||
#define acctyp double
|
||||
#define acctyp4 double4
|
||||
#endif
|
||||
|
||||
#ifdef _SINGLE_DOUBLE
|
||||
#define numtyp float
|
||||
#define numtyp2 float2
|
||||
#define numtyp4 float4
|
||||
#define acctyp double
|
||||
#define acctyp4 double4
|
||||
#endif
|
||||
|
||||
#ifndef numtyp
|
||||
#define numtyp float
|
||||
#define numtyp2 float2
|
||||
#define numtyp4 float4
|
||||
#define acctyp float
|
||||
#define acctyp4 float4
|
||||
#endif
|
||||
|
||||
#ifdef NV_KERNEL
|
||||
|
||||
#include "nv_kernel_def.h"
|
||||
|
@ -75,6 +51,30 @@ __inline float4 fetch_pos(const int& i, const float4 *pos)
|
|||
|
||||
#endif
|
||||
|
||||
#ifdef _DOUBLE_DOUBLE
|
||||
#define numtyp double
|
||||
#define numtyp2 double2
|
||||
#define numtyp4 double4
|
||||
#define acctyp double
|
||||
#define acctyp4 double4
|
||||
#endif
|
||||
|
||||
#ifdef _SINGLE_DOUBLE
|
||||
#define numtyp float
|
||||
#define numtyp2 float2
|
||||
#define numtyp4 float4
|
||||
#define acctyp double
|
||||
#define acctyp4 double4
|
||||
#endif
|
||||
|
||||
#ifndef numtyp
|
||||
#define numtyp float
|
||||
#define numtyp2 float2
|
||||
#define numtyp4 float4
|
||||
#define acctyp float
|
||||
#define acctyp4 float4
|
||||
#endif
|
||||
|
||||
#define SBBITS 30
|
||||
#define NEIGHMASK 0x3FFFFFFF
|
||||
__inline int sbmask(int j) { return j >> SBBITS & 3; }
|
||||
|
|
|
@ -18,30 +18,6 @@
|
|||
#ifndef LJC_GPU_KERNEL
|
||||
#define LJC_GPU_KERNEL
|
||||
|
||||
#ifdef _DOUBLE_DOUBLE
|
||||
#define numtyp double
|
||||
#define numtyp2 double2
|
||||
#define numtyp4 double4
|
||||
#define acctyp double
|
||||
#define acctyp4 double4
|
||||
#endif
|
||||
|
||||
#ifdef _SINGLE_DOUBLE
|
||||
#define numtyp float
|
||||
#define numtyp2 float2
|
||||
#define numtyp4 float4
|
||||
#define acctyp double
|
||||
#define acctyp4 double4
|
||||
#endif
|
||||
|
||||
#ifndef numtyp
|
||||
#define numtyp float
|
||||
#define numtyp2 float2
|
||||
#define numtyp4 float4
|
||||
#define acctyp float
|
||||
#define acctyp4 float4
|
||||
#endif
|
||||
|
||||
#ifdef NV_KERNEL
|
||||
|
||||
#include "nv_kernel_def.h"
|
||||
|
@ -85,6 +61,30 @@ __inline float fetch_q(const int& i, const float *q)
|
|||
|
||||
#endif
|
||||
|
||||
#ifdef _DOUBLE_DOUBLE
|
||||
#define numtyp double
|
||||
#define numtyp2 double2
|
||||
#define numtyp4 double4
|
||||
#define acctyp double
|
||||
#define acctyp4 double4
|
||||
#endif
|
||||
|
||||
#ifdef _SINGLE_DOUBLE
|
||||
#define numtyp float
|
||||
#define numtyp2 float2
|
||||
#define numtyp4 float4
|
||||
#define acctyp double
|
||||
#define acctyp4 double4
|
||||
#endif
|
||||
|
||||
#ifndef numtyp
|
||||
#define numtyp float
|
||||
#define numtyp2 float2
|
||||
#define numtyp4 float4
|
||||
#define acctyp float
|
||||
#define acctyp4 float4
|
||||
#endif
|
||||
|
||||
#define SBBITS 30
|
||||
#define NEIGHMASK 0x3FFFFFFF
|
||||
__inline int sbmask(int j) { return j >> SBBITS & 3; }
|
||||
|
|
|
@ -18,38 +18,6 @@
|
|||
#ifndef LJCL_GPU_KERNEL
|
||||
#define LJCL_GPU_KERNEL
|
||||
|
||||
#ifdef _DOUBLE_DOUBLE
|
||||
#define numtyp double
|
||||
#define numtyp2 double2
|
||||
#define numtyp4 double4
|
||||
#define acctyp double
|
||||
#define acctyp4 double4
|
||||
#endif
|
||||
|
||||
#ifdef _SINGLE_DOUBLE
|
||||
#define numtyp float
|
||||
#define numtyp2 float2
|
||||
#define numtyp4 float4
|
||||
#define acctyp double
|
||||
#define acctyp4 double4
|
||||
#endif
|
||||
|
||||
#ifndef numtyp
|
||||
#define numtyp float
|
||||
#define numtyp2 float2
|
||||
#define numtyp4 float4
|
||||
#define acctyp float
|
||||
#define acctyp4 float4
|
||||
#endif
|
||||
|
||||
#define EWALD_F (numtyp)1.12837917
|
||||
#define EWALD_P (numtyp)0.3275911
|
||||
#define A1 (numtyp)0.254829592
|
||||
#define A2 (numtyp)-0.284496736
|
||||
#define A3 (numtyp)1.421413741
|
||||
#define A4 (numtyp)-1.453152027
|
||||
#define A5 (numtyp)1.061405429
|
||||
|
||||
#ifdef NV_KERNEL
|
||||
|
||||
#include "nv_kernel_def.h"
|
||||
|
@ -93,6 +61,38 @@ __inline float fetch_q(const int& i, const float *q)
|
|||
|
||||
#endif
|
||||
|
||||
#ifdef _DOUBLE_DOUBLE
|
||||
#define numtyp double
|
||||
#define numtyp2 double2
|
||||
#define numtyp4 double4
|
||||
#define acctyp double
|
||||
#define acctyp4 double4
|
||||
#endif
|
||||
|
||||
#ifdef _SINGLE_DOUBLE
|
||||
#define numtyp float
|
||||
#define numtyp2 float2
|
||||
#define numtyp4 float4
|
||||
#define acctyp double
|
||||
#define acctyp4 double4
|
||||
#endif
|
||||
|
||||
#ifndef numtyp
|
||||
#define numtyp float
|
||||
#define numtyp2 float2
|
||||
#define numtyp4 float4
|
||||
#define acctyp float
|
||||
#define acctyp4 float4
|
||||
#endif
|
||||
|
||||
#define EWALD_F (numtyp)1.12837917
|
||||
#define EWALD_P (numtyp)0.3275911
|
||||
#define A1 (numtyp)0.254829592
|
||||
#define A2 (numtyp)-0.284496736
|
||||
#define A3 (numtyp)1.421413741
|
||||
#define A4 (numtyp)-1.453152027
|
||||
#define A5 (numtyp)1.061405429
|
||||
|
||||
#define SBBITS 30
|
||||
#define NEIGHMASK 0x3FFFFFFF
|
||||
__inline int sbmask(int j) { return j >> SBBITS & 3; }
|
||||
|
|
|
@ -18,30 +18,6 @@
|
|||
#ifndef MORSE_GPU_KERNEL
|
||||
#define MORSE_GPU_KERNEL
|
||||
|
||||
#ifdef _DOUBLE_DOUBLE
|
||||
#define numtyp double
|
||||
#define numtyp2 double2
|
||||
#define numtyp4 double4
|
||||
#define acctyp double
|
||||
#define acctyp4 double4
|
||||
#endif
|
||||
|
||||
#ifdef _SINGLE_DOUBLE
|
||||
#define numtyp float
|
||||
#define numtyp2 float2
|
||||
#define numtyp4 float4
|
||||
#define acctyp double
|
||||
#define acctyp4 double4
|
||||
#endif
|
||||
|
||||
#ifndef numtyp
|
||||
#define numtyp float
|
||||
#define numtyp2 float2
|
||||
#define numtyp4 float4
|
||||
#define acctyp float
|
||||
#define acctyp4 float4
|
||||
#endif
|
||||
|
||||
#ifdef NV_KERNEL
|
||||
|
||||
#include "nv_kernel_def.h"
|
||||
|
@ -75,6 +51,30 @@ __inline float4 fetch_pos(const int& i, const float4 *pos)
|
|||
|
||||
#endif
|
||||
|
||||
#ifdef _DOUBLE_DOUBLE
|
||||
#define numtyp double
|
||||
#define numtyp2 double2
|
||||
#define numtyp4 double4
|
||||
#define acctyp double
|
||||
#define acctyp4 double4
|
||||
#endif
|
||||
|
||||
#ifdef _SINGLE_DOUBLE
|
||||
#define numtyp float
|
||||
#define numtyp2 float2
|
||||
#define numtyp4 float4
|
||||
#define acctyp double
|
||||
#define acctyp4 double4
|
||||
#endif
|
||||
|
||||
#ifndef numtyp
|
||||
#define numtyp float
|
||||
#define numtyp2 float2
|
||||
#define numtyp4 float4
|
||||
#define acctyp float
|
||||
#define acctyp4 float4
|
||||
#endif
|
||||
|
||||
#define SBBITS 30
|
||||
#define NEIGHMASK 0x3FFFFFFF
|
||||
__inline int sbmask(int j) { return j >> SBBITS & 3; }
|
||||
|
|
|
@ -15,6 +15,13 @@
|
|||
Contributing authors: Mike Brown (ORNL), brownw@ornl.gov
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifdef NV_KERNEL
|
||||
#include "geryon/ucl_nv_kernel.h"
|
||||
#else
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64: enable
|
||||
#define GLOBAL_ID_X get_global_id(0)
|
||||
#endif
|
||||
|
||||
#ifdef _DOUBLE_DOUBLE
|
||||
#define numtyp double
|
||||
#define numtyp4 double4
|
||||
|
@ -23,13 +30,6 @@
|
|||
#define numtyp4 float4
|
||||
#endif
|
||||
|
||||
#ifdef NV_KERNEL
|
||||
#include "geryon/ucl_nv_kernel.h"
|
||||
#else
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64: enable
|
||||
#define GLOBAL_ID_X get_global_id(0)
|
||||
#endif
|
||||
|
||||
__kernel void kernel_cast_x(__global numtyp4 *x_type, __global double *x,
|
||||
__global int *type, const int nall) {
|
||||
int ii=GLOBAL_ID_X;
|
||||
|
|
|
@ -549,8 +549,9 @@ int PairGPUDeviceT::compile_kernels() {
|
|||
k_info.run(&d_gpu_lib_data.begin());
|
||||
ucl_copy(h_gpu_lib_data,d_gpu_lib_data,false);
|
||||
|
||||
_ptx_arch=static_cast<double>(h_gpu_lib_data[0])/100.0;
|
||||
#ifndef USE_OPENCL
|
||||
if (static_cast<double>(h_gpu_lib_data[0])/100.0>gpu->arch())
|
||||
if (_ptx_arch>gpu->arch())
|
||||
return -4;
|
||||
#endif
|
||||
|
||||
|
|
|
@ -226,6 +226,8 @@ class PairGPUDevice {
|
|||
inline int block_bio_pair() const { return _block_bio_pair; }
|
||||
/// Return the maximum number of atom types for shared mem with "bio" styles
|
||||
inline int max_bio_shared_types() const { return _max_bio_shared_types; }
|
||||
/// Architecture gpu code compiled for (returns 0 for OpenCL)
|
||||
inline double ptx_arch() const { return _ptx_arch; }
|
||||
|
||||
// -------------------- SHARED DEVICE ROUTINES --------------------
|
||||
// Perform asynchronous zero of integer array
|
||||
|
@ -281,6 +283,7 @@ class PairGPUDevice {
|
|||
int _gpu_mode, _first_device, _last_device, _nthreads;
|
||||
double _particle_split;
|
||||
double _cpu_full;
|
||||
double _ptx_arch;
|
||||
|
||||
int _num_mem_threads, _warp_size, _threads_per_atom, _threads_per_charge;
|
||||
int _pppm_max_spline, _pppm_block;
|
||||
|
|
|
@ -18,27 +18,6 @@
|
|||
#ifndef PPPM_GPU_KERNEL
|
||||
#define PPPM_GPU_KERNEL
|
||||
|
||||
#ifdef _DOUBLE_DOUBLE
|
||||
#define numtyp double
|
||||
#define numtyp4 double4
|
||||
#define acctyp double
|
||||
#define acctyp4 double4
|
||||
#endif
|
||||
|
||||
#ifdef _SINGLE_DOUBLE
|
||||
#define numtyp float
|
||||
#define numtyp4 float4
|
||||
#define acctyp double
|
||||
#define acctyp4 double4
|
||||
#endif
|
||||
|
||||
#ifndef numtyp
|
||||
#define numtyp float
|
||||
#define numtyp4 float4
|
||||
#define acctyp float
|
||||
#define acctyp4 float4
|
||||
#endif
|
||||
|
||||
#ifdef NV_KERNEL
|
||||
|
||||
#include "geryon/ucl_nv_kernel.h"
|
||||
|
@ -67,6 +46,12 @@ __inline float fetch_q(const int& i, const float *q)
|
|||
|
||||
#endif
|
||||
|
||||
// Allow PPPM to compile without atomics for NVIDIA 1.0 cards, error
|
||||
// generated at runtime with use of pppm/gpu
|
||||
#if (__CUDA_ARCH__ < 110)
|
||||
#define atom_add(x,y) 0
|
||||
#endif
|
||||
|
||||
#else
|
||||
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64: enable
|
||||
|
@ -85,6 +70,27 @@ __inline float fetch_q(const int& i, const float *q)
|
|||
|
||||
#endif
|
||||
|
||||
#ifdef _DOUBLE_DOUBLE
|
||||
#define numtyp double
|
||||
#define numtyp4 double4
|
||||
#define acctyp double
|
||||
#define acctyp4 double4
|
||||
#endif
|
||||
|
||||
#ifdef _SINGLE_DOUBLE
|
||||
#define numtyp float
|
||||
#define numtyp4 float4
|
||||
#define acctyp double
|
||||
#define acctyp4 double4
|
||||
#endif
|
||||
|
||||
#ifndef numtyp
|
||||
#define numtyp float
|
||||
#define numtyp4 float4
|
||||
#define acctyp float
|
||||
#define acctyp4 float4
|
||||
#endif
|
||||
|
||||
// Maximum order for spline
|
||||
#define PPPM_MAX_SPLINE 8
|
||||
// Thread block size for PPPM kernels
|
||||
|
|
|
@ -66,7 +66,11 @@ grdtyp * PPPMGPUMemoryT::init(const int nlocal, const int nall, FILE *_screen,
|
|||
flag=-5;
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (device->ptx_arch()>0.0 && device->ptx_arch()<1.1) {
|
||||
flag=-4;
|
||||
return 0;
|
||||
}
|
||||
|
||||
ucl_device=device->gpu;
|
||||
atom=&device->atom;
|
||||
|
||||
|
|
Loading…
Reference in New Issue