mirror of https://github.com/lammps/lammps.git
git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@7283 f3b2605a-c512-4ea7-a41b-209d697bcdaa
This commit is contained in:
parent
2e2d4c0b81
commit
2728aab0e5
|
@ -146,6 +146,10 @@ void Neighbor::alloc(bool &success) {
|
|||
ptr+=_max_nbors;
|
||||
}
|
||||
_c_bytes+=dev_host_nbor.row_bytes()+dev_host_numj.row_bytes();
|
||||
} else {
|
||||
// Some OpenCL implementations return errors for NULL pointers as args
|
||||
dev_host_nbor.view(dev_nbor);
|
||||
dev_host_numj.view(dev_nbor);
|
||||
}
|
||||
if (_maxspecial>0) {
|
||||
dev_nspecial.clear();
|
||||
|
@ -460,7 +464,8 @@ void Neighbor::build_nbor_list(double **x, const int inum, const int host_inum,
|
|||
ptr+=mn;
|
||||
}
|
||||
_gpu_bytes+=dev_host_nbor.row_bytes();
|
||||
}
|
||||
} else
|
||||
dev_host_nbor.view(dev_nbor);
|
||||
if (_alloc_packed) {
|
||||
dev_packed.clear();
|
||||
success=success && (dev_packed.alloc((mn+2)*_max_atoms,*dev,
|
||||
|
|
|
@ -200,7 +200,7 @@ __kernel void calc_neigh_list_cell(__global numtyp4 *x_,
|
|||
r2 = diff.x*diff.x + diff.y*diff.y + diff.z*diff.z;
|
||||
if (r2 < cell_size*cell_size && r2 > 1e-5) {
|
||||
cnt++;
|
||||
if (cnt < neigh_bin_size) {
|
||||
if (cnt <= neigh_bin_size) {
|
||||
*neigh_list = pid_j;
|
||||
neigh_list++;
|
||||
if ((cnt & (t_per_atom-1))==0)
|
||||
|
|
|
@ -50,7 +50,8 @@ void NeighborShared::compile_kernels(UCL_Device &dev, const int gpu_nbor) {
|
|||
return;
|
||||
|
||||
_gpu_nbor=gpu_nbor;
|
||||
std::string flags="-cl-fast-relaxed-math -cl-mad-enable -D"+
|
||||
std::string flags="-cl-fast-relaxed-math -cl-mad-enable "+
|
||||
std::string(OCL_PRECISION_COMPILE)+" -D"+
|
||||
std::string(OCL_VENDOR);
|
||||
|
||||
if (_gpu_nbor==0) {
|
||||
|
|
|
@ -67,6 +67,22 @@
|
|||
|
||||
#ifdef NV_KERNEL
|
||||
|
||||
#define GLOBAL_ID_X threadIdx.x+mul24(blockIdx.x,blockDim.x)
|
||||
#define GLOBAL_ID_Y threadIdx.y+mul24(blockIdx.y,blockDim.y)
|
||||
#define GLOBAL_SIZE_X mul24(gridDim.x,blockDim.x);
|
||||
#define GLOBAL_SIZE_Y mul24(gridDim.y,blockDim.y);
|
||||
#define THREAD_ID_X threadIdx.x
|
||||
#define THREAD_ID_Y threadIdx.y
|
||||
#define BLOCK_ID_X blockIdx.x
|
||||
#define BLOCK_ID_Y blockIdx.y
|
||||
#define BLOCK_SIZE_X blockDim.x
|
||||
#define BLOCK_SIZE_Y blockDim.y
|
||||
#define __kernel extern "C" __global__
|
||||
#define __local __shared__
|
||||
#define __global
|
||||
#define atom_add atomicAdd
|
||||
#define ucl_inline static __inline__ __device__
|
||||
|
||||
#ifdef __CUDA_ARCH__
|
||||
#define ARCH __CUDA_ARCH__
|
||||
#else
|
||||
|
@ -120,24 +136,7 @@ struct __builtin_align__(16) _double4
|
|||
typedef struct _double4 double4;
|
||||
#endif
|
||||
|
||||
#define GLOBAL_ID_X threadIdx.x+mul24(blockIdx.x,blockDim.x)
|
||||
#define GLOBAL_ID_Y threadIdx.y+mul24(blockIdx.y,blockDim.y)
|
||||
#define GLOBAL_SIZE_X mul24(gridDim.x,blockDim.x);
|
||||
#define GLOBAL_SIZE_Y mul24(gridDim.y,blockDim.y);
|
||||
#define THREAD_ID_X threadIdx.x
|
||||
#define THREAD_ID_Y threadIdx.y
|
||||
#define BLOCK_ID_X blockIdx.x
|
||||
#define BLOCK_ID_Y blockIdx.y
|
||||
#define BLOCK_SIZE_X blockDim.x
|
||||
#define BLOCK_SIZE_Y blockDim.y
|
||||
#define __kernel extern "C" __global__
|
||||
#define __local __shared__
|
||||
#define __global
|
||||
#define atom_add atomicAdd
|
||||
#define ucl_inline static __inline__ __device__
|
||||
|
||||
|
||||
#ifndef _DOUBLE_DOUBLE
|
||||
#ifdef _DOUBLE_DOUBLE
|
||||
|
||||
#define ucl_exp exp
|
||||
#define ucl_powr pow
|
||||
|
@ -156,20 +155,18 @@ typedef struct _double4 double4;
|
|||
#define ucl_ceil ceilf
|
||||
#define ucl_abs fabsf
|
||||
#define ucl_recip(x) ((numtyp)1.0/(x))
|
||||
#define ucl_rsqrt rsqrtf
|
||||
#define ucl_sqrt sqrtf
|
||||
|
||||
#ifdef NO_HARDWARE_TRANSCENDENTALS
|
||||
|
||||
#define ucl_exp expf
|
||||
#define ucl_powr powf
|
||||
#define ucl_rsqrt rsqrtf
|
||||
#define ucl_sqrt sqrtf
|
||||
|
||||
#else
|
||||
|
||||
#define ucl_exp __expf
|
||||
#define ucl_powr __powf
|
||||
#define ucl_rsqrt __rsqrtf
|
||||
#define ucl_sqrt __sqrtf
|
||||
|
||||
#endif
|
||||
|
||||
|
@ -255,6 +252,10 @@ typedef struct _double4 double4;
|
|||
#define ucl_ceil ceil
|
||||
#define ucl_abs fabs
|
||||
|
||||
#ifdef _DOUBLE_DOUBLE
|
||||
#define NO_HARDWARE_TRANSCENDENTALS
|
||||
#endif
|
||||
|
||||
#ifdef NO_HARDWARE_TRANSCENDENTALS
|
||||
|
||||
#define ucl_exp exp
|
||||
|
|
Loading…
Reference in New Issue