git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@6136 f3b2605a-c512-4ea7-a41b-209d697bcdaa

This commit is contained in:
sjplimp 2011-05-18 20:03:24 +00:00
parent 38acfd9fad
commit 14a348470c
18 changed files with 337 additions and 305 deletions

View File

@ -20,7 +20,11 @@
CUDA_HOME = /usr/local/cuda
NVCC = nvcc
# newer CUDA
CUDA_ARCH = -arch=sm_13
# older CUDA
#CUDA_ARCH = -arch=sm_10 -DCUDA_PRE_THREE
CUDA_PRECISION = -D_SINGLE_SINGLE
CUDA_INCLUDE = -I$(CUDA_HOME)/include
CUDA_LIB = -L$(CUDA_HOME)/lib64

View File

@ -33,13 +33,17 @@ NOTE: Installation of the CUDA SDK is not required.
Current pair styles supporting GPU acceleration:
1. lj/cut/gpu
2. lj/cut/coul/cut/gpu
3. lj/cut/coul/long/gpu
4. lj96/cut/gpu
5. gayberne/gpu
6. cmm/cg/gpu
7. cmm/cg/coul/long/gpu
1. lj/cut
2. lj96/cut
3. lj/expand
4. lj/cut/coul/cut
5. lj/cut/coul/long
6. lj/charmm/coul/long
7. morse
8. cg/cmm
9. cg/cmm/coul/long
10. gayberne
11. pppm
MULTIPLE LAMMPS PROCESSES
@ -52,12 +56,12 @@ LAMMPS user manual for details on running with GPU acceleration.
BUILDING AND PRECISION MODES
To build, edit the CUDA_ARCH, CUDA_PRECISION, CUDA_HOME, NVCC, CUDA_INCLUD,
CUDA_LIB and CUDA_OPTS variables in one of the Makefiles. CUDA_ARCH should
be set based on the compute capability of your GPU. This can be verified by
running the nvc_get_devices executable after the build is complete.
Additionally, the GPU package must be installed and compiled for LAMMPS.
This may require editing the gpu_SYSPATH variable in the LAMMPS makefile.
To build, edit the CUDA_ARCH, CUDA_PRECISION, CUDA_HOME variables in one of
the Makefiles. CUDA_ARCH should be set based on the compute capability of
your GPU. This can be verified by running the nvc_get_devices executable after
the build is complete. Additionally, the GPU package must be installed and
compiled for LAMMPS. This may require editing the gpu_SYSPATH variable in the
LAMMPS makefile.
Please note that the GPU library accesses the CUDA driver library directly,
so it needs to be linked not only to the CUDA runtime library (libcudart.so)
@ -74,6 +78,10 @@ the CUDA_PRECISION variable:
CUDA_PREC = -D_DOUBLE_DOUBLE # Double precision for all calculations
CUDA_PREC = -D_SINGLE_DOUBLE # Accumulation of forces, etc. in double
NOTE: PPPM acceleration can only be run on GPUs with compute capability>=1.1.
You will get the error "GPU library not compiled for this accelerator."
when attempting to run PPPM on a GPU with compute capability 1.0.
NOTE: Double precision is only supported on certain GPUs (with
compute capability>=1.3).
@ -83,15 +91,17 @@ NOTE: For Tesla and other graphics cards with compute capability>=1.3,
NOTE: For Fermi, make sure that -arch=sm_20 is set on the CUDA_ARCH line.
NOTE: The gayberne/gpu pair style will only be installed if the ASPHERE
package has been installed before installing the GPU package in LAMMPS.
package has been installed.
NOTE: The cg/cmm/gpu and cg/cmm/coul/long/gpu pair styles will only be
installed if the USER-CG-CMM package has been installed before
installing the GPU package in LAMMPS.
installed if the USER-CG-CMM package has been installed.
NOTE: The lj/cut/coul/long/gpu and cg/cmm/coul/long/gpu style will only be
installed if the KSPACE package has been installed before installing
the GPU package in LAMMPS.
NOTE: The lj/cut/coul/long/gpu, cg/cmm/coul/long/gpu, pppm/gpu/single, and
pppm/gpu/double styles will only be installed if the KSPACE package has
been installed.
NOTE: The lj/charmm/coul/long will only be installed if the MOLECULE package
has been installed.
EXAMPLE BUILD PROCESS
@ -105,7 +115,3 @@ make yes-asphere
make yes-kspace
make yes-gpu
make linux
------------------------------------------------------------------------
Last merge with gpulammps: r561 on 2010-11-12
------------------------------------------------------------------------

View File

@ -18,30 +18,6 @@
#ifndef CMM_GPU_KERNEL
#define CMM_GPU_KERNEL
#ifdef _DOUBLE_DOUBLE
#define numtyp double
#define numtyp2 double2
#define numtyp4 double4
#define acctyp double
#define acctyp4 double4
#endif
#ifdef _SINGLE_DOUBLE
#define numtyp float
#define numtyp2 float2
#define numtyp4 float4
#define acctyp double
#define acctyp4 double4
#endif
#ifndef numtyp
#define numtyp float
#define numtyp2 float2
#define numtyp4 float4
#define acctyp float
#define acctyp4 float4
#endif
#ifdef NV_KERNEL
#include "nv_kernel_def.h"
@ -75,6 +51,30 @@ __inline float4 fetch_pos(const int& i, const float4 *pos)
#endif
#ifdef _DOUBLE_DOUBLE
#define numtyp double
#define numtyp2 double2
#define numtyp4 double4
#define acctyp double
#define acctyp4 double4
#endif
#ifdef _SINGLE_DOUBLE
#define numtyp float
#define numtyp2 float2
#define numtyp4 float4
#define acctyp double
#define acctyp4 double4
#endif
#ifndef numtyp
#define numtyp float
#define numtyp2 float2
#define numtyp4 float4
#define acctyp float
#define acctyp4 float4
#endif
#define SBBITS 30
#define NEIGHMASK 0x3FFFFFFF
__inline int sbmask(int j) { return j >> SBBITS & 3; }

View File

@ -18,38 +18,6 @@
#ifndef CMML_GPU_KERNEL
#define CMML_GPU_KERNEL
#ifdef _DOUBLE_DOUBLE
#define numtyp double
#define numtyp2 double2
#define numtyp4 double4
#define acctyp double
#define acctyp4 double4
#endif
#ifdef _SINGLE_DOUBLE
#define numtyp float
#define numtyp2 float2
#define numtyp4 float4
#define acctyp double
#define acctyp4 double4
#endif
#ifndef numtyp
#define numtyp float
#define numtyp2 float2
#define numtyp4 float4
#define acctyp float
#define acctyp4 float4
#endif
#define EWALD_F (numtyp)1.12837917
#define EWALD_P (numtyp)0.3275911
#define A1 (numtyp)0.254829592
#define A2 (numtyp)-0.284496736
#define A3 (numtyp)1.421413741
#define A4 (numtyp)-1.453152027
#define A5 (numtyp)1.061405429
#ifdef NV_KERNEL
#include "nv_kernel_def.h"
@ -93,6 +61,38 @@ __inline float fetch_q(const int& i, const float *q)
#endif
#ifdef _DOUBLE_DOUBLE
#define numtyp double
#define numtyp2 double2
#define numtyp4 double4
#define acctyp double
#define acctyp4 double4
#endif
#ifdef _SINGLE_DOUBLE
#define numtyp float
#define numtyp2 float2
#define numtyp4 float4
#define acctyp double
#define acctyp4 double4
#endif
#ifndef numtyp
#define numtyp float
#define numtyp2 float2
#define numtyp4 float4
#define acctyp float
#define acctyp4 float4
#endif
#define EWALD_F (numtyp)1.12837917
#define EWALD_P (numtyp)0.3275911
#define A1 (numtyp)0.254829592
#define A2 (numtyp)-0.284496736
#define A3 (numtyp)1.421413741
#define A4 (numtyp)-1.453152027
#define A5 (numtyp)1.061405429
#define SBBITS 30
#define NEIGHMASK 0x3FFFFFFF
__inline int sbmask(int j) { return j >> SBBITS & 3; }

View File

@ -18,40 +18,6 @@
#ifndef CRML_GPU_KERNEL
#define CRML_GPU_KERNEL
#define MAX_BIO_SHARED_TYPES 128
#ifdef _DOUBLE_DOUBLE
#define numtyp double
#define numtyp2 double2
#define numtyp4 double4
#define acctyp double
#define acctyp4 double4
#endif
#ifdef _SINGLE_DOUBLE
#define numtyp float
#define numtyp2 float2
#define numtyp4 float4
#define acctyp double
#define acctyp4 double4
#endif
#ifndef numtyp
#define numtyp float
#define numtyp2 float2
#define numtyp4 float4
#define acctyp float
#define acctyp4 float4
#endif
#define EWALD_F (numtyp)1.12837917
#define EWALD_P (numtyp)0.3275911
#define A1 (numtyp)0.254829592
#define A2 (numtyp)-0.284496736
#define A3 (numtyp)1.421413741
#define A4 (numtyp)-1.453152027
#define A5 (numtyp)1.061405429
#ifdef NV_KERNEL
#include "nv_kernel_def.h"
@ -94,6 +60,40 @@ __inline float fetch_q(const int& i, const float *q)
#endif
#define MAX_BIO_SHARED_TYPES 128
#ifdef _DOUBLE_DOUBLE
#define numtyp double
#define numtyp2 double2
#define numtyp4 double4
#define acctyp double
#define acctyp4 double4
#endif
#ifdef _SINGLE_DOUBLE
#define numtyp float
#define numtyp2 float2
#define numtyp4 float4
#define acctyp double
#define acctyp4 double4
#endif
#ifndef numtyp
#define numtyp float
#define numtyp2 float2
#define numtyp4 float4
#define acctyp float
#define acctyp4 float4
#endif
#define EWALD_F (numtyp)1.12837917
#define EWALD_P (numtyp)0.3275911
#define A1 (numtyp)0.254829592
#define A2 (numtyp)-0.284496736
#define A3 (numtyp)1.421413741
#define A4 (numtyp)-1.453152027
#define A5 (numtyp)1.061405429
#define SBBITS 30
#define NEIGHMASK 0x3FFFFFFF
__inline int sbmask(int j) { return j >> SBBITS & 3; }

View File

@ -18,16 +18,6 @@
#ifndef PAIR_GPU_KERNEL_H
#define PAIR_GPU_KERNEL_H
#ifdef _DOUBLE_DOUBLE
#define numtyp double
#define numtyp2 double2
#define numtyp4 double4
#else
#define numtyp float
#define numtyp2 float2
#define numtyp4 float4
#endif
#ifdef NV_KERNEL
#include "nv_kernel_def.h"
@ -44,6 +34,16 @@
#endif
#ifdef _DOUBLE_DOUBLE
#define numtyp double
#define numtyp2 double2
#define numtyp4 double4
#else
#define numtyp float
#define numtyp2 float2
#define numtyp4 float4
#endif
// ---------------------------------------------------------------------------
// Unpack neighbors from dev_ij array into dev_nbor matrix for coalesced access
// -- Only unpack neighbors matching the specified inclusive range of forms

View File

@ -33,6 +33,14 @@
#define MEM_THREADS 32
#endif
#ifdef CUDA_PRE_THREE
struct __builtin_align__(16) _double4
{
double x, y, z, w;
};
typedef struct _double4 double4;
#endif
#define GLOBAL_ID_X threadIdx.x+mul24(blockIdx.x,blockDim.x)
#define GLOBAL_ID_Y threadIdx.y+mul24(blockIdx.y,blockDim.y)
#define GLOBAL_SIZE_X mul24(gridDim.x,blockDim.x);

View File

@ -18,30 +18,6 @@
#ifndef LJ96_GPU_KERNEL
#define LJ96_GPU_KERNEL
#ifdef _DOUBLE_DOUBLE
#define numtyp double
#define numtyp2 double2
#define numtyp4 double4
#define acctyp double
#define acctyp4 double4
#endif
#ifdef _SINGLE_DOUBLE
#define numtyp float
#define numtyp2 float2
#define numtyp4 float4
#define acctyp double
#define acctyp4 double4
#endif
#ifndef numtyp
#define numtyp float
#define numtyp2 float2
#define numtyp4 float4
#define acctyp float
#define acctyp4 float4
#endif
#ifdef NV_KERNEL
#include "nv_kernel_def.h"
@ -75,6 +51,30 @@ __inline float4 fetch_pos(const int& i, const float4 *pos)
#endif
#ifdef _DOUBLE_DOUBLE
#define numtyp double
#define numtyp2 double2
#define numtyp4 double4
#define acctyp double
#define acctyp4 double4
#endif
#ifdef _SINGLE_DOUBLE
#define numtyp float
#define numtyp2 float2
#define numtyp4 float4
#define acctyp double
#define acctyp4 double4
#endif
#ifndef numtyp
#define numtyp float
#define numtyp2 float2
#define numtyp4 float4
#define acctyp float
#define acctyp4 float4
#endif
#define SBBITS 30
#define NEIGHMASK 0x3FFFFFFF
__inline int sbmask(int j) { return j >> SBBITS & 3; }

View File

@ -18,30 +18,6 @@
#ifndef LJ_GPU_KERNEL
#define LJ_GPU_KERNEL
#ifdef _DOUBLE_DOUBLE
#define numtyp double
#define numtyp2 double2
#define numtyp4 double4
#define acctyp double
#define acctyp4 double4
#endif
#ifdef _SINGLE_DOUBLE
#define numtyp float
#define numtyp2 float2
#define numtyp4 float4
#define acctyp double
#define acctyp4 double4
#endif
#ifndef numtyp
#define numtyp float
#define numtyp2 float2
#define numtyp4 float4
#define acctyp float
#define acctyp4 float4
#endif
#ifdef NV_KERNEL
#include "nv_kernel_def.h"
@ -75,6 +51,30 @@ __inline float4 fetch_pos(const int& i, const float4 *pos)
#endif
#ifdef _DOUBLE_DOUBLE
#define numtyp double
#define numtyp2 double2
#define numtyp4 double4
#define acctyp double
#define acctyp4 double4
#endif
#ifdef _SINGLE_DOUBLE
#define numtyp float
#define numtyp2 float2
#define numtyp4 float4
#define acctyp double
#define acctyp4 double4
#endif
#ifndef numtyp
#define numtyp float
#define numtyp2 float2
#define numtyp4 float4
#define acctyp float
#define acctyp4 float4
#endif
#define SBBITS 30
#define NEIGHMASK 0x3FFFFFFF
__inline int sbmask(int j) { return j >> SBBITS & 3; }

View File

@ -18,30 +18,6 @@
#ifndef LJE_GPU_KERNEL
#define LJE_GPU_KERNEL
#ifdef _DOUBLE_DOUBLE
#define numtyp double
#define numtyp2 double2
#define numtyp4 double4
#define acctyp double
#define acctyp4 double4
#endif
#ifdef _SINGLE_DOUBLE
#define numtyp float
#define numtyp2 float2
#define numtyp4 float4
#define acctyp double
#define acctyp4 double4
#endif
#ifndef numtyp
#define numtyp float
#define numtyp2 float2
#define numtyp4 float4
#define acctyp float
#define acctyp4 float4
#endif
#ifdef NV_KERNEL
#include "nv_kernel_def.h"
@ -75,6 +51,30 @@ __inline float4 fetch_pos(const int& i, const float4 *pos)
#endif
#ifdef _DOUBLE_DOUBLE
#define numtyp double
#define numtyp2 double2
#define numtyp4 double4
#define acctyp double
#define acctyp4 double4
#endif
#ifdef _SINGLE_DOUBLE
#define numtyp float
#define numtyp2 float2
#define numtyp4 float4
#define acctyp double
#define acctyp4 double4
#endif
#ifndef numtyp
#define numtyp float
#define numtyp2 float2
#define numtyp4 float4
#define acctyp float
#define acctyp4 float4
#endif
#define SBBITS 30
#define NEIGHMASK 0x3FFFFFFF
__inline int sbmask(int j) { return j >> SBBITS & 3; }

View File

@ -18,30 +18,6 @@
#ifndef LJC_GPU_KERNEL
#define LJC_GPU_KERNEL
#ifdef _DOUBLE_DOUBLE
#define numtyp double
#define numtyp2 double2
#define numtyp4 double4
#define acctyp double
#define acctyp4 double4
#endif
#ifdef _SINGLE_DOUBLE
#define numtyp float
#define numtyp2 float2
#define numtyp4 float4
#define acctyp double
#define acctyp4 double4
#endif
#ifndef numtyp
#define numtyp float
#define numtyp2 float2
#define numtyp4 float4
#define acctyp float
#define acctyp4 float4
#endif
#ifdef NV_KERNEL
#include "nv_kernel_def.h"
@ -85,6 +61,30 @@ __inline float fetch_q(const int& i, const float *q)
#endif
#ifdef _DOUBLE_DOUBLE
#define numtyp double
#define numtyp2 double2
#define numtyp4 double4
#define acctyp double
#define acctyp4 double4
#endif
#ifdef _SINGLE_DOUBLE
#define numtyp float
#define numtyp2 float2
#define numtyp4 float4
#define acctyp double
#define acctyp4 double4
#endif
#ifndef numtyp
#define numtyp float
#define numtyp2 float2
#define numtyp4 float4
#define acctyp float
#define acctyp4 float4
#endif
#define SBBITS 30
#define NEIGHMASK 0x3FFFFFFF
__inline int sbmask(int j) { return j >> SBBITS & 3; }

View File

@ -18,38 +18,6 @@
#ifndef LJCL_GPU_KERNEL
#define LJCL_GPU_KERNEL
#ifdef _DOUBLE_DOUBLE
#define numtyp double
#define numtyp2 double2
#define numtyp4 double4
#define acctyp double
#define acctyp4 double4
#endif
#ifdef _SINGLE_DOUBLE
#define numtyp float
#define numtyp2 float2
#define numtyp4 float4
#define acctyp double
#define acctyp4 double4
#endif
#ifndef numtyp
#define numtyp float
#define numtyp2 float2
#define numtyp4 float4
#define acctyp float
#define acctyp4 float4
#endif
#define EWALD_F (numtyp)1.12837917
#define EWALD_P (numtyp)0.3275911
#define A1 (numtyp)0.254829592
#define A2 (numtyp)-0.284496736
#define A3 (numtyp)1.421413741
#define A4 (numtyp)-1.453152027
#define A5 (numtyp)1.061405429
#ifdef NV_KERNEL
#include "nv_kernel_def.h"
@ -93,6 +61,38 @@ __inline float fetch_q(const int& i, const float *q)
#endif
#ifdef _DOUBLE_DOUBLE
#define numtyp double
#define numtyp2 double2
#define numtyp4 double4
#define acctyp double
#define acctyp4 double4
#endif
#ifdef _SINGLE_DOUBLE
#define numtyp float
#define numtyp2 float2
#define numtyp4 float4
#define acctyp double
#define acctyp4 double4
#endif
#ifndef numtyp
#define numtyp float
#define numtyp2 float2
#define numtyp4 float4
#define acctyp float
#define acctyp4 float4
#endif
#define EWALD_F (numtyp)1.12837917
#define EWALD_P (numtyp)0.3275911
#define A1 (numtyp)0.254829592
#define A2 (numtyp)-0.284496736
#define A3 (numtyp)1.421413741
#define A4 (numtyp)-1.453152027
#define A5 (numtyp)1.061405429
#define SBBITS 30
#define NEIGHMASK 0x3FFFFFFF
__inline int sbmask(int j) { return j >> SBBITS & 3; }

View File

@ -18,30 +18,6 @@
#ifndef MORSE_GPU_KERNEL
#define MORSE_GPU_KERNEL
#ifdef _DOUBLE_DOUBLE
#define numtyp double
#define numtyp2 double2
#define numtyp4 double4
#define acctyp double
#define acctyp4 double4
#endif
#ifdef _SINGLE_DOUBLE
#define numtyp float
#define numtyp2 float2
#define numtyp4 float4
#define acctyp double
#define acctyp4 double4
#endif
#ifndef numtyp
#define numtyp float
#define numtyp2 float2
#define numtyp4 float4
#define acctyp float
#define acctyp4 float4
#endif
#ifdef NV_KERNEL
#include "nv_kernel_def.h"
@ -75,6 +51,30 @@ __inline float4 fetch_pos(const int& i, const float4 *pos)
#endif
#ifdef _DOUBLE_DOUBLE
#define numtyp double
#define numtyp2 double2
#define numtyp4 double4
#define acctyp double
#define acctyp4 double4
#endif
#ifdef _SINGLE_DOUBLE
#define numtyp float
#define numtyp2 float2
#define numtyp4 float4
#define acctyp double
#define acctyp4 double4
#endif
#ifndef numtyp
#define numtyp float
#define numtyp2 float2
#define numtyp4 float4
#define acctyp float
#define acctyp4 float4
#endif
#define SBBITS 30
#define NEIGHMASK 0x3FFFFFFF
__inline int sbmask(int j) { return j >> SBBITS & 3; }

View File

@ -15,6 +15,13 @@
Contributing authors: Mike Brown (ORNL), brownw@ornl.gov
------------------------------------------------------------------------- */
#ifdef NV_KERNEL
#include "geryon/ucl_nv_kernel.h"
#else
#pragma OPENCL EXTENSION cl_khr_fp64: enable
#define GLOBAL_ID_X get_global_id(0)
#endif
#ifdef _DOUBLE_DOUBLE
#define numtyp double
#define numtyp4 double4
@ -23,13 +30,6 @@
#define numtyp4 float4
#endif
#ifdef NV_KERNEL
#include "geryon/ucl_nv_kernel.h"
#else
#pragma OPENCL EXTENSION cl_khr_fp64: enable
#define GLOBAL_ID_X get_global_id(0)
#endif
__kernel void kernel_cast_x(__global numtyp4 *x_type, __global double *x,
__global int *type, const int nall) {
int ii=GLOBAL_ID_X;

View File

@ -549,8 +549,9 @@ int PairGPUDeviceT::compile_kernels() {
k_info.run(&d_gpu_lib_data.begin());
ucl_copy(h_gpu_lib_data,d_gpu_lib_data,false);
_ptx_arch=static_cast<double>(h_gpu_lib_data[0])/100.0;
#ifndef USE_OPENCL
if (static_cast<double>(h_gpu_lib_data[0])/100.0>gpu->arch())
if (_ptx_arch>gpu->arch())
return -4;
#endif

View File

@ -226,6 +226,8 @@ class PairGPUDevice {
inline int block_bio_pair() const { return _block_bio_pair; }
/// Return the maximum number of atom types for shared mem with "bio" styles
inline int max_bio_shared_types() const { return _max_bio_shared_types; }
/// Architecture gpu code compiled for (returns 0 for OpenCL)
inline double ptx_arch() const { return _ptx_arch; }
// -------------------- SHARED DEVICE ROUTINES --------------------
// Perform asynchronous zero of integer array
@ -281,6 +283,7 @@ class PairGPUDevice {
int _gpu_mode, _first_device, _last_device, _nthreads;
double _particle_split;
double _cpu_full;
double _ptx_arch;
int _num_mem_threads, _warp_size, _threads_per_atom, _threads_per_charge;
int _pppm_max_spline, _pppm_block;

View File

@ -18,27 +18,6 @@
#ifndef PPPM_GPU_KERNEL
#define PPPM_GPU_KERNEL
#ifdef _DOUBLE_DOUBLE
#define numtyp double
#define numtyp4 double4
#define acctyp double
#define acctyp4 double4
#endif
#ifdef _SINGLE_DOUBLE
#define numtyp float
#define numtyp4 float4
#define acctyp double
#define acctyp4 double4
#endif
#ifndef numtyp
#define numtyp float
#define numtyp4 float4
#define acctyp float
#define acctyp4 float4
#endif
#ifdef NV_KERNEL
#include "geryon/ucl_nv_kernel.h"
@ -67,6 +46,12 @@ __inline float fetch_q(const int& i, const float *q)
#endif
// Allow PPPM to compile without atomics for NVIDIA 1.0 cards, error
// generated at runtime with use of pppm/gpu
#if (__CUDA_ARCH__ < 110)
#define atom_add(x,y) 0
#endif
#else
#pragma OPENCL EXTENSION cl_khr_fp64: enable
@ -85,6 +70,27 @@ __inline float fetch_q(const int& i, const float *q)
#endif
#ifdef _DOUBLE_DOUBLE
#define numtyp double
#define numtyp4 double4
#define acctyp double
#define acctyp4 double4
#endif
#ifdef _SINGLE_DOUBLE
#define numtyp float
#define numtyp4 float4
#define acctyp double
#define acctyp4 double4
#endif
#ifndef numtyp
#define numtyp float
#define numtyp4 float4
#define acctyp float
#define acctyp4 float4
#endif
// Maximum order for spline
#define PPPM_MAX_SPLINE 8
// Thread block size for PPPM kernels

View File

@ -66,7 +66,11 @@ grdtyp * PPPMGPUMemoryT::init(const int nlocal, const int nall, FILE *_screen,
flag=-5;
return 0;
}
if (device->ptx_arch()>0.0 && device->ptx_arch()<1.1) {
flag=-4;
return 0;
}
ucl_device=device->gpu;
atom=&device->atom;