lammps/lib/cuda/cuda_common.h

345 lines
12 KiB
C

/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
Original Version:
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
See the README file in the top-level LAMMPS directory.
-----------------------------------------------------------------------
USER-CUDA Package and associated modifications:
https://sourceforge.net/projects/lammpscuda/
Christian Trott, christian.trott@tu-ilmenau.de
Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
Theoretical Physics II, University of Technology Ilmenau, Germany
See the README file in the USER-CUDA directory.
This software is distributed under the GNU General Public License.
------------------------------------------------------------------------- */
#ifndef _CUDA_COMMON_H_
#define _CUDA_COMMON_H_
//#include "cutil.h"
#include "cuda_precision.h"
#include "cuda_wrapper_cu.h"
#define CUDA_MAX_TYPES_PLUS_ONE 12 //for pair styles which use constant space for parameters, this needs to be one larger than the number of atom types
//this can not be arbitrarly large, since constant space is limited.
//in principle one could alter potentials to use global memory for parameters, some du that already since the first examples I encountered had a high number (20+) of atom types
//Christian
#define CUDA_MAX_TYPES2 (CUDA_MAX_TYPES_PLUS_ONE * CUDA_MAX_TYPES_PLUS_ONE)
#define CUDA_MAX_NSPECIAL 25
// define some easy-to-use debug and emulation macros
#ifdef _DEBUG
#define MYDBG(a) a
#else
#define MYDBG(a)
#endif
#if __DEVICE_EMULATION__
#define MYEMU(a) a
#else
#define MYEMU(a)
#endif
#define MYEMUDBG(a) MYEMU(MYDBG(a))
// Add Prefix (needed as workaround, same constant's names in different files causes conflict)
#define MY_ADD_PREFIX(prefix, var) prefix##_##var
#define MY_ADD_PREFIX2(prefix, var) MY_ADD_PREFIX(prefix, var)
#define MY_AP(var) MY_ADD_PREFIX2(MY_PREFIX, var)
#define MY_VAR_TO_STR(var) #var
#define MY_VAR_TO_STR2(var) MY_VAR_TO_STR(var)
//#define &MY_AP(var) (MY_VAR_TO_STR2(MY_PREFIX) "_" MY_VAR_TO_STR2(var))
//#define &MY_AP(var) &(MY_AP(var))
#define CUDA_USE_TEXTURE
#define CUDA_USE_FLOAT4
//constants used by many classes
//domain
#define _boxhi MY_AP(boxhi)
#define _boxlo MY_AP(boxlo)
#define _subhi MY_AP(subhi)
#define _sublo MY_AP(sublo)
#define _box_size MY_AP(box_size)
#define _prd MY_AP(prd)
#define _periodicity MY_AP(periodicity)
#define _triclinic MY_AP(triclinic)
#define _boxhi_lamda MY_AP(boxhi_lamda)
#define _boxlo_lamda MY_AP(boxlo_lamda)
#define _prd_lamda MY_AP(prd_lamda)
#define _h MY_AP(h)
#define _h_inv MY_AP(h_inv)
#define _h_rate MY_AP(h_rate)
__device__ __constant__ X_FLOAT _boxhi[3];
__device__ __constant__ X_FLOAT _boxlo[3];
__device__ __constant__ X_FLOAT _subhi[3];
__device__ __constant__ X_FLOAT _sublo[3];
__device__ __constant__ X_FLOAT _box_size[3];
__device__ __constant__ X_FLOAT _prd[3];
__device__ __constant__ int _periodicity[3];
__device__ __constant__ int _triclinic;
__device__ __constant__ X_FLOAT _boxhi_lamda[3];
__device__ __constant__ X_FLOAT _boxlo_lamda[3];
__device__ __constant__ X_FLOAT _prd_lamda[3];
__device__ __constant__ X_FLOAT _h[6];
__device__ __constant__ X_FLOAT _h_inv[6];
__device__ __constant__ V_FLOAT _h_rate[6];
//atom properties
#define _x MY_AP(x)
#define _v MY_AP(v)
#define _f MY_AP(f)
#define _tag MY_AP(tag)
#define _type MY_AP(type)
#define _mask MY_AP(mask)
#define _image MY_AP(image)
#define _q MY_AP(q)
#define _mass MY_AP(mass)
#define _rmass MY_AP(rmass)
#define _rmass_flag MY_AP(rmass_flag)
#define _eatom MY_AP(eatom)
#define _vatom MY_AP(vatom)
#define _x_type MY_AP(x_type)
#define _radius MY_AP(radius)
#define _density MY_AP(density)
#define _omega MY_AP(omega)
#define _torque MY_AP(torque)
#define _special MY_AP(special)
#define _maxspecial MY_AP(maxspecial)
#define _nspecial MY_AP(nspecial)
#define _special_flag MY_AP(special_flag)
#define _molecule MY_AP(molecule)
#define _v_radius MY_AP(v_radius)
#define _omega_rmass MY_AP(omega_rmass)
#define _freeze_group_bit MY_AP(freeze_group_bit)
#define _map_array MY_AP(map_array)
__device__ __constant__ X_FLOAT* _x; //holds pointer to positions
__device__ __constant__ V_FLOAT* _v;
__device__ __constant__ F_FLOAT* _f;
__device__ __constant__ int* _tag;
__device__ __constant__ int* _type;
__device__ __constant__ int* _mask;
__device__ __constant__ int* _image;
__device__ __constant__ V_FLOAT* _mass;
__device__ __constant__ F_FLOAT* _q;
__device__ __constant__ V_FLOAT* _rmass;
__device__ __constant__ int _rmass_flag;
__device__ __constant__ ENERGY_FLOAT* _eatom;
__device__ __constant__ ENERGY_FLOAT* _vatom;
__device__ __constant__ X_FLOAT4* _x_type; //holds pointer to positions
__device__ __constant__ X_FLOAT* _radius;
__device__ __constant__ F_FLOAT* _density;
__device__ __constant__ V_FLOAT* _omega;
__device__ __constant__ F_FLOAT* _torque;
__device__ __constant__ int* _special;
__device__ __constant__ int _maxspecial;
__device__ __constant__ int* _nspecial;
__device__ __constant__ int _special_flag[4];
__device__ __constant__ int* _molecule;
__device__ __constant__ V_FLOAT4* _v_radius; //holds pointer to positions
__device__ __constant__ V_FLOAT4* _omega_rmass; //holds pointer to positions
__device__ __constant__ int _freeze_group_bit;
__device__ __constant__ int* _map_array;
#ifdef CUDA_USE_TEXTURE
#define _x_tex MY_AP(x_tex)
#if X_PRECISION == 1
texture<float> _x_tex;
#else
texture<int2, 1> _x_tex;
#endif
#define _type_tex MY_AP(type_tex)
texture<int> _type_tex;
#define _x_type_tex MY_AP(x_type_tex)
#if X_PRECISION == 1
texture<float4, 1> _x_type_tex;
#else
texture<int4, 1> _x_type_tex;
#endif
#define _v_radius_tex MY_AP(v_radius_tex)
#if V_PRECISION == 1
texture<float4, 1> _v_radius_tex;
#else
texture<int4, 1> _v_radius_tex;
#endif
#define _omega_rmass_tex MY_AP(omega_rmass_tex)
#if V_PRECISION == 1
texture<float4, 1> _omega_rmass_tex;
#else
texture<int4, 1> _omega_rmass_tex;
#endif
#define _q_tex MY_AP(q_tex)
#if F_PRECISION == 1
texture<float> _q_tex;
#else
texture<int2, 1> _q_tex;
#endif
#endif
//neighbor
#ifdef IncludeCommonNeigh
#define _inum MY_AP(inum)
#define _inum_border MY_AP(inum_border)
#define _ilist MY_AP(ilist)
#define _ilist_border MY_AP(ilist_border)
#define _numneigh MY_AP(numneigh)
#define _numneigh_border MY_AP(numneigh_border)
#define _numneigh_inner MY_AP(numneigh_inner)
#define _firstneigh MY_AP(firstneigh)
#define _neighbors MY_AP(neighbors)
#define _neighbors_border MY_AP(neighbors_border)
#define _neighbors_inner MY_AP(neighbors_inner)
#define _reneigh_flag MY_AP(reneigh_flag)
#define _triggerneighsq MY_AP(triggerneighsq)
#define _xhold MY_AP(xhold)
#define _maxhold MY_AP(maxhold)
#define _dist_check MY_AP(dist_check)
#define _neighbor_maxlocal MY_AP(neighbor_maxlocal)
#define _maxneighbors MY_AP(maxneighbors)
#define _overlap_comm MY_AP(overlap_comm)
__device__ __constant__ int _inum;
__device__ __constant__ int* _inum_border;
__device__ __constant__ int* _ilist;
__device__ __constant__ int* _ilist_border;
__device__ __constant__ int* _numneigh;
__device__ __constant__ int* _numneigh_border;
__device__ __constant__ int* _numneigh_inner;
__device__ __constant__ int** _firstneigh;
__device__ __constant__ int* _neighbors;
__device__ __constant__ int* _neighbors_border;
__device__ __constant__ int* _neighbors_inner;
__device__ __constant__ int* _reneigh_flag;
__device__ __constant__ X_FLOAT _triggerneighsq;
__device__ __constant__ X_FLOAT* _xhold; //holds pointer to positions
__device__ __constant__ int _maxhold;
__device__ __constant__ int _dist_check;
__device__ __constant__ int _neighbor_maxlocal;
__device__ __constant__ int _maxneighbors;
__device__ __constant__ int _overlap_comm;
#endif
//system properties
#define _nall MY_AP(nall)
#define _nghost MY_AP(nghost)
#define _nlocal MY_AP(nlocal)
#define _nmax MY_AP(nmax)
#define _cuda_ntypes MY_AP(cuda_ntypes)
#define _dtf MY_AP(dtf)
#define _dtv MY_AP(dtv)
#define _factor MY_AP(factor)
#define _virial MY_AP(virial)
#define _eng_vdwl MY_AP(eng_vdwl)
#define _eng_coul MY_AP(eng_coul)
#define _molecular MY_AP(molecular)
__device__ __constant__ unsigned _nall;
__device__ __constant__ unsigned _nghost;
__device__ __constant__ unsigned _nlocal;
__device__ __constant__ unsigned _nmax;
__device__ __constant__ unsigned _cuda_ntypes;
__device__ __constant__ V_FLOAT _dtf;
__device__ __constant__ X_FLOAT _dtv;
__device__ __constant__ V_FLOAT _factor;
__device__ __constant__ ENERGY_FLOAT* _virial;
__device__ __constant__ ENERGY_FLOAT* _eng_vdwl;
__device__ __constant__ ENERGY_FLOAT* _eng_coul;
__device__ __constant__ int _molecular;
//other general constants
#define _buffer MY_AP(buffer)
#define _flag MY_AP(flag)
#define _debugdata MY_AP(debugdata)
__device__ __constant__ void* _buffer;
__device__ __constant__ int* _flag;
__device__ __constant__ int* _debugdata;
// pointers to data fields on GPU are hold in constant space
// -> reduces register usage and number of parameters for kernelcalls
// will be variables of file scope in cuda files
// maybe used to output cudaError_t
#define MY_OUTPUT_RESULT(result) \
switch(result) \
{ \
case cudaSuccess: printf(" => cudaSuccess\n"); break; \
case cudaErrorInvalidValue: printf(" => cudaErrorInvalidValue\n"); break; \
case cudaErrorInvalidSymbol: printf(" => cudaErrorInvalidSymbol\n"); break; \
case cudaErrorInvalidDevicePointer: printf(" => cudaErrorInvalidDevicePointer\n"); break; \
case cudaErrorInvalidMemcpyDirection: printf(" => cudaErrorInvalidMemcpyDirection\n"); break; \
default: printf(" => unknown\n"); break; \
}
#ifdef _DEBUG
# define CUT_CHECK_ERROR(errorMessage) { \
cudaError_t err = cudaGetLastError(); \
if( cudaSuccess != err) { \
fprintf(stderr, "Cuda error: %s in file '%s' in line %i : %s.\n", \
errorMessage, __FILE__, __LINE__, cudaGetErrorString( err) );\
exit(EXIT_FAILURE); \
} \
err = cudaThreadSynchronize(); \
if( cudaSuccess != err) { \
fprintf(stderr, "Cuda error: %s in file '%s' in line %i : %s.\n", \
errorMessage, __FILE__, __LINE__, cudaGetErrorString( err) );\
exit(EXIT_FAILURE); \
} \
}
#else
# define CUT_CHECK_ERROR(errorMessage) { \
cudaError_t err = cudaGetLastError(); \
if( cudaSuccess != err) { \
fprintf(stderr, "Cuda error: %s in file '%s' in line %i : %s.\n", \
errorMessage, __FILE__, __LINE__, cudaGetErrorString( err) );\
exit(EXIT_FAILURE); \
} \
}
#endif
# define CUDA_SAFE_CALL_NO_SYNC( call) { \
cudaError err = call; \
if( cudaSuccess != err) { \
fprintf(stderr, "Cuda error in file '%s' in line %i : %s.\n", \
__FILE__, __LINE__, cudaGetErrorString( err) ); \
exit(EXIT_FAILURE); \
} }
# define CUDA_SAFE_CALL( call) CUDA_SAFE_CALL_NO_SYNC(call);
#define X_MASK 1
#define V_MASK 2
#define F_MASK 4
#define TAG_MASK 8
#define TYPE_MASK 16
#define MASK_MASK 32
#define IMAGE_MASK 64
#define Q_MASK 128
#define MOLECULE_MASK 256
#define RMASS_MASK 512
#define RADIUS_MASK 1024
#define DENSITY_MASK 2048
#define OMEGA_MASK 4096
#define TORQUE_MASK 8192
#endif // #ifdef _CUDA_COMMON_H_