lammps/lib/gpu/cudpp_mini/sharedmem.h

167 lines
4.6 KiB
C++

// -------------------------------------------------------------
// cuDPP -- CUDA Data Parallel Primitives library
// -------------------------------------------------------------
// $Revision$
// $Date$
// -------------------------------------------------------------
// This source code is distributed under the terms of license.txt
// in the root directory of this source distribution.
// -------------------------------------------------------------
/**
* @file
* sharedmem.h
*
* @brief Shared memory declaration struct for templatized types.
*
* Because dynamically sized shared memory arrays are declared "extern" in CUDA,
* we can't templatize their types directly. To get around this, we declare a
* simple wrapper struct that will declare the extern array with a different
* name depending on the type. This avoids linker errors about multiple
* definitions.
*
* To use dynamically allocated shared memory in a templatized __global__ or
* __device__ function, just replace code like this:
*
* <pre>
* template<class T>
* __global__ void
* foo( T* d_out, T* d_in)
* {
* // Shared mem size is determined by the host app at run time
* extern __shared__ T sdata[];
* ...
* doStuff(sdata);
* ...
* }
* </pre>
*
* with this:
* <pre>
* template<class T>
* __global__ void
* foo( T* d_out, T* d_in)
* {
* // Shared mem size is determined by the host app at run time
* SharedMemory<T> smem;
* T* sdata = smem.getPointer();
* ...
* doStuff(sdata);
* ...
* }
* </pre>
*/
#ifndef _SHAREDMEM_H_
#define _SHAREDMEM_H_
/** @brief Wrapper class for templatized dynamic shared memory arrays.
*
* This struct uses template specialization on the type \a T to declare
* a differently named dynamic shared memory array for each type
* (\code extern __shared__ T s_type[] \endcode).
*
* Currently there are specializations for the following types:
* \c int, \c uint, \c char, \c uchar, \c short, \c ushort, \c long,
* \c unsigned long, \c bool, \c float, \c double, and \c uchar4. One can also
* specialize it for user defined types.
*/
template <typename T>
struct SharedMemory
{
    /**
     * Fallback accessor used when no specialization exists for \a T.
     *
     * The body calls a device function that is only declared, never defined
     * in this header, so that an un-specialized type is not silently
     * compiled. The null pointer returned afterwards only satisfies the
     * signature.
     */
    __device__ T* getPointer()
    {
        // Ensure that we won't compile any un-specialized types
        extern __device__ void Error_UnsupportedType();

        Error_UnsupportedType();

        T* unsupported = 0;
        return unsupported;
    }

    // TODO: Use operator overloading to make this class look like a regular array
};
// Specializations of SharedMemory for the following types:
// int, uint, char, uchar, short, ushort, long, ulong, bool, float, double,
// and uchar4. One could also specialize it for user-defined types.
template <>
struct SharedMemory <int>
{
    /** Return the runtime-sized shared memory array, typed as \c int. */
    __device__ int* getPointer()
    {
        extern __shared__ int s_int[];
        return s_int;
    }
};
template <>
struct SharedMemory <unsigned int>
{
    /** Return the runtime-sized shared memory array, typed as \c unsigned \c int. */
    __device__ unsigned int* getPointer()
    {
        extern __shared__ unsigned int s_uint[];
        return s_uint;
    }
};
template <>
struct SharedMemory <char>
{
    /** Return the runtime-sized shared memory array, typed as \c char. */
    __device__ char* getPointer()
    {
        extern __shared__ char s_char[];
        return s_char;
    }
};
template <>
struct SharedMemory <unsigned char>
{
    /** Return the runtime-sized shared memory array, typed as \c unsigned \c char. */
    __device__ unsigned char* getPointer()
    {
        extern __shared__ unsigned char s_uchar[];
        return s_uchar;
    }
};
template <>
struct SharedMemory <short>
{
    /** Return the runtime-sized shared memory array, typed as \c short. */
    __device__ short* getPointer()
    {
        extern __shared__ short s_short[];
        return s_short;
    }
};
template <>
struct SharedMemory <unsigned short>
{
    /** Return the runtime-sized shared memory array, typed as \c unsigned \c short. */
    __device__ unsigned short* getPointer()
    {
        extern __shared__ unsigned short s_ushort[];
        return s_ushort;
    }
};
template <>
struct SharedMemory <long>
{
    /** Return the runtime-sized shared memory array, typed as \c long. */
    __device__ long* getPointer()
    {
        extern __shared__ long s_long[];
        return s_long;
    }
};
template <>
struct SharedMemory <unsigned long>
{
    /** Return the runtime-sized shared memory array, typed as \c unsigned \c long. */
    __device__ unsigned long* getPointer()
    {
        extern __shared__ unsigned long s_ulong[];
        return s_ulong;
    }
};
template <>
struct SharedMemory <bool>
{
    /** Return the runtime-sized shared memory array, typed as \c bool. */
    __device__ bool* getPointer()
    {
        extern __shared__ bool s_bool[];
        return s_bool;
    }
};
template <>
struct SharedMemory <float>
{
    /** Return the runtime-sized shared memory array, typed as \c float. */
    __device__ float* getPointer()
    {
        extern __shared__ float s_float[];
        return s_float;
    }
};
template <>
struct SharedMemory <double>
{
    /** Return the runtime-sized shared memory array, typed as \c double. */
    __device__ double* getPointer()
    {
        extern __shared__ double s_double[];
        return s_double;
    }
};
template <>
struct SharedMemory <uchar4>
{
    /** Return the runtime-sized shared memory array, typed as \c uchar4. */
    __device__ uchar4* getPointer()
    {
        extern __shared__ uchar4 s_uchar4[];
        return s_uchar4;
    }
};
#endif //_SHAREDMEM_H_
// Leave this at the end of the file
// Local Variables:
// mode:c++
// c-file-style: "NVIDIA"
// End: