forked from lijiext/lammps
364 lines
9.0 KiB
C++
364 lines
9.0 KiB
C++
// -------------------------------------------------------------
// cuDPP -- CUDA Data Parallel Primitives library
// -------------------------------------------------------------
// $Revision$
// $Date$
// -------------------------------------------------------------
// This source code is distributed under the terms of license.txt in
// the root directory of this source distribution.
// -------------------------------------------------------------
|
|
|
|
/**
 * @file
 * cudpp_util.h
 *
 * @brief C++ utility functions and classes used internally to cuDPP
 */
|
|
|
|
#ifndef __CUDPP_UTIL_H__
|
|
#define __CUDPP_UTIL_H__
|
|
|
|
#ifdef WIN32
|
|
#include <windows.h>
|
|
#endif
|
|
|
|
#include <cuda.h>
|
|
#include <cudpp.h>
|
|
#include <limits.h>
|
|
#include <float.h>
|
|
|
|
// Kernel launch-bound annotations.
// When CUDA_VERSION is at least 3000 the macros expand to __launch_bounds__;
// otherwise they expand to nothing. Both macros are defined under the same
// name in either configuration so that code using them builds in both.
#if (CUDA_VERSION >= 3000)
#define LAUNCH_BOUNDS(x) __launch_bounds__((x))
#define LAUNCH_BOUNDS_MINBLOCKS(x, y) __launch_bounds__((x),(y))
#else
#define LAUNCH_BOUNDS(x)
#define LAUNCH_BOUNDS_MINBLOCKS(x, y)
#endif

// Historical spelling (lowercase trailing 's'); kept as an alias so that any
// code written against the old name keeps compiling.
#define LAUNCH_BOUNDS_MINBLOCKs(x, y) LAUNCH_BOUNDS_MINBLOCKS(x, y)
|
|
|
|
|
|
/** @brief Determine if \a n is a power of two.
 *
 * Clears the lowest set bit of \a n and checks whether anything is left.
 * Note that this test also reports true for \a n == 0.
 *
 * @param n Value to be checked to see if it is a power of two
 * @returns True if \a n has at most one bit set, false otherwise
 */
inline bool
isPowerOfTwo(int n)
{
    const int withoutLowestBit = n & (n - 1);
    return withoutLowestBit == 0;
}
|
|
|
|
/** @brief Determine if an integer \a n is a multiple of an integer \a f.
|
|
* @param n Multiple
|
|
* @param f Factor
|
|
* @returns True if \a n is a multiple of \a f, false otherwise
|
|
*/
|
|
inline bool
|
|
isMultiple(int n, int f)
|
|
{
|
|
if (isPowerOfTwo(f))
|
|
return ((n&(f-1))==0);
|
|
else
|
|
return (n%f==0);
|
|
}
|
|
|
|
/** @brief Compute the smallest power of two greater than or equal to \a n.
 *
 * Implemented with integer arithmetic only, so it needs no math library
 * functions and performs no floating-point conversions.
 *
 * @param n Input value
 * @returns \a n itself if it is already a power of two, otherwise the next
 *          larger power of two. Returns 0 when \a n is 0 or negative. For
 *          \a n greater than 2^30 the mathematical result (2^31) does not fit
 *          in an \c int.
 */
inline int
ceilPow2(int n)
{
    // No power of two is <= 0; 0 is what this function has always
    // returned for n == 0, and it is used for negative input as well.
    if (n <= 0)
        return 0;

    // Work in unsigned so that doubling up to 2^31 is well defined.
    const unsigned int target = (unsigned int)n;
    unsigned int pow2 = 1;
    while (pow2 < target)
        pow2 <<= 1;

    return (int)pow2;
}
|
|
|
|
/** @brief Compute the largest power of two less than or equal to \a n.
 *
 * Implemented with integer shifts only. Converting \a n to \c float first
 * would round values above 2^24 (for example 33554431 becomes 33554432) and
 * could therefore produce a result larger than \a n.
 *
 * @param n Input value
 * @returns \a n itself if it is a power of two, otherwise the next smaller
 *          power of two. Returns 0 when \a n is 0. For negative \a n the
 *          result is computed from the magnitude of \a n.
 */
inline int
floorPow2(int n)
{
    if (n == 0)
        return 0;

    // Use the magnitude, computed in unsigned arithmetic so that negating
    // INT_MIN is well defined.
    unsigned int remaining = (n < 0) ? (0u - (unsigned int)n)
                                     : (unsigned int)n;

    // Each bit below the highest set bit doubles the result once.
    unsigned int pow2 = 1;
    while ((remaining >>= 1) != 0)
        pow2 <<= 1;

    return (int)pow2;
}
|
|
|
|
/** @brief Returns the maximum value for type \a T.
|
|
*
|
|
* Implemented using template specialization on \a T.
|
|
*/
|
|
template <class T>
|
|
__host__ __device__ inline T getMax() { return 0; }
|
|
/** @brief Returns the minimum value for type \a T.
|
|
*
|
|
* Implemented using template specialization on \a T.
|
|
*/
|
|
template <class T>
|
|
__host__ __device__ inline T getMin() { return 0; }
|
|
// type specializations for the above
|
|
// getMax
|
|
template <> __host__ __device__ inline int getMax() { return INT_MAX; }
|
|
template <> __host__ __device__ inline unsigned int getMax() { return INT_MAX; }
|
|
template <> __host__ __device__ inline float getMax() { return FLT_MAX; }
|
|
template <> __host__ __device__ inline char getMax() { return (char)INT_MAX; }
|
|
template <> __host__ __device__ inline unsigned char getMax() { return (unsigned char)INT_MAX; }
|
|
// getMin
|
|
template <> __host__ __device__ inline int getMin() { return INT_MIN; }
|
|
template <> __host__ __device__ inline unsigned int getMin() { return 0; }
|
|
template <> __host__ __device__ inline float getMin() { return -FLT_MAX; }
|
|
template <> __host__ __device__ inline char getMin() { return (char)INT_MIN; }
|
|
template <> __host__ __device__ inline unsigned char getMin() { return (unsigned char)0; }
|
|
|
|
/** @brief Returns the maximum of three values.
 *
 * The result has the same type as the arguments, so non-integer values are
 * returned without being truncated to \c int.
 *
 * @param a First value.
 * @param b Second value.
 * @param c Third value.
 * @returns The maximum of \a a, \a b and \a c.
 */
template<class T>
inline T max3(T a, T b, T c)
{
    return (a > b) ? ((a > c) ? a : c) : ((b > c) ? b : c);
}
|
|
|
|
/** @brief Utility template struct for generating small vector types from scalar types
|
|
*
|
|
* Given a base scalar type (\c int, \c float, etc.) and a vector length (1 through 4) as
|
|
* template parameters, this struct defines a vector type (\c float3, \c int4, etc.) of the
|
|
* specified length and base type. For example:
|
|
* \code
|
|
* template <class T>
|
|
* __device__ void myKernel(T *data)
|
|
* {
|
|
* typeToVector<T,4>::Result myVec4; // create a vec4 of type T
|
|
* myVec4 = (typeToVector<T,4>::Result*)data[0]; // load first element of data as a vec4
|
|
* }
|
|
* \endcode
|
|
*
|
|
* This functionality is implemented using template specialization. Currently specializations
|
|
* for int, float, and unsigned int vectors of lengths 2-4 are defined. Note that this results
|
|
* in types being generated at compile time -- there is no runtime cost. typeToVector is used by
|
|
* the optimized scan \c __device__ functions in scan_cta.cu.
|
|
*/
|
|
template <typename T, int N>
|
|
struct typeToVector
|
|
{
|
|
typedef T Result;
|
|
};
|
|
|
|
template<>
|
|
struct typeToVector<int, 4>
|
|
{
|
|
typedef int4 Result;
|
|
};
|
|
template<>
|
|
struct typeToVector<unsigned int, 4>
|
|
{
|
|
typedef uint4 Result;
|
|
};
|
|
template<>
|
|
struct typeToVector<float, 4>
|
|
{
|
|
typedef float4 Result;
|
|
};
|
|
template<>
|
|
struct typeToVector<int, 3>
|
|
{
|
|
typedef int3 Result;
|
|
};
|
|
template<>
|
|
struct typeToVector<unsigned int, 3>
|
|
{
|
|
typedef uint3 Result;
|
|
};
|
|
template<>
|
|
struct typeToVector<float, 3>
|
|
{
|
|
typedef float3 Result;
|
|
};
|
|
template<>
|
|
struct typeToVector<int, 2>
|
|
{
|
|
typedef int2 Result;
|
|
};
|
|
template<>
|
|
struct typeToVector<unsigned int, 2>
|
|
{
|
|
typedef uint2 Result;
|
|
};
|
|
template<>
|
|
struct typeToVector<float, 2>
|
|
{
|
|
typedef float2 Result;
|
|
};
|
|
|
|
/** @brief Templatized operator class used by scan and segmented scan
|
|
*
|
|
* This Operator class is used to allow generic support of binary
|
|
* associative operators in scan. It defines two member functions,
|
|
* op() and identity(), that are used in place of + and 0 (for
|
|
* example) in the scan and segmented scan code. Because this is
|
|
* template code, all decisions in the code are made at compile
|
|
* time, resulting in optimal operator code. Currently the operators
|
|
* CUDPP_ADD, CUDPP_MULTIPLY, CUDPP_MIN, and CUDPP_MAX are supported.
|
|
* Operator is implemented using template specialization for the
|
|
* types \c int, \c unsigned int, and \c float.
|
|
*/
|
|
template <typename T, CUDPPOperator oper>
|
|
class Operator
|
|
{
|
|
public:
|
|
/** Applies the operator to operands \a a and \a b.
|
|
* @param a First operand
|
|
* @param b Second operand
|
|
* @returns a OP b, where OP is defined by ::CUDPPOperator \a oper.
|
|
*/
|
|
static __device__ T op(const T a, const T b)
|
|
{
|
|
switch (oper)
|
|
{
|
|
case CUDPP_ADD:
|
|
return a + b;
|
|
case CUDPP_MULTIPLY:
|
|
return a * b;
|
|
case CUDPP_MIN:
|
|
return min(a, b);
|
|
case CUDPP_MAX:
|
|
return max(a, b);
|
|
}
|
|
}
|
|
|
|
/** Returns the identity element defined for type \a T */
|
|
static __device__ T identity() { return 0; }
|
|
};
|
|
|
|
// specializations for different types
|
|
template <CUDPPOperator oper>
|
|
class Operator <int, oper>
|
|
{
|
|
public:
|
|
static __device__ int op(const int a, const int b)
|
|
{
|
|
switch (oper)
|
|
{
|
|
default:
|
|
case CUDPP_ADD:
|
|
return a + b;
|
|
case CUDPP_MULTIPLY:
|
|
return a * b;
|
|
case CUDPP_MIN:
|
|
return min(a, b);
|
|
case CUDPP_MAX:
|
|
return max(a, b);
|
|
}
|
|
}
|
|
|
|
static __device__ int identity()
|
|
{
|
|
switch (oper)
|
|
{
|
|
default:
|
|
case CUDPP_ADD:
|
|
return 0;
|
|
case CUDPP_MULTIPLY:
|
|
return 1;
|
|
case CUDPP_MIN:
|
|
return INT_MAX;
|
|
case CUDPP_MAX:
|
|
return INT_MIN;
|
|
}
|
|
}
|
|
};
|
|
|
|
template <CUDPPOperator oper>
|
|
class Operator <unsigned int, oper>
|
|
{
|
|
public:
|
|
static __device__ unsigned int op(const unsigned int a, const unsigned int b)
|
|
{
|
|
switch (oper)
|
|
{
|
|
default:
|
|
case CUDPP_ADD:
|
|
return a + b;
|
|
case CUDPP_MULTIPLY:
|
|
return a * b;
|
|
case CUDPP_MIN:
|
|
return min(a, b);
|
|
case CUDPP_MAX:
|
|
return max(a, b);
|
|
}
|
|
}
|
|
|
|
static __device__ unsigned int identity()
|
|
{
|
|
switch (oper)
|
|
{
|
|
default:
|
|
case CUDPP_ADD:
|
|
return 0;
|
|
case CUDPP_MULTIPLY:
|
|
return 1;
|
|
case CUDPP_MIN:
|
|
return UINT_MAX;
|
|
case CUDPP_MAX:
|
|
return 0;
|
|
}
|
|
}
|
|
};
|
|
|
|
|
|
template <CUDPPOperator oper>
|
|
class Operator <float, oper>
|
|
{
|
|
public:
|
|
static __device__ float op(const float a, const float b)
|
|
{
|
|
switch (oper)
|
|
{
|
|
default:
|
|
case CUDPP_ADD:
|
|
return a + b;
|
|
case CUDPP_MULTIPLY:
|
|
return a * b;
|
|
case CUDPP_MIN:
|
|
return min(a, b);
|
|
case CUDPP_MAX:
|
|
return max(a, b);
|
|
}
|
|
}
|
|
|
|
static __device__ float identity()
|
|
{
|
|
switch (oper)
|
|
{
|
|
default:
|
|
case CUDPP_ADD:
|
|
return 0.0f;
|
|
case CUDPP_MULTIPLY:
|
|
return 1.0f;
|
|
case CUDPP_MIN:
|
|
return FLT_MAX;
|
|
case CUDPP_MAX:
|
|
return -FLT_MAX;
|
|
}
|
|
}
|
|
};
|
|
|
|
#endif // __CUDPP_UTIL_H__
|
|
|
|
// Leave this at the end of the file
// Local Variables:
// mode:c++
// c-file-style: "NVIDIA"
// End:
|