Fix performance regression in KOKKOS package

This commit is contained in:
Stan Moore 2018-05-16 16:12:05 -06:00
parent 307e471456
commit 5a4a7cebc1
1 changed file with 14 additions and 0 deletions

View File

@ -70,6 +70,20 @@ __inline__ __device__
unsigned int atomic_fetch_sub( volatile unsigned int * const dest , const unsigned int val )
{
  // Atomically subtract val from *dest through the CUDA atomicSub intrinsic
  // and hand its result straight back to the caller.
  // The intrinsic takes a non-volatile pointer, so the qualifier is cast away.
  unsigned int * const target = (unsigned int*)dest ;
  return atomicSub( target , val );
}
// Atomic fetch-and-subtract for 64-bit signed integers.
// Implemented as an atomic fetch-and-add of the negated operand; the result
// of atomic_fetch_add is returned unchanged.
// The return type is int64_t (not unsigned int) so that the 64-bit result is
// neither truncated to 32 bits nor reinterpreted as unsigned.
__inline__ __device__
int64_t atomic_fetch_sub( volatile int64_t * const dest , const int64_t val )
{ return atomic_fetch_add(dest,-val); }
// Atomic fetch-and-subtract for single-precision floats.
// Implemented as atomicAdd of the negated operand; the volatile qualifier is
// cast away because the intrinsic takes a plain float pointer.
// The return type is float (not unsigned int) so that the value handed back
// by atomicAdd is not converted to an integer, which would drop the
// fractional part and mishandle negative values.
__inline__ __device__
float atomic_fetch_sub( volatile float * const dest , const float val )
{ return atomicAdd((float*)dest,-val); }
// Atomic fetch-and-subtract for double-precision floats.
// Only compiled when __CUDA_ARCH__ is at least 600, where this code relies on
// the double-precision atomicAdd intrinsic being available.
// Implemented as atomicAdd of the negated operand; the volatile qualifier is
// cast away because the intrinsic takes a plain double pointer.
// The return type is double (not unsigned int) so that the value handed back
// by atomicAdd is not converted to an integer, which would drop the
// fractional part and mishandle negative values.
#if ( 600 <= __CUDA_ARCH__ )
__inline__ __device__
double atomic_fetch_sub( volatile double * const dest , const double val )
{ return atomicAdd((double*)dest,-val); }
#endif
template < typename T >
__inline__ __device__
T atomic_fetch_sub( volatile T * const dest ,