!45046 fix gpu pow issue

Merge pull request !45046 from chenweifeng/gpu-pow-integer
This commit is contained in:
i-robot 2022-11-03 08:55:20 +00:00 committed by Gitee
commit f0607ba90e
No known key found for this signature in database
GPG Key ID: 173E9B9CA92EEF8F
1 changed file with 27 additions and 0 deletions

View File

@ -169,6 +169,33 @@ struct PowerFunc<half2> {
}
};
// Specializes PowerFunc for the integer type T using exponentiation by squaring.
//
// operator()(lhs, rhs) returns lhs raised to the power rhs:
//   * rhs == 0 yields 1 for every lhs (including lhs == 0).
//   * rhs  > 0 multiplies together the repeated squarings of lhs selected by the set bits of rhs.
//   * rhs  < 0 (signed T only) yields the exact quotient 1 / lhs^|rhs| truncated toward zero:
//     1 when lhs == 1, +1 or -1 when lhs == -1 (by the parity of rhs), and 0 otherwise.
//     lhs == 0 with a negative exponent has no finite value; 0 is returned for it as well.
// Overflow is not detected: a result that does not fit in T is not meaningful.
//
// Comments inside the macro body use /* */ because a // comment would swallow the
// line-continuation backslash that follows it.
#define POW_INTEGER_IMPL(T) \
  template <> \
  struct PowerFunc<T> { \
    __device__ __host__ __forceinline__ T operator()(const T &lhs, const T &rhs) { \
      /* True only for a negative exponent. Spelled without a "less than zero" test so the */ \
      /* unsigned instantiations do not trigger pointless-comparison diagnostics: for an */ \
      /* unsigned T, (rhs < 1) already implies rhs == 0, so the conjunction is always false. */ \
      const bool negative_exp = (rhs < static_cast<T>(1)) && (rhs != static_cast<T>(0)); \
      if (negative_exp) { \
        if (lhs == static_cast<T>(1)) { \
          return static_cast<T>(1); \
        } \
        if (lhs == static_cast<T>(-1)) { \
          /* The low bit gives the parity of a negative two's-complement value too. */ \
          return (rhs & 1) ? static_cast<T>(-1) : static_cast<T>(1); \
        } \
        /* |lhs| >= 2 makes |1 / lhs^|rhs|| < 1, which truncates to 0; lhs == 0 also maps to 0. */ \
        return static_cast<T>(0); \
      } \
      T ret = 1; \
      T base = lhs; \
      T exp = rhs; \
      while (exp) { \
        if (exp & 1) { \
          ret *= base; \
        } \
        exp /= 2; \
        /* Square only while exponent bits remain: the squaring after the last bit is never */ \
        /* used and could overflow even though the final result fits in T. */ \
        if (exp) { \
          base *= base; \
        } \
      } \
      return ret; \
    } \
  };
// Emit one PowerFunc specialization per fixed-width integer type:
// the signed 8/16/32/64-bit types first, then their unsigned counterparts.
POW_INTEGER_IMPL(int8_t)
POW_INTEGER_IMPL(int16_t)
POW_INTEGER_IMPL(int32_t)
POW_INTEGER_IMPL(int64_t)
POW_INTEGER_IMPL(uint8_t)
POW_INTEGER_IMPL(uint16_t)
POW_INTEGER_IMPL(uint32_t)
POW_INTEGER_IMPL(uint64_t)
template <typename T>
struct RealDivFunc {
__device__ __host__ __forceinline__ T operator()(const T &lhs, const T &rhs) { return (lhs / rhs); }