!23349 Fix FloorDiv/FloorMod precision problem for large int64 and int32 values

Merge pull request !23349 from zong_shuai/int64_precise
This commit is contained in:
i-robot 2021-09-15 06:15:17 +00:00 committed by Gitee
commit a8e4a6f228
1 changed file with 46 additions and 4 deletions

View File

@ -252,6 +252,30 @@ struct FloorDivFunc {
return floorf(static_cast<float>(lhs) / static_cast<float>(rhs));
}
};
// Floor division for int64_t operands: returns the largest integer q with
// q <= lhs / rhs (rounding toward negative infinity).
//
// The quotient is computed with integer arithmetic. A double has only a 53-bit
// significand, so routing 64-bit operands through double silently drops low
// bits for large magnitudes and can overflow when converting back.
template <>
struct FloorDivFunc<int64_t> {
  __device__ __host__ __forceinline__ int64_t operator()(const int64_t &lhs, const int64_t &rhs) {
    // Two inputs have no valid integer quotient: a zero divisor, and
    // INT64_MIN / -1 (the true result does not fit in int64_t). Both stay on
    // the floating-point expression this functor used before, so they do not
    // turn into an integer division fault; the value produced for them comes
    // from converting an out-of-range double and is not meaningful.
    const bool zero_divisor = (rhs == 0);
    const bool negate_overflow = (rhs == -1) && (lhs == INT64_MIN);
    if (zero_divisor || negate_overflow) {
      return floor(static_cast<double>(lhs) / static_cast<double>(rhs));
    }

    // C++ integer division truncates toward zero. Step down by one when the
    // division is inexact and the operands have opposite signs, which is
    // exactly when truncation rounded up instead of down.
    int64_t quotient = lhs / rhs;
    const bool inexact = (lhs % rhs) != 0;
    const bool signs_differ = (lhs < 0) != (rhs < 0);
    if (inexact && signs_differ) {
      --quotient;
    }
    return quotient;
  }
};
// Floor division for int32_t operands, evaluated in double precision.
// Both operands are widened to double, divided, floored, and the result is
// converted back to int32_t on return.
template <>
struct FloorDivFunc<int32_t> {
  __device__ __host__ __forceinline__ int32_t operator()(const int32_t &lhs, const int32_t &rhs) {
    const double numerator = static_cast<double>(lhs);
    const double denominator = static_cast<double>(rhs);
    const double floored_quotient = floor(numerator / denominator);
    return floored_quotient;
  }
};
// Floor division for uint64_t operands.
//
// For unsigned values floor division is the same as ordinary integer division,
// so the quotient is computed directly on the integers. Going through double
// (53-bit significand) would drop low bits of large operands.
//
// The declared return type stays int64_t to keep the functor's interface
// unchanged. Quotients above INT64_MAX are returned with the same bit pattern,
// so converting the result back to uint64_t recovers the exact quotient
// (assumes the usual two's-complement conversion).
template <>
struct FloorDivFunc<uint64_t> {
  __device__ __host__ __forceinline__ int64_t operator()(const uint64_t &lhs, const uint64_t &rhs) {
    if (rhs == 0) {
      // No valid quotient exists. Stay on the floating-point expression used
      // before so this case does not become an integer division by zero; the
      // value it yields comes from converting a non-finite double and is not
      // meaningful.
      return floor(static_cast<double>(lhs) / static_cast<double>(rhs));
    }
    return static_cast<int64_t>(lhs / rhs);
  }
};
// Floor division for uint32_t operands, evaluated in double precision.
// Both operands are widened to double, divided, floored, and the result is
// converted back to uint32_t on return.
template <>
struct FloorDivFunc<uint32_t> {
  __device__ __host__ __forceinline__ uint32_t operator()(const uint32_t &lhs, const uint32_t &rhs) {
    const double numerator = static_cast<double>(lhs);
    const double denominator = static_cast<double>(rhs);
    const double floored_quotient = floor(numerator / denominator);
    return floored_quotient;
  }
};
template <>
struct FloorDivFunc<half> {
@ -354,9 +378,27 @@ struct FloorModFunc<half2> {
// error. I realize the specializations are exactly the same, but I found
// no good alternative.
// Floor modulo for int32_t operands: lhs - floor(lhs / rhs) * rhs.
//
// The quotient is formed in double precision. Every int32_t value is exactly
// representable as a double, so the operands are not rounded by the
// conversion.
template <>
struct FloorModFunc<int32_t> {
  __device__ __host__ __forceinline__ int32_t operator()(const int32_t &lhs, const int32_t &rhs) {
    int32_t res = lhs - floor(static_cast<double>(lhs) / static_cast<double>(rhs)) * rhs;
    // Sign fix-up: when res is positive while rhs is negative, shift res by
    // rhs so the remainder takes the divisor's sign.
    res = (res > 1e-9) && ((res < 0.0) != (rhs < 0.0)) ? res + rhs : res;
    return res;
  }
};
// Floor modulo for int64_t operands: the remainder r = lhs - floor(lhs / rhs) * rhs,
// which is either zero or has the same sign as rhs.
//
// The remainder is computed with integer arithmetic. A double has only a
// 53-bit significand, so evaluating the formula in double drops the low bits
// of large operands and returns a wrong remainder.
template <>
struct FloorModFunc<int64_t> {
  __device__ __host__ __forceinline__ int64_t operator()(const int64_t &lhs, const int64_t &rhs) {
    if (rhs == 0) {
      // No valid remainder exists. Stay on the floating-point expression used
      // before so this case does not become an integer division by zero; the
      // value it yields comes from converting a non-finite double and is not
      // meaningful.
      return static_cast<int64_t>(lhs - floor(static_cast<double>(lhs) / static_cast<double>(rhs)) * rhs);
    }
    if (rhs == -1) {
      // Every integer is a multiple of -1. Answering directly also avoids
      // evaluating INT64_MIN % -1, which overflows.
      return 0;
    }

    // The C++ % operator gives a remainder with the sign of the dividend.
    // Shift it by rhs when it is non-zero and its sign differs from the
    // divisor's, which converts it to the floored remainder.
    int64_t remainder = lhs % rhs;
    if (remainder != 0 && ((remainder < 0) != (rhs < 0))) {
      remainder += rhs;
    }
    return remainder;
  }
};
template <>
struct FloorModFunc<uint32_t> {
__device__ __host__ __forceinline__ int32_t operator()(const uint32_t &lhs, const uint32_t &rhs) {
int32_t res = lhs - floor(static_cast<double>(lhs) / static_cast<double>(rhs)) * rhs;
res = (res > 1e-9) && ((res < 0.0) != (rhs < 0.0)) ? res + rhs : res;
return res;
}
@ -364,8 +406,8 @@ struct FloorModFunc<uint32_t> {
template <>
struct FloorModFunc<uint64_t> {
__device__ __host__ __forceinline__ int64_t operator()(const int64_t &lhs, const int64_t &rhs) {
int64_t res = lhs - floorf(lhs / rhs) * rhs;
__device__ __host__ __forceinline__ int64_t operator()(const uint64_t &lhs, const uint64_t &rhs) {
int64_t res = lhs - floor(static_cast<double>(lhs) / static_cast<double>(rhs)) * rhs;
res = (res > 1e-9) && ((res < 0.0) != (rhs < 0.0)) ? res + rhs : res;
return res;
}