From 200bd1fae39f93eccde117db5a8bbf8c1ff39774 Mon Sep 17 00:00:00 2001 From: zong_shuai Date: Mon, 13 Sep 2021 10:49:23 +0800 Subject: [PATCH 1/8] precise int64 --- .../kernel_compiler/gpu/cuda_impl/broadcast_impl.cu | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/broadcast_impl.cu b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/broadcast_impl.cu index e10f1e3df25..7f94ab36b2f 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/broadcast_impl.cu +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/broadcast_impl.cu @@ -223,10 +223,9 @@ struct DivNoNanFunc { template struct FloorDivFunc { __device__ __host__ __forceinline__ T operator()(const T &lhs, const T &rhs) { - return floorf(static_cast(lhs) / static_cast(rhs)); + return floor(static_cast(lhs) / static_cast(rhs)); } }; - template <> struct FloorDivFunc { __device__ __host__ __forceinline__ half operator()(const half &lhs, const half &rhs) { @@ -251,8 +250,8 @@ struct ModFunc { T data_div = lhs / rhs; T data_div_min = data_div < 0.0 ? data_div : 0.0; T data_div_max = data_div > 0.0 ? data_div : 0.0; - T data_div_max_floor = floorf(data_div_max); - T data_div_min_ceil = ceilf(data_div_min); + T data_div_max_floor = static_cast(floor(static_cast(data_div_max))); + T data_div_min_ceil = static_cast(ceil(static_cast(data_div_min))); T data_div_res = data_div_max_floor + data_div_min_ceil; return lhs - data_div_res * rhs; } @@ -292,7 +291,7 @@ struct ModFunc { template struct FloorModFunc { __device__ __host__ __forceinline__ T operator()(const T &lhs, const T &rhs) { - T res = lhs - floorf(lhs / rhs) * rhs; + T res = lhs - static_cast(floor(static_cast(lhs) / static_cast(rhs))) * rhs; res = (std::abs(res) > 1e-9) && ((res < 0.0) != (rhs < 0.0)) ? res + rhs : res; return res; } From c2bc0382dc90f21d08ffd93797ffddd4233d8a4e Mon Sep 17 00:00:00 2001 From: zong_shuai Date: Tue, 14 Sep 2021 17:48:36 +0800 Subject: [PATCH 2/8] int_64 --- .../gpu/cuda_impl/broadcast_impl.cu | 49 +++++++++++++++++-- 1 file changed, 46 insertions(+), 3 deletions(-) diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/broadcast_impl.cu b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/broadcast_impl.cu index 7f94ab36b2f..b76260c8297 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/broadcast_impl.cu +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/broadcast_impl.cu @@ -223,9 +223,34 @@ struct DivNoNanFunc { template struct FloorDivFunc { __device__ __host__ __forceinline__ T operator()(const T &lhs, const T &rhs) { - return floor(static_cast(lhs) / static_cast(rhs)); + return floorf(static_cast(lhs) / static_cast(rhs)); } }; +template <> +struct FloorDivFunc { + __device__ __host__ __forceinline__ int64_t operator()(const int64_t &lhs, const int64_t &rhs) { + return floorl(static_cast(lhs) / static_cast(rhs)); + } +}; +template <> +struct FloorDivFunc { + __device__ __host__ __forceinline__ int32_t operator()(const int32_t &lhs, const int32_t &rhs) { + return floorl(static_cast(lhs) / static_cast(rhs)); + } +}; +template <> +struct FloorDivFunc { + __device__ __host__ __forceinline__ int64_t operator()(const uint64_t &lhs, const uint64_t &rhs) { + return floorl(static_cast(lhs) / static_cast(rhs)); + } +}; +template <> +struct FloorDivFunc { + __device__ __host__ __forceinline__ uint32_t operator()(const uint32_t &lhs, const uint32_t &rhs) { + return floorl(static_cast(lhs) / static_cast(rhs)); + } +}; + template <> struct FloorDivFunc { __device__ __host__ __forceinline__ half operator()(const half &lhs, const half &rhs) { @@ -326,10 +351,28 @@ struct FloorModFunc { // because of a 'more than one instance of overloaded function "std::abs"' // error. I realize the specializations are exactly the same, but I found // no good alternative. +template <> +struct FloorModFunc { + __device__ __host__ __forceinline__ int32_t operator()(const int32_t &lhs, const int32_t &rhs) { + int32_t res = lhs - floorl(static_cast(lhs) / static_cast(rhs)) * rhs; + res = (res > 1e-9) && ((res < 0.0) != (rhs < 0.0)) ? res + rhs : res; + return res; + } +}; + +template <> +struct FloorModFunc { + __device__ __host__ __forceinline__ int64_t operator()(const int64_t &lhs, const int64_t &rhs) { + int64_t res = lhs - floorl(static_cast(lhs) / static_cast(rhs)) * rhs; + res = (res > 1e-9) && ((res < 0.0) != (rhs < 0.0)) ? res + rhs : res; + return res; + } +}; + template <> struct FloorModFunc { __device__ __host__ __forceinline__ int32_t operator()(const int32_t &lhs, const int32_t &rhs) { - int32_t res = lhs - floorf(lhs / rhs) * rhs; + int32_t res = lhs - floorl(static_cast(lhs) / static_cast(rhs)) * rhs; res = (res > 1e-9) && ((res < 0.0) != (rhs < 0.0)) ? res + rhs : res; return res; } @@ -338,7 +381,7 @@ struct FloorModFunc { template <> struct FloorModFunc { __device__ __host__ __forceinline__ int64_t operator()(const int64_t &lhs, const int64_t &rhs) { - int64_t res = lhs - floorf(lhs / rhs) * rhs; + int64_t res = lhs - floorl(static_cast(lhs) / static_cast(rhs)) * rhs; res = (res > 1e-9) && ((res < 0.0) != (rhs < 0.0)) ? res + rhs : res; return res; } From 195da73f7158e71805b5475cedfac7b4f25718e8 Mon Sep 17 00:00:00 2001 From: zong_shuai Date: Tue, 14 Sep 2021 17:58:07 +0800 Subject: [PATCH 3/8] int64 --- .../gpu/cuda_impl/broadcast_impl.cu | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/broadcast_impl.cu b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/broadcast_impl.cu index b76260c8297..b0ca8701e54 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/broadcast_impl.cu +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/broadcast_impl.cu @@ -229,25 +229,25 @@ struct FloorDivFunc { template <> struct FloorDivFunc { __device__ __host__ __forceinline__ int64_t operator()(const int64_t &lhs, const int64_t &rhs) { - return floorl(static_cast(lhs) / static_cast(rhs)); + return floorl(static_cast(lhs) / static_cast(rhs)); } }; template <> struct FloorDivFunc { __device__ __host__ __forceinline__ int32_t operator()(const int32_t &lhs, const int32_t &rhs) { - return floorl(static_cast(lhs) / static_cast(rhs)); + return floorl(static_cast(lhs) / static_cast(rhs)); } }; template <> struct FloorDivFunc { __device__ __host__ __forceinline__ int64_t operator()(const uint64_t &lhs, const uint64_t &rhs) { - return floorl(static_cast(lhs) / static_cast(rhs)); + return floorl(static_cast(lhs) / static_cast(rhs)); } }; template <> -struct FloorDivFunc { +struct FloorDivFunc { __device__ __host__ __forceinline__ uint32_t operator()(const uint32_t &lhs, const uint32_t &rhs) { - return floorl(static_cast(lhs) / static_cast(rhs)); + return floorl(static_cast(lhs) / static_cast(rhs)); } }; @@ -354,7 +354,7 @@ struct FloorModFunc { template <> struct FloorModFunc { __device__ __host__ __forceinline__ int32_t operator()(const int32_t &lhs, const int32_t &rhs) { - int32_t res = lhs - floorl(static_cast(lhs) / static_cast(rhs)) * rhs; + int32_t res = lhs - floorl(static_cast(lhs) / static_cast(rhs)) * rhs; res = (res > 1e-9) && ((res < 0.0) != (rhs < 0.0)) ? res + rhs : res; return res; } @@ -363,7 +363,7 @@ struct FloorModFunc { template <> struct FloorModFunc { __device__ __host__ __forceinline__ int64_t operator()(const int64_t &lhs, const int64_t &rhs) { - int64_t res = lhs - floorl(static_cast(lhs) / static_cast(rhs)) * rhs; + int64_t res = lhs - floorl(static_cast(lhs) / static_cast(rhs)) * rhs; res = (res > 1e-9) && ((res < 0.0) != (rhs < 0.0)) ? res + rhs : res; return res; } @@ -372,7 +372,7 @@ struct FloorModFunc { template <> struct FloorModFunc { __device__ __host__ __forceinline__ int32_t operator()(const int32_t &lhs, const int32_t &rhs) { - int32_t res = lhs - floorl(static_cast(lhs) / static_cast(rhs)) * rhs; + int32_t res = lhs - floorl(static_cast(lhs) / static_cast(rhs)) * rhs; res = (res > 1e-9) && ((res < 0.0) != (rhs < 0.0)) ? res + rhs : res; return res; } @@ -381,7 +381,7 @@ struct FloorModFunc { template <> struct FloorModFunc { __device__ __host__ __forceinline__ int64_t operator()(const int64_t &lhs, const int64_t &rhs) { - int64_t res = lhs - floorl(static_cast(lhs) / static_cast(rhs)) * rhs; + int64_t res = lhs - floorl(static_cast(lhs) / static_cast(rhs)) * rhs; res = (res > 1e-9) && ((res < 0.0) != (rhs < 0.0)) ? res + rhs : res; return res; } From b860b197fd53d719d906c0b6ae1ad65e4bf13b66 Mon Sep 17 00:00:00 2001 From: zong_shuai Date: Tue, 14 Sep 2021 18:01:01 +0800 Subject: [PATCH 4/8] int 64 --- .../gpu/cuda_impl/broadcast_impl.cu | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/broadcast_impl.cu b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/broadcast_impl.cu index b0ca8701e54..42d03b7f15b 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/broadcast_impl.cu +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/broadcast_impl.cu @@ -229,25 +229,25 @@ struct FloorDivFunc { template <> struct FloorDivFunc { __device__ __host__ __forceinline__ int64_t operator()(const int64_t &lhs, const int64_t &rhs) { - return floorl(static_cast(lhs) / static_cast(rhs)); + return floor(static_cast(lhs) / static_cast(rhs)); } }; template <> struct FloorDivFunc { __device__ __host__ __forceinline__ int32_t operator()(const int32_t &lhs, const int32_t &rhs) { - return floorl(static_cast(lhs) / static_cast(rhs)); + return floor(static_cast(lhs) / static_cast(rhs)); } }; template <> struct FloorDivFunc { __device__ __host__ __forceinline__ int64_t operator()(const uint64_t &lhs, const uint64_t &rhs) { - return floorl(static_cast(lhs) / static_cast(rhs)); + return floor(static_cast(lhs) / static_cast(rhs)); } }; template <> struct FloorDivFunc { __device__ __host__ __forceinline__ uint32_t operator()(const uint32_t &lhs, const uint32_t &rhs) { - return floorl(static_cast(lhs) / static_cast(rhs)); + return floor(static_cast(lhs) / static_cast(rhs)); } }; @@ -354,7 +354,7 @@ struct FloorModFunc { template <> struct FloorModFunc { __device__ __host__ __forceinline__ int32_t operator()(const int32_t &lhs, const int32_t &rhs) { - int32_t res = lhs - floorl(static_cast(lhs) / static_cast(rhs)) * rhs; + int32_t res = lhs - floor(static_cast(lhs) / static_cast(rhs)) * rhs; res = (res > 1e-9) && ((res < 0.0) != (rhs < 0.0)) ? res + rhs : res; return res; } @@ -363,7 +363,7 @@ struct FloorModFunc { template <> struct FloorModFunc { __device__ __host__ __forceinline__ int64_t operator()(const int64_t &lhs, const int64_t &rhs) { - int64_t res = lhs - floorl(static_cast(lhs) / static_cast(rhs)) * rhs; + int64_t res = lhs - floor(static_cast(lhs) / static_cast(rhs)) * rhs; res = (res > 1e-9) && ((res < 0.0) != (rhs < 0.0)) ? res + rhs : res; return res; } @@ -372,7 +372,7 @@ struct FloorModFunc { template <> struct FloorModFunc { __device__ __host__ __forceinline__ int32_t operator()(const int32_t &lhs, const int32_t &rhs) { - int32_t res = lhs - floorl(static_cast(lhs) / static_cast(rhs)) * rhs; + int32_t res = lhs - floor(static_cast(lhs) / static_cast(rhs)) * rhs; res = (res > 1e-9) && ((res < 0.0) != (rhs < 0.0)) ? res + rhs : res; return res; } @@ -381,7 +381,7 @@ struct FloorModFunc { template <> struct FloorModFunc { __device__ __host__ __forceinline__ int64_t operator()(const int64_t &lhs, const int64_t &rhs) { - int64_t res = lhs - floorl(static_cast(lhs) / static_cast(rhs)) * rhs; + int64_t res = lhs - floor(static_cast(lhs) / static_cast(rhs)) * rhs; res = (res > 1e-9) && ((res < 0.0) != (rhs < 0.0)) ? res + rhs : res; return res; } From 20bf48310990158c49857b5e0ba397a8b77e4e58 Mon Sep 17 00:00:00 2001 From: zong_shuai Date: Tue, 14 Sep 2021 18:17:45 +0800 Subject: [PATCH 5/8] int64 --- .../backend/kernel_compiler/gpu/cuda_impl/broadcast_impl.cu | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/broadcast_impl.cu b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/broadcast_impl.cu index 42d03b7f15b..46cefddfb56 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/broadcast_impl.cu +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/broadcast_impl.cu @@ -275,8 +275,8 @@ struct ModFunc { T data_div = lhs / rhs; T data_div_min = data_div < 0.0 ? data_div : 0.0; T data_div_max = data_div > 0.0 ? data_div : 0.0; - T data_div_max_floor = static_cast(floor(static_cast(data_div_max))); - T data_div_min_ceil = static_cast(ceil(static_cast(data_div_min))); + T data_div_max_floor = floorf(data_div_max); + T data_div_min_ceil = ceilf(data_div_min); T data_div_res = data_div_max_floor + data_div_min_ceil; return lhs - data_div_res * rhs; } From 42c2d198602a818759604a369f3373d5b6afe3f8 Mon Sep 17 00:00:00 2001 From: zong_shuai Date: Tue, 14 Sep 2021 18:19:55 +0800 Subject: [PATCH 6/8] precise --- .../backend/kernel_compiler/gpu/cuda_impl/broadcast_impl.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/broadcast_impl.cu b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/broadcast_impl.cu index 46cefddfb56..65343d3fb7a 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/broadcast_impl.cu +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/broadcast_impl.cu @@ -316,7 +316,7 @@ struct ModFunc { template struct FloorModFunc { __device__ __host__ __forceinline__ T operator()(const T &lhs, const T &rhs) { - T res = lhs - static_cast(floor(static_cast(lhs) / static_cast(rhs))) * rhs; + T res = lhs - floorf(lhs / rhs) * rhs; res = (std::abs(res) > 1e-9) && ((res < 0.0) != (rhs < 0.0)) ? res + rhs : res; return res; } From a472484732f0913527030c51e9d815dc2f37e2b8 Mon Sep 17 00:00:00 2001 From: zong_shuai Date: Tue, 14 Sep 2021 18:33:10 +0800 Subject: [PATCH 7/8] int --- .../backend/kernel_compiler/gpu/cuda_impl/broadcast_impl.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/broadcast_impl.cu b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/broadcast_impl.cu index 65343d3fb7a..2e3e19f465d 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/broadcast_impl.cu +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/broadcast_impl.cu @@ -371,7 +371,7 @@ struct FloorModFunc { template <> struct FloorModFunc { - __device__ __host__ __forceinline__ int32_t operator()(const int32_t &lhs, const int32_t &rhs) { + __device__ __host__ __forceinline__ int32_t operator()(const uint32_t &lhs, const uint32_t &rhs) { int32_t res = lhs - floor(static_cast(lhs) / static_cast(rhs)) * rhs; res = (res > 1e-9) && ((res < 0.0) != (rhs < 0.0)) ? res + rhs : res; return res; From 890090436b4fbde839b729d237f1992b92edaa88 Mon Sep 17 00:00:00 2001 From: zong_shuai Date: Tue, 14 Sep 2021 18:38:30 +0800 Subject: [PATCH 8/8] int64_precise --- .../backend/kernel_compiler/gpu/cuda_impl/broadcast_impl.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/broadcast_impl.cu b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/broadcast_impl.cu index 2e3e19f465d..94e7c927bcd 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/broadcast_impl.cu +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/broadcast_impl.cu @@ -380,7 +380,7 @@ struct FloorModFunc { template <> struct FloorModFunc { - __device__ __host__ __forceinline__ int64_t operator()(const int64_t &lhs, const int64_t &rhs) { + __device__ __host__ __forceinline__ int64_t operator()(const uint64_t &lhs, const uint64_t &rhs) { int64_t res = lhs - floor(static_cast(lhs) / static_cast(rhs)) * rhs; res = (res > 1e-9) && ((res < 0.0) != (rhs < 0.0)) ? res + rhs : res; return res;