!45046 fix gpu pow issue

Merge pull request !45046 from chenweifeng/gpu-pow-integer
2022-11-03 08:55:20 +00:00 · 2022-11-03 08:55:20 +00:00 · f0607ba90e
parent 446bf7da80 7ae73813c6
commit f0607ba90e
1 changed files with 27 additions and 0 deletions
--- a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/cuda_ops/broadcast_impl.cu
+++ b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/cuda_ops/broadcast_impl.cu
@ -169,6 +169,33 @@ struct PowerFunc<half2> {
  }
 };

+// Specializes PowerFunc for the integer type T. The power is computed by exponentiation by squaring,
+// with every multiplication carried out in T; results that do not fit in T are not detected.
+//   lhs: base, rhs: exponent.
+//   rhs >= 0: returns lhs^rhs (an exponent of 0 yields 1 for every base, including 0).
+//   rhs <  0 (signed T only): returns the quotient 1 / lhs^|rhs| truncated toward zero, i.e. 1 when
+//            lhs == 1, +1 or -1 when lhs == -1 (even or odd rhs), and 0 for every other base.
+//            lhs == 0 has no mathematical result for a negative exponent; 0 is returned by convention.
+#define POW_INTEGER_IMPL(T)                                                            \
+  template <>                                                                          \
+  struct PowerFunc<T> {                                                                \
+    __device__ __host__ __forceinline__ T operator()(const T &lhs, const T &rhs) {     \
+      /* True only for signed T; written without an unsigned "< 0" comparison. */      \
+      const bool is_signed = static_cast<T>(-1) < static_cast<T>(1);                   \
+      if (is_signed && static_cast<int64_t>(rhs) < 0) {                                \
+        /* lhs^rhs == 1 / lhs^|rhs| truncates to 0 unless |lhs| == 1. */               \
+        if (lhs == static_cast<T>(1)) {                                                \
+          return static_cast<T>(1);                                                    \
+        }                                                                              \
+        if (lhs == static_cast<T>(-1)) {                                               \
+          return (rhs % 2 != 0) ? static_cast<T>(-1) : static_cast<T>(1);              \
+        }                                                                              \
+        return static_cast<T>(0);                                                      \
+      }                                                                                \
+      T ret = 1;                                                                       \
+      T base = lhs;                                                                    \
+      T exp = rhs;                                                                     \
+      while (exp) {                                                                    \
+        if (exp & 1) {                                                                 \
+          ret *= base;                                                                 \
+        }                                                                              \
+        exp >>= 1;                                                                     \
+        /* Square only while exponent bits remain: the final square is never used. */  \
+        if (exp) {                                                                     \
+          base *= base;                                                                \
+        }                                                                              \
+      }                                                                                \
+      return ret;                                                                      \
+    }                                                                                  \
+  };
+
+// Emit one PowerFunc specialization per fixed-width integer type, grouped by bit width.
+POW_INTEGER_IMPL(uint8_t)
+POW_INTEGER_IMPL(int8_t)
+POW_INTEGER_IMPL(uint16_t)
+POW_INTEGER_IMPL(int16_t)
+POW_INTEGER_IMPL(uint32_t)
+POW_INTEGER_IMPL(int32_t)
+POW_INTEGER_IMPL(uint64_t)
+POW_INTEGER_IMPL(int64_t)
+
 template <typename T>
 struct RealDivFunc {
  __device__ __host__ __forceinline__ T operator()(const T &lhs, const T &rhs) { return (lhs / rhs); }