forked from mindspore-Ecosystem/mindspore
add gpu mirror_pad data type
parent e231e8d824
commit 4c0b6141a7
@@ -17,6 +17,7 @@
#include <stdio.h>
#include <stdint.h>
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/mirror_pad_impl.cuh"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/complex.h"
#include "include/cuda_fp16.h"

// check for existence in current padded array on X and Y dims

@@ -253,33 +254,44 @@ void CalMirrorPadGrad(const size_t dx_size, const size_t interim_dy_size, T *dy,
mode, dx);
}

template CUDA_LIB_EXPORT void CalMirrorPad<float>(const size_t size, const float *input, const int old_batch,
const int old_channel, const int old_height, const int old_width,
const int padded_height, const int padded_width, int padd_num,
const int64_t *paddings, int mode, float *output,
cudaStream_t cuda_stream);
template CUDA_LIB_EXPORT void CalMirrorPad<half>(const size_t size, const half *input, const int old_batch,
const int old_channel, const int old_height, const int old_width,
const int padded_height, const int padded_width, int padd_num,
const int64_t *paddings, int mode, half *output,
cudaStream_t cuda_stream);
template CUDA_LIB_EXPORT void CalMirrorPad<int>(const size_t size, const int *input, const int old_batch,
const int old_channel, const int old_height, const int old_width,
const int padded_height, const int padded_width, int padd_num,
const int64_t *paddings, int mode, int *output,
cudaStream_t cuda_stream);
template CUDA_LIB_EXPORT void CalMirrorPadGrad<float>(const size_t dx_size, const size_t dy_size, float *dy,
float *interim_dy, const int dx_batches, const int dx_channels,
const int dx_height, const int dx_width, const int dy_height,
const int dy_width, const int padd_dim, const int64_t *paddings,
int mode, float *dx, cudaStream_t cuda_stream);
template CUDA_LIB_EXPORT void CalMirrorPadGrad<half>(const size_t dx_size, const size_t dy_size, half *dy,
half *interim_dy, const int dx_batches, const int dx_channels,
const int dx_height, const int dx_width, const int dy_height,
const int dy_width, const int padd_dim, const int64_t *paddings,
int mode, half *dx, cudaStream_t cuda_stream);
template CUDA_LIB_EXPORT void CalMirrorPadGrad<int>(const size_t dx_size, const size_t dy_size, int *dy,
int *interim_dy, const int dx_batches, const int dx_channels,
const int dx_height, const int dx_width, const int dy_height,
const int dy_width, const int padd_dim, const int64_t *paddings,
int mode, int *dx, cudaStream_t cuda_stream);
#define REG_MIRROR_PAD_CUDA(type) \
template CUDA_LIB_EXPORT void CalMirrorPad<type>( \
const size_t size, const type *input, const int old_batch, const int old_channel, const int old_height, \
const int old_width, const int padded_height, const int padded_width, int padd_num, const int64_t *paddings, \
int mode, type *output, cudaStream_t cuda_stream)

REG_MIRROR_PAD_CUDA(half);
REG_MIRROR_PAD_CUDA(float);
REG_MIRROR_PAD_CUDA(double);
REG_MIRROR_PAD_CUDA(uint8_t);
REG_MIRROR_PAD_CUDA(uint16_t);
REG_MIRROR_PAD_CUDA(uint32_t);
REG_MIRROR_PAD_CUDA(uint64_t);
REG_MIRROR_PAD_CUDA(int8_t);
REG_MIRROR_PAD_CUDA(int16_t);
REG_MIRROR_PAD_CUDA(int32_t);
REG_MIRROR_PAD_CUDA(int64_t);
REG_MIRROR_PAD_CUDA(bool);
REG_MIRROR_PAD_CUDA(Complex<float>);
REG_MIRROR_PAD_CUDA(Complex<double>);

#define REG_MIRROR_PAD_GRAD_CUDA(type) \
template CUDA_LIB_EXPORT void CalMirrorPadGrad<type>( \
const size_t dx_size, const size_t dy_size, type *dy, type *interim_dy, const int dx_batches, \
const int dx_channels, const int dx_height, const int dx_width, const int dy_height, const int dy_width, \
const int padd_dim, const int64_t *paddings, int mode, type *dx, cudaStream_t cuda_stream);

REG_MIRROR_PAD_GRAD_CUDA(half);
REG_MIRROR_PAD_GRAD_CUDA(float);
REG_MIRROR_PAD_GRAD_CUDA(double);
REG_MIRROR_PAD_GRAD_CUDA(uint8_t);
REG_MIRROR_PAD_GRAD_CUDA(uint16_t);
REG_MIRROR_PAD_GRAD_CUDA(uint32_t);
REG_MIRROR_PAD_GRAD_CUDA(uint64_t);
REG_MIRROR_PAD_GRAD_CUDA(int8_t);
REG_MIRROR_PAD_GRAD_CUDA(int16_t);
REG_MIRROR_PAD_GRAD_CUDA(int32_t);
REG_MIRROR_PAD_GRAD_CUDA(int64_t);
REG_MIRROR_PAD_GRAD_CUDA(bool);
REG_MIRROR_PAD_GRAD_CUDA(Complex<float>);
REG_MIRROR_PAD_GRAD_CUDA(Complex<double>);
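
The REG_MIRROR_PAD_CUDA and REG_MIRROR_PAD_GRAD_CUDA macros replace the per-type explicit instantiations with one registration line per supported data type, which is how the new double, unsigned, bool, and Complex instantiations are introduced here. As a rough hand-expanded illustration (derived from the macro definition above; this expansion is not itself a line of the diff), REG_MIRROR_PAD_CUDA(double); expands to:

template CUDA_LIB_EXPORT void CalMirrorPad<double>(
const size_t size, const double *input, const int old_batch, const int old_channel, const int old_height,
const int old_width, const int padded_height, const int padded_width, int padd_num, const int64_t *paddings,
int mode, double *output, cudaStream_t cuda_stream);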

@@ -15,6 +15,7 @@
*/

#include "plugin/device/gpu/kernel/nn/mirror_pad_gpu_kernel.h"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/complex.h"
#include "ops/mirror_pad.h"

namespace mindspore {

@@ -102,16 +103,45 @@ int MirrorPadGpuKernelMod::Resize(const BaseOperatorPtr &base_operator, const st
return static_cast<int>(KRET_OK);
}

#define REG_MIRROR_PAD_GPU_KERNEL(TypeId1, TypeId2, Type1) \
{ \
KernelAttr().AddInputAttr(TypeId1).AddInputAttr(TypeId2).AddOutputAttr(TypeId1), \
&MirrorPadGpuKernelMod::LaunchKernel<Type1> \
}

using KernelRunFunc = MirrorPadGpuKernelMod::KernelRunFunc;
// In the Python API description the input data type is "number", but CalExtractImagePatchesNHWC only supports four types.
const std::vector<std::pair<KernelAttr, KernelRunFunc>> &MirrorPadGpuKernelMod::GetFuncList() const {
static const std::vector<std::pair<KernelAttr, KernelRunFunc>> func_list = {
{KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeFloat32),
&MirrorPadGpuKernelMod::LaunchKernel<float>},
{KernelAttr().AddInputAttr(kNumberTypeFloat16).AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeFloat16),
&MirrorPadGpuKernelMod::LaunchKernel<half>},
{KernelAttr().AddInputAttr(kNumberTypeInt32).AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeInt32),
&MirrorPadGpuKernelMod::LaunchKernel<int>},
REG_MIRROR_PAD_GPU_KERNEL(kNumberTypeFloat64, kNumberTypeInt64, double),
REG_MIRROR_PAD_GPU_KERNEL(kNumberTypeFloat32, kNumberTypeInt64, float),
REG_MIRROR_PAD_GPU_KERNEL(kNumberTypeFloat16, kNumberTypeInt64, half),
REG_MIRROR_PAD_GPU_KERNEL(kNumberTypeInt64, kNumberTypeInt64, int64_t),
REG_MIRROR_PAD_GPU_KERNEL(kNumberTypeInt32, kNumberTypeInt64, int32_t),
REG_MIRROR_PAD_GPU_KERNEL(kNumberTypeInt16, kNumberTypeInt64, int16_t),
REG_MIRROR_PAD_GPU_KERNEL(kNumberTypeInt8, kNumberTypeInt64, int8_t),
REG_MIRROR_PAD_GPU_KERNEL(kNumberTypeUInt64, kNumberTypeInt64, uint64_t),
REG_MIRROR_PAD_GPU_KERNEL(kNumberTypeUInt32, kNumberTypeInt64, uint32_t),
REG_MIRROR_PAD_GPU_KERNEL(kNumberTypeUInt16, kNumberTypeInt64, uint16_t),
REG_MIRROR_PAD_GPU_KERNEL(kNumberTypeUInt8, kNumberTypeInt64, uint8_t),
REG_MIRROR_PAD_GPU_KERNEL(kNumberTypeBool, kNumberTypeInt64, bool),
REG_MIRROR_PAD_GPU_KERNEL(kNumberTypeComplex64, kNumberTypeInt64, utils::Complex<float>),
REG_MIRROR_PAD_GPU_KERNEL(kNumberTypeComplex128, kNumberTypeInt64, utils::Complex<double>),

REG_MIRROR_PAD_GPU_KERNEL(kNumberTypeFloat64, kNumberTypeInt32, double),
REG_MIRROR_PAD_GPU_KERNEL(kNumberTypeFloat32, kNumberTypeInt32, float),
REG_MIRROR_PAD_GPU_KERNEL(kNumberTypeFloat16, kNumberTypeInt32, half),
REG_MIRROR_PAD_GPU_KERNEL(kNumberTypeInt64, kNumberTypeInt32, int64_t),
REG_MIRROR_PAD_GPU_KERNEL(kNumberTypeInt32, kNumberTypeInt32, int32_t),
REG_MIRROR_PAD_GPU_KERNEL(kNumberTypeInt16, kNumberTypeInt32, int16_t),
REG_MIRROR_PAD_GPU_KERNEL(kNumberTypeInt8, kNumberTypeInt32, int8_t),
REG_MIRROR_PAD_GPU_KERNEL(kNumberTypeUInt64, kNumberTypeInt32, uint64_t),
REG_MIRROR_PAD_GPU_KERNEL(kNumberTypeUInt32, kNumberTypeInt32, uint32_t),
REG_MIRROR_PAD_GPU_KERNEL(kNumberTypeUInt16, kNumberTypeInt32, uint16_t),
REG_MIRROR_PAD_GPU_KERNEL(kNumberTypeUInt8, kNumberTypeInt32, uint8_t),
REG_MIRROR_PAD_GPU_KERNEL(kNumberTypeBool, kNumberTypeInt32, bool),
REG_MIRROR_PAD_GPU_KERNEL(kNumberTypeComplex64, kNumberTypeInt32, utils::Complex<float>),
REG_MIRROR_PAD_GPU_KERNEL(kNumberTypeComplex128, kNumberTypeInt32, utils::Complex<double>),
};
return func_list;
}
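
Each REG_MIRROR_PAD_GPU_KERNEL(...) entry expands into a {KernelAttr, launch-function} pair with the same shape as the hand-written entries at the top of func_list. As a hand-expanded sketch (not itself a line of the diff), REG_MIRROR_PAD_GPU_KERNEL(kNumberTypeFloat64, kNumberTypeInt64, double) becomes:

{KernelAttr().AddInputAttr(kNumberTypeFloat64).AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeFloat64),
&MirrorPadGpuKernelMod::LaunchKernel<double>}

The second macro argument is the type of the paddings input, which is why every value type is registered twice: once with int64 paddings and once with int32 paddings.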

@@ -15,6 +15,7 @@
*/

#include "plugin/device/gpu/kernel/nn/mirror_pad_grad_gpu_kernel.h"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/complex.h"

namespace mindspore {
namespace kernel {

@@ -171,14 +172,43 @@ int MirrorPadGradGpuKernelMod::Resize(const BaseOperatorPtr &base_operator, cons
return static_cast<int>(ret);
}

#define REG_MIRROR_PAD_GRAD_GPU_KERNEL(TypeId1, TypeId2, Type1) \
{ \
KernelAttr().AddInputAttr(TypeId1).AddInputAttr(TypeId2).AddOutputAttr(TypeId1), \
&MirrorPadGradGpuKernelMod::LaunchKernel<Type1> \
}

std::vector<std::pair<KernelAttr, MirrorPadGradGpuKernelMod::MirrorPadGradLaunchFunc>>
MirrorPadGradGpuKernelMod::func_list_ = {
{KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeFloat32),
&MirrorPadGradGpuKernelMod::LaunchKernel<float>},
{KernelAttr().AddInputAttr(kNumberTypeFloat16).AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeFloat16),
&MirrorPadGradGpuKernelMod::LaunchKernel<half>},
{KernelAttr().AddInputAttr(kNumberTypeInt32).AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeInt32),
&MirrorPadGradGpuKernelMod::LaunchKernel<int>},
REG_MIRROR_PAD_GRAD_GPU_KERNEL(kNumberTypeFloat64, kNumberTypeInt64, double),
REG_MIRROR_PAD_GRAD_GPU_KERNEL(kNumberTypeFloat32, kNumberTypeInt64, float),
REG_MIRROR_PAD_GRAD_GPU_KERNEL(kNumberTypeFloat16, kNumberTypeInt64, half),
REG_MIRROR_PAD_GRAD_GPU_KERNEL(kNumberTypeInt64, kNumberTypeInt64, int64_t),
REG_MIRROR_PAD_GRAD_GPU_KERNEL(kNumberTypeInt32, kNumberTypeInt64, int32_t),
REG_MIRROR_PAD_GRAD_GPU_KERNEL(kNumberTypeInt16, kNumberTypeInt64, int16_t),
REG_MIRROR_PAD_GRAD_GPU_KERNEL(kNumberTypeInt8, kNumberTypeInt64, int8_t),
REG_MIRROR_PAD_GRAD_GPU_KERNEL(kNumberTypeUInt64, kNumberTypeInt64, uint64_t),
REG_MIRROR_PAD_GRAD_GPU_KERNEL(kNumberTypeUInt32, kNumberTypeInt64, uint32_t),
REG_MIRROR_PAD_GRAD_GPU_KERNEL(kNumberTypeUInt16, kNumberTypeInt64, uint16_t),
REG_MIRROR_PAD_GRAD_GPU_KERNEL(kNumberTypeUInt8, kNumberTypeInt64, uint8_t),
REG_MIRROR_PAD_GRAD_GPU_KERNEL(kNumberTypeBool, kNumberTypeInt64, bool),
REG_MIRROR_PAD_GRAD_GPU_KERNEL(kNumberTypeComplex64, kNumberTypeInt64, utils::Complex<float>),
REG_MIRROR_PAD_GRAD_GPU_KERNEL(kNumberTypeComplex128, kNumberTypeInt64, utils::Complex<double>),

REG_MIRROR_PAD_GRAD_GPU_KERNEL(kNumberTypeFloat64, kNumberTypeInt32, double),
REG_MIRROR_PAD_GRAD_GPU_KERNEL(kNumberTypeFloat32, kNumberTypeInt32, float),
REG_MIRROR_PAD_GRAD_GPU_KERNEL(kNumberTypeFloat16, kNumberTypeInt32, half),
REG_MIRROR_PAD_GRAD_GPU_KERNEL(kNumberTypeInt64, kNumberTypeInt32, int64_t),
REG_MIRROR_PAD_GRAD_GPU_KERNEL(kNumberTypeInt32, kNumberTypeInt32, int32_t),
REG_MIRROR_PAD_GRAD_GPU_KERNEL(kNumberTypeInt16, kNumberTypeInt32, int16_t),
REG_MIRROR_PAD_GRAD_GPU_KERNEL(kNumberTypeInt8, kNumberTypeInt32, int8_t),
REG_MIRROR_PAD_GRAD_GPU_KERNEL(kNumberTypeUInt64, kNumberTypeInt32, uint64_t),
REG_MIRROR_PAD_GRAD_GPU_KERNEL(kNumberTypeUInt32, kNumberTypeInt32, uint32_t),
REG_MIRROR_PAD_GRAD_GPU_KERNEL(kNumberTypeUInt16, kNumberTypeInt32, uint16_t),
REG_MIRROR_PAD_GRAD_GPU_KERNEL(kNumberTypeUInt8, kNumberTypeInt32, uint8_t),
REG_MIRROR_PAD_GRAD_GPU_KERNEL(kNumberTypeBool, kNumberTypeInt32, bool),
REG_MIRROR_PAD_GRAD_GPU_KERNEL(kNumberTypeComplex64, kNumberTypeInt32, utils::Complex<float>),
REG_MIRROR_PAD_GRAD_GPU_KERNEL(kNumberTypeComplex128, kNumberTypeInt32, utils::Complex<double>),
};

std::vector<KernelAttr> MirrorPadGradGpuKernelMod::GetOpSupport() {
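
The hunk is cut off at the start of GetOpSupport. As a minimal sketch of the usual pattern for such a method, assuming it simply collects the KernelAttr keys out of func_list_ (the actual body is not shown in this diff, and <algorithm> plus <iterator> are assumed to be available), it could look like:

// Sketch only: gather the KernelAttr entries registered in func_list_.
std::vector<KernelAttr> support_list;
(void)std::transform(func_list_.begin(), func_list_.end(), std::back_inserter(support_list),
[](const auto &item) { return item.first; });
return support_list;
}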