diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/cuda_ops/mirror_pad_impl.cu b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/cuda_ops/mirror_pad_impl.cu
index ad5170ac1e1..d474805c791 100755
--- a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/cuda_ops/mirror_pad_impl.cu
+++ b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/cuda_ops/mirror_pad_impl.cu
@@ -17,6 +17,7 @@
 #include <algorithm>
 #include <iostream>
 #include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/mirror_pad_impl.cuh"
+#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/complex.h"
 #include "include/cuda_fp16.h"
 
 // check for existence in current padded array on X and Y dims
@@ -253,33 +254,44 @@ void CalMirrorPadGrad(const size_t dx_size, const size_t interim_dy_size, T *dy,
                       mode, dx);
 }
 
-template CUDA_LIB_EXPORT void CalMirrorPad(const size_t size, const float *input, const int old_batch,
-                                           const int old_channel, const int old_height, const int old_width,
-                                           const int padded_height, const int padded_width, int padd_num,
-                                           const int64_t *paddings, int mode, float *output,
-                                           cudaStream_t cuda_stream);
-template CUDA_LIB_EXPORT void CalMirrorPad(const size_t size, const half *input, const int old_batch,
-                                           const int old_channel, const int old_height, const int old_width,
-                                           const int padded_height, const int padded_width, int padd_num,
-                                           const int64_t *paddings, int mode, half *output,
-                                           cudaStream_t cuda_stream);
-template CUDA_LIB_EXPORT void CalMirrorPad(const size_t size, const int *input, const int old_batch,
-                                           const int old_channel, const int old_height, const int old_width,
-                                           const int padded_height, const int padded_width, int padd_num,
-                                           const int64_t *paddings, int mode, int *output,
-                                           cudaStream_t cuda_stream);
-template CUDA_LIB_EXPORT void CalMirrorPadGrad(const size_t dx_size, const size_t dy_size, float *dy,
-                                               float *interim_dy, const int dx_batches, const int dx_channels,
-                                               const int dx_height, const int dx_width, const int dy_height,
-                                               const int dy_width, const int padd_dim, const
int64_t *paddings,
-                                               int mode, float *dx, cudaStream_t cuda_stream);
-template CUDA_LIB_EXPORT void CalMirrorPadGrad(const size_t dx_size, const size_t dy_size, half *dy,
-                                               half *interim_dy, const int dx_batches, const int dx_channels,
-                                               const int dx_height, const int dx_width, const int dy_height,
-                                               const int dy_width, const int padd_dim, const int64_t *paddings,
-                                               int mode, half *dx, cudaStream_t cuda_stream);
-template CUDA_LIB_EXPORT void CalMirrorPadGrad(const size_t dx_size, const size_t dy_size, int *dy,
-                                               int *interim_dy, const int dx_batches, const int dx_channels,
-                                               const int dx_height, const int dx_width, const int dy_height,
-                                               const int dy_width, const int padd_dim, const int64_t *paddings,
-                                               int mode, int *dx, cudaStream_t cuda_stream);
+#define REG_MIRROR_PAD_CUDA(type)                                                                                     \
+  template CUDA_LIB_EXPORT void CalMirrorPad(                                                                         \
+    const size_t size, const type *input, const int old_batch, const int old_channel, const int old_height,           \
+    const int old_width, const int padded_height, const int padded_width, int padd_num, const int64_t *paddings,      \
+    int mode, type *output, cudaStream_t cuda_stream)
+
+REG_MIRROR_PAD_CUDA(half);
+REG_MIRROR_PAD_CUDA(float);
+REG_MIRROR_PAD_CUDA(double);
+REG_MIRROR_PAD_CUDA(uint8_t);
+REG_MIRROR_PAD_CUDA(uint16_t);
+REG_MIRROR_PAD_CUDA(uint32_t);
+REG_MIRROR_PAD_CUDA(uint64_t);
+REG_MIRROR_PAD_CUDA(int8_t);
+REG_MIRROR_PAD_CUDA(int16_t);
+REG_MIRROR_PAD_CUDA(int32_t);
+REG_MIRROR_PAD_CUDA(int64_t);
+REG_MIRROR_PAD_CUDA(bool);
+REG_MIRROR_PAD_CUDA(Complex<float>);
+REG_MIRROR_PAD_CUDA(Complex<double>);
+
+#define REG_MIRROR_PAD_GRAD_CUDA(type)                                                                                \
+  template CUDA_LIB_EXPORT void CalMirrorPadGrad(                                                                     \
+    const size_t dx_size, const size_t dy_size, type *dy, type *interim_dy, const int dx_batches,                     \
+    const int dx_channels, const int dx_height, const int dx_width, const int dy_height, const int dy_width,          \
+    const int padd_dim, const int64_t *paddings, int mode, type *dx, cudaStream_t cuda_stream);
+
+REG_MIRROR_PAD_GRAD_CUDA(half);
+REG_MIRROR_PAD_GRAD_CUDA(float);
+REG_MIRROR_PAD_GRAD_CUDA(double);
+REG_MIRROR_PAD_GRAD_CUDA(uint8_t);
+REG_MIRROR_PAD_GRAD_CUDA(uint16_t);
+REG_MIRROR_PAD_GRAD_CUDA(uint32_t);
+REG_MIRROR_PAD_GRAD_CUDA(uint64_t);
+REG_MIRROR_PAD_GRAD_CUDA(int8_t);
+REG_MIRROR_PAD_GRAD_CUDA(int16_t);
+REG_MIRROR_PAD_GRAD_CUDA(int32_t);
+REG_MIRROR_PAD_GRAD_CUDA(int64_t);
+REG_MIRROR_PAD_GRAD_CUDA(bool);
+REG_MIRROR_PAD_GRAD_CUDA(Complex<float>);
+REG_MIRROR_PAD_GRAD_CUDA(Complex<double>);
diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/mirror_pad_gpu_kernel.cc b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/mirror_pad_gpu_kernel.cc
index 82e856b9fc9..4dfc32c975a 100644
--- a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/mirror_pad_gpu_kernel.cc
+++ b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/mirror_pad_gpu_kernel.cc
@@ -15,6 +15,7 @@
  */
 
 #include "plugin/device/gpu/kernel/nn/mirror_pad_gpu_kernel.h"
+#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/complex.h"
 #include "ops/mirror_pad.h"
 
 namespace mindspore {
@@ -102,16 +103,45 @@ int MirrorPadGpuKernelMod::Resize(const BaseOperatorPtr &base_operator, const st
   return static_cast<int>(KRET_OK);
 }
 
+#define REG_MIRROR_PAD_GPU_KERNEL(TypeId1, TypeId2, Type1)                             \
+  {                                                                                    \
+    KernelAttr().AddInputAttr(TypeId1).AddInputAttr(TypeId2).AddOutputAttr(TypeId1),   \
+    &MirrorPadGpuKernelMod::LaunchKernel<Type1>                                        \
+  }
+
 using KernelRunFunc = MirrorPadGpuKernelMod::KernelRunFunc;
 // int the python api description, input data type is number but CalExtractImagePatchesNHWC only support four type.
 const std::vector<std::pair<KernelAttr, KernelRunFunc>> &MirrorPadGpuKernelMod::GetFuncList() const {
   static const std::vector<std::pair<KernelAttr, KernelRunFunc>> func_list = {
-    {KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeFloat32),
-     &MirrorPadGpuKernelMod::LaunchKernel<float>},
-    {KernelAttr().AddInputAttr(kNumberTypeFloat16).AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeFloat16),
-     &MirrorPadGpuKernelMod::LaunchKernel<half>},
-    {KernelAttr().AddInputAttr(kNumberTypeInt32).AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeInt32),
-     &MirrorPadGpuKernelMod::LaunchKernel<int>},
+    REG_MIRROR_PAD_GPU_KERNEL(kNumberTypeFloat64, kNumberTypeInt64, double),
+    REG_MIRROR_PAD_GPU_KERNEL(kNumberTypeFloat32, kNumberTypeInt64, float),
+    REG_MIRROR_PAD_GPU_KERNEL(kNumberTypeFloat16, kNumberTypeInt64, half),
+    REG_MIRROR_PAD_GPU_KERNEL(kNumberTypeInt64, kNumberTypeInt64, int64_t),
+    REG_MIRROR_PAD_GPU_KERNEL(kNumberTypeInt32, kNumberTypeInt64, int32_t),
+    REG_MIRROR_PAD_GPU_KERNEL(kNumberTypeInt16, kNumberTypeInt64, int16_t),
+    REG_MIRROR_PAD_GPU_KERNEL(kNumberTypeInt8, kNumberTypeInt64, int8_t),
+    REG_MIRROR_PAD_GPU_KERNEL(kNumberTypeUInt64, kNumberTypeInt64, uint64_t),
+    REG_MIRROR_PAD_GPU_KERNEL(kNumberTypeUInt32, kNumberTypeInt64, uint32_t),
+    REG_MIRROR_PAD_GPU_KERNEL(kNumberTypeUInt16, kNumberTypeInt64, uint16_t),
+    REG_MIRROR_PAD_GPU_KERNEL(kNumberTypeUInt8, kNumberTypeInt64, uint8_t),
+    REG_MIRROR_PAD_GPU_KERNEL(kNumberTypeBool, kNumberTypeInt64, bool),
+    REG_MIRROR_PAD_GPU_KERNEL(kNumberTypeComplex64, kNumberTypeInt64, utils::Complex<float>),
+    REG_MIRROR_PAD_GPU_KERNEL(kNumberTypeComplex128, kNumberTypeInt64, utils::Complex<double>),
+
+    REG_MIRROR_PAD_GPU_KERNEL(kNumberTypeFloat64, kNumberTypeInt32, double),
+    REG_MIRROR_PAD_GPU_KERNEL(kNumberTypeFloat32, kNumberTypeInt32, float),
+    REG_MIRROR_PAD_GPU_KERNEL(kNumberTypeFloat16, kNumberTypeInt32, half),
+    REG_MIRROR_PAD_GPU_KERNEL(kNumberTypeInt64, kNumberTypeInt32, int64_t),
+    REG_MIRROR_PAD_GPU_KERNEL(kNumberTypeInt32, kNumberTypeInt32, int32_t),
+    REG_MIRROR_PAD_GPU_KERNEL(kNumberTypeInt16, kNumberTypeInt32, int16_t),
+    REG_MIRROR_PAD_GPU_KERNEL(kNumberTypeInt8, kNumberTypeInt32, int8_t),
+    REG_MIRROR_PAD_GPU_KERNEL(kNumberTypeUInt64, kNumberTypeInt32, uint64_t),
+    REG_MIRROR_PAD_GPU_KERNEL(kNumberTypeUInt32, kNumberTypeInt32, uint32_t),
+    REG_MIRROR_PAD_GPU_KERNEL(kNumberTypeUInt16, kNumberTypeInt32, uint16_t),
+    REG_MIRROR_PAD_GPU_KERNEL(kNumberTypeUInt8, kNumberTypeInt32, uint8_t),
+    REG_MIRROR_PAD_GPU_KERNEL(kNumberTypeBool, kNumberTypeInt32, bool),
+    REG_MIRROR_PAD_GPU_KERNEL(kNumberTypeComplex64, kNumberTypeInt32, utils::Complex<float>),
+    REG_MIRROR_PAD_GPU_KERNEL(kNumberTypeComplex128, kNumberTypeInt32, utils::Complex<double>),
   };
   return func_list;
 }
diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/mirror_pad_grad_gpu_kernel.cc b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/mirror_pad_grad_gpu_kernel.cc
index 55f7addf144..31254f39e92 100644
--- a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/mirror_pad_grad_gpu_kernel.cc
+++ b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/mirror_pad_grad_gpu_kernel.cc
@@ -15,6 +15,7 @@
  */
 
 #include "plugin/device/gpu/kernel/nn/mirror_pad_grad_gpu_kernel.h"
+#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/complex.h"
 
 namespace mindspore {
 namespace kernel {
@@ -185,14 +186,43 @@ int MirrorPadGradGpuKernelMod::Resize(const BaseOperatorPtr &base_operator, cons
   return static_cast<int>(ret);
 }
 
+#define REG_MIRROR_PAD_GRAD_GPU_KERNEL(TypeId1, TypeId2, Type1)                        \
+  {                                                                                    \
+    KernelAttr().AddInputAttr(TypeId1).AddInputAttr(TypeId2).AddOutputAttr(TypeId1),   \
+    &MirrorPadGradGpuKernelMod::LaunchKernel<Type1>                                    \
+  }
+
 std::vector<std::pair<KernelAttr, MirrorPadGradGpuKernelMod::MirrorPadGradFunc>> MirrorPadGradGpuKernelMod::func_list_ = {
-  {KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeFloat32),
-   &MirrorPadGradGpuKernelMod::LaunchKernel<float>},
-  {KernelAttr().AddInputAttr(kNumberTypeFloat16).AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeFloat16),
-   &MirrorPadGradGpuKernelMod::LaunchKernel<half>},
-
{KernelAttr().AddInputAttr(kNumberTypeInt32).AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeInt32),
-   &MirrorPadGradGpuKernelMod::LaunchKernel<int>},
+  REG_MIRROR_PAD_GRAD_GPU_KERNEL(kNumberTypeFloat64, kNumberTypeInt64, double),
+  REG_MIRROR_PAD_GRAD_GPU_KERNEL(kNumberTypeFloat32, kNumberTypeInt64, float),
+  REG_MIRROR_PAD_GRAD_GPU_KERNEL(kNumberTypeFloat16, kNumberTypeInt64, half),
+  REG_MIRROR_PAD_GRAD_GPU_KERNEL(kNumberTypeInt64, kNumberTypeInt64, int64_t),
+  REG_MIRROR_PAD_GRAD_GPU_KERNEL(kNumberTypeInt32, kNumberTypeInt64, int32_t),
+  REG_MIRROR_PAD_GRAD_GPU_KERNEL(kNumberTypeInt16, kNumberTypeInt64, int16_t),
+  REG_MIRROR_PAD_GRAD_GPU_KERNEL(kNumberTypeInt8, kNumberTypeInt64, int8_t),
+  REG_MIRROR_PAD_GRAD_GPU_KERNEL(kNumberTypeUInt64, kNumberTypeInt64, uint64_t),
+  REG_MIRROR_PAD_GRAD_GPU_KERNEL(kNumberTypeUInt32, kNumberTypeInt64, uint32_t),
+  REG_MIRROR_PAD_GRAD_GPU_KERNEL(kNumberTypeUInt16, kNumberTypeInt64, uint16_t),
+  REG_MIRROR_PAD_GRAD_GPU_KERNEL(kNumberTypeUInt8, kNumberTypeInt64, uint8_t),
+  REG_MIRROR_PAD_GRAD_GPU_KERNEL(kNumberTypeBool, kNumberTypeInt64, bool),
+  REG_MIRROR_PAD_GRAD_GPU_KERNEL(kNumberTypeComplex64, kNumberTypeInt64, utils::Complex<float>),
+  REG_MIRROR_PAD_GRAD_GPU_KERNEL(kNumberTypeComplex128, kNumberTypeInt64, utils::Complex<double>),
+
+  REG_MIRROR_PAD_GRAD_GPU_KERNEL(kNumberTypeFloat64, kNumberTypeInt32, double),
+  REG_MIRROR_PAD_GRAD_GPU_KERNEL(kNumberTypeFloat32, kNumberTypeInt32, float),
+  REG_MIRROR_PAD_GRAD_GPU_KERNEL(kNumberTypeFloat16, kNumberTypeInt32, half),
+  REG_MIRROR_PAD_GRAD_GPU_KERNEL(kNumberTypeInt64, kNumberTypeInt32, int64_t),
+  REG_MIRROR_PAD_GRAD_GPU_KERNEL(kNumberTypeInt32, kNumberTypeInt32, int32_t),
+  REG_MIRROR_PAD_GRAD_GPU_KERNEL(kNumberTypeInt16, kNumberTypeInt32, int16_t),
+  REG_MIRROR_PAD_GRAD_GPU_KERNEL(kNumberTypeInt8, kNumberTypeInt32, int8_t),
+  REG_MIRROR_PAD_GRAD_GPU_KERNEL(kNumberTypeUInt64, kNumberTypeInt32, uint64_t),
+  REG_MIRROR_PAD_GRAD_GPU_KERNEL(kNumberTypeUInt32, kNumberTypeInt32, uint32_t),
+
 REG_MIRROR_PAD_GRAD_GPU_KERNEL(kNumberTypeUInt16, kNumberTypeInt32, uint16_t),
+  REG_MIRROR_PAD_GRAD_GPU_KERNEL(kNumberTypeUInt8, kNumberTypeInt32, uint8_t),
+  REG_MIRROR_PAD_GRAD_GPU_KERNEL(kNumberTypeBool, kNumberTypeInt32, bool),
+  REG_MIRROR_PAD_GRAD_GPU_KERNEL(kNumberTypeComplex64, kNumberTypeInt32, utils::Complex<float>),
+  REG_MIRROR_PAD_GRAD_GPU_KERNEL(kNumberTypeComplex128, kNumberTypeInt32, utils::Complex<double>),
 };
 
 std::vector<KernelAttr> MirrorPadGradGpuKernelMod::GetOpSupport() {