add gpu mirror_pad data type

This commit is contained in:
fanjibin 2022-11-21 21:31:25 +08:00
parent e231e8d824
commit 4c0b6141a7
3 changed files with 114 additions and 42 deletions

View File

@ -17,6 +17,7 @@
#include <stdio.h>
#include <stdint.h>
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/mirror_pad_impl.cuh"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/complex.h"
#include "include/cuda_fp16.h"
// check for existence in current padded array on X and Y dims
@ -253,33 +254,44 @@ void CalMirrorPadGrad(const size_t dx_size, const size_t interim_dy_size, T *dy,
mode, dx);
}
// Explicit template instantiations of CalMirrorPad, one per supported data type.
// NOTE: the previously hand-written float/half/int instantiations (for both
// CalMirrorPad and CalMirrorPadGrad) are already produced by the REG_* macros;
// spelling them out again would be a duplicate explicit instantiation, which is
// ill-formed C++, so only the macro-generated forms are kept.
#define REG_MIRROR_PAD_CUDA(type)                                                                                \
  template CUDA_LIB_EXPORT void CalMirrorPad<type>(                                                              \
    const size_t size, const type *input, const int old_batch, const int old_channel, const int old_height,      \
    const int old_width, const int padded_height, const int padded_width, int padd_num, const int64_t *paddings, \
    int mode, type *output, cudaStream_t cuda_stream)
REG_MIRROR_PAD_CUDA(half);
REG_MIRROR_PAD_CUDA(float);
REG_MIRROR_PAD_CUDA(double);
REG_MIRROR_PAD_CUDA(uint8_t);
REG_MIRROR_PAD_CUDA(uint16_t);
REG_MIRROR_PAD_CUDA(uint32_t);
REG_MIRROR_PAD_CUDA(uint64_t);
REG_MIRROR_PAD_CUDA(int8_t);
REG_MIRROR_PAD_CUDA(int16_t);
REG_MIRROR_PAD_CUDA(int32_t);
REG_MIRROR_PAD_CUDA(int64_t);
REG_MIRROR_PAD_CUDA(bool);
REG_MIRROR_PAD_CUDA(Complex<float>);
REG_MIRROR_PAD_CUDA(Complex<double>);
// Explicit template instantiations of CalMirrorPadGrad, one per supported data type.
// The macro body deliberately does NOT end with a semicolon (the invocation supplies
// it), matching REG_MIRROR_PAD_CUDA above; the previous trailing ';' inside the macro
// made every REG_MIRROR_PAD_GRAD_CUDA(type); expand to a stray empty declaration.
#define REG_MIRROR_PAD_GRAD_CUDA(type)                                                                     \
  template CUDA_LIB_EXPORT void CalMirrorPadGrad<type>(                                                    \
    const size_t dx_size, const size_t dy_size, type *dy, type *interim_dy, const int dx_batches,          \
    const int dx_channels, const int dx_height, const int dx_width, const int dy_height, const int dy_width, \
    const int padd_dim, const int64_t *paddings, int mode, type *dx, cudaStream_t cuda_stream)
REG_MIRROR_PAD_GRAD_CUDA(half);
REG_MIRROR_PAD_GRAD_CUDA(float);
REG_MIRROR_PAD_GRAD_CUDA(double);
REG_MIRROR_PAD_GRAD_CUDA(uint8_t);
REG_MIRROR_PAD_GRAD_CUDA(uint16_t);
REG_MIRROR_PAD_GRAD_CUDA(uint32_t);
REG_MIRROR_PAD_GRAD_CUDA(uint64_t);
REG_MIRROR_PAD_GRAD_CUDA(int8_t);
REG_MIRROR_PAD_GRAD_CUDA(int16_t);
REG_MIRROR_PAD_GRAD_CUDA(int32_t);
REG_MIRROR_PAD_GRAD_CUDA(int64_t);
REG_MIRROR_PAD_GRAD_CUDA(bool);
REG_MIRROR_PAD_GRAD_CUDA(Complex<float>);
REG_MIRROR_PAD_GRAD_CUDA(Complex<double>);

View File

@ -15,6 +15,7 @@
*/
#include "plugin/device/gpu/kernel/nn/mirror_pad_gpu_kernel.h"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/complex.h"
#include "ops/mirror_pad.h"
namespace mindspore {
@ -102,16 +103,45 @@ int MirrorPadGpuKernelMod::Resize(const BaseOperatorPtr &base_operator, const st
return static_cast<int>(KRET_OK);
}
// Helper macro: expands to one {KernelAttr, launch-function} registration pair in which
// the data input and the output share type id TypeId1, the paddings input has type id
// TypeId2, and the launch function is instantiated for the C++ type Type1.
#define REG_MIRROR_PAD_GPU_KERNEL(TypeId1, TypeId2, Type1) \
{ \
KernelAttr().AddInputAttr(TypeId1).AddInputAttr(TypeId2).AddOutputAttr(TypeId1), \
&MirrorPadGpuKernelMod::LaunchKernel<Type1> \
}
using KernelRunFunc = MirrorPadGpuKernelMod::KernelRunFunc;
// In the Python API description the input data type is Number, but this kernel supports
// only the data types registered in func_list below.
const std::vector<std::pair<KernelAttr, KernelRunFunc>> &MirrorPadGpuKernelMod::GetFuncList() const {
  // Registration table: one entry per supported (data type, paddings type) pair.
  // Paddings may be int64 or int32; data types cover fp16/32/64, all signed and
  // unsigned integer widths, bool and complex64/128. The hand-written entries for
  // (float32|float16|int32, int64) that used to precede this table duplicated the
  // macro-generated entries below and have been removed — same KernelAttr, same
  // LaunchKernel<T> instantiation, so matching behavior is unchanged.
  // Built once on first call (function-local static) and returned by reference.
  static const std::vector<std::pair<KernelAttr, KernelRunFunc>> func_list = {
    REG_MIRROR_PAD_GPU_KERNEL(kNumberTypeFloat64, kNumberTypeInt64, double),
    REG_MIRROR_PAD_GPU_KERNEL(kNumberTypeFloat32, kNumberTypeInt64, float),
    REG_MIRROR_PAD_GPU_KERNEL(kNumberTypeFloat16, kNumberTypeInt64, half),
    REG_MIRROR_PAD_GPU_KERNEL(kNumberTypeInt64, kNumberTypeInt64, int64_t),
    REG_MIRROR_PAD_GPU_KERNEL(kNumberTypeInt32, kNumberTypeInt64, int32_t),
    REG_MIRROR_PAD_GPU_KERNEL(kNumberTypeInt16, kNumberTypeInt64, int16_t),
    REG_MIRROR_PAD_GPU_KERNEL(kNumberTypeInt8, kNumberTypeInt64, int8_t),
    REG_MIRROR_PAD_GPU_KERNEL(kNumberTypeUInt64, kNumberTypeInt64, uint64_t),
    REG_MIRROR_PAD_GPU_KERNEL(kNumberTypeUInt32, kNumberTypeInt64, uint32_t),
    REG_MIRROR_PAD_GPU_KERNEL(kNumberTypeUInt16, kNumberTypeInt64, uint16_t),
    REG_MIRROR_PAD_GPU_KERNEL(kNumberTypeUInt8, kNumberTypeInt64, uint8_t),
    REG_MIRROR_PAD_GPU_KERNEL(kNumberTypeBool, kNumberTypeInt64, bool),
    REG_MIRROR_PAD_GPU_KERNEL(kNumberTypeComplex64, kNumberTypeInt64, utils::Complex<float>),
    REG_MIRROR_PAD_GPU_KERNEL(kNumberTypeComplex128, kNumberTypeInt64, utils::Complex<double>),
    REG_MIRROR_PAD_GPU_KERNEL(kNumberTypeFloat64, kNumberTypeInt32, double),
    REG_MIRROR_PAD_GPU_KERNEL(kNumberTypeFloat32, kNumberTypeInt32, float),
    REG_MIRROR_PAD_GPU_KERNEL(kNumberTypeFloat16, kNumberTypeInt32, half),
    REG_MIRROR_PAD_GPU_KERNEL(kNumberTypeInt64, kNumberTypeInt32, int64_t),
    REG_MIRROR_PAD_GPU_KERNEL(kNumberTypeInt32, kNumberTypeInt32, int32_t),
    REG_MIRROR_PAD_GPU_KERNEL(kNumberTypeInt16, kNumberTypeInt32, int16_t),
    REG_MIRROR_PAD_GPU_KERNEL(kNumberTypeInt8, kNumberTypeInt32, int8_t),
    REG_MIRROR_PAD_GPU_KERNEL(kNumberTypeUInt64, kNumberTypeInt32, uint64_t),
    REG_MIRROR_PAD_GPU_KERNEL(kNumberTypeUInt32, kNumberTypeInt32, uint32_t),
    REG_MIRROR_PAD_GPU_KERNEL(kNumberTypeUInt16, kNumberTypeInt32, uint16_t),
    REG_MIRROR_PAD_GPU_KERNEL(kNumberTypeUInt8, kNumberTypeInt32, uint8_t),
    REG_MIRROR_PAD_GPU_KERNEL(kNumberTypeBool, kNumberTypeInt32, bool),
    REG_MIRROR_PAD_GPU_KERNEL(kNumberTypeComplex64, kNumberTypeInt32, utils::Complex<float>),
    REG_MIRROR_PAD_GPU_KERNEL(kNumberTypeComplex128, kNumberTypeInt32, utils::Complex<double>),
  };
  return func_list;
}

View File

@ -15,6 +15,7 @@
*/
#include "plugin/device/gpu/kernel/nn/mirror_pad_grad_gpu_kernel.h"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/complex.h"
namespace mindspore {
namespace kernel {
@ -171,14 +172,43 @@ int MirrorPadGradGpuKernelMod::Resize(const BaseOperatorPtr &base_operator, cons
return static_cast<int>(ret);
}
// Helper macro: expands to one {KernelAttr, launch-function} registration pair in which
// the dy input and the dx output share type id TypeId1, the paddings input has type id
// TypeId2, and the launch function is instantiated for the C++ type Type1.
#define REG_MIRROR_PAD_GRAD_GPU_KERNEL(TypeId1, TypeId2, Type1) \
{ \
KernelAttr().AddInputAttr(TypeId1).AddInputAttr(TypeId2).AddOutputAttr(TypeId1), \
&MirrorPadGradGpuKernelMod::LaunchKernel<Type1> \
}
// Registration table for MirrorPadGrad: one entry per supported (data type,
// paddings type) pair; paddings may be int64 or int32. The hand-written entries
// for (float32|float16|int32, int64) that used to precede the macro-generated
// list duplicated entries below (same KernelAttr, same LaunchKernel<T>) and
// have been removed; matching behavior is unchanged.
std::vector<std::pair<KernelAttr, MirrorPadGradGpuKernelMod::MirrorPadGradLaunchFunc>>
MirrorPadGradGpuKernelMod::func_list_ = {
  REG_MIRROR_PAD_GRAD_GPU_KERNEL(kNumberTypeFloat64, kNumberTypeInt64, double),
  REG_MIRROR_PAD_GRAD_GPU_KERNEL(kNumberTypeFloat32, kNumberTypeInt64, float),
  REG_MIRROR_PAD_GRAD_GPU_KERNEL(kNumberTypeFloat16, kNumberTypeInt64, half),
  REG_MIRROR_PAD_GRAD_GPU_KERNEL(kNumberTypeInt64, kNumberTypeInt64, int64_t),
  REG_MIRROR_PAD_GRAD_GPU_KERNEL(kNumberTypeInt32, kNumberTypeInt64, int32_t),
  REG_MIRROR_PAD_GRAD_GPU_KERNEL(kNumberTypeInt16, kNumberTypeInt64, int16_t),
  REG_MIRROR_PAD_GRAD_GPU_KERNEL(kNumberTypeInt8, kNumberTypeInt64, int8_t),
  REG_MIRROR_PAD_GRAD_GPU_KERNEL(kNumberTypeUInt64, kNumberTypeInt64, uint64_t),
  REG_MIRROR_PAD_GRAD_GPU_KERNEL(kNumberTypeUInt32, kNumberTypeInt64, uint32_t),
  REG_MIRROR_PAD_GRAD_GPU_KERNEL(kNumberTypeUInt16, kNumberTypeInt64, uint16_t),
  REG_MIRROR_PAD_GRAD_GPU_KERNEL(kNumberTypeUInt8, kNumberTypeInt64, uint8_t),
  REG_MIRROR_PAD_GRAD_GPU_KERNEL(kNumberTypeBool, kNumberTypeInt64, bool),
  REG_MIRROR_PAD_GRAD_GPU_KERNEL(kNumberTypeComplex64, kNumberTypeInt64, utils::Complex<float>),
  REG_MIRROR_PAD_GRAD_GPU_KERNEL(kNumberTypeComplex128, kNumberTypeInt64, utils::Complex<double>),
  REG_MIRROR_PAD_GRAD_GPU_KERNEL(kNumberTypeFloat64, kNumberTypeInt32, double),
  REG_MIRROR_PAD_GRAD_GPU_KERNEL(kNumberTypeFloat32, kNumberTypeInt32, float),
  REG_MIRROR_PAD_GRAD_GPU_KERNEL(kNumberTypeFloat16, kNumberTypeInt32, half),
  REG_MIRROR_PAD_GRAD_GPU_KERNEL(kNumberTypeInt64, kNumberTypeInt32, int64_t),
  REG_MIRROR_PAD_GRAD_GPU_KERNEL(kNumberTypeInt32, kNumberTypeInt32, int32_t),
  REG_MIRROR_PAD_GRAD_GPU_KERNEL(kNumberTypeInt16, kNumberTypeInt32, int16_t),
  REG_MIRROR_PAD_GRAD_GPU_KERNEL(kNumberTypeInt8, kNumberTypeInt32, int8_t),
  REG_MIRROR_PAD_GRAD_GPU_KERNEL(kNumberTypeUInt64, kNumberTypeInt32, uint64_t),
  REG_MIRROR_PAD_GRAD_GPU_KERNEL(kNumberTypeUInt32, kNumberTypeInt32, uint32_t),
  REG_MIRROR_PAD_GRAD_GPU_KERNEL(kNumberTypeUInt16, kNumberTypeInt32, uint16_t),
  REG_MIRROR_PAD_GRAD_GPU_KERNEL(kNumberTypeUInt8, kNumberTypeInt32, uint8_t),
  REG_MIRROR_PAD_GRAD_GPU_KERNEL(kNumberTypeBool, kNumberTypeInt32, bool),
  REG_MIRROR_PAD_GRAD_GPU_KERNEL(kNumberTypeComplex64, kNumberTypeInt32, utils::Complex<float>),
  REG_MIRROR_PAD_GRAD_GPU_KERNEL(kNumberTypeComplex128, kNumberTypeInt32, utils::Complex<double>),
};
std::vector<KernelAttr> MirrorPadGradGpuKernelMod::GetOpSupport() {