forked from mindspore-Ecosystem/mindspore
add gpu mirror_pad data type
parent e231e8d824
commit 4c0b6141a7
@@ -17,6 +17,7 @@
#include <stdio.h>
#include <stdint.h>
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/mirror_pad_impl.cuh"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/complex.h"
#include "include/cuda_fp16.h"

// check for existence in current padded array on X and Y dims

@@ -253,33 +254,44 @@ void CalMirrorPadGrad(const size_t dx_size, const size_t interim_dy_size, T *dy,
mode, dx);
}

template CUDA_LIB_EXPORT void CalMirrorPad<float>(const size_t size, const float *input, const int old_batch,
const int old_channel, const int old_height, const int old_width,
const int padded_height, const int padded_width, int padd_num,
const int64_t *paddings, int mode, float *output,
cudaStream_t cuda_stream);
template CUDA_LIB_EXPORT void CalMirrorPad<half>(const size_t size, const half *input, const int old_batch,
const int old_channel, const int old_height, const int old_width,
const int padded_height, const int padded_width, int padd_num,
const int64_t *paddings, int mode, half *output,
cudaStream_t cuda_stream);
template CUDA_LIB_EXPORT void CalMirrorPad<int>(const size_t size, const int *input, const int old_batch,
const int old_channel, const int old_height, const int old_width,
const int padded_height, const int padded_width, int padd_num,
const int64_t *paddings, int mode, int *output,
cudaStream_t cuda_stream);
template CUDA_LIB_EXPORT void CalMirrorPadGrad<float>(const size_t dx_size, const size_t dy_size, float *dy,
float *interim_dy, const int dx_batches, const int dx_channels,
const int dx_height, const int dx_width, const int dy_height,
const int dy_width, const int padd_dim, const int64_t *paddings,
int mode, float *dx, cudaStream_t cuda_stream);
template CUDA_LIB_EXPORT void CalMirrorPadGrad<half>(const size_t dx_size, const size_t dy_size, half *dy,
half *interim_dy, const int dx_batches, const int dx_channels,
const int dx_height, const int dx_width, const int dy_height,
const int dy_width, const int padd_dim, const int64_t *paddings,
int mode, half *dx, cudaStream_t cuda_stream);
template CUDA_LIB_EXPORT void CalMirrorPadGrad<int>(const size_t dx_size, const size_t dy_size, int *dy,
int *interim_dy, const int dx_batches, const int dx_channels,
const int dx_height, const int dx_width, const int dy_height,
const int dy_width, const int padd_dim, const int64_t *paddings,
int mode, int *dx, cudaStream_t cuda_stream);
#define REG_MIRROR_PAD_CUDA(type) \
template CUDA_LIB_EXPORT void CalMirrorPad<type>( \
const size_t size, const type *input, const int old_batch, const int old_channel, const int old_height, \
const int old_width, const int padded_height, const int padded_width, int padd_num, const int64_t *paddings, \
int mode, type *output, cudaStream_t cuda_stream)

REG_MIRROR_PAD_CUDA(half);
REG_MIRROR_PAD_CUDA(float);
REG_MIRROR_PAD_CUDA(double);
REG_MIRROR_PAD_CUDA(uint8_t);
REG_MIRROR_PAD_CUDA(uint16_t);
REG_MIRROR_PAD_CUDA(uint32_t);
REG_MIRROR_PAD_CUDA(uint64_t);
REG_MIRROR_PAD_CUDA(int8_t);
REG_MIRROR_PAD_CUDA(int16_t);
REG_MIRROR_PAD_CUDA(int32_t);
REG_MIRROR_PAD_CUDA(int64_t);
REG_MIRROR_PAD_CUDA(bool);
REG_MIRROR_PAD_CUDA(Complex<float>);
REG_MIRROR_PAD_CUDA(Complex<double>);

#define REG_MIRROR_PAD_GRAD_CUDA(type) \
template CUDA_LIB_EXPORT void CalMirrorPadGrad<type>( \
const size_t dx_size, const size_t dy_size, type *dy, type *interim_dy, const int dx_batches, \
const int dx_channels, const int dx_height, const int dx_width, const int dy_height, const int dy_width, \
const int padd_dim, const int64_t *paddings, int mode, type *dx, cudaStream_t cuda_stream);

REG_MIRROR_PAD_GRAD_CUDA(half);
REG_MIRROR_PAD_GRAD_CUDA(float);
REG_MIRROR_PAD_GRAD_CUDA(double);
REG_MIRROR_PAD_GRAD_CUDA(uint8_t);
REG_MIRROR_PAD_GRAD_CUDA(uint16_t);
REG_MIRROR_PAD_GRAD_CUDA(uint32_t);
REG_MIRROR_PAD_GRAD_CUDA(uint64_t);
REG_MIRROR_PAD_GRAD_CUDA(int8_t);
REG_MIRROR_PAD_GRAD_CUDA(int16_t);
REG_MIRROR_PAD_GRAD_CUDA(int32_t);
REG_MIRROR_PAD_GRAD_CUDA(int64_t);
REG_MIRROR_PAD_GRAD_CUDA(bool);
REG_MIRROR_PAD_GRAD_CUDA(Complex<float>);
REG_MIRROR_PAD_GRAD_CUDA(Complex<double>);
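
The REG_MIRROR_PAD_CUDA and REG_MIRROR_PAD_GRAD_CUDA macros replace the per-type explicit instantiations with one registration line per supported data type, which is how the new double, unsigned, bool, and Complex instantiations are introduced here. As a rough hand-expanded illustration (derived from the macro definition above; this expansion is not itself a line of the diff), REG_MIRROR_PAD_CUDA(double); expands to:

template CUDA_LIB_EXPORT void CalMirrorPad<double>(
const size_t size, const double *input, const int old_batch, const int old_channel, const int old_height,
const int old_width, const int padded_height, const int padded_width, int padd_num, const int64_t *paddings,
int mode, double *output, cudaStream_t cuda_stream);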

@@ -15,6 +15,7 @@
*/

#include "plugin/device/gpu/kernel/nn/mirror_pad_gpu_kernel.h"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/complex.h"
#include "ops/mirror_pad.h"

namespace mindspore {

@@ -102,16 +103,45 @@ int MirrorPadGpuKernelMod::Resize(const BaseOperatorPtr &base_operator, const st
return static_cast<int>(KRET_OK);
}

#define REG_MIRROR_PAD_GPU_KERNEL(TypeId1, TypeId2, Type1) \
{ \
KernelAttr().AddInputAttr(TypeId1).AddInputAttr(TypeId2).AddOutputAttr(TypeId1), \
&MirrorPadGpuKernelMod::LaunchKernel<Type1> \
}

using KernelRunFunc = MirrorPadGpuKernelMod::KernelRunFunc;
// In the Python API description the input data type is "number", but CalExtractImagePatchesNHWC only supports four types.
const std::vector<std::pair<KernelAttr, KernelRunFunc>> &MirrorPadGpuKernelMod::GetFuncList() const {
static const std::vector<std::pair<KernelAttr, KernelRunFunc>> func_list = {
{KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeFloat32),
&MirrorPadGpuKernelMod::LaunchKernel<float>},
{KernelAttr().AddInputAttr(kNumberTypeFloat16).AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeFloat16),
&MirrorPadGpuKernelMod::LaunchKernel<half>},
{KernelAttr().AddInputAttr(kNumberTypeInt32).AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeInt32),
&MirrorPadGpuKernelMod::LaunchKernel<int>},
REG_MIRROR_PAD_GPU_KERNEL(kNumberTypeFloat64, kNumberTypeInt64, double),
REG_MIRROR_PAD_GPU_KERNEL(kNumberTypeFloat32, kNumberTypeInt64, float),
REG_MIRROR_PAD_GPU_KERNEL(kNumberTypeFloat16, kNumberTypeInt64, half),
REG_MIRROR_PAD_GPU_KERNEL(kNumberTypeInt64, kNumberTypeInt64, int64_t),
REG_MIRROR_PAD_GPU_KERNEL(kNumberTypeInt32, kNumberTypeInt64, int32_t),
REG_MIRROR_PAD_GPU_KERNEL(kNumberTypeInt16, kNumberTypeInt64, int16_t),
REG_MIRROR_PAD_GPU_KERNEL(kNumberTypeInt8, kNumberTypeInt64, int8_t),
REG_MIRROR_PAD_GPU_KERNEL(kNumberTypeUInt64, kNumberTypeInt64, uint64_t),
REG_MIRROR_PAD_GPU_KERNEL(kNumberTypeUInt32, kNumberTypeInt64, uint32_t),
REG_MIRROR_PAD_GPU_KERNEL(kNumberTypeUInt16, kNumberTypeInt64, uint16_t),
REG_MIRROR_PAD_GPU_KERNEL(kNumberTypeUInt8, kNumberTypeInt64, uint8_t),
REG_MIRROR_PAD_GPU_KERNEL(kNumberTypeBool, kNumberTypeInt64, bool),
REG_MIRROR_PAD_GPU_KERNEL(kNumberTypeComplex64, kNumberTypeInt64, utils::Complex<float>),
REG_MIRROR_PAD_GPU_KERNEL(kNumberTypeComplex128, kNumberTypeInt64, utils::Complex<double>),

REG_MIRROR_PAD_GPU_KERNEL(kNumberTypeFloat64, kNumberTypeInt32, double),
REG_MIRROR_PAD_GPU_KERNEL(kNumberTypeFloat32, kNumberTypeInt32, float),
REG_MIRROR_PAD_GPU_KERNEL(kNumberTypeFloat16, kNumberTypeInt32, half),
REG_MIRROR_PAD_GPU_KERNEL(kNumberTypeInt64, kNumberTypeInt32, int64_t),
REG_MIRROR_PAD_GPU_KERNEL(kNumberTypeInt32, kNumberTypeInt32, int32_t),
REG_MIRROR_PAD_GPU_KERNEL(kNumberTypeInt16, kNumberTypeInt32, int16_t),
REG_MIRROR_PAD_GPU_KERNEL(kNumberTypeInt8, kNumberTypeInt32, int8_t),
REG_MIRROR_PAD_GPU_KERNEL(kNumberTypeUInt64, kNumberTypeInt32, uint64_t),
REG_MIRROR_PAD_GPU_KERNEL(kNumberTypeUInt32, kNumberTypeInt32, uint32_t),
REG_MIRROR_PAD_GPU_KERNEL(kNumberTypeUInt16, kNumberTypeInt32, uint16_t),
REG_MIRROR_PAD_GPU_KERNEL(kNumberTypeUInt8, kNumberTypeInt32, uint8_t),
REG_MIRROR_PAD_GPU_KERNEL(kNumberTypeBool, kNumberTypeInt32, bool),
REG_MIRROR_PAD_GPU_KERNEL(kNumberTypeComplex64, kNumberTypeInt32, utils::Complex<float>),
REG_MIRROR_PAD_GPU_KERNEL(kNumberTypeComplex128, kNumberTypeInt32, utils::Complex<double>),
};
return func_list;
}
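
Each REG_MIRROR_PAD_GPU_KERNEL(...) entry expands into a {KernelAttr, launch-function} pair with the same shape as the hand-written entries at the top of func_list. As a hand-expanded sketch (not itself a line of the diff), REG_MIRROR_PAD_GPU_KERNEL(kNumberTypeFloat64, kNumberTypeInt64, double) becomes:

{KernelAttr().AddInputAttr(kNumberTypeFloat64).AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeFloat64),
&MirrorPadGpuKernelMod::LaunchKernel<double>}

The second macro argument is the type of the paddings input, which is why every value type is registered twice: once with int64 paddings and once with int32 paddings.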

@@ -15,6 +15,7 @@
*/

#include "plugin/device/gpu/kernel/nn/mirror_pad_grad_gpu_kernel.h"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/complex.h"

namespace mindspore {
namespace kernel {

@@ -171,14 +172,43 @@ int MirrorPadGradGpuKernelMod::Resize(const BaseOperatorPtr &base_operator, cons
return static_cast<int>(ret);
}

#define REG_MIRROR_PAD_GRAD_GPU_KERNEL(TypeId1, TypeId2, Type1) \
{ \
KernelAttr().AddInputAttr(TypeId1).AddInputAttr(TypeId2).AddOutputAttr(TypeId1), \
&MirrorPadGradGpuKernelMod::LaunchKernel<Type1> \
}

std::vector<std::pair<KernelAttr, MirrorPadGradGpuKernelMod::MirrorPadGradLaunchFunc>>
MirrorPadGradGpuKernelMod::func_list_ = {
{KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeFloat32),
&MirrorPadGradGpuKernelMod::LaunchKernel<float>},
{KernelAttr().AddInputAttr(kNumberTypeFloat16).AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeFloat16),
&MirrorPadGradGpuKernelMod::LaunchKernel<half>},
{KernelAttr().AddInputAttr(kNumberTypeInt32).AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeInt32),
&MirrorPadGradGpuKernelMod::LaunchKernel<int>},
REG_MIRROR_PAD_GRAD_GPU_KERNEL(kNumberTypeFloat64, kNumberTypeInt64, double),
REG_MIRROR_PAD_GRAD_GPU_KERNEL(kNumberTypeFloat32, kNumberTypeInt64, float),
REG_MIRROR_PAD_GRAD_GPU_KERNEL(kNumberTypeFloat16, kNumberTypeInt64, half),
REG_MIRROR_PAD_GRAD_GPU_KERNEL(kNumberTypeInt64, kNumberTypeInt64, int64_t),
REG_MIRROR_PAD_GRAD_GPU_KERNEL(kNumberTypeInt32, kNumberTypeInt64, int32_t),
REG_MIRROR_PAD_GRAD_GPU_KERNEL(kNumberTypeInt16, kNumberTypeInt64, int16_t),
REG_MIRROR_PAD_GRAD_GPU_KERNEL(kNumberTypeInt8, kNumberTypeInt64, int8_t),
REG_MIRROR_PAD_GRAD_GPU_KERNEL(kNumberTypeUInt64, kNumberTypeInt64, uint64_t),
REG_MIRROR_PAD_GRAD_GPU_KERNEL(kNumberTypeUInt32, kNumberTypeInt64, uint32_t),
REG_MIRROR_PAD_GRAD_GPU_KERNEL(kNumberTypeUInt16, kNumberTypeInt64, uint16_t),
REG_MIRROR_PAD_GRAD_GPU_KERNEL(kNumberTypeUInt8, kNumberTypeInt64, uint8_t),
REG_MIRROR_PAD_GRAD_GPU_KERNEL(kNumberTypeBool, kNumberTypeInt64, bool),
REG_MIRROR_PAD_GRAD_GPU_KERNEL(kNumberTypeComplex64, kNumberTypeInt64, utils::Complex<float>),
REG_MIRROR_PAD_GRAD_GPU_KERNEL(kNumberTypeComplex128, kNumberTypeInt64, utils::Complex<double>),

REG_MIRROR_PAD_GRAD_GPU_KERNEL(kNumberTypeFloat64, kNumberTypeInt32, double),
REG_MIRROR_PAD_GRAD_GPU_KERNEL(kNumberTypeFloat32, kNumberTypeInt32, float),
REG_MIRROR_PAD_GRAD_GPU_KERNEL(kNumberTypeFloat16, kNumberTypeInt32, half),
REG_MIRROR_PAD_GRAD_GPU_KERNEL(kNumberTypeInt64, kNumberTypeInt32, int64_t),
REG_MIRROR_PAD_GRAD_GPU_KERNEL(kNumberTypeInt32, kNumberTypeInt32, int32_t),
REG_MIRROR_PAD_GRAD_GPU_KERNEL(kNumberTypeInt16, kNumberTypeInt32, int16_t),
REG_MIRROR_PAD_GRAD_GPU_KERNEL(kNumberTypeInt8, kNumberTypeInt32, int8_t),
REG_MIRROR_PAD_GRAD_GPU_KERNEL(kNumberTypeUInt64, kNumberTypeInt32, uint64_t),
REG_MIRROR_PAD_GRAD_GPU_KERNEL(kNumberTypeUInt32, kNumberTypeInt32, uint32_t),
REG_MIRROR_PAD_GRAD_GPU_KERNEL(kNumberTypeUInt16, kNumberTypeInt32, uint16_t),
REG_MIRROR_PAD_GRAD_GPU_KERNEL(kNumberTypeUInt8, kNumberTypeInt32, uint8_t),
REG_MIRROR_PAD_GRAD_GPU_KERNEL(kNumberTypeBool, kNumberTypeInt32, bool),
REG_MIRROR_PAD_GRAD_GPU_KERNEL(kNumberTypeComplex64, kNumberTypeInt32, utils::Complex<float>),
REG_MIRROR_PAD_GRAD_GPU_KERNEL(kNumberTypeComplex128, kNumberTypeInt32, utils::Complex<double>),
};

std::vector<KernelAttr> MirrorPadGradGpuKernelMod::GetOpSupport() {
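
The hunk is cut off at the start of GetOpSupport. As a minimal sketch of the usual pattern for such a method, assuming it simply collects the KernelAttr keys out of func_list_ (the actual body is not shown in this diff, and <algorithm> plus <iterator> are assumed to be available), it could look like:

// Sketch only: gather the KernelAttr entries registered in func_list_.
std::vector<KernelAttr> support_list;
(void)std::transform(func_list_.begin(), func_list_.end(), std::back_inserter(support_list),
[](const auto &item) { return item.first; });
return support_list;
}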