diff --git a/mindspore/ccsrc/include/common/utils/utils.h b/mindspore/ccsrc/include/common/utils/utils.h index d82ec9f9ce0..fe1ea8bb513 100644 --- a/mindspore/ccsrc/include/common/utils/utils.h +++ b/mindspore/ccsrc/include/common/utils/utils.h @@ -792,9 +792,19 @@ const std::set DynamicShapeConstInputToAttrCPU = { kCastOpName, kExpandDimsOpName, kEmbeddingLookupOpName, kReduceMinOpName, kReduceMeanOpName, kReduceMaxOpName, kReduceAllOpName, kReduceAnyOpName, kConcatOpName, kReduceSumOpName}; -const std::set DynamicShapeConstInputToAttrGPU = { - kCastOpName, kExpandDimsOpName, kReshapeOpName, kEmbeddingLookupOpName, kTransposeOpName, kReduceSumOpName, - kReduceMinOpName, kReduceMeanOpName, kReduceMaxOpName, kReduceAllOpName, kReduceAnyOpName, kConcatOpName}; +const std::set DynamicShapeConstInputToAttrGPU = {kCastOpName, + kExpandDimsOpName, + kReshapeOpName, + kEmbeddingLookupOpName, + kTransposeOpName, + kReduceSumOpName, + kReduceMinOpName, + kReduceMeanOpName, + kReduceMaxOpName, + kReduceAllOpName, + kReduceAnyOpName, + kConcatOpName, + kConv2DBackpropFilterOpName}; // The map between kernel's output and input ref relationship. // Key is the output index while the value is input index which will be used as the reference of output. diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/strided_slice_grad_gpu_kernel.cc b/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/strided_slice_grad_gpu_kernel.cc index 36104634aaf..4d2c0b4bb33 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/strided_slice_grad_gpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/strided_slice_grad_gpu_kernel.cc @@ -42,5 +42,104 @@ MS_REG_GPU_KERNEL_ONE(StridedSliceGrad, KernelAttr().AddInputAttr(kNumberTypeUIn StridedSliceGradGpuKernelMod, uchar) MS_REG_GPU_KERNEL_ONE(StridedSliceGrad, KernelAttr().AddInputAttr(kNumberTypeBool).AddOutputAttr(kNumberTypeBool), StridedSliceGradGpuKernelMod, bool) +MS_REG_GPU_KERNEL_TWO(StridedSliceGrad, + KernelAttr() + .AddInputAttr(kNumberTypeFloat64) + .AddInputAttr(kNumberTypeInt64) + .AddInputAttr(kNumberTypeInt64) + .AddInputAttr(kNumberTypeInt64) + .AddInputAttr(kNumberTypeInt64) + .AddOutputAttr(kNumberTypeFloat64), + StridedSliceGradGpuKernelMod, double, int64_t) +MS_REG_GPU_KERNEL_TWO(StridedSliceGrad, + KernelAttr() + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeInt64) + .AddInputAttr(kNumberTypeInt64) + .AddInputAttr(kNumberTypeInt64) + .AddInputAttr(kNumberTypeInt64) + .AddOutputAttr(kNumberTypeFloat32), + StridedSliceGradGpuKernelMod, float, int64_t) +MS_REG_GPU_KERNEL_TWO(StridedSliceGrad, + KernelAttr() + .AddInputAttr(kNumberTypeInt64) + .AddInputAttr(kNumberTypeInt64) + .AddInputAttr(kNumberTypeInt64) + .AddInputAttr(kNumberTypeInt64) + .AddInputAttr(kNumberTypeInt64) + .AddOutputAttr(kNumberTypeInt64), + StridedSliceGradGpuKernelMod, int64_t, int64_t) +MS_REG_GPU_KERNEL_TWO(StridedSliceGrad, + KernelAttr() + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeInt64) + .AddInputAttr(kNumberTypeInt64) + .AddInputAttr(kNumberTypeInt64) + .AddInputAttr(kNumberTypeInt64) + .AddOutputAttr(kNumberTypeInt32), + StridedSliceGradGpuKernelMod, int, int64_t) +MS_REG_GPU_KERNEL_TWO(StridedSliceGrad, + KernelAttr() + .AddInputAttr(kNumberTypeInt16) + .AddInputAttr(kNumberTypeInt64) + .AddInputAttr(kNumberTypeInt64) + .AddInputAttr(kNumberTypeInt64) + .AddInputAttr(kNumberTypeInt64) + .AddOutputAttr(kNumberTypeInt16), + StridedSliceGradGpuKernelMod, short, int64_t) // NOLINT +MS_REG_GPU_KERNEL_TWO(StridedSliceGrad, + KernelAttr() + .AddInputAttr(kNumberTypeInt8) + .AddInputAttr(kNumberTypeInt64) + .AddInputAttr(kNumberTypeInt64) + .AddInputAttr(kNumberTypeInt64) + .AddInputAttr(kNumberTypeInt64) + .AddOutputAttr(kNumberTypeInt8), + StridedSliceGradGpuKernelMod, int8_t, int64_t) +MS_REG_GPU_KERNEL_TWO(StridedSliceGrad, + KernelAttr() + .AddInputAttr(kNumberTypeUInt64) + .AddInputAttr(kNumberTypeInt64) + .AddInputAttr(kNumberTypeInt64) + .AddInputAttr(kNumberTypeInt64) + .AddInputAttr(kNumberTypeInt64) + .AddOutputAttr(kNumberTypeUInt64), + StridedSliceGradGpuKernelMod, uint64_t, int64_t) +MS_REG_GPU_KERNEL_TWO(StridedSliceGrad, + KernelAttr() + .AddInputAttr(kNumberTypeUInt32) + .AddInputAttr(kNumberTypeInt64) + .AddInputAttr(kNumberTypeInt64) + .AddInputAttr(kNumberTypeInt64) + .AddInputAttr(kNumberTypeInt64) + .AddOutputAttr(kNumberTypeUInt32), + StridedSliceGradGpuKernelMod, uint32_t, int64_t) +MS_REG_GPU_KERNEL_TWO(StridedSliceGrad, + KernelAttr() + .AddInputAttr(kNumberTypeUInt16) + .AddInputAttr(kNumberTypeInt64) + .AddInputAttr(kNumberTypeInt64) + .AddInputAttr(kNumberTypeInt64) + .AddInputAttr(kNumberTypeInt64) + .AddOutputAttr(kNumberTypeUInt16), + StridedSliceGradGpuKernelMod, uint16_t, int64_t) +MS_REG_GPU_KERNEL_TWO(StridedSliceGrad, + KernelAttr() + .AddInputAttr(kNumberTypeUInt8) + .AddInputAttr(kNumberTypeInt64) + .AddInputAttr(kNumberTypeInt64) + .AddInputAttr(kNumberTypeInt64) + .AddInputAttr(kNumberTypeInt64) + .AddOutputAttr(kNumberTypeUInt8), + StridedSliceGradGpuKernelMod, uchar, int64_t) +MS_REG_GPU_KERNEL_TWO(StridedSliceGrad, + KernelAttr() + .AddInputAttr(kNumberTypeBool) + .AddInputAttr(kNumberTypeInt64) + .AddInputAttr(kNumberTypeInt64) + .AddInputAttr(kNumberTypeInt64) + .AddInputAttr(kNumberTypeInt64) + .AddOutputAttr(kNumberTypeBool), + StridedSliceGradGpuKernelMod, bool, int64_t) } // namespace kernel } // namespace mindspore diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/strided_slice_grad_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/strided_slice_grad_gpu_kernel.h index e5777d95dcb..7293d5224c6 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/strided_slice_grad_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/strided_slice_grad_gpu_kernel.h @@ -27,7 +27,8 @@ namespace mindspore { namespace kernel { -template +constexpr int DynamicInputNum = 5; +template class StridedSliceGradGpuKernelMod : public NativeGpuKernelMod, public StridedSliceGpuCommon { public: StridedSliceGradGpuKernelMod() = default; @@ -49,8 +50,19 @@ class StridedSliceGradGpuKernelMod : public NativeGpuKernelMod, public StridedSl bool Init(const CNodePtr &kernel_node) override { auto kernel_name = common::AnfAlgo::GetCNodeName(kernel_node); kernel_node_ = kernel_node; - std::vector shapex = GetAttr>(kernel_node, "shapex"); - for (auto x : shapex) { + size_t input_num = common::AnfAlgo::GetInputTensorNum(kernel_node); + if (input_num == DynamicInputNum) { + is_dynamic_attr_ = true; + } + if (is_dynamic_attr_) { + GetDynamicAttrIntValue(kernel_node, kShapexIndex_, &shapex_); + GetDynamicAttrIntValue(kernel_node, kBeginIndex_, &begin_); + GetDynamicAttrIntValue(kernel_node, kEndIndex_, &end_); + GetDynamicAttrIntValue(kernel_node, kStrideIndex_, &strides_); + } else { + shapex_ = GetAttr>(kernel_node, "shapex"); + } + for (auto x : shapex_) { input_shape_.push_back(static_cast(x)); } if (input_shape_.size() > MAX_DIMS) { @@ -58,10 +70,19 @@ class StridedSliceGradGpuKernelMod : public NativeGpuKernelMod, public StridedSl << ", but got " << input_shape_.size(); } - CollectInfo(kernel_node); + CollectInfo(kernel_node, is_dynamic_attr_); InitSizeLists(); return true; } + void ResetResource() noexcept override { + ResetSizeLists(); + begin_.clear(); + end_.clear(); + strides_.clear(); + input_shape_.clear(); + output_shape_.clear(); + is_dynamic_attr_ = false; + } protected: void InitSizeLists() override { @@ -77,6 +98,14 @@ class StridedSliceGradGpuKernelMod : public NativeGpuKernelMod, public StridedSl } output_size_list_.push_back(size1); } + bool is_null_input_{false}; + bool is_dynamic_attr_{false}; + bool get_dynamic_attr_value_{false}; + static constexpr size_t kShapexIndex_{1}; + static constexpr size_t kBeginIndex_{2}; + static constexpr size_t kEndIndex_{3}; + static constexpr size_t kStrideIndex_{4}; + std::vector shapex_; }; } // namespace kernel } // namespace mindspore diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/unsorted_segment_sum_gpu_kernel.cc b/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/unsorted_segment_sum_gpu_kernel.cc index cd943507917..8cda80863c7 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/unsorted_segment_sum_gpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/unsorted_segment_sum_gpu_kernel.cc @@ -21,7 +21,7 @@ namespace kernel { MS_REG_GPU_KERNEL_TWO( UnsortedSegmentSum, KernelAttr().AddInputAttr(kNumberTypeFloat64).AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeFloat64), - UnsortedSegmentSumGpuKernelMod, double, int) + UnsortedSegmentSumGpuKernelMod, double, int32_t) MS_REG_GPU_KERNEL_TWO( UnsortedSegmentSum, @@ -31,7 +31,7 @@ MS_REG_GPU_KERNEL_TWO( MS_REG_GPU_KERNEL_TWO( UnsortedSegmentSum, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeFloat32), - UnsortedSegmentSumGpuKernelMod, float, int) + UnsortedSegmentSumGpuKernelMod, float, int32_t) MS_REG_GPU_KERNEL_TWO( UnsortedSegmentSum, @@ -41,7 +41,7 @@ MS_REG_GPU_KERNEL_TWO( MS_REG_GPU_KERNEL_TWO( UnsortedSegmentSum, KernelAttr().AddInputAttr(kNumberTypeFloat16).AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeFloat16), - UnsortedSegmentSumGpuKernelMod, half, int) + UnsortedSegmentSumGpuKernelMod, half, int32_t) MS_REG_GPU_KERNEL_TWO( UnsortedSegmentSum, @@ -51,29 +51,78 @@ MS_REG_GPU_KERNEL_TWO( MS_REG_GPU_KERNEL_TWO( UnsortedSegmentSum, KernelAttr().AddInputAttr(kNumberTypeInt32).AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeInt32), - UnsortedSegmentSumGpuKernelMod, int, int) + UnsortedSegmentSumGpuKernelMod, int32_t, int32_t) MS_REG_GPU_KERNEL_TWO( UnsortedSegmentSum, KernelAttr().AddInputAttr(kNumberTypeInt32).AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeInt32), UnsortedSegmentSumGpuKernelMod, int, int64_t) - -// Re-registration with 3 inputs - dynamic shape mode - sets of Int64/Int32 num segments types +// Re-registration with 3 inputs - dynamic shape mode +// Int32 +MS_REG_GPU_KERNEL_TWO(UnsortedSegmentSum, + KernelAttr() + .AddInputAttr(kNumberTypeFloat64) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeInt32) + .AddOutputAttr(kNumberTypeFloat64), + UnsortedSegmentSumGpuKernelMod, double, int32_t) +MS_REG_GPU_KERNEL_TWO(UnsortedSegmentSum, + KernelAttr() + .AddInputAttr(kNumberTypeFloat64) + .AddInputAttr(kNumberTypeInt64) + .AddInputAttr(kNumberTypeInt32) + .AddOutputAttr(kNumberTypeFloat64), + UnsortedSegmentSumGpuKernelMod, double, int64_t) +MS_REG_GPU_KERNEL_TWO(UnsortedSegmentSum, + KernelAttr() + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeInt32) + .AddOutputAttr(kNumberTypeFloat32), + UnsortedSegmentSumGpuKernelMod, float, int32_t) +MS_REG_GPU_KERNEL_TWO(UnsortedSegmentSum, + KernelAttr() + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeInt64) + .AddInputAttr(kNumberTypeInt32) + .AddOutputAttr(kNumberTypeFloat32), + UnsortedSegmentSumGpuKernelMod, float, int64_t) +MS_REG_GPU_KERNEL_TWO(UnsortedSegmentSum, + KernelAttr() + .AddInputAttr(kNumberTypeFloat16) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeInt32) + .AddOutputAttr(kNumberTypeFloat16), + UnsortedSegmentSumGpuKernelMod, half, int32_t) +MS_REG_GPU_KERNEL_TWO(UnsortedSegmentSum, + KernelAttr() + .AddInputAttr(kNumberTypeFloat16) + .AddInputAttr(kNumberTypeInt64) + .AddInputAttr(kNumberTypeInt32) + .AddOutputAttr(kNumberTypeFloat16), + UnsortedSegmentSumGpuKernelMod, half, int64_t) +MS_REG_GPU_KERNEL_TWO(UnsortedSegmentSum, + KernelAttr() + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeInt32) + .AddOutputAttr(kNumberTypeInt32), + UnsortedSegmentSumGpuKernelMod, int32_t, int32_t) +MS_REG_GPU_KERNEL_TWO(UnsortedSegmentSum, + KernelAttr() + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeInt64) + .AddInputAttr(kNumberTypeInt32) + .AddOutputAttr(kNumberTypeInt32), + UnsortedSegmentSumGpuKernelMod, int32_t, int64_t) +// Int64 MS_REG_GPU_KERNEL_TWO(UnsortedSegmentSum, KernelAttr() .AddInputAttr(kNumberTypeFloat64) .AddInputAttr(kNumberTypeInt32) .AddInputAttr(kNumberTypeInt64) .AddOutputAttr(kNumberTypeFloat64), - UnsortedSegmentSumGpuKernelMod, double, int) -MS_REG_GPU_KERNEL_TWO(UnsortedSegmentSum, - KernelAttr() - .AddInputAttr(kNumberTypeFloat64) - .AddInputAttr(kNumberTypeInt32) - .AddInputAttr(kNumberTypeInt32) - .AddOutputAttr(kNumberTypeFloat64), - UnsortedSegmentSumGpuKernelMod, double, int) - + UnsortedSegmentSumGpuKernelMod, double, int32_t) MS_REG_GPU_KERNEL_TWO(UnsortedSegmentSum, KernelAttr() .AddInputAttr(kNumberTypeFloat64) @@ -81,29 +130,13 @@ MS_REG_GPU_KERNEL_TWO(UnsortedSegmentSum, .AddInputAttr(kNumberTypeInt64) .AddOutputAttr(kNumberTypeFloat64), UnsortedSegmentSumGpuKernelMod, double, int64_t) -MS_REG_GPU_KERNEL_TWO(UnsortedSegmentSum, - KernelAttr() - .AddInputAttr(kNumberTypeFloat64) - .AddInputAttr(kNumberTypeInt64) - .AddInputAttr(kNumberTypeInt32) - .AddOutputAttr(kNumberTypeFloat64), - UnsortedSegmentSumGpuKernelMod, double, int64_t) - MS_REG_GPU_KERNEL_TWO(UnsortedSegmentSum, KernelAttr() .AddInputAttr(kNumberTypeFloat32) .AddInputAttr(kNumberTypeInt32) .AddInputAttr(kNumberTypeInt64) .AddOutputAttr(kNumberTypeFloat32), - UnsortedSegmentSumGpuKernelMod, float, int) -MS_REG_GPU_KERNEL_TWO(UnsortedSegmentSum, - KernelAttr() - .AddInputAttr(kNumberTypeFloat32) - .AddInputAttr(kNumberTypeInt32) - .AddInputAttr(kNumberTypeInt32) - .AddOutputAttr(kNumberTypeFloat32), - UnsortedSegmentSumGpuKernelMod, float, int) - + UnsortedSegmentSumGpuKernelMod, float, int32_t) MS_REG_GPU_KERNEL_TWO(UnsortedSegmentSum, KernelAttr() .AddInputAttr(kNumberTypeFloat32) @@ -111,29 +144,13 @@ MS_REG_GPU_KERNEL_TWO(UnsortedSegmentSum, .AddInputAttr(kNumberTypeInt64) .AddOutputAttr(kNumberTypeFloat32), UnsortedSegmentSumGpuKernelMod, float, int64_t) -MS_REG_GPU_KERNEL_TWO(UnsortedSegmentSum, - KernelAttr() - .AddInputAttr(kNumberTypeFloat32) - .AddInputAttr(kNumberTypeInt64) - .AddInputAttr(kNumberTypeInt32) - .AddOutputAttr(kNumberTypeFloat32), - UnsortedSegmentSumGpuKernelMod, float, int64_t) - MS_REG_GPU_KERNEL_TWO(UnsortedSegmentSum, KernelAttr() .AddInputAttr(kNumberTypeFloat16) .AddInputAttr(kNumberTypeInt32) .AddInputAttr(kNumberTypeInt64) .AddOutputAttr(kNumberTypeFloat16), - UnsortedSegmentSumGpuKernelMod, half, int) -MS_REG_GPU_KERNEL_TWO(UnsortedSegmentSum, - KernelAttr() - .AddInputAttr(kNumberTypeFloat16) - .AddInputAttr(kNumberTypeInt32) - .AddInputAttr(kNumberTypeInt32) - .AddOutputAttr(kNumberTypeFloat16), - UnsortedSegmentSumGpuKernelMod, half, int) - + UnsortedSegmentSumGpuKernelMod, half, int32_t) MS_REG_GPU_KERNEL_TWO(UnsortedSegmentSum, KernelAttr() .AddInputAttr(kNumberTypeFloat16) @@ -141,42 +158,19 @@ MS_REG_GPU_KERNEL_TWO(UnsortedSegmentSum, .AddInputAttr(kNumberTypeInt64) .AddOutputAttr(kNumberTypeFloat16), UnsortedSegmentSumGpuKernelMod, half, int64_t) -MS_REG_GPU_KERNEL_TWO(UnsortedSegmentSum, - KernelAttr() - .AddInputAttr(kNumberTypeFloat16) - .AddInputAttr(kNumberTypeInt64) - .AddInputAttr(kNumberTypeInt32) - .AddOutputAttr(kNumberTypeFloat16), - UnsortedSegmentSumGpuKernelMod, half, int64_t) - MS_REG_GPU_KERNEL_TWO(UnsortedSegmentSum, KernelAttr() .AddInputAttr(kNumberTypeInt32) .AddInputAttr(kNumberTypeInt32) .AddInputAttr(kNumberTypeInt64) .AddOutputAttr(kNumberTypeInt32), - UnsortedSegmentSumGpuKernelMod, int, int) -MS_REG_GPU_KERNEL_TWO(UnsortedSegmentSum, - KernelAttr() - .AddInputAttr(kNumberTypeInt32) - .AddInputAttr(kNumberTypeInt32) - .AddInputAttr(kNumberTypeInt32) - .AddOutputAttr(kNumberTypeInt32), - UnsortedSegmentSumGpuKernelMod, int, int) - + UnsortedSegmentSumGpuKernelMod, int32_t, int32_t) MS_REG_GPU_KERNEL_TWO(UnsortedSegmentSum, KernelAttr() .AddInputAttr(kNumberTypeInt32) .AddInputAttr(kNumberTypeInt64) .AddInputAttr(kNumberTypeInt64) .AddOutputAttr(kNumberTypeInt32), - UnsortedSegmentSumGpuKernelMod, int, int64_t) -MS_REG_GPU_KERNEL_TWO(UnsortedSegmentSum, - KernelAttr() - .AddInputAttr(kNumberTypeInt32) - .AddInputAttr(kNumberTypeInt64) - .AddInputAttr(kNumberTypeInt32) - .AddOutputAttr(kNumberTypeInt32), - UnsortedSegmentSumGpuKernelMod, int, int64_t) + UnsortedSegmentSumGpuKernelMod, int32_t, int64_t) } // namespace kernel } // namespace mindspore diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/conv2d_gpu_kernel.cc b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/conv2d_gpu_kernel.cc index cd31f56742b..24e3cc54a62 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/conv2d_gpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/conv2d_gpu_kernel.cc @@ -26,5 +26,19 @@ MS_REG_GPU_KERNEL_ONE( Conv2D, KernelAttr().AddInputAttr(kNumberTypeFloat16).AddInputAttr(kNumberTypeFloat16).AddOutputAttr(kNumberTypeFloat16), Conv2dFwdGpuKernelMod, half) +MS_REG_GPU_KERNEL_ONE(Conv2D, + KernelAttr() + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeInt64) + .AddOutputAttr(kNumberTypeFloat32), + Conv2dFwdGpuKernelMod, float) +MS_REG_GPU_KERNEL_ONE(Conv2D, + KernelAttr() + .AddInputAttr(kNumberTypeFloat16) + .AddInputAttr(kNumberTypeFloat16) + .AddInputAttr(kNumberTypeInt64) + .AddOutputAttr(kNumberTypeFloat16), + Conv2dFwdGpuKernelMod, half) } // namespace kernel } // namespace mindspore diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/conv2d_grad_filter_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/conv2d_grad_filter_gpu_kernel.h index d8767294e9f..b3c27ac9ae4 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/conv2d_grad_filter_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/conv2d_grad_filter_gpu_kernel.h @@ -228,6 +228,41 @@ class ConvGradFilterBkwGpuKernelMod : public NativeGpuKernelMod { "cudnnDestroyTensorDescriptor failed"); } + void ResetResource() noexcept override { + cudnn_handle_ = nullptr; + dw_desc_ = nullptr; + conv_desc_ = nullptr; + dy_desc_ = nullptr; + x_desc_ = nullptr; + padded_descriptor_ = nullptr; + algo_ = CUDNN_CONVOLUTION_BWD_FILTER_ALGO_0; + pad_mode_ = ""; + data_format_ = kOpFormat_NCHW; + format_attr_ = kOpFormat_NCHW; + cudnn_data_type_ = CUDNN_DATA_FLOAT; + compute_format_ = CUDNN_TENSOR_NCHW; + old_height_ = 0; + old_width_ = 0; + pad_height_ = 0; + pad_width_ = 0; + pad_top_ = 0; + pad_left_ = 0; + n_ = 0; + c_ = 0; + group_ = 1; + is_null_input_ = false; + kernel_name_ = "Conv2dGradFilter"; + input_size_ = 0; + dy_size_ = 0; + output_size_ = 0; + padded_size_ = 0; + workspace_size_ = 0; + use_pad_ = 0; + input_size_list_.clear(); + output_size_list_.clear(); + workspace_size_list_.clear(); + } + protected: void InitResource() override { cudnn_handle_ = device::gpu::GPUDeviceManager::GetInstance().GetCudnnHandle(); diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/conv2d_grad_input_gpu_kernel.cc b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/conv2d_grad_input_gpu_kernel.cc index b2e207490d4..7152c2f624d 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/conv2d_grad_input_gpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/conv2d_grad_input_gpu_kernel.cc @@ -26,5 +26,19 @@ MS_REG_GPU_KERNEL_ONE( Conv2DBackpropInput, KernelAttr().AddInputAttr(kNumberTypeFloat16).AddInputAttr(kNumberTypeFloat16).AddOutputAttr(kNumberTypeFloat16), ConvGradInputBkwGpuKernelMod, half) +MS_REG_GPU_KERNEL_TWO(Conv2DBackpropInput, + KernelAttr() + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeInt64) + .AddOutputAttr(kNumberTypeFloat32), + ConvGradInputBkwGpuKernelMod, float, int64_t) +MS_REG_GPU_KERNEL_TWO(Conv2DBackpropInput, + KernelAttr() + .AddInputAttr(kNumberTypeFloat16) + .AddInputAttr(kNumberTypeFloat16) + .AddInputAttr(kNumberTypeInt64) + .AddOutputAttr(kNumberTypeFloat16), + ConvGradInputBkwGpuKernelMod, half, int64_t) } // namespace kernel } // namespace mindspore diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/conv2d_grad_input_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/conv2d_grad_input_gpu_kernel.h index 50bb70253a2..363301a306f 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/conv2d_grad_input_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/conv2d_grad_input_gpu_kernel.h @@ -47,8 +47,10 @@ constexpr size_t kWidth2DStrideIndex = 1; constexpr size_t k2DDilationSize = 4; constexpr size_t kHeight2DDilationIndex = 2; constexpr size_t kWidth2DDilationIndex = 3; +constexpr auto StaticInput = 2; +constexpr auto DynamicInput = 3; -template +template class ConvGradInputBkwGpuKernelMod : public NativeGpuKernelMod { public: ConvGradInputBkwGpuKernelMod() @@ -129,21 +131,21 @@ class ConvGradInputBkwGpuKernelMod : public NativeGpuKernelMod { bool Init(const CNodePtr &kernel_node) override { kernel_name_ = common::AnfAlgo::GetCNodeName(kernel_node); - kernel_node_ = kernel_node; InitResource(); (void)CheckParam(kernel_node); - cudnn_data_type_ = GetCudnnDataType(TypeIdLabel(AnfAlgo::GetInputDeviceDataType(kernel_node, 0))); - data_format_ = AnfAlgo::GetInputFormat(kernel_node, 0); - auto format_attr = GetAttr(kernel_node, "format"); - if (format_attr == kOpFormat_NHWC) { - data_format_ = kOpFormat_NHWC; + if (is_dynamic_attr_ && !get_dynamic_attr_value_) { + return true; } + auto dy_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0); auto filter_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 1); if (CheckNull(dy_shape, filter_shape)) { return true; } - + auto format_attr = GetAttr(kernel_node, "format"); + if (format_attr == kOpFormat_NHWC) { + data_format_ = kOpFormat_NHWC; + } std::vector input_shape; GetInputShape(kernel_node, &input_shape); if (data_format_ == kOpFormat_NHWC) { @@ -155,7 +157,6 @@ class ConvGradInputBkwGpuKernelMod : public NativeGpuKernelMod { CheckTensorSize({input_shape, dy_shape, filter_shape}); SetNCHW(input_shape, &n_, &c_, &old_height_, &old_width_, data_format_); Set4DDesc(dy_shape, input_shape, filter_shape); - group_ = static_cast(GetAttr(kernel_node, "group")); CHECK_CUDNN_RET_WITH_EXCEPT(kernel_node_, cudnnSetConvolutionGroupCount(conv_desc_, group_), "cudnnSetConvGroupCount failed"); @@ -243,6 +244,44 @@ class ConvGradInputBkwGpuKernelMod : public NativeGpuKernelMod { "cudnnDestroyTensorDescriptor failed"); } + void ResetResource() noexcept override { + MS_LOG(ERROR) << "czrrr conv2d_grad_input ResetResource"; + cudnn_handle_ = nullptr; + w_desc_ = nullptr; + conv_desc_ = nullptr; + dy_desc_ = nullptr; + dx_desc_ = nullptr; + padded_descriptor_ = nullptr; + algo_ = CUDNN_CONVOLUTION_BWD_DATA_ALGO_0; + pad_mode_ = ""; + data_format_ = kOpFormat_NCHW; + cudnn_data_type_ = CUDNN_DATA_FLOAT; + compute_format_ = CUDNN_TENSOR_NCHW; + old_height_ = 0; + old_width_ = 0; + pad_height_ = 0; + pad_width_ = 0; + pad_top_ = 0; + pad_left_ = 0; + n_ = 0; + c_ = 0; + stride_.clear(); + dilation_.clear(); + group_ = 1; + is_null_input_ = false; + kernel_name_ = "Conv2dGradInput"; + dy_size_ = 0; + w_size_ = 0; + output_size_ = 0; + padded_size_ = 0; + workspace_size_ = 0; + use_pad_ = 0; + beta_ = 0; + input_size_list_.clear(); + output_size_list_.clear(); + workspace_size_list_.clear(); + } + protected: void InitResource() override { cudnn_handle_ = device::gpu::GPUDeviceManager::GetInstance().GetCudnnHandle(); @@ -293,14 +332,28 @@ class ConvGradInputBkwGpuKernelMod : public NativeGpuKernelMod { private: void CheckParam(const CNodePtr &kernel_node) { + kernel_node_ = kernel_node; size_t input_num = common::AnfAlgo::GetInputTensorNum(kernel_node); - if (input_num != 2) { - MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of inputs should be 2, but got " << input_num; + if (input_num != StaticInput && input_num != DynamicInput) { + MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of inputs should be 2 or 3, but got " << input_num; + } + if (input_num == DynamicInput) { + is_dynamic_attr_ = true; + } + if (GetDynamicAttrIntValue(kernel_node, kShapeIndex_, &input_shape_)) { + get_dynamic_attr_value_ = true; + } + if (is_dynamic_attr_ && !get_dynamic_attr_value_) { + input_size_list_.push_back(0); + input_size_list_.push_back(0); + output_size_list_.push_back(0); } size_t output_num = common::AnfAlgo::GetOutputTensorNum(kernel_node); if (output_num != 1) { MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of outputs should be 1, but got " << output_num; } + cudnn_data_type_ = GetCudnnDataType(TypeIdLabel(AnfAlgo::GetInputDeviceDataType(kernel_node, 0))); + data_format_ = AnfAlgo::GetInputFormat(kernel_node, 0); } void SelectAlgorithm(cudnnTensorDescriptor_t dx_desc_real) { constexpr int requested_algo_count = 1; @@ -418,6 +471,10 @@ class ConvGradInputBkwGpuKernelMod : public NativeGpuKernelMod { size_t workspace_size_; bool use_pad_; float beta_; + bool is_dynamic_attr_{false}; + bool get_dynamic_attr_value_{false}; + std::vector input_shape_; + static constexpr size_t kShapeIndex_{2}; }; } // namespace kernel } // namespace mindspore