From 09b53bb7ed99000335c5b0bb8d013cfcdab58516 Mon Sep 17 00:00:00 2001
From: looop5
Date: Thu, 6 Oct 2022 14:28:50 +0800
Subject: [PATCH] refine some kernels to support dynamic shape

---
 .../cpu/kernel/concat_offset_cpu_kernel.cc    |  52 +--
 .../cpu/kernel/concat_offset_cpu_kernel.h     |   2 +-
 .../kernel/eigen/cholesky_solve_cpu_kernel.cc |   6 +-
 .../extract_volume_patches_cpu_kernel.cc      |   4 +-
 .../mkldnn/conv_grad_filter_cpu_kernel.cc     |   4 +-
 .../mkldnn/conv_grad_input_cpu_kernel.cc      |   4 +-
 .../device/cpu/kernel/one_hot_cpu_kernel.cc   | 298 +++++++++++++++++-
 .../device/cpu/kernel/one_hot_cpu_kernel.h    |   2 +
 .../sparse_tensor_dense_add_cpu_kernel.cc     |  15 +-
 .../sparse_tensor_dense_matmul_cpu_kernel.cc  |  14 +-
 .../kernel/arrays/dynamic_shape_gpu_kernel.h  |   2 +-
 .../kernel/math/cholesky_solve_gpu_kernel.h   |   9 +
 .../device/gpu/kernel/nn/conv3d_gpu_kernel.cc |   8 +-
 .../st/ops/dynamic_shape/grad/test_addcmul.py |   4 +-
 .../dynamic_shape/grad/test_batch_matmul.py   |   4 +-
 .../grad/test_conv3d_transpose.py             |   4 +-
 .../ops/dynamic_shape/grad/test_divnonan.py   |   4 +-
 tests/st/ops/dynamic_shape/grad/test_sub.py   |   4 +-
 tests/st/ops/dynamic_shape/grad/test_topk.py  |   4 +-
 tests/st/ops/dynamic_shape/grad/test_trace.py |   4 +-
 20 files changed, 385 insertions(+), 63 deletions(-)

diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/concat_offset_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/concat_offset_cpu_kernel.cc
index c265604ab9d..e4a19bdcaeb 100644
--- a/mindspore/ccsrc/plugin/device/cpu/kernel/concat_offset_cpu_kernel.cc
+++ b/mindspore/ccsrc/plugin/device/cpu/kernel/concat_offset_cpu_kernel.cc
@@ -17,6 +17,7 @@
 #include "plugin/device/cpu/kernel/concat_offset_cpu_kernel.h"
 #include
 #include
+#include
 #include "plugin/device/cpu/hal/device/cpu_device_address.h"
 
 namespace mindspore {
@@ -29,19 +30,7 @@ void ConcatOffsetCpuKernelMod::InitKernel(const CNodePtr &kernel_node) {
   cnode_ptr_ = kernel_node;
   MS_EXCEPTION_IF_NULL(kernel_node);
   kernel_name_ = common::AnfAlgo::GetCNodeName(kernel_node);
-
-  int64_t axis = common::AnfAlgo::GetNodeAttr<int64_t>(kernel_node, AXIS);
-  auto input_1_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
-  if (axis < 0) {
-    axis_ = LongToSize(axis + input_1_shape.size());
-  } else {
-    axis_ = LongToSize(axis);
-  }
-  if (axis_ >= input_1_shape.size()) {
-    MS_LOG(EXCEPTION) << "For '" << kernel_name_
-                      << "', the 'axis' must be less than the dimension of 'input_x', but got 'axis': " << axis_
-                      << ", and the dimension of 'input_x': " << input_1_shape.size();
-  }
+  axis_ = common::AnfAlgo::GetNodeAttr<int64_t>(kernel_node, AXIS);
 
   auto kernel_attr = GetKernelAttrFromNode(kernel_node);
   auto [is_match, index] = MatchKernelAttr(kernel_attr, GetOpSupport());
@@ -61,19 +50,38 @@ bool ConcatOffsetCpuKernelMod::LaunchKernel(const std::vector<AddressPtr> &inputs,
   auto output_addr = reinterpret_cast<T *>(outputs[0]->addr);
   size_t input_num = common::AnfAlgo::GetInputTensorNum(node_);
+  if (input_num == 0) {
+    MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', input tensors cannot be empty";
+  }
+  // check input shapes
+  std::vector<ShapeVector> input_shapes;
+  for (size_t i = 0; i < input_num; i++) {
+    ShapeVector input_shape_i = common::AnfAlgo::GetPrevNodeOutputInferShape(node_, i);
+    input_shapes.push_back(input_shape_i);
+    if (input_shape_i.size() != input_shapes[0].size()) {
+      MS_LOG(EXCEPTION) << "For '" << kernel_name_
+                        << "', the rank of all input tensors must be equal, but got input[0] rank = "
+                        << input_shapes[0].size() << ", input[" << i << "] rank = " << input_shape_i.size();
+    }
+  }
+  // check axis
+  auto x_rank = SizeToLong(input_shapes[0].size());
+  if (axis_ < -x_rank || axis_ >= x_rank) {
+    MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', 'axis' must be in range [-" << x_rank << ", " << x_rank
+                      << "), but got " << axis_;
+  }
+  if (axis_ < 0) {
+    axis_ += x_rank;
+  }
+  auto axis = LongToSize(axis_);
+
   ShapeVector offset{0};
-  auto all_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(node_, 0)[axis_];
+  auto all_shape = input_shapes[0][axis];
   // cal offset
   for (size_t i = 1; i < input_num; i++) {
-    auto input_shape_i = common::AnfAlgo::GetPrevNodeOutputInferShape(node_, i);
-    if (axis_ >= input_shape_i.size()) {
-      MS_LOG(EXCEPTION) << "For '" << kernel_name_
-                        << "', the 'axis' must be less than the dimension of input, but got 'axis': " << axis_
-                        << ", and the dimension of the " << i << "'th input: " << input_shape_i.size();
-    }
     offset.emplace_back(all_shape);
-    all_shape += input_shape_i[axis_];
+    all_shape += input_shapes[i][axis];
   }
   auto output_shape = common::AnfAlgo::GetOutputInferShape(node_, 0);
   if (output_shape.size() != kConcatOffsetOutputShapeSize) {
@@ -90,7 +98,7 @@ bool ConcatOffsetCpuKernelMod::LaunchKernel(const std::vector<AddressPtr> &inputs,
   size_t idx = 0;
   for (size_t i = 0; i < input_num; ++i) {
     for (size_t j = 0; j < rank; ++j) {
-      if (j == axis_) {
+      if (j == axis) {
         output_addr[idx] = offset[i];
       } else {
         output_addr[idx] = 0;
diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/concat_offset_cpu_kernel.h b/mindspore/ccsrc/plugin/device/cpu/kernel/concat_offset_cpu_kernel.h
--- a/mindspore/ccsrc/plugin/device/cpu/kernel/concat_offset_cpu_kernel.h
+++ b/mindspore/ccsrc/plugin/device/cpu/kernel/concat_offset_cpu_kernel.h
@@ ... @@ class ConcatOffsetCpuKernelMod
                                               const std::vector<AddressPtr> &)>;
   static std::vector<std::pair<KernelAttr, ConcatOffsetFunc>> func_list_;
   ConcatOffsetFunc kernel_func_;
-  size_t axis_{0};
+  int64_t axis_{0};
 };
 }  // namespace kernel
 }  // namespace mindspore
diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/eigen/cholesky_solve_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/eigen/cholesky_solve_cpu_kernel.cc
index 22ccc477b35..3f59437a1d4 100644
--- a/mindspore/ccsrc/plugin/device/cpu/kernel/eigen/cholesky_solve_cpu_kernel.cc
+++ b/mindspore/ccsrc/plugin/device/cpu/kernel/eigen/cholesky_solve_cpu_kernel.cc
@@ -33,9 +33,13 @@ constexpr size_t kCholeskySolveOutputNum = 1;
 
 void CholeskySolveCpuKernelMod::InitKernel(const CNodePtr &kernel_node) {
   MS_EXCEPTION_IF_NULL(kernel_node);
+  auto shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, kInputIndex0);
+  if (IsDynamic(shape)) {
+    return;
+  }
   kernel_name_ = common::AnfAlgo::GetCNodeName(kernel_node);
   dtype_ = AnfAlgo::GetInputDeviceDataType(kernel_node, kInputIndex0);
-  std::vector<size_t> x1_shape = Convert2SizeT(common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, kInputIndex0));
+  std::vector<size_t> x1_shape = Convert2SizeT(shape);
   size_t rank = x1_shape.size();
   if (rank == kDefalutRank) {
     dim = x1_shape[rank - kRowIndex];
diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/eigen/extract_volume_patches_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/eigen/extract_volume_patches_cpu_kernel.cc
index 2bffa698e70..021c421f7cc 100644
--- a/mindspore/ccsrc/plugin/device/cpu/kernel/eigen/extract_volume_patches_cpu_kernel.cc
+++ b/mindspore/ccsrc/plugin/device/cpu/kernel/eigen/extract_volume_patches_cpu_kernel.cc
@@ -49,8 +49,6 @@ bool ExtractVolumePatchesKernelMod::Init(const BaseOperatorPtr &base_operator,
   kernel_size_ = kernel_ptr->get_kernel_size();
   strides_ = kernel_ptr->get_strides();
   padding_ = kernel_ptr->get_padding();
-  input_shape_ = inputs[0]->GetShapeVector();
-  output_shape_ = outputs[0]->GetShapeVector();
   if (!MatchKernelFunc(base_operator, inputs, outputs)) {
     return false;
   }
@@ -65,6 +63,8 @@ int ExtractVolumePatchesKernelMod::Resize(const BaseOperatorPtr &base_operator,
   if (ret != 0) {
     return ret;
   }
+  input_shape_ = inputs[0]->GetShapeVector();
+  output_shape_ = outputs[0]->GetShapeVector();
   return static_cast<int>(KRET_OK);
 }
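
Note on the ConcatOffset change above: the shape and axis checks that used to run once in
InitKernel now run at launch time, because under dynamic shape the concrete input shapes are
only known when the kernel actually executes. The kernel normalizes a possibly negative axis
against the now-known rank and accumulates one offset per input along that axis. A rough
Python sketch of the computation (illustrative names only, not the real C++ API):

    def concat_offset(input_shapes, axis):
        # All inputs must share a rank; axis is normalized like in the kernel.
        rank = len(input_shapes[0])
        if not -rank <= axis < rank:
            raise ValueError(f"'axis' must be in range [-{rank}, {rank}), but got {axis}")
        if axis < 0:
            axis += rank
        offsets = [0]
        total = input_shapes[0][axis]
        for shape in input_shapes[1:]:
            offsets.append(total)
            total += shape[axis]
        # One offset row per input: zeros everywhere except at `axis`.
        return [[off if j == axis else 0 for j in range(rank)] for off in offsets]

    assert concat_offset([(2, 3), (4, 3), (1, 3)], 0) == [[0, 0], [2, 0], [6, 0]]
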
diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/mkldnn/conv_grad_filter_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/mkldnn/conv_grad_filter_cpu_kernel.cc
index f89a7e6c1d8..aeba3b3530d 100644
--- a/mindspore/ccsrc/plugin/device/cpu/kernel/mkldnn/conv_grad_filter_cpu_kernel.cc
+++ b/mindspore/ccsrc/plugin/device/cpu/kernel/mkldnn/conv_grad_filter_cpu_kernel.cc
@@ -135,9 +135,11 @@ std::vector<KernelAttr> ConvGradFilterCpuKernelMod::GetOpSupport() {
        .AddInputAttr(kNumberTypeInt32)
        .AddOutputAttr(kNumberTypeFloat32)}},
     {kConv3DBackpropFilter,
-     {KernelAttr()
+     {KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
+      KernelAttr()
        .AddInputAttr(kNumberTypeFloat32)
        .AddInputAttr(kNumberTypeFloat32)
+       .AddInputAttr(kNumberTypeInt64)
        .AddOutputAttr(kNumberTypeFloat32)}}};
   auto iter = support_list_map.find(kernel_type_);
   if (iter == support_list_map.end()) {
diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/mkldnn/conv_grad_input_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/mkldnn/conv_grad_input_cpu_kernel.cc
index ecf72da54d4..41d15078a00 100644
--- a/mindspore/ccsrc/plugin/device/cpu/kernel/mkldnn/conv_grad_input_cpu_kernel.cc
+++ b/mindspore/ccsrc/plugin/device/cpu/kernel/mkldnn/conv_grad_input_cpu_kernel.cc
@@ -130,9 +130,11 @@ std::vector<KernelAttr> ConvGradInputCpuKernelMod::GetOpSupport() {
        .AddInputAttr(kNumberTypeInt64)
        .AddOutputAttr(kNumberTypeFloat32)}},
     {kConv3DBackpropInput,
-     {KernelAttr()
+     {KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
+      KernelAttr()
        .AddInputAttr(kNumberTypeFloat32)
        .AddInputAttr(kNumberTypeFloat32)
+       .AddInputAttr(kNumberTypeInt64)
        .AddOutputAttr(kNumberTypeFloat32)}}};
   auto iter = support_list_map.find(kernel_type_);
   if (iter == support_list_map.end()) {
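
Both support lists above gain a second entry because, under dynamic shape, the backprop target
shape arrives as an extra int64 tensor input instead of being carried as a fixed attribute.
On the Python side, dynamic shape is typically driven through Cell.set_inputs; a minimal
sketch assuming the standard MindSpore dynamic-shape API (the net and shapes below are
placeholders, not taken from this patch):

    import numpy as np
    import mindspore as ms
    from mindspore import nn, ops, Tensor

    class Net(nn.Cell):
        def __init__(self):
            super().__init__()
            self.concat = ops.Concat(axis=0)

        def construct(self, x, y):
            return self.concat((x, y))

    net = Net()
    # A None dimension marks that axis as unknown until real data arrives.
    net.set_inputs(Tensor(shape=[None, 3], dtype=ms.float32),
                   Tensor(shape=[None, 3], dtype=ms.float32))
    out = net(Tensor(np.ones((2, 3), np.float32)), Tensor(np.ones((4, 3), np.float32)))
    print(out.shape)  # (6, 3)
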
diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/one_hot_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/one_hot_cpu_kernel.cc
index 19e11a2da84..c2ccf6da36a 100644
--- a/mindspore/ccsrc/plugin/device/cpu/kernel/one_hot_cpu_kernel.cc
+++ b/mindspore/ccsrc/plugin/device/cpu/kernel/one_hot_cpu_kernel.cc
@@ -24,6 +24,7 @@
 namespace mindspore {
 namespace kernel {
 namespace {
 constexpr size_t kOneHotInputsNum = 3;
+constexpr size_t kOneHotDynamicInputsNum = 4;
 constexpr size_t kOneHotOutputsNum = 1;
 #define INPUT_COMPUTE_CASE(DTYPE, TYPE, ODTYPE, INPUTS, OUTPUTS) \
   case (DTYPE): {                                                \
@@ -62,13 +63,21 @@ constexpr size_t kOneHotOutputsNum = 1;
   }
 }  // namespace
 
+inline void check_input_num(size_t input_num, const std::string &kernel_name) {
+  if (input_num != kOneHotInputsNum && input_num != kOneHotDynamicInputsNum) {
+    MS_LOG_EXCEPTION << "For '" << kernel_name << "', input num must be " << kOneHotInputsNum << " or "
+                     << kOneHotDynamicInputsNum << ", but got " << input_num;
+  }
+}
+
 bool OneHotCpuKernelMod::Init(const BaseOperatorPtr &base_operator, const std::vector<KernelTensorPtr> &inputs,
                               const std::vector<KernelTensorPtr> &outputs) {
-  constexpr size_t input_num = 3;
   constexpr size_t output_num = 1;
   kernel_name_ = base_operator->GetPrim()->name();
-  CHECK_KERNEL_INPUTS_NUM(inputs.size(), input_num, kernel_name_);
+  auto input_size = inputs.size();
+  check_input_num(input_size, kernel_name_);
   CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), output_num, kernel_name_);
+  SetOnValueInputIndex(input_size);
 
   input_dtype_ = inputs[kIndex0]->GetDtype();
   output_dtype_ = outputs[kIndex0]->GetDtype();
@@ -112,7 +121,7 @@ int OneHotCpuKernelMod::Resize(const BaseOperatorPtr &base_operator,
 
 bool OneHotCpuKernelMod::Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &,
                                 const std::vector<AddressPtr> &outputs) {
-  CHECK_KERNEL_INPUTS_NUM(inputs.size(), kOneHotInputsNum, kernel_name_);
+  check_input_num(inputs.size(), kernel_name_);
   CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kOneHotOutputsNum, kernel_name_);
   switch (input_dtype_) {
     INPUT_COMPUTE_CASE(kNumberTypeUInt8, uint8_t, output_dtype_, inputs, outputs);
@@ -126,11 +135,18 @@ bool OneHotCpuKernelMod::Launch(const std::vector<AddressPtr> &inputs,
   return true;
 }
 
+void OneHotCpuKernelMod::SetOnValueInputIndex(size_t input_num) {
+  constexpr size_t kDynamicOnValueInputIndex = 2;
+  if (input_num == kOneHotDynamicInputsNum) {
+    on_value_input_index_ = kDynamicOnValueInputIndex;
+  }
+}
+
 template <typename ID, typename OD>
 void OneHotCpuKernelMod::LaunchKernel(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &outputs) {
   const auto *indices = reinterpret_cast<ID *>(inputs[0]->addr);
-  auto on_value = reinterpret_cast<OD *>(inputs[1]->addr)[0];
-  auto off_value = reinterpret_cast<OD *>(inputs[2]->addr)[0];
+  auto on_value = reinterpret_cast<OD *>(inputs[on_value_input_index_]->addr)[0];
+  auto off_value = reinterpret_cast<OD *>(inputs[on_value_input_index_ + 1]->addr)[0];
   auto *output = reinterpret_cast<OD *>(outputs[0]->addr);
   size_t elem_num = inputs[0]->size / sizeof(ID);
   auto task = [this, &indices, &on_value, &off_value, &output](size_t start, size_t end) {
@@ -372,11 +388,281 @@ std::vector<KernelAttr> OneHotCpuKernelMod::support_list_ = {KernelAttr()
                 .AddInputAttr(kNumberTypeComplex128)
                 .AddOutputAttr(kNumberTypeComplex128),
             KernelAttr()
+                .AddInputAttr(kNumberTypeInt64)
+                .AddInputAttr(kObjectTypeString)
+                .AddInputAttr(kObjectTypeString)
+                .AddOutputAttr(kObjectTypeString),
+            // depth is an input with int64 type:
+            KernelAttr()
+                .AddInputAttr(kNumberTypeUInt8)
+                .AddInputAttr(kNumberTypeInt64)
+                .AddInputAttr(kNumberTypeUInt8)
+                .AddInputAttr(kNumberTypeUInt8)
+                .AddOutputAttr(kNumberTypeUInt8),
+            KernelAttr()
+                .AddInputAttr(kNumberTypeUInt8)
+                .AddInputAttr(kNumberTypeInt64)
+                .AddInputAttr(kNumberTypeUInt16)
+                .AddInputAttr(kNumberTypeUInt16)
+                .AddOutputAttr(kNumberTypeUInt16),
+            KernelAttr()
+                .AddInputAttr(kNumberTypeUInt8)
+                .AddInputAttr(kNumberTypeInt64)
+                .AddInputAttr(kNumberTypeUInt32)
+                .AddInputAttr(kNumberTypeUInt32)
+                .AddOutputAttr(kNumberTypeUInt32),
+            KernelAttr()
+                .AddInputAttr(kNumberTypeUInt8)
+                .AddInputAttr(kNumberTypeInt64)
+                .AddInputAttr(kNumberTypeUInt64)
+                .AddInputAttr(kNumberTypeUInt64)
+                .AddOutputAttr(kNumberTypeUInt64),
+            KernelAttr()
+                .AddInputAttr(kNumberTypeUInt8)
+                .AddInputAttr(kNumberTypeInt64)
+                .AddInputAttr(kNumberTypeInt8)
+                .AddInputAttr(kNumberTypeInt8)
+                .AddOutputAttr(kNumberTypeInt8),
+            KernelAttr()
+                .AddInputAttr(kNumberTypeUInt8)
+                .AddInputAttr(kNumberTypeInt64)
+                .AddInputAttr(kNumberTypeInt16)
+                .AddInputAttr(kNumberTypeInt16)
+                .AddOutputAttr(kNumberTypeInt16),
+            KernelAttr()
+                .AddInputAttr(kNumberTypeUInt8)
+                .AddInputAttr(kNumberTypeInt64)
+                .AddInputAttr(kNumberTypeInt32)
+                .AddInputAttr(kNumberTypeInt32)
+                .AddOutputAttr(kNumberTypeInt32),
+            KernelAttr()
+                .AddInputAttr(kNumberTypeUInt8)
+                .AddInputAttr(kNumberTypeInt64)
+                .AddInputAttr(kNumberTypeInt64)
+                .AddInputAttr(kNumberTypeInt64)
+                .AddOutputAttr(kNumberTypeInt64),
+            KernelAttr()
+                .AddInputAttr(kNumberTypeUInt8)
+                .AddInputAttr(kNumberTypeInt64)
+                .AddInputAttr(kNumberTypeFloat16)
+                .AddInputAttr(kNumberTypeFloat16)
+                .AddOutputAttr(kNumberTypeFloat16),
+            KernelAttr()
+                .AddInputAttr(kNumberTypeUInt8)
+                .AddInputAttr(kNumberTypeInt64)
+                .AddInputAttr(kNumberTypeFloat32)
+                .AddInputAttr(kNumberTypeFloat32)
+                .AddOutputAttr(kNumberTypeFloat32),
+            KernelAttr()
+                .AddInputAttr(kNumberTypeUInt8)
+                .AddInputAttr(kNumberTypeInt64)
+                .AddInputAttr(kNumberTypeFloat64)
+                .AddInputAttr(kNumberTypeFloat64)
+                .AddOutputAttr(kNumberTypeFloat64),
+            KernelAttr()
+                .AddInputAttr(kNumberTypeUInt8)
+                .AddInputAttr(kNumberTypeInt64)
+                .AddInputAttr(kNumberTypeBool)
+                .AddInputAttr(kNumberTypeBool)
+                .AddOutputAttr(kNumberTypeBool),
+            KernelAttr()
+                .AddInputAttr(kNumberTypeUInt8)
+                .AddInputAttr(kNumberTypeInt64)
+                .AddInputAttr(kNumberTypeComplex64)
+                .AddInputAttr(kNumberTypeComplex64)
+                .AddOutputAttr(kNumberTypeComplex64),
+            KernelAttr()
+                .AddInputAttr(kNumberTypeUInt8)
+                .AddInputAttr(kNumberTypeInt64)
+                .AddInputAttr(kNumberTypeComplex128)
+                .AddInputAttr(kNumberTypeComplex128)
+                .AddOutputAttr(kNumberTypeComplex128),
+            KernelAttr()
+                .AddInputAttr(kNumberTypeUInt8)
+                .AddInputAttr(kNumberTypeInt64)
+                .AddInputAttr(kObjectTypeString)
+                .AddInputAttr(kObjectTypeString)
+                .AddOutputAttr(kObjectTypeString),
+            KernelAttr()
+                .AddInputAttr(kNumberTypeInt32)
+                .AddInputAttr(kNumberTypeInt64)
+                .AddInputAttr(kNumberTypeUInt8)
+                .AddInputAttr(kNumberTypeUInt8)
+                .AddOutputAttr(kNumberTypeUInt8),
+            KernelAttr()
+                .AddInputAttr(kNumberTypeInt32)
+                .AddInputAttr(kNumberTypeInt64)
+                .AddInputAttr(kNumberTypeUInt16)
+                .AddInputAttr(kNumberTypeUInt16)
+                .AddOutputAttr(kNumberTypeUInt16),
+            KernelAttr()
+                .AddInputAttr(kNumberTypeInt32)
+                .AddInputAttr(kNumberTypeInt64)
+                .AddInputAttr(kNumberTypeUInt32)
+                .AddInputAttr(kNumberTypeUInt32)
+                .AddOutputAttr(kNumberTypeUInt32),
+            KernelAttr()
+                .AddInputAttr(kNumberTypeInt32)
+                .AddInputAttr(kNumberTypeInt64)
+                .AddInputAttr(kNumberTypeUInt64)
+                .AddInputAttr(kNumberTypeUInt64)
+                .AddOutputAttr(kNumberTypeUInt64),
+            KernelAttr()
+                .AddInputAttr(kNumberTypeInt32)
+                .AddInputAttr(kNumberTypeInt64)
+                .AddInputAttr(kNumberTypeInt8)
+                .AddInputAttr(kNumberTypeInt8)
+                .AddOutputAttr(kNumberTypeInt8),
+            KernelAttr()
+                .AddInputAttr(kNumberTypeInt32)
+                .AddInputAttr(kNumberTypeInt64)
+                .AddInputAttr(kNumberTypeInt16)
+                .AddInputAttr(kNumberTypeInt16)
+                .AddOutputAttr(kNumberTypeInt16),
+            KernelAttr()
+                .AddInputAttr(kNumberTypeInt32)
+                .AddInputAttr(kNumberTypeInt64)
+                .AddInputAttr(kNumberTypeInt32)
+                .AddInputAttr(kNumberTypeInt32)
+                .AddOutputAttr(kNumberTypeInt32),
+            KernelAttr()
+                .AddInputAttr(kNumberTypeInt32)
+                .AddInputAttr(kNumberTypeInt64)
+                .AddInputAttr(kNumberTypeInt64)
+                .AddInputAttr(kNumberTypeInt64)
+                .AddOutputAttr(kNumberTypeInt64),
+            KernelAttr()
+                .AddInputAttr(kNumberTypeInt32)
+                .AddInputAttr(kNumberTypeInt64)
+                .AddInputAttr(kNumberTypeFloat16)
+                .AddInputAttr(kNumberTypeFloat16)
+                .AddOutputAttr(kNumberTypeFloat16),
+            KernelAttr()
+                .AddInputAttr(kNumberTypeInt32)
+                .AddInputAttr(kNumberTypeInt64)
+                .AddInputAttr(kNumberTypeFloat32)
+                .AddInputAttr(kNumberTypeFloat32)
+                .AddOutputAttr(kNumberTypeFloat32),
+            KernelAttr()
+                .AddInputAttr(kNumberTypeInt32)
+                .AddInputAttr(kNumberTypeInt64)
+                .AddInputAttr(kNumberTypeFloat64)
+                .AddInputAttr(kNumberTypeFloat64)
+                .AddOutputAttr(kNumberTypeFloat64),
+            KernelAttr()
+                .AddInputAttr(kNumberTypeInt32)
+                .AddInputAttr(kNumberTypeInt64)
+                .AddInputAttr(kNumberTypeBool)
+                .AddInputAttr(kNumberTypeBool)
+                .AddOutputAttr(kNumberTypeBool),
+            KernelAttr()
+                .AddInputAttr(kNumberTypeInt32)
+                .AddInputAttr(kNumberTypeInt64)
+                .AddInputAttr(kNumberTypeComplex64)
+                .AddInputAttr(kNumberTypeComplex64)
+                .AddOutputAttr(kNumberTypeComplex64),
+            KernelAttr()
+                .AddInputAttr(kNumberTypeInt32)
+                .AddInputAttr(kNumberTypeInt64)
+                .AddInputAttr(kNumberTypeComplex128)
+                .AddInputAttr(kNumberTypeComplex128)
+                .AddOutputAttr(kNumberTypeComplex128),
+            KernelAttr()
+                .AddInputAttr(kNumberTypeInt32)
+                .AddInputAttr(kNumberTypeInt64)
+                .AddInputAttr(kObjectTypeString)
+                .AddInputAttr(kObjectTypeString)
+                .AddOutputAttr(kObjectTypeString),
+            KernelAttr()
+                .AddInputAttr(kNumberTypeInt64)
+                .AddInputAttr(kNumberTypeInt64)
+                .AddInputAttr(kNumberTypeUInt8)
+                .AddInputAttr(kNumberTypeUInt8)
+                .AddOutputAttr(kNumberTypeUInt8),
+            KernelAttr()
+                .AddInputAttr(kNumberTypeInt64)
+                .AddInputAttr(kNumberTypeInt64)
+                .AddInputAttr(kNumberTypeUInt16)
+                .AddInputAttr(kNumberTypeUInt16)
+                .AddOutputAttr(kNumberTypeUInt16),
+            KernelAttr()
+                .AddInputAttr(kNumberTypeInt64)
+                .AddInputAttr(kNumberTypeInt64)
+                .AddInputAttr(kNumberTypeUInt32)
+                .AddInputAttr(kNumberTypeUInt32)
+                .AddOutputAttr(kNumberTypeUInt32),
+            KernelAttr()
+                .AddInputAttr(kNumberTypeInt64)
+                .AddInputAttr(kNumberTypeInt64)
+                .AddInputAttr(kNumberTypeUInt64)
+                .AddInputAttr(kNumberTypeUInt64)
+                .AddOutputAttr(kNumberTypeUInt64),
+            KernelAttr()
+                .AddInputAttr(kNumberTypeInt64)
+                .AddInputAttr(kNumberTypeInt64)
+                .AddInputAttr(kNumberTypeInt8)
+                .AddInputAttr(kNumberTypeInt8)
+                .AddOutputAttr(kNumberTypeInt8),
+            KernelAttr()
+                .AddInputAttr(kNumberTypeInt64)
+                .AddInputAttr(kNumberTypeInt64)
+                .AddInputAttr(kNumberTypeInt16)
+                .AddInputAttr(kNumberTypeInt16)
+                .AddOutputAttr(kNumberTypeInt16),
+            KernelAttr()
+                .AddInputAttr(kNumberTypeInt64)
+                .AddInputAttr(kNumberTypeInt64)
+                .AddInputAttr(kNumberTypeInt32)
+                .AddInputAttr(kNumberTypeInt32)
+                .AddOutputAttr(kNumberTypeInt32),
+            KernelAttr()
+                .AddInputAttr(kNumberTypeInt64)
+                .AddInputAttr(kNumberTypeInt64)
+                .AddInputAttr(kNumberTypeInt64)
+                .AddInputAttr(kNumberTypeInt64)
+                .AddOutputAttr(kNumberTypeInt64),
+            KernelAttr()
+                .AddInputAttr(kNumberTypeInt64)
+                .AddInputAttr(kNumberTypeInt64)
+                .AddInputAttr(kNumberTypeFloat16)
+                .AddInputAttr(kNumberTypeFloat16)
+                .AddOutputAttr(kNumberTypeFloat16),
+            KernelAttr()
+                .AddInputAttr(kNumberTypeInt64)
+                .AddInputAttr(kNumberTypeInt64)
+                .AddInputAttr(kNumberTypeFloat32)
+                .AddInputAttr(kNumberTypeFloat32)
+                .AddOutputAttr(kNumberTypeFloat32),
+            KernelAttr()
+                .AddInputAttr(kNumberTypeInt64)
+                .AddInputAttr(kNumberTypeInt64)
+                .AddInputAttr(kNumberTypeFloat64)
+                .AddInputAttr(kNumberTypeFloat64)
+                .AddOutputAttr(kNumberTypeFloat64),
+            KernelAttr()
+                .AddInputAttr(kNumberTypeInt64)
+                .AddInputAttr(kNumberTypeInt64)
+                .AddInputAttr(kNumberTypeBool)
+                .AddInputAttr(kNumberTypeBool)
+                .AddOutputAttr(kNumberTypeBool),
+            KernelAttr()
+                .AddInputAttr(kNumberTypeInt64)
+                .AddInputAttr(kNumberTypeInt64)
+                .AddInputAttr(kNumberTypeComplex64)
+                .AddInputAttr(kNumberTypeComplex64)
+                .AddOutputAttr(kNumberTypeComplex64),
+            KernelAttr()
+                .AddInputAttr(kNumberTypeInt64)
+                .AddInputAttr(kNumberTypeInt64)
+                .AddInputAttr(kNumberTypeComplex128)
+                .AddInputAttr(kNumberTypeComplex128)
+                .AddOutputAttr(kNumberTypeComplex128),
+            KernelAttr()
+                .AddInputAttr(kNumberTypeInt64)
                 .AddInputAttr(kNumberTypeInt64)
                 .AddInputAttr(kObjectTypeString)
                 .AddInputAttr(kObjectTypeString)
                 .AddOutputAttr(kObjectTypeString)};
-
 std::vector<KernelAttr> OneHotCpuKernelMod::GetOpSupport() { return support_list_; }
 
 MS_KERNEL_FACTORY_REG(NativeCpuKernelMod, OneHot, OneHotCpuKernelMod);
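
The long registration block above reduces to one rule: with three inputs the kernel sees
(indices, on_value, off_value); with four, depth is inserted at index 1 and on_value/off_value
shift right by one. A tiny Python rendering of the SetOnValueInputIndex logic, for
illustration only:

    def on_off_value_indices(input_num):
        # static form:  (indices, on_value, off_value)        -> 3 inputs
        # dynamic form: (indices, depth, on_value, off_value) -> 4 inputs
        if input_num not in (3, 4):
            raise ValueError(f"input num must be 3 or 4, but got {input_num}")
        on_value_index = 2 if input_num == 4 else 1
        return on_value_index, on_value_index + 1

    assert on_off_value_indices(3) == (1, 2)
    assert on_off_value_indices(4) == (2, 3)
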
diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/one_hot_cpu_kernel.h b/mindspore/ccsrc/plugin/device/cpu/kernel/one_hot_cpu_kernel.h
index 5272ae392c6..a01c917bb12 100644
--- a/mindspore/ccsrc/plugin/device/cpu/kernel/one_hot_cpu_kernel.h
+++ b/mindspore/ccsrc/plugin/device/cpu/kernel/one_hot_cpu_kernel.h
@@ -45,6 +45,7 @@ class OneHotCpuKernelMod : public NativeCpuKernelMod {
  private:
   template <typename ID, typename OD>
   void LaunchKernel(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &outputs);
+  void SetOnValueInputIndex(size_t input_num);
 
   TypeId input_dtype_{kTypeUnknown};
   TypeId output_dtype_{kTypeUnknown};
@@ -52,6 +53,7 @@ class OneHotCpuKernelMod : public NativeCpuKernelMod {
   size_t stride_{0};
   size_t axis_{0};
   static std::vector<KernelAttr> support_list_;
+  size_t on_value_input_index_{1};
 };
 }  // namespace kernel
 }  // namespace mindspore
diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/sparse_tensor_dense_add_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/sparse_tensor_dense_add_cpu_kernel.cc
index 481d4452a65..ff931d42104 100644
--- a/mindspore/ccsrc/plugin/device/cpu/kernel/sparse_tensor_dense_add_cpu_kernel.cc
+++ b/mindspore/ccsrc/plugin/device/cpu/kernel/sparse_tensor_dense_add_cpu_kernel.cc
@@ -36,24 +36,29 @@ void SparseTensorDenseAddCpuKernelMod::InitKernel(const CNodePtr &kernel_node) {
   MS_EXCEPTION_IF_NULL(kernel_node);
   kernel_name_ = common::AnfAlgo::GetCNodeName(kernel_node);
   auto indices_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, kIndex0);
+  auto values_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, kIndex1);
+  auto shape_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, kIndex2);
+  auto x2_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, kIndex3);
+  if (AnfAlgo::IsShapesDynamic({values_shape, indices_shape, shape_shape, x2_shape})) {
+    return;
+  }
+
   if (indices_shape.size() != kIndicesShapeSize) {
     MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', it requires 'x1_indices' must be a " << kIndicesShapeSize
                       << "-D Tensor, but got " << indices_shape.size() << "-D";
   }
-  auto values_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, kIndex1);
   if (values_shape.size() != 1 || values_shape[0] != indices_shape[0]) {
     MS_LOG(EXCEPTION) << "For '" << kernel_name_
                       << "', it requires 'x1_values' must be a 1-D Tensor and the first dimension length "
                       << "must be equal to the first dimension length of 'indices', but got 'x1_values' shape: "
                       << Vector2Str(values_shape) << " and 'x1_indices' shape: " << Vector2Str(indices_shape);
   }
-  auto shape_shape_ = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, kIndex2);
-  x2_shape_ = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, kIndex3);
-  size_t x1_rank = static_cast<size_t>(shape_shape_[0]);
+  x2_shape_ = x2_shape;
+  size_t x1_rank = static_cast<size_t>(shape_shape[0]);
   size_t x2_rank = x2_shape_.size();
   if (x1_rank != x2_rank) {
     MS_LOG(EXCEPTION) << "For '" << kernel_name_
-                      << "', x1 and x2 must have same ranks, but got 'x1' shape: " << Vector2Str(shape_shape_)
+                      << "', x1 and x2 must have same ranks, but got 'x1' shape: " << Vector2Str(shape_shape)
                       << " and 'x2' shape: " << Vector2Str(x2_shape_);
   }
   values_size_ = static_cast<size_t>(values_shape[0]);
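
The early returns added to these InitKernel bodies all hinge on the same test: if any relevant
shape is still dynamic, skip validation and sizing until a later pass sees concrete shapes.
A sketch of that test, assuming MindSpore's usual shape encoding (an unknown dimension is
encoded as -1 and an unknown rank as [-2]):

    def is_shape_dynamic(shape):
        return any(dim < 0 for dim in shape)

    def is_any_shape_dynamic(shapes):
        return any(is_shape_dynamic(s) for s in shapes)

    assert is_any_shape_dynamic([[2, 3], [-1, 3]])      # unknown dim -> defer checks
    assert is_any_shape_dynamic([[-2]])                 # unknown rank -> defer checks
    assert not is_any_shape_dynamic([[2, 3], [4, 3]])   # fully known -> validate now
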
diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/sparse_tensor_dense_matmul_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/sparse_tensor_dense_matmul_cpu_kernel.cc
index 6b0f1948b8e..d5bb8ce3ae0 100644
--- a/mindspore/ccsrc/plugin/device/cpu/kernel/sparse_tensor_dense_matmul_cpu_kernel.cc
+++ b/mindspore/ccsrc/plugin/device/cpu/kernel/sparse_tensor_dense_matmul_cpu_kernel.cc
@@ -38,25 +38,27 @@ void SparseTensorDenseMatmulCpuKernelMod::InitKernel(const CNodePtr &kernel_node) {
   adj_st_ = common::AnfAlgo::GetNodeAttr<bool>(kernel_node, ADJ_ST);
   adj_dt_ = common::AnfAlgo::GetNodeAttr<bool>(kernel_node, ADJ_dT);
   auto indices_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, INDICES);
+  auto values_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, VALUES);
+  auto output_shape = common::AnfAlgo::GetOutputInferShape(kernel_node, 0);
+  auto b_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, static_cast<size_t>(DENSE));
+  if (AnfAlgo::IsShapesDynamic({values_shape, indices_shape, output_shape, b_shape})) {
+    return;
+  }
   if (indices_shape.size() != kIndicesSizeNum || indices_shape[1] != kIndices2rdDimNum) {
     MS_LOG(EXCEPTION) << "For '" << kernel_name_
                       << "', it requires 'indices' must be a 2-D Tensor and the second dimension length "
                          "must be 2, but got 'indices' shape: "
                       << Vector2Str(indices_shape);
   }
-  auto values_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, VALUES);
-  if (AnfAlgo::IsShapesDynamic({values_shape, indices_shape})) {
-    return;
-  }
   if (values_shape.size() != 1 || values_shape[0] != indices_shape[0]) {
     MS_LOG(EXCEPTION) << "For '" << kernel_name_
                       << "', it requires 'values' must be a 1-D Tensor and the first dimension length "
                          "must be equal to the first dimension length of 'indices', but got 'values' shape: "
                       << Vector2Str(values_shape) << " and 'indices' shape: " << Vector2Str(indices_shape);
   }
-  output_shape_ = Convert2SizeT(common::AnfAlgo::GetOutputInferShape(kernel_node, 0));
+  output_shape_ = Convert2SizeT(output_shape);
   values_size_ = LongToSize(values_shape[0]);
-  b_shape_ = Convert2SizeT(common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, static_cast<size_t>(DENSE)));
+  b_shape_ = Convert2SizeT(b_shape);
   if (b_shape_.size() != kSparseTensorDenseMatmulDenseShapeSize) {
     MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the dimension of 'dense' must be "
                       << kSparseTensorDenseMatmulDenseShapeSize << "-D, but got " << b_shape_.size() << "-D";
diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/dynamic_shape_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/dynamic_shape_gpu_kernel.h
index bf0a06789c0..c58471deb64 100755
--- a/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/dynamic_shape_gpu_kernel.h
+++ b/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/dynamic_shape_gpu_kernel.h
@@ -34,7 +34,7 @@ class TensorShapeGpuKernelMod : public DeprecatedNativeGpuKernelMod {
 
   bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
               const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
-    if (is_null_input_) {
+    if (is_null_input_ || prev_node_output_shape_.empty()) {
       return true;
     }
     S *output_device_address = GetDeviceAddress<S>(outputs, 0);
diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/math/cholesky_solve_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/math/cholesky_solve_gpu_kernel.h
index 59f507ea14c..b071c1a2030 100644
--- a/mindspore/ccsrc/plugin/device/gpu/kernel/math/cholesky_solve_gpu_kernel.h
+++ b/mindspore/ccsrc/plugin/device/gpu/kernel/math/cholesky_solve_gpu_kernel.h
@@ -121,6 +121,15 @@ class CholeskySolveGpuKernelMod : public DeprecatedNativeGpuKernelMod {
     return true;
   }
 
+  void ResetResource() noexcept override {
+    is_null_input_ = false;
+    input_size_list_.clear();
+    workspace_size_list_.clear();
+    output_size_list_.clear();
+    h_b_array_.clear();
+    h_a_array_.clear();
+  }
+
  protected:
   void InitSizeLists() override {
     size_t input_size = outer_batch_ * m_ * lda_ * unit_size_;
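
The new ResetResource override matters because a dynamic-shape kernel is re-initialized on
every shape change; without clearing, the cached size lists and host-side arrays would keep
stale entries from the previous launch. A schematic Python analogue of that lifecycle (all
names invented for the sketch):

    class DynamicKernelSketch:
        """Schematic analogue of a GPU kernel mod's size-list lifecycle."""
        def __init__(self):
            self.input_size_list = []

        def reset_resource(self):
            # Mirrors ResetResource(): drop sizes cached for the previous shape.
            self.input_size_list.clear()

        def init_for_shape(self, shape):
            self.reset_resource()
            size = 1
            for dim in shape:
                size *= dim
            self.input_size_list.append(size)

    k = DynamicKernelSketch()
    k.init_for_shape([2, 3])
    k.init_for_shape([5, 3])          # re-init on a new dynamic shape
    assert k.input_size_list == [15]  # no stale entry from the first launch
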
diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/conv3d_gpu_kernel.cc b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/conv3d_gpu_kernel.cc
index 730a6637cdf..5991650ca56 100644
--- a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/conv3d_gpu_kernel.cc
+++ b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/conv3d_gpu_kernel.cc
@@ -137,9 +137,11 @@ int Conv3dGpuKernelMod::Resize(const BaseOperatorPtr &base_operator,
   }
   pad_mode_ = kernel_ptr->get_pad_mode();
   SetPad(pad_list);
-  std::vector<int64_t> stride_me = kernel_ptr->get_stride();
-  std::vector<int64_t> dilation_me = kernel_ptr->get_dilation();
-  SetStrideAndDilation(stride_me, dilation_me);
+  if (!IsDynamicRank(in_shape) && !IsDynamicRank(filter_shape)) {
+    std::vector<int64_t> stride_me = kernel_ptr->get_stride();
+    std::vector<int64_t> dilation_me = kernel_ptr->get_dilation();
+    SetStrideAndDilation(stride_me, dilation_me);
+  }
   auto input_descriptor_real = GetInputDescReal(pad_list);
   if (cudnn_data_type_ == CUDNN_DATA_HALF) {
     CHECK_CUDNN_RET_WITH_EXCEPT_NOTRACE(cudnnSetConvolutionMathType(conv_desc_, CUDNN_TENSOR_OP_MATH),
diff --git a/tests/st/ops/dynamic_shape/grad/test_addcmul.py b/tests/st/ops/dynamic_shape/grad/test_addcmul.py
index 004fcbb0181..46a9e5d4e25 100644
--- a/tests/st/ops/dynamic_shape/grad/test_addcmul.py
+++ b/tests/st/ops/dynamic_shape/grad/test_addcmul.py
@@ -19,8 +19,6 @@ import mindspore as ms
 from mindspore import ops, nn, context, Tensor
 from .test_grad_of_dynamic import TestDynamicGrad
 
-context.set_context(mode=context.PYNATIVE_MODE)
-
 
 class NetAddcmul(nn.Cell):
     def __init__(self):
@@ -49,6 +47,7 @@ def test_addcmul_dyn_shape():
     Description: Test case of dynamic shape for Addcmul grad operator.
     Expectation: success.
     """
+    context.set_context(mode=context.PYNATIVE_MODE)
     addcmul_test(False)
 
 
@@ -61,4 +60,5 @@ def test_addcmul_dyn_rank():
     Description: Test case of dynamic rank for Addcmul grad operator.
     Expectation: success.
     """
+    context.set_context(mode=context.PYNATIVE_MODE)
     addcmul_test(True)
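
The test-file hunks in this patch all apply the same fix: the module-level
context.set_context call is moved into each test body, so the execution mode is set only when
a test actually runs instead of at import/collection time for every test in the file
(including skipped ones). An equivalent pytest-fixture formulation the suite could use
instead (a sketch, not part of this patch):

    import pytest
    from mindspore import context

    @pytest.fixture
    def pynative_mode():
        """Switch to PyNative for one test, then restore graph mode."""
        context.set_context(mode=context.PYNATIVE_MODE)
        yield
        context.set_context(mode=context.GRAPH_MODE)

    def test_example(pynative_mode):
        ...  # body runs in PyNative mode
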
""" + context.set_context(mode=context.PYNATIVE_MODE) test_dynamic = TestDynamicGrad(NetBatchMatMul(), skip_convert_out_ids=[0]) x = Tensor(np.ones(shape=[2, 4, 1, 3]), mindspore.float32) y = Tensor(np.ones(shape=[2, 4, 3, 4]), mindspore.float32) diff --git a/tests/st/ops/dynamic_shape/grad/test_conv3d_transpose.py b/tests/st/ops/dynamic_shape/grad/test_conv3d_transpose.py index 340ad8ea68a..8db9920b3af 100644 --- a/tests/st/ops/dynamic_shape/grad/test_conv3d_transpose.py +++ b/tests/st/ops/dynamic_shape/grad/test_conv3d_transpose.py @@ -98,7 +98,7 @@ def test_gpu_grad_dynamic_rank_2(): @pytest.mark.skip(reason="CPU无Conv3DBackpropFilter, Conv3DBackpropInput, kernel实现") -@pytest.mark.level1 +@pytest.mark.level2 @pytest.mark.platform_x86_cpu @pytest.mark.env_onecard def test_cpu_grad_dynamic_shape(): @@ -112,7 +112,7 @@ def test_cpu_grad_dynamic_shape(): @pytest.mark.skip(reason="CPU无Conv3DBackpropFilter, Conv3DBackpropInput, kernel实现") -@pytest.mark.level1 +@pytest.mark.level2 @pytest.mark.platform_x86_cpu @pytest.mark.env_onecard def test_cpu_grad_dynamic_rank(): diff --git a/tests/st/ops/dynamic_shape/grad/test_divnonan.py b/tests/st/ops/dynamic_shape/grad/test_divnonan.py index a93337ca809..de7d7c7908d 100644 --- a/tests/st/ops/dynamic_shape/grad/test_divnonan.py +++ b/tests/st/ops/dynamic_shape/grad/test_divnonan.py @@ -18,8 +18,6 @@ import pytest from mindspore import ops, nn, context, Tensor from .test_grad_of_dynamic import TestDynamicGrad -context.set_context(mode=context.PYNATIVE_MODE) - class NetDivNoNan(nn.Cell): def __init__(self): @@ -47,6 +45,7 @@ def test_divnonan_dyn_shape(): Description: Test case of dynamic shape for DivNoNan grad operator. Expectation: success. """ + context.set_context(mode=context.PYNATIVE_MODE) divnonan_test(False) @@ -60,4 +59,5 @@ def test_divnonan_dyn_rank(): Description: Test case of dynamic rank for DivNoNan grad operator. Expectation: success. """ + context.set_context(mode=context.PYNATIVE_MODE) divnonan_test(True) diff --git a/tests/st/ops/dynamic_shape/grad/test_sub.py b/tests/st/ops/dynamic_shape/grad/test_sub.py index e4ead44ee6b..7f84ee4b50c 100644 --- a/tests/st/ops/dynamic_shape/grad/test_sub.py +++ b/tests/st/ops/dynamic_shape/grad/test_sub.py @@ -18,8 +18,6 @@ import pytest from mindspore import ops, nn, context, Tensor from .test_grad_of_dynamic import TestDynamicGrad -context.set_context(mode=context.PYNATIVE_MODE) - class NetSub(nn.Cell): def __init__(self): @@ -47,6 +45,7 @@ def test_sub_dyn_shape(): Description: Test case of dynamic shape for Sub grad operator. Expectation: success. """ + context.set_context(mode=context.PYNATIVE_MODE) sub_test(False) @@ -60,4 +59,5 @@ def test_sub_dyn_rank(): Description: Test case of dynamic rank for Sub grad operator. Expectation: success. """ + context.set_context(mode=context.PYNATIVE_MODE) sub_test(True) diff --git a/tests/st/ops/dynamic_shape/grad/test_topk.py b/tests/st/ops/dynamic_shape/grad/test_topk.py index 13551ff3d8a..71432f0b26d 100644 --- a/tests/st/ops/dynamic_shape/grad/test_topk.py +++ b/tests/st/ops/dynamic_shape/grad/test_topk.py @@ -18,8 +18,6 @@ import pytest from mindspore import ops, nn, context, Tensor from .test_grad_of_dynamic import TestDynamicGrad -context.set_context(mode=context.PYNATIVE_MODE) - class NetTopK(nn.Cell): def __init__(self, k): @@ -47,6 +45,7 @@ def test_topk_dyn_shape(): Description: Test case of dynamic shape for TopK grad operator. Expectation: success. 
""" + context.set_context(mode=context.PYNATIVE_MODE) topk_test(False) @@ -59,4 +58,5 @@ def test_topk_dyn_rank(): Description: Test case of dynamic rank for TopK grad operator. Expectation: success. """ + context.set_context(mode=context.PYNATIVE_MODE) topk_test(True) diff --git a/tests/st/ops/dynamic_shape/grad/test_trace.py b/tests/st/ops/dynamic_shape/grad/test_trace.py index e1215c6469d..32af176d1c4 100644 --- a/tests/st/ops/dynamic_shape/grad/test_trace.py +++ b/tests/st/ops/dynamic_shape/grad/test_trace.py @@ -19,8 +19,6 @@ import mindspore.ops.operations.math_ops as M from mindspore import nn, context, Tensor from .test_grad_of_dynamic import TestDynamicGrad -context.set_context(mode=context.PYNATIVE_MODE) - class NetTrace(nn.Cell): def __init__(self): @@ -41,6 +39,7 @@ def test_trace_dynamic_shape(): Description: Test case of dynamic shape for Trace grad operator on CPU and GPU. Expectation: success. """ + context.set_context(mode=context.PYNATIVE_MODE) test_dynamic = TestDynamicGrad(NetTrace()) x = Tensor(np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]), mindspore.float32) test_dynamic.test_dynamic_grad_net(x, False) @@ -56,6 +55,7 @@ def test_trace_dynamic_shape_rank(): Description: Test case of dynamic rank for Trace grad operator on CPU and GPU. Expectation: success. """ + context.set_context(mode=context.PYNATIVE_MODE) test_dynamic = TestDynamicGrad(NetTrace()) x = Tensor(np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]), mindspore.float32) test_dynamic.test_dynamic_grad_net(x, True)