!43308 refine some kernels to support dynamic shape

Merge pull request !43308 from looop5/dyn_shape_commit_split_kernel
This commit is contained in:
i-robot 2022-10-06 09:59:51 +00:00 committed by Gitee
commit 5edf920ac0
No known key found for this signature in database
GPG Key ID: 173E9B9CA92EEF8F
20 changed files with 385 additions and 63 deletions

View File

@ -17,6 +17,7 @@
#include "plugin/device/cpu/kernel/concat_offset_cpu_kernel.h"
#include <algorithm>
#include <utility>
#include <vector>
#include "plugin/device/cpu/hal/device/cpu_device_address.h"
namespace mindspore {
@ -29,19 +30,7 @@ void ConcatOffsetCpuKernelMod::InitKernel(const CNodePtr &kernel_node) {
cnode_ptr_ = kernel_node;
MS_EXCEPTION_IF_NULL(kernel_node);
kernel_name_ = common::AnfAlgo::GetCNodeName(kernel_node);
int64_t axis = common::AnfAlgo::GetNodeAttr<int64_t>(kernel_node, AXIS);
auto input_1_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
if (axis < 0) {
axis_ = LongToSize(axis + input_1_shape.size());
} else {
axis_ = LongToSize(axis);
}
if (axis_ >= input_1_shape.size()) {
MS_LOG(EXCEPTION) << "For '" << kernel_name_
<< "', the 'axis' must be less than the dimension of 'input_x', but got 'axis': " << axis_
<< ", and the dimension of 'input_x': " << input_1_shape.size();
}
axis_ = common::AnfAlgo::GetNodeAttr<int64_t>(kernel_node, AXIS);
auto kernel_attr = GetKernelAttrFromNode(kernel_node);
auto [is_match, index] = MatchKernelAttr(kernel_attr, GetOpSupport());
@ -61,19 +50,38 @@ bool ConcatOffsetCpuKernelMod::LaunchKernel(const std::vector<kernel::AddressPtr
}
auto output_addr = reinterpret_cast<int64_t *>(outputs[0]->addr);
size_t input_num = common::AnfAlgo::GetInputTensorNum(node_);
if (input_num == 0) {
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << ", input tensors can not be empty";
}
// check input shapes
std::vector<ShapeVector> input_shapes;
for (size_t i = 0; i < input_num; i++) {
ShapeVector input_shape_i = common::AnfAlgo::GetPrevNodeOutputInferShape(node_, i);
input_shapes.push_back(input_shape_i);
if (input_shape_i.size() != input_shapes[0].size()) {
MS_LOG(EXCEPTION) << "For '" << kernel_name_
<< "', input tensors shape's rank must be equal, but got input[0] shape's rank = "
<< input_shapes[0].size() << ", input[" << i << "] shape's rank = " << input_shape_i.size();
}
}
// check axis
auto x_rank = SizeToLong(input_shapes[0].size());
if (axis_ < -x_rank || axis_ >= x_rank) {
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << ", 'axis' must be in range [-" << x_rank << ", " << x_rank
<< "), but got " << axis_;
}
if (axis_ < 0) {
axis_ += x_rank;
}
auto axis = LongToSize(axis_);
ShapeVector offset{0};
auto all_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(node_, 0)[axis_];
auto all_shape = input_shapes[0][axis];
// cal offset
for (size_t i = 1; i < input_num; i++) {
auto input_shape_i = common::AnfAlgo::GetPrevNodeOutputInferShape(node_, i);
if (axis_ >= input_shape_i.size()) {
MS_LOG(EXCEPTION) << "For '" << kernel_name_
<< "', the 'axis' must be less than the dimension of input, but got 'axis': " << axis_
<< ", and the dimension of the " << i << "'th input: " << input_shape_i.size();
}
offset.emplace_back(all_shape);
all_shape += input_shape_i[axis_];
all_shape += input_shapes[i][axis];
}
auto output_shape = common::AnfAlgo::GetOutputInferShape(node_, 0);
if (output_shape.size() != kConcatOffsetOutputShapeSize) {
@ -90,7 +98,7 @@ bool ConcatOffsetCpuKernelMod::LaunchKernel(const std::vector<kernel::AddressPtr
size_t idx = 0;
for (size_t i = 0; i < input_num; ++i) {
for (size_t j = 0; j < rank; ++j) {
if (j == axis_) {
if (j == axis) {
output_addr[idx] = offset[i];
} else {
output_addr[idx] = 0;

View File

@ -46,7 +46,7 @@ class ConcatOffsetCpuKernelMod : public DeprecatedNativeCpuKernelMod {
const std::vector<kernel::AddressPtr> &)>;
static std::vector<std::pair<KernelAttr, ConcatOffsetFunc>> func_list_;
ConcatOffsetFunc kernel_func_;
size_t axis_{0};
int64_t axis_{0};
};
} // namespace kernel
} // namespace mindspore

View File

@ -33,9 +33,13 @@ constexpr size_t kCholeskySolveOutputNum = 1;
void CholeskySolveCpuKernelMod::InitKernel(const CNodePtr &kernel_node) {
MS_EXCEPTION_IF_NULL(kernel_node);
auto shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, kInputIndex0);
if (IsDynamic(shape)) {
return;
}
kernel_name_ = common::AnfAlgo::GetCNodeName(kernel_node);
dtype_ = AnfAlgo::GetInputDeviceDataType(kernel_node, kInputIndex0);
std::vector<size_t> x1_shape = Convert2SizeT(common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, kInputIndex0));
std::vector<size_t> x1_shape = Convert2SizeT(shape);
size_t rank = x1_shape.size();
if (rank == kDefalutRank) {
dim = x1_shape[rank - kRowIndex];

View File

@ -49,8 +49,6 @@ bool ExtractVolumePatchesKernelMod::Init(const BaseOperatorPtr &base_operator,
kernel_size_ = kernel_ptr->get_kernel_size();
strides_ = kernel_ptr->get_strides();
padding_ = kernel_ptr->get_padding();
input_shape_ = inputs[0]->GetShapeVector();
output_shape_ = outputs[0]->GetShapeVector();
if (!MatchKernelFunc(base_operator, inputs, outputs)) {
return false;
}
@ -65,6 +63,8 @@ int ExtractVolumePatchesKernelMod::Resize(const BaseOperatorPtr &base_operator,
if (ret != 0) {
return ret;
}
input_shape_ = inputs[0]->GetShapeVector();
output_shape_ = outputs[0]->GetShapeVector();
return static_cast<int>(KRET_OK);
}

View File

@ -135,9 +135,11 @@ std::vector<KernelAttr> ConvGradFilterCpuKernelMod::GetOpSupport() {
.AddInputAttr(kNumberTypeInt32)
.AddOutputAttr(kNumberTypeFloat32)}},
{kConv3DBackpropFilter,
{KernelAttr()
{KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
KernelAttr()
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeInt64)
.AddOutputAttr(kNumberTypeFloat32)}}};
auto iter = support_list_map.find(kernel_type_);
if (iter == support_list_map.end()) {

View File

@ -130,9 +130,11 @@ std::vector<KernelAttr> ConvGradInputCpuKernelMod::GetOpSupport() {
.AddInputAttr(kNumberTypeInt64)
.AddOutputAttr(kNumberTypeFloat32)}},
{kConv3DBackpropInput,
{KernelAttr()
{KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
KernelAttr()
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeInt64)
.AddOutputAttr(kNumberTypeFloat32)}}};
auto iter = support_list_map.find(kernel_type_);
if (iter == support_list_map.end()) {

View File

@ -24,6 +24,7 @@ namespace mindspore {
namespace kernel {
namespace {
constexpr size_t kOneHotInputsNum = 3;
constexpr size_t kOneHotDynamicInputsNum = 4;
constexpr size_t kOneHotOutputsNum = 1;
#define INPUT_COMPUTE_CASE(DTYPE, TYPE, ODTYPE, INPUTS, OUTPUTS) \
case (DTYPE): { \
@ -62,13 +63,21 @@ constexpr size_t kOneHotOutputsNum = 1;
}
} // namespace
// Validates the number of inputs handed to the OneHot kernel.
// Only kOneHotInputsNum or kOneHotDynamicInputsNum inputs are accepted; any other count is
// reported through MS_LOG_EXCEPTION together with the kernel name and the offending count.
inline void check_input_num(size_t input_num, const std::string &kernel_name) {
  const bool count_is_supported = (input_num == kOneHotInputsNum) || (input_num == kOneHotDynamicInputsNum);
  if (count_is_supported) {
    return;
  }
  MS_LOG_EXCEPTION << "For " << kernel_name << ", input num must be " << kOneHotInputsNum << " or "
                   << kOneHotDynamicInputsNum << ", but got " << input_num;
}
bool OneHotCpuKernelMod::Init(const BaseOperatorPtr &base_operator, const std::vector<KernelTensorPtr> &inputs,
const std::vector<KernelTensorPtr> &outputs) {
constexpr size_t input_num = 3;
constexpr size_t output_num = 1;
kernel_name_ = base_operator->GetPrim()->name();
CHECK_KERNEL_INPUTS_NUM(inputs.size(), input_num, kernel_name_);
auto input_size = inputs.size();
check_input_num(input_size, kernel_name_);
CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), output_num, kernel_name_);
SetOnValueInputIndex(input_size);
input_dtype_ = inputs[kIndex0]->GetDtype();
output_dtype_ = outputs[kIndex0]->GetDtype();
@ -112,7 +121,7 @@ int OneHotCpuKernelMod::Resize(const BaseOperatorPtr &base_operator, const std::
bool OneHotCpuKernelMod::Launch(const std::vector<kernel::AddressPtr> &inputs, const std::vector<kernel::AddressPtr> &,
const std::vector<kernel::AddressPtr> &outputs) {
CHECK_KERNEL_INPUTS_NUM(inputs.size(), kOneHotInputsNum, kernel_name_);
check_input_num(inputs.size(), kernel_name_);
CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kOneHotOutputsNum, kernel_name_);
switch (input_dtype_) {
INPUT_COMPUTE_CASE(kNumberTypeUInt8, uint8_t, output_dtype_, inputs, outputs);
@ -126,11 +135,18 @@ bool OneHotCpuKernelMod::Launch(const std::vector<kernel::AddressPtr> &inputs, c
return true;
}
// Chooses which input slot holds on_value, based on how many inputs the kernel received.
// With kOneHotDynamicInputsNum inputs the on_value index becomes 2; for any other count the
// current on_value_input_index_ is left untouched.
void OneHotCpuKernelMod::SetOnValueInputIndex(size_t input_num) {
  if (input_num != kOneHotDynamicInputsNum) {
    return;
  }
  constexpr size_t kDynamicOnValueInputIndex = 2;
  on_value_input_index_ = kDynamicOnValueInputIndex;
}
template <typename ID, typename OD>
void OneHotCpuKernelMod::LaunchKernel(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &outputs) {
const auto *indices = reinterpret_cast<ID *>(inputs[0]->addr);
auto on_value = reinterpret_cast<OD *>(inputs[1]->addr)[0];
auto off_value = reinterpret_cast<OD *>(inputs[2]->addr)[0];
auto on_value = reinterpret_cast<OD *>(inputs[on_value_input_index_]->addr)[0];
auto off_value = reinterpret_cast<OD *>(inputs[on_value_input_index_ + 1]->addr)[0];
auto *output = reinterpret_cast<OD *>(outputs[0]->addr);
size_t elem_num = inputs[0]->size / sizeof(ID);
auto task = [this, &indices, &on_value, &off_value, &output](size_t start, size_t end) {
@ -372,11 +388,281 @@ std::vector<KernelAttr> OneHotCpuKernelMod::support_list_ = {KernelAttr()
.AddInputAttr(kNumberTypeComplex128)
.AddOutputAttr(kNumberTypeComplex128),
KernelAttr()
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kObjectTypeString)
.AddInputAttr(kObjectTypeString)
.AddOutputAttr(kObjectTypeString),
// depth is an input with int64 type:
KernelAttr()
.AddInputAttr(kNumberTypeUInt8)
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kNumberTypeUInt8)
.AddInputAttr(kNumberTypeUInt8)
.AddOutputAttr(kNumberTypeUInt8),
KernelAttr()
.AddInputAttr(kNumberTypeUInt8)
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kNumberTypeUInt16)
.AddInputAttr(kNumberTypeUInt16)
.AddOutputAttr(kNumberTypeUInt16),
KernelAttr()
.AddInputAttr(kNumberTypeUInt8)
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kNumberTypeUInt32)
.AddInputAttr(kNumberTypeUInt32)
.AddOutputAttr(kNumberTypeUInt32),
KernelAttr()
.AddInputAttr(kNumberTypeUInt8)
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kNumberTypeUInt64)
.AddInputAttr(kNumberTypeUInt64)
.AddOutputAttr(kNumberTypeUInt64),
KernelAttr()
.AddInputAttr(kNumberTypeUInt8)
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kNumberTypeInt8)
.AddInputAttr(kNumberTypeInt8)
.AddOutputAttr(kNumberTypeInt8),
KernelAttr()
.AddInputAttr(kNumberTypeUInt8)
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kNumberTypeInt16)
.AddInputAttr(kNumberTypeInt16)
.AddOutputAttr(kNumberTypeInt16),
KernelAttr()
.AddInputAttr(kNumberTypeUInt8)
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kNumberTypeInt32)
.AddInputAttr(kNumberTypeInt32)
.AddOutputAttr(kNumberTypeInt32),
KernelAttr()
.AddInputAttr(kNumberTypeUInt8)
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kNumberTypeInt64)
.AddOutputAttr(kNumberTypeInt64),
KernelAttr()
.AddInputAttr(kNumberTypeUInt8)
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kNumberTypeFloat16)
.AddInputAttr(kNumberTypeFloat16)
.AddOutputAttr(kNumberTypeFloat16),
KernelAttr()
.AddInputAttr(kNumberTypeUInt8)
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeFloat32)
.AddOutputAttr(kNumberTypeFloat32),
KernelAttr()
.AddInputAttr(kNumberTypeUInt8)
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kNumberTypeFloat64)
.AddInputAttr(kNumberTypeFloat64)
.AddOutputAttr(kNumberTypeFloat64),
KernelAttr()
.AddInputAttr(kNumberTypeUInt8)
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kNumberTypeBool)
.AddInputAttr(kNumberTypeBool)
.AddOutputAttr(kNumberTypeBool),
KernelAttr()
.AddInputAttr(kNumberTypeUInt8)
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kNumberTypeComplex64)
.AddInputAttr(kNumberTypeComplex64)
.AddOutputAttr(kNumberTypeComplex64),
KernelAttr()
.AddInputAttr(kNumberTypeUInt8)
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kNumberTypeComplex128)
.AddInputAttr(kNumberTypeComplex128)
.AddOutputAttr(kNumberTypeComplex128),
KernelAttr()
.AddInputAttr(kNumberTypeUInt8)
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kObjectTypeString)
.AddInputAttr(kObjectTypeString)
.AddOutputAttr(kObjectTypeString),
KernelAttr()
.AddInputAttr(kNumberTypeInt32)
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kNumberTypeUInt8)
.AddInputAttr(kNumberTypeUInt8)
.AddOutputAttr(kNumberTypeUInt8),
KernelAttr()
.AddInputAttr(kNumberTypeInt32)
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kNumberTypeUInt16)
.AddInputAttr(kNumberTypeUInt16)
.AddOutputAttr(kNumberTypeUInt16),
KernelAttr()
.AddInputAttr(kNumberTypeInt32)
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kNumberTypeUInt32)
.AddInputAttr(kNumberTypeUInt32)
.AddOutputAttr(kNumberTypeUInt32),
KernelAttr()
.AddInputAttr(kNumberTypeInt32)
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kNumberTypeUInt64)
.AddInputAttr(kNumberTypeUInt64)
.AddOutputAttr(kNumberTypeUInt64),
KernelAttr()
.AddInputAttr(kNumberTypeInt32)
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kNumberTypeInt8)
.AddInputAttr(kNumberTypeInt8)
.AddOutputAttr(kNumberTypeInt8),
KernelAttr()
.AddInputAttr(kNumberTypeInt32)
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kNumberTypeInt16)
.AddInputAttr(kNumberTypeInt16)
.AddOutputAttr(kNumberTypeInt16),
KernelAttr()
.AddInputAttr(kNumberTypeInt32)
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kNumberTypeInt32)
.AddInputAttr(kNumberTypeInt32)
.AddOutputAttr(kNumberTypeInt32),
KernelAttr()
.AddInputAttr(kNumberTypeInt32)
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kNumberTypeInt64)
.AddOutputAttr(kNumberTypeInt64),
KernelAttr()
.AddInputAttr(kNumberTypeInt32)
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kNumberTypeFloat16)
.AddInputAttr(kNumberTypeFloat16)
.AddOutputAttr(kNumberTypeFloat16),
KernelAttr()
.AddInputAttr(kNumberTypeInt32)
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeFloat32)
.AddOutputAttr(kNumberTypeFloat32),
KernelAttr()
.AddInputAttr(kNumberTypeInt32)
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kNumberTypeFloat64)
.AddInputAttr(kNumberTypeFloat64)
.AddOutputAttr(kNumberTypeFloat64),
KernelAttr()
.AddInputAttr(kNumberTypeInt32)
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kNumberTypeBool)
.AddInputAttr(kNumberTypeBool)
.AddOutputAttr(kNumberTypeBool),
KernelAttr()
.AddInputAttr(kNumberTypeInt32)
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kNumberTypeComplex64)
.AddInputAttr(kNumberTypeComplex64)
.AddOutputAttr(kNumberTypeComplex64),
KernelAttr()
.AddInputAttr(kNumberTypeInt32)
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kNumberTypeComplex128)
.AddInputAttr(kNumberTypeComplex128)
.AddOutputAttr(kNumberTypeComplex128),
KernelAttr()
.AddInputAttr(kNumberTypeInt32)
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kObjectTypeString)
.AddInputAttr(kObjectTypeString)
.AddOutputAttr(kObjectTypeString),
KernelAttr()
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kNumberTypeUInt8)
.AddInputAttr(kNumberTypeUInt8)
.AddOutputAttr(kNumberTypeUInt8),
KernelAttr()
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kNumberTypeUInt16)
.AddInputAttr(kNumberTypeUInt16)
.AddOutputAttr(kNumberTypeUInt16),
KernelAttr()
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kNumberTypeUInt32)
.AddInputAttr(kNumberTypeUInt32)
.AddOutputAttr(kNumberTypeUInt32),
KernelAttr()
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kNumberTypeUInt64)
.AddInputAttr(kNumberTypeUInt64)
.AddOutputAttr(kNumberTypeUInt64),
KernelAttr()
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kNumberTypeInt8)
.AddInputAttr(kNumberTypeInt8)
.AddOutputAttr(kNumberTypeInt8),
KernelAttr()
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kNumberTypeInt16)
.AddInputAttr(kNumberTypeInt16)
.AddOutputAttr(kNumberTypeInt16),
KernelAttr()
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kNumberTypeInt32)
.AddInputAttr(kNumberTypeInt32)
.AddOutputAttr(kNumberTypeInt32),
KernelAttr()
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kNumberTypeInt64)
.AddOutputAttr(kNumberTypeInt64),
KernelAttr()
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kNumberTypeFloat16)
.AddInputAttr(kNumberTypeFloat16)
.AddOutputAttr(kNumberTypeFloat16),
KernelAttr()
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeFloat32)
.AddOutputAttr(kNumberTypeFloat32),
KernelAttr()
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kNumberTypeFloat64)
.AddInputAttr(kNumberTypeFloat64)
.AddOutputAttr(kNumberTypeFloat64),
KernelAttr()
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kNumberTypeBool)
.AddInputAttr(kNumberTypeBool)
.AddOutputAttr(kNumberTypeBool),
KernelAttr()
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kNumberTypeComplex64)
.AddInputAttr(kNumberTypeComplex64)
.AddOutputAttr(kNumberTypeComplex64),
KernelAttr()
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kNumberTypeComplex128)
.AddInputAttr(kNumberTypeComplex128)
.AddOutputAttr(kNumberTypeComplex128),
KernelAttr()
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kObjectTypeString)
.AddInputAttr(kObjectTypeString)
.AddOutputAttr(kObjectTypeString)};
// Reports the KernelAttr combinations this kernel accepts by returning a copy of support_list_.
std::vector<KernelAttr> OneHotCpuKernelMod::GetOpSupport() {
  return support_list_;
}
MS_KERNEL_FACTORY_REG(NativeCpuKernelMod, OneHot, OneHotCpuKernelMod);

View File

@ -45,6 +45,7 @@ class OneHotCpuKernelMod : public NativeCpuKernelMod {
private:
template <typename ID, typename OD>
void LaunchKernel(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &outputs);
void SetOnValueInputIndex(size_t input_num);
TypeId input_dtype_{kTypeUnknown};
TypeId output_dtype_{kTypeUnknown};
@ -52,6 +53,7 @@ class OneHotCpuKernelMod : public NativeCpuKernelMod {
size_t stride_{0};
size_t axis_{0};
static std::vector<KernelAttr> support_list_;
size_t on_value_input_index_{1};
};
} // namespace kernel
} // namespace mindspore

View File

@ -36,24 +36,29 @@ void SparseTensorDenseAddCpuKernelMod::InitKernel(const CNodePtr &kernel_node) {
MS_EXCEPTION_IF_NULL(kernel_node);
kernel_name_ = common::AnfAlgo::GetCNodeName(kernel_node);
auto indices_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, kIndex0);
auto values_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, kIndex1);
auto shape_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, kIndex2);
auto x2_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, kIndex3);
if (AnfAlgo::IsShapesDynamic({values_shape, indices_shape, shape_shape, x2_shape})) {
return;
}
if (indices_shape.size() != kIndicesShapeSize) {
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', it requires 'x1_indices' must be a " << kIndicesShapeSize
<< "-D Tensor, but got " << indices_shape.size() << "-D";
}
auto values_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, kIndex1);
if (values_shape.size() != 1 || values_shape[0] != indices_shape[0]) {
MS_LOG(EXCEPTION) << "For '" << kernel_name_
<< "', it requires 'x1_values' must be a 1-D Tensor and the first dimension length "
<< "must be equal to the first dimension length of 'indices', but got 'x1_values' shape: "
<< Vector2Str(values_shape) << " and 'x1_indices' shape: " << Vector2Str(indices_shape);
}
auto shape_shape_ = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, kIndex2);
x2_shape_ = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, kIndex3);
size_t x1_rank = static_cast<size_t>(shape_shape_[0]);
x2_shape_ = x2_shape;
size_t x1_rank = static_cast<size_t>(shape_shape[0]);
size_t x2_rank = x2_shape_.size();
if (x1_rank != x2_rank) {
MS_LOG(EXCEPTION) << "For '" << kernel_name_
<< "', x1 and x2 must have same ranks, but got 'x1' shape: " << Vector2Str(shape_shape_)
<< "', x1 and x2 must have same ranks, but got 'x1' shape: " << Vector2Str(shape_shape)
<< "and 'x2' shape: " << Vector2Str(x2_shape_);
}
values_size_ = static_cast<size_t>(values_shape[0]);

View File

@ -38,25 +38,27 @@ void SparseTensorDenseMatmulCpuKernelMod::InitKernel(const CNodePtr &kernel_node
adj_st_ = common::AnfAlgo::GetNodeAttr<bool>(kernel_node, ADJ_ST);
adj_dt_ = common::AnfAlgo::GetNodeAttr<bool>(kernel_node, ADJ_dT);
auto indices_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, INDICES);
auto values_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, VALUES);
auto output_shape = common::AnfAlgo::GetOutputInferShape(kernel_node, 0);
auto b_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, static_cast<size_t>(DENSE));
if (AnfAlgo::IsShapesDynamic({values_shape, indices_shape, output_shape, b_shape})) {
return;
}
if (indices_shape.size() != kIndicesSizeNum && indices_shape[1] != kIndices2rdDimNum) {
MS_LOG(EXCEPTION) << "For '" << kernel_name_
<< "', it requires 'indices' must be a 2-D Tensor and the second dimension length "
"must be 2, but got 'indices' shape: "
<< Vector2Str(indices_shape);
}
auto values_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, VALUES);
if (AnfAlgo::IsShapesDynamic({values_shape, indices_shape})) {
return;
}
if (values_shape.size() != 1 || values_shape[0] != indices_shape[0]) {
MS_LOG(EXCEPTION) << "For '" << kernel_name_
<< "', it requires 'values' must be a 1-D Tensor and the first dimension length "
" must be equal to the first dimension length of 'indices', but got 'values' shape: "
<< Vector2Str(values_shape) << " and 'indices' shape: " << Vector2Str(indices_shape);
}
output_shape_ = Convert2SizeT(common::AnfAlgo::GetOutputInferShape(kernel_node, 0));
output_shape_ = Convert2SizeT(output_shape);
values_size_ = LongToSize(values_shape[0]);
b_shape_ = Convert2SizeT(common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, static_cast<size_t>(DENSE)));
b_shape_ = Convert2SizeT(b_shape);
if (b_shape_.size() != kSparseTensorDenseMatmulDenseShapeSize) {
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the dimension of 'dense' must be "
<< kSparseTensorDenseMatmulDenseShapeSize << "-D, but got " << b_shape_.size() << "-D";

View File

@ -34,7 +34,7 @@ class TensorShapeGpuKernelMod : public DeprecatedNativeGpuKernelMod {
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
if (is_null_input_) {
if (is_null_input_ || prev_node_output_shape_.empty()) {
return true;
}
S *output_device_address = GetDeviceAddress<S>(outputs, 0);

View File

@ -121,6 +121,15 @@ class CholeskySolveGpuKernelMod : public DeprecatedNativeGpuKernelMod {
return true;
}
// Returns the kernel's bookkeeping to an empty state: empties the h_a_array_ / h_b_array_
// containers and the input, output and workspace size lists, then clears the null-input flag.
void ResetResource() noexcept override {
  h_a_array_.clear();
  h_b_array_.clear();
  input_size_list_.clear();
  output_size_list_.clear();
  workspace_size_list_.clear();
  is_null_input_ = false;
}
protected:
void InitSizeLists() override {
size_t input_size = outer_batch_ * m_ * lda_ * unit_size_;

View File

@ -137,9 +137,11 @@ int Conv3dGpuKernelMod::Resize(const BaseOperatorPtr &base_operator, const std::
}
pad_mode_ = kernel_ptr->get_pad_mode();
SetPad(pad_list);
if (!IsDynamicRank(in_shape) && !IsDynamicRank(filter_shape)) {
std::vector<int64_t> stride_me = kernel_ptr->get_stride();
std::vector<int64_t> dilation_me = kernel_ptr->get_dilation();
SetStrideAndDilation(stride_me, dilation_me);
}
auto input_descriptor_real = GetInputDescReal(pad_list);
if (cudnn_data_type_ == CUDNN_DATA_HALF) {
CHECK_CUDNN_RET_WITH_EXCEPT_NOTRACE(cudnnSetConvolutionMathType(conv_desc_, CUDNN_TENSOR_OP_MATH),

View File

@ -19,8 +19,6 @@ import mindspore as ms
from mindspore import ops, nn, context, Tensor
from .test_grad_of_dynamic import TestDynamicGrad
context.set_context(mode=context.PYNATIVE_MODE)
class NetAddcmul(nn.Cell):
def __init__(self):
@ -49,6 +47,7 @@ def test_addcmul_dyn_shape():
Description: Test case of dynamic shape for Addcmul grad operator.
Expectation: success.
"""
context.set_context(mode=context.PYNATIVE_MODE)
addcmul_test(False)
@ -61,4 +60,5 @@ def test_addcmul_dyn_rank():
Description: Test case of dynamic rank for Addcmul grad operator.
Expectation: success.
"""
context.set_context(mode=context.PYNATIVE_MODE)
addcmul_test(True)

View File

@ -19,8 +19,6 @@ import mindspore.ops.operations.math_ops as M
from mindspore import nn, context, Tensor
from .test_grad_of_dynamic import TestDynamicGrad
context.set_context(mode=context.PYNATIVE_MODE)
class NetBatchMatMul(nn.Cell):
def __init__(self):
@ -40,6 +38,7 @@ def test_batch_matmul_dynamic_shape():
Description: Test case of dynamic shape for BatchMatMul grad operator on GPU.
Expectation: success.
"""
context.set_context(mode=context.PYNATIVE_MODE)
test_dynamic = TestDynamicGrad(NetBatchMatMul(), skip_convert_out_ids=[0])
x = Tensor(np.ones(shape=[2, 4, 1, 3]), mindspore.float32)
y = Tensor(np.ones(shape=[2, 4, 3, 4]), mindspore.float32)
@ -56,6 +55,7 @@ def test_batch_matmul_dynamic_rank():
Description: Test case of dynamic rank for BatchMatMul grad operator on GPU.
Expectation: success.
"""
context.set_context(mode=context.PYNATIVE_MODE)
test_dynamic = TestDynamicGrad(NetBatchMatMul(), skip_convert_out_ids=[0])
x = Tensor(np.ones(shape=[2, 4, 1, 3]), mindspore.float32)
y = Tensor(np.ones(shape=[2, 4, 3, 4]), mindspore.float32)

View File

@ -98,7 +98,7 @@ def test_gpu_grad_dynamic_rank_2():
@pytest.mark.skip(reason="CPU无Conv3DBackpropFilter, Conv3DBackpropInput, kernel实现")
@pytest.mark.level1
@pytest.mark.level2
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_cpu_grad_dynamic_shape():
@ -112,7 +112,7 @@ def test_cpu_grad_dynamic_shape():
@pytest.mark.skip(reason="CPU无Conv3DBackpropFilter, Conv3DBackpropInput, kernel实现")
@pytest.mark.level1
@pytest.mark.level2
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_cpu_grad_dynamic_rank():

View File

@ -18,8 +18,6 @@ import pytest
from mindspore import ops, nn, context, Tensor
from .test_grad_of_dynamic import TestDynamicGrad
context.set_context(mode=context.PYNATIVE_MODE)
class NetDivNoNan(nn.Cell):
def __init__(self):
@ -47,6 +45,7 @@ def test_divnonan_dyn_shape():
Description: Test case of dynamic shape for DivNoNan grad operator.
Expectation: success.
"""
context.set_context(mode=context.PYNATIVE_MODE)
divnonan_test(False)
@ -60,4 +59,5 @@ def test_divnonan_dyn_rank():
Description: Test case of dynamic rank for DivNoNan grad operator.
Expectation: success.
"""
context.set_context(mode=context.PYNATIVE_MODE)
divnonan_test(True)

View File

@ -18,8 +18,6 @@ import pytest
from mindspore import ops, nn, context, Tensor
from .test_grad_of_dynamic import TestDynamicGrad
context.set_context(mode=context.PYNATIVE_MODE)
class NetSub(nn.Cell):
def __init__(self):
@ -47,6 +45,7 @@ def test_sub_dyn_shape():
Description: Test case of dynamic shape for Sub grad operator.
Expectation: success.
"""
context.set_context(mode=context.PYNATIVE_MODE)
sub_test(False)
@ -60,4 +59,5 @@ def test_sub_dyn_rank():
Description: Test case of dynamic rank for Sub grad operator.
Expectation: success.
"""
context.set_context(mode=context.PYNATIVE_MODE)
sub_test(True)

View File

@ -18,8 +18,6 @@ import pytest
from mindspore import ops, nn, context, Tensor
from .test_grad_of_dynamic import TestDynamicGrad
context.set_context(mode=context.PYNATIVE_MODE)
class NetTopK(nn.Cell):
def __init__(self, k):
@ -47,6 +45,7 @@ def test_topk_dyn_shape():
Description: Test case of dynamic shape for TopK grad operator.
Expectation: success.
"""
context.set_context(mode=context.PYNATIVE_MODE)
topk_test(False)
@ -59,4 +58,5 @@ def test_topk_dyn_rank():
Description: Test case of dynamic rank for TopK grad operator.
Expectation: success.
"""
context.set_context(mode=context.PYNATIVE_MODE)
topk_test(True)

View File

@ -19,8 +19,6 @@ import mindspore.ops.operations.math_ops as M
from mindspore import nn, context, Tensor
from .test_grad_of_dynamic import TestDynamicGrad
context.set_context(mode=context.PYNATIVE_MODE)
class NetTrace(nn.Cell):
def __init__(self):
@ -41,6 +39,7 @@ def test_trace_dynamic_shape():
Description: Test case of dynamic shape for Trace grad operator on CPU and GPU.
Expectation: success.
"""
context.set_context(mode=context.PYNATIVE_MODE)
test_dynamic = TestDynamicGrad(NetTrace())
x = Tensor(np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]), mindspore.float32)
test_dynamic.test_dynamic_grad_net(x, False)
@ -56,6 +55,7 @@ def test_trace_dynamic_shape_rank():
Description: Test case of dynamic rank for Trace grad operator on CPU and GPU.
Expectation: success.
"""
context.set_context(mode=context.PYNATIVE_MODE)
test_dynamic = TestDynamicGrad(NetTrace())
x = Tensor(np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]), mindspore.float32)
test_dynamic.test_dynamic_grad_net(x, True)