forked from mindspore-Ecosystem/mindspore
!43308 refine some kernels to support dynamic shape
Merge pull request !43308 from looop5/dyn_shape_commit_split_kernel
This commit is contained in:
commit
5edf920ac0
|
@ -17,6 +17,7 @@
|
|||
#include "plugin/device/cpu/kernel/concat_offset_cpu_kernel.h"
|
||||
#include <algorithm>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
#include "plugin/device/cpu/hal/device/cpu_device_address.h"
|
||||
|
||||
namespace mindspore {
|
||||
|
@ -29,19 +30,7 @@ void ConcatOffsetCpuKernelMod::InitKernel(const CNodePtr &kernel_node) {
|
|||
cnode_ptr_ = kernel_node;
|
||||
MS_EXCEPTION_IF_NULL(kernel_node);
|
||||
kernel_name_ = common::AnfAlgo::GetCNodeName(kernel_node);
|
||||
|
||||
int64_t axis = common::AnfAlgo::GetNodeAttr<int64_t>(kernel_node, AXIS);
|
||||
auto input_1_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
|
||||
if (axis < 0) {
|
||||
axis_ = LongToSize(axis + input_1_shape.size());
|
||||
} else {
|
||||
axis_ = LongToSize(axis);
|
||||
}
|
||||
if (axis_ >= input_1_shape.size()) {
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name_
|
||||
<< "', the 'axis' must be less than the dimension of 'input_x', but got 'axis': " << axis_
|
||||
<< ", and the dimension of 'input_x': " << input_1_shape.size();
|
||||
}
|
||||
axis_ = common::AnfAlgo::GetNodeAttr<int64_t>(kernel_node, AXIS);
|
||||
|
||||
auto kernel_attr = GetKernelAttrFromNode(kernel_node);
|
||||
auto [is_match, index] = MatchKernelAttr(kernel_attr, GetOpSupport());
|
||||
|
@ -61,19 +50,38 @@ bool ConcatOffsetCpuKernelMod::LaunchKernel(const std::vector<kernel::AddressPtr
|
|||
}
|
||||
auto output_addr = reinterpret_cast<int64_t *>(outputs[0]->addr);
|
||||
size_t input_num = common::AnfAlgo::GetInputTensorNum(node_);
|
||||
if (input_num == 0) {
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << ", input tensors can not be empty";
|
||||
}
|
||||
// check input shapes
|
||||
std::vector<ShapeVector> input_shapes;
|
||||
for (size_t i = 0; i < input_num; i++) {
|
||||
ShapeVector input_shape_i = common::AnfAlgo::GetPrevNodeOutputInferShape(node_, i);
|
||||
input_shapes.push_back(input_shape_i);
|
||||
if (input_shape_i.size() != input_shapes[0].size()) {
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name_
|
||||
<< "', input tensors shape's rank must be equal, but got input[0] shape's rank = "
|
||||
<< input_shapes[0].size() << ", input[" << i << "] shape's rank = " << input_shape_i.size();
|
||||
}
|
||||
}
|
||||
// check axis
|
||||
auto x_rank = SizeToLong(input_shapes[0].size());
|
||||
if (axis_ < -x_rank || axis_ >= x_rank) {
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << ", 'axis' must be in range [-" << x_rank << ", " << x_rank
|
||||
<< "), but got " << axis_;
|
||||
}
|
||||
if (axis_ < 0) {
|
||||
axis_ += x_rank;
|
||||
}
|
||||
auto axis = LongToSize(axis_);
|
||||
|
||||
ShapeVector offset{0};
|
||||
auto all_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(node_, 0)[axis_];
|
||||
auto all_shape = input_shapes[0][axis];
|
||||
|
||||
// cal offset
|
||||
for (size_t i = 1; i < input_num; i++) {
|
||||
auto input_shape_i = common::AnfAlgo::GetPrevNodeOutputInferShape(node_, i);
|
||||
if (axis_ >= input_shape_i.size()) {
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name_
|
||||
<< "', the 'axis' must be less than the dimension of input, but got 'axis': " << axis_
|
||||
<< ", and the dimension of the " << i << "'th input: " << input_shape_i.size();
|
||||
}
|
||||
offset.emplace_back(all_shape);
|
||||
all_shape += input_shape_i[axis_];
|
||||
all_shape += input_shapes[i][axis];
|
||||
}
|
||||
auto output_shape = common::AnfAlgo::GetOutputInferShape(node_, 0);
|
||||
if (output_shape.size() != kConcatOffsetOutputShapeSize) {
|
||||
|
@ -90,7 +98,7 @@ bool ConcatOffsetCpuKernelMod::LaunchKernel(const std::vector<kernel::AddressPtr
|
|||
size_t idx = 0;
|
||||
for (size_t i = 0; i < input_num; ++i) {
|
||||
for (size_t j = 0; j < rank; ++j) {
|
||||
if (j == axis_) {
|
||||
if (j == axis) {
|
||||
output_addr[idx] = offset[i];
|
||||
} else {
|
||||
output_addr[idx] = 0;
|
||||
|
|
|
@ -46,7 +46,7 @@ class ConcatOffsetCpuKernelMod : public DeprecatedNativeCpuKernelMod {
|
|||
const std::vector<kernel::AddressPtr> &)>;
|
||||
static std::vector<std::pair<KernelAttr, ConcatOffsetFunc>> func_list_;
|
||||
ConcatOffsetFunc kernel_func_;
|
||||
size_t axis_{0};
|
||||
int64_t axis_{0};
|
||||
};
|
||||
} // namespace kernel
|
||||
} // namespace mindspore
|
||||
|
|
|
@ -33,9 +33,13 @@ constexpr size_t kCholeskySolveOutputNum = 1;
|
|||
|
||||
void CholeskySolveCpuKernelMod::InitKernel(const CNodePtr &kernel_node) {
|
||||
MS_EXCEPTION_IF_NULL(kernel_node);
|
||||
auto shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, kInputIndex0);
|
||||
if (IsDynamic(shape)) {
|
||||
return;
|
||||
}
|
||||
kernel_name_ = common::AnfAlgo::GetCNodeName(kernel_node);
|
||||
dtype_ = AnfAlgo::GetInputDeviceDataType(kernel_node, kInputIndex0);
|
||||
std::vector<size_t> x1_shape = Convert2SizeT(common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, kInputIndex0));
|
||||
std::vector<size_t> x1_shape = Convert2SizeT(shape);
|
||||
size_t rank = x1_shape.size();
|
||||
if (rank == kDefalutRank) {
|
||||
dim = x1_shape[rank - kRowIndex];
|
||||
|
|
|
@ -49,8 +49,6 @@ bool ExtractVolumePatchesKernelMod::Init(const BaseOperatorPtr &base_operator,
|
|||
kernel_size_ = kernel_ptr->get_kernel_size();
|
||||
strides_ = kernel_ptr->get_strides();
|
||||
padding_ = kernel_ptr->get_padding();
|
||||
input_shape_ = inputs[0]->GetShapeVector();
|
||||
output_shape_ = outputs[0]->GetShapeVector();
|
||||
if (!MatchKernelFunc(base_operator, inputs, outputs)) {
|
||||
return false;
|
||||
}
|
||||
|
@ -65,6 +63,8 @@ int ExtractVolumePatchesKernelMod::Resize(const BaseOperatorPtr &base_operator,
|
|||
if (ret != 0) {
|
||||
return ret;
|
||||
}
|
||||
input_shape_ = inputs[0]->GetShapeVector();
|
||||
output_shape_ = outputs[0]->GetShapeVector();
|
||||
return static_cast<int>(KRET_OK);
|
||||
}
|
||||
|
||||
|
|
|
@ -135,9 +135,11 @@ std::vector<KernelAttr> ConvGradFilterCpuKernelMod::GetOpSupport() {
|
|||
.AddInputAttr(kNumberTypeInt32)
|
||||
.AddOutputAttr(kNumberTypeFloat32)}},
|
||||
{kConv3DBackpropFilter,
|
||||
{KernelAttr()
|
||||
{KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
|
||||
KernelAttr()
|
||||
.AddInputAttr(kNumberTypeFloat32)
|
||||
.AddInputAttr(kNumberTypeFloat32)
|
||||
.AddInputAttr(kNumberTypeInt64)
|
||||
.AddOutputAttr(kNumberTypeFloat32)}}};
|
||||
auto iter = support_list_map.find(kernel_type_);
|
||||
if (iter == support_list_map.end()) {
|
||||
|
|
|
@ -130,9 +130,11 @@ std::vector<KernelAttr> ConvGradInputCpuKernelMod::GetOpSupport() {
|
|||
.AddInputAttr(kNumberTypeInt64)
|
||||
.AddOutputAttr(kNumberTypeFloat32)}},
|
||||
{kConv3DBackpropInput,
|
||||
{KernelAttr()
|
||||
{KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
|
||||
KernelAttr()
|
||||
.AddInputAttr(kNumberTypeFloat32)
|
||||
.AddInputAttr(kNumberTypeFloat32)
|
||||
.AddInputAttr(kNumberTypeInt64)
|
||||
.AddOutputAttr(kNumberTypeFloat32)}}};
|
||||
auto iter = support_list_map.find(kernel_type_);
|
||||
if (iter == support_list_map.end()) {
|
||||
|
|
|
@ -24,6 +24,7 @@ namespace mindspore {
|
|||
namespace kernel {
|
||||
namespace {
|
||||
constexpr size_t kOneHotInputsNum = 3;
|
||||
constexpr size_t kOneHotDynamicInputsNum = 4;
|
||||
constexpr size_t kOneHotOutputsNum = 1;
|
||||
#define INPUT_COMPUTE_CASE(DTYPE, TYPE, ODTYPE, INPUTS, OUTPUTS) \
|
||||
case (DTYPE): { \
|
||||
|
@ -62,13 +63,21 @@ constexpr size_t kOneHotOutputsNum = 1;
|
|||
}
|
||||
} // namespace
|
||||
|
||||
inline void check_input_num(size_t input_num, const std::string &kernel_name) {
|
||||
if (input_num != kOneHotInputsNum && input_num != kOneHotDynamicInputsNum) {
|
||||
MS_LOG_EXCEPTION << "For " << kernel_name << ", input num must be " << kOneHotInputsNum << " or "
|
||||
<< kOneHotDynamicInputsNum << ", but got " << input_num;
|
||||
}
|
||||
}
|
||||
|
||||
bool OneHotCpuKernelMod::Init(const BaseOperatorPtr &base_operator, const std::vector<KernelTensorPtr> &inputs,
|
||||
const std::vector<KernelTensorPtr> &outputs) {
|
||||
constexpr size_t input_num = 3;
|
||||
constexpr size_t output_num = 1;
|
||||
kernel_name_ = base_operator->GetPrim()->name();
|
||||
CHECK_KERNEL_INPUTS_NUM(inputs.size(), input_num, kernel_name_);
|
||||
auto input_size = inputs.size();
|
||||
check_input_num(input_size, kernel_name_);
|
||||
CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), output_num, kernel_name_);
|
||||
SetOnValueInputIndex(input_size);
|
||||
|
||||
input_dtype_ = inputs[kIndex0]->GetDtype();
|
||||
output_dtype_ = outputs[kIndex0]->GetDtype();
|
||||
|
@ -112,7 +121,7 @@ int OneHotCpuKernelMod::Resize(const BaseOperatorPtr &base_operator, const std::
|
|||
|
||||
bool OneHotCpuKernelMod::Launch(const std::vector<kernel::AddressPtr> &inputs, const std::vector<kernel::AddressPtr> &,
|
||||
const std::vector<kernel::AddressPtr> &outputs) {
|
||||
CHECK_KERNEL_INPUTS_NUM(inputs.size(), kOneHotInputsNum, kernel_name_);
|
||||
check_input_num(inputs.size(), kernel_name_);
|
||||
CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kOneHotOutputsNum, kernel_name_);
|
||||
switch (input_dtype_) {
|
||||
INPUT_COMPUTE_CASE(kNumberTypeUInt8, uint8_t, output_dtype_, inputs, outputs);
|
||||
|
@ -126,11 +135,18 @@ bool OneHotCpuKernelMod::Launch(const std::vector<kernel::AddressPtr> &inputs, c
|
|||
return true;
|
||||
}
|
||||
|
||||
void OneHotCpuKernelMod::SetOnValueInputIndex(size_t input_num) {
|
||||
constexpr size_t kDynamicOnValueInputIndex = 2;
|
||||
if (input_num == kOneHotDynamicInputsNum) {
|
||||
on_value_input_index_ = kDynamicOnValueInputIndex;
|
||||
}
|
||||
}
|
||||
|
||||
template <typename ID, typename OD>
|
||||
void OneHotCpuKernelMod::LaunchKernel(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &outputs) {
|
||||
const auto *indices = reinterpret_cast<ID *>(inputs[0]->addr);
|
||||
auto on_value = reinterpret_cast<OD *>(inputs[1]->addr)[0];
|
||||
auto off_value = reinterpret_cast<OD *>(inputs[2]->addr)[0];
|
||||
auto on_value = reinterpret_cast<OD *>(inputs[on_value_input_index_]->addr)[0];
|
||||
auto off_value = reinterpret_cast<OD *>(inputs[on_value_input_index_ + 1]->addr)[0];
|
||||
auto *output = reinterpret_cast<OD *>(outputs[0]->addr);
|
||||
size_t elem_num = inputs[0]->size / sizeof(ID);
|
||||
auto task = [this, &indices, &on_value, &off_value, &output](size_t start, size_t end) {
|
||||
|
@ -372,11 +388,281 @@ std::vector<KernelAttr> OneHotCpuKernelMod::support_list_ = {KernelAttr()
|
|||
.AddInputAttr(kNumberTypeComplex128)
|
||||
.AddOutputAttr(kNumberTypeComplex128),
|
||||
KernelAttr()
|
||||
.AddInputAttr(kNumberTypeInt64)
|
||||
.AddInputAttr(kObjectTypeString)
|
||||
.AddInputAttr(kObjectTypeString)
|
||||
.AddOutputAttr(kObjectTypeString),
|
||||
// depth is a input with int64 type:
|
||||
KernelAttr()
|
||||
.AddInputAttr(kNumberTypeUInt8)
|
||||
.AddInputAttr(kNumberTypeInt64)
|
||||
.AddInputAttr(kNumberTypeUInt8)
|
||||
.AddInputAttr(kNumberTypeUInt8)
|
||||
.AddOutputAttr(kNumberTypeUInt8),
|
||||
KernelAttr()
|
||||
.AddInputAttr(kNumberTypeUInt8)
|
||||
.AddInputAttr(kNumberTypeInt64)
|
||||
.AddInputAttr(kNumberTypeUInt16)
|
||||
.AddInputAttr(kNumberTypeUInt16)
|
||||
.AddOutputAttr(kNumberTypeUInt16),
|
||||
KernelAttr()
|
||||
.AddInputAttr(kNumberTypeUInt8)
|
||||
.AddInputAttr(kNumberTypeInt64)
|
||||
.AddInputAttr(kNumberTypeUInt32)
|
||||
.AddInputAttr(kNumberTypeUInt32)
|
||||
.AddOutputAttr(kNumberTypeUInt32),
|
||||
KernelAttr()
|
||||
.AddInputAttr(kNumberTypeUInt8)
|
||||
.AddInputAttr(kNumberTypeInt64)
|
||||
.AddInputAttr(kNumberTypeUInt64)
|
||||
.AddInputAttr(kNumberTypeUInt64)
|
||||
.AddOutputAttr(kNumberTypeUInt64),
|
||||
KernelAttr()
|
||||
.AddInputAttr(kNumberTypeUInt8)
|
||||
.AddInputAttr(kNumberTypeInt64)
|
||||
.AddInputAttr(kNumberTypeInt8)
|
||||
.AddInputAttr(kNumberTypeInt8)
|
||||
.AddOutputAttr(kNumberTypeInt8),
|
||||
KernelAttr()
|
||||
.AddInputAttr(kNumberTypeUInt8)
|
||||
.AddInputAttr(kNumberTypeInt64)
|
||||
.AddInputAttr(kNumberTypeInt16)
|
||||
.AddInputAttr(kNumberTypeInt16)
|
||||
.AddOutputAttr(kNumberTypeInt16),
|
||||
KernelAttr()
|
||||
.AddInputAttr(kNumberTypeUInt8)
|
||||
.AddInputAttr(kNumberTypeInt64)
|
||||
.AddInputAttr(kNumberTypeInt32)
|
||||
.AddInputAttr(kNumberTypeInt32)
|
||||
.AddOutputAttr(kNumberTypeInt32),
|
||||
KernelAttr()
|
||||
.AddInputAttr(kNumberTypeUInt8)
|
||||
.AddInputAttr(kNumberTypeInt64)
|
||||
.AddInputAttr(kNumberTypeInt64)
|
||||
.AddInputAttr(kNumberTypeInt64)
|
||||
.AddOutputAttr(kNumberTypeInt64),
|
||||
KernelAttr()
|
||||
.AddInputAttr(kNumberTypeUInt8)
|
||||
.AddInputAttr(kNumberTypeInt64)
|
||||
.AddInputAttr(kNumberTypeFloat16)
|
||||
.AddInputAttr(kNumberTypeFloat16)
|
||||
.AddOutputAttr(kNumberTypeFloat16),
|
||||
KernelAttr()
|
||||
.AddInputAttr(kNumberTypeUInt8)
|
||||
.AddInputAttr(kNumberTypeInt64)
|
||||
.AddInputAttr(kNumberTypeFloat32)
|
||||
.AddInputAttr(kNumberTypeFloat32)
|
||||
.AddOutputAttr(kNumberTypeFloat32),
|
||||
KernelAttr()
|
||||
.AddInputAttr(kNumberTypeUInt8)
|
||||
.AddInputAttr(kNumberTypeInt64)
|
||||
.AddInputAttr(kNumberTypeFloat64)
|
||||
.AddInputAttr(kNumberTypeFloat64)
|
||||
.AddOutputAttr(kNumberTypeFloat64),
|
||||
KernelAttr()
|
||||
.AddInputAttr(kNumberTypeUInt8)
|
||||
.AddInputAttr(kNumberTypeInt64)
|
||||
.AddInputAttr(kNumberTypeBool)
|
||||
.AddInputAttr(kNumberTypeBool)
|
||||
.AddOutputAttr(kNumberTypeBool),
|
||||
KernelAttr()
|
||||
.AddInputAttr(kNumberTypeUInt8)
|
||||
.AddInputAttr(kNumberTypeInt64)
|
||||
.AddInputAttr(kNumberTypeComplex64)
|
||||
.AddInputAttr(kNumberTypeComplex64)
|
||||
.AddOutputAttr(kNumberTypeComplex64),
|
||||
KernelAttr()
|
||||
.AddInputAttr(kNumberTypeUInt8)
|
||||
.AddInputAttr(kNumberTypeInt64)
|
||||
.AddInputAttr(kNumberTypeComplex128)
|
||||
.AddInputAttr(kNumberTypeComplex128)
|
||||
.AddOutputAttr(kNumberTypeComplex128),
|
||||
KernelAttr()
|
||||
.AddInputAttr(kNumberTypeUInt8)
|
||||
.AddInputAttr(kNumberTypeInt64)
|
||||
.AddInputAttr(kObjectTypeString)
|
||||
.AddInputAttr(kObjectTypeString)
|
||||
.AddOutputAttr(kObjectTypeString),
|
||||
KernelAttr()
|
||||
.AddInputAttr(kNumberTypeInt32)
|
||||
.AddInputAttr(kNumberTypeInt64)
|
||||
.AddInputAttr(kNumberTypeUInt8)
|
||||
.AddInputAttr(kNumberTypeUInt8)
|
||||
.AddOutputAttr(kNumberTypeUInt8),
|
||||
KernelAttr()
|
||||
.AddInputAttr(kNumberTypeInt32)
|
||||
.AddInputAttr(kNumberTypeInt64)
|
||||
.AddInputAttr(kNumberTypeUInt16)
|
||||
.AddInputAttr(kNumberTypeUInt16)
|
||||
.AddOutputAttr(kNumberTypeUInt16),
|
||||
KernelAttr()
|
||||
.AddInputAttr(kNumberTypeInt32)
|
||||
.AddInputAttr(kNumberTypeInt64)
|
||||
.AddInputAttr(kNumberTypeUInt32)
|
||||
.AddInputAttr(kNumberTypeUInt32)
|
||||
.AddOutputAttr(kNumberTypeUInt32),
|
||||
KernelAttr()
|
||||
.AddInputAttr(kNumberTypeInt32)
|
||||
.AddInputAttr(kNumberTypeInt64)
|
||||
.AddInputAttr(kNumberTypeUInt64)
|
||||
.AddInputAttr(kNumberTypeUInt64)
|
||||
.AddOutputAttr(kNumberTypeUInt64),
|
||||
KernelAttr()
|
||||
.AddInputAttr(kNumberTypeInt32)
|
||||
.AddInputAttr(kNumberTypeInt64)
|
||||
.AddInputAttr(kNumberTypeInt8)
|
||||
.AddInputAttr(kNumberTypeInt8)
|
||||
.AddOutputAttr(kNumberTypeInt8),
|
||||
KernelAttr()
|
||||
.AddInputAttr(kNumberTypeInt32)
|
||||
.AddInputAttr(kNumberTypeInt64)
|
||||
.AddInputAttr(kNumberTypeInt16)
|
||||
.AddInputAttr(kNumberTypeInt16)
|
||||
.AddOutputAttr(kNumberTypeInt16),
|
||||
KernelAttr()
|
||||
.AddInputAttr(kNumberTypeInt32)
|
||||
.AddInputAttr(kNumberTypeInt64)
|
||||
.AddInputAttr(kNumberTypeInt32)
|
||||
.AddInputAttr(kNumberTypeInt32)
|
||||
.AddOutputAttr(kNumberTypeInt32),
|
||||
KernelAttr()
|
||||
.AddInputAttr(kNumberTypeInt32)
|
||||
.AddInputAttr(kNumberTypeInt64)
|
||||
.AddInputAttr(kNumberTypeInt64)
|
||||
.AddInputAttr(kNumberTypeInt64)
|
||||
.AddOutputAttr(kNumberTypeInt64),
|
||||
KernelAttr()
|
||||
.AddInputAttr(kNumberTypeInt32)
|
||||
.AddInputAttr(kNumberTypeInt64)
|
||||
.AddInputAttr(kNumberTypeFloat16)
|
||||
.AddInputAttr(kNumberTypeFloat16)
|
||||
.AddOutputAttr(kNumberTypeFloat16),
|
||||
KernelAttr()
|
||||
.AddInputAttr(kNumberTypeInt32)
|
||||
.AddInputAttr(kNumberTypeInt64)
|
||||
.AddInputAttr(kNumberTypeFloat32)
|
||||
.AddInputAttr(kNumberTypeFloat32)
|
||||
.AddOutputAttr(kNumberTypeFloat32),
|
||||
KernelAttr()
|
||||
.AddInputAttr(kNumberTypeInt32)
|
||||
.AddInputAttr(kNumberTypeInt64)
|
||||
.AddInputAttr(kNumberTypeFloat64)
|
||||
.AddInputAttr(kNumberTypeFloat64)
|
||||
.AddOutputAttr(kNumberTypeFloat64),
|
||||
KernelAttr()
|
||||
.AddInputAttr(kNumberTypeInt32)
|
||||
.AddInputAttr(kNumberTypeInt64)
|
||||
.AddInputAttr(kNumberTypeBool)
|
||||
.AddInputAttr(kNumberTypeBool)
|
||||
.AddOutputAttr(kNumberTypeBool),
|
||||
KernelAttr()
|
||||
.AddInputAttr(kNumberTypeInt32)
|
||||
.AddInputAttr(kNumberTypeInt64)
|
||||
.AddInputAttr(kNumberTypeComplex64)
|
||||
.AddInputAttr(kNumberTypeComplex64)
|
||||
.AddOutputAttr(kNumberTypeComplex64),
|
||||
KernelAttr()
|
||||
.AddInputAttr(kNumberTypeInt32)
|
||||
.AddInputAttr(kNumberTypeInt64)
|
||||
.AddInputAttr(kNumberTypeComplex128)
|
||||
.AddInputAttr(kNumberTypeComplex128)
|
||||
.AddOutputAttr(kNumberTypeComplex128),
|
||||
KernelAttr()
|
||||
.AddInputAttr(kNumberTypeInt32)
|
||||
.AddInputAttr(kNumberTypeInt64)
|
||||
.AddInputAttr(kObjectTypeString)
|
||||
.AddInputAttr(kObjectTypeString)
|
||||
.AddOutputAttr(kObjectTypeString),
|
||||
KernelAttr()
|
||||
.AddInputAttr(kNumberTypeInt64)
|
||||
.AddInputAttr(kNumberTypeInt64)
|
||||
.AddInputAttr(kNumberTypeUInt8)
|
||||
.AddInputAttr(kNumberTypeUInt8)
|
||||
.AddOutputAttr(kNumberTypeUInt8),
|
||||
KernelAttr()
|
||||
.AddInputAttr(kNumberTypeInt64)
|
||||
.AddInputAttr(kNumberTypeInt64)
|
||||
.AddInputAttr(kNumberTypeUInt16)
|
||||
.AddInputAttr(kNumberTypeUInt16)
|
||||
.AddOutputAttr(kNumberTypeUInt16),
|
||||
KernelAttr()
|
||||
.AddInputAttr(kNumberTypeInt64)
|
||||
.AddInputAttr(kNumberTypeInt64)
|
||||
.AddInputAttr(kNumberTypeUInt32)
|
||||
.AddInputAttr(kNumberTypeUInt32)
|
||||
.AddOutputAttr(kNumberTypeUInt32),
|
||||
KernelAttr()
|
||||
.AddInputAttr(kNumberTypeInt64)
|
||||
.AddInputAttr(kNumberTypeInt64)
|
||||
.AddInputAttr(kNumberTypeUInt64)
|
||||
.AddInputAttr(kNumberTypeUInt64)
|
||||
.AddOutputAttr(kNumberTypeUInt64),
|
||||
KernelAttr()
|
||||
.AddInputAttr(kNumberTypeInt64)
|
||||
.AddInputAttr(kNumberTypeInt64)
|
||||
.AddInputAttr(kNumberTypeInt8)
|
||||
.AddInputAttr(kNumberTypeInt8)
|
||||
.AddOutputAttr(kNumberTypeInt8),
|
||||
KernelAttr()
|
||||
.AddInputAttr(kNumberTypeInt64)
|
||||
.AddInputAttr(kNumberTypeInt64)
|
||||
.AddInputAttr(kNumberTypeInt16)
|
||||
.AddInputAttr(kNumberTypeInt16)
|
||||
.AddOutputAttr(kNumberTypeInt16),
|
||||
KernelAttr()
|
||||
.AddInputAttr(kNumberTypeInt64)
|
||||
.AddInputAttr(kNumberTypeInt64)
|
||||
.AddInputAttr(kNumberTypeInt32)
|
||||
.AddInputAttr(kNumberTypeInt32)
|
||||
.AddOutputAttr(kNumberTypeInt32),
|
||||
KernelAttr()
|
||||
.AddInputAttr(kNumberTypeInt64)
|
||||
.AddInputAttr(kNumberTypeInt64)
|
||||
.AddInputAttr(kNumberTypeInt64)
|
||||
.AddInputAttr(kNumberTypeInt64)
|
||||
.AddOutputAttr(kNumberTypeInt64),
|
||||
KernelAttr()
|
||||
.AddInputAttr(kNumberTypeInt64)
|
||||
.AddInputAttr(kNumberTypeInt64)
|
||||
.AddInputAttr(kNumberTypeFloat16)
|
||||
.AddInputAttr(kNumberTypeFloat16)
|
||||
.AddOutputAttr(kNumberTypeFloat16),
|
||||
KernelAttr()
|
||||
.AddInputAttr(kNumberTypeInt64)
|
||||
.AddInputAttr(kNumberTypeInt64)
|
||||
.AddInputAttr(kNumberTypeFloat32)
|
||||
.AddInputAttr(kNumberTypeFloat32)
|
||||
.AddOutputAttr(kNumberTypeFloat32),
|
||||
KernelAttr()
|
||||
.AddInputAttr(kNumberTypeInt64)
|
||||
.AddInputAttr(kNumberTypeInt64)
|
||||
.AddInputAttr(kNumberTypeFloat64)
|
||||
.AddInputAttr(kNumberTypeFloat64)
|
||||
.AddOutputAttr(kNumberTypeFloat64),
|
||||
KernelAttr()
|
||||
.AddInputAttr(kNumberTypeInt64)
|
||||
.AddInputAttr(kNumberTypeInt64)
|
||||
.AddInputAttr(kNumberTypeBool)
|
||||
.AddInputAttr(kNumberTypeBool)
|
||||
.AddOutputAttr(kNumberTypeBool),
|
||||
KernelAttr()
|
||||
.AddInputAttr(kNumberTypeInt64)
|
||||
.AddInputAttr(kNumberTypeInt64)
|
||||
.AddInputAttr(kNumberTypeComplex64)
|
||||
.AddInputAttr(kNumberTypeComplex64)
|
||||
.AddOutputAttr(kNumberTypeComplex64),
|
||||
KernelAttr()
|
||||
.AddInputAttr(kNumberTypeInt64)
|
||||
.AddInputAttr(kNumberTypeInt64)
|
||||
.AddInputAttr(kNumberTypeComplex128)
|
||||
.AddInputAttr(kNumberTypeComplex128)
|
||||
.AddOutputAttr(kNumberTypeComplex128),
|
||||
KernelAttr()
|
||||
.AddInputAttr(kNumberTypeInt64)
|
||||
.AddInputAttr(kNumberTypeInt64)
|
||||
.AddInputAttr(kObjectTypeString)
|
||||
.AddInputAttr(kObjectTypeString)
|
||||
.AddOutputAttr(kObjectTypeString)};
|
||||
|
||||
std::vector<KernelAttr> OneHotCpuKernelMod::GetOpSupport() { return support_list_; }
|
||||
|
||||
MS_KERNEL_FACTORY_REG(NativeCpuKernelMod, OneHot, OneHotCpuKernelMod);
|
||||
|
|
|
@ -45,6 +45,7 @@ class OneHotCpuKernelMod : public NativeCpuKernelMod {
|
|||
private:
|
||||
template <typename ID, typename OD>
|
||||
void LaunchKernel(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &outputs);
|
||||
void SetOnValueInputIndex(size_t input_num);
|
||||
|
||||
TypeId input_dtype_{kTypeUnknown};
|
||||
TypeId output_dtype_{kTypeUnknown};
|
||||
|
@ -52,6 +53,7 @@ class OneHotCpuKernelMod : public NativeCpuKernelMod {
|
|||
size_t stride_{0};
|
||||
size_t axis_{0};
|
||||
static std::vector<KernelAttr> support_list_;
|
||||
size_t on_value_input_index_{1};
|
||||
};
|
||||
} // namespace kernel
|
||||
} // namespace mindspore
|
||||
|
|
|
@ -36,24 +36,29 @@ void SparseTensorDenseAddCpuKernelMod::InitKernel(const CNodePtr &kernel_node) {
|
|||
MS_EXCEPTION_IF_NULL(kernel_node);
|
||||
kernel_name_ = common::AnfAlgo::GetCNodeName(kernel_node);
|
||||
auto indices_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, kIndex0);
|
||||
auto values_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, kIndex1);
|
||||
auto shape_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, kIndex2);
|
||||
auto x2_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, kIndex3);
|
||||
if (AnfAlgo::IsShapesDynamic({values_shape, indices_shape, shape_shape, x2_shape})) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (indices_shape.size() != kIndicesShapeSize) {
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', it requires 'x1_indices' must be a " << kIndicesShapeSize
|
||||
<< "-D Tensor, but got " << indices_shape.size() << "-D";
|
||||
}
|
||||
auto values_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, kIndex1);
|
||||
if (values_shape.size() != 1 || values_shape[0] != indices_shape[0]) {
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name_
|
||||
<< "', it requires 'x1_values' must be a 1-D Tensor and the first dimension length "
|
||||
<< "must be equal to the first dimension length of 'indices', but got 'x1_values' shape: "
|
||||
<< Vector2Str(values_shape) << " and 'x1_indices' shape: " << Vector2Str(indices_shape);
|
||||
}
|
||||
auto shape_shape_ = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, kIndex2);
|
||||
x2_shape_ = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, kIndex3);
|
||||
size_t x1_rank = static_cast<size_t>(shape_shape_[0]);
|
||||
x2_shape_ = x2_shape;
|
||||
size_t x1_rank = static_cast<size_t>(shape_shape[0]);
|
||||
size_t x2_rank = x2_shape_.size();
|
||||
if (x1_rank != x2_rank) {
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name_
|
||||
<< "', x1 and x2 must have same ranks, but got 'x1' shape: " << Vector2Str(shape_shape_)
|
||||
<< "', x1 and x2 must have same ranks, but got 'x1' shape: " << Vector2Str(shape_shape)
|
||||
<< "and 'x2' shape: " << Vector2Str(x2_shape_);
|
||||
}
|
||||
values_size_ = static_cast<size_t>(values_shape[0]);
|
||||
|
|
|
@ -38,25 +38,27 @@ void SparseTensorDenseMatmulCpuKernelMod::InitKernel(const CNodePtr &kernel_node
|
|||
adj_st_ = common::AnfAlgo::GetNodeAttr<bool>(kernel_node, ADJ_ST);
|
||||
adj_dt_ = common::AnfAlgo::GetNodeAttr<bool>(kernel_node, ADJ_dT);
|
||||
auto indices_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, INDICES);
|
||||
auto values_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, VALUES);
|
||||
auto output_shape = common::AnfAlgo::GetOutputInferShape(kernel_node, 0);
|
||||
auto b_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, static_cast<size_t>(DENSE));
|
||||
if (AnfAlgo::IsShapesDynamic({values_shape, indices_shape, output_shape, b_shape})) {
|
||||
return;
|
||||
}
|
||||
if (indices_shape.size() != kIndicesSizeNum && indices_shape[1] != kIndices2rdDimNum) {
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name_
|
||||
<< "', it requires 'indices' must be a 2-D Tensor and the second dimension length "
|
||||
"must be 2, but got 'indices' shape: "
|
||||
<< Vector2Str(indices_shape);
|
||||
}
|
||||
auto values_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, VALUES);
|
||||
if (AnfAlgo::IsShapesDynamic({values_shape, indices_shape})) {
|
||||
return;
|
||||
}
|
||||
if (values_shape.size() != 1 || values_shape[0] != indices_shape[0]) {
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name_
|
||||
<< "', it requires 'values' must be a 1-D Tensor and the first dimension length "
|
||||
" must be equal to the first dimension length of 'indices', but got 'values' shape: "
|
||||
<< Vector2Str(values_shape) << " and 'indices' shape: " << Vector2Str(indices_shape);
|
||||
}
|
||||
output_shape_ = Convert2SizeT(common::AnfAlgo::GetOutputInferShape(kernel_node, 0));
|
||||
output_shape_ = Convert2SizeT(output_shape);
|
||||
values_size_ = LongToSize(values_shape[0]);
|
||||
b_shape_ = Convert2SizeT(common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, static_cast<size_t>(DENSE)));
|
||||
b_shape_ = Convert2SizeT(b_shape);
|
||||
if (b_shape_.size() != kSparseTensorDenseMatmulDenseShapeSize) {
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the dimension of 'dense' must be "
|
||||
<< kSparseTensorDenseMatmulDenseShapeSize << "-D, but got " << b_shape_.size() << "-D";
|
||||
|
|
|
@ -34,7 +34,7 @@ class TensorShapeGpuKernelMod : public DeprecatedNativeGpuKernelMod {
|
|||
|
||||
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
|
||||
const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
|
||||
if (is_null_input_) {
|
||||
if (is_null_input_ || prev_node_output_shape_.empty()) {
|
||||
return true;
|
||||
}
|
||||
S *output_device_address = GetDeviceAddress<S>(outputs, 0);
|
||||
|
|
|
@ -121,6 +121,15 @@ class CholeskySolveGpuKernelMod : public DeprecatedNativeGpuKernelMod {
|
|||
return true;
|
||||
}
|
||||
|
||||
void ResetResource() noexcept override {
|
||||
is_null_input_ = false;
|
||||
input_size_list_.clear();
|
||||
workspace_size_list_.clear();
|
||||
output_size_list_.clear();
|
||||
h_b_array_.clear();
|
||||
h_a_array_.clear();
|
||||
}
|
||||
|
||||
protected:
|
||||
void InitSizeLists() override {
|
||||
size_t input_size = outer_batch_ * m_ * lda_ * unit_size_;
|
||||
|
|
|
@ -137,9 +137,11 @@ int Conv3dGpuKernelMod::Resize(const BaseOperatorPtr &base_operator, const std::
|
|||
}
|
||||
pad_mode_ = kernel_ptr->get_pad_mode();
|
||||
SetPad(pad_list);
|
||||
std::vector<int64_t> stride_me = kernel_ptr->get_stride();
|
||||
std::vector<int64_t> dilation_me = kernel_ptr->get_dilation();
|
||||
SetStrideAndDilation(stride_me, dilation_me);
|
||||
if (!IsDynamicRank(in_shape) && !IsDynamicRank(filter_shape)) {
|
||||
std::vector<int64_t> stride_me = kernel_ptr->get_stride();
|
||||
std::vector<int64_t> dilation_me = kernel_ptr->get_dilation();
|
||||
SetStrideAndDilation(stride_me, dilation_me);
|
||||
}
|
||||
auto input_descriptor_real = GetInputDescReal(pad_list);
|
||||
if (cudnn_data_type_ == CUDNN_DATA_HALF) {
|
||||
CHECK_CUDNN_RET_WITH_EXCEPT_NOTRACE(cudnnSetConvolutionMathType(conv_desc_, CUDNN_TENSOR_OP_MATH),
|
||||
|
|
|
@ -19,8 +19,6 @@ import mindspore as ms
|
|||
from mindspore import ops, nn, context, Tensor
|
||||
from .test_grad_of_dynamic import TestDynamicGrad
|
||||
|
||||
context.set_context(mode=context.PYNATIVE_MODE)
|
||||
|
||||
|
||||
class NetAddcmul(nn.Cell):
|
||||
def __init__(self):
|
||||
|
@ -49,6 +47,7 @@ def test_addcmul_dyn_shape():
|
|||
Description: Test case of dynamic shape for Addcmul grad operator.
|
||||
Expectation: success.
|
||||
"""
|
||||
context.set_context(mode=context.PYNATIVE_MODE)
|
||||
addcmul_test(False)
|
||||
|
||||
|
||||
|
@ -61,4 +60,5 @@ def test_addcmul_dyn_rank():
|
|||
Description: Test case of dynamic rank for Addcmul grad operator.
|
||||
Expectation: success.
|
||||
"""
|
||||
context.set_context(mode=context.PYNATIVE_MODE)
|
||||
addcmul_test(True)
|
||||
|
|
|
@ -19,8 +19,6 @@ import mindspore.ops.operations.math_ops as M
|
|||
from mindspore import nn, context, Tensor
|
||||
from .test_grad_of_dynamic import TestDynamicGrad
|
||||
|
||||
context.set_context(mode=context.PYNATIVE_MODE)
|
||||
|
||||
|
||||
class NetBatchMatMul(nn.Cell):
|
||||
def __init__(self):
|
||||
|
@ -40,6 +38,7 @@ def test_batch_matmul_dynamic_shape():
|
|||
Description: Test case of dynamic shape for BatchMatMul grad operator on GPU.
|
||||
Expectation: success.
|
||||
"""
|
||||
context.set_context(mode=context.PYNATIVE_MODE)
|
||||
test_dynamic = TestDynamicGrad(NetBatchMatMul(), skip_convert_out_ids=[0])
|
||||
x = Tensor(np.ones(shape=[2, 4, 1, 3]), mindspore.float32)
|
||||
y = Tensor(np.ones(shape=[2, 4, 3, 4]), mindspore.float32)
|
||||
|
@ -56,6 +55,7 @@ def test_batch_matmul_dynamic_rank():
|
|||
Description: Test case of dynamic rank for BatchMatMul grad operator on GPU.
|
||||
Expectation: success.
|
||||
"""
|
||||
context.set_context(mode=context.PYNATIVE_MODE)
|
||||
test_dynamic = TestDynamicGrad(NetBatchMatMul(), skip_convert_out_ids=[0])
|
||||
x = Tensor(np.ones(shape=[2, 4, 1, 3]), mindspore.float32)
|
||||
y = Tensor(np.ones(shape=[2, 4, 3, 4]), mindspore.float32)
|
||||
|
|
|
@ -98,7 +98,7 @@ def test_gpu_grad_dynamic_rank_2():
|
|||
|
||||
|
||||
@pytest.mark.skip(reason="CPU无Conv3DBackpropFilter, Conv3DBackpropInput, kernel实现")
|
||||
@pytest.mark.level1
|
||||
@pytest.mark.level2
|
||||
@pytest.mark.platform_x86_cpu
|
||||
@pytest.mark.env_onecard
|
||||
def test_cpu_grad_dynamic_shape():
|
||||
|
@ -112,7 +112,7 @@ def test_cpu_grad_dynamic_shape():
|
|||
|
||||
|
||||
@pytest.mark.skip(reason="CPU无Conv3DBackpropFilter, Conv3DBackpropInput, kernel实现")
|
||||
@pytest.mark.level1
|
||||
@pytest.mark.level2
|
||||
@pytest.mark.platform_x86_cpu
|
||||
@pytest.mark.env_onecard
|
||||
def test_cpu_grad_dynamic_rank():
|
||||
|
|
|
@ -18,8 +18,6 @@ import pytest
|
|||
from mindspore import ops, nn, context, Tensor
|
||||
from .test_grad_of_dynamic import TestDynamicGrad
|
||||
|
||||
context.set_context(mode=context.PYNATIVE_MODE)
|
||||
|
||||
|
||||
class NetDivNoNan(nn.Cell):
|
||||
def __init__(self):
|
||||
|
@ -47,6 +45,7 @@ def test_divnonan_dyn_shape():
|
|||
Description: Test case of dynamic shape for DivNoNan grad operator.
|
||||
Expectation: success.
|
||||
"""
|
||||
context.set_context(mode=context.PYNATIVE_MODE)
|
||||
divnonan_test(False)
|
||||
|
||||
|
||||
|
@ -60,4 +59,5 @@ def test_divnonan_dyn_rank():
|
|||
Description: Test case of dynamic rank for DivNoNan grad operator.
|
||||
Expectation: success.
|
||||
"""
|
||||
context.set_context(mode=context.PYNATIVE_MODE)
|
||||
divnonan_test(True)
|
||||
|
|
|
@ -18,8 +18,6 @@ import pytest
|
|||
from mindspore import ops, nn, context, Tensor
|
||||
from .test_grad_of_dynamic import TestDynamicGrad
|
||||
|
||||
context.set_context(mode=context.PYNATIVE_MODE)
|
||||
|
||||
|
||||
class NetSub(nn.Cell):
|
||||
def __init__(self):
|
||||
|
@ -47,6 +45,7 @@ def test_sub_dyn_shape():
|
|||
Description: Test case of dynamic shape for Sub grad operator.
|
||||
Expectation: success.
|
||||
"""
|
||||
context.set_context(mode=context.PYNATIVE_MODE)
|
||||
sub_test(False)
|
||||
|
||||
|
||||
|
@ -60,4 +59,5 @@ def test_sub_dyn_rank():
|
|||
Description: Test case of dynamic rank for Sub grad operator.
|
||||
Expectation: success.
|
||||
"""
|
||||
context.set_context(mode=context.PYNATIVE_MODE)
|
||||
sub_test(True)
|
||||
|
|
|
@ -18,8 +18,6 @@ import pytest
|
|||
from mindspore import ops, nn, context, Tensor
|
||||
from .test_grad_of_dynamic import TestDynamicGrad
|
||||
|
||||
context.set_context(mode=context.PYNATIVE_MODE)
|
||||
|
||||
|
||||
class NetTopK(nn.Cell):
|
||||
def __init__(self, k):
|
||||
|
@ -47,6 +45,7 @@ def test_topk_dyn_shape():
|
|||
Description: Test case of dynamic shape for TopK grad operator.
|
||||
Expectation: success.
|
||||
"""
|
||||
context.set_context(mode=context.PYNATIVE_MODE)
|
||||
topk_test(False)
|
||||
|
||||
|
||||
|
@ -59,4 +58,5 @@ def test_topk_dyn_rank():
|
|||
Description: Test case of dynamic rank for TopK grad operator.
|
||||
Expectation: success.
|
||||
"""
|
||||
context.set_context(mode=context.PYNATIVE_MODE)
|
||||
topk_test(True)
|
||||
|
|
|
@ -19,8 +19,6 @@ import mindspore.ops.operations.math_ops as M
|
|||
from mindspore import nn, context, Tensor
|
||||
from .test_grad_of_dynamic import TestDynamicGrad
|
||||
|
||||
context.set_context(mode=context.PYNATIVE_MODE)
|
||||
|
||||
|
||||
class NetTrace(nn.Cell):
|
||||
def __init__(self):
|
||||
|
@ -41,6 +39,7 @@ def test_trace_dynamic_shape():
|
|||
Description: Test case of dynamic shape for Trace grad operator on CPU and GPU.
|
||||
Expectation: success.
|
||||
"""
|
||||
context.set_context(mode=context.PYNATIVE_MODE)
|
||||
test_dynamic = TestDynamicGrad(NetTrace())
|
||||
x = Tensor(np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]), mindspore.float32)
|
||||
test_dynamic.test_dynamic_grad_net(x, False)
|
||||
|
@ -56,6 +55,7 @@ def test_trace_dynamic_shape_rank():
|
|||
Description: Test case of dynamic rank for Trace grad operator on CPU and GPU.
|
||||
Expectation: success.
|
||||
"""
|
||||
context.set_context(mode=context.PYNATIVE_MODE)
|
||||
test_dynamic = TestDynamicGrad(NetTrace())
|
||||
x = Tensor(np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]), mindspore.float32)
|
||||
test_dynamic.test_dynamic_grad_net(x, True)
|
||||
|
|
Loading…
Reference in New Issue