!49769 ops InstanceNormV2, SparseFillEmptyRowsGrad, SelfAdjointEig support dynamic shape feature

Merge pull request !49769 from wang_ziqi/br_instance_norm_v2
This commit is contained in:
i-robot 2023-03-08 07:46:49 +00:00 committed by Gitee
commit 5c287b6496
No known key found for this signature in database
GPG Key ID: 173E9B9CA92EEF8F
10 changed files with 166 additions and 90 deletions

View File

@ -87,8 +87,8 @@ void InstanceNormV2CpuKernelMod::CollectStatsKernel(const kernel::AddressPtr &x,
template <typename T, template <typename S> class VarTransform>
void InstanceNormV2CpuKernelMod::UpdateStatsTemplate(const std::vector<kernel::AddressPtr> &inputs,
const std::vector<kernel::AddressPtr> &outputs) {
std::vector<float> _var_sum(instance_num, float_init_zero);
std::vector<float> _mean_(instance_num, float_init_zero);
std::vector<float> _var_sum(instance_num_, float_init_zero);
std::vector<float> _mean_(instance_num_, float_init_zero);
CollectStatsKernel<T>(inputs[kIndex0], _mean_.data(), _var_sum.data());
const int64_t image_size = x_shape_4d_[kIndex1] * x_shape_4d_[kIndex2];
MS_EXCEPTION_IF_ZERO("image_size", image_size);
@ -116,7 +116,7 @@ void InstanceNormV2CpuKernelMod::UpdateStatsTemplate(const std::vector<kernel::A
static_cast<double>(running_var_vec(idx)));
}
};
CPUKernelUtils::ParallelFor(loop_momentum, instance_num, static_cast<float>(kGrainSize));
CPUKernelUtils::ParallelFor(loop_momentum, instance_num_, static_cast<float>(kGrainSize));
}
void InstanceNormV2CpuKernelMod::CollectLinearAndConstant(const typename TTypes<float>::Vec &gamma,
@ -142,7 +142,7 @@ void InstanceNormV2CpuKernelMod::CollectLinearAndConstant(const typename TTypes<
_beta_[idx] = beta(idx) - mean * _alpha_[idx];
}
};
CPUKernelUtils::ParallelFor(loop_instance, instance_num, static_cast<float>(kGrainSize));
CPUKernelUtils::ParallelFor(loop_instance, instance_num_, static_cast<float>(kGrainSize));
}
template <typename T>
@ -151,8 +151,8 @@ void InstanceNormV2CpuKernelMod::TransformInput(const std::vector<kernel::Addres
const int64_t batch = x_shape_4d_[kIndex0];
const int64_t channel = x_shape_4d_[kIndex3];
const int64_t image_size = x_shape_4d_[kIndex1] * x_shape_4d_[kIndex2];
std::vector<float> _alpha_(instance_num, float_init_zero);
std::vector<float> _beta_(instance_num, float_init_zero);
std::vector<float> _alpha_(instance_num_, float_init_zero);
std::vector<float> _beta_(instance_num_, float_init_zero);
std::vector<int64_t> batch_channels_1d_ = {batch_channels_2d_.front() * batch_channels_2d_.back()};
auto gamma = EigenTensor(batch_channels_1d_, inputs[kIndex1]->addr).vec<float>();
auto beta = EigenTensor(batch_channels_1d_, inputs[kIndex2]->addr).vec<float>();
@ -183,19 +183,26 @@ void InstanceNormV2CpuKernelMod::TransformInput(const std::vector<kernel::Addres
CPUKernelUtils::ParallelFor(loop_transform, batch, block_size);
}
void InstanceNormV2CpuKernelMod::InitKernel(const CNodePtr &kernel_node) {
MS_EXCEPTION_IF_NULL(kernel_node);
kernel_name_ = common::AnfAlgo::GetCNodeName(kernel_node);
in_type_ = AnfAlgo::GetOutputDeviceDataType(kernel_node, kIndex0);
std::vector<int64_t> x_shape_ = AnfAlgo::GetInputDeviceShape(kernel_node, kIndex0);
std::vector<int64_t> batch_channels_ = AnfAlgo::GetInputDeviceShape(kernel_node, kIndex1);
if (x_shape_.size() != kDim4 && x_shape_.size() != kDim5) {
MS_EXCEPTION(ValueError) << "For '" << kernel_name_ << "', the dimension of 'x' should be 4D or 5D, but got "
<< x_shape_.size() << "D.";
bool InstanceNormV2CpuKernelMod::Init(const BaseOperatorPtr &base_operator, const std::vector<KernelTensorPtr> &inputs,
const std::vector<KernelTensorPtr> &outputs) {
MS_EXCEPTION_IF_NULL(base_operator);
kernel_name_ = base_operator->name();
auto prim = base_operator->GetPrim();
MS_EXCEPTION_IF_NULL(prim);
CHECK_KERNEL_INPUTS_NUM(inputs.size(), kInstanceNormV2InputsNum, kernel_name_);
CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kInstanceNormV2OutputNum, kernel_name_);
auto kernel_attr = GetKernelAttrFromTensors(inputs, outputs);
auto is_match = MatchKernelAttr(kernel_attr, GetOpSupport());
if (!is_match.first) {
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', it does not support this kernel data type: " << kernel_attr;
}
is_training_ = common::AnfAlgo::GetNodeAttr<bool>(kernel_node, kAttrIsTraining);
momentum_ = common::AnfAlgo::GetNodeAttr<float>(kernel_node, kAttrMomentum);
epsilon_ = common::AnfAlgo::GetNodeAttr<float>(kernel_node, kAttrEpsilon);
in_type_ = inputs[kIndex0]->GetDtype();
is_training_ = GetValue<bool>(prim->GetAttr(kAttrIsTraining));
momentum_ = GetValue<float>(prim->GetAttr(kAttrMomentum));
epsilon_ = GetValue<float>(prim->GetAttr(kAttrEpsilon));
if (momentum_ > momentum_max || momentum_ < momentum_min) {
MS_EXCEPTION(ValueError) << "For '" << kernel_name_
<< "momentum value should be in [0, 1], but get momentum = " << momentum_ << ".";
@ -204,27 +211,44 @@ void InstanceNormV2CpuKernelMod::InitKernel(const CNodePtr &kernel_node) {
MS_EXCEPTION(ValueError) << "For '" << kernel_name_
<< "epsilon value should be in [0, 1), but get epsilon = " << epsilon_ << ".";
}
input_x_is_4d_ = (x_shape_.size() == kDim4);
return true;
}
int InstanceNormV2CpuKernelMod::Resize(const BaseOperatorPtr &base_operator, const std::vector<KernelTensorPtr> &inputs,
const std::vector<KernelTensorPtr> &outputs,
const std::map<uint32_t, tensor::TensorPtr> &) {
if (auto ret = KernelMod::Resize(base_operator, inputs, outputs); ret != KRET_OK) {
return ret;
}
std::vector<int64_t> x_shape = inputs[kIndex0]->GetShapeVector();
std::vector<int64_t> batch_channels = inputs[kIndex1]->GetShapeVector();
if (x_shape.size() != kDim4 && x_shape.size() != kDim5) {
MS_EXCEPTION(ValueError) << "For '" << kernel_name_ << "', the dimension of 'x' should be 4D or 5D, but got "
<< x_shape.size() << "D.";
}
input_x_is_4d_ = (x_shape.size() == kDim4);
// Format NCHW could be considered as a situation of format NC1HWC0 when C0 = 1.
if (input_x_is_4d_) {
// extern (N, C, H, W) to (N, C, H, W, 1)
x_shape_.push_back(SizeToLong(kDim1));
x_shape.push_back(SizeToLong(kDim1));
// extern (N, C, 1, 1) to (N, C1=C, 1, 1, C0=1)
batch_channels_.push_back(SizeToLong(kDim1));
batch_channels.push_back(SizeToLong(kDim1));
}
// consider (N, C1, H, W, C0) as (N*C1, H, W, C0), similar to (N, H, W, C)
x_shape_4d_ = {x_shape_[kIndex0] * x_shape_[kIndex1], x_shape_[kIndex2], x_shape_[kIndex3], x_shape_[kIndex4]};
x_shape_4d_ = {x_shape[kIndex0] * x_shape[kIndex1], x_shape[kIndex2], x_shape[kIndex3], x_shape[kIndex4]};
// consider (N, C1, 1, 1 C0) as (N*C1, 1, 1, C0), similar to (N, 1, 1, C)
batch_channels_2d_ = {batch_channels_[kIndex0] * batch_channels_[kIndex1], batch_channels_[kIndex4]};
instance_num = CPUKernelUtils::CalcElementNum(batch_channels_2d_);
batch_channels_2d_ = {batch_channels[kIndex0] * batch_channels[kIndex1], batch_channels[kIndex4]};
instance_num_ = CPUKernelUtils::CalcElementNum(batch_channels_2d_);
return KRET_OK;
}
bool InstanceNormV2CpuKernelMod::Launch(const std::vector<kernel::AddressPtr> &inputs,
const std::vector<kernel::AddressPtr> &,
const std::vector<kernel::AddressPtr> &outputs) {
CHECK_KERNEL_INPUTS_NUM(inputs.size(), kInstanceNormV2InputsNum, kernel_name_);
CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kInstanceNormV2OutputNum, kernel_name_);
bool res = false;
switch (in_type_) {
case kNumberTypeFloat16:

View File

@ -18,6 +18,7 @@
#include <set>
#include <vector>
#include <map>
#include "kernel/common_utils.h"
#include "plugin/device/cpu/kernel/cpu_kernel.h"
#include "plugin/device/cpu/kernel/eigen/eigen_common_utils.h"
@ -25,13 +26,15 @@
namespace mindspore {
namespace kernel {
class InstanceNormV2CpuKernelMod : public DeprecatedNativeCpuKernelMod {
class InstanceNormV2CpuKernelMod : public NativeCpuKernelMod {
public:
InstanceNormV2CpuKernelMod() = default;
~InstanceNormV2CpuKernelMod() override = default;
void InitKernel(const CNodePtr &kernel_node) override;
bool Init(const BaseOperatorPtr &base_operator, const std::vector<KernelTensorPtr> &inputs,
const std::vector<KernelTensorPtr> &outputs) override;
int Resize(const BaseOperatorPtr &base_operator, const std::vector<KernelTensorPtr> &inputs,
const std::vector<KernelTensorPtr> &outputs, const std::map<uint32_t, tensor::TensorPtr> &) override;
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs) override;
@ -65,7 +68,7 @@ class InstanceNormV2CpuKernelMod : public DeprecatedNativeCpuKernelMod {
std::vector<int64_t> x_shape_4d_;
std::vector<int64_t> batch_channels_2d_;
bool input_x_is_4d_ = true;
int64_t instance_num = 0;
int64_t instance_num_ = 0;
};
} // namespace kernel
} // namespace mindspore

View File

@ -49,6 +49,7 @@ bool RaggedTensorToTensorCpuKernelMod::Init(const BaseOperatorPtr &base_operator
MS_EXCEPTION_IF_NULL(base_operator);
kernel_name_ = base_operator->name();
row_partition_types_ = GetValue<std::vector<std::string>>(base_operator->GetAttr("row_partition_types"));
ragged_rank_ = GetRaggedRank(row_partition_types_);
shape_dtype_ = inputs[kShapeInputIndex]->GetDtype();
values_dtype_ = inputs[kValueInputIndex]->GetDtype();
size_t output_num = outputs.size();
@ -66,9 +67,15 @@ int RaggedTensorToTensorCpuKernelMod::Resize(const BaseOperatorPtr &base_operato
values_shape_ = inputs[kValueInputIndex]->GetShapeVector();
default_values_shape_ = inputs[kDefaultValueInputIndex]->GetShapeVector();
output_shape_ = outputs[0]->GetShapeVector();
if (ragged_rank_ + values_shape_.size() != output_shape_.size()) {
MS_LOG(EXCEPTION) << "For '" << kernel_name_
<< "', row partition size plus 'values' rank should be equal to 'shape' rank: "
<< output_shape_.size() << ", but got row partition size: " << ragged_rank_
<< ", 'values' rank: " << values_shape_.size();
}
row_partition_shape_list_.clear();
for (int i = 0; i < SizeToLong(row_partition_types_.size()); ++i) {
row_partition_shape_list_.push_back(inputs[kFirstPartitionInputIndex + i]->GetShapeVector());
for (int i = 0; i < ragged_rank_; ++i) {
row_partition_shape_list_.emplace_back(inputs[kFirstPartitionInputIndex + i]->GetShapeVector());
}
return KRET_OK;
}
@ -123,16 +130,9 @@ bool RaggedTensorToTensorCpuKernelMod::Launch(const std::vector<kernel::AddressP
template <typename TYPE1, typename TYPE2>
void RaggedTensorToTensorCpuKernelMod::LaunchKernel(const std::vector<kernel::AddressPtr> &inputs,
const std::vector<kernel::AddressPtr> &outputs) {
int ragged_rank_ = GetRaggedRank(row_partition_types_);
TYPE1 first_dimension;
GetFirstDimension<TYPE1>(inputs, &first_dimension);
std::vector<TYPE1> output_size;
if (ragged_rank_ + values_shape_.size() != output_shape_.size()) {
MS_LOG(EXCEPTION) << "For '" << kernel_name_
<< "', row partition size plus 'values' rank should be equal to 'shape' rank: "
<< output_shape_.size() << ", but got row partition size: " << ragged_rank_
<< ", 'values' rank: " << values_shape_.size();
}
output_size.reserve(output_shape_.size());
for (unsigned int dim = 0; dim < output_shape_.size(); dim++) {
output_size.push_back(output_shape_[dim]);

View File

@ -78,6 +78,7 @@ class RaggedTensorToTensorCpuKernelMod : public NativeCpuKernelMod {
template <typename TYPE1>
void GetFirstDimension(const std::vector<kernel::AddressPtr> &inputs, TYPE1 *first_dim);
int64_t ragged_rank_;
TypeId shape_dtype_{kTypeUnknown};
TypeId values_dtype_{kTypeUnknown};
std::vector<int64_t> values_shape_;

View File

@ -25,19 +25,42 @@ constexpr auto kSelfAdjopintEig = "SelfAdjopintEig";
constexpr const size_t kInputsNum = 1;
constexpr const size_t kOutputsNum = 2;
void SelfAdjointEigCpuKernelMod::InitKernel(const CNodePtr &kernel_node) {
MS_EXCEPTION_IF_NULL(kernel_node);
kernel_name_ = common::AnfAlgo::GetCNodeName(kernel_node);
dtype_ = AnfAlgo::GetInputDeviceDataType(kernel_node, 0);
attr_ = common::AnfAlgo::GetNodeAttr<bool>(kernel_node, "compute_v");
input_shape_ = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
bool SelfAdjointEigCpuKernelMod::Init(const BaseOperatorPtr &base_operator, const std::vector<KernelTensorPtr> &inputs,
const std::vector<KernelTensorPtr> &outputs) {
MS_EXCEPTION_IF_NULL(base_operator);
kernel_name_ = base_operator->name();
auto prim = base_operator->GetPrim();
MS_EXCEPTION_IF_NULL(prim);
CHECK_KERNEL_INPUTS_NUM(inputs.size(), kInputsNum, kernel_name_);
CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kOutputsNum, kernel_name_);
auto kernel_attr = GetKernelAttrFromTensors(inputs, outputs);
auto is_match = MatchKernelAttr(kernel_attr, GetOpSupport());
if (!is_match.first) {
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', it does not support this kernel data type: " << kernel_attr;
}
dtype_ = inputs[kIndex0]->GetDtype();
compute_v_ = GetValue<bool>(prim->GetAttr("compute_v"));
return true;
}
int SelfAdjointEigCpuKernelMod::Resize(const BaseOperatorPtr &base_operator, const std::vector<KernelTensorPtr> &inputs,
const std::vector<KernelTensorPtr> &outputs,
const std::map<uint32_t, tensor::TensorPtr> &) {
if (auto ret = KernelMod::Resize(base_operator, inputs, outputs); ret != KRET_OK) {
return ret;
}
input_shape_ = inputs[kIndex0]->GetShapeVector();
return KRET_OK;
}
bool SelfAdjointEigCpuKernelMod::Launch(const std::vector<kernel::AddressPtr> &inputs,
const std::vector<kernel::AddressPtr> &,
const std::vector<kernel::AddressPtr> &outputs) {
CHECK_KERNEL_INPUTS_NUM(inputs.size(), kInputsNum, kernel_name_);
// CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kOutputsNum, kernel_name_);
if (dtype_ == kNumberTypeFloat32) {
LaunchKernel<float>(inputs, outputs);
} else if (dtype_ == kNumberTypeFloat64) {
@ -59,7 +82,7 @@ bool SelfAdjointEigCpuKernelMod::LaunchKernel(const std::vector<kernel::AddressP
auto *input = reinterpret_cast<T *>(inputs[kIndex0]->addr);
auto *output0 = reinterpret_cast<T *>(outputs[kIndex0]->addr);
auto *output1 = reinterpret_cast<T *>(outputs[kIndex1]->addr);
bool attr0_ = attr_;
bool attr0_ = compute_v_;
// The size of each dimension
std::vector<int64_t> shape = input_shape_;
// rank

View File

@ -28,12 +28,15 @@
namespace mindspore {
namespace kernel {
class SelfAdjointEigCpuKernelMod : public DeprecatedNativeCpuKernelMod {
class SelfAdjointEigCpuKernelMod : public NativeCpuKernelMod {
public:
SelfAdjointEigCpuKernelMod() = default;
~SelfAdjointEigCpuKernelMod() override = default;
void InitKernel(const CNodePtr &kernel_node) override;
bool Init(const BaseOperatorPtr &base_operator, const std::vector<KernelTensorPtr> &inputs,
const std::vector<KernelTensorPtr> &outputs) override;
int Resize(const BaseOperatorPtr &base_operator, const std::vector<KernelTensorPtr> &inputs,
const std::vector<KernelTensorPtr> &outputs, const std::map<uint32_t, tensor::TensorPtr> &) override;
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs) override;
@ -48,7 +51,7 @@ class SelfAdjointEigCpuKernelMod : public DeprecatedNativeCpuKernelMod {
static std::vector<std::pair<KernelAttr, SelfAdjointEigLaunchFunc>> func_list_;
TypeId dtype_{kTypeUnknown};
std::vector<int64_t> input_shape_;
bool attr_;
bool compute_v_;
};
} // namespace kernel
} // namespace mindspore

View File

@ -34,15 +34,10 @@ constexpr size_t kSparseFillEmptyRowsGradOutputsNum = 2;
constexpr size_t kReverseIndexMapSizeNum = 1;
constexpr size_t kGradValuesSizeNum = 1;
const uint32_t kInput_reverse_index_map = 0;
const uint32_t kInput_grad_values = 1;
const uint32_t kOutput_y_values = 0;
const uint32_t kOutput_y_default_value = 1;
constexpr char kKernelName[] = "SparseFillEmptyRows";
#define EIGEN_SHAPE_CAST(INPUT) static_cast<Eigen::DenseIndex>(AnfAlgo::GetInputDeviceShape(node_ptr, INPUT)[0])
#define SPARSE_FILL_EMPTY_ROWS_GRAD_COMPUTE_CASE(DTYPE, TYPE) \
case (DTYPE): { \
ret = LaunchKernel<TYPE>(inputs, outputs); \
@ -50,41 +45,61 @@ constexpr char kKernelName[] = "SparseFillEmptyRows";
}
} // namespace
void SparseFillEmptyRowsGradCpuKernelMod::InitKernel(const CNodePtr &kernel_node) {
node_ptr = kernel_node;
MS_EXCEPTION_IF_NULL(node_ptr);
output_y_values_type_ = AnfAlgo::GetOutputDeviceDataType(kernel_node, 0);
output_y_default_value_type_ = AnfAlgo::GetOutputDeviceDataType(kernel_node, 1);
kernel_name_ = common::AnfAlgo::GetCNodeName(kernel_node);
size_t input_num = common::AnfAlgo::GetInputTensorNum(node_ptr);
CHECK_KERNEL_INPUTS_NUM(input_num, kSparseFillEmptyRowsGradInputsNum, kernel_name_);
size_t output_num = AnfAlgo::GetOutputTensorNum(node_ptr);
CHECK_KERNEL_OUTPUTS_NUM(output_num, kSparseFillEmptyRowsGradOutputsNum, kernel_name_);
const auto reverse_index_map_shape = AnfAlgo::GetInputDeviceShape(node_ptr, 0);
const auto grad_values_shape = AnfAlgo::GetInputDeviceShape(node_ptr, 1);
if (reverse_index_map_shape.size() != kReverseIndexMapSizeNum && reverse_index_map_shape[0] > grad_values_shape[0]) {
bool SparseFillEmptyRowsGradCpuKernelMod::Init(const BaseOperatorPtr &base_operator,
const std::vector<KernelTensorPtr> &inputs,
const std::vector<KernelTensorPtr> &outputs) {
MS_EXCEPTION_IF_NULL(base_operator);
kernel_name_ = base_operator->name();
auto prim = base_operator->GetPrim();
MS_EXCEPTION_IF_NULL(prim);
CHECK_KERNEL_INPUTS_NUM(inputs.size(), kSparseFillEmptyRowsGradInputsNum, kernel_name_);
CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kSparseFillEmptyRowsGradOutputsNum, kernel_name_);
auto kernel_attr = GetKernelAttrFromTensors(inputs, outputs);
auto is_match = MatchKernelAttr(kernel_attr, GetOpSupport());
if (!is_match.first) {
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', it does not support this kernel data type: " << kernel_attr;
}
output_y_values_type_ = inputs[kIndex0]->GetDtype();
output_y_default_value_type_ = inputs[kIndex1]->GetDtype();
return true;
}
int SparseFillEmptyRowsGradCpuKernelMod::Resize(const BaseOperatorPtr &base_operator,
const std::vector<KernelTensorPtr> &inputs,
const std::vector<KernelTensorPtr> &outputs,
const std::map<uint32_t, tensor::TensorPtr> &) {
if (auto ret = KernelMod::Resize(base_operator, inputs, outputs); ret != KRET_OK) {
return ret;
}
reverse_index_map_shape_ = inputs[kIndex0]->GetShapeVector();
grad_values_shape_ = inputs[kIndex1]->GetShapeVector();
if (reverse_index_map_shape_.size() != kReverseIndexMapSizeNum &&
reverse_index_map_shape_[0] > grad_values_shape_[0]) {
MS_LOG(EXCEPTION) << "For '" << kernel_name_
<< "', it requires 'reverse_index_map' must be a 1-D Tensor and the first dimension length "
"must be smalll or equal to the first dimension length of 'values' "
<< Vector2Str(reverse_index_map_shape);
<< Vector2Str(reverse_index_map_shape_);
}
if (grad_values_shape.size() != kGradValuesSizeNum) {
if (grad_values_shape_.size() != kGradValuesSizeNum) {
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', it requires 'grad_values' must be a 1-D Tensor "
<< Vector2Str(grad_values_shape);
<< Vector2Str(grad_values_shape_);
}
return KRET_OK;
}
template <typename T>
bool SparseFillEmptyRowsGradCpuKernelMod::LaunchKernel(const std::vector<kernel::AddressPtr> &inputs,
const std::vector<kernel::AddressPtr> &outputs) {
CHECK_KERNEL_INPUTS_NUM(inputs.size(), kSparseFillEmptyRowsGradInputsNum, kernel_name_);
CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kSparseFillEmptyRowsGradOutputsNum, kernel_name_);
auto reverse_index_map_ptr = reinterpret_cast<int64_t *>(inputs[0]->addr);
auto grad_values_ptr = reinterpret_cast<T *>(inputs[1]->addr);
const auto reverse_index_map_shape = AnfAlgo::GetInputDeviceShape(node_ptr, 0);
const auto grad_values_shape = AnfAlgo::GetInputDeviceShape(node_ptr, 1);
const int64_t N = reverse_index_map_shape[0];
const int64_t N_full = grad_values_shape[0];
const int64_t N = reverse_index_map_shape_[0];
const int64_t N_full = grad_values_shape_[0];
auto y_values_ptr = reinterpret_cast<T *>(outputs[kOutput_y_values]->addr);
auto ret1 = memset_s(y_values_ptr, N * sizeof(T), 0, N * sizeof(T));
@ -112,12 +127,7 @@ bool SparseFillEmptyRowsGradCpuKernelMod::LaunchKernel(const std::vector<kernel:
*y_default_value += grad_values_ptr[j];
}
}
ShapeVector output_y_values_shape;
ShapeVector output_y_default_value_shape = {};
output_y_values_shape.push_back(N);
common::AnfAlgo::SetOutputInferTypeAndShape({output_y_values_type_, output_y_default_value_type_},
{output_y_values_shape, output_y_default_value_shape},
cnode_ptr_.lock().get());
return true;
}
@ -199,7 +209,7 @@ bool SparseFillEmptyRowsGradCpuKernelMod::Launch(const std::vector<AddressPtr> &
const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs) {
bool ret = false;
auto data_type = AnfAlgo::GetInputDeviceDataType(node_ptr, kInput_grad_values);
auto data_type = output_y_default_value_type_;
switch (data_type) {
SPARSE_FILL_EMPTY_ROWS_GRAD_COMPUTE_CASE(kNumberTypeInt8, int8_t)
SPARSE_FILL_EMPTY_ROWS_GRAD_COMPUTE_CASE(kNumberTypeUInt8, uint8_t)

View File

@ -17,17 +17,21 @@
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_SPARSEFILLEMPTYROWSGRAD_CPU_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_SPARSEFILLEMPTYROWSGRAD_CPU_KERNEL_H_
#include <vector>
#include <map>
#include "plugin/device/cpu/kernel/cpu_kernel.h"
#include "plugin/factory/ms_factory.h"
namespace mindspore {
namespace kernel {
class SparseFillEmptyRowsGradCpuKernelMod : public DeprecatedNativeCpuKernelMod {
class SparseFillEmptyRowsGradCpuKernelMod : public NativeCpuKernelMod {
public:
SparseFillEmptyRowsGradCpuKernelMod() = default;
~SparseFillEmptyRowsGradCpuKernelMod() override = default;
void InitKernel(const CNodePtr &kernel_node) override;
bool Init(const BaseOperatorPtr &base_operator, const std::vector<KernelTensorPtr> &inputs,
const std::vector<KernelTensorPtr> &outputs) override;
int Resize(const BaseOperatorPtr &base_operator, const std::vector<KernelTensorPtr> &inputs,
const std::vector<KernelTensorPtr> &outputs, const std::map<uint32_t, tensor::TensorPtr> &) override;
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs) override;
@ -38,9 +42,10 @@ class SparseFillEmptyRowsGradCpuKernelMod : public DeprecatedNativeCpuKernelMod
template <typename T>
bool LaunchKernel(const std::vector<kernel::AddressPtr> &inputs, const std::vector<kernel::AddressPtr> &outputs);
CNodePtr node_ptr;
TypeId output_y_values_type_;
TypeId output_y_default_value_type_;
ShapeVector reverse_index_map_shape_;
ShapeVector grad_values_shape_;
};
} // namespace kernel
} // namespace mindspore

View File

@ -52,7 +52,7 @@ BaseShapePtr RaggedTensorToTensorInferShape(const PrimitivePtr &primitive,
if (IsDynamic(shape_shape) || IsDynamicRank(values_shape) || IsDynamicRank(default_value_shape) ||
IsDynamicRank(tensor0_shape)) {
return std::make_shared<abstract::Shape>(ShapeVector{abstract::Shape::kShapeRankAny});
return std::make_shared<abstract::Shape>(output_shape);
}
CheckAndConvertUtils::CheckInteger("dimension of 'shape'", SizeToLong(shape_shape.size()), kEqual, 1, prim_name);

View File

@ -36,6 +36,13 @@ abstract::TupleShapePtr SelfAdjointEigInferShape(const PrimitivePtr &primitive,
auto x = input_args[0]->BuildShape();
auto input_shape = CheckAndConvertUtils::ConvertShapePtrToShapeMap(x)[kShape];
auto input_rank = SizeToLong(input_shape.size());
if (IsDynamicRank(input_shape)) {
auto unknow_shape_ptr = std::make_shared<abstract::Shape>(ShapeVector{abstract::Shape::kShapeRankAny});
return std::make_shared<abstract::TupleShape>(
std::vector<abstract::BaseShapePtr>{unknow_shape_ptr, unknow_shape_ptr});
}
CheckAndConvertUtils::CheckInteger("input rank", input_rank, kGreaterEqual, kNumber, prim_name);
int64_t last_shape_input = input_shape[input_rank - 1];
int64_t last_second__shape_input = input_shape[input_rank - 2];