!49769 ops InstanceNormV2, SparseFillEmptyRowsGrad, SelfAdjointEig supports dynamic shape feature

Merge pull request !49769 from wang_ziqi/br_instance_norm_v2
This commit is contained in:
i-robot 2023-03-08 07:46:49 +00:00 committed by Gitee
commit 5c287b6496
No known key found for this signature in database
GPG Key ID: 173E9B9CA92EEF8F
10 changed files with 166 additions and 90 deletions

View File

@ -87,8 +87,8 @@ void InstanceNormV2CpuKernelMod::CollectStatsKernel(const kernel::AddressPtr &x,
template <typename T, template <typename S> class VarTransform> template <typename T, template <typename S> class VarTransform>
void InstanceNormV2CpuKernelMod::UpdateStatsTemplate(const std::vector<kernel::AddressPtr> &inputs, void InstanceNormV2CpuKernelMod::UpdateStatsTemplate(const std::vector<kernel::AddressPtr> &inputs,
const std::vector<kernel::AddressPtr> &outputs) { const std::vector<kernel::AddressPtr> &outputs) {
std::vector<float> _var_sum(instance_num, float_init_zero); std::vector<float> _var_sum(instance_num_, float_init_zero);
std::vector<float> _mean_(instance_num, float_init_zero); std::vector<float> _mean_(instance_num_, float_init_zero);
CollectStatsKernel<T>(inputs[kIndex0], _mean_.data(), _var_sum.data()); CollectStatsKernel<T>(inputs[kIndex0], _mean_.data(), _var_sum.data());
const int64_t image_size = x_shape_4d_[kIndex1] * x_shape_4d_[kIndex2]; const int64_t image_size = x_shape_4d_[kIndex1] * x_shape_4d_[kIndex2];
MS_EXCEPTION_IF_ZERO("image_size", image_size); MS_EXCEPTION_IF_ZERO("image_size", image_size);
@ -116,7 +116,7 @@ void InstanceNormV2CpuKernelMod::UpdateStatsTemplate(const std::vector<kernel::A
static_cast<double>(running_var_vec(idx))); static_cast<double>(running_var_vec(idx)));
} }
}; };
CPUKernelUtils::ParallelFor(loop_momentum, instance_num, static_cast<float>(kGrainSize)); CPUKernelUtils::ParallelFor(loop_momentum, instance_num_, static_cast<float>(kGrainSize));
} }
void InstanceNormV2CpuKernelMod::CollectLinearAndConstant(const typename TTypes<float>::Vec &gamma, void InstanceNormV2CpuKernelMod::CollectLinearAndConstant(const typename TTypes<float>::Vec &gamma,
@ -142,7 +142,7 @@ void InstanceNormV2CpuKernelMod::CollectLinearAndConstant(const typename TTypes<
_beta_[idx] = beta(idx) - mean * _alpha_[idx]; _beta_[idx] = beta(idx) - mean * _alpha_[idx];
} }
}; };
CPUKernelUtils::ParallelFor(loop_instance, instance_num, static_cast<float>(kGrainSize)); CPUKernelUtils::ParallelFor(loop_instance, instance_num_, static_cast<float>(kGrainSize));
} }
template <typename T> template <typename T>
@ -151,8 +151,8 @@ void InstanceNormV2CpuKernelMod::TransformInput(const std::vector<kernel::Addres
const int64_t batch = x_shape_4d_[kIndex0]; const int64_t batch = x_shape_4d_[kIndex0];
const int64_t channel = x_shape_4d_[kIndex3]; const int64_t channel = x_shape_4d_[kIndex3];
const int64_t image_size = x_shape_4d_[kIndex1] * x_shape_4d_[kIndex2]; const int64_t image_size = x_shape_4d_[kIndex1] * x_shape_4d_[kIndex2];
std::vector<float> _alpha_(instance_num, float_init_zero); std::vector<float> _alpha_(instance_num_, float_init_zero);
std::vector<float> _beta_(instance_num, float_init_zero); std::vector<float> _beta_(instance_num_, float_init_zero);
std::vector<int64_t> batch_channels_1d_ = {batch_channels_2d_.front() * batch_channels_2d_.back()}; std::vector<int64_t> batch_channels_1d_ = {batch_channels_2d_.front() * batch_channels_2d_.back()};
auto gamma = EigenTensor(batch_channels_1d_, inputs[kIndex1]->addr).vec<float>(); auto gamma = EigenTensor(batch_channels_1d_, inputs[kIndex1]->addr).vec<float>();
auto beta = EigenTensor(batch_channels_1d_, inputs[kIndex2]->addr).vec<float>(); auto beta = EigenTensor(batch_channels_1d_, inputs[kIndex2]->addr).vec<float>();
@ -183,19 +183,26 @@ void InstanceNormV2CpuKernelMod::TransformInput(const std::vector<kernel::Addres
CPUKernelUtils::ParallelFor(loop_transform, batch, block_size); CPUKernelUtils::ParallelFor(loop_transform, batch, block_size);
} }
void InstanceNormV2CpuKernelMod::InitKernel(const CNodePtr &kernel_node) { bool InstanceNormV2CpuKernelMod::Init(const BaseOperatorPtr &base_operator, const std::vector<KernelTensorPtr> &inputs,
MS_EXCEPTION_IF_NULL(kernel_node); const std::vector<KernelTensorPtr> &outputs) {
kernel_name_ = common::AnfAlgo::GetCNodeName(kernel_node); MS_EXCEPTION_IF_NULL(base_operator);
in_type_ = AnfAlgo::GetOutputDeviceDataType(kernel_node, kIndex0); kernel_name_ = base_operator->name();
std::vector<int64_t> x_shape_ = AnfAlgo::GetInputDeviceShape(kernel_node, kIndex0); auto prim = base_operator->GetPrim();
std::vector<int64_t> batch_channels_ = AnfAlgo::GetInputDeviceShape(kernel_node, kIndex1); MS_EXCEPTION_IF_NULL(prim);
if (x_shape_.size() != kDim4 && x_shape_.size() != kDim5) {
MS_EXCEPTION(ValueError) << "For '" << kernel_name_ << "', the dimension of 'x' should be 4D or 5D, but got " CHECK_KERNEL_INPUTS_NUM(inputs.size(), kInstanceNormV2InputsNum, kernel_name_);
<< x_shape_.size() << "D."; CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kInstanceNormV2OutputNum, kernel_name_);
auto kernel_attr = GetKernelAttrFromTensors(inputs, outputs);
auto is_match = MatchKernelAttr(kernel_attr, GetOpSupport());
if (!is_match.first) {
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', it does not support this kernel data type: " << kernel_attr;
} }
is_training_ = common::AnfAlgo::GetNodeAttr<bool>(kernel_node, kAttrIsTraining);
momentum_ = common::AnfAlgo::GetNodeAttr<float>(kernel_node, kAttrMomentum); in_type_ = inputs[kIndex0]->GetDtype();
epsilon_ = common::AnfAlgo::GetNodeAttr<float>(kernel_node, kAttrEpsilon); is_training_ = GetValue<bool>(prim->GetAttr(kAttrIsTraining));
momentum_ = GetValue<float>(prim->GetAttr(kAttrMomentum));
epsilon_ = GetValue<float>(prim->GetAttr(kAttrEpsilon));
if (momentum_ > momentum_max || momentum_ < momentum_min) { if (momentum_ > momentum_max || momentum_ < momentum_min) {
MS_EXCEPTION(ValueError) << "For '" << kernel_name_ MS_EXCEPTION(ValueError) << "For '" << kernel_name_
<< "momentum value should be in [0, 1], but get momentum = " << momentum_ << "."; << "momentum value should be in [0, 1], but get momentum = " << momentum_ << ".";
@ -204,27 +211,44 @@ void InstanceNormV2CpuKernelMod::InitKernel(const CNodePtr &kernel_node) {
MS_EXCEPTION(ValueError) << "For '" << kernel_name_ MS_EXCEPTION(ValueError) << "For '" << kernel_name_
<< "epsilon value should be in [0, 1), but get epsilon = " << epsilon_ << "."; << "epsilon value should be in [0, 1), but get epsilon = " << epsilon_ << ".";
} }
input_x_is_4d_ = (x_shape_.size() == kDim4);
return true;
}
int InstanceNormV2CpuKernelMod::Resize(const BaseOperatorPtr &base_operator, const std::vector<KernelTensorPtr> &inputs,
const std::vector<KernelTensorPtr> &outputs,
const std::map<uint32_t, tensor::TensorPtr> &) {
if (auto ret = KernelMod::Resize(base_operator, inputs, outputs); ret != KRET_OK) {
return ret;
}
std::vector<int64_t> x_shape = inputs[kIndex0]->GetShapeVector();
std::vector<int64_t> batch_channels = inputs[kIndex1]->GetShapeVector();
if (x_shape.size() != kDim4 && x_shape.size() != kDim5) {
MS_EXCEPTION(ValueError) << "For '" << kernel_name_ << "', the dimension of 'x' should be 4D or 5D, but got "
<< x_shape.size() << "D.";
}
input_x_is_4d_ = (x_shape.size() == kDim4);
// Format NCHW could be considered as a situation of format NC1HWC0 when C0 = 1. // Format NCHW could be considered as a situation of format NC1HWC0 when C0 = 1.
if (input_x_is_4d_) { if (input_x_is_4d_) {
// extern (N, C, H, W) to (N, C, H, W, 1) // extern (N, C, H, W) to (N, C, H, W, 1)
x_shape_.push_back(SizeToLong(kDim1)); x_shape.push_back(SizeToLong(kDim1));
// extern (N, C, 1, 1) to (N, C1=C, 1, 1, C0=1) // extern (N, C, 1, 1) to (N, C1=C, 1, 1, C0=1)
batch_channels_.push_back(SizeToLong(kDim1)); batch_channels.push_back(SizeToLong(kDim1));
} }
// consider (N, C1, H, W, C0) as (N*C1, H, W, C0), similar to (N, H, W, C) // consider (N, C1, H, W, C0) as (N*C1, H, W, C0), similar to (N, H, W, C)
x_shape_4d_ = {x_shape_[kIndex0] * x_shape_[kIndex1], x_shape_[kIndex2], x_shape_[kIndex3], x_shape_[kIndex4]}; x_shape_4d_ = {x_shape[kIndex0] * x_shape[kIndex1], x_shape[kIndex2], x_shape[kIndex3], x_shape[kIndex4]};
// consider (N, C1, 1, 1 C0) as (N*C1, 1, 1, C0), similar to (N, 1, 1, C) // consider (N, C1, 1, 1 C0) as (N*C1, 1, 1, C0), similar to (N, 1, 1, C)
batch_channels_2d_ = {batch_channels_[kIndex0] * batch_channels_[kIndex1], batch_channels_[kIndex4]}; batch_channels_2d_ = {batch_channels[kIndex0] * batch_channels[kIndex1], batch_channels[kIndex4]};
instance_num = CPUKernelUtils::CalcElementNum(batch_channels_2d_); instance_num_ = CPUKernelUtils::CalcElementNum(batch_channels_2d_);
return KRET_OK;
} }
bool InstanceNormV2CpuKernelMod::Launch(const std::vector<kernel::AddressPtr> &inputs, bool InstanceNormV2CpuKernelMod::Launch(const std::vector<kernel::AddressPtr> &inputs,
const std::vector<kernel::AddressPtr> &, const std::vector<kernel::AddressPtr> &,
const std::vector<kernel::AddressPtr> &outputs) { const std::vector<kernel::AddressPtr> &outputs) {
CHECK_KERNEL_INPUTS_NUM(inputs.size(), kInstanceNormV2InputsNum, kernel_name_);
CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kInstanceNormV2OutputNum, kernel_name_);
bool res = false; bool res = false;
switch (in_type_) { switch (in_type_) {
case kNumberTypeFloat16: case kNumberTypeFloat16:

View File

@ -18,6 +18,7 @@
#include <set> #include <set>
#include <vector> #include <vector>
#include <map>
#include "kernel/common_utils.h" #include "kernel/common_utils.h"
#include "plugin/device/cpu/kernel/cpu_kernel.h" #include "plugin/device/cpu/kernel/cpu_kernel.h"
#include "plugin/device/cpu/kernel/eigen/eigen_common_utils.h" #include "plugin/device/cpu/kernel/eigen/eigen_common_utils.h"
@ -25,13 +26,15 @@
namespace mindspore { namespace mindspore {
namespace kernel { namespace kernel {
class InstanceNormV2CpuKernelMod : public DeprecatedNativeCpuKernelMod { class InstanceNormV2CpuKernelMod : public NativeCpuKernelMod {
public: public:
InstanceNormV2CpuKernelMod() = default; InstanceNormV2CpuKernelMod() = default;
~InstanceNormV2CpuKernelMod() override = default; ~InstanceNormV2CpuKernelMod() override = default;
void InitKernel(const CNodePtr &kernel_node) override; bool Init(const BaseOperatorPtr &base_operator, const std::vector<KernelTensorPtr> &inputs,
const std::vector<KernelTensorPtr> &outputs) override;
int Resize(const BaseOperatorPtr &base_operator, const std::vector<KernelTensorPtr> &inputs,
const std::vector<KernelTensorPtr> &outputs, const std::map<uint32_t, tensor::TensorPtr> &) override;
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace, bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs) override; const std::vector<AddressPtr> &outputs) override;
@ -65,7 +68,7 @@ class InstanceNormV2CpuKernelMod : public DeprecatedNativeCpuKernelMod {
std::vector<int64_t> x_shape_4d_; std::vector<int64_t> x_shape_4d_;
std::vector<int64_t> batch_channels_2d_; std::vector<int64_t> batch_channels_2d_;
bool input_x_is_4d_ = true; bool input_x_is_4d_ = true;
int64_t instance_num = 0; int64_t instance_num_ = 0;
}; };
} // namespace kernel } // namespace kernel
} // namespace mindspore } // namespace mindspore

View File

@ -49,6 +49,7 @@ bool RaggedTensorToTensorCpuKernelMod::Init(const BaseOperatorPtr &base_operator
MS_EXCEPTION_IF_NULL(base_operator); MS_EXCEPTION_IF_NULL(base_operator);
kernel_name_ = base_operator->name(); kernel_name_ = base_operator->name();
row_partition_types_ = GetValue<std::vector<std::string>>(base_operator->GetAttr("row_partition_types")); row_partition_types_ = GetValue<std::vector<std::string>>(base_operator->GetAttr("row_partition_types"));
ragged_rank_ = GetRaggedRank(row_partition_types_);
shape_dtype_ = inputs[kShapeInputIndex]->GetDtype(); shape_dtype_ = inputs[kShapeInputIndex]->GetDtype();
values_dtype_ = inputs[kValueInputIndex]->GetDtype(); values_dtype_ = inputs[kValueInputIndex]->GetDtype();
size_t output_num = outputs.size(); size_t output_num = outputs.size();
@ -66,9 +67,15 @@ int RaggedTensorToTensorCpuKernelMod::Resize(const BaseOperatorPtr &base_operato
values_shape_ = inputs[kValueInputIndex]->GetShapeVector(); values_shape_ = inputs[kValueInputIndex]->GetShapeVector();
default_values_shape_ = inputs[kDefaultValueInputIndex]->GetShapeVector(); default_values_shape_ = inputs[kDefaultValueInputIndex]->GetShapeVector();
output_shape_ = outputs[0]->GetShapeVector(); output_shape_ = outputs[0]->GetShapeVector();
if (ragged_rank_ + values_shape_.size() != output_shape_.size()) {
MS_LOG(EXCEPTION) << "For '" << kernel_name_
<< "', row partition size plus 'values' rank should be equal to 'shape' rank: "
<< output_shape_.size() << ", but got row partition size: " << ragged_rank_
<< ", 'values' rank: " << values_shape_.size();
}
row_partition_shape_list_.clear(); row_partition_shape_list_.clear();
for (int i = 0; i < SizeToLong(row_partition_types_.size()); ++i) { for (int i = 0; i < ragged_rank_; ++i) {
row_partition_shape_list_.push_back(inputs[kFirstPartitionInputIndex + i]->GetShapeVector()); row_partition_shape_list_.emplace_back(inputs[kFirstPartitionInputIndex + i]->GetShapeVector());
} }
return KRET_OK; return KRET_OK;
} }
@ -123,16 +130,9 @@ bool RaggedTensorToTensorCpuKernelMod::Launch(const std::vector<kernel::AddressP
template <typename TYPE1, typename TYPE2> template <typename TYPE1, typename TYPE2>
void RaggedTensorToTensorCpuKernelMod::LaunchKernel(const std::vector<kernel::AddressPtr> &inputs, void RaggedTensorToTensorCpuKernelMod::LaunchKernel(const std::vector<kernel::AddressPtr> &inputs,
const std::vector<kernel::AddressPtr> &outputs) { const std::vector<kernel::AddressPtr> &outputs) {
int ragged_rank_ = GetRaggedRank(row_partition_types_);
TYPE1 first_dimension; TYPE1 first_dimension;
GetFirstDimension<TYPE1>(inputs, &first_dimension); GetFirstDimension<TYPE1>(inputs, &first_dimension);
std::vector<TYPE1> output_size; std::vector<TYPE1> output_size;
if (ragged_rank_ + values_shape_.size() != output_shape_.size()) {
MS_LOG(EXCEPTION) << "For '" << kernel_name_
<< "', row partition size plus 'values' rank should be equal to 'shape' rank: "
<< output_shape_.size() << ", but got row partition size: " << ragged_rank_
<< ", 'values' rank: " << values_shape_.size();
}
output_size.reserve(output_shape_.size()); output_size.reserve(output_shape_.size());
for (unsigned int dim = 0; dim < output_shape_.size(); dim++) { for (unsigned int dim = 0; dim < output_shape_.size(); dim++) {
output_size.push_back(output_shape_[dim]); output_size.push_back(output_shape_[dim]);

View File

@ -78,6 +78,7 @@ class RaggedTensorToTensorCpuKernelMod : public NativeCpuKernelMod {
template <typename TYPE1> template <typename TYPE1>
void GetFirstDimension(const std::vector<kernel::AddressPtr> &inputs, TYPE1 *first_dim); void GetFirstDimension(const std::vector<kernel::AddressPtr> &inputs, TYPE1 *first_dim);
int64_t ragged_rank_;
TypeId shape_dtype_{kTypeUnknown}; TypeId shape_dtype_{kTypeUnknown};
TypeId values_dtype_{kTypeUnknown}; TypeId values_dtype_{kTypeUnknown};
std::vector<int64_t> values_shape_; std::vector<int64_t> values_shape_;

View File

@ -25,19 +25,42 @@ constexpr auto kSelfAdjopintEig = "SelfAdjopintEig";
constexpr const size_t kInputsNum = 1; constexpr const size_t kInputsNum = 1;
constexpr const size_t kOutputsNum = 2; constexpr const size_t kOutputsNum = 2;
void SelfAdjointEigCpuKernelMod::InitKernel(const CNodePtr &kernel_node) { bool SelfAdjointEigCpuKernelMod::Init(const BaseOperatorPtr &base_operator, const std::vector<KernelTensorPtr> &inputs,
MS_EXCEPTION_IF_NULL(kernel_node); const std::vector<KernelTensorPtr> &outputs) {
kernel_name_ = common::AnfAlgo::GetCNodeName(kernel_node); MS_EXCEPTION_IF_NULL(base_operator);
dtype_ = AnfAlgo::GetInputDeviceDataType(kernel_node, 0); kernel_name_ = base_operator->name();
attr_ = common::AnfAlgo::GetNodeAttr<bool>(kernel_node, "compute_v"); auto prim = base_operator->GetPrim();
input_shape_ = AnfAlgo::GetInputDeviceShape(kernel_node, 0); MS_EXCEPTION_IF_NULL(prim);
CHECK_KERNEL_INPUTS_NUM(inputs.size(), kInputsNum, kernel_name_);
CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kOutputsNum, kernel_name_);
auto kernel_attr = GetKernelAttrFromTensors(inputs, outputs);
auto is_match = MatchKernelAttr(kernel_attr, GetOpSupport());
if (!is_match.first) {
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', it does not support this kernel data type: " << kernel_attr;
}
dtype_ = inputs[kIndex0]->GetDtype();
compute_v_ = GetValue<bool>(prim->GetAttr("compute_v"));
return true;
}
int SelfAdjointEigCpuKernelMod::Resize(const BaseOperatorPtr &base_operator, const std::vector<KernelTensorPtr> &inputs,
const std::vector<KernelTensorPtr> &outputs,
const std::map<uint32_t, tensor::TensorPtr> &) {
if (auto ret = KernelMod::Resize(base_operator, inputs, outputs); ret != KRET_OK) {
return ret;
}
input_shape_ = inputs[kIndex0]->GetShapeVector();
return KRET_OK;
} }
bool SelfAdjointEigCpuKernelMod::Launch(const std::vector<kernel::AddressPtr> &inputs, bool SelfAdjointEigCpuKernelMod::Launch(const std::vector<kernel::AddressPtr> &inputs,
const std::vector<kernel::AddressPtr> &, const std::vector<kernel::AddressPtr> &,
const std::vector<kernel::AddressPtr> &outputs) { const std::vector<kernel::AddressPtr> &outputs) {
CHECK_KERNEL_INPUTS_NUM(inputs.size(), kInputsNum, kernel_name_);
// CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kOutputsNum, kernel_name_);
if (dtype_ == kNumberTypeFloat32) { if (dtype_ == kNumberTypeFloat32) {
LaunchKernel<float>(inputs, outputs); LaunchKernel<float>(inputs, outputs);
} else if (dtype_ == kNumberTypeFloat64) { } else if (dtype_ == kNumberTypeFloat64) {
@ -59,7 +82,7 @@ bool SelfAdjointEigCpuKernelMod::LaunchKernel(const std::vector<kernel::AddressP
auto *input = reinterpret_cast<T *>(inputs[kIndex0]->addr); auto *input = reinterpret_cast<T *>(inputs[kIndex0]->addr);
auto *output0 = reinterpret_cast<T *>(outputs[kIndex0]->addr); auto *output0 = reinterpret_cast<T *>(outputs[kIndex0]->addr);
auto *output1 = reinterpret_cast<T *>(outputs[kIndex1]->addr); auto *output1 = reinterpret_cast<T *>(outputs[kIndex1]->addr);
bool attr0_ = attr_; bool attr0_ = compute_v_;
// The size of each dimension // The size of each dimension
std::vector<int64_t> shape = input_shape_; std::vector<int64_t> shape = input_shape_;
// rank // rank

View File

@ -28,12 +28,15 @@
namespace mindspore { namespace mindspore {
namespace kernel { namespace kernel {
class SelfAdjointEigCpuKernelMod : public DeprecatedNativeCpuKernelMod { class SelfAdjointEigCpuKernelMod : public NativeCpuKernelMod {
public: public:
SelfAdjointEigCpuKernelMod() = default; SelfAdjointEigCpuKernelMod() = default;
~SelfAdjointEigCpuKernelMod() override = default; ~SelfAdjointEigCpuKernelMod() override = default;
void InitKernel(const CNodePtr &kernel_node) override; bool Init(const BaseOperatorPtr &base_operator, const std::vector<KernelTensorPtr> &inputs,
const std::vector<KernelTensorPtr> &outputs) override;
int Resize(const BaseOperatorPtr &base_operator, const std::vector<KernelTensorPtr> &inputs,
const std::vector<KernelTensorPtr> &outputs, const std::map<uint32_t, tensor::TensorPtr> &) override;
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace, bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs) override; const std::vector<AddressPtr> &outputs) override;
@ -48,7 +51,7 @@ class SelfAdjointEigCpuKernelMod : public DeprecatedNativeCpuKernelMod {
static std::vector<std::pair<KernelAttr, SelfAdjointEigLaunchFunc>> func_list_; static std::vector<std::pair<KernelAttr, SelfAdjointEigLaunchFunc>> func_list_;
TypeId dtype_{kTypeUnknown}; TypeId dtype_{kTypeUnknown};
std::vector<int64_t> input_shape_; std::vector<int64_t> input_shape_;
bool attr_; bool compute_v_;
}; };
} // namespace kernel } // namespace kernel
} // namespace mindspore } // namespace mindspore

View File

@ -34,15 +34,10 @@ constexpr size_t kSparseFillEmptyRowsGradOutputsNum = 2;
constexpr size_t kReverseIndexMapSizeNum = 1; constexpr size_t kReverseIndexMapSizeNum = 1;
constexpr size_t kGradValuesSizeNum = 1; constexpr size_t kGradValuesSizeNum = 1;
const uint32_t kInput_reverse_index_map = 0;
const uint32_t kInput_grad_values = 1;
const uint32_t kOutput_y_values = 0; const uint32_t kOutput_y_values = 0;
const uint32_t kOutput_y_default_value = 1; const uint32_t kOutput_y_default_value = 1;
constexpr char kKernelName[] = "SparseFillEmptyRows"; constexpr char kKernelName[] = "SparseFillEmptyRows";
#define EIGEN_SHAPE_CAST(INPUT) static_cast<Eigen::DenseIndex>(AnfAlgo::GetInputDeviceShape(node_ptr, INPUT)[0])
#define SPARSE_FILL_EMPTY_ROWS_GRAD_COMPUTE_CASE(DTYPE, TYPE) \ #define SPARSE_FILL_EMPTY_ROWS_GRAD_COMPUTE_CASE(DTYPE, TYPE) \
case (DTYPE): { \ case (DTYPE): { \
ret = LaunchKernel<TYPE>(inputs, outputs); \ ret = LaunchKernel<TYPE>(inputs, outputs); \
@ -50,41 +45,61 @@ constexpr char kKernelName[] = "SparseFillEmptyRows";
} }
} // namespace } // namespace
void SparseFillEmptyRowsGradCpuKernelMod::InitKernel(const CNodePtr &kernel_node) { bool SparseFillEmptyRowsGradCpuKernelMod::Init(const BaseOperatorPtr &base_operator,
node_ptr = kernel_node; const std::vector<KernelTensorPtr> &inputs,
MS_EXCEPTION_IF_NULL(node_ptr); const std::vector<KernelTensorPtr> &outputs) {
output_y_values_type_ = AnfAlgo::GetOutputDeviceDataType(kernel_node, 0); MS_EXCEPTION_IF_NULL(base_operator);
output_y_default_value_type_ = AnfAlgo::GetOutputDeviceDataType(kernel_node, 1); kernel_name_ = base_operator->name();
kernel_name_ = common::AnfAlgo::GetCNodeName(kernel_node); auto prim = base_operator->GetPrim();
size_t input_num = common::AnfAlgo::GetInputTensorNum(node_ptr); MS_EXCEPTION_IF_NULL(prim);
CHECK_KERNEL_INPUTS_NUM(input_num, kSparseFillEmptyRowsGradInputsNum, kernel_name_);
size_t output_num = AnfAlgo::GetOutputTensorNum(node_ptr); CHECK_KERNEL_INPUTS_NUM(inputs.size(), kSparseFillEmptyRowsGradInputsNum, kernel_name_);
CHECK_KERNEL_OUTPUTS_NUM(output_num, kSparseFillEmptyRowsGradOutputsNum, kernel_name_); CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kSparseFillEmptyRowsGradOutputsNum, kernel_name_);
const auto reverse_index_map_shape = AnfAlgo::GetInputDeviceShape(node_ptr, 0);
const auto grad_values_shape = AnfAlgo::GetInputDeviceShape(node_ptr, 1); auto kernel_attr = GetKernelAttrFromTensors(inputs, outputs);
if (reverse_index_map_shape.size() != kReverseIndexMapSizeNum && reverse_index_map_shape[0] > grad_values_shape[0]) { auto is_match = MatchKernelAttr(kernel_attr, GetOpSupport());
if (!is_match.first) {
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', it does not support this kernel data type: " << kernel_attr;
}
output_y_values_type_ = inputs[kIndex0]->GetDtype();
output_y_default_value_type_ = inputs[kIndex1]->GetDtype();
return true;
}
int SparseFillEmptyRowsGradCpuKernelMod::Resize(const BaseOperatorPtr &base_operator,
const std::vector<KernelTensorPtr> &inputs,
const std::vector<KernelTensorPtr> &outputs,
const std::map<uint32_t, tensor::TensorPtr> &) {
if (auto ret = KernelMod::Resize(base_operator, inputs, outputs); ret != KRET_OK) {
return ret;
}
reverse_index_map_shape_ = inputs[kIndex0]->GetShapeVector();
grad_values_shape_ = inputs[kIndex1]->GetShapeVector();
if (reverse_index_map_shape_.size() != kReverseIndexMapSizeNum &&
reverse_index_map_shape_[0] > grad_values_shape_[0]) {
MS_LOG(EXCEPTION) << "For '" << kernel_name_ MS_LOG(EXCEPTION) << "For '" << kernel_name_
<< "', it requires 'reverse_index_map' must be a 1-D Tensor and the first dimension length " << "', it requires 'reverse_index_map' must be a 1-D Tensor and the first dimension length "
"must be smalll or equal to the first dimension length of 'values' " "must be smalll or equal to the first dimension length of 'values' "
<< Vector2Str(reverse_index_map_shape); << Vector2Str(reverse_index_map_shape_);
} }
if (grad_values_shape.size() != kGradValuesSizeNum) { if (grad_values_shape_.size() != kGradValuesSizeNum) {
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', it requires 'grad_values' must be a 1-D Tensor " MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', it requires 'grad_values' must be a 1-D Tensor "
<< Vector2Str(grad_values_shape); << Vector2Str(grad_values_shape_);
} }
return KRET_OK;
} }
template <typename T> template <typename T>
bool SparseFillEmptyRowsGradCpuKernelMod::LaunchKernel(const std::vector<kernel::AddressPtr> &inputs, bool SparseFillEmptyRowsGradCpuKernelMod::LaunchKernel(const std::vector<kernel::AddressPtr> &inputs,
const std::vector<kernel::AddressPtr> &outputs) { const std::vector<kernel::AddressPtr> &outputs) {
CHECK_KERNEL_INPUTS_NUM(inputs.size(), kSparseFillEmptyRowsGradInputsNum, kernel_name_);
CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kSparseFillEmptyRowsGradOutputsNum, kernel_name_);
auto reverse_index_map_ptr = reinterpret_cast<int64_t *>(inputs[0]->addr); auto reverse_index_map_ptr = reinterpret_cast<int64_t *>(inputs[0]->addr);
auto grad_values_ptr = reinterpret_cast<T *>(inputs[1]->addr); auto grad_values_ptr = reinterpret_cast<T *>(inputs[1]->addr);
const auto reverse_index_map_shape = AnfAlgo::GetInputDeviceShape(node_ptr, 0); const int64_t N = reverse_index_map_shape_[0];
const auto grad_values_shape = AnfAlgo::GetInputDeviceShape(node_ptr, 1); const int64_t N_full = grad_values_shape_[0];
const int64_t N = reverse_index_map_shape[0];
const int64_t N_full = grad_values_shape[0];
auto y_values_ptr = reinterpret_cast<T *>(outputs[kOutput_y_values]->addr); auto y_values_ptr = reinterpret_cast<T *>(outputs[kOutput_y_values]->addr);
auto ret1 = memset_s(y_values_ptr, N * sizeof(T), 0, N * sizeof(T)); auto ret1 = memset_s(y_values_ptr, N * sizeof(T), 0, N * sizeof(T));
@ -112,12 +127,7 @@ bool SparseFillEmptyRowsGradCpuKernelMod::LaunchKernel(const std::vector<kernel:
*y_default_value += grad_values_ptr[j]; *y_default_value += grad_values_ptr[j];
} }
} }
ShapeVector output_y_values_shape;
ShapeVector output_y_default_value_shape = {};
output_y_values_shape.push_back(N);
common::AnfAlgo::SetOutputInferTypeAndShape({output_y_values_type_, output_y_default_value_type_},
{output_y_values_shape, output_y_default_value_shape},
cnode_ptr_.lock().get());
return true; return true;
} }
@ -199,7 +209,7 @@ bool SparseFillEmptyRowsGradCpuKernelMod::Launch(const std::vector<AddressPtr> &
const std::vector<AddressPtr> &workspace, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs) { const std::vector<AddressPtr> &outputs) {
bool ret = false; bool ret = false;
auto data_type = AnfAlgo::GetInputDeviceDataType(node_ptr, kInput_grad_values); auto data_type = output_y_default_value_type_;
switch (data_type) { switch (data_type) {
SPARSE_FILL_EMPTY_ROWS_GRAD_COMPUTE_CASE(kNumberTypeInt8, int8_t) SPARSE_FILL_EMPTY_ROWS_GRAD_COMPUTE_CASE(kNumberTypeInt8, int8_t)
SPARSE_FILL_EMPTY_ROWS_GRAD_COMPUTE_CASE(kNumberTypeUInt8, uint8_t) SPARSE_FILL_EMPTY_ROWS_GRAD_COMPUTE_CASE(kNumberTypeUInt8, uint8_t)

View File

@ -17,17 +17,21 @@
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_SPARSEFILLEMPTYROWSGRAD_CPU_KERNEL_H_ #ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_SPARSEFILLEMPTYROWSGRAD_CPU_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_SPARSEFILLEMPTYROWSGRAD_CPU_KERNEL_H_ #define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_SPARSEFILLEMPTYROWSGRAD_CPU_KERNEL_H_
#include <vector> #include <vector>
#include <map>
#include "plugin/device/cpu/kernel/cpu_kernel.h" #include "plugin/device/cpu/kernel/cpu_kernel.h"
#include "plugin/factory/ms_factory.h" #include "plugin/factory/ms_factory.h"
namespace mindspore { namespace mindspore {
namespace kernel { namespace kernel {
class SparseFillEmptyRowsGradCpuKernelMod : public DeprecatedNativeCpuKernelMod { class SparseFillEmptyRowsGradCpuKernelMod : public NativeCpuKernelMod {
public: public:
SparseFillEmptyRowsGradCpuKernelMod() = default; SparseFillEmptyRowsGradCpuKernelMod() = default;
~SparseFillEmptyRowsGradCpuKernelMod() override = default; ~SparseFillEmptyRowsGradCpuKernelMod() override = default;
void InitKernel(const CNodePtr &kernel_node) override; bool Init(const BaseOperatorPtr &base_operator, const std::vector<KernelTensorPtr> &inputs,
const std::vector<KernelTensorPtr> &outputs) override;
int Resize(const BaseOperatorPtr &base_operator, const std::vector<KernelTensorPtr> &inputs,
const std::vector<KernelTensorPtr> &outputs, const std::map<uint32_t, tensor::TensorPtr> &) override;
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace, bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs) override; const std::vector<AddressPtr> &outputs) override;
@ -38,9 +42,10 @@ class SparseFillEmptyRowsGradCpuKernelMod : public DeprecatedNativeCpuKernelMod
template <typename T> template <typename T>
bool LaunchKernel(const std::vector<kernel::AddressPtr> &inputs, const std::vector<kernel::AddressPtr> &outputs); bool LaunchKernel(const std::vector<kernel::AddressPtr> &inputs, const std::vector<kernel::AddressPtr> &outputs);
CNodePtr node_ptr;
TypeId output_y_values_type_; TypeId output_y_values_type_;
TypeId output_y_default_value_type_; TypeId output_y_default_value_type_;
ShapeVector reverse_index_map_shape_;
ShapeVector grad_values_shape_;
}; };
} // namespace kernel } // namespace kernel
} // namespace mindspore } // namespace mindspore

View File

@ -52,7 +52,7 @@ BaseShapePtr RaggedTensorToTensorInferShape(const PrimitivePtr &primitive,
if (IsDynamic(shape_shape) || IsDynamicRank(values_shape) || IsDynamicRank(default_value_shape) || if (IsDynamic(shape_shape) || IsDynamicRank(values_shape) || IsDynamicRank(default_value_shape) ||
IsDynamicRank(tensor0_shape)) { IsDynamicRank(tensor0_shape)) {
return std::make_shared<abstract::Shape>(ShapeVector{abstract::Shape::kShapeRankAny}); return std::make_shared<abstract::Shape>(output_shape);
} }
CheckAndConvertUtils::CheckInteger("dimension of 'shape'", SizeToLong(shape_shape.size()), kEqual, 1, prim_name); CheckAndConvertUtils::CheckInteger("dimension of 'shape'", SizeToLong(shape_shape.size()), kEqual, 1, prim_name);

View File

@ -36,6 +36,13 @@ abstract::TupleShapePtr SelfAdjointEigInferShape(const PrimitivePtr &primitive,
auto x = input_args[0]->BuildShape(); auto x = input_args[0]->BuildShape();
auto input_shape = CheckAndConvertUtils::ConvertShapePtrToShapeMap(x)[kShape]; auto input_shape = CheckAndConvertUtils::ConvertShapePtrToShapeMap(x)[kShape];
auto input_rank = SizeToLong(input_shape.size()); auto input_rank = SizeToLong(input_shape.size());
if (IsDynamicRank(input_shape)) {
auto unknow_shape_ptr = std::make_shared<abstract::Shape>(ShapeVector{abstract::Shape::kShapeRankAny});
return std::make_shared<abstract::TupleShape>(
std::vector<abstract::BaseShapePtr>{unknow_shape_ptr, unknow_shape_ptr});
}
CheckAndConvertUtils::CheckInteger("input rank", input_rank, kGreaterEqual, kNumber, prim_name); CheckAndConvertUtils::CheckInteger("input rank", input_rank, kGreaterEqual, kNumber, prim_name);
int64_t last_shape_input = input_shape[input_rank - 1]; int64_t last_shape_input = input_shape[input_rank - 1];
int64_t last_second__shape_input = input_shape[input_rank - 2]; int64_t last_second__shape_input = input_shape[input_rank - 2];