forked from mindspore-Ecosystem/mindspore
ops InstanceNormV2, SparseFillEmptyRowsGrad, SelfAdjointEig support dynamic shape feature
type: feature reason: add code to support dynamic shape for InstanceNormV2, SparseFillEmptyRowsGrad, SelfAdjointEig. ------ Signed-off-by: wang_ziqi <wangziqi4@huawei.com>
This commit is contained in:
parent
5c9e9861fc
commit
badea48451
|
@ -87,8 +87,8 @@ void InstanceNormV2CpuKernelMod::CollectStatsKernel(const kernel::AddressPtr &x,
|
|||
template <typename T, template <typename S> class VarTransform>
|
||||
void InstanceNormV2CpuKernelMod::UpdateStatsTemplate(const std::vector<kernel::AddressPtr> &inputs,
|
||||
const std::vector<kernel::AddressPtr> &outputs) {
|
||||
std::vector<float> _var_sum(instance_num, float_init_zero);
|
||||
std::vector<float> _mean_(instance_num, float_init_zero);
|
||||
std::vector<float> _var_sum(instance_num_, float_init_zero);
|
||||
std::vector<float> _mean_(instance_num_, float_init_zero);
|
||||
CollectStatsKernel<T>(inputs[kIndex0], _mean_.data(), _var_sum.data());
|
||||
const int64_t image_size = x_shape_4d_[kIndex1] * x_shape_4d_[kIndex2];
|
||||
MS_EXCEPTION_IF_ZERO("image_size", image_size);
|
||||
|
@ -116,7 +116,7 @@ void InstanceNormV2CpuKernelMod::UpdateStatsTemplate(const std::vector<kernel::A
|
|||
static_cast<double>(running_var_vec(idx)));
|
||||
}
|
||||
};
|
||||
CPUKernelUtils::ParallelFor(loop_momentum, instance_num, static_cast<float>(kGrainSize));
|
||||
CPUKernelUtils::ParallelFor(loop_momentum, instance_num_, static_cast<float>(kGrainSize));
|
||||
}
|
||||
|
||||
void InstanceNormV2CpuKernelMod::CollectLinearAndConstant(const typename TTypes<float>::Vec &gamma,
|
||||
|
@ -142,7 +142,7 @@ void InstanceNormV2CpuKernelMod::CollectLinearAndConstant(const typename TTypes<
|
|||
_beta_[idx] = beta(idx) - mean * _alpha_[idx];
|
||||
}
|
||||
};
|
||||
CPUKernelUtils::ParallelFor(loop_instance, instance_num, static_cast<float>(kGrainSize));
|
||||
CPUKernelUtils::ParallelFor(loop_instance, instance_num_, static_cast<float>(kGrainSize));
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
|
@ -151,8 +151,8 @@ void InstanceNormV2CpuKernelMod::TransformInput(const std::vector<kernel::Addres
|
|||
const int64_t batch = x_shape_4d_[kIndex0];
|
||||
const int64_t channel = x_shape_4d_[kIndex3];
|
||||
const int64_t image_size = x_shape_4d_[kIndex1] * x_shape_4d_[kIndex2];
|
||||
std::vector<float> _alpha_(instance_num, float_init_zero);
|
||||
std::vector<float> _beta_(instance_num, float_init_zero);
|
||||
std::vector<float> _alpha_(instance_num_, float_init_zero);
|
||||
std::vector<float> _beta_(instance_num_, float_init_zero);
|
||||
std::vector<int64_t> batch_channels_1d_ = {batch_channels_2d_.front() * batch_channels_2d_.back()};
|
||||
auto gamma = EigenTensor(batch_channels_1d_, inputs[kIndex1]->addr).vec<float>();
|
||||
auto beta = EigenTensor(batch_channels_1d_, inputs[kIndex2]->addr).vec<float>();
|
||||
|
@ -183,19 +183,26 @@ void InstanceNormV2CpuKernelMod::TransformInput(const std::vector<kernel::Addres
|
|||
CPUKernelUtils::ParallelFor(loop_transform, batch, block_size);
|
||||
}
|
||||
|
||||
void InstanceNormV2CpuKernelMod::InitKernel(const CNodePtr &kernel_node) {
|
||||
MS_EXCEPTION_IF_NULL(kernel_node);
|
||||
kernel_name_ = common::AnfAlgo::GetCNodeName(kernel_node);
|
||||
in_type_ = AnfAlgo::GetOutputDeviceDataType(kernel_node, kIndex0);
|
||||
std::vector<int64_t> x_shape_ = AnfAlgo::GetInputDeviceShape(kernel_node, kIndex0);
|
||||
std::vector<int64_t> batch_channels_ = AnfAlgo::GetInputDeviceShape(kernel_node, kIndex1);
|
||||
if (x_shape_.size() != kDim4 && x_shape_.size() != kDim5) {
|
||||
MS_EXCEPTION(ValueError) << "For '" << kernel_name_ << "', the dimension of 'x' should be 4D or 5D, but got "
|
||||
<< x_shape_.size() << "D.";
|
||||
bool InstanceNormV2CpuKernelMod::Init(const BaseOperatorPtr &base_operator, const std::vector<KernelTensorPtr> &inputs,
|
||||
const std::vector<KernelTensorPtr> &outputs) {
|
||||
MS_EXCEPTION_IF_NULL(base_operator);
|
||||
kernel_name_ = base_operator->name();
|
||||
auto prim = base_operator->GetPrim();
|
||||
MS_EXCEPTION_IF_NULL(prim);
|
||||
|
||||
CHECK_KERNEL_INPUTS_NUM(inputs.size(), kInstanceNormV2InputsNum, kernel_name_);
|
||||
CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kInstanceNormV2OutputNum, kernel_name_);
|
||||
|
||||
auto kernel_attr = GetKernelAttrFromTensors(inputs, outputs);
|
||||
auto is_match = MatchKernelAttr(kernel_attr, GetOpSupport());
|
||||
if (!is_match.first) {
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', it does not support this kernel data type: " << kernel_attr;
|
||||
}
|
||||
is_training_ = common::AnfAlgo::GetNodeAttr<bool>(kernel_node, kAttrIsTraining);
|
||||
momentum_ = common::AnfAlgo::GetNodeAttr<float>(kernel_node, kAttrMomentum);
|
||||
epsilon_ = common::AnfAlgo::GetNodeAttr<float>(kernel_node, kAttrEpsilon);
|
||||
|
||||
in_type_ = inputs[kIndex0]->GetDtype();
|
||||
is_training_ = GetValue<bool>(prim->GetAttr(kAttrIsTraining));
|
||||
momentum_ = GetValue<float>(prim->GetAttr(kAttrMomentum));
|
||||
epsilon_ = GetValue<float>(prim->GetAttr(kAttrEpsilon));
|
||||
if (momentum_ > momentum_max || momentum_ < momentum_min) {
|
||||
MS_EXCEPTION(ValueError) << "For '" << kernel_name_
|
||||
<< "momentum value should be in [0, 1], but get momentum = " << momentum_ << ".";
|
||||
|
@ -204,27 +211,44 @@ void InstanceNormV2CpuKernelMod::InitKernel(const CNodePtr &kernel_node) {
|
|||
MS_EXCEPTION(ValueError) << "For '" << kernel_name_
|
||||
<< "epsilon value should be in [0, 1), but get epsilon = " << epsilon_ << ".";
|
||||
}
|
||||
input_x_is_4d_ = (x_shape_.size() == kDim4);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
int InstanceNormV2CpuKernelMod::Resize(const BaseOperatorPtr &base_operator, const std::vector<KernelTensorPtr> &inputs,
|
||||
const std::vector<KernelTensorPtr> &outputs,
|
||||
const std::map<uint32_t, tensor::TensorPtr> &) {
|
||||
if (auto ret = KernelMod::Resize(base_operator, inputs, outputs); ret != KRET_OK) {
|
||||
return ret;
|
||||
}
|
||||
|
||||
std::vector<int64_t> x_shape = inputs[kIndex0]->GetShapeVector();
|
||||
std::vector<int64_t> batch_channels = inputs[kIndex1]->GetShapeVector();
|
||||
|
||||
if (x_shape.size() != kDim4 && x_shape.size() != kDim5) {
|
||||
MS_EXCEPTION(ValueError) << "For '" << kernel_name_ << "', the dimension of 'x' should be 4D or 5D, but got "
|
||||
<< x_shape.size() << "D.";
|
||||
}
|
||||
input_x_is_4d_ = (x_shape.size() == kDim4);
|
||||
// Format NCHW could be considered as a situation of format NC1HWC0 when C0 = 1.
|
||||
if (input_x_is_4d_) {
|
||||
// extern (N, C, H, W) to (N, C, H, W, 1)
|
||||
x_shape_.push_back(SizeToLong(kDim1));
|
||||
x_shape.push_back(SizeToLong(kDim1));
|
||||
// extern (N, C, 1, 1) to (N, C1=C, 1, 1, C0=1)
|
||||
batch_channels_.push_back(SizeToLong(kDim1));
|
||||
batch_channels.push_back(SizeToLong(kDim1));
|
||||
}
|
||||
// consider (N, C1, H, W, C0) as (N*C1, H, W, C0), similar to (N, H, W, C)
|
||||
x_shape_4d_ = {x_shape_[kIndex0] * x_shape_[kIndex1], x_shape_[kIndex2], x_shape_[kIndex3], x_shape_[kIndex4]};
|
||||
x_shape_4d_ = {x_shape[kIndex0] * x_shape[kIndex1], x_shape[kIndex2], x_shape[kIndex3], x_shape[kIndex4]};
|
||||
// consider (N, C1, 1, 1 C0) as (N*C1, 1, 1, C0), similar to (N, 1, 1, C)
|
||||
batch_channels_2d_ = {batch_channels_[kIndex0] * batch_channels_[kIndex1], batch_channels_[kIndex4]};
|
||||
instance_num = CPUKernelUtils::CalcElementNum(batch_channels_2d_);
|
||||
batch_channels_2d_ = {batch_channels[kIndex0] * batch_channels[kIndex1], batch_channels[kIndex4]};
|
||||
instance_num_ = CPUKernelUtils::CalcElementNum(batch_channels_2d_);
|
||||
|
||||
return KRET_OK;
|
||||
}
|
||||
|
||||
bool InstanceNormV2CpuKernelMod::Launch(const std::vector<kernel::AddressPtr> &inputs,
|
||||
const std::vector<kernel::AddressPtr> &,
|
||||
const std::vector<kernel::AddressPtr> &outputs) {
|
||||
CHECK_KERNEL_INPUTS_NUM(inputs.size(), kInstanceNormV2InputsNum, kernel_name_);
|
||||
CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kInstanceNormV2OutputNum, kernel_name_);
|
||||
|
||||
bool res = false;
|
||||
switch (in_type_) {
|
||||
case kNumberTypeFloat16:
|
||||
|
|
|
@ -18,6 +18,7 @@
|
|||
|
||||
#include <set>
|
||||
#include <vector>
|
||||
#include <map>
|
||||
#include "kernel/common_utils.h"
|
||||
#include "plugin/device/cpu/kernel/cpu_kernel.h"
|
||||
#include "plugin/device/cpu/kernel/eigen/eigen_common_utils.h"
|
||||
|
@ -25,13 +26,15 @@
|
|||
|
||||
namespace mindspore {
|
||||
namespace kernel {
|
||||
class InstanceNormV2CpuKernelMod : public DeprecatedNativeCpuKernelMod {
|
||||
class InstanceNormV2CpuKernelMod : public NativeCpuKernelMod {
|
||||
public:
|
||||
InstanceNormV2CpuKernelMod() = default;
|
||||
~InstanceNormV2CpuKernelMod() override = default;
|
||||
|
||||
void InitKernel(const CNodePtr &kernel_node) override;
|
||||
|
||||
bool Init(const BaseOperatorPtr &base_operator, const std::vector<KernelTensorPtr> &inputs,
|
||||
const std::vector<KernelTensorPtr> &outputs) override;
|
||||
int Resize(const BaseOperatorPtr &base_operator, const std::vector<KernelTensorPtr> &inputs,
|
||||
const std::vector<KernelTensorPtr> &outputs, const std::map<uint32_t, tensor::TensorPtr> &) override;
|
||||
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
|
||||
const std::vector<AddressPtr> &outputs) override;
|
||||
|
||||
|
@ -65,7 +68,7 @@ class InstanceNormV2CpuKernelMod : public DeprecatedNativeCpuKernelMod {
|
|||
std::vector<int64_t> x_shape_4d_;
|
||||
std::vector<int64_t> batch_channels_2d_;
|
||||
bool input_x_is_4d_ = true;
|
||||
int64_t instance_num = 0;
|
||||
int64_t instance_num_ = 0;
|
||||
};
|
||||
} // namespace kernel
|
||||
} // namespace mindspore
|
||||
|
|
|
@ -49,6 +49,7 @@ bool RaggedTensorToTensorCpuKernelMod::Init(const BaseOperatorPtr &base_operator
|
|||
MS_EXCEPTION_IF_NULL(base_operator);
|
||||
kernel_name_ = base_operator->name();
|
||||
row_partition_types_ = GetValue<std::vector<std::string>>(base_operator->GetAttr("row_partition_types"));
|
||||
ragged_rank_ = GetRaggedRank(row_partition_types_);
|
||||
shape_dtype_ = inputs[kShapeInputIndex]->GetDtype();
|
||||
values_dtype_ = inputs[kValueInputIndex]->GetDtype();
|
||||
size_t output_num = outputs.size();
|
||||
|
@ -66,9 +67,15 @@ int RaggedTensorToTensorCpuKernelMod::Resize(const BaseOperatorPtr &base_operato
|
|||
values_shape_ = inputs[kValueInputIndex]->GetShapeVector();
|
||||
default_values_shape_ = inputs[kDefaultValueInputIndex]->GetShapeVector();
|
||||
output_shape_ = outputs[0]->GetShapeVector();
|
||||
if (ragged_rank_ + values_shape_.size() != output_shape_.size()) {
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name_
|
||||
<< "', row partition size plus 'values' rank should be equal to 'shape' rank: "
|
||||
<< output_shape_.size() << ", but got row partition size: " << ragged_rank_
|
||||
<< ", 'values' rank: " << values_shape_.size();
|
||||
}
|
||||
row_partition_shape_list_.clear();
|
||||
for (int i = 0; i < SizeToLong(row_partition_types_.size()); ++i) {
|
||||
row_partition_shape_list_.push_back(inputs[kFirstPartitionInputIndex + i]->GetShapeVector());
|
||||
for (int i = 0; i < ragged_rank_; ++i) {
|
||||
row_partition_shape_list_.emplace_back(inputs[kFirstPartitionInputIndex + i]->GetShapeVector());
|
||||
}
|
||||
return KRET_OK;
|
||||
}
|
||||
|
@ -123,16 +130,9 @@ bool RaggedTensorToTensorCpuKernelMod::Launch(const std::vector<kernel::AddressP
|
|||
template <typename TYPE1, typename TYPE2>
|
||||
void RaggedTensorToTensorCpuKernelMod::LaunchKernel(const std::vector<kernel::AddressPtr> &inputs,
|
||||
const std::vector<kernel::AddressPtr> &outputs) {
|
||||
int ragged_rank_ = GetRaggedRank(row_partition_types_);
|
||||
TYPE1 first_dimension;
|
||||
GetFirstDimension<TYPE1>(inputs, &first_dimension);
|
||||
std::vector<TYPE1> output_size;
|
||||
if (ragged_rank_ + values_shape_.size() != output_shape_.size()) {
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name_
|
||||
<< "', row partition size plus 'values' rank should be equal to 'shape' rank: "
|
||||
<< output_shape_.size() << ", but got row partition size: " << ragged_rank_
|
||||
<< ", 'values' rank: " << values_shape_.size();
|
||||
}
|
||||
output_size.reserve(output_shape_.size());
|
||||
for (unsigned int dim = 0; dim < output_shape_.size(); dim++) {
|
||||
output_size.push_back(output_shape_[dim]);
|
||||
|
|
|
@ -78,6 +78,7 @@ class RaggedTensorToTensorCpuKernelMod : public NativeCpuKernelMod {
|
|||
template <typename TYPE1>
|
||||
void GetFirstDimension(const std::vector<kernel::AddressPtr> &inputs, TYPE1 *first_dim);
|
||||
|
||||
int64_t ragged_rank_;
|
||||
TypeId shape_dtype_{kTypeUnknown};
|
||||
TypeId values_dtype_{kTypeUnknown};
|
||||
std::vector<int64_t> values_shape_;
|
||||
|
|
|
@ -25,19 +25,42 @@ constexpr auto kSelfAdjopintEig = "SelfAdjopintEig";
|
|||
constexpr const size_t kInputsNum = 1;
|
||||
constexpr const size_t kOutputsNum = 2;
|
||||
|
||||
void SelfAdjointEigCpuKernelMod::InitKernel(const CNodePtr &kernel_node) {
|
||||
MS_EXCEPTION_IF_NULL(kernel_node);
|
||||
kernel_name_ = common::AnfAlgo::GetCNodeName(kernel_node);
|
||||
dtype_ = AnfAlgo::GetInputDeviceDataType(kernel_node, 0);
|
||||
attr_ = common::AnfAlgo::GetNodeAttr<bool>(kernel_node, "compute_v");
|
||||
input_shape_ = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
|
||||
bool SelfAdjointEigCpuKernelMod::Init(const BaseOperatorPtr &base_operator, const std::vector<KernelTensorPtr> &inputs,
|
||||
const std::vector<KernelTensorPtr> &outputs) {
|
||||
MS_EXCEPTION_IF_NULL(base_operator);
|
||||
kernel_name_ = base_operator->name();
|
||||
auto prim = base_operator->GetPrim();
|
||||
MS_EXCEPTION_IF_NULL(prim);
|
||||
|
||||
CHECK_KERNEL_INPUTS_NUM(inputs.size(), kInputsNum, kernel_name_);
|
||||
CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kOutputsNum, kernel_name_);
|
||||
|
||||
auto kernel_attr = GetKernelAttrFromTensors(inputs, outputs);
|
||||
auto is_match = MatchKernelAttr(kernel_attr, GetOpSupport());
|
||||
if (!is_match.first) {
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', it does not support this kernel data type: " << kernel_attr;
|
||||
}
|
||||
|
||||
dtype_ = inputs[kIndex0]->GetDtype();
|
||||
compute_v_ = GetValue<bool>(prim->GetAttr("compute_v"));
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
int SelfAdjointEigCpuKernelMod::Resize(const BaseOperatorPtr &base_operator, const std::vector<KernelTensorPtr> &inputs,
|
||||
const std::vector<KernelTensorPtr> &outputs,
|
||||
const std::map<uint32_t, tensor::TensorPtr> &) {
|
||||
if (auto ret = KernelMod::Resize(base_operator, inputs, outputs); ret != KRET_OK) {
|
||||
return ret;
|
||||
}
|
||||
input_shape_ = inputs[kIndex0]->GetShapeVector();
|
||||
|
||||
return KRET_OK;
|
||||
}
|
||||
|
||||
bool SelfAdjointEigCpuKernelMod::Launch(const std::vector<kernel::AddressPtr> &inputs,
|
||||
const std::vector<kernel::AddressPtr> &,
|
||||
const std::vector<kernel::AddressPtr> &outputs) {
|
||||
CHECK_KERNEL_INPUTS_NUM(inputs.size(), kInputsNum, kernel_name_);
|
||||
// CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kOutputsNum, kernel_name_);
|
||||
if (dtype_ == kNumberTypeFloat32) {
|
||||
LaunchKernel<float>(inputs, outputs);
|
||||
} else if (dtype_ == kNumberTypeFloat64) {
|
||||
|
@ -59,7 +82,7 @@ bool SelfAdjointEigCpuKernelMod::LaunchKernel(const std::vector<kernel::AddressP
|
|||
auto *input = reinterpret_cast<T *>(inputs[kIndex0]->addr);
|
||||
auto *output0 = reinterpret_cast<T *>(outputs[kIndex0]->addr);
|
||||
auto *output1 = reinterpret_cast<T *>(outputs[kIndex1]->addr);
|
||||
bool attr0_ = attr_;
|
||||
bool attr0_ = compute_v_;
|
||||
// The size of each dimension
|
||||
std::vector<int64_t> shape = input_shape_;
|
||||
// rank
|
||||
|
|
|
@ -28,12 +28,15 @@
|
|||
|
||||
namespace mindspore {
|
||||
namespace kernel {
|
||||
class SelfAdjointEigCpuKernelMod : public DeprecatedNativeCpuKernelMod {
|
||||
class SelfAdjointEigCpuKernelMod : public NativeCpuKernelMod {
|
||||
public:
|
||||
SelfAdjointEigCpuKernelMod() = default;
|
||||
~SelfAdjointEigCpuKernelMod() override = default;
|
||||
|
||||
void InitKernel(const CNodePtr &kernel_node) override;
|
||||
bool Init(const BaseOperatorPtr &base_operator, const std::vector<KernelTensorPtr> &inputs,
|
||||
const std::vector<KernelTensorPtr> &outputs) override;
|
||||
int Resize(const BaseOperatorPtr &base_operator, const std::vector<KernelTensorPtr> &inputs,
|
||||
const std::vector<KernelTensorPtr> &outputs, const std::map<uint32_t, tensor::TensorPtr> &) override;
|
||||
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
|
||||
const std::vector<AddressPtr> &outputs) override;
|
||||
|
||||
|
@ -48,7 +51,7 @@ class SelfAdjointEigCpuKernelMod : public DeprecatedNativeCpuKernelMod {
|
|||
static std::vector<std::pair<KernelAttr, SelfAdjointEigLaunchFunc>> func_list_;
|
||||
TypeId dtype_{kTypeUnknown};
|
||||
std::vector<int64_t> input_shape_;
|
||||
bool attr_;
|
||||
bool compute_v_;
|
||||
};
|
||||
} // namespace kernel
|
||||
} // namespace mindspore
|
||||
|
|
|
@ -34,15 +34,10 @@ constexpr size_t kSparseFillEmptyRowsGradOutputsNum = 2;
|
|||
constexpr size_t kReverseIndexMapSizeNum = 1;
|
||||
constexpr size_t kGradValuesSizeNum = 1;
|
||||
|
||||
const uint32_t kInput_reverse_index_map = 0;
|
||||
const uint32_t kInput_grad_values = 1;
|
||||
|
||||
const uint32_t kOutput_y_values = 0;
|
||||
const uint32_t kOutput_y_default_value = 1;
|
||||
constexpr char kKernelName[] = "SparseFillEmptyRows";
|
||||
|
||||
#define EIGEN_SHAPE_CAST(INPUT) static_cast<Eigen::DenseIndex>(AnfAlgo::GetInputDeviceShape(node_ptr, INPUT)[0])
|
||||
|
||||
#define SPARSE_FILL_EMPTY_ROWS_GRAD_COMPUTE_CASE(DTYPE, TYPE) \
|
||||
case (DTYPE): { \
|
||||
ret = LaunchKernel<TYPE>(inputs, outputs); \
|
||||
|
@ -50,41 +45,61 @@ constexpr char kKernelName[] = "SparseFillEmptyRows";
|
|||
}
|
||||
} // namespace
|
||||
|
||||
void SparseFillEmptyRowsGradCpuKernelMod::InitKernel(const CNodePtr &kernel_node) {
|
||||
node_ptr = kernel_node;
|
||||
MS_EXCEPTION_IF_NULL(node_ptr);
|
||||
output_y_values_type_ = AnfAlgo::GetOutputDeviceDataType(kernel_node, 0);
|
||||
output_y_default_value_type_ = AnfAlgo::GetOutputDeviceDataType(kernel_node, 1);
|
||||
kernel_name_ = common::AnfAlgo::GetCNodeName(kernel_node);
|
||||
size_t input_num = common::AnfAlgo::GetInputTensorNum(node_ptr);
|
||||
CHECK_KERNEL_INPUTS_NUM(input_num, kSparseFillEmptyRowsGradInputsNum, kernel_name_);
|
||||
size_t output_num = AnfAlgo::GetOutputTensorNum(node_ptr);
|
||||
CHECK_KERNEL_OUTPUTS_NUM(output_num, kSparseFillEmptyRowsGradOutputsNum, kernel_name_);
|
||||
const auto reverse_index_map_shape = AnfAlgo::GetInputDeviceShape(node_ptr, 0);
|
||||
const auto grad_values_shape = AnfAlgo::GetInputDeviceShape(node_ptr, 1);
|
||||
if (reverse_index_map_shape.size() != kReverseIndexMapSizeNum && reverse_index_map_shape[0] > grad_values_shape[0]) {
|
||||
bool SparseFillEmptyRowsGradCpuKernelMod::Init(const BaseOperatorPtr &base_operator,
|
||||
const std::vector<KernelTensorPtr> &inputs,
|
||||
const std::vector<KernelTensorPtr> &outputs) {
|
||||
MS_EXCEPTION_IF_NULL(base_operator);
|
||||
kernel_name_ = base_operator->name();
|
||||
auto prim = base_operator->GetPrim();
|
||||
MS_EXCEPTION_IF_NULL(prim);
|
||||
|
||||
CHECK_KERNEL_INPUTS_NUM(inputs.size(), kSparseFillEmptyRowsGradInputsNum, kernel_name_);
|
||||
CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kSparseFillEmptyRowsGradOutputsNum, kernel_name_);
|
||||
|
||||
auto kernel_attr = GetKernelAttrFromTensors(inputs, outputs);
|
||||
auto is_match = MatchKernelAttr(kernel_attr, GetOpSupport());
|
||||
if (!is_match.first) {
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', it does not support this kernel data type: " << kernel_attr;
|
||||
}
|
||||
|
||||
output_y_values_type_ = inputs[kIndex0]->GetDtype();
|
||||
output_y_default_value_type_ = inputs[kIndex1]->GetDtype();
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
int SparseFillEmptyRowsGradCpuKernelMod::Resize(const BaseOperatorPtr &base_operator,
|
||||
const std::vector<KernelTensorPtr> &inputs,
|
||||
const std::vector<KernelTensorPtr> &outputs,
|
||||
const std::map<uint32_t, tensor::TensorPtr> &) {
|
||||
if (auto ret = KernelMod::Resize(base_operator, inputs, outputs); ret != KRET_OK) {
|
||||
return ret;
|
||||
}
|
||||
|
||||
reverse_index_map_shape_ = inputs[kIndex0]->GetShapeVector();
|
||||
grad_values_shape_ = inputs[kIndex1]->GetShapeVector();
|
||||
if (reverse_index_map_shape_.size() != kReverseIndexMapSizeNum &&
|
||||
reverse_index_map_shape_[0] > grad_values_shape_[0]) {
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name_
|
||||
<< "', it requires 'reverse_index_map' must be a 1-D Tensor and the first dimension length "
|
||||
"must be smalll or equal to the first dimension length of 'values' "
|
||||
<< Vector2Str(reverse_index_map_shape);
|
||||
<< Vector2Str(reverse_index_map_shape_);
|
||||
}
|
||||
if (grad_values_shape.size() != kGradValuesSizeNum) {
|
||||
if (grad_values_shape_.size() != kGradValuesSizeNum) {
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', it requires 'grad_values' must be a 1-D Tensor "
|
||||
<< Vector2Str(grad_values_shape);
|
||||
<< Vector2Str(grad_values_shape_);
|
||||
}
|
||||
|
||||
return KRET_OK;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
bool SparseFillEmptyRowsGradCpuKernelMod::LaunchKernel(const std::vector<kernel::AddressPtr> &inputs,
|
||||
const std::vector<kernel::AddressPtr> &outputs) {
|
||||
CHECK_KERNEL_INPUTS_NUM(inputs.size(), kSparseFillEmptyRowsGradInputsNum, kernel_name_);
|
||||
CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kSparseFillEmptyRowsGradOutputsNum, kernel_name_);
|
||||
auto reverse_index_map_ptr = reinterpret_cast<int64_t *>(inputs[0]->addr);
|
||||
auto grad_values_ptr = reinterpret_cast<T *>(inputs[1]->addr);
|
||||
const auto reverse_index_map_shape = AnfAlgo::GetInputDeviceShape(node_ptr, 0);
|
||||
const auto grad_values_shape = AnfAlgo::GetInputDeviceShape(node_ptr, 1);
|
||||
const int64_t N = reverse_index_map_shape[0];
|
||||
const int64_t N_full = grad_values_shape[0];
|
||||
const int64_t N = reverse_index_map_shape_[0];
|
||||
const int64_t N_full = grad_values_shape_[0];
|
||||
auto y_values_ptr = reinterpret_cast<T *>(outputs[kOutput_y_values]->addr);
|
||||
|
||||
auto ret1 = memset_s(y_values_ptr, N * sizeof(T), 0, N * sizeof(T));
|
||||
|
@ -112,12 +127,7 @@ bool SparseFillEmptyRowsGradCpuKernelMod::LaunchKernel(const std::vector<kernel:
|
|||
*y_default_value += grad_values_ptr[j];
|
||||
}
|
||||
}
|
||||
ShapeVector output_y_values_shape;
|
||||
ShapeVector output_y_default_value_shape = {};
|
||||
output_y_values_shape.push_back(N);
|
||||
common::AnfAlgo::SetOutputInferTypeAndShape({output_y_values_type_, output_y_default_value_type_},
|
||||
{output_y_values_shape, output_y_default_value_shape},
|
||||
cnode_ptr_.lock().get());
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -199,7 +209,7 @@ bool SparseFillEmptyRowsGradCpuKernelMod::Launch(const std::vector<AddressPtr> &
|
|||
const std::vector<AddressPtr> &workspace,
|
||||
const std::vector<AddressPtr> &outputs) {
|
||||
bool ret = false;
|
||||
auto data_type = AnfAlgo::GetInputDeviceDataType(node_ptr, kInput_grad_values);
|
||||
auto data_type = output_y_default_value_type_;
|
||||
switch (data_type) {
|
||||
SPARSE_FILL_EMPTY_ROWS_GRAD_COMPUTE_CASE(kNumberTypeInt8, int8_t)
|
||||
SPARSE_FILL_EMPTY_ROWS_GRAD_COMPUTE_CASE(kNumberTypeUInt8, uint8_t)
|
||||
|
|
|
@ -17,17 +17,21 @@
|
|||
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_SPARSEFILLEMPTYROWSGRAD_CPU_KERNEL_H_
|
||||
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_SPARSEFILLEMPTYROWSGRAD_CPU_KERNEL_H_
|
||||
#include <vector>
|
||||
#include <map>
|
||||
#include "plugin/device/cpu/kernel/cpu_kernel.h"
|
||||
#include "plugin/factory/ms_factory.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace kernel {
|
||||
class SparseFillEmptyRowsGradCpuKernelMod : public DeprecatedNativeCpuKernelMod {
|
||||
class SparseFillEmptyRowsGradCpuKernelMod : public NativeCpuKernelMod {
|
||||
public:
|
||||
SparseFillEmptyRowsGradCpuKernelMod() = default;
|
||||
~SparseFillEmptyRowsGradCpuKernelMod() override = default;
|
||||
|
||||
void InitKernel(const CNodePtr &kernel_node) override;
|
||||
bool Init(const BaseOperatorPtr &base_operator, const std::vector<KernelTensorPtr> &inputs,
|
||||
const std::vector<KernelTensorPtr> &outputs) override;
|
||||
int Resize(const BaseOperatorPtr &base_operator, const std::vector<KernelTensorPtr> &inputs,
|
||||
const std::vector<KernelTensorPtr> &outputs, const std::map<uint32_t, tensor::TensorPtr> &) override;
|
||||
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
|
||||
const std::vector<AddressPtr> &outputs) override;
|
||||
|
||||
|
@ -38,9 +42,10 @@ class SparseFillEmptyRowsGradCpuKernelMod : public DeprecatedNativeCpuKernelMod
|
|||
template <typename T>
|
||||
bool LaunchKernel(const std::vector<kernel::AddressPtr> &inputs, const std::vector<kernel::AddressPtr> &outputs);
|
||||
|
||||
CNodePtr node_ptr;
|
||||
TypeId output_y_values_type_;
|
||||
TypeId output_y_default_value_type_;
|
||||
ShapeVector reverse_index_map_shape_;
|
||||
ShapeVector grad_values_shape_;
|
||||
};
|
||||
} // namespace kernel
|
||||
} // namespace mindspore
|
||||
|
|
|
@ -52,7 +52,7 @@ BaseShapePtr RaggedTensorToTensorInferShape(const PrimitivePtr &primitive,
|
|||
|
||||
if (IsDynamic(shape_shape) || IsDynamicRank(values_shape) || IsDynamicRank(default_value_shape) ||
|
||||
IsDynamicRank(tensor0_shape)) {
|
||||
return std::make_shared<abstract::Shape>(ShapeVector{abstract::Shape::kShapeRankAny});
|
||||
return std::make_shared<abstract::Shape>(output_shape);
|
||||
}
|
||||
|
||||
CheckAndConvertUtils::CheckInteger("dimension of 'shape'", SizeToLong(shape_shape.size()), kEqual, 1, prim_name);
|
||||
|
|
|
@ -36,6 +36,13 @@ abstract::TupleShapePtr SelfAdjointEigInferShape(const PrimitivePtr &primitive,
|
|||
auto x = input_args[0]->BuildShape();
|
||||
auto input_shape = CheckAndConvertUtils::ConvertShapePtrToShapeMap(x)[kShape];
|
||||
auto input_rank = SizeToLong(input_shape.size());
|
||||
|
||||
if (IsDynamicRank(input_shape)) {
|
||||
auto unknow_shape_ptr = std::make_shared<abstract::Shape>(ShapeVector{abstract::Shape::kShapeRankAny});
|
||||
return std::make_shared<abstract::TupleShape>(
|
||||
std::vector<abstract::BaseShapePtr>{unknow_shape_ptr, unknow_shape_ptr});
|
||||
}
|
||||
|
||||
CheckAndConvertUtils::CheckInteger("input rank", input_rank, kGreaterEqual, kNumber, prim_name);
|
||||
int64_t last_shape_input = input_shape[input_rank - 1];
|
||||
int64_t last_second__shape_input = input_shape[input_rank - 2];
|
||||
|
|
Loading…
Reference in New Issue