!49769 ops InstanceNormV2, SparseFillEmptyRowsGrad, SelfAdjointEig support the dynamic shape feature
Merge pull request !49769 from wang_ziqi/br_instance_norm_v2
commit 5c287b6496
--- InstanceNormV2CpuKernelMod (.cc) ---

@@ -87,8 +87,8 @@ void InstanceNormV2CpuKernelMod::CollectStatsKernel(const kernel::AddressPtr &x,
 template <typename T, template <typename S> class VarTransform>
 void InstanceNormV2CpuKernelMod::UpdateStatsTemplate(const std::vector<kernel::AddressPtr> &inputs,
                                                      const std::vector<kernel::AddressPtr> &outputs) {
-  std::vector<float> _var_sum(instance_num, float_init_zero);
-  std::vector<float> _mean_(instance_num, float_init_zero);
+  std::vector<float> _var_sum(instance_num_, float_init_zero);
+  std::vector<float> _mean_(instance_num_, float_init_zero);
   CollectStatsKernel<T>(inputs[kIndex0], _mean_.data(), _var_sum.data());
   const int64_t image_size = x_shape_4d_[kIndex1] * x_shape_4d_[kIndex2];
   MS_EXCEPTION_IF_ZERO("image_size", image_size);

@@ -116,7 +116,7 @@ void InstanceNormV2CpuKernelMod::UpdateStatsTemplate(const std::vector<kernel::A
                                     static_cast<double>(running_var_vec(idx)));
     }
   };
-  CPUKernelUtils::ParallelFor(loop_momentum, instance_num, static_cast<float>(kGrainSize));
+  CPUKernelUtils::ParallelFor(loop_momentum, instance_num_, static_cast<float>(kGrainSize));
 }
 
 void InstanceNormV2CpuKernelMod::CollectLinearAndConstant(const typename TTypes<float>::Vec &gamma,

@@ -142,7 +142,7 @@ void InstanceNormV2CpuKernelMod::CollectLinearAndConstant(const typename TTypes<
       _beta_[idx] = beta(idx) - mean * _alpha_[idx];
     }
   };
-  CPUKernelUtils::ParallelFor(loop_instance, instance_num, static_cast<float>(kGrainSize));
+  CPUKernelUtils::ParallelFor(loop_instance, instance_num_, static_cast<float>(kGrainSize));
 }
 
 template <typename T>

@@ -151,8 +151,8 @@ void InstanceNormV2CpuKernelMod::TransformInput(const std::vector<kernel::Addres
   const int64_t batch = x_shape_4d_[kIndex0];
   const int64_t channel = x_shape_4d_[kIndex3];
   const int64_t image_size = x_shape_4d_[kIndex1] * x_shape_4d_[kIndex2];
-  std::vector<float> _alpha_(instance_num, float_init_zero);
-  std::vector<float> _beta_(instance_num, float_init_zero);
+  std::vector<float> _alpha_(instance_num_, float_init_zero);
+  std::vector<float> _beta_(instance_num_, float_init_zero);
   std::vector<int64_t> batch_channels_1d_ = {batch_channels_2d_.front() * batch_channels_2d_.back()};
   auto gamma = EigenTensor(batch_channels_1d_, inputs[kIndex1]->addr).vec<float>();
   auto beta = EigenTensor(batch_channels_1d_, inputs[kIndex2]->addr).vec<float>();

@@ -183,19 +183,26 @@ void InstanceNormV2CpuKernelMod::TransformInput(const std::vector<kernel::Addres
   CPUKernelUtils::ParallelFor(loop_transform, batch, block_size);
 }
 
-void InstanceNormV2CpuKernelMod::InitKernel(const CNodePtr &kernel_node) {
-  MS_EXCEPTION_IF_NULL(kernel_node);
-  kernel_name_ = common::AnfAlgo::GetCNodeName(kernel_node);
-  in_type_ = AnfAlgo::GetOutputDeviceDataType(kernel_node, kIndex0);
-  std::vector<int64_t> x_shape_ = AnfAlgo::GetInputDeviceShape(kernel_node, kIndex0);
-  std::vector<int64_t> batch_channels_ = AnfAlgo::GetInputDeviceShape(kernel_node, kIndex1);
-  if (x_shape_.size() != kDim4 && x_shape_.size() != kDim5) {
-    MS_EXCEPTION(ValueError) << "For '" << kernel_name_ << "', the dimension of 'x' should be 4D or 5D, but got "
-                             << x_shape_.size() << "D.";
-  }
-  is_training_ = common::AnfAlgo::GetNodeAttr<bool>(kernel_node, kAttrIsTraining);
-  momentum_ = common::AnfAlgo::GetNodeAttr<float>(kernel_node, kAttrMomentum);
-  epsilon_ = common::AnfAlgo::GetNodeAttr<float>(kernel_node, kAttrEpsilon);
+bool InstanceNormV2CpuKernelMod::Init(const BaseOperatorPtr &base_operator, const std::vector<KernelTensorPtr> &inputs,
+                                      const std::vector<KernelTensorPtr> &outputs) {
+  MS_EXCEPTION_IF_NULL(base_operator);
+  kernel_name_ = base_operator->name();
+  auto prim = base_operator->GetPrim();
+  MS_EXCEPTION_IF_NULL(prim);
+
+  CHECK_KERNEL_INPUTS_NUM(inputs.size(), kInstanceNormV2InputsNum, kernel_name_);
+  CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kInstanceNormV2OutputNum, kernel_name_);
+
+  auto kernel_attr = GetKernelAttrFromTensors(inputs, outputs);
+  auto is_match = MatchKernelAttr(kernel_attr, GetOpSupport());
+  if (!is_match.first) {
+    MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', it does not support this kernel data type: " << kernel_attr;
+  }
+
+  in_type_ = inputs[kIndex0]->GetDtype();
+  is_training_ = GetValue<bool>(prim->GetAttr(kAttrIsTraining));
+  momentum_ = GetValue<float>(prim->GetAttr(kAttrMomentum));
+  epsilon_ = GetValue<float>(prim->GetAttr(kAttrEpsilon));
   if (momentum_ > momentum_max || momentum_ < momentum_min) {
     MS_EXCEPTION(ValueError) << "For '" << kernel_name_
                              << "momentum value should be in [0, 1], but get momentum = " << momentum_ << ".";

@@ -204,27 +211,44 @@ void InstanceNormV2CpuKernelMod::InitKernel(const CNodePtr &kernel_node) {
     MS_EXCEPTION(ValueError) << "For '" << kernel_name_
                              << "epsilon value should be in [0, 1), but get epsilon = " << epsilon_ << ".";
   }
-  input_x_is_4d_ = (x_shape_.size() == kDim4);
+
+  return true;
+}
+
+int InstanceNormV2CpuKernelMod::Resize(const BaseOperatorPtr &base_operator, const std::vector<KernelTensorPtr> &inputs,
+                                       const std::vector<KernelTensorPtr> &outputs,
+                                       const std::map<uint32_t, tensor::TensorPtr> &) {
+  if (auto ret = KernelMod::Resize(base_operator, inputs, outputs); ret != KRET_OK) {
+    return ret;
+  }
+
+  std::vector<int64_t> x_shape = inputs[kIndex0]->GetShapeVector();
+  std::vector<int64_t> batch_channels = inputs[kIndex1]->GetShapeVector();
+
+  if (x_shape.size() != kDim4 && x_shape.size() != kDim5) {
+    MS_EXCEPTION(ValueError) << "For '" << kernel_name_ << "', the dimension of 'x' should be 4D or 5D, but got "
+                             << x_shape.size() << "D.";
+  }
+  input_x_is_4d_ = (x_shape.size() == kDim4);
   // Format NCHW could be considered as a situation of format NC1HWC0 when C0 = 1.
   if (input_x_is_4d_) {
     // extern (N, C, H, W) to (N, C, H, W, 1)
-    x_shape_.push_back(SizeToLong(kDim1));
+    x_shape.push_back(SizeToLong(kDim1));
     // extern (N, C, 1, 1) to (N, C1=C, 1, 1, C0=1)
-    batch_channels_.push_back(SizeToLong(kDim1));
+    batch_channels.push_back(SizeToLong(kDim1));
   }
   // consider (N, C1, H, W, C0) as (N*C1, H, W, C0), similar to (N, H, W, C)
-  x_shape_4d_ = {x_shape_[kIndex0] * x_shape_[kIndex1], x_shape_[kIndex2], x_shape_[kIndex3], x_shape_[kIndex4]};
+  x_shape_4d_ = {x_shape[kIndex0] * x_shape[kIndex1], x_shape[kIndex2], x_shape[kIndex3], x_shape[kIndex4]};
   // consider (N, C1, 1, 1 C0) as (N*C1, 1, 1, C0), similar to (N, 1, 1, C)
-  batch_channels_2d_ = {batch_channels_[kIndex0] * batch_channels_[kIndex1], batch_channels_[kIndex4]};
-  instance_num = CPUKernelUtils::CalcElementNum(batch_channels_2d_);
+  batch_channels_2d_ = {batch_channels[kIndex0] * batch_channels[kIndex1], batch_channels[kIndex4]};
+  instance_num_ = CPUKernelUtils::CalcElementNum(batch_channels_2d_);
+
+  return KRET_OK;
 }
 
 bool InstanceNormV2CpuKernelMod::Launch(const std::vector<kernel::AddressPtr> &inputs,
                                         const std::vector<kernel::AddressPtr> &,
                                         const std::vector<kernel::AddressPtr> &outputs) {
-  CHECK_KERNEL_INPUTS_NUM(inputs.size(), kInstanceNormV2InputsNum, kernel_name_);
-  CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kInstanceNormV2OutputNum, kernel_name_);
-
   bool res = false;
   switch (in_type_) {
     case kNumberTypeFloat16:
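Taken together, the InstanceNormV2 hunks show the standard recipe for dynamic shape support: attributes and dtypes are read once in Init, everything derived from tensor shapes (here instance_num_, x_shape_4d_, batch_channels_2d_) is recomputed in Resize on every shape change, and Launch only consumes raw buffers. A minimal self-contained sketch of that lifecycle — the class and helpers below are illustrative stand-ins, not the MindSpore API:

#include <cstdint>
#include <functional>
#include <iostream>
#include <numeric>
#include <vector>

// Toy stand-in for the framework's tensor descriptor; only the call order matters here.
struct ShapedInput {
  std::vector<int64_t> shape;
};

class ToyDynShapeKernel {
 public:
  // Init: called once per kernel instance. Attributes and dtypes only;
  // nothing shape-dependent belongs here.
  bool Init(float epsilon) {
    epsilon_ = epsilon;
    return true;
  }

  // Resize: called again whenever the actual input shapes change.
  // All shape-derived state is rebuilt here.
  int Resize(const ShapedInput &x) {
    elem_num_ = std::accumulate(x.shape.begin(), x.shape.end(), int64_t{1}, std::multiplies<int64_t>());
    return 0;  // KRET_OK
  }

  // Launch: pure computation on raw buffers sized by the last Resize.
  bool Launch(const float *in, float *out) const {
    for (int64_t i = 0; i < elem_num_; ++i) {
      out[i] = in[i] + epsilon_;
    }
    return true;
  }

 private:
  float epsilon_ = 0.0f;
  int64_t elem_num_ = 0;  // shape-derived: recomputed by every Resize
};

int main() {
  ToyDynShapeKernel kernel;
  kernel.Init(1e-5f);
  for (int64_t n : {4, 7}) {  // one kernel instance, two different shapes
    kernel.Resize(ShapedInput{{n}});
    std::vector<float> in(n, 1.0f), out(n);
    kernel.Launch(in.data(), out.data());
    std::cout << "n=" << n << " out[0]=" << out[0] << "\n";
  }
  return 0;
}

The rename instance_num -> instance_num_ follows from the same split: the count is now persistent member state owned by Resize, and the trailing underscore is the codebase's convention for data members.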
--- InstanceNormV2CpuKernelMod (.h) ---

@@ -18,6 +18,7 @@
 
 #include <set>
 #include <vector>
+#include <map>
 #include "kernel/common_utils.h"
 #include "plugin/device/cpu/kernel/cpu_kernel.h"
 #include "plugin/device/cpu/kernel/eigen/eigen_common_utils.h"

@@ -25,13 +26,15 @@
 
 namespace mindspore {
 namespace kernel {
-class InstanceNormV2CpuKernelMod : public DeprecatedNativeCpuKernelMod {
+class InstanceNormV2CpuKernelMod : public NativeCpuKernelMod {
  public:
   InstanceNormV2CpuKernelMod() = default;
   ~InstanceNormV2CpuKernelMod() override = default;
 
-  void InitKernel(const CNodePtr &kernel_node) override;
+  bool Init(const BaseOperatorPtr &base_operator, const std::vector<KernelTensorPtr> &inputs,
+            const std::vector<KernelTensorPtr> &outputs) override;
+  int Resize(const BaseOperatorPtr &base_operator, const std::vector<KernelTensorPtr> &inputs,
+             const std::vector<KernelTensorPtr> &outputs, const std::map<uint32_t, tensor::TensorPtr> &) override;
   bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
               const std::vector<AddressPtr> &outputs) override;

@@ -65,7 +68,7 @@ class InstanceNormV2CpuKernelMod : public DeprecatedNativeCpuKernelMod {
   std::vector<int64_t> x_shape_4d_;
   std::vector<int64_t> batch_channels_2d_;
   bool input_x_is_4d_ = true;
-  int64_t instance_num = 0;
+  int64_t instance_num_ = 0;
 };
 }  // namespace kernel
 }  // namespace mindspore
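The new Resize keeps the layout trick documented in the comments above: a 4-D NCHW input is padded with C0 = 1 and the leading N and C1 axes are folded together, so a single NC1HWC0 code path serves both formats. A standalone sketch of just that shape arithmetic (the helper name is hypothetical, no framework types involved):

#include <cassert>
#include <cstdint>
#include <vector>

// Collapse (N, C1, H, W, C0) to (N*C1, H, W, C0); pad 4-D NCHW with C0 = 1 first.
std::vector<int64_t> To4dView(std::vector<int64_t> x_shape) {
  assert(x_shape.size() == 4 || x_shape.size() == 5);
  if (x_shape.size() == 4) {
    x_shape.push_back(1);  // (N, C, H, W) -> (N, C, H, W, 1)
  }
  return {x_shape[0] * x_shape[1], x_shape[2], x_shape[3], x_shape[4]};
}

int main() {
  auto a = To4dView({2, 3, 8, 8});      // NCHW    -> (6, 8, 8, 1)
  auto b = To4dView({2, 3, 8, 8, 16});  // NC1HWC0 -> (6, 8, 8, 16)
  assert(a == (std::vector<int64_t>{6, 8, 8, 1}));
  assert(b == (std::vector<int64_t>{6, 8, 8, 16}));
  return 0;
}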
--- RaggedTensorToTensorCpuKernelMod (.cc) ---

@@ -49,6 +49,7 @@ bool RaggedTensorToTensorCpuKernelMod::Init(const BaseOperatorPtr &base_operator
   MS_EXCEPTION_IF_NULL(base_operator);
   kernel_name_ = base_operator->name();
   row_partition_types_ = GetValue<std::vector<std::string>>(base_operator->GetAttr("row_partition_types"));
+  ragged_rank_ = GetRaggedRank(row_partition_types_);
   shape_dtype_ = inputs[kShapeInputIndex]->GetDtype();
   values_dtype_ = inputs[kValueInputIndex]->GetDtype();
   size_t output_num = outputs.size();

@@ -66,9 +67,15 @@ int RaggedTensorToTensorCpuKernelMod::Resize(const BaseOperatorPtr &base_operato
   values_shape_ = inputs[kValueInputIndex]->GetShapeVector();
   default_values_shape_ = inputs[kDefaultValueInputIndex]->GetShapeVector();
   output_shape_ = outputs[0]->GetShapeVector();
+  if (ragged_rank_ + values_shape_.size() != output_shape_.size()) {
+    MS_LOG(EXCEPTION) << "For '" << kernel_name_
+                      << "', row partition size plus 'values' rank should be equal to 'shape' rank: "
+                      << output_shape_.size() << ", but got row partition size: " << ragged_rank_
+                      << ", 'values' rank: " << values_shape_.size();
+  }
   row_partition_shape_list_.clear();
-  for (int i = 0; i < SizeToLong(row_partition_types_.size()); ++i) {
-    row_partition_shape_list_.push_back(inputs[kFirstPartitionInputIndex + i]->GetShapeVector());
+  for (int i = 0; i < ragged_rank_; ++i) {
+    row_partition_shape_list_.emplace_back(inputs[kFirstPartitionInputIndex + i]->GetShapeVector());
   }
   return KRET_OK;
 }

@@ -123,16 +130,9 @@ bool RaggedTensorToTensorCpuKernelMod::Launch(const std::vector<kernel::AddressP
 template <typename TYPE1, typename TYPE2>
 void RaggedTensorToTensorCpuKernelMod::LaunchKernel(const std::vector<kernel::AddressPtr> &inputs,
                                                     const std::vector<kernel::AddressPtr> &outputs) {
-  int ragged_rank_ = GetRaggedRank(row_partition_types_);
   TYPE1 first_dimension;
   GetFirstDimension<TYPE1>(inputs, &first_dimension);
   std::vector<TYPE1> output_size;
-  if (ragged_rank_ + values_shape_.size() != output_shape_.size()) {
-    MS_LOG(EXCEPTION) << "For '" << kernel_name_
-                      << "', row partition size plus 'values' rank should be equal to 'shape' rank: "
-                      << output_shape_.size() << ", but got row partition size: " << ragged_rank_
-                      << ", 'values' rank: " << values_shape_.size();
-  }
   output_size.reserve(output_shape_.size());
   for (unsigned int dim = 0; dim < output_shape_.size(); dim++) {
     output_size.push_back(output_shape_[dim]);
--- RaggedTensorToTensorCpuKernelMod (.h) ---

@@ -78,6 +78,7 @@ class RaggedTensorToTensorCpuKernelMod : public NativeCpuKernelMod {
   template <typename TYPE1>
   void GetFirstDimension(const std::vector<kernel::AddressPtr> &inputs, TYPE1 *first_dim);
 
+  int64_t ragged_rank_;
   TypeId shape_dtype_{kTypeUnknown};
   TypeId values_dtype_{kTypeUnknown};
   std::vector<int64_t> values_shape_;
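For RaggedTensorToTensor, the ragged rank is now computed once in Init, and the rank invariant (row partition count plus 'values' rank equals 'shape' rank) is enforced in Resize, so a bad shape now fails before any launch. A small sketch of that invariant; GetRaggedRank below is a simplified stand-in for the kernel's helper, assuming one ragged dimension per partition tensor except a leading FIRST_DIM_SIZE entry:

#include <cassert>
#include <cstdint>
#include <string>
#include <vector>

// Simplified stand-in: one ragged dimension per row-partition tensor,
// except a FIRST_DIM_SIZE entry, which only carries the outermost size.
int64_t GetRaggedRank(const std::vector<std::string> &row_partition_types) {
  if (row_partition_types.empty()) return 0;
  int64_t rank = static_cast<int64_t>(row_partition_types.size());
  if (row_partition_types.front() == "FIRST_DIM_SIZE") --rank;
  return rank;
}

int main() {
  // Ragged rank 2 with rank-1 values -> the dense output 'shape' must have rank 3.
  std::vector<std::string> types = {"FIRST_DIM_SIZE", "VALUE_ROWIDS", "VALUE_ROWIDS"};
  int64_t ragged_rank = GetRaggedRank(types);
  int64_t values_rank = 1, shape_rank = 3;
  assert(ragged_rank + values_rank == shape_rank);
  return 0;
}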
--- SelfAdjointEigCpuKernelMod (.cc) ---

@@ -25,19 +25,42 @@ constexpr auto kSelfAdjopintEig = "SelfAdjopintEig";
 constexpr const size_t kInputsNum = 1;
 constexpr const size_t kOutputsNum = 2;
 
-void SelfAdjointEigCpuKernelMod::InitKernel(const CNodePtr &kernel_node) {
-  MS_EXCEPTION_IF_NULL(kernel_node);
-  kernel_name_ = common::AnfAlgo::GetCNodeName(kernel_node);
-  dtype_ = AnfAlgo::GetInputDeviceDataType(kernel_node, 0);
-  attr_ = common::AnfAlgo::GetNodeAttr<bool>(kernel_node, "compute_v");
-  input_shape_ = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
+bool SelfAdjointEigCpuKernelMod::Init(const BaseOperatorPtr &base_operator, const std::vector<KernelTensorPtr> &inputs,
+                                      const std::vector<KernelTensorPtr> &outputs) {
+  MS_EXCEPTION_IF_NULL(base_operator);
+  kernel_name_ = base_operator->name();
+  auto prim = base_operator->GetPrim();
+  MS_EXCEPTION_IF_NULL(prim);
+
+  CHECK_KERNEL_INPUTS_NUM(inputs.size(), kInputsNum, kernel_name_);
+  CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kOutputsNum, kernel_name_);
+
+  auto kernel_attr = GetKernelAttrFromTensors(inputs, outputs);
+  auto is_match = MatchKernelAttr(kernel_attr, GetOpSupport());
+  if (!is_match.first) {
+    MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', it does not support this kernel data type: " << kernel_attr;
+  }
+
+  dtype_ = inputs[kIndex0]->GetDtype();
+  compute_v_ = GetValue<bool>(prim->GetAttr("compute_v"));
+
+  return true;
+}
+
+int SelfAdjointEigCpuKernelMod::Resize(const BaseOperatorPtr &base_operator, const std::vector<KernelTensorPtr> &inputs,
+                                       const std::vector<KernelTensorPtr> &outputs,
+                                       const std::map<uint32_t, tensor::TensorPtr> &) {
+  if (auto ret = KernelMod::Resize(base_operator, inputs, outputs); ret != KRET_OK) {
+    return ret;
+  }
+  input_shape_ = inputs[kIndex0]->GetShapeVector();
+
+  return KRET_OK;
 }
 
 bool SelfAdjointEigCpuKernelMod::Launch(const std::vector<kernel::AddressPtr> &inputs,
                                         const std::vector<kernel::AddressPtr> &,
                                         const std::vector<kernel::AddressPtr> &outputs) {
-  CHECK_KERNEL_INPUTS_NUM(inputs.size(), kInputsNum, kernel_name_);
-  // CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kOutputsNum, kernel_name_);
   if (dtype_ == kNumberTypeFloat32) {
     LaunchKernel<float>(inputs, outputs);
   } else if (dtype_ == kNumberTypeFloat64) {

@@ -59,7 +82,7 @@ bool SelfAdjointEigCpuKernelMod::LaunchKernel(const std::vector<kernel::AddressP
   auto *input = reinterpret_cast<T *>(inputs[kIndex0]->addr);
   auto *output0 = reinterpret_cast<T *>(outputs[kIndex0]->addr);
   auto *output1 = reinterpret_cast<T *>(outputs[kIndex1]->addr);
-  bool attr0_ = attr_;
+  bool attr0_ = compute_v_;
   // The size of each dimension
   std::vector<int64_t> shape = input_shape_;
   // rank
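SelfAdjointEig's LaunchKernel — unchanged apart from reading input_shape_ filled in by Resize and the attr_ -> compute_v_ rename — builds on Eigen's self-adjoint solver: eigenvalues are always produced, eigenvectors only when compute_v is set. A minimal single-matrix sketch with the same Eigen facility (a standalone illustration, not the kernel's batched loop):

#include <Eigen/Dense>
#include <iostream>

int main() {
  Eigen::MatrixXd a(2, 2);
  a << 2, 1,
       1, 2;  // symmetric, so the self-adjoint solver applies
  bool compute_v = true;
  Eigen::SelfAdjointEigenSolver<Eigen::MatrixXd> solver(
      a, compute_v ? Eigen::ComputeEigenvectors : Eigen::EigenvaluesOnly);
  std::cout << "eigenvalues:\n" << solver.eigenvalues() << "\n";  // 1 and 3
  if (compute_v) {
    std::cout << "eigenvectors:\n" << solver.eigenvectors() << "\n";
  }
  return 0;
}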
--- SelfAdjointEigCpuKernelMod (.h) ---

@@ -28,12 +28,15 @@
 
 namespace mindspore {
 namespace kernel {
-class SelfAdjointEigCpuKernelMod : public DeprecatedNativeCpuKernelMod {
+class SelfAdjointEigCpuKernelMod : public NativeCpuKernelMod {
  public:
   SelfAdjointEigCpuKernelMod() = default;
   ~SelfAdjointEigCpuKernelMod() override = default;
 
-  void InitKernel(const CNodePtr &kernel_node) override;
+  bool Init(const BaseOperatorPtr &base_operator, const std::vector<KernelTensorPtr> &inputs,
+            const std::vector<KernelTensorPtr> &outputs) override;
+  int Resize(const BaseOperatorPtr &base_operator, const std::vector<KernelTensorPtr> &inputs,
+             const std::vector<KernelTensorPtr> &outputs, const std::map<uint32_t, tensor::TensorPtr> &) override;
   bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
               const std::vector<AddressPtr> &outputs) override;

@@ -48,7 +51,7 @@ class SelfAdjointEigCpuKernelMod : public DeprecatedNativeCpuKernelMod {
   static std::vector<std::pair<KernelAttr, SelfAdjointEigLaunchFunc>> func_list_;
   TypeId dtype_{kTypeUnknown};
   std::vector<int64_t> input_shape_;
-  bool attr_;
+  bool compute_v_;
 };
 }  // namespace kernel
 }  // namespace mindspore
--- SparseFillEmptyRowsGradCpuKernelMod (.cc) ---

@@ -34,15 +34,10 @@ constexpr size_t kSparseFillEmptyRowsGradOutputsNum = 2;
 constexpr size_t kReverseIndexMapSizeNum = 1;
 constexpr size_t kGradValuesSizeNum = 1;
 
-const uint32_t kInput_reverse_index_map = 0;
-const uint32_t kInput_grad_values = 1;
-
 const uint32_t kOutput_y_values = 0;
 const uint32_t kOutput_y_default_value = 1;
 constexpr char kKernelName[] = "SparseFillEmptyRows";
 
-#define EIGEN_SHAPE_CAST(INPUT) static_cast<Eigen::DenseIndex>(AnfAlgo::GetInputDeviceShape(node_ptr, INPUT)[0])
-
 #define SPARSE_FILL_EMPTY_ROWS_GRAD_COMPUTE_CASE(DTYPE, TYPE) \
   case (DTYPE): {                                             \
     ret = LaunchKernel<TYPE>(inputs, outputs);                \

@@ -50,41 +45,61 @@ constexpr char kKernelName[] = "SparseFillEmptyRows";
   }
 }  // namespace
 
-void SparseFillEmptyRowsGradCpuKernelMod::InitKernel(const CNodePtr &kernel_node) {
-  node_ptr = kernel_node;
-  MS_EXCEPTION_IF_NULL(node_ptr);
-  output_y_values_type_ = AnfAlgo::GetOutputDeviceDataType(kernel_node, 0);
-  output_y_default_value_type_ = AnfAlgo::GetOutputDeviceDataType(kernel_node, 1);
-  kernel_name_ = common::AnfAlgo::GetCNodeName(kernel_node);
-  size_t input_num = common::AnfAlgo::GetInputTensorNum(node_ptr);
-  CHECK_KERNEL_INPUTS_NUM(input_num, kSparseFillEmptyRowsGradInputsNum, kernel_name_);
-  size_t output_num = AnfAlgo::GetOutputTensorNum(node_ptr);
-  CHECK_KERNEL_OUTPUTS_NUM(output_num, kSparseFillEmptyRowsGradOutputsNum, kernel_name_);
-  const auto reverse_index_map_shape = AnfAlgo::GetInputDeviceShape(node_ptr, 0);
-  const auto grad_values_shape = AnfAlgo::GetInputDeviceShape(node_ptr, 1);
-  if (reverse_index_map_shape.size() != kReverseIndexMapSizeNum && reverse_index_map_shape[0] > grad_values_shape[0]) {
+bool SparseFillEmptyRowsGradCpuKernelMod::Init(const BaseOperatorPtr &base_operator,
+                                               const std::vector<KernelTensorPtr> &inputs,
+                                               const std::vector<KernelTensorPtr> &outputs) {
+  MS_EXCEPTION_IF_NULL(base_operator);
+  kernel_name_ = base_operator->name();
+  auto prim = base_operator->GetPrim();
+  MS_EXCEPTION_IF_NULL(prim);
+
+  CHECK_KERNEL_INPUTS_NUM(inputs.size(), kSparseFillEmptyRowsGradInputsNum, kernel_name_);
+  CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kSparseFillEmptyRowsGradOutputsNum, kernel_name_);
+
+  auto kernel_attr = GetKernelAttrFromTensors(inputs, outputs);
+  auto is_match = MatchKernelAttr(kernel_attr, GetOpSupport());
+  if (!is_match.first) {
+    MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', it does not support this kernel data type: " << kernel_attr;
+  }
+
+  output_y_values_type_ = inputs[kIndex0]->GetDtype();
+  output_y_default_value_type_ = inputs[kIndex1]->GetDtype();
+
+  return true;
+}
+
+int SparseFillEmptyRowsGradCpuKernelMod::Resize(const BaseOperatorPtr &base_operator,
+                                                const std::vector<KernelTensorPtr> &inputs,
+                                                const std::vector<KernelTensorPtr> &outputs,
+                                                const std::map<uint32_t, tensor::TensorPtr> &) {
+  if (auto ret = KernelMod::Resize(base_operator, inputs, outputs); ret != KRET_OK) {
+    return ret;
+  }
+
+  reverse_index_map_shape_ = inputs[kIndex0]->GetShapeVector();
+  grad_values_shape_ = inputs[kIndex1]->GetShapeVector();
+  if (reverse_index_map_shape_.size() != kReverseIndexMapSizeNum &&
+      reverse_index_map_shape_[0] > grad_values_shape_[0]) {
     MS_LOG(EXCEPTION) << "For '" << kernel_name_
                       << "', it requires 'reverse_index_map' must be a 1-D Tensor and the first dimension length "
                          "must be smalll or equal to the first dimension length of 'values' "
-                      << Vector2Str(reverse_index_map_shape);
+                      << Vector2Str(reverse_index_map_shape_);
   }
-  if (grad_values_shape.size() != kGradValuesSizeNum) {
+  if (grad_values_shape_.size() != kGradValuesSizeNum) {
     MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', it requires 'grad_values' must be a 1-D Tensor "
-                      << Vector2Str(grad_values_shape);
+                      << Vector2Str(grad_values_shape_);
   }
+
+  return KRET_OK;
 }
 
 template <typename T>
 bool SparseFillEmptyRowsGradCpuKernelMod::LaunchKernel(const std::vector<kernel::AddressPtr> &inputs,
                                                        const std::vector<kernel::AddressPtr> &outputs) {
-  CHECK_KERNEL_INPUTS_NUM(inputs.size(), kSparseFillEmptyRowsGradInputsNum, kernel_name_);
-  CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kSparseFillEmptyRowsGradOutputsNum, kernel_name_);
   auto reverse_index_map_ptr = reinterpret_cast<int64_t *>(inputs[0]->addr);
   auto grad_values_ptr = reinterpret_cast<T *>(inputs[1]->addr);
-  const auto reverse_index_map_shape = AnfAlgo::GetInputDeviceShape(node_ptr, 0);
-  const auto grad_values_shape = AnfAlgo::GetInputDeviceShape(node_ptr, 1);
-  const int64_t N = reverse_index_map_shape[0];
-  const int64_t N_full = grad_values_shape[0];
+  const int64_t N = reverse_index_map_shape_[0];
+  const int64_t N_full = grad_values_shape_[0];
   auto y_values_ptr = reinterpret_cast<T *>(outputs[kOutput_y_values]->addr);
 
   auto ret1 = memset_s(y_values_ptr, N * sizeof(T), 0, N * sizeof(T));

@@ -112,12 +127,7 @@ bool SparseFillEmptyRowsGradCpuKernelMod::LaunchKernel(const std::vector<kernel:
       *y_default_value += grad_values_ptr[j];
     }
   }
-  ShapeVector output_y_values_shape;
-  ShapeVector output_y_default_value_shape = {};
-  output_y_values_shape.push_back(N);
-  common::AnfAlgo::SetOutputInferTypeAndShape({output_y_values_type_, output_y_default_value_type_},
-                                              {output_y_values_shape, output_y_default_value_shape},
-                                              cnode_ptr_.lock().get());
   return true;
 }
 

@@ -199,7 +209,7 @@ bool SparseFillEmptyRowsGradCpuKernelMod::Launch(const std::vector<AddressPtr> &
                                                  const std::vector<AddressPtr> &workspace,
                                                  const std::vector<AddressPtr> &outputs) {
   bool ret = false;
-  auto data_type = AnfAlgo::GetInputDeviceDataType(node_ptr, kInput_grad_values);
+  auto data_type = output_y_default_value_type_;
   switch (data_type) {
     SPARSE_FILL_EMPTY_ROWS_GRAD_COMPUTE_CASE(kNumberTypeInt8, int8_t)
     SPARSE_FILL_EMPTY_ROWS_GRAD_COMPUTE_CASE(kNumberTypeUInt8, uint8_t)
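The SparseFillEmptyRowsGrad computation itself is untouched; only the source of N and N_full changed (member shapes cached by Resize instead of AnfAlgo queries against a stored node). In outline the gradient is a gather plus a residual sum — d_values takes the grad rows that reverse_index_map points at, and the remaining rows (the ones that were inserted defaults) accumulate into d_default_value. A standalone sketch under those assumptions, not the kernel's exact code:

#include <cassert>
#include <cstdint>
#include <vector>

// Backward of SparseFillEmptyRows: d_values gathers grad rows via
// reverse_index_map; grad rows never referenced (the inserted empty-row
// positions) are summed into d_default_value.
void SparseFillEmptyRowsGrad(const std::vector<int64_t> &reverse_index_map,
                             const std::vector<float> &grad_values,
                             std::vector<float> *d_values, float *d_default_value) {
  const int64_t n = static_cast<int64_t>(reverse_index_map.size());  // N
  const int64_t n_full = static_cast<int64_t>(grad_values.size());   // N_full
  d_values->assign(n, 0.0f);  // mirrors the kernel's memset_s of y_values
  std::vector<bool> visited(n_full, false);
  for (int64_t i = 0; i < n; ++i) {
    (*d_values)[i] = grad_values[reverse_index_map[i]];
    visited[reverse_index_map[i]] = true;
  }
  *d_default_value = 0.0f;
  for (int64_t j = 0; j < n_full; ++j) {
    if (!visited[j]) *d_default_value += grad_values[j];  // unreferenced rows only
  }
}

int main() {
  // Original values 0 and 1 landed at dense rows 0 and 2; row 1 was an
  // inserted default, so its grad flows to d_default_value.
  std::vector<float> d_values;
  float d_default = 0.0f;
  SparseFillEmptyRowsGrad({0, 2}, {1.0f, 5.0f, 2.0f}, &d_values, &d_default);
  assert(d_values == (std::vector<float>{1.0f, 2.0f}) && d_default == 5.0f);
  return 0;
}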
--- SparseFillEmptyRowsGradCpuKernelMod (.h) ---

@@ -17,17 +17,21 @@
 #ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_SPARSEFILLEMPTYROWSGRAD_CPU_KERNEL_H_
 #define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_SPARSEFILLEMPTYROWSGRAD_CPU_KERNEL_H_
 #include <vector>
+#include <map>
 #include "plugin/device/cpu/kernel/cpu_kernel.h"
 #include "plugin/factory/ms_factory.h"
 
 namespace mindspore {
 namespace kernel {
-class SparseFillEmptyRowsGradCpuKernelMod : public DeprecatedNativeCpuKernelMod {
+class SparseFillEmptyRowsGradCpuKernelMod : public NativeCpuKernelMod {
  public:
   SparseFillEmptyRowsGradCpuKernelMod() = default;
   ~SparseFillEmptyRowsGradCpuKernelMod() override = default;
 
-  void InitKernel(const CNodePtr &kernel_node) override;
+  bool Init(const BaseOperatorPtr &base_operator, const std::vector<KernelTensorPtr> &inputs,
+            const std::vector<KernelTensorPtr> &outputs) override;
+  int Resize(const BaseOperatorPtr &base_operator, const std::vector<KernelTensorPtr> &inputs,
+             const std::vector<KernelTensorPtr> &outputs, const std::map<uint32_t, tensor::TensorPtr> &) override;
   bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
               const std::vector<AddressPtr> &outputs) override;

@@ -38,9 +42,10 @@ class SparseFillEmptyRowsGradCpuKernelMod : public DeprecatedNativeCpuKernelMod
   template <typename T>
   bool LaunchKernel(const std::vector<kernel::AddressPtr> &inputs, const std::vector<kernel::AddressPtr> &outputs);
 
-  CNodePtr node_ptr;
   TypeId output_y_values_type_;
   TypeId output_y_default_value_type_;
+  ShapeVector reverse_index_map_shape_;
+  ShapeVector grad_values_shape_;
 };
 }  // namespace kernel
 }  // namespace mindspore
--- RaggedTensorToTensor infer shape (op definition) ---

@@ -52,7 +52,7 @@ BaseShapePtr RaggedTensorToTensorInferShape(const PrimitivePtr &primitive,
 
   if (IsDynamic(shape_shape) || IsDynamicRank(values_shape) || IsDynamicRank(default_value_shape) ||
       IsDynamicRank(tensor0_shape)) {
-    return std::make_shared<abstract::Shape>(ShapeVector{abstract::Shape::kShapeRankAny});
+    return std::make_shared<abstract::Shape>(output_shape);
   }
 
   CheckAndConvertUtils::CheckInteger("dimension of 'shape'", SizeToLong(shape_shape.size()), kEqual, 1, prim_name);
--- SelfAdjointEig infer shape (op definition) ---

@@ -36,6 +36,13 @@ abstract::TupleShapePtr SelfAdjointEigInferShape(const PrimitivePtr &primitive,
   auto x = input_args[0]->BuildShape();
   auto input_shape = CheckAndConvertUtils::ConvertShapePtrToShapeMap(x)[kShape];
   auto input_rank = SizeToLong(input_shape.size());
+
+  if (IsDynamicRank(input_shape)) {
+    auto unknow_shape_ptr = std::make_shared<abstract::Shape>(ShapeVector{abstract::Shape::kShapeRankAny});
+    return std::make_shared<abstract::TupleShape>(
+      std::vector<abstract::BaseShapePtr>{unknow_shape_ptr, unknow_shape_ptr});
+  }
+
   CheckAndConvertUtils::CheckInteger("input rank", input_rank, kGreaterEqual, kNumber, prim_name);
   int64_t last_shape_input = input_shape[input_rank - 1];
   int64_t last_second__shape_input = input_shape[input_rank - 2];
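Both op-level fixes follow the same infer-shape convention: when an input's rank is still unknown, return the unknown-rank sentinel rather than running rank checks against it, and defer validation until real shapes arrive. SelfAdjointEig now does this explicitly; RaggedTensorToTensor, as the hunk above suggests, returns the partially-known output_shape computed earlier in the function instead of collapsing it to rank-any. A toy sketch of that control flow follows; the sentinel values and helper bodies are illustrative of the convention, not copied from the MindSpore headers:

#include <algorithm>
#include <cassert>
#include <cstdint>
#include <vector>

// Illustrative sentinels for "unknown dimension" / "unknown rank".
constexpr int64_t kShapeDimAny = -1;
constexpr int64_t kShapeRankAny = -2;

bool IsDynamicRank(const std::vector<int64_t> &shape) {
  return std::any_of(shape.begin(), shape.end(), [](int64_t d) { return d == kShapeRankAny; });
}

// Infer the output of an op that requires a square trailing 2-D block,
// in the spirit of SelfAdjointEig's shape check.
std::vector<int64_t> InferSquareOutput(const std::vector<int64_t> &input_shape) {
  if (IsDynamicRank(input_shape)) {
    return {kShapeRankAny};  // defer all rank/size checks
  }
  assert(input_shape.size() >= 2);
  const int64_t last = input_shape[input_shape.size() - 1];
  const int64_t second_last = input_shape[input_shape.size() - 2];
  assert(last == second_last || last == kShapeDimAny || second_last == kShapeDimAny);
  return input_shape;
}

int main() {
  assert(InferSquareOutput({kShapeRankAny}) == (std::vector<int64_t>{kShapeRankAny}));
  assert(InferSquareOutput({4, 3, 3}) == (std::vector<int64_t>{4, 3, 3}));
  return 0;
}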