diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/sparse_apply_ftrl_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/sparse_apply_ftrl_cpu_kernel.cc
index 2c90fa578c2..5645dd8f0d5 100644
--- a/mindspore/ccsrc/plugin/device/cpu/kernel/sparse_apply_ftrl_cpu_kernel.cc
+++ b/mindspore/ccsrc/plugin/device/cpu/kernel/sparse_apply_ftrl_cpu_kernel.cc
@@ -21,6 +21,7 @@
 #include "kernel/common_utils.h"
 #include "plugin/device/cpu/hal/device/cpu_device_address.h"
 #include "ops/fused_sparse_ftrl.h"
+#include "ops/sparse_apply_ftrl.h"
 
 namespace mindspore {
 namespace kernel {
@@ -36,6 +37,7 @@ constexpr size_t kSparseApplyFtrlWorkspaceSize = 4;
 constexpr char kKernelName[] = "SparseApplyFtrl";
 
 using KernelRunFunc = SparseApplyFtrlCpuKernelMod::KernelRunFunc;
+using FusedKernelRunFunc = FusedSparseFtrlCpuKernelMod::KernelRunFunc;
 
 template <typename T>
 void ComputeFtrl(MultiThreadComputeParams<T> *input_params, size_t start, size_t end) {
@@ -53,8 +55,9 @@ void ComputeFtrl(MultiThreadComputeParams<T> *input_params, size_t start, size_t
   for (size_t i = start; i < end; ++i) {
     T index = unique_sparse_grad.indices_[i];
     if (index < 0 || LongToSize(index) >= var_first_dim_size) {
-      MS_LOG(EXCEPTION) << "For '" << kKernelName << "', each element in 'indices' must be in range [0, "
-                        << SizeToLong(var_first_dim_size) << "), but got " << index;
+      MS_LOG(ERROR) << "For '" << kKernelName << "', each element in 'indices' must be in range [0, "
+                    << SizeToLong(var_first_dim_size) << "), but got " << index;
+      continue;  // skip the invalid row instead of indexing out of bounds
     }
     size_t start_index = var_outer_dim_size * static_cast<size_t>(index);
     size_t end_index = start_index + var_outer_dim_size;
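For reference, the per-row update that `ComputeFtrl` performs is the standard FTRL-proximal step. The sketch below is a plain NumPy restatement of that formula (illustrative only, not MindSpore API); it reproduces the 0.291479 expectation asserted by the new CPU ST test further down.

```python
import numpy as np

def sparse_ftrl_row_update(var, accum, linear, grad, indices,
                           lr=0.001, l1=0.0, l2=0.0, lr_power=-0.5):
    """Single-threaded reference for the per-row FTRL update in ComputeFtrl."""
    for i, idx in enumerate(indices):
        g = grad[i]
        accum_new = accum[idx] + g * g
        # sigma = (accum_new^{-lr_power} - accum^{-lr_power}) / lr
        sigma = (accum_new ** -lr_power - accum[idx] ** -lr_power) / lr
        linear[idx] += g - sigma * var[idx]
        quadratic = accum_new ** -lr_power / lr + 2.0 * l2
        var[idx] = np.where(np.abs(linear[idx]) > l1,
                            (np.sign(linear[idx]) * l1 - linear[idx]) / quadratic,
                            0.0)
        accum[idx] = accum_new
    return var, accum, linear

var = np.ones((3, 9), np.float32)
out = sparse_ftrl_row_update(var, np.ones_like(var), np.ones_like(var),
                             np.ones_like(var), [0, 1, 2])
print(out[0][0][0])  # ~0.291479, matching the ST test expectation
```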
@@ -80,14 +82,14 @@ }  // namespace
 }  // namespace
 
 template <typename T>
-void SparseApplyFtrlCpuKernelMod::InitWorkspaceSize() {
+void FusedSparseFtrlCpuKernelMod::InitWorkspaceSize() {
   (void)workspace_size_list_.emplace_back(indices_size_ * var_outer_dim_size_ * sizeof(float));
   (void)workspace_size_list_.emplace_back(indices_size_ * sizeof(T));
   (void)workspace_size_list_.emplace_back(indices_size_ * var_outer_dim_size_ * sizeof(float));
   (void)workspace_size_list_.emplace_back(indices_size_ * sizeof(T));
 }
 
-bool SparseApplyFtrlCpuKernelMod::Init(const BaseOperatorPtr &base_operator, const std::vector<KernelTensorPtr> &inputs,
+bool FusedSparseFtrlCpuKernelMod::Init(const BaseOperatorPtr &base_operator, const std::vector<KernelTensorPtr> &inputs,
                                        const std::vector<KernelTensorPtr> &outputs) {
   kernel_name_ = base_operator->name();
   if (inputs.empty() || outputs.empty()) {
@@ -126,7 +128,7 @@ bool SparseApplyFtrlCpuKernelMod::Init(const BaseOperatorPtr &base_operator, con
   return true;
 }
 
-void SparseApplyFtrlCpuKernelMod::ResetResource() noexcept {
+void FusedSparseFtrlCpuKernelMod::ResetResource() noexcept {
   input_size_list_.clear();
   output_size_list_.clear();
   workspace_size_list_.clear();
@@ -136,7 +138,7 @@ void SparseApplyFtrlCpuKernelMod::ResetResource() noexcept {
   indices_size_ = 0;
   var_first_dim_size_ = 0;
   var_outer_dim_size_ = 1;
 }
 
-int SparseApplyFtrlCpuKernelMod::Resize(const BaseOperatorPtr &base_operator,
+int FusedSparseFtrlCpuKernelMod::Resize(const BaseOperatorPtr &base_operator,
                                         const std::vector<KernelTensorPtr> &inputs,
                                         const std::vector<KernelTensorPtr> &outputs,
                                         const std::map<uint32_t, tensor::TensorPtr> &) {
@@ -211,8 +213,8 @@ int SparseApplyFtrlCpuKernelMod::Resize(const BaseOperatorPtr &base_operator,
   return KRET_OK;
 }
 
-const std::vector<std::pair<KernelAttr, KernelRunFunc>> &SparseApplyFtrlCpuKernelMod::GetFuncList() const {
-  static const std::vector<std::pair<KernelAttr, KernelRunFunc>> func_list = {
+const std::vector<std::pair<KernelAttr, FusedKernelRunFunc>> &FusedSparseFtrlCpuKernelMod::GetFuncList() const {
+  static const std::vector<std::pair<KernelAttr, FusedKernelRunFunc>> func_list = {
     {KernelAttr()
        .AddInputAttr(kNumberTypeFloat32)
       .AddInputAttr(kNumberTypeFloat32)
@@ -223,7 +225,7 @@ const std::vector<std::pair<KernelAttr, KernelRunFunc>> &SparseApplyFtrlCpuKerne
       .AddOutputAttr(kNumberTypeFloat32)
       .AddOutputAttr(kNumberTypeFloat32)
       .AddOutInRef(0, 0),
-     &SparseApplyFtrlCpuKernelMod::LaunchKernel<int>},
+     &FusedSparseFtrlCpuKernelMod::LaunchKernel<int>},
     {KernelAttr()
       .AddInputAttr(kNumberTypeFloat32)
       .AddInputAttr(kNumberTypeFloat32)
@@ -234,12 +236,12 @@ const std::vector<std::pair<KernelAttr, KernelRunFunc>> &SparseApplyFtrlCpuKerne
       .AddOutputAttr(kNumberTypeFloat32)
       .AddOutputAttr(kNumberTypeFloat32)
       .AddOutInRef(0, 0),
-     &SparseApplyFtrlCpuKernelMod::LaunchKernel<int64_t>}};
+     &FusedSparseFtrlCpuKernelMod::LaunchKernel<int64_t>}};
   return func_list;
 }
 
 template <typename T>
-bool SparseApplyFtrlCpuKernelMod::LaunchKernel(const std::vector<AddressPtr> &inputs,
+bool FusedSparseFtrlCpuKernelMod::LaunchKernel(const std::vector<AddressPtr> &inputs,
                                                const std::vector<AddressPtr> &workspace,
                                                const std::vector<AddressPtr> &) const {
   auto *var = reinterpret_cast<float *>(inputs[0]->addr);
@@ -278,7 +280,189 @@ bool SparseApplyFtrlCpuKernelMod::LaunchKernel(const std::vector<AddressPtr> &inputs,
   return true;
 }
 
+bool SparseApplyFtrlCpuKernelMod::Init(const BaseOperatorPtr &base_operator, const std::vector<KernelTensorPtr> &inputs,
+                                       const std::vector<KernelTensorPtr> &outputs) {
+  kernel_name_ = base_operator->name();
+  if (inputs.empty() || outputs.empty()) {
+    MS_LOG(ERROR) << "For '" << kernel_name_ << "', it got empty inputs or outputs, which is invalid.";
+    return false;
+  }
+  if (inputs.size() != kSparseApplyFtrlInputsNum) {
+    MS_LOG(ERROR) << "For '" << kernel_name_ << "', input size must be " << kSparseApplyFtrlInputsNum << ", but got "
+                  << inputs.size();
+    return false;
+  }
+  auto kernel_ptr = std::make_shared<ops::SparseApplyFtrl>(base_operator->GetPrim());
+  lr_ = kernel_ptr->get_lr();
+  if (lr_ <= 0) {
+    MS_LOG(ERROR) << "For '" << kernel_name_ << "', 'lr' must be a positive scalar, but got " << lr_;
+    return false;
+  }
+  l1_ = kernel_ptr->get_l1();
+  if (l1_ < 0) {
+    MS_LOG(ERROR) << "For '" << kernel_name_ << "', 'l1' must be a non-negative scalar, but got " << l1_;
+    return false;
+  }
+  l2_ = kernel_ptr->get_l2();
+  if (l2_ < 0) {
+    MS_LOG(ERROR) << "For '" << kernel_name_ << "', 'l2' must be a non-negative scalar, but got " << l2_;
+    return false;
+  }
+  lr_power_ = kernel_ptr->get_lr_power();
+  if (lr_power_ > 0) {
+    MS_LOG(ERROR) << "For '" << kernel_name_ << "', 'lr_power' must be a non-positive scalar, but got " << lr_power_;
+    return false;
+  }
+  if (!MatchKernelFunc(base_operator, inputs, outputs)) {
+    return false;
+  }
+  return true;
+}
+
+void SparseApplyFtrlCpuKernelMod::ResetResource() noexcept {
+  input_size_list_.clear();
+  output_size_list_.clear();
+  indices_data_type_ = kNumberTypeInt32;
+  indices_size_ = 0;
+  var_first_dim_size_ = 0;
+  var_outer_dim_size_ = 1;
+}
+
+int SparseApplyFtrlCpuKernelMod::Resize(const BaseOperatorPtr &base_operator,
+                                        const std::vector<KernelTensorPtr> &inputs,
+                                        const std::vector<KernelTensorPtr> &outputs,
+                                        const std::map<uint32_t, tensor::TensorPtr> &) {
+  ResetResource();
+  int ret = KernelMod::Resize(base_operator, inputs, outputs);
+  if (ret != KRET_OK) {
+    return ret;
+  }
+  ShapeVector var_shape = inputs[kVarIndex]->GetShapeVector();
+  ShapeVector accum_shape = inputs[kAccumIndex]->GetShapeVector();
+  ShapeVector linear_shape = inputs[kLinearIndex]->GetShapeVector();
+  ShapeVector grad_shape = inputs[kGradIndex]->GetShapeVector();
+  ShapeVector indices_shape = inputs[kIndicesIndex]->GetShapeVector();
+  if (var_shape.empty()) {
+    MS_LOG(ERROR) << "For '" << kernel_name_ << "', the 'var' must be at least 1-D, but got a scalar.";
+    return KRET_RESIZE_FAILED;
+  }
+  if (!IsSameShape(var_shape, accum_shape)) {
+    MS_LOG(ERROR) << "For '" << kernel_name_
+                  << "', the shape of 'accum' must be the same as the shape of 'var', "
+                     "but got the shape of 'accum': "
+                  << Vector2Str(accum_shape) << " and the shape of 'var': " << Vector2Str(var_shape);
+    return KRET_RESIZE_FAILED;
+  }
+  if (!IsSameShape(var_shape, linear_shape)) {
+    MS_LOG(ERROR) << "For '" << kernel_name_
+                  << "', the shape of 'linear' must be the same as the shape of 'var', "
+                     "but got the shape of 'linear': "
+                  << Vector2Str(linear_shape) << " and the shape of 'var': " << Vector2Str(var_shape);
+    return KRET_RESIZE_FAILED;
+  }
+  if (var_shape.size() != grad_shape.size()) {
+    MS_LOG(ERROR) << "For '" << kernel_name_
+                  << "', the dimension of 'grad' must be the same as the dimension of "
+                     "'var', but got the dimension of 'grad': "
+                  << grad_shape.size() << " and the dimension of 'var': " << var_shape.size() << ".";
+    return KRET_RESIZE_FAILED;
+  }
+  var_first_dim_size_ = LongToSize(var_shape[0]);
+  for (size_t i = 1; i < var_shape.size(); ++i) {
+    if (var_shape[i] != grad_shape[i]) {
+      MS_LOG(ERROR) << "For '" << kernel_name_ << "', the shape of 'var' and 'grad' must be equal in dimension i="
+                    << i << ", but got 'var_shape[i]': " << var_shape[i]
+                    << " and 'grad_shape[i]': " << grad_shape[i];
+      return KRET_RESIZE_FAILED;
+    }
+    var_outer_dim_size_ *= LongToSize(var_shape[i]);
+  }
+  if (indices_shape.size() != 1) {
+    MS_LOG(ERROR) << "For '" << kernel_name_ << "', the 'indices' must be a 1-D vector, but got "
+                  << indices_shape.size() << "-D.";
+    return KRET_RESIZE_FAILED;
+  }
+  indices_size_ = LongToSize(indices_shape[0]);
+  if (grad_shape[0] != SizeToLong(indices_size_)) {
+    MS_LOG(ERROR) << "For '" << kernel_name_
+                  << "', the first dimension value of 'grad' must be equal to "
+                     "the first dimension value of 'indices', but got the first dimension value of 'grad': "
+                  << grad_shape[0] << ", and the first dimension value of 'indices': " << indices_size_;
+    return KRET_RESIZE_FAILED;
+  }
+  return KRET_OK;
+}
+
+const std::vector<std::pair<KernelAttr, KernelRunFunc>> &SparseApplyFtrlCpuKernelMod::GetFuncList() const {
+  static const std::vector<std::pair<KernelAttr, KernelRunFunc>> func_list = {
+    {KernelAttr()
+       .AddInputAttr(kNumberTypeFloat32)
+       .AddInputAttr(kNumberTypeFloat32)
+       .AddInputAttr(kNumberTypeFloat32)
+       .AddInputAttr(kNumberTypeFloat32)
+       .AddInputAttr(kNumberTypeInt32)
+       .AddOutputAttr(kNumberTypeFloat32)
+       .AddOutputAttr(kNumberTypeFloat32)
+       .AddOutputAttr(kNumberTypeFloat32)
+       .AddOutInRef(0, 0),
+     &SparseApplyFtrlCpuKernelMod::LaunchKernel<int>},
+    {KernelAttr()
+       .AddInputAttr(kNumberTypeFloat32)
+       .AddInputAttr(kNumberTypeFloat32)
+       .AddInputAttr(kNumberTypeFloat32)
+       .AddInputAttr(kNumberTypeFloat32)
+       .AddInputAttr(kNumberTypeInt64)
+       .AddOutputAttr(kNumberTypeFloat32)
+       .AddOutputAttr(kNumberTypeFloat32)
+       .AddOutputAttr(kNumberTypeFloat32)
+       .AddOutInRef(0, 0),
+     &SparseApplyFtrlCpuKernelMod::LaunchKernel<int64_t>}};
+  return func_list;
+}
+
+template <typename T>
+bool SparseApplyFtrlCpuKernelMod::LaunchKernel(const std::vector<AddressPtr> &inputs,
+                                               const std::vector<AddressPtr> &workspace,
+                                               const std::vector<AddressPtr> &outputs) const {
+  auto *var = reinterpret_cast<float *>(inputs[kVarIndex]->addr);
+  auto *accum = reinterpret_cast<float *>(inputs[kAccumIndex]->addr);
+  auto *linear = reinterpret_cast<float *>(inputs[kLinearIndex]->addr);
+  auto *grad = reinterpret_cast<float *>(inputs[kGradIndex]->addr);
+  auto *indices = reinterpret_cast<T *>(inputs[kIndicesIndex]->addr);
+  auto *var_out = reinterpret_cast<float *>(outputs[kVarIndex]->addr);
+  auto *accum_out = reinterpret_cast<float *>(outputs[kAccumIndex]->addr);
+  auto *linear_out = reinterpret_cast<float *>(outputs[kLinearIndex]->addr);
+
+  SparseGradient<T> input_sparse_grad({grad, indices, indices_size_});
+  MultiThreadComputeParams<T> input_params;
+  input_params.var_ = var;
+  input_params.accum_ = accum;
+  input_params.linear_ = linear;
+  input_params.lr_ = lr_;
+  input_params.l1_ = l1_;
+  input_params.l2_ = l2_;
+  input_params.lr_power_ = lr_power_;
+  input_params.sparse_grad_ = input_sparse_grad;
+  input_params.var_first_dim_size_ = var_first_dim_size_;
+  input_params.var_outer_dim_size_ = var_outer_dim_size_;
+  MultiThreadCompute<T>(ComputeFtrl<T>, &input_params, indices_size_);
+
+  // Assign the updated parameters back to the outputs.
+  auto ret = memcpy_s(var_out, outputs[kVarIndex]->size, var, inputs[kVarIndex]->size);
+  if (ret != EOK) {
+    MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', launch kernel error: memcpy failed. Error no: " << ret;
+  }
+  ret = memcpy_s(accum_out, outputs[kAccumIndex]->size, accum, inputs[kAccumIndex]->size);
+  if (ret != EOK) {
+    MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', launch kernel error: memcpy failed. Error no: " << ret;
+  }
+  ret = memcpy_s(linear_out, outputs[kLinearIndex]->size, linear, inputs[kLinearIndex]->size);
+  if (ret != EOK) {
+    MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', launch kernel error: memcpy failed. Error no: " << ret;
+  }
+  return true;
+}
+
-MS_KERNEL_FACTORY_REG(NativeCpuKernelMod, FusedSparseFtrl, SparseApplyFtrlCpuKernelMod);
+MS_KERNEL_FACTORY_REG(NativeCpuKernelMod, FusedSparseFtrl, FusedSparseFtrlCpuKernelMod);
+MS_KERNEL_FACTORY_REG(NativeCpuKernelMod, SparseApplyFtrl, SparseApplyFtrlCpuKernelMod);
 }  // namespace kernel
 }  // namespace mindspore
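Taken together, the `Resize` logic above reduces to a handful of shape relations. A hedged pure-Python restatement (the helper name is invented for illustration; on success it yields the `var_first_dim_size_`/`var_outer_dim_size_` pair the kernel caches):

```python
import numpy as np

def check_sparse_apply_ftrl_shapes(var_shape, accum_shape, linear_shape,
                                   grad_shape, indices_shape):
    """Restates the constraints enforced by SparseApplyFtrlCpuKernelMod::Resize."""
    assert len(var_shape) >= 1, "'var' must be at least 1-D"
    assert accum_shape == var_shape, "'accum' must match 'var'"
    assert linear_shape == var_shape, "'linear' must match 'var'"
    assert len(grad_shape) == len(var_shape), "'grad' rank must match 'var'"
    assert grad_shape[1:] == var_shape[1:], "'grad'[1:] must match 'var'[1:]"
    assert len(indices_shape) == 1, "'indices' must be 1-D"
    assert grad_shape[0] == indices_shape[0], "'grad'[0] must equal 'indices'[0]"
    # Rows are flattened: row idx occupies [idx * outer, (idx + 1) * outer).
    outer = int(np.prod(var_shape[1:])) if len(var_shape) > 1 else 1
    return var_shape[0], outer  # (var_first_dim_size_, var_outer_dim_size_)

check_sparse_apply_ftrl_shapes((3, 3, 3), (3, 3, 3), (3, 3, 3), (3, 3, 3), (3,))
```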
diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/sparse_apply_ftrl_cpu_kernel.h b/mindspore/ccsrc/plugin/device/cpu/kernel/sparse_apply_ftrl_cpu_kernel.h
index b490dbca995..06cabde8db5 100644
--- a/mindspore/ccsrc/plugin/device/cpu/kernel/sparse_apply_ftrl_cpu_kernel.h
+++ b/mindspore/ccsrc/plugin/device/cpu/kernel/sparse_apply_ftrl_cpu_kernel.h
@@ -25,11 +25,11 @@
 
 namespace mindspore {
 namespace kernel {
-class BACKEND_EXPORT SparseApplyFtrlCpuKernelMod : public SparseOptimizerCpuKernelMod,
-                                                   public MatchKernelHelper<SparseApplyFtrlCpuKernelMod> {
+class BACKEND_EXPORT FusedSparseFtrlCpuKernelMod : public SparseOptimizerCpuKernelMod,
+                                                   public MatchKernelHelper<FusedSparseFtrlCpuKernelMod> {
  public:
-  SparseApplyFtrlCpuKernelMod() = default;
-  ~SparseApplyFtrlCpuKernelMod() override = default;
+  FusedSparseFtrlCpuKernelMod() = default;
+  ~FusedSparseFtrlCpuKernelMod() override = default;
 
   bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
               const std::vector<AddressPtr> &outputs) override {
@@ -62,6 +62,41 @@ class BACKEND_EXPORT SparseApplyFtrlCpuKernelMod : public SparseOptimizerCpuKern
   bool LaunchKernel(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
                     const std::vector<AddressPtr> &) const;
 };
+
+class BACKEND_EXPORT SparseApplyFtrlCpuKernelMod : public SparseOptimizerCpuKernelMod,
+                                                   public MatchKernelHelper<SparseApplyFtrlCpuKernelMod> {
+ public:
+  SparseApplyFtrlCpuKernelMod() = default;
+  ~SparseApplyFtrlCpuKernelMod() override = default;
+
+  bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
+              const std::vector<AddressPtr> &outputs) override {
+    return kernel_func_(this, inputs, workspace, outputs);
+  }
+
+  bool Init(const BaseOperatorPtr &base_operator, const std::vector<KernelTensorPtr> &inputs,
+            const std::vector<KernelTensorPtr> &outputs) override;
+
+  int Resize(const BaseOperatorPtr &base_operator, const std::vector<KernelTensorPtr> &inputs,
+             const std::vector<KernelTensorPtr> &outputs, const std::map<uint32_t, tensor::TensorPtr> &) override;
+
+  const std::vector<std::pair<KernelAttr, KernelRunFunc>> &GetFuncList() const override;
+
+ protected:
+  std::vector<KernelAttr> GetOpSupport() override { return OpSupport(); }
+  void ResetResource() noexcept;
+
+ protected:
+  float lr_{0.0};
+  float l1_{0.0};
+  float l2_{0.0};
+  float lr_power_{0.0};
+
+ private:
+  template <typename T>
+  bool LaunchKernel(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
+                    const std::vector<AddressPtr> &) const;
+};
 }  // namespace kernel
 }  // namespace mindspore
diff --git a/mindspore/core/ops/fused_sparse_ftrl.h b/mindspore/core/ops/fused_sparse_ftrl.h
index 169f377f930..f607983dd2a 100644
--- a/mindspore/core/ops/fused_sparse_ftrl.h
+++ b/mindspore/core/ops/fused_sparse_ftrl.h
@@ -28,7 +28,7 @@ namespace mindspore {
 namespace ops {
 constexpr auto kNameFusedSparseFtrl = "FusedSparseFtrl";
 
-/// \brief Softmax operation. Refer to Python API @ref mindspore.ops.Softmax for more details.
+/// \brief FusedSparseFtrl operation. Refer to Python API @ref mindspore.ops.FusedSparseFtrl for more details.
 class MIND_API FusedSparseFtrl : public BaseOperator {
  public:
   MIND_API_BASE_MEMBER(FusedSparseFtrl);
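At the Python layer, the renamed fused kernel and the new kernel back two distinct primitives. A minimal construction sketch (the attribute values here are the usual FTRL settings from the tests, not mandated defaults):

```python
from mindspore.ops import operations as P

# Fused variant: updates the Parameters in place inside one fused kernel.
fused_opt = P.FusedSparseFtrl(lr=0.001, l1=0.0, l2=0.0, lr_power=-0.5)

# Plain variant (this PR adds its CPU kernel): same attributes plus
# use_locking, and it also returns the updated (var, accum, linear).
plain_opt = P.SparseApplyFtrl(lr=0.001, l1=0.0, l2=0.0, lr_power=-0.5,
                              use_locking=False)
```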
diff --git a/mindspore/core/ops/sparse_apply_ftrl.cc b/mindspore/core/ops/sparse_apply_ftrl.cc
new file mode 100644
index 00000000000..cc3bfa33a16
--- /dev/null
+++ b/mindspore/core/ops/sparse_apply_ftrl.cc
@@ -0,0 +1,165 @@
+/**
+ * Copyright 2022 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "ops/sparse_apply_ftrl.h"
+#include <map>
+#include <memory>
+#include <vector>
+#include "ops/op_utils.h"
+#include "utils/check_convert_utils.h"
+#include "abstract/ops/primitive_infer_map.h"
+#include "mindapi/src/helper.h"
+
+namespace mindspore {
+namespace ops {
+namespace sparse_apply_ftrl {
+// Inputs: "var", "accum", "linear", "grad", "indices"
+constexpr size_t kVarIndex = 0;
+constexpr size_t kAccumIndex = 1;
+constexpr size_t kLinearIndex = 2;
+constexpr size_t kGradIndex = 3;
+constexpr size_t kIndicesIndex = 4;
+constexpr size_t kSparseApplyFtrlInputNum = 5;
+
+abstract::TupleShapePtr SparseApplyFtrlInferShape(const PrimitivePtr &primitive,
+                                                  const std::vector<AbstractBasePtr> &input_args) {
+  auto prim_name = primitive->name();
+  // The op updates var/accum/linear in place, so the output shapes simply mirror the inputs;
+  // note that all three output shapes must be returned, not a single one.
+  auto var_shape_r = input_args[kVarIndex]->Broaden()->BuildShape();
+  auto accum_shape_r = input_args[kAccumIndex]->Broaden()->BuildShape();
+  auto linear_shape_r = input_args[kLinearIndex]->Broaden()->BuildShape();
+  auto outputs = std::make_shared<abstract::TupleShape>(
+    std::vector<abstract::BaseShapePtr>({var_shape_r, accum_shape_r, linear_shape_r}));
+  for (auto &input : input_args) {
+    if (input->BuildShape()->IsDynamic()) {
+      return outputs;
+    }
+  }
+  auto var_shape = CheckAndConvertUtils::ConvertShapePtrToShapeMap(input_args[kVarIndex]->BuildShape())[kShape];
+  auto accum_shape = CheckAndConvertUtils::ConvertShapePtrToShapeMap(input_args[kAccumIndex]->BuildShape())[kShape];
+  auto linear_shape = CheckAndConvertUtils::ConvertShapePtrToShapeMap(input_args[kLinearIndex]->BuildShape())[kShape];
+  auto indices_shape = CheckAndConvertUtils::ConvertShapePtrToShapeMap(input_args[kIndicesIndex]->BuildShape())[kShape];
+  auto grad_shape = CheckAndConvertUtils::ConvertShapePtrToShapeMap(input_args[kGradIndex]->BuildShape())[kShape];
+
+  (void)CheckAndConvertUtils::CheckValue("var shape", var_shape, kEqual, "accum shape", accum_shape, prim_name);
+  (void)CheckAndConvertUtils::CheckValue("var shape", var_shape, kEqual, "linear shape", linear_shape, prim_name);
+  // indices rank == 1
+  (void)CheckAndConvertUtils::CheckInteger("indices rank", SizeToLong(indices_shape.size()), kEqual, 1, prim_name);
+  // grad_shape[0] == indices_shape[0]
+  (void)CheckAndConvertUtils::CheckInteger("grad rank", SizeToLong(grad_shape.size()), kGreaterEqual, 1, prim_name);
+  (void)CheckAndConvertUtils::CheckValue("grad_shape[0]", grad_shape[0], kEqual, "indices_shape[0]", indices_shape[0],
+                                         prim_name);
+  // grad_shape[1:] == var_shape[1:] while grad_shape[0] == indices_shape[0]
+  if (var_shape.size() > 1) {
+    auto left_shape = var_shape;
+    auto right_shape = grad_shape;
+    left_shape.erase(left_shape.begin());
+    right_shape.erase(right_shape.begin());
+    (void)CheckAndConvertUtils::CheckValue("var_shape[1:]", left_shape, kEqual, "grad_shape[1:]", right_shape,
+                                           prim_name);
+  }
+  return outputs;
+}
+
+TypePtr SparseApplyFtrlInferType(const PrimitivePtr &prim, const std::vector<AbstractBasePtr> &input_args) {
+  auto prim_name = prim->name();
+  std::map<std::string, TypePtr> types = {{"var", input_args[kVarIndex]->BuildType()},
+                                          {"accum", input_args[kAccumIndex]->BuildType()},
+                                          {"linear", input_args[kLinearIndex]->BuildType()},
+                                          {"grad", input_args[kGradIndex]->BuildType()}};
+  (void)CheckAndConvertUtils::CheckTensorTypeSame(types, {kFloat16, kFloat32}, prim_name);
+
+  auto indices_dtype = input_args[kIndicesIndex]->BuildType();
+  (void)CheckAndConvertUtils::CheckTensorTypeValid("indices", indices_dtype, {kInt32, kInt64}, prim_name);
+
+  auto type = input_args[kVarIndex]->BuildType();
+  return std::make_shared<Tuple>(std::vector<TypePtr>{type, type, type});
+}
+}  // namespace sparse_apply_ftrl
+
+void SparseApplyFtrl::set_lr(float lr) { (void)this->AddAttr(kLr, api::MakeValue(lr)); }
+
+float SparseApplyFtrl::get_lr() const {
+  auto value_ptr = GetAttr(kLr);
+  return GetValue<float>(value_ptr);
+}
+
+void SparseApplyFtrl::set_l1(float l1) { (void)this->AddAttr(kL1, api::MakeValue(l1)); }
+
+float SparseApplyFtrl::get_l1() const {
+  auto value_ptr = GetAttr(kL1);
+  return GetValue<float>(value_ptr);
+}
+
+void SparseApplyFtrl::set_l2(float l2) { (void)this->AddAttr(kL2, api::MakeValue(l2)); }
+
+float SparseApplyFtrl::get_l2() const {
+  auto value_ptr = GetAttr(kL2);
+  return GetValue<float>(value_ptr);
+}
+
+void SparseApplyFtrl::set_lr_power(float lr_power) { (void)this->AddAttr(kLrPower, api::MakeValue(lr_power)); }
+
+float SparseApplyFtrl::get_lr_power() const {
+  auto value_ptr = GetAttr(kLrPower);
+  return GetValue<float>(value_ptr);
+}
+
+void SparseApplyFtrl::set_use_locking(bool use_locking) {
+  (void)this->AddAttr(kUseLocking, api::MakeValue(use_locking));
+}
+
+bool SparseApplyFtrl::get_use_locking() const {
+  auto value_ptr = GetAttr(kUseLocking);
+  return GetValue<bool>(value_ptr);
+}
+
+void SparseApplyFtrl::Init(float lr, float l1, float l2, float lr_power, bool use_locking) {
+  set_lr(lr);
+  set_l1(l1);
+  set_l2(l2);
+  set_lr_power(lr_power);
+  set_use_locking(use_locking);
+}
+
+MIND_API_OPERATOR_IMPL(SparseApplyFtrl, BaseOperator);
+AbstractBasePtr SparseApplyFtrlInfer(const abstract::AnalysisEnginePtr &, const PrimitivePtr &primitive,
+                                     const std::vector<AbstractBasePtr> &input_args) {
+  MS_EXCEPTION_IF_NULL(primitive);
+  for (auto &item : input_args) {
+    MS_EXCEPTION_IF_NULL(item);
+  }
+  auto op_name = primitive->name();
+  // float lr, float l1, float l2, float lr_power
+  auto lr = GetValue<float>(primitive->GetAttr(kLr));
+  auto l1 = GetValue<float>(primitive->GetAttr(kL1));
+  auto l2 = GetValue<float>(primitive->GetAttr(kL2));
+  auto lr_power = GetValue<float>(primitive->GetAttr(kLrPower));
+
+  (void)CheckAndConvertUtils::CheckValue(kLr, lr, kGreaterThan, 0.0f, op_name);
+  (void)CheckAndConvertUtils::CheckValue(kL1, l1, kGreaterEqual, 0.0f, op_name);
+  (void)CheckAndConvertUtils::CheckValue(kL2, l2, kGreaterEqual, 0.0f, op_name);
+  (void)CheckAndConvertUtils::CheckValue(kLrPower, lr_power, kLessEqual, 0.0f, op_name);
+
+  (void)CheckAndConvertUtils::CheckInteger("input numbers", SizeToLong(input_args.size()), kGreaterEqual,
+                                           sparse_apply_ftrl::kSparseApplyFtrlInputNum, op_name);
+  auto types = sparse_apply_ftrl::SparseApplyFtrlInferType(primitive, input_args);
+  auto shapes = sparse_apply_ftrl::SparseApplyFtrlInferShape(primitive, input_args);
+  return abstract::MakeAbstract(shapes, types);
+}
+
+REGISTER_PRIMITIVE_EVAL_IMPL(SparseApplyFtrl, prim::kPrimSparseApplyFtrl, SparseApplyFtrlInfer, nullptr, true);
+}  // namespace ops
+}  // namespace mindspore
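To illustrate the attribute constraints enforced above (hedged: whether a violation is rejected at construction time or at infer time, and the exact exception type, may vary by MindSpore version):

```python
from mindspore.ops import operations as P

# Valid: lr > 0, l1 >= 0, l2 >= 0, lr_power <= 0.
opt = P.SparseApplyFtrl(lr=0.001, l1=0.0, l2=0.0, lr_power=-0.5)

# Each of these violates exactly one check and is expected to be rejected:
#   P.SparseApplyFtrl(lr=0.0,   l1=0.0,  l2=0.0,  lr_power=-0.5)  # lr not > 0
#   P.SparseApplyFtrl(lr=0.001, l1=-1.0, l2=0.0,  lr_power=-0.5)  # l1 < 0
#   P.SparseApplyFtrl(lr=0.001, l1=0.0,  l2=-1.0, lr_power=-0.5)  # l2 < 0
#   P.SparseApplyFtrl(lr=0.001, l1=0.0,  l2=0.0,  lr_power=0.5)   # lr_power > 0
```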
diff --git a/mindspore/core/ops/sparse_apply_ftrl.h b/mindspore/core/ops/sparse_apply_ftrl.h
new file mode 100644
index 00000000000..8ac4017937c
--- /dev/null
+++ b/mindspore/core/ops/sparse_apply_ftrl.h
@@ -0,0 +1,82 @@
+/**
+ * Copyright 2022 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MINDSPORE_CORE_OPS_SPARSE_APPLY_FTRL_H_
+#define MINDSPORE_CORE_OPS_SPARSE_APPLY_FTRL_H_
+
+#include <map>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "ops/base_operator.h"
+#include "mindapi/base/types.h"
+
+namespace mindspore {
+namespace ops {
+constexpr auto kNameSparseApplyFtrl = "SparseApplyFtrl";
+/// \brief SparseApplyFtrl operation. Refer to Python API @ref mindspore.ops.SparseApplyFtrl for more details.
+class MIND_API SparseApplyFtrl : public BaseOperator {
+ public:
+  MIND_API_BASE_MEMBER(SparseApplyFtrl);
+  /// \brief Constructor.
+  SparseApplyFtrl() : BaseOperator(kNameSparseApplyFtrl) {
+    InitIOName({"var", "accum", "linear", "grad", "indices"}, {"var", "accum", "linear"});
+  }
+  /// \brief Init. Refer to the parameters of Python API @ref mindspore.ops.SparseApplyFtrl for the inputs.
+  void Init(float lr, float l1, float l2, float lr_power, bool use_locking = false);
+  /// \brief Set lr.
+  void set_lr(float lr);
+  /// \brief Get lr.
+  ///
+  /// \return lr.
+  float get_lr() const;
+
+  /// \brief Set l1.
+  void set_l1(float l1);
+  /// \brief Get l1.
+  ///
+  /// \return l1.
+  float get_l1() const;
+
+  /// \brief Set l2.
+  void set_l2(float l2);
+  /// \brief Get l2.
+  ///
+  /// \return l2.
+  float get_l2() const;
+
+  /// \brief Set lr_power.
+  void set_lr_power(float lr_power);
+  /// \brief Get lr_power.
+  ///
+  /// \return lr_power.
+  float get_lr_power() const;
+
+  /// \brief Set use_locking.
+  void set_use_locking(bool use_locking);
+  /// \brief Get use_locking.
+  ///
+  /// \return use_locking.
+  bool get_use_locking() const;
+};
+
+abstract::AbstractBasePtr SparseApplyFtrlInfer(const abstract::AnalysisEnginePtr &, const PrimitivePtr &primitive,
+                                               const std::vector<abstract::AbstractBasePtr> &input_args);
+}  // namespace ops
+}  // namespace mindspore
+
+#endif  // MINDSPORE_CORE_OPS_SPARSE_APPLY_FTRL_H_
diff --git a/mindspore/python/mindspore/ops/operations/nn_ops.py b/mindspore/python/mindspore/ops/operations/nn_ops.py
index 819f336253e..594e0c3f54a 100644
--- a/mindspore/python/mindspore/ops/operations/nn_ops.py
+++ b/mindspore/python/mindspore/ops/operations/nn_ops.py
@@ -6700,7 +6700,7 @@ class SparseApplyFtrl(PrimitiveWithCheck):
         RuntimeError: If the data type of all of inputs except `indices` conversion of Parameter is not supported.
 
     Supported Platforms:
-        ``Ascend`` ``GPU``
+        ``Ascend`` ``GPU`` ``CPU``
 
     Examples:
         >>> class SparseApplyFtrlNet(nn.Cell):
diff --git a/tests/st/ops/cpu/test_sparse_apply_ftrl_op.py b/tests/st/ops/cpu/test_sparse_apply_ftrl_op.py
index 3ab40fe5017..b9d4617fee6 100644
--- a/tests/st/ops/cpu/test_sparse_apply_ftrl_op.py
+++ b/tests/st/ops/cpu/test_sparse_apply_ftrl_op.py
@@ -49,6 +49,19 @@ class TestNet(nn.Cell):
         return out
 
 
+class SparseApplyFtrlNet(nn.Cell):
+    def __init__(self, var, accum, linear, lr=0.001, l1=0.0, l2=0.0, lr_power=-0.5):
+        super(SparseApplyFtrlNet, self).__init__()
+        self.sparse_apply_ftrl = P.SparseApplyFtrl(lr=lr, l1=l1, l2=l2, lr_power=lr_power)
+        self.var = Parameter(var, name="var")
+        self.accum = Parameter(accum, name="accum")
+        self.linear = Parameter(linear, name="linear")
+
+    def construct(self, grad, indices):
+        out = self.sparse_apply_ftrl(self.var, self.accum, self.linear, grad, indices)
+        return out
+
+
 @pytest.mark.level0
 @pytest.mark.platform_x86_cpu
 @pytest.mark.env_onecard
@@ -340,3 +353,59 @@ def test_fused_sparse_ftrl_dynamic():
                            [[-0.00598256, -0.00598256]],
                            [[1., 1.]]]).astype(np.float32)
     assert np.allclose(net.var.data.asnumpy(), expect_var)
+
+
+@pytest.mark.level0
+@pytest.mark.platform_x86_cpu
+@pytest.mark.env_onecard
+def test_sparse_apply_ftrl():
+    """
+    Feature: SparseApplyFtrl
+    Description: normal params, attrs and inputs
+    Expectation: the result meets expectation
+    """
+    context.set_context(mode=context.GRAPH_MODE, device_target="CPU")
+
+    grad_np = np.ones([3, 3, 3])
+    indice_np = [0, 1, 2]
+    var_np = np.ones([3, 3, 3])
+    accum_np = np.ones([3, 3, 3])
+    linear_np = np.ones([3, 3, 3])
+
+    # test1: var/accum/linear/gradient are float32 and indices is int32.
+    gradient = Tensor(grad_np, dtype=mstype.float32)
+    indices = Tensor(indice_np, dtype=mstype.int32)
+    var = Tensor(var_np, dtype=mstype.float32)
+    accum = Tensor(accum_np, dtype=mstype.float32)
+    linear = Tensor(linear_np, dtype=mstype.float32)
+    sparse_apply_ftrl = SparseApplyFtrlNet(var, accum, linear)
+    out = sparse_apply_ftrl(gradient, indices)
+    expect_var = np.array([[[0.291479, 0.291479, 0.291479],
+                            [0.291479, 0.291479, 0.291479],
+                            [0.291479, 0.291479, 0.291479]],
+                           [[0.291479, 0.291479, 0.291479],
+                            [0.291479, 0.291479, 0.291479],
+                            [0.291479, 0.291479, 0.291479]],
+                           [[0.291479, 0.291479, 0.291479],
+                            [0.291479, 0.291479, 0.291479],
+                            [0.291479, 0.291479, 0.291479]]]).astype(np.float32)
+    assert np.all(out[0].asnumpy() == expect_var)
+
+    # test2: var/accum/linear/gradient are float16 and indices is int32.
+    gradient = Tensor(grad_np, dtype=mstype.float16)
+    indices = Tensor(indice_np, dtype=mstype.int32)
+    var = Tensor(var_np, dtype=mstype.float16)
+    accum = Tensor(accum_np, dtype=mstype.float16)
+    linear = Tensor(linear_np, dtype=mstype.float16)
+    sparse_apply_ftrl = SparseApplyFtrlNet(var, accum, linear)
+    out = sparse_apply_ftrl(gradient, indices)
+    expect_var = np.array([[[0.2915, 0.2915, 0.2915],
+                            [0.2915, 0.2915, 0.2915],
+                            [0.2915, 0.2915, 0.2915]],
+                           [[0.2915, 0.2915, 0.2915],
+                            [0.2915, 0.2915, 0.2915],
+                            [0.2915, 0.2915, 0.2915]],
+                           [[0.2915, 0.2915, 0.2915],
+                            [0.2915, 0.2915, 0.2915],
+                            [0.2915, 0.2915, 0.2915]]]).astype(np.float16)
+    assert np.all(out[0].asnumpy() == expect_var)
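The 0.291479 expectation in the test above can be reproduced by hand from the FTRL step sketched earlier; illustrative arithmetic only:

```python
import numpy as np

# One FTRL step with var = accum = linear = 1, grad = 1,
# lr = 0.001, l1 = l2 = 0, lr_power = -0.5:
accum_new = 1.0 + 1.0                                 # accum + grad^2 = 2
sigma = (np.sqrt(accum_new) - 1.0) / 0.001            # (2^0.5 - 1^0.5) / lr
linear_new = 1.0 + 1.0 - sigma                        # linear + grad - sigma * var
var_new = -linear_new / (np.sqrt(accum_new) / 0.001)  # l1 = l2 = 0 case
print(round(var_new, 6))  # 0.291479
```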
diff --git a/tests/ut/cpp/kernel/cpu/sparse_apply_ftrl_cpu_kernel_test.cc b/tests/ut/cpp/kernel/cpu/sparse_apply_ftrl_cpu_kernel_test.cc
index ddca3bb4ade..c6f05e247a0 100644
--- a/tests/ut/cpp/kernel/cpu/sparse_apply_ftrl_cpu_kernel_test.cc
+++ b/tests/ut/cpp/kernel/cpu/sparse_apply_ftrl_cpu_kernel_test.cc
@@ -25,9 +25,9 @@
 
 namespace mindspore {
 namespace kernel {
-class SparseApplyFtrlCpuKernelTest : public UT::Common {
+class FusedSparseFtrlCpuKernelTest : public UT::Common {
  public:
-  SparseApplyFtrlCpuKernelTest() : sparse_ftrl_(std::make_shared<SparseApplyFtrlCpuKernelMod>()) {}
+  FusedSparseFtrlCpuKernelTest() : sparse_ftrl_(std::make_shared<FusedSparseFtrlCpuKernelMod>()) {}
 
   void SetUp() override {
     sparse_ftrl_->lr_ = 0.001;
@@ -104,10 +104,13 @@ class SparseApplyFtrlCpuKernelTest : public UT::Common {
   std::vector<AddressPtr> outputs_;
   std::vector<KernelTensorPtr> kernel_tensor_inputs_;
   std::vector<KernelTensorPtr> kernel_tensor_outputs_;
-  std::shared_ptr<SparseApplyFtrlCpuKernelMod> sparse_ftrl_;
+  std::shared_ptr<FusedSparseFtrlCpuKernelMod> sparse_ftrl_;
 };
 
-TEST_F(SparseApplyFtrlCpuKernelTest, dense_test) {
+/// Feature: FusedSparseFtrl
+/// Description: Run FusedSparseFtrl
+/// Expectation: pass
+TEST_F(FusedSparseFtrlCpuKernelTest, dense_test) {
   for (size_t i = 0; i < 3 * 3 * 3; ++i) {
     var_.push_back(1.0);
     accum_.push_back(1.0);
@@ -136,7 +139,10 @@ TEST_F(SparseApplyFtrlCpuKernelTest, dense_test) {
   }
 }
 
-TEST_F(SparseApplyFtrlCpuKernelTest, sparse_test1) {
+/// Feature: FusedSparseFtrl
+/// Description: Run FusedSparseFtrl
+/// Expectation: pass
+TEST_F(FusedSparseFtrlCpuKernelTest, sparse_test1) {
   for (size_t i = 0; i < 3 * 3 * 3; ++i) {
     var_.push_back(1.0);
     accum_.push_back(1.0);
@@ -173,7 +179,10 @@ TEST_F(SparseApplyFtrlCpuKernelTest, sparse_test1) {
   }
 }
 
-TEST_F(SparseApplyFtrlCpuKernelTest, sparse_test2) {
+/// Feature: FusedSparseFtrl
+/// Description: Run FusedSparseFtrl
+/// Expectation: pass
+TEST_F(FusedSparseFtrlCpuKernelTest, sparse_test2) {
   for (size_t i = 0; i < 3 * 3 * 3; ++i) {
     var_.push_back(1.0);
     accum_.push_back(1.0);