Add SparseApplyFtrl cpu op
parent 97c7d50da1
commit 4ecbbb7b08
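In brief: this commit renames the existing CPU kernel class to FusedSparseFtrlCpuKernelMod (it backs the FusedSparseFtrl op) and adds a new SparseApplyFtrlCpuKernelMod for the SparseApplyFtrl op, together with the ops::SparseApplyFtrl primitive definition (shape/type inference and attribute accessors), a docstring update, a CPU ST test, and renamed C++ UTs. The hunks below touch, in order: the CPU kernel .cc and .h, ops/fused_sparse_ftrl.h, the new ops/sparse_apply_ftrl.{cc,h}, the Python operator docstring, the Python test file, and the C++ unit test.

A minimal usage sketch of the new op on CPU; shapes and values mirror the ST test added below, while the exact import layout is an assumption rather than part of the patch:

    import numpy as np
    import mindspore.nn as nn
    from mindspore import Tensor, Parameter, context
    from mindspore import dtype as mstype
    from mindspore.ops import operations as P

    context.set_context(mode=context.GRAPH_MODE, device_target="CPU")


    class SparseApplyFtrlNet(nn.Cell):
        def __init__(self, var, accum, linear, lr=0.001, l1=0.0, l2=0.0, lr_power=-0.5):
            super(SparseApplyFtrlNet, self).__init__()
            # lr/l1/l2/lr_power are operator attributes, fixed at construction time
            self.sparse_apply_ftrl = P.SparseApplyFtrl(lr=lr, l1=l1, l2=l2, lr_power=lr_power)
            self.var = Parameter(var, name="var")
            self.accum = Parameter(accum, name="accum")
            self.linear = Parameter(linear, name="linear")

        def construct(self, grad, indices):
            # returns the updated (var, accum, linear); output 0 is a ref of input 0
            return self.sparse_apply_ftrl(self.var, self.accum, self.linear, grad, indices)


    net = SparseApplyFtrlNet(Tensor(np.ones([3, 3, 3]), mstype.float32),
                             Tensor(np.ones([3, 3, 3]), mstype.float32),
                             Tensor(np.ones([3, 3, 3]), mstype.float32))
    out = net(Tensor(np.ones([3, 3, 3]), mstype.float32), Tensor([0, 1, 2], mstype.int32))
    print(out[0].asnumpy())  # each element should be ~0.291479 after one step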
@@ -21,6 +21,7 @@
 #include "kernel/common_utils.h"
 #include "plugin/device/cpu/hal/device/cpu_device_address.h"
 #include "ops/fused_sparse_ftrl.h"
+#include "ops/sparse_apply_ftrl.h"
 
 namespace mindspore {
 namespace kernel {
@@ -36,6 +37,7 @@ constexpr size_t kSparseApplyFtrlWorkspaceSize = 4;
 constexpr char kKernelName[] = "SparseApplyFtrl";
 
 using KernelRunFunc = SparseApplyFtrlCpuKernelMod::KernelRunFunc;
+using FusedKernelRunFunc = FusedSparseFtrlCpuKernelMod::KernelRunFunc;
 
 template <typename T>
 void ComputeFtrl(MultiThreadComputeParams<T> *input_params, size_t start, size_t end) {
@@ -53,8 +55,8 @@ void ComputeFtrl(MultiThreadComputeParams<T> *input_params, size_t start, size_t
   for (size_t i = start; i < end; ++i) {
     T index = unique_sparse_grad.indices_[i];
     if (index < 0 || LongToSize(index) >= var_first_dim_size) {
-      MS_LOG(EXCEPTION) << "For '" << kKernelName << "', each element in 'indices' must be in range [0, "
-                        << SizeToLong(var_first_dim_size) << "), but got " << index;
+      MS_LOG(ERROR) << "For '" << kKernelName << "', each element in 'indices' must be in range [0, "
+                    << SizeToLong(var_first_dim_size) << "), but got " << index;
     }
     size_t start_index = var_outer_dim_size * static_cast<size_t>(index);
     size_t end_index = start_index + var_outer_dim_size;
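The loop body that applies the update sits outside this hunk; for context, the FTRL-proximal step that SparseApplyFtrl performs on each indexed row can be sketched in NumPy as follows. This is the textbook update as documented for mindspore.ops.SparseApplyFtrl, not the kernel code itself:

    import numpy as np

    def ftrl_row_update(var, accum, linear, grad, lr, l1, l2, lr_power):
        """One FTRL-proximal step on a single row (reference sketch)."""
        accum_new = accum + grad * grad
        # sigma bridges the old and new per-coordinate step sizes
        sigma = (accum_new ** -lr_power - accum ** -lr_power) / lr
        linear = linear + grad - sigma * var
        quadratic = accum_new ** -lr_power / lr + 2.0 * l2
        # l1 soft-thresholding: coordinates with |linear| <= l1 are set to zero
        var = np.where(np.abs(linear) > l1,
                       (np.sign(linear) * l1 - linear) / quadratic, 0.0)
        return var, accum_new, linear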
@@ -80,14 +82,14 @@ void ComputeFtrl(MultiThreadComputeParams<T> *input_params, size_t start, size_t
 }  // namespace
 
 template <typename T>
-void SparseApplyFtrlCpuKernelMod::InitWorkspaceSize() {
+void FusedSparseFtrlCpuKernelMod::InitWorkspaceSize() {
   (void)workspace_size_list_.emplace_back(indices_size_ * var_outer_dim_size_ * sizeof(float));
   (void)workspace_size_list_.emplace_back(indices_size_ * sizeof(T));
   (void)workspace_size_list_.emplace_back(indices_size_ * var_outer_dim_size_ * sizeof(float));
   (void)workspace_size_list_.emplace_back(indices_size_ * sizeof(T));
 }
 
-bool SparseApplyFtrlCpuKernelMod::Init(const BaseOperatorPtr &base_operator, const std::vector<KernelTensorPtr> &inputs,
+bool FusedSparseFtrlCpuKernelMod::Init(const BaseOperatorPtr &base_operator, const std::vector<KernelTensorPtr> &inputs,
                                        const std::vector<KernelTensorPtr> &outputs) {
   kernel_name_ = base_operator->name();
   if (inputs.empty() || outputs.empty()) {
@@ -126,7 +128,7 @@ bool SparseApplyFtrlCpuKernelMod::Init(const BaseOperatorPtr &base_operator, con
   return true;
 }
 
-void SparseApplyFtrlCpuKernelMod::ResetResource() noexcept {
+void FusedSparseFtrlCpuKernelMod::ResetResource() noexcept {
   input_size_list_.clear();
   output_size_list_.clear();
   workspace_size_list_.clear();
@@ -136,7 +138,7 @@ void SparseApplyFtrlCpuKernelMod::ResetResource() noexcept {
   var_outer_dim_size_ = 1;
 }
 
-int SparseApplyFtrlCpuKernelMod::Resize(const BaseOperatorPtr &base_operator,
+int FusedSparseFtrlCpuKernelMod::Resize(const BaseOperatorPtr &base_operator,
                                         const std::vector<KernelTensorPtr> &inputs,
                                         const std::vector<KernelTensorPtr> &outputs,
                                         const std::map<uint32_t, tensor::TensorPtr> &) {
@@ -211,8 +213,8 @@ int SparseApplyFtrlCpuKernelMod::Resize(const BaseOperatorPtr &base_operator,
   return KRET_OK;
 }
 
-const std::vector<std::pair<KernelAttr, KernelRunFunc>> &SparseApplyFtrlCpuKernelMod::GetFuncList() const {
-  static const std::vector<std::pair<KernelAttr, KernelRunFunc>> func_list = {
+const std::vector<std::pair<KernelAttr, FusedKernelRunFunc>> &FusedSparseFtrlCpuKernelMod::GetFuncList() const {
+  static const std::vector<std::pair<KernelAttr, FusedKernelRunFunc>> func_list = {
     {KernelAttr()
        .AddInputAttr(kNumberTypeFloat32)
        .AddInputAttr(kNumberTypeFloat32)
@@ -223,7 +225,7 @@ const std::vector<std::pair<KernelAttr, KernelRunFunc>> &SparseApplyFtrlCpuKerne
        .AddOutputAttr(kNumberTypeFloat32)
        .AddOutputAttr(kNumberTypeFloat32)
        .AddOutInRef(0, 0),
-     &SparseApplyFtrlCpuKernelMod::LaunchKernel<int>},
+     &FusedSparseFtrlCpuKernelMod::LaunchKernel<int>},
     {KernelAttr()
        .AddInputAttr(kNumberTypeFloat32)
        .AddInputAttr(kNumberTypeFloat32)
@@ -234,12 +236,12 @@ const std::vector<std::pair<KernelAttr, KernelRunFunc>> &SparseApplyFtrlCpuKerne
        .AddOutputAttr(kNumberTypeFloat32)
        .AddOutputAttr(kNumberTypeFloat32)
        .AddOutInRef(0, 0),
-     &SparseApplyFtrlCpuKernelMod::LaunchKernel<int64_t>}};
+     &FusedSparseFtrlCpuKernelMod::LaunchKernel<int64_t>}};
   return func_list;
 }
 
 template <typename T>
-bool SparseApplyFtrlCpuKernelMod::LaunchKernel(const std::vector<kernel::AddressPtr> &inputs,
+bool FusedSparseFtrlCpuKernelMod::LaunchKernel(const std::vector<kernel::AddressPtr> &inputs,
                                                const std::vector<kernel::AddressPtr> &workspace,
                                                const std::vector<kernel::AddressPtr> &) const {
   auto *var = reinterpret_cast<float *>(inputs[0]->addr);
@@ -278,6 +280,189 @@ bool SparseApplyFtrlCpuKernelMod::LaunchKernel(const std::vector<kernel::Address
   return true;
 }
 
-MS_KERNEL_FACTORY_REG(NativeCpuKernelMod, FusedSparseFtrl, SparseApplyFtrlCpuKernelMod);
+bool SparseApplyFtrlCpuKernelMod::Init(const BaseOperatorPtr &base_operator, const std::vector<KernelTensorPtr> &inputs,
+                                       const std::vector<KernelTensorPtr> &outputs) {
+  kernel_name_ = base_operator->name();
+  if (inputs.empty() || outputs.empty()) {
+    MS_LOG(ERROR) << "For '" << kernel_name_ << "', it got empty inputs or outputs, which is invalid.";
+    return false;
+  }
+  if (inputs.size() != kSparseApplyFtrlInputsNum) {
+    MS_LOG(ERROR) << "For '" << kernel_name_ << "', input size must be " << kSparseApplyFtrlInputsNum << ", but got "
+                  << inputs.size();
+    return false;
+  }
+  auto kernel_ptr = std::make_shared<ops::SparseApplyFtrl>(base_operator->GetPrim());
+  lr_ = kernel_ptr->get_lr();
+  if (lr_ <= 0) {
+    MS_LOG(ERROR) << "For '" << kernel_name_ << "', 'lr' must be a positive scalar, but got " << lr_;
+    return false;
+  }
+  l1_ = kernel_ptr->get_l1();
+  if (l1_ < 0) {
+    MS_LOG(ERROR) << "For '" << kernel_name_ << "', 'l1' must be a non-negative scalar, but got " << l1_;
+    return false;
+  }
+  l2_ = kernel_ptr->get_l2();
+  if (l2_ < 0) {
+    MS_LOG(ERROR) << "For '" << kernel_name_ << "', 'l2' must be a non-negative scalar, but got " << l2_;
+    return false;
+  }
+  lr_power_ = kernel_ptr->get_lr_power();
+  if (lr_power_ > 0) {
+    MS_LOG(ERROR) << "For '" << kernel_name_ << "', 'lr_power' must be a non-positive scalar, but got " << lr_power_;
+    return false;
+  }
+  if (!MatchKernelFunc(base_operator, inputs, outputs)) {
+    return false;
+  }
+  return true;
+}
+
+void SparseApplyFtrlCpuKernelMod::ResetResource() noexcept {
+  input_size_list_.clear();
+  output_size_list_.clear();
+  indices_data_type_ = kNumberTypeInt32;
+  indices_size_ = 0;
+  var_first_dim_size_ = 0;
+  var_outer_dim_size_ = 1;
+}
+
+int SparseApplyFtrlCpuKernelMod::Resize(const BaseOperatorPtr &base_operator,
+                                        const std::vector<KernelTensorPtr> &inputs,
+                                        const std::vector<KernelTensorPtr> &outputs,
+                                        const std::map<uint32_t, tensor::TensorPtr> &) {
+  ResetResource();
+  int ret = KernelMod::Resize(base_operator, inputs, outputs);
+  if (ret != KRET_OK) {
+    return ret;
+  }
+  ShapeVector var_shape = inputs[kVarIndex]->GetShapeVector();
+  ShapeVector accum_shape = inputs[kAccumIndex]->GetShapeVector();
+  ShapeVector linear_shape = inputs[kLinearIndex]->GetShapeVector();
+  ShapeVector grad_shape = inputs[kGradIndex]->GetShapeVector();
+  ShapeVector indices_shape = inputs[kIndicesIndex]->GetShapeVector();
+  if (var_shape.empty()) {
+    MS_LOG(ERROR) << "For '" << kernel_name_ << "', the 'var' must be at least 1-D, but got scalar or None.";
+    return KRET_RESIZE_FAILED;
+  }
+  if (!IsSameShape(var_shape, accum_shape)) {
+    MS_LOG(ERROR) << "For '" << kernel_name_
+                  << "', the shape of 'accum' must be the same as the shape of 'var', "
+                     "but got the shape of 'accum': "
+                  << Vector2Str(accum_shape) << " and the shape of 'var': " << Vector2Str(var_shape);
+    return KRET_RESIZE_FAILED;
+  }
+  if (!IsSameShape(var_shape, linear_shape)) {
+    MS_LOG(ERROR) << "For '" << kernel_name_
+                  << "', the shape of 'linear' must be the same as the shape of 'var', "
+                     "but got the shape of 'linear': "
+                  << Vector2Str(linear_shape) << " and the shape of 'var': " << Vector2Str(var_shape);
+    return KRET_RESIZE_FAILED;
+  }
+  if (var_shape.size() != grad_shape.size()) {
+    MS_LOG(ERROR) << "For '" << kernel_name_
+                  << "', the dimension of 'grad' must be the same as the dimension of "
+                     "'var', but got the dimension of 'grad': "
+                  << grad_shape.size() << " and the dimension of 'var': " << var_shape.size() << ".";
+    return KRET_RESIZE_FAILED;
+  }
+  var_first_dim_size_ = var_shape[0];
+  for (size_t i = 1; i < var_shape.size(); ++i) {
+    if (var_shape[i] != grad_shape[i]) {
+      MS_LOG(ERROR) << "For '" << kernel_name_ << "', the shape of 'var' and 'grad' must be equal in dimension i=" << i
+                    << ", but got 'var_shape[i]': " << var_shape[i] << " and 'grad_shape[i]': " << grad_shape[i];
+      return KRET_RESIZE_FAILED;
+    }
+    var_outer_dim_size_ *= var_shape[i];
+  }
+  if (indices_shape.size() != 1) {
+    MS_LOG(ERROR) << "For '" << kernel_name_ << "', the 'indices' must be a 1-D vector, but got "
+                  << indices_shape.size() << "-D.";
+    return KRET_RESIZE_FAILED;
+  }
+  indices_size_ = indices_shape[0];
+  if (grad_shape[0] != SizeToLong(indices_size_)) {
+    MS_LOG(ERROR) << "For '" << kernel_name_
+                  << "', the first dimension value of 'grad' must be equal to "
+                     "the first dimension value of 'indices', but got the first dimension value of 'grad': "
+                  << grad_shape[0] << ", and the first dimension value of 'indices': " << indices_size_;
+    return KRET_RESIZE_FAILED;
+  }
+  return KRET_OK;
+}
+
+const std::vector<std::pair<KernelAttr, KernelRunFunc>> &SparseApplyFtrlCpuKernelMod::GetFuncList() const {
+  static const std::vector<std::pair<KernelAttr, KernelRunFunc>> func_list = {
+    {KernelAttr()
+       .AddInputAttr(kNumberTypeFloat32)
+       .AddInputAttr(kNumberTypeFloat32)
+       .AddInputAttr(kNumberTypeFloat32)
+       .AddInputAttr(kNumberTypeFloat32)
+       .AddInputAttr(kNumberTypeInt32)
+       .AddOutputAttr(kNumberTypeFloat32)
+       .AddOutputAttr(kNumberTypeFloat32)
+       .AddOutputAttr(kNumberTypeFloat32)
+       .AddOutInRef(0, 0),
+     &SparseApplyFtrlCpuKernelMod::LaunchKernel<float, int>},
+    {KernelAttr()
+       .AddInputAttr(kNumberTypeFloat32)
+       .AddInputAttr(kNumberTypeFloat32)
+       .AddInputAttr(kNumberTypeFloat32)
+       .AddInputAttr(kNumberTypeFloat32)
+       .AddInputAttr(kNumberTypeInt64)
+       .AddOutputAttr(kNumberTypeFloat32)
+       .AddOutputAttr(kNumberTypeFloat32)
+       .AddOutputAttr(kNumberTypeFloat32)
+       .AddOutInRef(0, 0),
+     &SparseApplyFtrlCpuKernelMod::LaunchKernel<float, int64_t>}};
+  return func_list;
+}
+
+template <typename T, typename S>
+bool SparseApplyFtrlCpuKernelMod::LaunchKernel(const std::vector<kernel::AddressPtr> &inputs,
+                                               const std::vector<kernel::AddressPtr> &workspace,
+                                               const std::vector<kernel::AddressPtr> &outputs) const {
+  auto *var = reinterpret_cast<T *>(inputs[kVarIndex]->addr);
+  auto *accum = reinterpret_cast<T *>(inputs[kAccumIndex]->addr);
+  auto *linear = reinterpret_cast<T *>(inputs[kLinearIndex]->addr);
+  auto *grad = reinterpret_cast<T *>(inputs[kGradIndex]->addr);
+  auto *indices = reinterpret_cast<S *>(inputs[kIndicesIndex]->addr);
+  auto *var_out = reinterpret_cast<T *>(outputs[kVarIndex]->addr);
+  auto *accum_out = reinterpret_cast<T *>(outputs[kAccumIndex]->addr);
+  auto *linear_out = reinterpret_cast<T *>(outputs[kLinearIndex]->addr);
+
+  SparseGradient<S> input_sparse_grad({grad, indices, indices_size_});
+  MultiThreadComputeParams<S> input_params;
+  input_params.var_ = var;
+  input_params.accum_ = accum;
+  input_params.linear_ = linear;
+  input_params.lr_ = lr_;
+  input_params.l1_ = l1_;
+  input_params.l2_ = l2_;
+  input_params.lr_power_ = lr_power_;
+  input_params.sparse_grad_ = input_sparse_grad;
+  input_params.var_first_dim_size_ = var_first_dim_size_;
+  input_params.var_outer_dim_size_ = var_outer_dim_size_;
+  MultiThreadCompute<S>(ComputeFtrl<S>, &input_params, indices_size_);
+
+  // assign results back to outputs
+  auto ret = memcpy_s(var_out, outputs[kVarIndex]->size, var, inputs[kVarIndex]->size);
+  if (ret != EOK) {
+    MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', launch kernel error: memcpy failed. Error no: " << ret;
+  }
+  ret = memcpy_s(accum_out, outputs[kAccumIndex]->size, accum, inputs[kAccumIndex]->size);
+  if (ret != EOK) {
+    MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', launch kernel error: memcpy failed. Error no: " << ret;
+  }
+  ret = memcpy_s(linear_out, outputs[kLinearIndex]->size, linear, inputs[kLinearIndex]->size);
+  if (ret != EOK) {
+    MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', launch kernel error: memcpy failed. Error no: " << ret;
+  }
+  return true;
+}
+
+MS_KERNEL_FACTORY_REG(NativeCpuKernelMod, FusedSparseFtrl, FusedSparseFtrlCpuKernelMod);
+MS_KERNEL_FACTORY_REG(NativeCpuKernelMod, SparseApplyFtrl, SparseApplyFtrlCpuKernelMod);
 }  // namespace kernel
 }  // namespace mindspore
@@ -25,11 +25,11 @@
 
 namespace mindspore {
 namespace kernel {
-class BACKEND_EXPORT SparseApplyFtrlCpuKernelMod : public SparseOptimizerCpuKernelMod,
-                                                   public MatchKernelHelper<SparseApplyFtrlCpuKernelMod> {
+class BACKEND_EXPORT FusedSparseFtrlCpuKernelMod : public SparseOptimizerCpuKernelMod,
+                                                   public MatchKernelHelper<FusedSparseFtrlCpuKernelMod> {
  public:
-  SparseApplyFtrlCpuKernelMod() = default;
-  ~SparseApplyFtrlCpuKernelMod() override = default;
+  FusedSparseFtrlCpuKernelMod() = default;
+  ~FusedSparseFtrlCpuKernelMod() override = default;
 
   bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
               const std::vector<AddressPtr> &outputs) override {
@@ -62,6 +62,41 @@ class BACKEND_EXPORT SparseApplyFtrlCpuKernelMod : public SparseOptimizerCpuKern
   bool LaunchKernel(const std::vector<kernel::AddressPtr> &inputs, const std::vector<kernel::AddressPtr> &workspace,
                     const std::vector<kernel::AddressPtr> &) const;
 };
 
+class BACKEND_EXPORT SparseApplyFtrlCpuKernelMod : public SparseOptimizerCpuKernelMod,
+                                                   public MatchKernelHelper<SparseApplyFtrlCpuKernelMod> {
+ public:
+  SparseApplyFtrlCpuKernelMod() = default;
+  ~SparseApplyFtrlCpuKernelMod() override = default;
+
+  bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
+              const std::vector<AddressPtr> &outputs) override {
+    return kernel_func_(this, inputs, workspace, outputs);
+  }
+
+  bool Init(const BaseOperatorPtr &base_operator, const std::vector<KernelTensorPtr> &inputs,
+            const std::vector<KernelTensorPtr> &outputs) override;
+
+  int Resize(const BaseOperatorPtr &base_operator, const std::vector<KernelTensorPtr> &inputs,
+             const std::vector<KernelTensorPtr> &outputs, const std::map<uint32_t, tensor::TensorPtr> &) override;
+
+  const std::vector<std::pair<KernelAttr, KernelRunFunc>> &GetFuncList() const override;
+
+ protected:
+  std::vector<KernelAttr> GetOpSupport() override { return OpSupport(); }
+  void ResetResource() noexcept;
+
+ protected:
+  float lr_{0.0};
+  float l1_{0.0};
+  float l2_{0.0};
+  float lr_power_{0.0};
+
+ private:
+  template <typename T, typename S>
+  bool LaunchKernel(const std::vector<kernel::AddressPtr> &inputs, const std::vector<kernel::AddressPtr> &workspace,
+                    const std::vector<kernel::AddressPtr> &) const;
+};
 }  // namespace kernel
 }  // namespace mindspore
@@ -28,7 +28,7 @@
 namespace mindspore {
 namespace ops {
 constexpr auto kNameFusedSparseFtrl = "FusedSparseFtrl";
-/// \brief Softmax operation. Refer to Python API @ref mindspore.ops.Softmax for more details.
+/// \brief FusedSparseFtrl operation. Refer to Python API @ref mindspore.ops.FusedSparseFtrl for more details.
 class MIND_API FusedSparseFtrl : public BaseOperator {
  public:
   MIND_API_BASE_MEMBER(FusedSparseFtrl);
@@ -0,0 +1,165 @@
+/**
+ * Copyright 2022 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "ops/sparse_apply_ftrl.h"
+#include <string>
+#include <memory>
+#include <vector>
+#include "ops/op_utils.h"
+#include "utils/check_convert_utils.h"
+#include "abstract/ops/primitive_infer_map.h"
+#include "mindapi/src/helper.h"
+
+namespace mindspore {
+namespace ops {
+namespace sparse_apply_ftrl {
+// inputs: "var", "accum", "linear", "grad", "indices"
+constexpr size_t kVarIndex = 0;
+constexpr size_t kAccumIndex = 1;
+constexpr size_t kLinearIndex = 2;
+constexpr size_t kGradIndex = 3;
+constexpr size_t kIndicesIndex = 4;
+constexpr size_t kSparseApplyFtrlInputNum = 5;
+
+abstract::TupleShapePtr SparseApplyFtrlInferShape(const PrimitivePtr &primitive,
+                                                  const std::vector<AbstractBasePtr> &input_args) {
+  auto prim_name = primitive->name();
+  // The outputs are not actually consumed, so we need not infer a precise output shape;
+  // still, we cannot simply return a scalar shape, so return the (broadened) input shapes.
+  auto var_shape_r = input_args[kVarIndex]->Broaden()->BuildShape();
+  auto accum_shape_r = input_args[kAccumIndex]->Broaden()->BuildShape();
+  auto linear_shape_r = input_args[kLinearIndex]->Broaden()->BuildShape();
+  auto outputs = std::make_shared<abstract::TupleShape>(
+    std::vector<abstract::BaseShapePtr>({var_shape_r, accum_shape_r, linear_shape_r}));
+  for (auto &input : input_args) {
+    if (input->BuildShape()->IsDynamic()) {
+      return outputs;
+    }
+  }
+  auto var_shape = CheckAndConvertUtils::ConvertShapePtrToShapeMap(input_args[kVarIndex]->BuildShape())[kShape];
+  auto accum_shape = CheckAndConvertUtils::ConvertShapePtrToShapeMap(input_args[kAccumIndex]->BuildShape())[kShape];
+  auto linear_shape = CheckAndConvertUtils::ConvertShapePtrToShapeMap(input_args[kLinearIndex]->BuildShape())[kShape];
+  auto indices_shape = CheckAndConvertUtils::ConvertShapePtrToShapeMap(input_args[kIndicesIndex]->BuildShape())[kShape];
+  auto grad_shape = CheckAndConvertUtils::ConvertShapePtrToShapeMap(input_args[kGradIndex]->BuildShape())[kShape];
+
+  (void)CheckAndConvertUtils::CheckValue("var shape", var_shape, kEqual, "accum shape", accum_shape, prim_name);
+  (void)CheckAndConvertUtils::CheckValue("var shape", var_shape, kEqual, "linear shape", linear_shape, prim_name);
+  // indices rank == 1
+  (void)CheckAndConvertUtils::CheckInteger("indices rank", indices_shape.size(), kEqual, 1, prim_name);
+  // grad_shape[0] == indices_shape[0]
+  (void)CheckAndConvertUtils::CheckInteger("grad rank", grad_shape.size(), kGreaterEqual, 1, prim_name);
+  (void)CheckAndConvertUtils::CheckValue("grad_shape[0]", grad_shape[0], kEqual, "indices_shape[0]", indices_shape[0],
+                                         prim_name);
+  // grad_shape[1:] == var_shape[1:] while grad_shape[0] == indices_shape[0]
+  if (var_shape.size() > 1) {
+    auto left_shape = var_shape;
+    auto right_shape = grad_shape;
+    left_shape.erase(left_shape.begin());
+    right_shape.erase(right_shape.begin());
+    (void)CheckAndConvertUtils::CheckValue("var_shape[1:]", left_shape, kEqual, "grad_shape[1:]", right_shape,
+                                           prim_name);
+  }
+  return outputs;
+}
+
+TypePtr SparseApplyFtrlInferType(const PrimitivePtr &prim, const std::vector<AbstractBasePtr> &input_args) {
+  auto prim_name = prim->name();
+  std::map<std::string, TypePtr> types = {{"var", input_args[kVarIndex]->BuildType()},
+                                          {"accum", input_args[kAccumIndex]->BuildType()},
+                                          {"linear", input_args[kLinearIndex]->BuildType()},
+                                          {"grad", input_args[kGradIndex]->BuildType()}};
+  (void)CheckAndConvertUtils::CheckTensorTypeSame(types, {kFloat16, kFloat32}, prim_name);
+
+  auto indices_dtype = input_args[kIndicesIndex]->BuildType();
+  (void)CheckAndConvertUtils::CheckTensorTypeValid("indices", indices_dtype, {kInt32, kInt64}, prim_name);
+
+  auto type = input_args[kVarIndex]->BuildType();
+  return std::make_shared<Tuple>(std::vector<TypePtr>{type, type, type});
+}
+}  // namespace sparse_apply_ftrl
+
+void SparseApplyFtrl::set_lr(float lr) { (void)this->AddAttr(kLr, api::MakeValue(lr)); }
+
+float SparseApplyFtrl::get_lr() const {
+  auto value_ptr = GetAttr(kLr);
+  return GetValue<float>(value_ptr);
+}
+
+void SparseApplyFtrl::set_l1(float l1) { (void)this->AddAttr(kL1, api::MakeValue(l1)); }
+
+float SparseApplyFtrl::get_l1() const {
+  auto value_ptr = GetAttr(kL1);
+  return GetValue<float>(value_ptr);
+}
+
+void SparseApplyFtrl::set_l2(float l2) { (void)this->AddAttr(kL2, api::MakeValue(l2)); }
+
+float SparseApplyFtrl::get_l2() const {
+  auto value_ptr = GetAttr(kL2);
+  return GetValue<float>(value_ptr);
+}
+
+void SparseApplyFtrl::set_lr_power(float lr_power) { (void)this->AddAttr(kLrPower, api::MakeValue(lr_power)); }
+
+float SparseApplyFtrl::get_lr_power() const {
+  auto value_ptr = GetAttr(kLrPower);
+  return GetValue<float>(value_ptr);
+}
+
+void SparseApplyFtrl::set_use_locking(bool use_locking) {
+  (void)this->AddAttr(kUseLocking, api::MakeValue(use_locking));
+}
+
+bool SparseApplyFtrl::get_use_locking() const {
+  auto value_ptr = GetAttr(kUseLocking);
+  return GetValue<bool>(value_ptr);
+}
+
+void SparseApplyFtrl::Init(float lr, float l1, float l2, float lr_power, bool use_locking) {
+  set_lr(lr);
+  set_l1(l1);
+  set_l2(l2);
+  set_lr_power(lr_power);
+  set_use_locking(use_locking);
+}
+
+MIND_API_OPERATOR_IMPL(SparseApplyFtrl, BaseOperator);
+AbstractBasePtr SparseApplyFtrlInfer(const abstract::AnalysisEnginePtr &, const PrimitivePtr &primitive,
+                                     const std::vector<AbstractBasePtr> &input_args) {
+  MS_EXCEPTION_IF_NULL(primitive);
+  for (auto &item : input_args) {
+    MS_EXCEPTION_IF_NULL(item);
+  }
+  auto op_name = primitive->name();
+  // attrs: float lr, float l1, float l2, float lr_power
+  auto lr = GetValue<float>(primitive->GetAttr(kLr));
+  auto l1 = GetValue<float>(primitive->GetAttr(kL1));
+  auto l2 = GetValue<float>(primitive->GetAttr(kL2));
+  auto lr_power = GetValue<float>(primitive->GetAttr(kLrPower));
+
+  (void)CheckAndConvertUtils::CheckValue(kLr, lr, kGreaterThan, 0.0f, op_name);
+  (void)CheckAndConvertUtils::CheckValue(kL1, l1, kGreaterEqual, 0.0f, op_name);
+  (void)CheckAndConvertUtils::CheckValue(kL2, l2, kGreaterEqual, 0.0f, op_name);
+  (void)CheckAndConvertUtils::CheckValue(kLrPower, lr_power, kLessEqual, 0.0f, op_name);
+
+  (void)CheckAndConvertUtils::CheckInteger("input numbers", SizeToLong(input_args.size()), kGreaterEqual,
+                                           sparse_apply_ftrl::kSparseApplyFtrlInputNum, op_name);
+  auto types = sparse_apply_ftrl::SparseApplyFtrlInferType(primitive, input_args);
+  auto shapes = sparse_apply_ftrl::SparseApplyFtrlInferShape(primitive, input_args);
+  return abstract::MakeAbstract(shapes, types);
+}
+
+REGISTER_PRIMITIVE_EVAL_IMPL(SparseApplyFtrl, prim::kPrimSparseApplyFtrl, SparseApplyFtrlInfer, nullptr, true);
+}  // namespace ops
+}  // namespace mindspore
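Restating the contract that the two infer routines above enforce, as a NumPy-style sketch (a mirror of the C++ checks, assuming NumPy arrays as stand-ins for tensors):

    import numpy as np

    def check_sparse_apply_ftrl_args(var, accum, linear, grad, indices):
        """Mirror of SparseApplyFtrlInferShape/InferType (reference sketch)."""
        assert var.ndim >= 1                                  # 'var' must be at least 1-D
        assert var.shape == accum.shape == linear.shape       # var/accum/linear share one shape
        assert indices.ndim == 1                              # 'indices' is a 1-D vector
        assert grad.ndim >= 1 and grad.shape[0] == indices.shape[0]
        assert grad.shape[1:] == var.shape[1:]                # each grad row matches a var row
        assert var.dtype == accum.dtype == linear.dtype == grad.dtype
        assert var.dtype in (np.float16, np.float32)
        assert indices.dtype in (np.int32, np.int64)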
@@ -0,0 +1,82 @@
+/**
+ * Copyright 2022 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MINDSPORE_CORE_OPS_SPARSE_APPLY_FTRL_H_
+#define MINDSPORE_CORE_OPS_SPARSE_APPLY_FTRL_H_
+
+#include <map>
+#include <vector>
+#include <string>
+#include <memory>
+
+#include "ops/base_operator.h"
+#include "mindapi/base/types.h"
+
+namespace mindspore {
+namespace ops {
+constexpr auto kNameSparseApplyFtrl = "SparseApplyFtrl";
+/// \brief SparseApplyFtrl operation. Refer to Python API @ref mindspore.ops.SparseApplyFtrl for more details.
+class MIND_API SparseApplyFtrl : public BaseOperator {
+ public:
+  MIND_API_BASE_MEMBER(SparseApplyFtrl);
+  /// \brief Constructor.
+  SparseApplyFtrl() : BaseOperator(kNameSparseApplyFtrl) {
+    InitIOName({"var", "accum", "linear", "grad", "indices"}, {"var", "accum", "linear"});
+  }
+  /// \brief Init. Refer to the parameters of Python API @ref mindspore.ops.SparseApplyFtrl for the inputs.
+  void Init(float lr, float l1, float l2, float lr_power, bool use_locking = false);
+  /// \brief Set lr.
+  void set_lr(float lr);
+  /// \brief Get lr.
+  ///
+  /// \return lr.
+  float get_lr() const;
+
+  /// \brief Set l1.
+  void set_l1(float l1);
+  /// \brief Get l1.
+  ///
+  /// \return l1.
+  float get_l1() const;
+
+  /// \brief Set l2.
+  void set_l2(float l2);
+  /// \brief Get l2.
+  ///
+  /// \return l2.
+  float get_l2() const;
+
+  /// \brief Set lr_power.
+  void set_lr_power(float lr_power);
+  /// \brief Get lr_power.
+  ///
+  /// \return lr_power.
+  float get_lr_power() const;
+
+  /// \brief Set use_locking.
+  void set_use_locking(bool use_locking);
+  /// \brief Get use_locking.
+  ///
+  /// \return use_locking.
+  bool get_use_locking() const;
+};
+
+abstract::AbstractBasePtr SparseApplyFtrlInfer(const abstract::AnalysisEnginePtr &, const PrimitivePtr &primitive,
+                                               const std::vector<abstract::AbstractBasePtr> &input_args);
+}  // namespace ops
+}  // namespace mindspore
+
+#endif  // MINDSPORE_CORE_OPS_SPARSE_APPLY_FTRL_H_
@@ -6700,7 +6700,7 @@ class SparseApplyFtrl(PrimitiveWithCheck):
         RuntimeError: If the data type of all of inputs except `indices` conversion of Parameter is not supported.
 
     Supported Platforms:
-        ``Ascend`` ``GPU``
+        ``Ascend`` ``GPU`` ``CPU``
 
     Examples:
         >>> class SparseApplyFtrlNet(nn.Cell):
@@ -49,6 +49,19 @@ class TestNet(nn.Cell):
         return out
 
 
+class SparseApplyFtrlNet(nn.Cell):
+    def __init__(self, var, accum, linear, lr=0.001, l1=0.0, l2=0.0, lr_power=-0.5):
+        super(SparseApplyFtrlNet, self).__init__()
+        self.sparse_apply_ftrl = P.SparseApplyFtrl(lr=lr, l1=l1, l2=l2, lr_power=lr_power)
+        self.var = Parameter(var, name="var")
+        self.accum = Parameter(accum, name="accum")
+        self.linear = Parameter(linear, name="linear")
+
+    def construct(self, grad, indices):
+        out = self.sparse_apply_ftrl(self.var, self.accum, self.linear, grad, indices)
+        return out
+
+
 @pytest.mark.level0
 @pytest.mark.platform_x86_cpu
 @pytest.mark.env_onecard
@@ -340,3 +353,59 @@ def test_fused_sparse_ftrl_dynamic():
                            [[-0.00598256, -0.00598256]],
                            [[1., 1.]]]).astype(np.float32)
     assert np.allclose(net.var.data.asnumpy(), expect_var)
+
+
+@pytest.mark.level0
+@pytest.mark.platform_x86_cpu
+@pytest.mark.env_onecard
+def test_sparse_apply_ftrl():
+    """
+    Feature: SparseApplyFtrl
+    Description: normal params, attr and input
+    Expectation: the result meets the expectation
+    """
+    context.set_context(mode=context.GRAPH_MODE, device_target="CPU")
+
+    grad_np = np.ones([3, 3, 3])
+    indice_np = [0, 1, 2]
+    var_np = np.ones([3, 3, 3])
+    accum_np = np.ones([3, 3, 3])
+    linear_np = np.ones([3, 3, 3])
+
+    # test1: var/accum/linear/gradient are float32 and indices is int32.
+    gradient = Tensor(grad_np, dtype=mstype.float32)
+    indices = Tensor(indice_np, dtype=mstype.int32)
+    var = Tensor(var_np, dtype=mstype.float32)
+    accum = Tensor(accum_np, dtype=mstype.float32)
+    linear = Tensor(linear_np, dtype=mstype.float32)
+    sparse_apply_ftrl = SparseApplyFtrlNet(var, accum, linear)
+    out = sparse_apply_ftrl(gradient, indices)
+    expect_var = np.array([[[0.291479, 0.291479, 0.291479],
+                            [0.291479, 0.291479, 0.291479],
+                            [0.291479, 0.291479, 0.291479]],
+                           [[0.291479, 0.291479, 0.291479],
+                            [0.291479, 0.291479, 0.291479],
+                            [0.291479, 0.291479, 0.291479]],
+                           [[0.291479, 0.291479, 0.291479],
+                            [0.291479, 0.291479, 0.291479],
+                            [0.291479, 0.291479, 0.291479]]]).astype(np.float32)
+    assert np.all(out[0].asnumpy() == expect_var)
+
+    # test2: var/accum/linear/gradient are float16 and indices is int32.
+    gradient = Tensor(grad_np, dtype=mstype.float16)
+    indices = Tensor(indice_np, dtype=mstype.int32)
+    var = Tensor(var_np, dtype=mstype.float16)
+    accum = Tensor(accum_np, dtype=mstype.float16)
+    linear = Tensor(linear_np, dtype=mstype.float16)
+    sparse_apply_ftrl = SparseApplyFtrlNet(var, accum, linear)
+    out = sparse_apply_ftrl(gradient, indices)
+    expect_var = np.array([[[0.2915, 0.2915, 0.2915],
+                            [0.2915, 0.2915, 0.2915],
+                            [0.2915, 0.2915, 0.2915]],
+                           [[0.2915, 0.2915, 0.2915],
+                            [0.2915, 0.2915, 0.2915],
+                            [0.2915, 0.2915, 0.2915]],
+                           [[0.2915, 0.2915, 0.2915],
+                            [0.2915, 0.2915, 0.2915],
+                            [0.2915, 0.2915, 0.2915]]]).astype(np.float16)
+    assert np.all(out[0].asnumpy() == expect_var)
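The expected value 0.291479 in test1 follows directly from one FTRL step with var = accum = linear = grad = 1, lr = 0.001, l1 = l2 = 0, lr_power = -0.5; a quick arithmetic check, assuming the standard update:

    accum_new = 1.0 + 1.0 * 1.0                      # accum + grad^2 = 2
    sigma = (accum_new ** 0.5 - 1.0 ** 0.5) / 0.001  # (sqrt(2) - 1) / lr ~= 414.2136
    linear = 1.0 + 1.0 - sigma * 1.0                 # ~= -412.2136
    quadratic = accum_new ** 0.5 / 0.001             # sqrt(2) / lr (l2 = 0)
    var = (0.0 - linear) / quadratic                 # l1 = 0 -> ~0.291479
    print(var)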
@@ -25,9 +25,9 @@
 
 namespace mindspore {
 namespace kernel {
-class SparseApplyFtrlCpuKernelTest : public UT::Common {
+class FusedSparseFtrlCpuKernelTest : public UT::Common {
  public:
-  SparseApplyFtrlCpuKernelTest() : sparse_ftrl_(std::make_shared<SparseApplyFtrlCpuKernelMod>()) {}
+  FusedSparseFtrlCpuKernelTest() : sparse_ftrl_(std::make_shared<FusedSparseFtrlCpuKernelMod>()) {}
 
   void SetUp() override {
     sparse_ftrl_->lr_ = 0.001;
@@ -104,10 +104,13 @@ class SparseApplyFtrlCpuKernelTest : public UT::Common {
   std::vector<AddressPtr> outputs_;
   std::vector<KernelTensorPtr> kernel_tensor_inputs_;
   std::vector<KernelTensorPtr> kernel_tensor_outputs_;
-  std::shared_ptr<SparseApplyFtrlCpuKernelMod> sparse_ftrl_;
+  std::shared_ptr<FusedSparseFtrlCpuKernelMod> sparse_ftrl_;
 };
 
-TEST_F(SparseApplyFtrlCpuKernelTest, dense_test) {
+/// Feature: FusedSparseFtrl
+/// Description: Run FusedSparseFtrl
+/// Expectation: pass
+TEST_F(FusedSparseFtrlCpuKernelTest, dense_test) {
   for (size_t i = 0; i < 3 * 3 * 3; ++i) {
     var_.push_back(1.0);
     accum_.push_back(1.0);
@@ -136,7 +139,10 @@ TEST_F(SparseApplyFtrlCpuKernelTest, dense_test) {
   }
 }
 
-TEST_F(SparseApplyFtrlCpuKernelTest, sparse_test1) {
+/// Feature: FusedSparseFtrl
+/// Description: Run FusedSparseFtrl
+/// Expectation: pass
+TEST_F(FusedSparseFtrlCpuKernelTest, sparse_test1) {
   for (size_t i = 0; i < 3 * 3 * 3; ++i) {
     var_.push_back(1.0);
     accum_.push_back(1.0);
@@ -173,7 +179,10 @@ TEST_F(SparseApplyFtrlCpuKernelTest, sparse_test1) {
   }
 }
 
-TEST_F(SparseApplyFtrlCpuKernelTest, sparse_test2) {
+/// Feature: FusedSparseFtrl
+/// Description: Run FusedSparseFtrl
+/// Expectation: pass
+TEST_F(FusedSparseFtrlCpuKernelTest, sparse_test2) {
   for (size_t i = 0; i < 3 * 3 * 3; ++i) {
     var_.push_back(1.0);
     accum_.push_back(1.0);