!28894 add fc fusion with add

Merge pull request !28894 from wangyanling/fcaddfusion
2022-01-18 01:17:59 +00:00 · 2022-01-18 01:17:59 +00:00 · 72d0d70a6b
parent dd198f2a30 891d7f3b30
commit 72d0d70a6b
7 changed files with 261 additions and 21 deletions
--- a/mindspore/lite/tools/converter/anf_transform.cc
+++ b/mindspore/lite/tools/converter/anf_transform.cc
@ -59,6 +59,7 @@
 #include "tools/optimizer/fusion/scale_activation_fusion.h"
 #include "tools/optimizer/fusion/scale_scale_fusion.h"
 #include "tools/optimizer/fusion/fullconnected_fusion.h"
 #include "tools/optimizer/fusion/fullconnected_add_fusion.h"
 #include "tools/optimizer/fusion/add_concat_activation_fusion.h"
 #include "tools/optimizer/fusion/matmul_activation_fusion.h"
 #include "tools/optimizer/fusion/activation_fusion.h"
@ -217,6 +218,7 @@ int AnfTransform::RunFusionPass(const FuncGraphPtr &old_graph, const converter::
  fusion_pm->AddPass(std::make_shared<opt::ScaleActivationFusion>());
  fusion_pm->AddPass(std::make_shared<opt::ScaleScaleFusion>());
  fusion_pm->AddPass(std::make_shared<opt::FullConnectedFusion>());
  fusion_pm->AddPass(std::make_shared<opt::FullconnectedAddFusion>());
  fusion_pm->AddPass(std::make_shared<opt::TensorDotFusion>());
  fusion_pm->AddPass(std::make_shared<opt::MatMulActivationFusion>());
  optimizer->AddPassManager(fusion_pm);
--- a/mindspore/lite/tools/optimizer/common/gllo_utils.cc
+++ b/mindspore/lite/tools/optimizer/common/gllo_utils.cc
@ -1126,5 +1126,24 @@ int DetermineCertainVarInputHasInferred(const CNodePtr &cnode, size_t index, boo
  *infer_succ = infer_infos[item_index];
  return RET_OK;
 }
 // Searches the inputs of `cnode` (starting at position 1; input 0 is not inspected) for the first one
 // whose primitive matches `primitive_type`.
 // The lookup is only attempted when cnode->size() equals kInputSizeThree.
 // Returns true and stores the matching position in `*index`; returns false and leaves `*index`
 // untouched when an argument is null, the size differs, or no input matches.
 bool CheckAndGetCnodeIndex(const CNodePtr &cnode, size_t *index, const PrimitivePtr &primitive_type) {
  MS_CHECK_TRUE_RET(cnode != nullptr, false);
  MS_CHECK_TRUE_RET(index != nullptr, false);
  const bool has_expected_size = (cnode->size() == kInputSizeThree);
  if (!has_expected_size) {
    return false;
  }
  for (size_t pos = 1; pos < cnode->size(); ++pos) {
    if (!CheckPrimitiveType(cnode->input(pos), primitive_type)) {
      continue;
    }
    // First match wins.
    *index = pos;
    return true;
  }
  return false;
 }
 }  // namespace opt
 }  // namespace mindspore
--- a/mindspore/lite/tools/optimizer/common/gllo_utils.h
+++ b/mindspore/lite/tools/optimizer/common/gllo_utils.h
@ -141,6 +141,8 @@ std::pair<CNodePtr, int> GetRealCertainVarInput(const CNodePtr &cnode, size_t in
 int DetermineCertainVarInputHasInferred(const CNodePtr &cnode, size_t index, bool *infer_succ);
 // Finds the first input of `cnode` (positions 1 .. size()-1) whose primitive matches `primitive_type`.
 // Succeeds only when cnode->size() == kInputSizeThree; on success the position is written to `*index`
 // and true is returned. Returns false (without writing `*index`) on null arguments or when nothing matches.
 bool CheckAndGetCnodeIndex(const CNodePtr &cnode, size_t *index, const PrimitivePtr &primitive_type);
 template <const PrimitivePtr *prim = nullptr>
 inline bool IsSpecifiedNode(const BaseRef &n) {
  if (utils::isa<AnfNodePtr>(n)) {
--- a/mindspore/lite/tools/optimizer/fusion/fullconnected_add_fusion.cc
+++ b/mindspore/lite/tools/optimizer/fusion/fullconnected_add_fusion.cc
@ -0,0 +1,197 @@
 /**
 * Copyright 2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 #include "tools/optimizer/fusion/fullconnected_add_fusion.h"
 #include <vector>
 #include <memory>
 #include "ops/fusion/add_fusion.h"
 #include "ops/fusion/full_connection.h"
 #include "tools/optimizer/common/gllo_utils.h"
 #include "nnacl/op_base.h"
 namespace mindspore {
 namespace opt {
 namespace {
 bool IsPrimitiveProper(const CNodePtr &add_cnode, const CNodePtr &fc_cnode, int index) {
  auto add_primc = GetValueNode<PrimitiveCPtr>(add_cnode->input(0));
  MS_CHECK_TRUE_RET(add_primc != nullptr, false);
  if (IsQuantParameterNode(add_primc)) {
    MS_LOG(INFO) << add_cnode->fullname_with_scope() << " is quant node";
    return false;
  }
  auto add_param_node = add_cnode->input(kInputSizeThree - index);
  if (!utils::isa<ValueNode>(add_param_node) &&
      (!utils::isa<Parameter>(add_param_node) || !add_param_node->cast<ParameterPtr>()->default_param())) {
    return false;
  }
  auto abstract = add_param_node->abstract();
  MS_CHECK_TRUE_RET(abstract != nullptr, false);
  std::vector<int64_t> bias_shape;
  if (FetchShapeFromAbstract(abstract, &bias_shape) != lite::RET_OK) {
    MS_LOG(ERROR) << "Fetch shape from abstract failed.";
    return false;
  }
  if (bias_shape.size() > DIMENSION_1D) {
    MS_LOG(INFO) << "only support bias with shape size of 1.";
    return false;
  }
  if (fc_cnode->size() > kInputSizeThree) {
    auto fc_bias_node = fc_cnode->input(kInputIndexThree);
    if (!utils::isa<ValueNode>(fc_bias_node) &&
        (!utils::isa<Parameter>(fc_bias_node) || !fc_bias_node->cast<ParameterPtr>()->default_param())) {
      MS_LOG(INFO) << fc_cnode->fullname_with_scope() << "'s bias is not parameter";
      return false;
    }
  }
  auto fc_primc = GetValueNode<std::shared_ptr<ops::FullConnection>>(fc_cnode->input(0));
  MS_CHECK_TRUE_RET(fc_primc != nullptr, false);
  if (fc_primc->GetAttr(ops::kActivationType) != nullptr &&
      fc_primc->get_activation_type() != ActivationType::NO_ACTIVATION) {
    MS_LOG(INFO) << fc_cnode->fullname_with_scope() << " has activation attr";
    return false;
  }
  if (IsQuantParameterNode(fc_primc)) {
    MS_LOG(INFO) << fc_cnode->fullname_with_scope() << "is quant node";
    return false;
  }
  return true;
 }
 int CalNewCnodeBias(const AnfNodePtr &add_weight_node, const CNodePtr &fc_cnode) {
  MS_CHECK_TRUE_RET(add_weight_node != nullptr, RET_ERROR);
  MS_CHECK_TRUE_RET(fc_cnode != nullptr, RET_ERROR);
  auto fc_bias_node = fc_cnode->input(kInputIndexThree);
  MS_CHECK_TRUE_RET(fc_bias_node != nullptr, RET_ERROR);
  std::shared_ptr<tensor::Tensor> fc_bias_tensor = GetTensorInfo(fc_bias_node);
  MS_CHECK_TRUE_RET(fc_bias_tensor != nullptr, RET_ERROR);
  if (fc_bias_tensor->data_type() != kNumberTypeFloat32) {
    MS_LOG(INFO) << "only support float32 data type";
    return RET_ERROR;
  }
  std::vector<int64_t> fc_bias_shape = fc_bias_tensor->shape();
  auto fc_bias_data = reinterpret_cast<float *>(fc_bias_tensor->data_c());
  MS_CHECK_TRUE_RET(fc_bias_data != nullptr, RET_ERROR);
  std::shared_ptr<tensor::Tensor> add_weight_tensor = GetTensorInfo(add_weight_node);
  MS_CHECK_TRUE_RET(add_weight_tensor != nullptr, RET_ERROR);
  if (add_weight_tensor->data_type() != kNumberTypeFloat32) {
    MS_LOG(INFO) << "only support float32 data type";
    return RET_ERROR;
  }
  std::vector<int64_t> add_weight_shape = add_weight_tensor->shape();
  MS_CHECK_TRUE_RET(fc_bias_shape == add_weight_shape, RET_ERROR);
  auto add_weight_data = reinterpret_cast<float *>(add_weight_tensor->data_c());
  MS_CHECK_TRUE_RET(add_weight_data != nullptr, RET_ERROR);
  for (int64_t i = 0; i < fc_bias_shape[0]; ++i) {
    fc_bias_data[i] += add_weight_data[i];
  }
  return RET_OK;
 }
 }  // namespace
 // Pattern: an AddFusion node whose first data input is a FullConnection node, followed by any
 // remaining inputs. An empty VectorRef is returned if a pattern variable cannot be created.
 VectorRef FullconnectedAddFusion::DefineFcAddFusionPattern() const {
  auto fc_var = std::make_shared<CondVar>(IsSpecifiedNode<&prim::kPrimFullConnection>);
  MS_CHECK_TRUE_RET(fc_var != nullptr, {});
  auto add_var = std::make_shared<CondVar>(IsSpecifiedNode<&prim::kPrimAddFusion>);
  MS_CHECK_TRUE_RET(add_var != nullptr, {});
  auto remaining_inputs = std::make_shared<SeqVar>();
  MS_CHECK_TRUE_RET(remaining_inputs != nullptr, {});
  VectorRef pattern({add_var, fc_var, remaining_inputs});
  return pattern;
 }
 // Pattern: a BiasAdd node whose first data input is a FullConnection node, followed by any
 // remaining inputs. An empty VectorRef is returned if a pattern variable cannot be created.
 VectorRef FullconnectedAddFusion::DefineFcBiasAddPattern() const {
  auto fc_var = std::make_shared<CondVar>(IsSpecifiedNode<&prim::kPrimFullConnection>);
  MS_CHECK_TRUE_RET(fc_var != nullptr, {});
  auto bias_add_var = std::make_shared<CondVar>(IsSpecifiedNode<&prim::kPrimBiasAdd>);
  MS_CHECK_TRUE_RET(bias_add_var != nullptr, {});
  auto remaining_inputs = std::make_shared<SeqVar>();
  MS_CHECK_TRUE_RET(remaining_inputs != nullptr, {});
  VectorRef pattern({bias_add_var, fc_var, remaining_inputs});
  return pattern;
 }
 std::unordered_map<std::string, VectorRef> FullconnectedAddFusion::DefinePatterns() const {
  std::unordered_map<std::string, VectorRef> patterns;
  patterns["FcAddFusionPatternName"] = DefineFcAddFusionPattern();
  patterns["FcBiasAddPatternName"] = DefineFcBiasAddPattern();
  return patterns;
 }
 // Folds an AddFusion/BiasAdd node with a constant operand into the FullConnection node that feeds it.
 //   pattern_name, equiv: unused; the matched node is re-validated directly.
 //   func_graph:          graph that owns `node`; its manager performs the rewrite.
 //   node:                the matched add node.
 // When the FullConnection has no bias (size() == kInputSizeThree) the add operand is attached as its
 // bias; when it already has one (size() == kInputSizeFour) the operand is summed into that bias. An
 // activation carried by the AddFusion is moved onto the FullConnection, which then takes the add's
 // name and replaces it in the graph.
 // Always returns nullptr: the rewrite is applied through the manager rather than via the return value.
 // Every check that can fail runs before the first graph mutation, so an early return never leaves the
 // operand folded into the FullConnection while the add node is still present.
 AnfNodePtr FullconnectedAddFusion::Process(const std::string &pattern_name, const FuncGraphPtr &func_graph,
                                            const AnfNodePtr &node, const EquivPtr &equiv) const {
  if (func_graph == nullptr || node == nullptr) {
    lite::ReturnCode::GetSingleReturnCode()->UpdateReturnCode(lite::RET_NULL_PTR);
    return nullptr;
  }
  auto add_cnode = node->cast<CNodePtr>();
  MS_CHECK_TRUE_RET(add_cnode != nullptr, nullptr);
  if (IsMarkedTrainOp(add_cnode)) {
    return nullptr;
  }
  const bool is_add_fusion = CheckPrimitiveType(node, prim::kPrimAddFusion);
  if (!is_add_fusion && !CheckPrimitiveType(node, prim::kPrimBiasAdd)) {
    return nullptr;
  }
  size_t index = 0;
  if (!CheckAndGetCnodeIndex(add_cnode, &index, prim::kPrimFullConnection)) {
    return nullptr;
  }
  auto fc_cnode = add_cnode->input(index)->cast<CNodePtr>();
  // Checked with an explicit early return instead of MS_ASSERT, so the guard does not depend on
  // assertions being enabled in the build.
  MS_CHECK_TRUE_RET(fc_cnode != nullptr, nullptr);
  if (IsMarkedTrainOp(fc_cnode)) {
    return nullptr;
  }
  if (IsMultiOutputTensors(func_graph, fc_cnode)) {
    return nullptr;
  }
  if (!IsPrimitiveProper(add_cnode, fc_cnode, index)) {
    return nullptr;
  }
  auto manager = func_graph->manager();
  MS_CHECK_TRUE_RET(manager != nullptr, nullptr);

  // Resolve the primitives needed for the activation hand-over before touching the graph.
  // `fc_primc` stays null when there is no activation to move.
  std::shared_ptr<ops::AddFusion> add_primc = nullptr;
  std::shared_ptr<ops::FullConnection> fc_primc = nullptr;
  if (is_add_fusion) {
    add_primc = GetValueNode<std::shared_ptr<ops::AddFusion>>(add_cnode->input(0));
    MS_CHECK_TRUE_RET(add_primc != nullptr, nullptr);
    if (add_primc->GetAttr(ops::kActivationType) != nullptr &&
        add_primc->get_activation_type() != ActivationType::NO_ACTIVATION) {
      fc_primc = GetValueNode<std::shared_ptr<ops::FullConnection>>(fc_cnode->input(0));
      MS_CHECK_TRUE_RET(fc_primc != nullptr, nullptr);
    }
  }

  auto add_param_node = add_cnode->input(kInputSizeThree - index);
  if (fc_cnode->size() == kInputSizeThree) {
    // No bias yet: the add operand becomes the bias input.
    manager->AddEdge(fc_cnode, add_param_node);
  } else if (fc_cnode->size() == kInputSizeFour) {
    if (CalNewCnodeBias(add_param_node, fc_cnode) != RET_OK) {
      MS_LOG(INFO) << add_cnode->fullname_with_scope() << " failed to fusion with " << fc_cnode->fullname_with_scope();
      return nullptr;
    }
  } else {
    // Any other input count cannot absorb the operand; replacing the add here would silently drop it.
    MS_LOG(INFO) << fc_cnode->fullname_with_scope() << " has unsupported input size, skip fusion";
    return nullptr;
  }
  if (fc_primc != nullptr) {
    fc_primc->set_activation_type(add_primc->get_activation_type());
  }
  fc_cnode->set_fullname_with_scope(node->fullname_with_scope());
  (void)manager->Replace(node, fc_cnode);
  return nullptr;
 }
 }  // namespace opt
 }  // namespace mindspore
--- a/mindspore/lite/tools/optimizer/fusion/fullconnected_add_fusion.h
+++ b/mindspore/lite/tools/optimizer/fusion/fullconnected_add_fusion.h
@ -0,0 +1,40 @@
 /**
 * Copyright 2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 #ifndef MINDSPORE_LITE_TOOLS_OPTIMIZER_FUSION_FULLCONNECTED_ADD_FUSION_H_
 #define MINDSPORE_LITE_TOOLS_OPTIMIZER_FUSION_FULLCONNECTED_ADD_FUSION_H_
 #include <string>
 #include <unordered_map>
 #include "backend/optimizer/common/optimizer.h"
 #include "tools/optimizer/common/multiple_pattern_process_pass.h"
 namespace mindspore::opt {
 // Converter fusion pass that matches a FullConnection node followed by AddFusion or BiasAdd and
 // rewrites the pair in Process().
 class FullconnectedAddFusion : public MultiplePatternProcessPass {
 public:
  explicit FullconnectedAddFusion(const std::string &name = "FullconnectedAddFusion", bool multigraph = true)
      : MultiplePatternProcessPass(name, multigraph) {}
  ~FullconnectedAddFusion() override = default;

 private:
  // Pattern builders, one per supported add primitive.
  VectorRef DefineFcAddFusionPattern() const;
  VectorRef DefineFcBiasAddPattern() const;

  // MultiplePatternProcessPass interface: pattern registration and the rewrite for a matched node.
  std::unordered_map<std::string, VectorRef> DefinePatterns() const override;
  AnfNodePtr Process(const std::string &pattern_name, const FuncGraphPtr &func_graph, const AnfNodePtr &node,
                     const EquivPtr &equiv) const override;
 };
 }  // namespace mindspore::opt
 #endif  // MINDSPORE_LITE_TOOLS_OPTIMIZER_FUSION_FULLCONNECTED_ADD_FUSION_H_
--- a/mindspore/lite/tools/optimizer/fusion/fullconnected_fusion.cc
+++ b/mindspore/lite/tools/optimizer/fusion/fullconnected_fusion.cc
@ -19,7 +19,6 @@
 #include <vector>
 #include "tools/common/tensor_util.h"
 #include "ops/fusion/full_connection.h"
 #include "ops/fusion/conv2d_fusion.h"
 #include "tools/optimizer/common/gllo_utils.h"
 #include "tools/converter/quant_param_holder.h"
 #include "nnacl/op_base.h"
--- a/mindspore/lite/tools/optimizer/fusion/matmul_add_fusion.cc
+++ b/mindspore/lite/tools/optimizer/fusion/matmul_add_fusion.cc
@ -25,25 +25,6 @@
 namespace mindspore {
 namespace opt {
 namespace {
 // Searches the inputs of `cnode` (starting at position 1) for the first MatMulFusion node.
 // Only attempted when cnode->size() equals kInputSizeThree. On success the position is stored in
 // `*index` and true is returned; otherwise false is returned and `*index` is left untouched.
 bool CheckAndGetMatMulIndex(const CNodePtr &cnode, size_t *index) {
  MS_ASSERT(cnode != nullptr && index != nullptr);
  const bool has_expected_size = (cnode->size() == kInputSizeThree);
  if (!has_expected_size) {
    return false;
  }
  for (size_t pos = 1; pos < cnode->size(); ++pos) {
    if (!CheckPrimitiveType(cnode->input(pos), prim::kPrimMatMulFusion)) {
      continue;
    }
    // First match wins.
    *index = pos;
    return true;
  }
  return false;
 }
 bool IsPrimitiveProper(const CNodePtr &add_cnode, const CNodePtr &matmul_cnode, int index) {
  auto add_primc = GetValueNode<PrimitiveCPtr>(add_cnode->input(0));
  MS_CHECK_TRUE_RET(add_primc != nullptr, false);
@ -169,7 +150,7 @@ AnfNodePtr MatMulAddFusion::Process(const std::string &pattern_name, const FuncG
  }
  size_t index = 0;
-  if (!CheckAndGetMatMulIndex(add_cnode, &index)) {
+  if (!CheckAndGetCnodeIndex(add_cnode, &index, prim::kPrimMatMulFusion)) {
    return nullptr;
  }
  auto matmul_cnode = add_cnode->input(index)->cast<CNodePtr>();