From f0db19f1eec71328483e8d2b0f3e73482bae82aa Mon Sep 17 00:00:00 2001
From: greatpanc <changpan_yin@163.com>
Date: Fri, 30 Sep 2022 09:55:17 +0800
Subject: [PATCH] fix mul_add_fusion: check whether the mul input is InferDone

---
 .../cpu/kernel/nnacl/infer/matmul_infer.c     |  3 +-
 .../cpu/kernel/nnacl/infer/transpose_infer.c  | 15 +++--
 .../device/cpu/kernel/nnacl/transpose.h       |  2 +
 .../delegate/npu/npu_converter_utils.cc       |  4 +-
 .../runtime/delegate/npu/op/activation_npu.cc |  5 +-
 .../runtime/delegate/npu/op/arithmetic_npu.cc | 61 ++++++++++++++-----
 .../runtime/delegate/npu/op/arithmetic_npu.h  |  4 +-
 .../src/runtime/delegate/npu/op/concat_npu.cc |  4 +-
 .../src/runtime/delegate/npu/op/concat_npu.h  |  4 +-
 .../lite/src/runtime/delegate/npu/op/npu_op.h |  4 +-
 .../src/runtime/delegate/npu/op/pad_npu.cc    |  4 +-
 .../src/runtime/delegate/npu/op/pad_npu.h     |  4 +-
 .../src/runtime/delegate/npu/op/reduce_npu.cc |  4 +-
 .../src/runtime/delegate/npu/op/reduce_npu.h  |  4 +-
 .../runtime/delegate/npu/op/softmax_npu.cc    |  4 +-
 .../src/runtime/delegate/npu/op/softmax_npu.h |  4 +-
 .../src/runtime/delegate/npu/op/split_npu.cc  |  4 +-
 .../src/runtime/delegate/npu/op/split_npu.h   |  4 +-
 .../delegate/npu/op/strided_slice_npu.cc      |  4 +-
 .../delegate/npu/op/strided_slice_npu.h       |  4 +-
 .../delegate/npu/pass/npu_fusion_pass.cc      |  7 ++-
 .../parser/onnx/onnx_matmul_parser.cc         |  4 +-
 .../tools/optimizer/fusion/mul_add_fusion.cc  | 17 +++++-
 .../tools/optimizer/fusion/mul_add_fusion.h   |  3 +-
 .../graph/decrease_transpose_algo.cc          |  4 +-
 25 files changed, 119 insertions(+), 62 deletions(-)

diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/infer/matmul_infer.c b/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/infer/matmul_infer.c
index 85ae3e39275..92e1de2bbbd 100644
--- a/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/infer/matmul_infer.c
+++ b/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/infer/matmul_infer.c
@@ -143,7 +143,8 @@ int MatmulInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC *
   for (int i = 0; i < diff; ++i) {
     ShapeInsert(in->shape_, &in->shape_size_, 0, 1);
   }
-  SetDataTypeFormat(output, input0);
+  TensorC *input = input1->data_ == NULL ? input1 : input0;  // transfer the input which comes from the other node.
+  SetDataTypeFormat(output, input);
   if (parameter->quant_type_ == QuantType_QUANT_DYNAMIC || parameter->quant_type_ == QuantType_QUANT_WEIGHT) {
     output->data_type_ = kNumberTypeFloat32;
   }
diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/infer/transpose_infer.c b/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/infer/transpose_infer.c
index 0778aa56aeb..2ce3fb75532 100644
--- a/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/infer/transpose_infer.c
+++ b/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/infer/transpose_infer.c
@@ -1,5 +1,5 @@
 /**
- * Copyright 2021 Huawei Technologies Co., Ltd
+ * Copyright 2021-2022 Huawei Technologies Co., Ltd
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -66,21 +66,17 @@ int TransposeInferShape(const TensorC *const *inputs, size_t inputs_size, Tensor
   const TensorC *perm_tensor = inputs[1];
   const int32_t *perm_data = (int32_t *)perm_tensor->data_;
   const int perms_num = perm_tensor->shape_[0];
-  if (perm_tensor->shape_size_ == 0) {
-    return NNACL_INFER_INVALID;
-  }
+  MS_CHECK_TRUE_RET(perm_tensor->shape_size_ != 0, NNACL_INFER_INVALID);
   if (perms_num != 0 && perm_data == NULL) {
     return NNACL_INFER_INVALID;
   }
   int perm[MAX_TRANSPOSE_DIM_SIZE] = {0};
   size_t perm_size = 0;
   for (int i = 0; i < perms_num; i++) {
-    if (perm_data[i] >= perms_num) {
-      return NNACL_ERR;
-    }
+    MS_CHECK_TRUE_RET(perm_data[i] < perms_num, NNACL_ERR);
     ShapePush(perm, &perm_size, perm_data[i]);
   }
-  if (perms_num == 4) {
+  if (perms_num == PERM_NUM_FOUR) {
     const int nchw2nhwc[4] = {0, 2, 3, 1};
     const int nhwc2nchw[4] = {0, 3, 1, 2};
     const int trans3d[3] = {0, 2, 1};
@@ -95,6 +91,9 @@ int TransposeInferShape(const TensorC *const *inputs, size_t inputs_size, Tensor
       ShapeSet(perm, &perm_size, trans3d, 3);
     }
   }
+  if (perms_num == PERM_NUM_THREE && perm[0] == 0 && perm[1] == 2) {
+    output->format_ = input->format_ == Format_NCHW ? Format_NHWC : Format_NCHW;
+  }
   if (parameter->quant_type_ == QuantType_QUANT_WEIGHT) {
     output->data_type_ = kNumberTypeFloat32;
   }
diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/transpose.h b/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/transpose.h
index cbc0f96c8d4..8c94102deed 100644
--- a/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/transpose.h
+++ b/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/transpose.h
@@ -22,6 +22,8 @@
 // MAX_TRANSPOSE_SERIAL_SIZE = 64 * 3 * 512 * 512
 #define MAX_TRANSPOSE_SERIAL_SIZE 50331648
 #define MAX_TRANSPOSE_DIM_SIZE 20
+#define PERM_NUM_THREE 3  // perm element count compared in TransposeInferShape for the 3-entry perm case
+#define PERM_NUM_FOUR 4  // perm element count compared in TransposeInferShape for the 4-entry perm case
 
 typedef struct TransposeParameter {
   // primitive parameter
diff --git a/mindspore/lite/src/runtime/delegate/npu/npu_converter_utils.cc b/mindspore/lite/src/runtime/delegate/npu/npu_converter_utils.cc
index 6139666ca7e..90b18bc1283 100644
--- a/mindspore/lite/src/runtime/delegate/npu/npu_converter_utils.cc
+++ b/mindspore/lite/src/runtime/delegate/npu/npu_converter_utils.cc
@@ -1,5 +1,5 @@
 /**
- * Copyright 2020-2021 Huawei Technologies Co., Ltd
+ * Copyright 2020-2022 Huawei Technologies Co., Ltd
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -250,6 +250,8 @@ int ConverterToNPUActivationMode(schema::ActivationType type) {
       return RELU6;
     case schema::ActivationType_ELU:
       return ELU;
+    case schema::ActivationType_GELU:
+      return GELU;
     default:
       return ACTIVATION_INVALID;
   }
diff --git a/mindspore/lite/src/runtime/delegate/npu/op/activation_npu.cc b/mindspore/lite/src/runtime/delegate/npu/op/activation_npu.cc
index e7b05b9507f..1b1caadc272 100644
--- a/mindspore/lite/src/runtime/delegate/npu/op/activation_npu.cc
+++ b/mindspore/lite/src/runtime/delegate/npu/op/activation_npu.cc
@@ -1,5 +1,5 @@
 /**
- * Copyright 2020-2021 Huawei Technologies Co., Ltd
+ * Copyright 2020-2022 Huawei Technologies Co., Ltd
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -28,7 +28,8 @@ int ActivationNPUOp::IsSupport(const schema::Primitive *primitive, const std::ve
   if (act_type_ != schema::ActivationType_RELU && act_type_ != schema::ActivationType_RELU6 &&
       act_type_ != schema::ActivationType_SIGMOID && act_type_ != schema::ActivationType_TANH &&
       act_type_ != schema::ActivationType_HSIGMOID && act_type_ != schema::ActivationType_LEAKY_RELU &&
-      act_type_ != schema::ActivationType_SWISH && act_type_ != schema::ActivationType_ELU) {
+      act_type_ != schema::ActivationType_SWISH && act_type_ != schema::ActivationType_ELU &&
+      act_type_ != schema::ActivationType_GELU) {
     MS_LOG(WARNING) << "Unsupported activation type for activation op " << name_ << "when running npu";
     return RET_NOT_SUPPORT;
   }
diff --git a/mindspore/lite/src/runtime/delegate/npu/op/arithmetic_npu.cc b/mindspore/lite/src/runtime/delegate/npu/op/arithmetic_npu.cc
index 2f9661ff777..fb8c9f21c44 100644
--- a/mindspore/lite/src/runtime/delegate/npu/op/arithmetic_npu.cc
+++ b/mindspore/lite/src/runtime/delegate/npu/op/arithmetic_npu.cc
@@ -1,5 +1,5 @@
 /**
- * Copyright 2020-2021 Huawei Technologies Co., Ltd
+ * Copyright 2020-2022 Huawei Technologies Co., Ltd
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -16,6 +16,9 @@
 
 #include "src/runtime/delegate/npu/op/arithmetic_npu.h"
 #include "src/runtime/delegate/npu/npu_converter_utils.h"
+#include "src/runtime/delegate/delegate_utils.h"
+#include "src/runtime/delegate/npu/transpose_kernel.h"
+
 namespace mindspore {
 constexpr int ARITHMETIC_INPUT_NUM = 2;
 constexpr int MAX_HW_SIZE = 1664;
@@ -24,20 +27,24 @@ int ArithmeticNPUOp::IsSupport(const schema::Primitive *primitive, const std::ve
   auto in_shape_0 = in_tensors[0].Shape();
   auto in_shape_1 = in_tensors[1].Shape();
   auto out_shape = out_tensors[0].Shape();
-  if (in_shape_0.size() != 0 && in_shape_1.size() != 0 && in_shape_0.size() != in_shape_1.size()) {
-    MS_LOG(WARNING) << name_ << " for the two inputs, the dimension size must be same. size 1 is: " << in_shape_0.size()
-                    << " size 2 is: " << in_shape_1.size();
-    return RET_NOT_SUPPORT;
-  }
   // a hidden limitation in npu bottom implementation
-  if (type_ == schema::PrimitiveType_MulFusion && out_shape.size() == NPU_SHAPE_SIZE) {
-    bool is_nhwc = out_tensors[0].format() == Format::NHWC;
-    auto out_h = is_nhwc ? out_shape.at(NHWC_H) : out_shape.at(NCHW_H);
-    auto out_w = is_nhwc ? out_shape.at(NHWC_W) : out_shape.at(NCHW_W);
-    // two inputs have different shape with the output, which means both of them need broadcast
-    if (in_shape_0 != out_shape && in_shape_1 != out_shape && out_h * out_w > MAX_HW_SIZE) {
-      MS_LOG(WARNING) << "The size of out_height * out_width is larger than the max value (1664) that npu supports "
-                         "during broadcasting.";
+  if (type_ == schema::PrimitiveType_MulFusion) {
+    if (out_shape.size() == NPU_SHAPE_SIZE) {
+      bool is_nhwc = out_tensors[0].format() == Format::NHWC;
+      auto out_h = is_nhwc ? out_shape.at(NHWC_H) : out_shape.at(NCHW_H);
+      auto out_w = is_nhwc ? out_shape.at(NHWC_W) : out_shape.at(NCHW_W);
+      // two inputs have different shape with the output, which means both of them need broadcast
+      if (in_shape_0 != out_shape && in_shape_1 != out_shape && out_h * out_w > MAX_HW_SIZE) {
+        MS_LOG(WARNING) << "The size of out_height * out_width is larger than the max value (1664) that npu supports "
+                           "during broadcasting.";
+        return RET_NOT_SUPPORT;
+      }
+    }
+  } else {
+    if (in_shape_0.size() != 0 && in_shape_1.size() != 0 && in_shape_0.size() != in_shape_1.size()) {
+      MS_LOG(WARNING) << name_
+                      << " for the two inputs, the dimension size must be same. size 1 is: " << in_shape_0.size()
+                      << " size 2 is: " << in_shape_1.size();
       return RET_NOT_SUPPORT;
     }
   }
@@ -240,6 +247,32 @@ ge::Operator *ArithmeticNPUOp::GetNPUOp() {
   return act_;
 }
 
+// Repacks each 4D const input NHWC->NCHW with the fp32 pack routine; new tensors are appended to all_tensors.
+int ArithmeticNPUOp::HandleAxisAndConstantInputs(std::vector<mindspore::MSTensor *> *all_tensors) {
+  for (size_t i = 0; i < inputs_.size(); i++) {
+    auto in_tensor = inputs_.at(i);
+    auto shape = in_tensor.Shape();
+    if (!in_tensor.IsConst() || shape.size() != DIMENSION_4D) {
+      continue;
+    }
+    auto new_shape = {shape[NHWC_N], shape[NHWC_C], shape[NHWC_H], shape[NHWC_W]};
+    auto nh2nc_tensor =
+      mindspore::MSTensor::CreateTensor(in_tensor.Name() + "_nh2nc", in_tensor.DataType(), new_shape, nullptr, 0);
+    if (nh2nc_tensor == nullptr) {
+      MS_LOG(ERROR) << "New nchw tensor failed when inserting nchw2nhwc op.";
+      return RET_ERROR;
+    }
+    all_tensors->push_back(nh2nc_tensor);  // registered first so the early return below cannot orphan it.
+    auto src_data = in_tensor.MutableData();
+    auto dst_data = nh2nc_tensor->MutableData();
+    MS_CHECK_TRUE_RET(src_data != nullptr && dst_data != nullptr, RET_ERROR);
+    PackNHWCToNCHWFp32(src_data, dst_data, shape[NHWC_N], shape[NHWC_H] * shape[NHWC_W], shape[NHWC_C]);
+    nh2nc_tensor->SetFormat(NCHW);
+    inputs_.at(i) = *nh2nc_tensor;
+  }
+  return RET_OK;
+}
+
 ArithmeticNPUOp::~ArithmeticNPUOp() {
   if (op_ != nullptr) {
     delete op_;
diff --git a/mindspore/lite/src/runtime/delegate/npu/op/arithmetic_npu.h b/mindspore/lite/src/runtime/delegate/npu/op/arithmetic_npu.h
index f546e724b3e..8333231d3c6 100644
--- a/mindspore/lite/src/runtime/delegate/npu/op/arithmetic_npu.h
+++ b/mindspore/lite/src/runtime/delegate/npu/op/arithmetic_npu.h
@@ -1,5 +1,5 @@
 /**
- * Copyright 2020-2021 Huawei Technologies Co., Ltd
+ * Copyright 2020-2022 Huawei Technologies Co., Ltd
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -47,6 +47,8 @@ class ArithmeticNPUOp : public NPUOp {
 
   ge::Operator *GetNPUOp() override;
 
+  int HandleAxisAndConstantInputs(std::vector<mindspore::MSTensor *> *all_tensors) override;
+
  private:
   int SetActivation();
   schema::ActivationType act_type_ = schema::ActivationType_NO_ACTIVATION;
diff --git a/mindspore/lite/src/runtime/delegate/npu/op/concat_npu.cc b/mindspore/lite/src/runtime/delegate/npu/op/concat_npu.cc
index 1ffa4739f2a..af4e02e1acb 100644
--- a/mindspore/lite/src/runtime/delegate/npu/op/concat_npu.cc
+++ b/mindspore/lite/src/runtime/delegate/npu/op/concat_npu.cc
@@ -1,5 +1,5 @@
 /**
- * Copyright 2020-2021 Huawei Technologies Co., Ltd
+ * Copyright 2020-2022 Huawei Technologies Co., Ltd
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -57,7 +57,7 @@ int ConcatNPUOp::SetNPUInputs(const std::vector<mindspore::MSTensor> &in_tensors
 
 ge::Operator *ConcatNPUOp::GetNPUOp() { return this->concat_; }
 
-int ConcatNPUOp::HandleAxis() {
+int ConcatNPUOp::HandleAxisAndConstantInputs(std::vector<mindspore::MSTensor *> *all_tensors) {
   axis_ = TransFormAxis(axis_);
   if (axis_ == NCHW_INVALID) {
     MS_LOG(ERROR) << "Transform axis for concat op failed.";
diff --git a/mindspore/lite/src/runtime/delegate/npu/op/concat_npu.h b/mindspore/lite/src/runtime/delegate/npu/op/concat_npu.h
index e92d6bc65ba..c1c26fe3fc9 100644
--- a/mindspore/lite/src/runtime/delegate/npu/op/concat_npu.h
+++ b/mindspore/lite/src/runtime/delegate/npu/op/concat_npu.h
@@ -1,5 +1,5 @@
 /**
- * Copyright 2020-2021 Huawei Technologies Co., Ltd
+ * Copyright 2020-2022 Huawei Technologies Co., Ltd
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -45,7 +45,7 @@ class ConcatNPUOp : public NPUOp {
 
   ge::Operator *GetNPUOp() override;
 
-  int HandleAxis() override;
+  int HandleAxisAndConstantInputs(std::vector<mindspore::MSTensor *> *all_tensors) override;
 
  private:
   hiai::op::ConcatD *concat_ = nullptr;
diff --git a/mindspore/lite/src/runtime/delegate/npu/op/npu_op.h b/mindspore/lite/src/runtime/delegate/npu/op/npu_op.h
index 0576f539fce..ccd23ccb16e 100644
--- a/mindspore/lite/src/runtime/delegate/npu/op/npu_op.h
+++ b/mindspore/lite/src/runtime/delegate/npu/op/npu_op.h
@@ -1,5 +1,5 @@
 /**
- * Copyright 2020-2021 Huawei Technologies Co., Ltd
+ * Copyright 2020-2022 Huawei Technologies Co., Ltd
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -77,7 +77,7 @@ class NPUOp {
 
   virtual ge::Operator *GetNPUOp() { return nullptr; }
 
-  virtual int HandleAxis() { return RET_OK; }
+  virtual int HandleAxisAndConstantInputs(std::vector<mindspore::MSTensor *> *all_tensors) { return RET_OK; }
 
   void set_inputs(const std::vector<mindspore::MSTensor> &in_tensors) { this->inputs_ = in_tensors; }
 
diff --git a/mindspore/lite/src/runtime/delegate/npu/op/pad_npu.cc b/mindspore/lite/src/runtime/delegate/npu/op/pad_npu.cc
index d068c030548..9411e0f7af8 100644
--- a/mindspore/lite/src/runtime/delegate/npu/op/pad_npu.cc
+++ b/mindspore/lite/src/runtime/delegate/npu/op/pad_npu.cc
@@ -1,5 +1,5 @@
 /**
- * Copyright 2020-2021 Huawei Technologies Co., Ltd
+ * Copyright 2020-2022 Huawei Technologies Co., Ltd
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -116,7 +116,7 @@ int PadNPUOp::SetNPUInputs(const std::vector<mindspore::MSTensor> &in_tensors,
 
 ge::Operator *PadNPUOp::GetNPUOp() { return this->pad_; }
 
-int PadNPUOp::HandleAxis() {
+int PadNPUOp::HandleAxisAndConstantInputs(std::vector<mindspore::MSTensor *> *all_tensors) {
   if (paddings_vec_.size() != PAD_SIZE) {
     return RET_ERROR;
   }
diff --git a/mindspore/lite/src/runtime/delegate/npu/op/pad_npu.h b/mindspore/lite/src/runtime/delegate/npu/op/pad_npu.h
index 3ef2c2556c2..b0e3e518a30 100644
--- a/mindspore/lite/src/runtime/delegate/npu/op/pad_npu.h
+++ b/mindspore/lite/src/runtime/delegate/npu/op/pad_npu.h
@@ -1,5 +1,5 @@
 /**
- * Copyright 2020-2021 Huawei Technologies Co., Ltd
+ * Copyright 2020-2022 Huawei Technologies Co., Ltd
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -42,7 +42,7 @@ class PadNPUOp : public NPUOp {
 
   ge::Operator *GetNPUOp() override;
 
-  int HandleAxis() override;
+  int HandleAxisAndConstantInputs(std::vector<mindspore::MSTensor *> *all_tensors) override;
 
  private:
   hiai::op::PadV2 *pad_ = nullptr;
diff --git a/mindspore/lite/src/runtime/delegate/npu/op/reduce_npu.cc b/mindspore/lite/src/runtime/delegate/npu/op/reduce_npu.cc
index 090297bbac3..02e9b9a27d6 100644
--- a/mindspore/lite/src/runtime/delegate/npu/op/reduce_npu.cc
+++ b/mindspore/lite/src/runtime/delegate/npu/op/reduce_npu.cc
@@ -1,5 +1,5 @@
 /**
- * Copyright 2020-2021 Huawei Technologies Co., Ltd
+ * Copyright 2020-2022 Huawei Technologies Co., Ltd
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -88,7 +88,7 @@ int ReduceNPUOp::SetNPUInputs(const std::vector<mindspore::MSTensor> &in_tensors
 
 ge::Operator *ReduceNPUOp::GetNPUOp() { return this->reduce_; }
 
-int ReduceNPUOp::HandleAxis() {
+int ReduceNPUOp::HandleAxisAndConstantInputs(std::vector<mindspore::MSTensor *> *all_tensors) {
   auto reduce_axes = inputs_.at(1);
   int num_axes = reduce_axes.Shape().at(0);
   MS_CHECK_TRUE_RET(reduce_axes.MutableData() != nullptr, RET_ERROR);
diff --git a/mindspore/lite/src/runtime/delegate/npu/op/reduce_npu.h b/mindspore/lite/src/runtime/delegate/npu/op/reduce_npu.h
index 6732e9d9f72..a7d0752b4e7 100644
--- a/mindspore/lite/src/runtime/delegate/npu/op/reduce_npu.h
+++ b/mindspore/lite/src/runtime/delegate/npu/op/reduce_npu.h
@@ -1,5 +1,5 @@
 /**
- * Copyright 2020-2021 Huawei Technologies Co., Ltd
+ * Copyright 2020-2022 Huawei Technologies Co., Ltd
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -42,7 +42,7 @@ class ReduceNPUOp : public NPUOp {
 
   ge::Operator *GetNPUOp() override;
 
-  int HandleAxis() override;
+  int HandleAxisAndConstantInputs(std::vector<mindspore::MSTensor *> *all_tensors) override;
 
  private:
   schema::ReduceMode reduce_mode_ = schema::ReduceMode_ReduceMean;
diff --git a/mindspore/lite/src/runtime/delegate/npu/op/softmax_npu.cc b/mindspore/lite/src/runtime/delegate/npu/op/softmax_npu.cc
index 77279f25b2b..e919d92cfef 100644
--- a/mindspore/lite/src/runtime/delegate/npu/op/softmax_npu.cc
+++ b/mindspore/lite/src/runtime/delegate/npu/op/softmax_npu.cc
@@ -1,5 +1,5 @@
 /**
- * Copyright 2020-2021 Huawei Technologies Co., Ltd
+ * Copyright 2020-2022 Huawei Technologies Co., Ltd
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -47,7 +47,7 @@ int SoftmaxNPUOp::SetNPUInputs(const std::vector<mindspore::MSTensor> &in_tensor
 
 ge::Operator *SoftmaxNPUOp::GetNPUOp() { return this->softmax_; }
 
-int SoftmaxNPUOp::HandleAxis() {
+int SoftmaxNPUOp::HandleAxisAndConstantInputs(std::vector<mindspore::MSTensor *> *all_tensors) {
   axis_ = TransFormAxis(axis_);
   if (axis_ == NCHW_INVALID) {
     MS_LOG(ERROR) << "Transform axis for Softmax op failed.";
diff --git a/mindspore/lite/src/runtime/delegate/npu/op/softmax_npu.h b/mindspore/lite/src/runtime/delegate/npu/op/softmax_npu.h
index ca06dd86381..c8310dad2a2 100644
--- a/mindspore/lite/src/runtime/delegate/npu/op/softmax_npu.h
+++ b/mindspore/lite/src/runtime/delegate/npu/op/softmax_npu.h
@@ -1,5 +1,5 @@
 /**
- * Copyright 2020-2021 Huawei Technologies Co., Ltd
+ * Copyright 2020-2022 Huawei Technologies Co., Ltd
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -42,7 +42,7 @@ class SoftmaxNPUOp : public NPUOp {
                    const std::vector<mindspore::MSTensor> &out_tensors,
                    const std::vector<ge::Operator *> &npu_inputs) override;
 
-  int HandleAxis() override;
+  int HandleAxisAndConstantInputs(std::vector<mindspore::MSTensor *> *all_tensors) override;
 
   ge::Operator *GetNPUOp() override;
 
diff --git a/mindspore/lite/src/runtime/delegate/npu/op/split_npu.cc b/mindspore/lite/src/runtime/delegate/npu/op/split_npu.cc
index c080ccf760e..6865a28aa83 100644
--- a/mindspore/lite/src/runtime/delegate/npu/op/split_npu.cc
+++ b/mindspore/lite/src/runtime/delegate/npu/op/split_npu.cc
@@ -1,5 +1,5 @@
 /**
- * Copyright 2020-2021 Huawei Technologies Co., Ltd
+ * Copyright 2020-2022 Huawei Technologies Co., Ltd
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -86,7 +86,7 @@ int SplitNPUOp::SetNPUInputs(const std::vector<mindspore::MSTensor> &in_tensors,
 
 ge::Operator *SplitNPUOp::GetNPUOp() { return this->split_; }
 
-int SplitNPUOp::HandleAxis() {
+int SplitNPUOp::HandleAxisAndConstantInputs(std::vector<mindspore::MSTensor *> *all_tensors) {
   axis_ = TransFormAxis(axis_);
   if (axis_ == NCHW_INVALID) {
     MS_LOG(ERROR) << "Transform axis for split op failed.";
diff --git a/mindspore/lite/src/runtime/delegate/npu/op/split_npu.h b/mindspore/lite/src/runtime/delegate/npu/op/split_npu.h
index 53422943811..5fdf3afc308 100644
--- a/mindspore/lite/src/runtime/delegate/npu/op/split_npu.h
+++ b/mindspore/lite/src/runtime/delegate/npu/op/split_npu.h
@@ -1,5 +1,5 @@
 /**
- * Copyright 2020-2021 Huawei Technologies Co., Ltd
+ * Copyright 2020-2022 Huawei Technologies Co., Ltd
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -42,7 +42,7 @@ class SplitNPUOp : public NPUOp {
                    const std::vector<mindspore::MSTensor> &out_tensors,
                    const std::vector<ge::Operator *> &npu_inputs) override;
 
-  int HandleAxis() override;
+  int HandleAxisAndConstantInputs(std::vector<mindspore::MSTensor *> *all_tensors) override;
 
   ge::Operator *GetNPUOp() override;
 
diff --git a/mindspore/lite/src/runtime/delegate/npu/op/strided_slice_npu.cc b/mindspore/lite/src/runtime/delegate/npu/op/strided_slice_npu.cc
index 201420d8e3f..8101738bd2a 100644
--- a/mindspore/lite/src/runtime/delegate/npu/op/strided_slice_npu.cc
+++ b/mindspore/lite/src/runtime/delegate/npu/op/strided_slice_npu.cc
@@ -1,5 +1,5 @@
 /**
- * Copyright 2020-2021 Huawei Technologies Co., Ltd
+ * Copyright 2020-2022 Huawei Technologies Co., Ltd
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -110,7 +110,7 @@ ge::Operator *StridedSliceNPUOp::GetNPUOp() {
   }
 }
 
-int StridedSliceNPUOp::HandleAxis() {
+int StridedSliceNPUOp::HandleAxisAndConstantInputs(std::vector<mindspore::MSTensor *> *all_tensors) {
   if (inputs_.size() < MIN_INPUT_SIZE) {
     MS_LOG(ERROR) << "StridedSlice in tensors size < " << MIN_INPUT_SIZE;
     return RET_ERROR;
diff --git a/mindspore/lite/src/runtime/delegate/npu/op/strided_slice_npu.h b/mindspore/lite/src/runtime/delegate/npu/op/strided_slice_npu.h
index 695fbc0edda..a74ced16d04 100644
--- a/mindspore/lite/src/runtime/delegate/npu/op/strided_slice_npu.h
+++ b/mindspore/lite/src/runtime/delegate/npu/op/strided_slice_npu.h
@@ -1,5 +1,5 @@
 /**
- * Copyright 2020-2021 Huawei Technologies Co., Ltd
+ * Copyright 2020-2022 Huawei Technologies Co., Ltd
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -49,7 +49,7 @@ class StridedSliceNPUOp : public NPUOp {
 
   ge::Operator *GetNPUOp() override;
 
-  int HandleAxis() override;
+  int HandleAxisAndConstantInputs(std::vector<mindspore::MSTensor *> *all_tensors) override;
 
  private:
   hiai::op::StridedSlice *strided_slice_ = nullptr;
diff --git a/mindspore/lite/src/runtime/delegate/npu/pass/npu_fusion_pass.cc b/mindspore/lite/src/runtime/delegate/npu/pass/npu_fusion_pass.cc
index fb958553d5b..3deb46ddc94 100644
--- a/mindspore/lite/src/runtime/delegate/npu/pass/npu_fusion_pass.cc
+++ b/mindspore/lite/src/runtime/delegate/npu/pass/npu_fusion_pass.cc
@@ -1,5 +1,5 @@
 /**
- * Copyright 2021 Huawei Technologies Co., Ltd
+ * Copyright 2021-2022 Huawei Technologies Co., Ltd
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -248,9 +248,10 @@ int NPUFusionPass::CommonFusion(NPUOp *cur_op) {
     MS_LOG(ERROR) << "UpdateOp failed.";
     return RET_ERROR;
   }
-  ret = cur_op->HandleAxis();
+  auto all_tensors = subgraph_->GetInsertTensors();
+  ret = cur_op->HandleAxisAndConstantInputs(all_tensors);
   if (ret != RET_OK) {
-    MS_LOG(ERROR) << "HandleAxis failed.";
+    MS_LOG(ERROR) << "HandleAxisAndConstantInputs failed.";
     return ret;
   }
   return RET_OK;
diff --git a/mindspore/lite/tools/converter/parser/onnx/onnx_matmul_parser.cc b/mindspore/lite/tools/converter/parser/onnx/onnx_matmul_parser.cc
index 37aa14142ac..95206ac1845 100644
--- a/mindspore/lite/tools/converter/parser/onnx/onnx_matmul_parser.cc
+++ b/mindspore/lite/tools/converter/parser/onnx/onnx_matmul_parser.cc
@@ -1,5 +1,5 @@
 /**
- * Copyright 2020 Huawei Technologies Co., Ltd
+ * Copyright 2020-2022 Huawei Technologies Co., Ltd
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -40,7 +40,7 @@ PrimitiveCPtr OnnxMatmulParser::Parse(const onnx::GraphProto &onnx_graph, const
     }
   }
   if (!FloatCompare(alpha, 1.0f) || (!FloatCompare(beta, 1.0f) && !(onnx_node.input().size() == 2 &&
-                                                                    !FloatCompare(beta)))) {  // 2: input num is A and B
+                                                                    FloatCompare(beta)))) {  // 2: input num is A and B
     MS_LOG(ERROR) << "not support alpha * A * B + beta * C";
     return nullptr;
   }
diff --git a/mindspore/lite/tools/optimizer/fusion/mul_add_fusion.cc b/mindspore/lite/tools/optimizer/fusion/mul_add_fusion.cc
index efb858b43a7..b0b5ed1004d 100644
--- a/mindspore/lite/tools/optimizer/fusion/mul_add_fusion.cc
+++ b/mindspore/lite/tools/optimizer/fusion/mul_add_fusion.cc
@@ -1,5 +1,5 @@
 /**
- * Copyright 2021 Huawei Technologies Co., Ltd
+ * Copyright 2021-2022 Huawei Technologies Co., Ltd
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -189,6 +189,15 @@ bool MulAddFusion::ScaleInputShapeValid(size_t *axis_offset) const {
   return true;
 }
 
+bool MulAddFusion::MulInputAnodeIsInferred(const AnfNodePtr &mul_input_anode) const {
+  auto mul_input_cnode = mul_input_anode->cast<CNodePtr>();
+  MS_CHECK_TRUE_RET(mul_input_cnode != nullptr, false);  // not a CNode: report "not inferred" rather than throwing.
+  auto prim = GetValueNode<PrimitivePtr>(mul_input_cnode->input(0));
+  MS_CHECK_TRUE_RET(prim != nullptr, false);
+  auto infer_done = prim->GetAttr(kInferDone);  // single lookup; an absent attribute is treated as not inferred.
+  return infer_done != nullptr && GetValue<bool>(infer_done);
+}
+
 AnfNodePtr MulAddFusion::Process(const std::string &pattern_name, const mindspore::FuncGraphPtr &func_graph,
                                  const mindspore::AnfNodePtr &node, const mindspore::EquivPtr &equiv) const {
   if (func_graph == nullptr || node == nullptr) {
@@ -232,6 +241,12 @@ AnfNodePtr MulAddFusion::Process(const std::string &pattern_name, const mindspor
   }
 
   MS_CHECK_TRUE_RET(mul_input_anode != nullptr, nullptr);
+  if (mul_input_anode->isa<CNode>()) {
+    if (!MulInputAnodeIsInferred(mul_input_anode)) {
+      MS_LOG(DEBUG) << "mul_input_anode is not inferred, don't perform the ScaleInputShapeValid method.";
+      return nullptr;
+    }
+  }
   if (FetchShapeFromAbstract(mul_input_anode->abstract(), &mul_input_shape_) != lite::RET_OK) {
     return nullptr;
   }
diff --git a/mindspore/lite/tools/optimizer/fusion/mul_add_fusion.h b/mindspore/lite/tools/optimizer/fusion/mul_add_fusion.h
index 18e6803807c..d9ea52dd246 100644
--- a/mindspore/lite/tools/optimizer/fusion/mul_add_fusion.h
+++ b/mindspore/lite/tools/optimizer/fusion/mul_add_fusion.h
@@ -1,5 +1,5 @@
 /**
- * Copyright 2021 Huawei Technologies Co., Ltd
+ * Copyright 2021-2022 Huawei Technologies Co., Ltd
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -45,6 +45,7 @@ class MulAddFusion : public MultiplePatternProcessPass {
   bool CheckAddNode(const mindspore::CNodePtr &cnode) const;
   bool CheckMulNode(const mindspore::FuncGraphPtr &func_graph, const mindspore::CNodePtr &cnode) const;
   bool ScaleInputShapeValid(size_t *axis_offset) const;
+  bool MulInputAnodeIsInferred(const AnfNodePtr &mul_input_anode) const;
   bool AdjustScaleBiasTensorShape(size_t *axis_offset) const;
 
  private:
diff --git a/mindspore/lite/tools/optimizer/graph/decrease_transpose_algo.cc b/mindspore/lite/tools/optimizer/graph/decrease_transpose_algo.cc
index d6e00600ce7..25d10364a87 100644
--- a/mindspore/lite/tools/optimizer/graph/decrease_transpose_algo.cc
+++ b/mindspore/lite/tools/optimizer/graph/decrease_transpose_algo.cc
@@ -1,5 +1,5 @@
 /**
- * Copyright 2021 Huawei Technologies Co., Ltd
+ * Copyright 2021-2022 Huawei Technologies Co., Ltd
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -332,7 +332,7 @@ STATUS DecreaseTransposeAlgo::DoPreInsert(const FuncGraphPtr &func_graph, const
   auto HandleFunc = [this, &shape](const FuncGraphPtr &func_graph, const CNodePtr &cnode, size_t index,
                                    FormatTransNodeType trans_type) -> STATUS {
     auto before_perm = trans_type == kNHWC2NCHW ? kNH2NC : kNC2NH;
-    if (shape.size() == kInputSizeFour && !cnode->input(index)->isa<CNode>()) {
+    if (!cnode->input(index)->isa<CNode>()) {
       if (ConvertTensorToNCOrNH(func_graph, cnode, index, fmk_type_, train_flag_, trans_type) != lite::RET_OK) {
         MS_LOG(ERROR) << "ConvertTensorToNCOrNH failed.";
         return lite::RET_ERROR;