forked from mindspore-Ecosystem/mindspore
!43249 [MSLITE][CPU][r1.8] fix mul_add_fusion whether InferDone
Merge pull request !43249 from Greatpan/transpose_r1.8
Commit dd45074a1f
@@ -143,7 +143,8 @@ int MatmulInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC *
   for (int i = 0; i < diff; ++i) {
     ShapeInsert(in->shape_, &in->shape_size_, 0, 1);
   }
-  SetDataTypeFormat(output, input0);
+  TensorC *input = input1->data_ == NULL ? input1 : input0; // transfer the input which comes from the other node.
+  SetDataTypeFormat(output, input);
   if (parameter->quant_type_ == QuantType_QUANT_DYNAMIC || parameter->quant_type_ == QuantType_QUANT_WEIGHT) {
     output->data_type_ = kNumberTypeFloat32;
   }
@@ -1,5 +1,5 @@
 /**
- * Copyright 2021 Huawei Technologies Co., Ltd
+ * Copyright 2021-2022 Huawei Technologies Co., Ltd
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -66,21 +66,17 @@ int TransposeInferShape(const TensorC *const *inputs, size_t inputs_size, Tensor
   const TensorC *perm_tensor = inputs[1];
   const int32_t *perm_data = (int32_t *)perm_tensor->data_;
   const int perms_num = perm_tensor->shape_[0];
-  if (perm_tensor->shape_size_ == 0) {
-    return NNACL_INFER_INVALID;
-  }
+  MS_CHECK_TRUE_RET(perm_tensor->shape_size_ != 0, NNACL_INFER_INVALID);
   if (perms_num != 0 && perm_data == NULL) {
     return NNACL_INFER_INVALID;
   }
   int perm[MAX_TRANSPOSE_DIM_SIZE] = {0};
   size_t perm_size = 0;
   for (int i = 0; i < perms_num; i++) {
-    if (perm_data[i] >= perms_num) {
-      return NNACL_ERR;
-    }
+    MS_CHECK_TRUE_RET(perm_data[i] < perms_num, NNACL_ERR);
     ShapePush(perm, &perm_size, perm_data[i]);
   }
-  if (perms_num == 4) {
+  if (perms_num == PERM_NUM_FOUR) {
     const int nchw2nhwc[4] = {0, 2, 3, 1};
     const int nhwc2nchw[4] = {0, 3, 1, 2};
     const int trans3d[3] = {0, 2, 1};
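The hunk above replaces the explicit if/return guards with the MS_CHECK_TRUE_RET macro. As a hedged sketch for readers outside the nnacl tree, such a check-and-return macro is assumed to expand roughly as follows (the real definition lives in the nnacl/lite headers and may differ in detail):

// Assumed form of the check macro used above: evaluate a condition and bail out
// of the current function with the given error code when it does not hold.
#define MS_CHECK_TRUE_RET(value, errcode) \
  do {                                    \
    if (!(value)) {                       \
      return (errcode);                   \
    }                                     \
  } while (0)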
@@ -95,6 +91,9 @@ int TransposeInferShape(const TensorC *const *inputs, size_t inputs_size, Tensor
       ShapeSet(perm, &perm_size, trans3d, 3);
     }
   }
+  if (perms_num == PERM_NUM_THREE && perm[0] == 0 && perm[1] == 2) {
+    output->format_ = input->format_ == Format_NCHW ? Format_NHWC : Format_NCHW;
+  }
   if (parameter->quant_type_ == QuantType_QUANT_WEIGHT) {
     output->data_type_ = kNumberTypeFloat32;
   }
@@ -22,6 +22,8 @@
 // MAX_TRANSPOSE_SERIAL_SIZE = 64 * 3 * 512 * 512
 #define MAX_TRANSPOSE_SERIAL_SIZE 50331648
 #define MAX_TRANSPOSE_DIM_SIZE 20
+#define PERM_NUM_THREE 3
+#define PERM_NUM_FOUR 4
 
 typedef struct TransposeParameter {
   // primitive parameter
@@ -1,5 +1,5 @@
 /**
- * Copyright 2020-2021 Huawei Technologies Co., Ltd
+ * Copyright 2020-2022 Huawei Technologies Co., Ltd
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -250,6 +250,8 @@ int ConverterToNPUActivationMode(schema::ActivationType type) {
       return RELU6;
     case schema::ActivationType_ELU:
       return ELU;
+    case schema::ActivationType_GELU:
+      return GELU;
     default:
       return ACTIVATION_INVALID;
   }
@@ -1,5 +1,5 @@
 /**
- * Copyright 2020-2021 Huawei Technologies Co., Ltd
+ * Copyright 2020-2022 Huawei Technologies Co., Ltd
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -28,7 +28,8 @@ int ActivationNPUOp::IsSupport(const schema::Primitive *primitive, const std::ve
   if (act_type_ != schema::ActivationType_RELU && act_type_ != schema::ActivationType_RELU6 &&
       act_type_ != schema::ActivationType_SIGMOID && act_type_ != schema::ActivationType_TANH &&
       act_type_ != schema::ActivationType_HSIGMOID && act_type_ != schema::ActivationType_LEAKY_RELU &&
-      act_type_ != schema::ActivationType_SWISH && act_type_ != schema::ActivationType_ELU) {
+      act_type_ != schema::ActivationType_SWISH && act_type_ != schema::ActivationType_ELU &&
+      act_type_ != schema::ActivationType_GELU) {
     MS_LOG(WARNING) << "Unsupported activation type for activation op " << name_ << "when running npu";
     return RET_NOT_SUPPORT;
   }
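Together with the ConverterToNPUActivationMode case added earlier, this lets GELU activations pass the NPU support check instead of being rejected. A hedged usage sketch, not part of the patch, of how the two pieces are expected to line up:

// Sketch only: the op builder is assumed to map the schema activation type to the HiAI
// activation mode via the converter; after this patch GELU no longer falls through to
// the invalid marker and the activation can be offloaded to the NPU.
int mode = ConverterToNPUActivationMode(schema::ActivationType_GELU);
if (mode == ACTIVATION_INVALID) {
  // Before this patch GELU ended up here and the activation op stayed on the CPU.
}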
@@ -1,5 +1,5 @@
 /**
- * Copyright 2020-2021 Huawei Technologies Co., Ltd
+ * Copyright 2020-2022 Huawei Technologies Co., Ltd
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -16,6 +16,9 @@
 
 #include "src/runtime/delegate/npu/op/arithmetic_npu.h"
 #include "src/runtime/delegate/npu/npu_converter_utils.h"
+#include "src/runtime/delegate/delegate_utils.h"
+#include "src/runtime/delegate/npu/transpose_kernel.h"
+
 namespace mindspore {
 constexpr int ARITHMETIC_INPUT_NUM = 2;
 constexpr int MAX_HW_SIZE = 1664;
@@ -24,20 +27,24 @@ int ArithmeticNPUOp::IsSupport(const schema::Primitive *primitive, const std::ve
   auto in_shape_0 = in_tensors[0].Shape();
   auto in_shape_1 = in_tensors[1].Shape();
   auto out_shape = out_tensors[0].Shape();
-  if (in_shape_0.size() != 0 && in_shape_1.size() != 0 && in_shape_0.size() != in_shape_1.size()) {
-    MS_LOG(WARNING) << name_ << " for the two inputs, the dimension size must be same. size 1 is: " << in_shape_0.size()
-                    << " size 2 is: " << in_shape_1.size();
-    return RET_NOT_SUPPORT;
-  }
   // a hidden limitation in npu bottom implementation
-  if (type_ == schema::PrimitiveType_MulFusion && out_shape.size() == NPU_SHAPE_SIZE) {
-    bool is_nhwc = out_tensors[0].format() == Format::NHWC;
-    auto out_h = is_nhwc ? out_shape.at(NHWC_H) : out_shape.at(NCHW_H);
-    auto out_w = is_nhwc ? out_shape.at(NHWC_W) : out_shape.at(NCHW_W);
-    // two inputs have different shape with the output, which means both of them need broadcast
-    if (in_shape_0 != out_shape && in_shape_1 != out_shape && out_h * out_w > MAX_HW_SIZE) {
-      MS_LOG(WARNING) << "The size of out_height * out_width is larger than the max value (1664) that npu supports "
-                         "during broadcasting.";
+  if (type_ == schema::PrimitiveType_MulFusion) {
+    if (out_shape.size() == NPU_SHAPE_SIZE) {
+      bool is_nhwc = out_tensors[0].format() == Format::NHWC;
+      auto out_h = is_nhwc ? out_shape.at(NHWC_H) : out_shape.at(NCHW_H);
+      auto out_w = is_nhwc ? out_shape.at(NHWC_W) : out_shape.at(NCHW_W);
+      // two inputs have different shape with the output, which means both of them need broadcast
+      if (in_shape_0 != out_shape && in_shape_1 != out_shape && out_h * out_w > MAX_HW_SIZE) {
+        MS_LOG(WARNING) << "The size of out_height * out_width is larger than the max value (1664) that npu supports "
+                           "during broadcasting.";
+        return RET_NOT_SUPPORT;
+      }
+    }
+  } else {
+    if (in_shape_0.size() != 0 && in_shape_1.size() != 0 && in_shape_0.size() != in_shape_1.size()) {
+      MS_LOG(WARNING) << name_
+                      << " for the two inputs, the dimension size must be same. size 1 is: " << in_shape_0.size()
+                      << " size 2 is: " << in_shape_1.size();
       return RET_NOT_SUPPORT;
     }
   }
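The restructured check above keeps the H*W broadcast limit for MulFusion only and applies the equal-rank requirement to every other arithmetic type in the else branch. A condensed, hypothetical helper (not in the patch; only the 1664 limit comes from the code above) expressing the MulFusion rejection condition:

#include <cstdint>
#include <vector>

// Both inputs differing from the output shape means both sides need broadcasting,
// which the NPU is reported to handle only up to 1664 elements per H*W plane.
bool MulBroadcastTooLarge(const std::vector<int64_t> &in0, const std::vector<int64_t> &in1,
                          const std::vector<int64_t> &out, int64_t out_h, int64_t out_w) {
  constexpr int64_t kMaxHwSize = 1664;  // mirrors MAX_HW_SIZE above
  return in0 != out && in1 != out && out_h * out_w > kMaxHwSize;
}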
@@ -240,6 +247,32 @@ ge::Operator *ArithmeticNPUOp::GetNPUOp() {
   return act_;
 }
 
+int ArithmeticNPUOp::HandleAxisAndConstantInputs(std::vector<mindspore::MSTensor *> *all_tensors) {
+  for (size_t i = 0; i < inputs_.size(); i++) {
+    auto in_tensor = inputs_.at(i);
+    if (!in_tensor.IsConst() || in_tensor.Shape().size() != DIMENSION_4D) {
+      continue;
+    }
+    auto shape = in_tensor.Shape();
+    auto new_shape = {in_tensor.Shape().at(NHWC_N), in_tensor.Shape().at(NHWC_C), in_tensor.Shape().at(NHWC_H),
+                      in_tensor.Shape().at(NHWC_W)};
+    auto nh2nc_tensor =
+      mindspore::MSTensor::CreateTensor(in_tensor.Name() + "_nh2nc", in_tensor.DataType(), new_shape, nullptr, 0);
+    if (nh2nc_tensor == nullptr) {
+      MS_LOG(ERROR) << "New nchw tensor failed when inserting nchw2nhwc op.";
+      return RET_ERROR;
+    }
+    auto dst_data = nh2nc_tensor->MutableData();
+    MS_CHECK_TRUE_RET(dst_data != nullptr, RET_ERROR);
+    // transpose dst_data to nchw.
+    PackNHWCToNCHWFp32(in_tensor.MutableData(), dst_data, shape[NHWC_N], shape[NHWC_H] * shape[NHWC_W], shape[NHWC_C]);
+    nh2nc_tensor->SetFormat(NCHW);
+    inputs_.at(i) = *nh2nc_tensor;
+    all_tensors->push_back(nh2nc_tensor);
+  }
+  return RET_OK;
+}
+
 ArithmeticNPUOp::~ArithmeticNPUOp() {
   if (op_ != nullptr) {
     delete op_;
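The new HandleAxisAndConstantInputs repacks constant 4-D NHWC inputs into NCHW through PackNHWCToNCHWFp32 before handing them to the NPU graph. The optimized kernel lives in transpose_kernel.h; as a hedged reference only, the index math it is assumed to perform looks like this (helper name is hypothetical):

// Reference NHWC -> NCHW repack: batch outermost, plane = H * W, channel innermost in the
// source layout, matching the (batch, plane, channel) argument order used in the call above.
void PackNHWCToNCHWRef(const float *src, float *dst, int batch, int plane, int channel) {
  for (int b = 0; b < batch; ++b) {
    const float *src_batch = src + b * plane * channel;
    float *dst_batch = dst + b * plane * channel;
    for (int p = 0; p < plane; ++p) {
      for (int c = 0; c < channel; ++c) {
        dst_batch[c * plane + p] = src_batch[p * channel + c];
      }
    }
  }
}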
@@ -1,5 +1,5 @@
 /**
- * Copyright 2020-2021 Huawei Technologies Co., Ltd
+ * Copyright 2020-2022 Huawei Technologies Co., Ltd
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -47,6 +47,8 @@ class ArithmeticNPUOp : public NPUOp {
 
   ge::Operator *GetNPUOp() override;
 
+  int HandleAxisAndConstantInputs(std::vector<mindspore::MSTensor *> *all_tensors) override;
+
  private:
   int SetActivation();
   schema::ActivationType act_type_ = schema::ActivationType_NO_ACTIVATION;
@@ -1,5 +1,5 @@
 /**
- * Copyright 2020-2021 Huawei Technologies Co., Ltd
+ * Copyright 2020-2022 Huawei Technologies Co., Ltd
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -57,7 +57,7 @@ int ConcatNPUOp::SetNPUInputs(const std::vector<mindspore::MSTensor> &in_tensors
 
 ge::Operator *ConcatNPUOp::GetNPUOp() { return this->concat_; }
 
-int ConcatNPUOp::HandleAxis() {
+int ConcatNPUOp::HandleAxisAndConstantInputs(std::vector<mindspore::MSTensor *> *all_tensors) {
   axis_ = TransFormAxis(axis_);
   if (axis_ == NCHW_INVALID) {
     MS_LOG(ERROR) << "Transform axis for concat op failed.";
@@ -1,5 +1,5 @@
 /**
- * Copyright 2020-2021 Huawei Technologies Co., Ltd
+ * Copyright 2020-2022 Huawei Technologies Co., Ltd
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -45,7 +45,7 @@ class ConcatNPUOp : public NPUOp {
 
   ge::Operator *GetNPUOp() override;
 
-  int HandleAxis() override;
+  int HandleAxisAndConstantInputs(std::vector<mindspore::MSTensor *> *all_tensors) override;
 
  private:
   hiai::op::ConcatD *concat_ = nullptr;
@@ -1,5 +1,5 @@
 /**
- * Copyright 2020-2021 Huawei Technologies Co., Ltd
+ * Copyright 2020-2022 Huawei Technologies Co., Ltd
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -77,7 +77,7 @@ class NPUOp {
 
   virtual ge::Operator *GetNPUOp() { return nullptr; }
 
-  virtual int HandleAxis() { return RET_OK; }
+  virtual int HandleAxisAndConstantInputs(std::vector<mindspore::MSTensor *> *all_tensors) { return RET_OK; }
 
   void set_inputs(const std::vector<mindspore::MSTensor> &in_tensors) { this->inputs_ = in_tensors; }
 
@@ -1,5 +1,5 @@
 /**
- * Copyright 2020-2021 Huawei Technologies Co., Ltd
+ * Copyright 2020-2022 Huawei Technologies Co., Ltd
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -116,7 +116,7 @@ int PadNPUOp::SetNPUInputs(const std::vector<mindspore::MSTensor> &in_tensors,
 
 ge::Operator *PadNPUOp::GetNPUOp() { return this->pad_; }
 
-int PadNPUOp::HandleAxis() {
+int PadNPUOp::HandleAxisAndConstantInputs(std::vector<mindspore::MSTensor *> *all_tensors) {
   if (paddings_vec_.size() != PAD_SIZE) {
     return RET_ERROR;
   }
@@ -1,5 +1,5 @@
 /**
- * Copyright 2020-2021 Huawei Technologies Co., Ltd
+ * Copyright 2020-2022 Huawei Technologies Co., Ltd
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -42,7 +42,7 @@ class PadNPUOp : public NPUOp {
 
   ge::Operator *GetNPUOp() override;
 
-  int HandleAxis() override;
+  int HandleAxisAndConstantInputs(std::vector<mindspore::MSTensor *> *all_tensors) override;
 
  private:
   hiai::op::PadV2 *pad_ = nullptr;
@@ -1,5 +1,5 @@
 /**
- * Copyright 2020-2021 Huawei Technologies Co., Ltd
+ * Copyright 2020-2022 Huawei Technologies Co., Ltd
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -88,7 +88,7 @@ int ReduceNPUOp::SetNPUInputs(const std::vector<mindspore::MSTensor> &in_tensors
 
 ge::Operator *ReduceNPUOp::GetNPUOp() { return this->reduce_; }
 
-int ReduceNPUOp::HandleAxis() {
+int ReduceNPUOp::HandleAxisAndConstantInputs(std::vector<mindspore::MSTensor *> *all_tensors) {
   auto reduce_axes = inputs_.at(1);
   int num_axes = reduce_axes.Shape().at(0);
   MS_CHECK_TRUE_RET(reduce_axes.MutableData() != nullptr, RET_ERROR);
@@ -1,5 +1,5 @@
 /**
- * Copyright 2020-2021 Huawei Technologies Co., Ltd
+ * Copyright 2020-2022 Huawei Technologies Co., Ltd
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -42,7 +42,7 @@ class ReduceNPUOp : public NPUOp {
 
   ge::Operator *GetNPUOp() override;
 
-  int HandleAxis() override;
+  int HandleAxisAndConstantInputs(std::vector<mindspore::MSTensor *> *all_tensors) override;
 
  private:
   schema::ReduceMode reduce_mode_ = schema::ReduceMode_ReduceMean;
@@ -1,5 +1,5 @@
 /**
- * Copyright 2020-2021 Huawei Technologies Co., Ltd
+ * Copyright 2020-2022 Huawei Technologies Co., Ltd
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -47,7 +47,7 @@ int SoftmaxNPUOp::SetNPUInputs(const std::vector<mindspore::MSTensor> &in_tensor
 
 ge::Operator *SoftmaxNPUOp::GetNPUOp() { return this->softmax_; }
 
-int SoftmaxNPUOp::HandleAxis() {
+int SoftmaxNPUOp::HandleAxisAndConstantInputs(std::vector<mindspore::MSTensor *> *all_tensors) {
   axis_ = TransFormAxis(axis_);
   if (axis_ == NCHW_INVALID) {
     MS_LOG(ERROR) << "Transform axis for Softmax op failed.";
@@ -1,5 +1,5 @@
 /**
- * Copyright 2020-2021 Huawei Technologies Co., Ltd
+ * Copyright 2020-2022 Huawei Technologies Co., Ltd
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -42,7 +42,7 @@ class SoftmaxNPUOp : public NPUOp {
                   const std::vector<mindspore::MSTensor> &out_tensors,
                   const std::vector<ge::Operator *> &npu_inputs) override;
 
-  int HandleAxis() override;
+  int HandleAxisAndConstantInputs(std::vector<mindspore::MSTensor *> *all_tensors) override;
 
   ge::Operator *GetNPUOp() override;
 
@@ -1,5 +1,5 @@
 /**
- * Copyright 2020-2021 Huawei Technologies Co., Ltd
+ * Copyright 2020-2022 Huawei Technologies Co., Ltd
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -86,7 +86,7 @@ int SplitNPUOp::SetNPUInputs(const std::vector<mindspore::MSTensor> &in_tensors,
 
 ge::Operator *SplitNPUOp::GetNPUOp() { return this->split_; }
 
-int SplitNPUOp::HandleAxis() {
+int SplitNPUOp::HandleAxisAndConstantInputs(std::vector<mindspore::MSTensor *> *all_tensors) {
   axis_ = TransFormAxis(axis_);
   if (axis_ == NCHW_INVALID) {
     MS_LOG(ERROR) << "Transform axis for split op failed.";
@@ -1,5 +1,5 @@
 /**
- * Copyright 2020-2021 Huawei Technologies Co., Ltd
+ * Copyright 2020-2022 Huawei Technologies Co., Ltd
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -42,7 +42,7 @@ class SplitNPUOp : public NPUOp {
                  const std::vector<mindspore::MSTensor> &out_tensors,
                  const std::vector<ge::Operator *> &npu_inputs) override;
 
-  int HandleAxis() override;
+  int HandleAxisAndConstantInputs(std::vector<mindspore::MSTensor *> *all_tensors) override;
 
   ge::Operator *GetNPUOp() override;
 
@@ -1,5 +1,5 @@
 /**
- * Copyright 2020-2021 Huawei Technologies Co., Ltd
+ * Copyright 2020-2022 Huawei Technologies Co., Ltd
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -110,7 +110,7 @@ ge::Operator *StridedSliceNPUOp::GetNPUOp() {
   }
 }
 
-int StridedSliceNPUOp::HandleAxis() {
+int StridedSliceNPUOp::HandleAxisAndConstantInputs(std::vector<mindspore::MSTensor *> *all_tensors) {
   if (inputs_.size() < MIN_INPUT_SIZE) {
     MS_LOG(ERROR) << "StridedSlice in tensors size < " << MIN_INPUT_SIZE;
     return RET_ERROR;
@@ -1,5 +1,5 @@
 /**
- * Copyright 2020-2021 Huawei Technologies Co., Ltd
+ * Copyright 2020-2022 Huawei Technologies Co., Ltd
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -49,7 +49,7 @@ class StridedSliceNPUOp : public NPUOp {
 
   ge::Operator *GetNPUOp() override;
 
-  int HandleAxis() override;
+  int HandleAxisAndConstantInputs(std::vector<mindspore::MSTensor *> *all_tensors) override;
 
  private:
   hiai::op::StridedSlice *strided_slice_ = nullptr;
@@ -1,5 +1,5 @@
 /**
- * Copyright 2021 Huawei Technologies Co., Ltd
+ * Copyright 2021-2022 Huawei Technologies Co., Ltd
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -248,9 +248,10 @@ int NPUFusionPass::CommonFusion(NPUOp *cur_op) {
     MS_LOG(ERROR) << "UpdateOp failed.";
     return RET_ERROR;
   }
-  ret = cur_op->HandleAxis();
+  auto all_tensors = subgraph_->GetInsertTensors();
+  ret = cur_op->HandleAxisAndConstantInputs(all_tensors);
   if (ret != RET_OK) {
-    MS_LOG(ERROR) << "HandleAxis failed.";
+    MS_LOG(ERROR) << "HandleAxisAndConstantInputs failed.";
     return ret;
   }
   return RET_OK;
@@ -1,5 +1,5 @@
 /**
- * Copyright 2020 Huawei Technologies Co., Ltd
+ * Copyright 2020-2022 Huawei Technologies Co., Ltd
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -40,7 +40,7 @@ PrimitiveCPtr OnnxMatmulParser::Parse(const onnx::GraphProto &onnx_graph, const
     }
   }
   if (!FloatCompare(alpha, 1.0f) || (!FloatCompare(beta, 1.0f) && !(onnx_node.input().size() == 2 &&
-                                                                    !FloatCompare(beta)))) { // 2: input num is A and B
+                                                                    FloatCompare(beta)))) { // 2: input num is A and B
     MS_LOG(ERROR) << "not support alpha * A * B + beta * C";
     return nullptr;
   }
@@ -1,5 +1,5 @@
 /**
- * Copyright 2021 Huawei Technologies Co., Ltd
+ * Copyright 2021-2022 Huawei Technologies Co., Ltd
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -189,6 +189,15 @@ bool MulAddFusion::ScaleInputShapeValid(size_t *axis_offset) const {
   return true;
 }
 
+bool MulAddFusion::MulInputAnodeIsInferred(const AnfNodePtr &mul_input_anode) const {
+  auto mul_input_cnode = mul_input_anode->cast<CNodePtr>();
+  MS_EXCEPTION_IF_NULL(mul_input_cnode);
+  auto prim = GetValueNode<PrimitivePtr>(mul_input_cnode->input(0));
+  MS_CHECK_TRUE_RET(prim != nullptr, false);
+  auto is_inferred = prim->GetAttr(kInferDone) != nullptr && GetValue<bool>(prim->GetAttr(kInferDone));
+  return is_inferred;
+}
+
 AnfNodePtr MulAddFusion::Process(const std::string &pattern_name, const mindspore::FuncGraphPtr &func_graph,
                                  const mindspore::AnfNodePtr &node, const mindspore::EquivPtr &equiv) const {
   if (func_graph == nullptr || node == nullptr) {
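MulInputAnodeIsInferred gates the fusion on the kInferDone attribute that an earlier infer-shape pass is expected to have set on the mul input's primitive. A hedged sketch of that marking convention (the helper name is hypothetical; only the kInferDone key comes from the hunk above):

// Sketch only: how an infer pass is assumed to record that a node's output shape is known.
// MulAddFusion::Process then skips ScaleInputShapeValid for mul inputs that lack this mark.
void MarkInferDone(const PrimitivePtr &prim) {
  if (prim != nullptr) {
    (void)prim->AddAttr(kInferDone, MakeValue(true));
  }
}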
@@ -232,6 +241,12 @@ AnfNodePtr MulAddFusion::Process(const std::string &pattern_name, const mindspor
   }
 
   MS_CHECK_TRUE_RET(mul_input_anode != nullptr, nullptr);
+  if (mul_input_anode->isa<CNode>()) {
+    if (!MulInputAnodeIsInferred(mul_input_anode)) {
+      MS_LOG(DEBUG) << "mul_input_anode is not inferred, don't perform the ScaleInputShapeValid method.";
+      return nullptr;
+    }
+  }
   if (FetchShapeFromAbstract(mul_input_anode->abstract(), &mul_input_shape_) != lite::RET_OK) {
     return nullptr;
   }
@@ -1,5 +1,5 @@
 /**
- * Copyright 2021 Huawei Technologies Co., Ltd
+ * Copyright 2021-2022 Huawei Technologies Co., Ltd
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -45,6 +45,7 @@ class MulAddFusion : public MultiplePatternProcessPass {
   bool CheckAddNode(const mindspore::CNodePtr &cnode) const;
   bool CheckMulNode(const mindspore::FuncGraphPtr &func_graph, const mindspore::CNodePtr &cnode) const;
   bool ScaleInputShapeValid(size_t *axis_offset) const;
+  bool MulInputAnodeIsInferred(const AnfNodePtr &mul_input_anode) const;
   bool AdjustScaleBiasTensorShape(size_t *axis_offset) const;
 
  private:
@@ -1,5 +1,5 @@
 /**
- * Copyright 2021 Huawei Technologies Co., Ltd
+ * Copyright 2021-2022 Huawei Technologies Co., Ltd
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -332,7 +332,7 @@ STATUS DecreaseTransposeAlgo::DoPreInsert(const FuncGraphPtr &func_graph, const
   auto HandleFunc = [this, &shape](const FuncGraphPtr &func_graph, const CNodePtr &cnode, size_t index,
                                    FormatTransNodeType trans_type) -> STATUS {
     auto before_perm = trans_type == kNHWC2NCHW ? kNH2NC : kNC2NH;
-    if (shape.size() == kInputSizeFour && !cnode->input(index)->isa<CNode>()) {
+    if (!cnode->input(index)->isa<CNode>()) {
      if (ConvertTensorToNCOrNH(func_graph, cnode, index, fmk_type_, train_flag_, trans_type) != lite::RET_OK) {
        MS_LOG(ERROR) << "ConvertTensorToNCOrNH failed.";
        return lite::RET_ERROR;