forked from mindspore-Ecosystem/mindspore
!32893 [MSLITE][Bug][Func] Adjust runtime shape fusion pass.
Merge pull request !32893 from wangshaocong/bugfix
Commit: bd318b94af
@@ -114,6 +114,8 @@ set(LITE_SRC
     ${CMAKE_CURRENT_SOURCE_DIR}/runtime/inner_allocator.cc
     ${CMAKE_CURRENT_SOURCE_DIR}/runtime/runtime_allocator.cc
     ${CMAKE_CURRENT_SOURCE_DIR}/runtime/infer_manager.cc
+    ${CMAKE_CURRENT_SOURCE_DIR}/runtime/runtime_shape_fusion_pass.cc
+    ${CMAKE_CURRENT_SOURCE_DIR}/runtime/runtime_pass.cc
     ${CMAKE_CURRENT_SOURCE_DIR}/schema_tensor_wrapper.cc
     ${CMAKE_CURRENT_SOURCE_DIR}/tensor.cc
     ${CMAKE_CURRENT_SOURCE_DIR}/tensor_category.cc
@@ -205,13 +207,6 @@ if(MSLITE_ENABLE_STRING_KERNEL)
         ${CMAKE_CURRENT_SOURCE_DIR}/common/string_util.cc
     )
 endif()
-if(MSLITE_ENABLE_RUNTIME_PASS)
-    set(LITE_SRC
-        ${LITE_SRC}
-        ${CMAKE_CURRENT_SOURCE_DIR}/runtime/runtime_shape_fusion_pass.cc
-        ${CMAKE_CURRENT_SOURCE_DIR}/runtime/runtime_pass.cc
-    )
-endif()
 
 if(MSLITE_ENABLE_CONTROLFLOW)
     set(LITE_SRC
@@ -17,9 +17,7 @@
 #include "src/lite_session.h"
 #include <set>
 #include "src/pack_weight_manager.h"
-#ifndef RUNTIME_PASS_CLIP
 #include "src/runtime/runtime_pass.h"
-#endif
 #if defined(LINUX_RUNTIME)
 #include <malloc.h>
 #endif
@@ -1262,13 +1260,12 @@ int LiteSession::Resize(const std::vector<mindspore::tensor::MSTensor *> &inputs
    is_running_.store(false);
    return RET_ERROR;
  }
#ifndef RUNTIME_PASS_CLIP

  auto status = GraphOptimizePass(&kernels_);
  if (status != RET_OK) {
    MS_LOG(ERROR) << "GraphOptimizePass failed.";
    return RET_ERROR;
  }
#endif

  is_running_.store(false);
#if defined(LINUX_RUNTIME)
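Note: with this change, Resize() re-runs GraphOptimizePass over the session's kernels once the new input shapes are set, presumably so that previously fused shape kernels are re-folded for the new shapes. Below is a minimal, self-contained sketch of the guarded call pattern; Status, KernelExec, and the pass function here are simplified stand-ins for the real MSLite types, not the actual API.

#include <vector>

// Stand-ins so the sketch compiles on its own (hypothetical, simplified).
enum Status { RET_OK = 0, RET_ERROR = 1 };
struct KernelExec {};
static Status GraphOptimizePass(std::vector<KernelExec *> *kernels) {
  (void)kernels;  // the real pass rewrites the kernel list in place
  return RET_OK;
}

// Pattern used by Resize(): after shapes change, re-run the graph-level
// optimize pass, but only in builds that keep the runtime passes
// (RUNTIME_PASS_CLIP not defined); otherwise the block compiles away.
Status ResizeTail(std::vector<KernelExec *> *kernels) {
#ifndef RUNTIME_PASS_CLIP
  if (GraphOptimizePass(kernels) != RET_OK) {
    return RET_ERROR;  // surface the pass failure to the caller
  }
#endif
  return RET_OK;
}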
@@ -15,6 +15,7 @@
  */
 
 #include "src/runtime/kernel/cpu/fp32/shape_fusion_fp32.h"
+#include <algorithm>
 #include "src/kernel_registry.h"
 #include "include/errorcode.h"
 #include "src/common/log_adapter.h"
@@ -34,11 +35,12 @@ int ShapeFusionCPUKernel::Prepare() {
int ShapeFusionCPUKernel::ReSize() { return KernelInferShape(in_tensors_, out_tensors_, op_parameter_); }

int ShapeFusionCPUKernel::Run() {
#ifndef DELEGATE_CLIP
  bool is_const =
    std::all_of(out_tensors_.begin(), out_tensors_.end(), [](lite::Tensor *tensor) { return tensor->IsConst(); });
  if (is_const) {
    return RET_OK;
  }
  return KernelInferShape(in_tensors_, out_tensors_, op_parameter_);
#else
  return RET_OK;
#endif
}

REG_KERNEL(kCPU, kNumberTypeInt32, PrimType_Inner_ShapeFusion, LiteKernelCreator<ShapeFusionCPUKernel>)
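Note: Run() above becomes a no-op when every output tensor is already constant, which is the case when the fused shape computation was fully folded during scheduling; only non-constant outputs still go through KernelInferShape at run time. A minimal sketch of the same check, with Tensor as a hypothetical stand-in for lite::Tensor:

#include <algorithm>
#include <vector>

struct Tensor {  // hypothetical stand-in for lite::Tensor
  bool const_flag = false;
  bool IsConst() const { return const_flag; }
};

// True when every output was folded to a constant at schedule time,
// in which case the kernel's Run() has nothing left to compute.
bool AllOutputsConst(const std::vector<Tensor *> &out_tensors) {
  return std::all_of(out_tensors.begin(), out_tensors.end(),
                     [](const Tensor *t) { return t->IsConst(); });
}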
@@ -17,11 +17,12 @@
 #include "src/runtime/runtime_pass.h"
 #include "nnacl/conv_parameter.h"
 
+namespace mindspore::lite {
+#ifndef RUNTIME_PASS_CLIP
 namespace {
 const constexpr int kMaxDepth = 2048;
 }
 
-namespace mindspore::lite {
 void Nc4hw4PassReplace(std::vector<kernel::KernelExec *> *kernels, std::vector<Tensor *> *tensors, size_t index) {
   kernel::KernelExec *conv_kernel = kernels->at(index);
   kernel::KernelExec *transpose_kernel = conv_kernel->out_kernels().front();
@@ -287,8 +288,10 @@ STATUS DeleteRedundantTrans(std::vector<kernel::KernelExec *> *kernels) {
   }
   return RET_OK;
 }
+#endif
 
 STATUS RuntimePass(std::vector<kernel::KernelExec *> *subgraphs, std::vector<Tensor *> *tensors) {
+#ifndef RUNTIME_PASS_CLIP
   for (auto subgraph : *subgraphs) {
     auto sub = reinterpret_cast<kernel::SubGraphKernel *>(subgraph);
     if (RuntimePassValid(sub) == false) {
@@ -305,10 +308,12 @@ STATUS RuntimePass(std::vector<kernel::KernelExec *> *subgraphs, std::vector<Ten
       return RET_ERROR;
     }
   }
+#endif
   return RET_OK;
 }
 
 STATUS GraphOptimizePass(std::vector<kernel::KernelExec *> *sub_graphs) {
+#ifndef RUNTIME_PASS_CLIP
   for (auto subgraph : *sub_graphs) {
     auto sub_graph = reinterpret_cast<kernel::SubGraphKernel *>(subgraph);
     if (RuntimePassValid(sub_graph) == false) {
@@ -321,6 +326,7 @@ STATUS GraphOptimizePass(std::vector<kernel::KernelExec *> *sub_graphs) {
       return RET_ERROR;
     }
   }
+#endif
   return RET_OK;
 }
 } // namespace mindspore::lite
@@ -17,7 +17,6 @@
 #ifndef MINDSPORE_LITE_SRC_RUNTIME_RUNTIME_PASS_H_
 #define MINDSPORE_LITE_SRC_RUNTIME_RUNTIME_PASS_H_
 
-#ifndef RUNTIME_PASS_CLIP
 #include <vector>
 #include "src/kernel_exec.h"
 #include "src/sub_graph_kernel.h"
@@ -25,9 +24,9 @@
#include "schema/model_generated.h"

namespace mindspore::lite {

STATUS RuntimePass(std::vector<kernel::KernelExec *> *subgraphs, std::vector<Tensor *> *tensors);
STATUS GraphOptimizePass(std::vector<kernel::KernelExec *> *sub_graphs);
#ifndef RUNTIME_PASS_CLIP
/* Nc4hw4 PASS
 * before : --(nhwc)-- CONV --(nhwc)-- TRANSPOSE --(nchw)-- IN --(nchw)-- TRANSPOSE --(nhwc)--
 * after : --(nhwc)-- CONV --(nc4hw4)-- IN --(nhwc)--
@@ -47,7 +46,6 @@ static const std::vector<schema::PrimitiveType> Nc4hw4FormatInOpList = {schema::
static const schema::PrimitiveType ConvNormC4OpConv2DFusion = schema::PrimitiveType_Conv2DFusion;
static const schema::PrimitiveType ConvNormC4OpActivation = schema::PrimitiveType_Activation;
static const schema::PrimitiveType ConvNormC4OpInstanceNorm = schema::PrimitiveType_InstanceNorm;

} // namespace mindspore::lite
#endif
} // namespace mindspore::lite
#endif // MINDSPORE_LITE_SRC_RUNTIME_RUNTIME_PASS_H_
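Note: together with the CMakeLists change, this is the core of the refactor: runtime_pass.cc and runtime_shape_fusion_pass.cc are now always compiled, RuntimePass and GraphOptimizePass are declared outside the RUNTIME_PASS_CLIP guard, and in clipped builds their bodies reduce to returning RET_OK, so callers such as LiteSession::Resize and Scheduler::Schedule need no #ifdef of their own. A hedged, self-contained sketch of the pattern (names simplified, not the real signatures):

#include <vector>

enum STATUS { RET_OK = 0, RET_ERROR = 1 };  // stand-in status codes
struct KernelExec {};                       // stand-in for kernel::KernelExec

// Declared and defined in every build flavor, so call sites always link.
STATUS RuntimePassSketch(std::vector<KernelExec *> *subgraphs) {
#ifndef RUNTIME_PASS_CLIP
  // Full build: walk the subgraphs and apply the rewrites.
  for (KernelExec *subgraph : *subgraphs) {
    (void)subgraph;  // the real pass validates and rewrites each subgraph
  }
#endif
  // Clipped build: the body above compiles away and this is a no-op.
  return RET_OK;
}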
@@ -14,6 +14,7 @@
  * limitations under the License.
  */
 
+#ifndef RUNTIME_PASS_CLIP
 #include "src/runtime/runtime_shape_fusion_pass.h"
 #include <set>
 #include <queue>
@@ -21,13 +22,11 @@
#include "include/errorcode.h"
#include "src/common/log_adapter.h"
#include "nnacl/op_base.h"
#include "schema/inner/model_generated.h"

namespace mindspore::lite {
namespace {
constexpr size_t kInitialSize = 1024;
} // namespace

namespace mindspore::lite {
int ShapeFusionPass::ConvertToShapeFusion(Model::Node *node) {
  MS_ASSERT(node != nullptr);
  auto input_tensor = src_tensors_->at(node->input_indices_.front());
@@ -382,3 +381,4 @@ int ShapeFusionPass::GetFusionMatrixFromConstantTensor(const lite::Tensor *tenso
   return RET_OK;
 }
 } // namespace mindspore::lite
+#endif
@@ -17,15 +17,16 @@
 #ifndef MINDSPORE_LITE_SRC_RUNTIME_RUNTIME_SHAPE_FUSION_PASS_H_
 #define MINDSPORE_LITE_SRC_RUNTIME_RUNTIME_SHAPE_FUSION_PASS_H_
 
-#ifndef RUNTIME_PASS_CLIP
 #include <map>
 #include <vector>
+#include <algorithm>
 #include "src/lite_model.h"
 #include "src/common/tensor_util.h"
 #include "schema/ops_generated.h"
 #include "schema/model_generated.h"
 
 namespace mindspore::lite {
+#ifndef RUNTIME_PASS_CLIP
 struct ShapeFusionMatrix {
   ShapeFusionMatrix() {}
   explicit ShapeFusionMatrix(size_t dim) {
@@ -78,6 +79,7 @@ struct ShapeFusionMatrix {
   std::vector<std::vector<float>> shape_matrix;
   bool scalar = false;
 };
+#endif
 
 class ShapeFusionPass {
  public:
@@ -92,10 +94,31 @@ class ShapeFusionPass {
   }
   ~ShapeFusionPass() = default;
 
-  int ConvertToShapeFusion(Model::Node *node);
-  int FusePostNodes(Model::Node *node, size_t subgraph_index);
+  void Run(Model::Node *node, size_t subgraph_index) {
+#ifndef RUNTIME_PASS_CLIP
+    if (ConvertToShapeFusion(node) != RET_OK) {
+      MS_LOG(WARNING) << "Convert to built-in shape failed: " << node->name_;
+    } else if (FusePostNodes(node, subgraph_index) != RET_OK) {
+      MS_LOG(WARNING) << "Fused to built-in shape failed: " << node->name_;
+    }
+    std::transform(node->output_indices_.begin(), node->output_indices_.end(),
+                   std::back_inserter(shape_fusion_outputs_),
+                   [&](uint32_t idx) { return this->src_tensors_->at(idx); });
+#endif
+  }
+  void FreeOutputTensorDataOfFusedShape() {
+#if !defined(RUNTIME_PASS_CLIP)
+    for (auto tensor : shape_fusion_outputs_) {
+      tensor->FreeData();
+      tensor->set_category(VAR);
+    }
+#endif
+  }
 
  private:
+#ifndef RUNTIME_PASS_CLIP
+  int ConvertToShapeFusion(Model::Node *node);
+  int FusePostNodes(Model::Node *node, size_t subgraph_index);
   Tensor *BuildTensorFromShapeFusionMatrix(const ShapeFusionMatrix &shape_fusion_matrix);
   bool CheckCanFused(const Model::Node *shape_fusion, const Model::Node *post_node, uint32_t input_idx,
                      size_t subgraph_index);
@@ -108,12 +131,13 @@ class ShapeFusionPass {
                      ShapeFusionMatrix *constant_matrix);
 
  private:
+  std::map<uint32_t, ShapeFusionMatrix> shape_fusion_matrices_;
+  std::vector<lite::Tensor *> shape_fusion_outputs_;
+#endif
   LiteModel *lite_model_ = nullptr;
   const std::vector<Model::Node *> *all_nodes_ = nullptr;
   std::vector<lite::Tensor *> *src_tensors_ = nullptr;
   std::map<uint32_t, std::vector<Model::Node *>> used_nodes_;
-  std::map<uint32_t, ShapeFusionMatrix> shape_fusion_matrices_;
 };
 } // namespace mindspore::lite
-#endif
 #endif // MINDSPORE_LITE_SRC_RUNTIME_RUNTIME_SHAPE_FUSION_PASS_H_
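Note: the header now funnels ConvertToShapeFusion and FusePostNodes through a single public Run() entry point and moves the shape_fusion_outputs_ list into the pass itself, so FreeOutputTensorDataOfFusedShape() can release the folded shape data without the scheduler tracking those tensors. A simplified, runnable sketch of that ownership (Tensor and the pass class are hypothetical stand-ins):

#include <vector>

struct Tensor {  // stand-in: the real code also resets the tensor category
  std::vector<int> data{1, 2, 3};
  void FreeData() { data.clear(); }
};

class ShapeFusionPassSketch {
 public:
  // Run() records every output tensor produced by a fused shape node.
  void Run(Tensor *output) { shape_fusion_outputs_.push_back(output); }
  // Called once after scheduling: release the folded shape data.
  void FreeOutputTensorDataOfFusedShape() {
    for (Tensor *t : shape_fusion_outputs_) t->FreeData();
  }

 private:
  std::vector<Tensor *> shape_fusion_outputs_;  // now owned by the pass
};

int main() {
  Tensor out;
  ShapeFusionPassSketch pass;
  pass.Run(&out);                           // during shape inference
  pass.FreeOutputTensorDataOfFusedShape();  // after kernels are scheduled
  return 0;
}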
@@ -39,9 +39,7 @@
 #include "src/common/tensor_util.h"
 #include "src/common/context_util.h"
 #include "src/runtime/infer_manager.h"
-#ifndef RUNTIME_PASS_CLIP
 #include "src/runtime/runtime_pass.h"
-#endif
 #ifndef AUTO_PARALLEL_CLIP
 #include "src/sub_graph_split.h"
 #endif
@@ -259,7 +257,6 @@ int Scheduler::SchedulePreProcess() {
  schema_version_ = reinterpret_cast<LiteModel *>(src_model_)->GetSchemaVersion();

  this->graph_output_node_indexes_ = GetGraphOutputNodes(src_model_);

  *is_infershape_ = InferSubGraphShape(kMainSubGraphIndex);
  if (*is_infershape_ != RET_OK && *is_infershape_ != RET_INFER_INVALID) {
    MS_LOG(ERROR) << "op infer shape failed.";
@@ -397,10 +394,9 @@ int Scheduler::Schedule(std::vector<kernel::KernelExec *> *dst_kernels) {
     MS_LOG(ERROR) << "CheckInputParam failed! ret: " << check_input_ret;
     return check_input_ret;
   }
-#ifndef RUNTIME_PASS_CLIP
-  shape_fusion_pass_ = std::make_shared<ShapeFusionPass>(reinterpret_cast<LiteModel *>(src_model_), src_tensors_);
-#endif
 
+  shape_fusion_pass_ = std::make_shared<ShapeFusionPass>(reinterpret_cast<LiteModel *>(src_model_), src_tensors_);
+  MS_CHECK_TRUE_RET(shape_fusion_pass_ != nullptr, RET_ERROR);
   int ret = SchedulePreProcess();
   if (ret != RET_OK) {
     return ret;
@@ -428,15 +424,9 @@ int Scheduler::Schedule(std::vector<kernel::KernelExec *> *dst_kernels) {
       return ret;
     }
   }
+  shape_fusion_pass_->FreeOutputTensorDataOfFusedShape();
 
 #ifndef DELEGATE_CLIP
-#ifndef RUNTIME_PASS_CLIP
-  // Free the output tensor data of shape fusion.
-  for (auto tensor : shape_fusion_outputs_) {
-    tensor->FreeData();
-    tensor->set_category(VAR);
-  }
-#endif
   ret = InitDelegateKernels(dst_kernels);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "Repalce delegate kernels failed.";
@@ -467,13 +457,11 @@ int Scheduler::Schedule(std::vector<kernel::KernelExec *> *dst_kernels) {
   }
 #endif
 
-#ifndef RUNTIME_PASS_CLIP
   auto status = RuntimePass(dst_kernels, src_tensors_);
   if (status != RET_OK) {
     MS_LOG(ERROR) << "runtime pass failed.";
     return RET_ERROR;
   }
-#endif
 
   ret = InitKernels(std::move(*dst_kernels));
   if (ret != RET_OK) {
@@ -742,11 +730,6 @@ int Scheduler::InferNodeShape(const lite::Model::Node *node) {
       return RET_ERROR;
     }
   }
-#if !defined(RUNTIME_PASS_CLIP) && !defined(DELEGATE_CLIP)
-  if (node->node_type_ == PrimType_Inner_ShapeFusion) {
-    shape_fusion_outputs_.insert(shape_fusion_outputs_.end(), outputs.begin(), outputs.end());
-  }
-#endif
 } else if (ret != RET_INFER_INVALID) {
   FreeOpParameters();
   return RET_ERROR;
@@ -877,17 +860,11 @@ int Scheduler::InferSubGraphShape(size_t subgraph_index) {
       MS_LOG(ERROR) << "Op " << node->name_ << " should exist in model!";
       return RET_ERROR;
     }
-#ifndef RUNTIME_PASS_CLIP
     if (node->node_type_ == schema::PrimitiveType_Shape) {
       // convert shape to built-in shape
       MS_CHECK_TRUE_RET(node->input_indices_.size() == 1, RET_ERROR);
-      if (shape_fusion_pass_->ConvertToShapeFusion(node) != RET_OK) {
-        MS_LOG(WARNING) << "Convert to built-in shape failed: " << node->name_;
-      } else if (shape_fusion_pass_->FusePostNodes(node, subgraph_index) != RET_OK) {
-        MS_LOG(WARNING) << "Fused to built-in shape failed: " << node->name_;
-      }
+      shape_fusion_pass_->Run(node, subgraph_index);
     }
-#endif
     auto ret = InferNodeShape(node);
     if (ret == RET_INFER_INVALID) {
       MS_LOG(INFO) << "InferShape interrupted, name: " << node->name_
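Note: after these scheduler changes the shape-fusion flow is three unconditional steps: create the pass, call Run() for each Shape node during InferSubGraphShape, and free the fused outputs once kernels are scheduled; all RUNTIME_PASS_CLIP handling now lives inside the pass. A hedged outline of that control flow with simplified stand-in types:

#include <memory>
#include <vector>

enum Status { RET_OK = 0, RET_ERROR = 1 };  // stand-in status codes
struct Node {};                             // stand-in for Model::Node
class ShapeFusionPass {                     // stand-in for the real pass
 public:
  void Run(Node *node, size_t subgraph_index) { (void)node; (void)subgraph_index; }
  void FreeOutputTensorDataOfFusedShape() {}
};

Status ScheduleSketch(const std::vector<Node *> &shape_nodes) {
  // 1. Create the pass unconditionally; clipping is handled inside it.
  auto pass = std::make_shared<ShapeFusionPass>();
  if (pass == nullptr) return RET_ERROR;  // mirrors MS_CHECK_TRUE_RET
  // 2. During shape inference, every Shape node takes one entry point.
  for (Node *node : shape_nodes) {
    pass->Run(node, /*subgraph_index=*/0);
  }
  // 3. After kernels are scheduled, release the folded output data.
  pass->FreeOutputTensorDataOfFusedShape();
  return RET_OK;
}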
@@ -35,9 +35,7 @@
 #ifndef CONTROLFLOW_TENSORLIST_CLIP
 #include "src/control_flow/control_flow_scheduler.h"
 #endif
-#ifndef RUNTIME_PASS_CLIP
 #include "src/runtime/runtime_shape_fusion_pass.h"
-#endif
 
 namespace mindspore::lite {
 constexpr int kDefaultDeviceType = -1;
@@ -180,12 +178,7 @@ class Scheduler {
   int schema_version_ = SCHEMA_VERSION::SCHEMA_CUR;
   std::map<std::string, TypeId> *execution_plan_ = nullptr;
   const std::map<std::string, std::map<std::string, std::string>> *config_info_ = nullptr;
-#ifndef RUNTIME_PASS_CLIP
   std::shared_ptr<ShapeFusionPass> shape_fusion_pass_ = nullptr;
-#ifndef DELEGATE_CLIP
-  std::vector<Tensor *> shape_fusion_outputs_;
-#endif
-#endif
 };
 } // namespace mindspore::lite
@@ -1 +1 @@
-856848
+858546
@@ -1 +1 @@
-856848
+858546