From 1200ae2f27ec883d0a3dc801e645fa92bf30b5b5 Mon Sep 17 00:00:00 2001 From: yangjie159 Date: Wed, 24 Mar 2021 11:33:22 +0800 Subject: [PATCH] features, add model implement and support parallel --- mindspore/lite/micro/cmake/file_list.cmake | 2 + .../generator/component/cmake_component.cc | 3 + .../generator/component/common_component.cc | 45 +++++--- .../generator/component/common_component.h | 4 +- .../component/const_blocks/benchmark.cc | 60 +++++++++- .../component/const_blocks/cmake_lists.cc | 13 ++- .../generator/component/const_blocks/model.cc | 88 +++++++++++++++ .../generator/component/const_blocks/model.h | 26 +++++ .../component/const_blocks/msession.cc | 23 ++-- .../component/const_blocks/mtensor.cc | 2 +- .../component/const_blocks/thread_pool.cc | 104 ++++++++++++++++++ .../component/const_blocks/thread_pool.h | 26 +++++ .../lite/micro/coder/generator/generator.cc | 19 +++- .../nnacl/fp32/activation_fp32_coder.cc | 6 +- .../nnacl/fp32/conv2d_delegate_fp32_coder.cc | 5 +- .../opcoders/nnacl/int8/add_int8_coder.cc | 2 +- .../nnacl/int8/conv2d_1x1_int8_coder.cc | 2 +- .../int8/convolution_depthwise_int8_coder.cc | 2 +- .../int8/convolution_depthwise_int8_coder.h | 2 +- .../opcoders/nnacl/int8/resize_int8_coder.cc | 4 +- 20 files changed, 382 insertions(+), 56 deletions(-) create mode 100644 mindspore/lite/micro/coder/generator/component/const_blocks/model.cc create mode 100644 mindspore/lite/micro/coder/generator/component/const_blocks/model.h create mode 100644 mindspore/lite/micro/coder/generator/component/const_blocks/thread_pool.cc create mode 100644 mindspore/lite/micro/coder/generator/component/const_blocks/thread_pool.h diff --git a/mindspore/lite/micro/cmake/file_list.cmake b/mindspore/lite/micro/cmake/file_list.cmake index 789cdddbbea..f3bc5c27c56 100644 --- a/mindspore/lite/micro/cmake/file_list.cmake +++ b/mindspore/lite/micro/cmake/file_list.cmake @@ -29,8 +29,10 @@ set(CODER_GENERATOR_SRC ${MICRO_DIR}/coder/generator/component/const_blocks/msession.cc ${MICRO_DIR}/coder/generator/component/const_blocks/mtensor.cc ${MICRO_DIR}/coder/generator/component/const_blocks/mstring.cc + ${MICRO_DIR}/coder/generator/component/const_blocks/model.cc ${MICRO_DIR}/coder/generator/component/const_blocks/license.cc ${MICRO_DIR}/coder/generator/component/const_blocks/load_input.cc + ${MICRO_DIR}/coder/generator/component/const_blocks/thread_pool.cc ${MICRO_DIR}/coder/generator/component/const_blocks/benchmark.cc ) diff --git a/mindspore/lite/micro/coder/generator/component/cmake_component.cc b/mindspore/lite/micro/coder/generator/component/cmake_component.cc index f263a7ff96b..45aeb163b27 100644 --- a/mindspore/lite/micro/coder/generator/component/cmake_component.cc +++ b/mindspore/lite/micro/coder/generator/component/cmake_component.cc @@ -39,6 +39,9 @@ void CodeCMakeNetLibrary(std::ofstream &ofs, const std::unique_ptr if (config->debug_mode()) { ofs << " debug_utils.c.o\n"; } + if (config->support_parallel()) { + ofs << " thread_pool.c.o\n"; + } ofs << ")\n"; std::set kernel_cmake_asm_set_files = ctx->asm_files(); if (!kernel_cmake_asm_set_files.empty() && (config->target() == kARM32A || config->target() == kARM64)) { diff --git a/mindspore/lite/micro/coder/generator/component/common_component.cc b/mindspore/lite/micro/coder/generator/component/common_component.cc index 436f13dbbf8..ba1977d39a7 100644 --- a/mindspore/lite/micro/coder/generator/component/common_component.cc +++ b/mindspore/lite/micro/coder/generator/component/common_component.cc @@ -25,7 +25,7 @@ #include 
"nnacl/op_base.h" namespace mindspore::lite::micro { -void CodeSessionCompileGraph(std::ofstream &ofs, const std::unique_ptr &ctx) { +void CodeSessionCompileGraph(std::ofstream &ofs, const std::unique_ptr &ctx, const Configurator *config) { auto array_tostring = [&ofs](const std::vector &array, const std::string &name) { size_t num = array.size(); ofs << " Vector " << name << ";\n"; @@ -59,25 +59,42 @@ void CodeSessionCompileGraph(std::ofstream &ofs, const std::unique_ptrdata_type()) << ", " << shape_i << ");\n"; ofs << " MS_ERROR_IF_NULL(outputs_[" << i << "]);\n"; } + if (config->target() != kARM32M) { + ofs << " int ret = Init(model->buf, dynamic_cast(model)->buf_size());\n" + " return ret;\n" + "}\n\n"; + return; + } ofs << " return RET_OK;\n"; ofs << "}\n\n"; } -void CodeCreateSessionImplement(std::ofstream &ofs, Target target) { - ofs << "session::LiteSession *session::LiteSession::CreateSession(const char *net_buf, size_t size,\n" - " const lite::Context *context) {\n" - " session::LiteSession *session = CreateSession(context);\n" - " if (session == nullptr) {\n" - " return nullptr;\n" - " }\n" - " int ret = session->CompileGraph(nullptr);\n" - " if (ret != lite::RET_OK) {\n" - " return nullptr;\n" - " }\n"; - if (target != kARM32M) { - ofs << " Init(const_cast(net_buf), size);\n"; +void CodeCreateSessionImplement(std::ofstream &ofs, const Configurator *config) { + ofs << "session::LiteSession *session::LiteSession::CreateSession(const lite::Context *context) {\n" + " auto *session = new (std::nothrow) lite::LiteSession();\n" + " MS_NULLPTR_IF_NULL(session);\n" + " int ret = session->InitRuntimeBuffer();\n" + " MS_NULLPTR_IF_ERROR(ret);\n"; + if (config->support_parallel()) { + ofs << " MS_NULLPTR_IF_NULL(context);\n" + " struct ThreadPool *thread_pool =\n" + " CreateThreadPool(context->thread_num_, " + "context->device_list_[0].device_info_.cpu_device_info_.cpu_bind_mode_);\n" + " MS_NULLPTR_IF_NULL(thread_pool);\n" + " ret = SetThreadPool(thread_pool);\n" + " MS_NULLPTR_IF_ERROR(ret);\n"; } ofs << " return session;\n" + "}\n\n"; + ofs << "session::LiteSession *session::LiteSession::CreateSession(const char *model_buf, size_t size,\n" + " const lite::Context *context) {\n" + " session::LiteSession *session = CreateSession(context);\n" + " MS_NULLPTR_IF_NULL(session);\n" + " lite::Model *model = lite::Model::Import(model_buf, size);\n" + " int ret = session->CompileGraph(model);\n" + " MS_NULLPTR_IF_ERROR(ret);\n" + " delete model;\n" + " return session;\n" "}\n" "} // namespace mindspore\n\n"; } diff --git a/mindspore/lite/micro/coder/generator/component/common_component.h b/mindspore/lite/micro/coder/generator/component/common_component.h index b14fc148abf..3c7386ef5eb 100644 --- a/mindspore/lite/micro/coder/generator/component/common_component.h +++ b/mindspore/lite/micro/coder/generator/component/common_component.h @@ -27,8 +27,8 @@ #include "coder/config.h" namespace mindspore::lite::micro { -void CodeSessionCompileGraph(std::ofstream &ofs, const std::unique_ptr &ctx); -void CodeCreateSessionImplement(std::ofstream &ofs, Target target); +void CodeSessionCompileGraph(std::ofstream &ofs, const std::unique_ptr &ctx, const Configurator *config); +void CodeCreateSessionImplement(std::ofstream &ofs, const Configurator *config); void CodeCopyOutputsState(std::ofstream &ofs); void CodeCopyOutputsImplement(std::ofstream &ofs, const std::unique_ptr &ctx); diff --git a/mindspore/lite/micro/coder/generator/component/const_blocks/benchmark.cc 
b/mindspore/lite/micro/coder/generator/component/const_blocks/benchmark.cc index 6f9cf8c3d63..f203308c66b 100644 --- a/mindspore/lite/micro/coder/generator/component/const_blocks/benchmark.cc +++ b/mindspore/lite/micro/coder/generator/component/const_blocks/benchmark.cc @@ -58,6 +58,17 @@ void usage() { "args[5]: runtime thread bind mode\n\n"); } +uint64_t GetTimeUs() { + const int USEC = 1000000; + const int MSEC = 1000; + struct timespec ts = {0, 0}; + if (clock_gettime(CLOCK_MONOTONIC, &ts) != 0) { + return 0; + } + uint64_t retval = (uint64_t)((ts.tv_sec * USEC) + (ts.tv_nsec / MSEC)); + return retval; +} + template void PrintData(void *data, size_t data_number) { if (data == nullptr) { @@ -65,7 +76,7 @@ void PrintData(void *data, size_t data_number) { } auto casted_data = static_cast(data); for (size_t i = 0; i < 10 && i < data_number; i++) { - printf("%s,", std::to_string(casted_data[i]).c_str()); + printf("%s, ", std::to_string(casted_data[i]).c_str()); } printf("\n"); } @@ -73,12 +84,12 @@ void PrintData(void *data, size_t data_number) { void TensorToString(tensor::MSTensor *tensor) { printf("name: %s, ", tensor->tensor_name().c_str()); printf(", DataType: %d", tensor->data_type()); - printf(", Size: %lu", tensor->Size()); - printf(", Shape: "); + printf(", Elements: %d", tensor->ElementsNum()); + printf(", Shape: ["); for (auto &dim : tensor->shape()) { printf("%d ", dim); } - printf(", Data: \n"); + printf("], Data: \n"); switch (tensor->data_type()) { case kNumberTypeFloat32: { PrintData(tensor->MutableData(), tensor->ElementsNum()); @@ -118,11 +129,27 @@ int main(int argc, const char **argv) { if (argc >= 3) { model_buffer = static_cast(ReadInputData(argv[2], &model_size)); } - session::LiteSession *session = mindspore::session::LiteSession::CreateSession(model_buffer, model_size, nullptr); + + lite::Context *context = nullptr; + if (argc >= 5) { + // config benchmark context + context = new (std::nothrow) lite::Context(); + if (context == nullptr) { + return lite::RET_ERROR; + } + context->thread_num_ = atoi(argv[4]); + context->device_list_.resize(1); + context->device_list_[0] = {lite::DT_CPU, {{false, static_cast(atoi(argv[5]))}}}; + printf("context: ThreadNum: %d, BindMode: %d\n", context->thread_num_, + context->device_list_[0].device_info_.cpu_device_info_.cpu_bind_mode_); + } + + session::LiteSession *session = mindspore::session::LiteSession::CreateSession(model_buffer, model_size, context); if (session == nullptr) { - std::cerr << "create lite session failed" << std::endl; + printf("create lite session failed\n"); return lite::RET_ERROR; } + delete[] model_buffer; // set model inputs tensor data Vector inputs = session->GetInputs(); @@ -141,20 +168,41 @@ int main(int argc, const char **argv) { memcpy(input_data, inputs_binbuf[i], inputs_size[i]); } + if (argc >= 4) { + int loop_count = atoi(argv[3]); + printf("\nloop count: %d\n", loop_count); + uint64_t start_time = GetTimeUs(); + for (int i = 0; i < loop_count; ++i) { + ret = session->RunGraph(); + if (ret != lite::RET_OK) { + return lite::RET_ERROR; + } + } + uint64_t end_time = GetTimeUs(); + float total_time = (float)(end_time - start_time) / 1000.0f; + printf("total time: %.5fms, per time: %.5fms\n", total_time, total_time / loop_count); + } ret = session->RunGraph(); if (ret != lite::RET_OK) { return lite::RET_ERROR; } Vector outputs_name = session->GetOutputTensorNames(); + printf("\noutputs: \n"); for (const auto &name : outputs_name) { auto output = session->GetOutputByTensorName(name); 
TensorToString(output); } printf("========run success=======\n"); delete session; + session = nullptr; + if (context != nullptr) { + delete context; + context = nullptr; + } for (size_t i = 0; i < inputs_num; ++i) { free(inputs_binbuf[i]); + inputs_binbuf[i] = nullptr; } return lite::RET_OK; } diff --git a/mindspore/lite/micro/coder/generator/component/const_blocks/cmake_lists.cc b/mindspore/lite/micro/coder/generator/component/const_blocks/cmake_lists.cc index 4ab8e74618a..c06313021b6 100644 --- a/mindspore/lite/micro/coder/generator/component/const_blocks/cmake_lists.cc +++ b/mindspore/lite/micro/coder/generator/component/const_blocks/cmake_lists.cc @@ -59,15 +59,17 @@ if("${CMAKE_BUILD_TYPE}" STREQUAL "Debug") set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fvisibility=default") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fvisibility=default") else() - set(CMAKE_C_FLAGS "-fPIC -fPIE -D_FORTIFY_SOURCE=2 -O2 -Wall -Werror -fstack-protector-strong -Wno-attributes \ + message(STATUS "build benchmark release version") + set(CMAKE_C_FLAGS "-fPIC -fPIE -D_FORTIFY_SOURCE=2 -O3 -Wall -Werror -fstack-protector-strong -Wno-attributes \ -Wno-deprecated-declarations -Wno-missing-braces ${CMAKE_C_FLAGS}") - set(CMAKE_CXX_FLAGS "-fPIC -fPIE -D_FORTIFY_SOURCE=2 -O2 -Wall -Werror -fstack-protector-strong -Wno-attributes \ + set(CMAKE_CXX_FLAGS "-fPIC -fPIE -D_FORTIFY_SOURCE=2 -O3 -Wall -Werror -fstack-protector-strong -Wno-attributes \ -Wno-deprecated-declarations -Wno-missing-braces -Wno-overloaded-virtual ${CMAKE_CXX_FLAGS}") + string(REPLACE "-g" "" CMAKE_C_FLAGS "${CMAKE_C_FLAGS}") + string(REPLACE "-g" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") endif() add_subdirectory(src) include_directories(${CMAKE_CURRENT_SOURCE_DIR}) -include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../src/) include_directories(${HEADER_PATH}) set(SRC_FILES benchmark/benchmark.cc @@ -96,7 +98,6 @@ message("operator header path: ${OP_HEADER_PATH}") add_compile_definitions(NOT_USE_STL) -include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../include) include_directories(${OP_HEADER_PATH}) include_directories(${HEADER_PATH}) @@ -123,15 +124,19 @@ endif() set(CMAKE_C_FLAGS "${CMAKE_ENABLE_C99} ${CMAKE_C_FLAGS}") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++17") if("${CMAKE_BUILD_TYPE}" STREQUAL "Debug") + message(STATUS "build net library with debug info") set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DDebug -g") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DDebug -g") set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fvisibility=default") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fvisibility=default") else() + message(STATUS "build net library release version") set(CMAKE_C_FLAGS "-fPIC -fPIE -D_FORTIFY_SOURCE=2 -O3 -Wall -Werror -fstack-protector-strong -Wno-attributes \ -Wno-deprecated-declarations -Wno-missing-braces ${CMAKE_C_FLAGS}") set(CMAKE_CXX_FLAGS "-fPIC -fPIE -D_FORTIFY_SOURCE=2 -O3 -Wall -Werror -fstack-protector-strong -Wno-attributes \ -Wno-deprecated-declarations -Wno-missing-braces -Wno-overloaded-virtual ${CMAKE_CXX_FLAGS}") + string(REPLACE "-g" "" CMAKE_C_FLAGS "${CMAKE_C_FLAGS}") + string(REPLACE "-g" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") endif() function(create_library) diff --git a/mindspore/lite/micro/coder/generator/component/const_blocks/model.cc b/mindspore/lite/micro/coder/generator/component/const_blocks/model.cc new file mode 100644 index 00000000000..fad515963d4 --- /dev/null +++ b/mindspore/lite/micro/coder/generator/component/const_blocks/model.cc @@ -0,0 +1,88 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under 
the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "coder/generator/component/const_blocks/model.h" + +namespace mindspore::lite::micro { + +const char *model_header = R"RAW( +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef MINDSPORE_LITE_LIBRARY_SOURCE_MODEL_H_ +#define MINDSPORE_LITE_LIBRARY_SOURCE_MODEL_H_ + +#include "include/model.h" +#include "session.h" +#include +#include + +namespace mindspore::lite { +class MModel : public Model { + public: + void Free() override { + if (this->buf != nullptr) { + free(this->buf); + this->buf = nullptr; + this->buf_size_ = 0; + } + } + + void Destroy() override { Free(); } + + ~MModel() override { Destroy(); } + + void set_buf_size(size_t size) { buf_size_ = size; } + size_t buf_size() const { return buf_size_; } + + private: + size_t buf_size_{0}; +}; + +Model *Model::Import(const char *model_buf, size_t size) { + MS_NULLPTR_IF_NULL(model_buf); + MModel *model = new (std::nothrow) MModel(); + MS_NULLPTR_IF_NULL(model); + if (size == 0) { + delete model; + return nullptr; + } + model->buf = reinterpret_cast(malloc(size)); + if (model->buf == nullptr) { + delete model; + return nullptr; + } + memcpy(model->buf, model_buf, size); + model->set_buf_size(size); + return model; +} +} // namespace mindspore::lite +#endif // MINDSPORE_LITE_LIBRARY_SOURCE_MODEL_H_ +)RAW"; + +} // namespace mindspore::lite::micro diff --git a/mindspore/lite/micro/coder/generator/component/const_blocks/model.h b/mindspore/lite/micro/coder/generator/component/const_blocks/model.h new file mode 100644 index 00000000000..bd53cbd6800 --- /dev/null +++ b/mindspore/lite/micro/coder/generator/component/const_blocks/model.h @@ -0,0 +1,26 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef MINDSPORE_LITE_MICRO_GENERATOR_CONST_BLOCK_MMODEL_H_ +#define MINDSPORE_LITE_MICRO_GENERATOR_CONST_BLOCK_MMODEL_H_ + +namespace mindspore::lite::micro { + +extern const char *model_header; + +} // namespace mindspore::lite::micro + +#endif // MINDSPORE_LITE_MICRO_GENERATOR_CONST_BLOCK_MMODEL_H_ diff --git a/mindspore/lite/micro/coder/generator/component/const_blocks/msession.cc b/mindspore/lite/micro/coder/generator/component/const_blocks/msession.cc index 9daa07054c7..9e3d87235e6 100644 --- a/mindspore/lite/micro/coder/generator/component/const_blocks/msession.cc +++ b/mindspore/lite/micro/coder/generator/component/const_blocks/msession.cc @@ -53,6 +53,20 @@ namespace lite { } \ } while (0) +#define MS_NULLPTR_IF_NULL(ptr) \ + do { \ + if ((ptr) == nullptr) { \ + return nullptr; \ + } \ + } while (0) + +#define MS_NULLPTR_IF_ERROR(ptr) \ + do { \ + if ((ptr) != mindspore::lite::RET_OK) { \ + return nullptr; \ + } \ + } while (0) + class LiteSession : public session::LiteSession { public: LiteSession() = default; @@ -176,15 +190,6 @@ mindspore::tensor::MSTensor *LiteSession::GetOutputByTensorName(const String &te } } // namespace lite - -session::LiteSession *session::LiteSession::CreateSession(const lite::Context *context) { - auto *session = new (std::nothrow) lite::LiteSession(); - if (session == nullptr) { - return nullptr; - } - session->InitRuntimeBuffer(); - return session; -} )RAW"; } // namespace mindspore::lite::micro diff --git a/mindspore/lite/micro/coder/generator/component/const_blocks/mtensor.cc b/mindspore/lite/micro/coder/generator/component/const_blocks/mtensor.cc index 2e8e1f15b0f..038ed12df27 100644 --- a/mindspore/lite/micro/coder/generator/component/const_blocks/mtensor.cc +++ b/mindspore/lite/micro/coder/generator/component/const_blocks/mtensor.cc @@ -67,7 +67,7 @@ class MTensor : public mindspore::tensor::MSTensor { int ElementsNum() const override; size_t Size() const override; String tensor_name() const override { return tensor_name_; } - void set_tensor_name(const String name) override { tensor_name_ = name; } + void set_tensor_name(const String &name) override { tensor_name_ = name; } void *MutableData() override; void *data() override { return data_; } void set_data(void *data) override { data_ = data; } diff --git a/mindspore/lite/micro/coder/generator/component/const_blocks/thread_pool.cc b/mindspore/lite/micro/coder/generator/component/const_blocks/thread_pool.cc new file mode 100644 index 00000000000..e675ddb84f6 --- /dev/null +++ b/mindspore/lite/micro/coder/generator/component/const_blocks/thread_pool.cc @@ -0,0 +1,104 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "coder/generator/component/const_blocks/thread_pool.h" + +namespace mindspore::lite::micro { + +const char *thread_header = R"RAW( +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_LITE_SRC_RUNTIME_THREAD_POOL_H_ +#define MINDSPORE_LITE_SRC_RUNTIME_THREAD_POOL_H_ + +#include +#ifdef __cplusplus +extern "C" { +#endif + +#define MAX_TASK_NUM (2) + +/// \brief BindMode defined for holding bind cpu strategy argument. +typedef enum { + NO_BIND_MODE = 0, /**< no bind */ + HIGHER_MODE = 1, /**< bind higher cpu first */ + MID_MODE = 2 /**< bind middle cpu first */ +} BindMode; + +struct ThreadPool; + +struct ThreadPool *CreateThreadPool(int thread_num, int mode); + +/** + * + * @param session_index, support multi session + * @param job + * @param content + * @param task_num + */ +int ParallelLaunch(struct ThreadPool *thread_pool, int (*job)(void *, int), void *content, int task_num); + +/** + * bind each thread to specified cpu core + * @param is_bind + * @param mode + */ +int BindThreads(struct ThreadPool *thread_pool, bool is_bind, int mode); + +/** + * activate the thread pool + * @param thread_pool_id + */ +void ActivateThreadPool(struct ThreadPool *thread_pool); + +/** + * deactivate the thread pool + * @param thread_pool_id + */ +void DeactivateThreadPool(struct ThreadPool *thread_pool); + +/** + * + * @return current thread num + */ +int GetCurrentThreadNum(struct ThreadPool *thread_pool); + +/** + * destroy thread pool, and release resource + */ +void DestroyThreadPool(struct ThreadPool *thread_pool); + +#ifdef __cplusplus +} +#endif +#endif // MINDSPORE_LITE_SRC_RUNTIME_THREAD_POOL_H_ +)RAW"; + +} // namespace mindspore::lite::micro diff --git a/mindspore/lite/micro/coder/generator/component/const_blocks/thread_pool.h b/mindspore/lite/micro/coder/generator/component/const_blocks/thread_pool.h new file mode 100644 index 00000000000..0c2a58d1c94 --- /dev/null +++ b/mindspore/lite/micro/coder/generator/component/const_blocks/thread_pool.h @@ -0,0 +1,26 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef MINDSPORE_LITE_MICRO_GENERATOR_CONST_BLOCK_THREAD_POOL_H_ +#define MINDSPORE_LITE_MICRO_GENERATOR_CONST_BLOCK_THREAD_POOL_H_ + +namespace mindspore::lite::micro { + +extern const char *thread_header; + +} // namespace mindspore::lite::micro + +#endif // MINDSPORE_LITE_MICRO_GENERATOR_CONST_BLOCK_THREAD_POOL_H_ diff --git a/mindspore/lite/micro/coder/generator/generator.cc b/mindspore/lite/micro/coder/generator/generator.cc index 5edd015d5db..f8b4c16cb4b 100644 --- a/mindspore/lite/micro/coder/generator/generator.cc +++ b/mindspore/lite/micro/coder/generator/generator.cc @@ -27,6 +27,8 @@ #include "coder/generator/component/const_blocks/msession.h" #include "coder/generator/component/const_blocks/mtensor.h" #include "coder/generator/component/const_blocks/mstring.h" +#include "coder/generator/component/const_blocks/model.h" +#include "coder/generator/component/const_blocks/thread_pool.h" #include "coder/generator/component/const_blocks/benchmark.h" #include "coder/generator/component/const_blocks/license.h" #include "coder/log.h" @@ -91,7 +93,11 @@ int Generator::CodeStaticContent() { {net_src_file_path_ + "session.h", session_header}, {net_src_file_path_ + "tensor.h", tensor_header}, {net_src_file_path_ + "tensor.cc", tensor_source}, - {net_src_file_path_ + "string.cc", string_source}}; + {net_src_file_path_ + "string.cc", string_source}, + {net_src_file_path_ + "model.h", model_header}}; + if (config_->support_parallel()) { + const_blocks.emplace_back(std::make_pair(net_src_file_path_ + "thread_pool.h", thread_header)); + } if (config_->debug_mode()) { const_blocks.emplace_back(std::make_pair(net_src_file_path_ + "debug_utils.h", debug_utils_h)); const_blocks.emplace_back(std::make_pair(net_src_file_path_ + "debug_utils.c", debug_utils_c)); @@ -111,11 +117,12 @@ int Generator::CodeSessionImplement() { MS_LOG(INFO) << "write " << cfile; ofs << g_hwLicense; ofs << "#include \"session.h\"\n"; - ofs << "#include \"net.h\"\n\n"; + ofs << "#include \"model.h\"\n"; + ofs << "#include \"net.h\"\n"; ofs << "#include \n\n"; - CodeSessionCompileGraph(ofs, ctx_); + CodeSessionCompileGraph(ofs, ctx_, config_); ofs << session_source; - CodeCreateSessionImplement(ofs, config_->target()); + CodeCreateSessionImplement(ofs, config_); return RET_OK; } @@ -134,8 +141,8 @@ int Generator::CodeWeightFile() { MS_LOG(INFO) << "write " << cfile; cofs << g_hwLicense; cofs << "#include \"" << net_weight_hfile_ << "\"\n\n"; - cofs << "int " << gThreadNum << " = 1;\n"; - cofs << "unsigned char * " << ctx_->buffer_name() << " = 0;\n"; + cofs << "int " << gThreadNum << " = 1; \n"; + cofs << "unsigned char * " << ctx_->buffer_name() << " = 0; \n"; if (config_->target() != kARM32M) { std::string net_file = net_src_file_path_ + "net.bin"; diff --git a/mindspore/lite/micro/coder/opcoders/nnacl/fp32/activation_fp32_coder.cc b/mindspore/lite/micro/coder/opcoders/nnacl/fp32/activation_fp32_coder.cc index f560221ebe1..abd33249e2f 100644 --- a/mindspore/lite/micro/coder/opcoders/nnacl/fp32/activation_fp32_coder.cc +++ b/mindspore/lite/micro/coder/opcoders/nnacl/fp32/activation_fp32_coder.cc @@ -33,11 +33,7 @@ int ActivationFP32Coder::DoCode(CoderContext *const context) { int stride = UP_DIV(length, thread_num_); int count = MSMIN(stride, length - stride * task_id); - if (activation_parameter->type_ == schema::ActivationType_SIGMOID) { - Collect(context, {"runtime/kernel/fp32/sigmoid_fp32.h"}, {"sigmoid_fp32.c"}); - } else { - Collect(context, {"nnacl/fp32/activation_fp32.h"}, {"activation_fp32.c"}); - } + 
Collect(context, {"nnacl/fp32/activation_fp32.h"}, {"activation_fp32.c"}); NNaclFp32Serializer code; switch (activation_parameter->type_) { case schema::ActivationType_RELU: diff --git a/mindspore/lite/micro/coder/opcoders/nnacl/fp32/conv2d_delegate_fp32_coder.cc b/mindspore/lite/micro/coder/opcoders/nnacl/fp32/conv2d_delegate_fp32_coder.cc index 95c91876181..f88b066e130 100644 --- a/mindspore/lite/micro/coder/opcoders/nnacl/fp32/conv2d_delegate_fp32_coder.cc +++ b/mindspore/lite/micro/coder/opcoders/nnacl/fp32/conv2d_delegate_fp32_coder.cc @@ -76,8 +76,6 @@ std::unique_ptr CPUConvolutionFP32CoderSelect(const std::vector(paramGen(node->primitive_)); - bool use_winograd = false; - int out_unit = 0; int kernel_h = conv_param->kernel_h_; int kernel_w = conv_param->kernel_w_; conv_param->input_h_ = in_tensors.at(kInputIndex)->Height(); @@ -87,7 +85,8 @@ std::unique_ptr CPUConvolutionFP32CoderSelect(const std::vectoroutput_w_ = out_tensors.at(kOutputIndex)->Width(); conv_param->output_channel_ = out_tensors.at(kOutputIndex)->Channel(); conv_param->op_parameter_.thread_num_ = 1; - use_winograd = CheckIfUseWinograd(&out_unit, conv_param); + int out_unit = 0; + bool use_winograd = CheckIfUseWinograd(&out_unit, conv_param); free(conv_param); std::unique_ptr coder; if (kernel_h == 1 && kernel_w == 1) { diff --git a/mindspore/lite/micro/coder/opcoders/nnacl/int8/add_int8_coder.cc b/mindspore/lite/micro/coder/opcoders/nnacl/int8/add_int8_coder.cc index 830be1a660c..36f2218ea44 100644 --- a/mindspore/lite/micro/coder/opcoders/nnacl/int8/add_int8_coder.cc +++ b/mindspore/lite/micro/coder/opcoders/nnacl/int8/add_int8_coder.cc @@ -137,7 +137,7 @@ int AddInt8Coder::ReSize() { int AddInt8Coder::DoCode(CoderContext *const context) { Collect(context, {"wrapper/int8/add_int8_wrapper.h"}, - {"add_int8_wrapper.c", "add_int8.c", "arithmetic_base.c", "arithmetic_int8.c", "thread_pool.c"}); + {"add_int8_wrapper.c", "add_int8.c", "arithmetic_base.c", "arithmetic_int8.c"}); nnacl::NNaclInt8Serializer code; diff --git a/mindspore/lite/micro/coder/opcoders/nnacl/int8/conv2d_1x1_int8_coder.cc b/mindspore/lite/micro/coder/opcoders/nnacl/int8/conv2d_1x1_int8_coder.cc index 2ec6685f85d..c9750843f8d 100644 --- a/mindspore/lite/micro/coder/opcoders/nnacl/int8/conv2d_1x1_int8_coder.cc +++ b/mindspore/lite/micro/coder/opcoders/nnacl/int8/conv2d_1x1_int8_coder.cc @@ -46,7 +46,7 @@ int Conv2D1x1Int8Coder::DoCode(CoderContext *const context) { {"nnacl/int8/conv1x1_int8.h", "nnacl/common_func.h", "wrapper/int8/conv1x1_init_int8_wrapper.h", "wrapper/int8/conv1x1_run_int8_wrapper.h"}, {"common_func.c", "pack_int8.c", "conv1x1_int8.c", "matmul_int8.c", "fixed_point.c", - "conv1x1_init_int8_wrapper.c", "conv1x1_run_int8_wrapper.c", "thread_pool.c", "conv1x1_base.c"}); + "conv1x1_init_int8_wrapper.c", "conv1x1_run_int8_wrapper.c", "conv1x1_base.c"}); nnacl::NNaclInt8Serializer code; diff --git a/mindspore/lite/micro/coder/opcoders/nnacl/int8/convolution_depthwise_int8_coder.cc b/mindspore/lite/micro/coder/opcoders/nnacl/int8/convolution_depthwise_int8_coder.cc index 1c2360abece..da6c0778fd1 100644 --- a/mindspore/lite/micro/coder/opcoders/nnacl/int8/convolution_depthwise_int8_coder.cc +++ b/mindspore/lite/micro/coder/opcoders/nnacl/int8/convolution_depthwise_int8_coder.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2021 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/mindspore/lite/micro/coder/opcoders/nnacl/int8/convolution_depthwise_int8_coder.h b/mindspore/lite/micro/coder/opcoders/nnacl/int8/convolution_depthwise_int8_coder.h index 6b42a73a85b..08464a462a5 100644 --- a/mindspore/lite/micro/coder/opcoders/nnacl/int8/convolution_depthwise_int8_coder.h +++ b/mindspore/lite/micro/coder/opcoders/nnacl/int8/convolution_depthwise_int8_coder.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2021 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/mindspore/lite/micro/coder/opcoders/nnacl/int8/resize_int8_coder.cc b/mindspore/lite/micro/coder/opcoders/nnacl/int8/resize_int8_coder.cc index 066bcc8c1ca..bf38430718f 100644 --- a/mindspore/lite/micro/coder/opcoders/nnacl/int8/resize_int8_coder.cc +++ b/mindspore/lite/micro/coder/opcoders/nnacl/int8/resize_int8_coder.cc @@ -79,7 +79,7 @@ int ResizeInt8Coder::DoCode(CoderContext *const context) { bool align_corners = coordinate_transform_mode_ == schema::CoordinateTransformMode_ALIGN_CORNERS; if (same_zp && same_scale) { code.CodeBaseStruct("ResizeInt8Args", kRunArgs, input_tensor_, output_tensor_, "&input_shape", "&output_shape", - align_corners, thread_num_); + align_corners, gThreadNum); if (support_parallel_) { code.CodeFunction(kParallelLaunch, gThreadPool, "ResizeInt8Run", kRunArgsAddr, gThreadNum); } else { @@ -91,7 +91,7 @@ int ResizeInt8Coder::DoCode(CoderContext *const context) { code.CodeStruct("quant_out", *quant_out_); code.CodeStruct("multiplier", *multiplier_); code.CodeFunction("ResizeNearestNeighborInt8", input_tensor_, output_tensor_, "&input_shape", "&output_shape", - align_corners, "multiplier", "quant_in", "quant_out", 0, thread_num_); + align_corners, "multiplier", "quant_in", "quant_out", kDefaultTaskId, gThreadNum); } break; }
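Reviewer note: a condensed sketch of the session call sequence this patch wires up (CreateSession with a lite::Context, model import and CompileGraph inside CreateSession, then RunGraph), mirroring the updated benchmark.cc above. RunModelOnce, the header set, and the thread_num parameter are illustrative placeholders, not part of the patch.

    // Sketch only: follows the flow of benchmark.cc as changed in this patch.
    // Assumes the headers packaged with the generated project (lite_session.h,
    // context.h, errorcode.h); error handling is trimmed to the essentials.
    int RunModelOnce(const char *model_buf, size_t model_size, int thread_num) {
      auto *context = new (std::nothrow) mindspore::lite::Context();
      if (context == nullptr) {
        return mindspore::lite::RET_ERROR;
      }
      // thread_num_ is consumed by CreateThreadPool() when support_parallel() is generated;
      // device_list_[0] additionally carries the CPU bind mode, as set up in benchmark.cc.
      context->thread_num_ = thread_num;

      // CreateSession(model_buf, size, context) imports the buffer via lite::Model::Import
      // and compiles the graph, per the new CodeCreateSessionImplement above.
      auto *session = mindspore::session::LiteSession::CreateSession(model_buf, model_size, context);
      if (session == nullptr) {
        delete context;
        return mindspore::lite::RET_ERROR;
      }
      // Inputs are filled through session->GetInputs(); outputs are read back with
      // session->GetOutputByTensorName() after RunGraph(), as in the benchmark.
      int ret = session->RunGraph();
      delete session;
      delete context;
      return ret;
    }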
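Reviewer note: thread_pool.h above only declares the C API, so here is a minimal sketch of the job/ParallelLaunch calling convention the generated kernels follow (compare the ResizeInt8Run launch emitted by resize_int8_coder.cc). ExampleArgs and ExampleRun are illustrative names; the slice math restates the stride/count pattern used in activation_fp32_coder.cc.

    // job signature required by ParallelLaunch: int job(void *content, int task_id).
    struct ExampleArgs {
      const float *input;
      float *output;
      int total;       // number of elements to process
      int thread_num;  // same value passed to ParallelLaunch as task_num
    };

    int ExampleRun(void *content, int task_id) {
      auto *args = static_cast<ExampleArgs *>(content);
      int stride = (args->total + args->thread_num - 1) / args->thread_num;   // UP_DIV(total, thread_num)
      int begin = stride * task_id;
      int count = args->total - begin < stride ? args->total - begin : stride;  // MSMIN(stride, length - stride * task_id)
      for (int i = 0; i < count; ++i) {
        args->output[begin + i] = args->input[begin + i];
      }
      return 0;  // RET_OK
    }

    // thread_pool comes from CreateThreadPool(thread_num, bind_mode), which the new
    // LiteSession::CreateSession() creates when support_parallel() is enabled:
    //   ExampleArgs args = {in, out, total, thread_num};
    //   int ret = ParallelLaunch(thread_pool, ExampleRun, &args, args.thread_num);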