support macro for different features

This commit is contained in:
jpc_chenjianping 2022-03-18 11:15:02 +08:00
parent 81428386d6
commit ecc840fe06
17 changed files with 111 additions and 44 deletions

View File

@ -41,6 +41,10 @@ option(MSLITE_ENABLE_RUNTIME_GLOG "enable runtime glog" off)
option(MSLITE_ENABLE_COVERAGE "enable code coverage" off)
option(MSLITE_ENABLE_SHARING_MEM_WITH_OPENGL "enable sharing memory with OpenGL" off)
option(MSLITE_ENABLE_SERVER_INFERENCE "enable inference on server" off)
option(MSLITE_ENABLE_DYNAMIC_THREAD_DISTRIBUTE "enable distribute thread dynamically" off)
option(MSLITE_ENABLE_BFC_MEMORY "enable distribute BFC memory" off)
option(MSLITE_ENABLE_PARALLEL_INFERENCE "enable parallel inference interface" off)
option(MSLITE_ENABLE_SHARING_MODEL_WEIGHT "enable sharing model weight" off)
#Option that can be configured through manually
option(ENABLE_VERBOSE "" off)
@ -148,11 +152,46 @@ endif()
if(DEFINED ENV{MSLITE_ENABLE_SHARING_MEM_WITH_OPENGL})
set(MSLITE_ENABLE_SHARING_MEM_WITH_OPENGL $ENV{MSLITE_ENABLE_SHARING_MEM_WITH_OPENGL})
endif()
if(DEFINED ENV{MSLITE_ENABLE_SERVING})
set(MSLITE_ENABLE_SERVING $ENV{MSLITE_ENABLE_SERVING})
endif()
option(MSLITE_ENABLE_DYNAMIC_THREAD_DISTRIBUTE "enable distribute thread dynamically" off)
option(MSLITE_ENABLE_BFC_MEMORY "enable distribute BFC memory" off)
option(MSLITE_ENABLE_PARALLEL_INFERENCE "enable parallel inference interface" off)
option(MSLITE_ENABLE_SHARING_MODEL_WEIGHT "enable sharing model weight" off)
if(DEFINED ENV{MSLITE_ENABLE_SERVER_INFERENCE})
set(MSLITE_ENABLE_SERVER_INFERENCE $ENV{MSLITE_ENABLE_SERVER_INFERENCE})
set(MSLITE_ENABLE_DYNAMIC_THREAD_DISTRIBUTE on)
set(MSLITE_ENABLE_BFC_MEMORY on)
set(MSLITE_ENABLE_PARALLEL_INFERENCE on)
set(MSLITE_ENABLE_SHARING_MODEL_WEIGHT on)
endif()
if(DEFINED ENV{MSLITE_ENABLE_DYNAMIC_THREAD_DISTRIBUTE})
set(MSLITE_ENABLE_DYNAMIC_THREAD_DISTRIBUTE $ENV{MSLITE_ENABLE_DYNAMIC_THREAD_DISTRIBUTE})
endif()
if(MSLITE_ENABLE_DYNAMIC_THREAD_DISTRIBUTE)
add_compile_definitions(DYNAMIC_THREAD_DISTRIBUTE)
endif()
if(DEFINED ENV{MSLITE_ENABLE_BFC_MEMORY})
set(MSLITE_ENABLE_BFC_MEMORY $ENV{MSLITE_ENABLE_BFC_MEMORY})
endif()
if(MSLITE_ENABLE_BFC_MEMORY)
add_compile_definitions(BFC_MEMORY)
endif()
if(DEFINED ENV{MSLITE_ENABLE_PARALLEL_INFERENCE})
set(MSLITE_ENABLE_PARALLEL_INFERENCE $ENV{MSLITE_ENABLE_PARALLEL_INFERENCE})
endif()
if(MSLITE_ENABLE_PARALLEL_INFERENCE)
add_compile_definitions(PARALLEL_INFERENCE)
endif()
if(DEFINED ENV{MSLITE_ENABLE_SHARING_MODEL_WEIGHT})
set(MSLITE_ENABLE_SHARING_MODEL_WEIGHT $ENV{MSLITE_ENABLE_SHARING_MODEL_WEIGHT})
endif()
if(MSLITE_ENABLE_SHARING_MODEL_WEIGHT)
add_compile_definitions(SHARING_MODEL_WEIGHT)
endif()
if(MACHINE_LINUX_ARM64)
@ -321,6 +360,10 @@ message(STATUS "\tMSLITE_ENABLE_RUNTIME_GLOG = \t${MSLITE_ENABLE_RUNTIME_
message(STATUS "\tMSLITE_ENABLE_COVERAGE = \t${MSLITE_ENABLE_COVERAGE}")
message(STATUS "\tMSLITE_ENABLE_SHARING_MEM_WITH_OPENGL = \t${MSLITE_ENABLE_SHARING_MEM_WITH_OPENGL}")
message(STATUS "\tMSLITE_ENABLE_SERVER_INFERENCE = \t${MSLITE_ENABLE_SERVER_INFERENCE}")
message(STATUS "\tMSLITE_ENABLE_DYNAMIC_THREAD_DISTRIBUTE = \t${MSLITE_ENABLE_DYNAMIC_THREAD_DISTRIBUTE}")
message(STATUS "\tMSLITE_ENABLE_BFC_MEMORY = \t${MSLITE_ENABLE_BFC_MEMORY}")
message(STATUS "\tMSLITE_ENABLE_PARALLEL_INFERENCE = \t${MSLITE_ENABLE_PARALLEL_INFERENCE}")
message(STATUS "\tMSLITE_ENABLE_SHARING_MODEL_WEIGHT = \t${MSLITE_ENABLE_SHARING_MODEL_WEIGHT}")
if((MSLITE_ENABLE_CONVERTER OR MSLITE_ENABLE_TESTCASES) AND (
NOT MSLITE_ENABLE_MINDRT

View File

@ -85,7 +85,7 @@ set(JNI_SRC
${NEW_NATIVE_DIR}/version.cpp
)
if(MSLITE_ENABLE_SERVER_INFERENCE)
if(MSLITE_ENABLE_PARALLEL_INFERENCE)
set(JNI_SRC
${JNI_SRC}
${NEW_NATIVE_DIR}/runner_config.cpp

View File

@ -65,7 +65,7 @@ file(GLOB CXX_API_SRCS
${CMAKE_CURRENT_SOURCE_DIR}/cxx_api/graph/*.cc
${CMAKE_CURRENT_SOURCE_DIR}/cxx_api/tensor/*.cc
)
if(MSLITE_ENABLE_SERVER_INFERENCE)
if(MSLITE_ENABLE_PARALLEL_INFERENCE)
set(CXX_API_SRCS
${CXX_API_SRCS}
${CMAKE_CURRENT_SOURCE_DIR}/cxx_api/model_pool/predict_task_queue.cc
@ -140,17 +140,30 @@ if(MSLITE_ENABLE_MODEL_ENCRYPTION)
)
endif()
if(MSLITE_ENABLE_SERVER_INFERENCE)
set(LITE_SRC
if(MSLITE_ENABLE_BFC_MEMORY)
set(LITE_SRC
${LITE_SRC}
${CMAKE_CURRENT_SOURCE_DIR}/runtime/dynamic_mem_allocator.cc
${CMAKE_CURRENT_SOURCE_DIR}/runtime/dynamic_mem_manager.cc
${CMAKE_CURRENT_SOURCE_DIR}/runtime/numa_adapter.cc
)
endif()
if(MSLITE_ENABLE_SHARING_MODEL_WEIGHT)
set(LITE_SRC
${LITE_SRC}
${CMAKE_CURRENT_SOURCE_DIR}/pack_weight_manager.cc
${CMAKE_CURRENT_SOURCE_DIR}/thread_cost_model.cc
)
endif()
if(MSLITE_ENABLE_DYNAMIC_THREAD_DISTRIBUTE)
set(LITE_SRC
${LITE_SRC}
${CMAKE_CURRENT_SOURCE_DIR}/thread_cost_model.cc
)
endif()
if(MSLITE_ENABLE_CONTROLFLOW)
file(GLOB CONTROL_FLOW_SRC
${CMAKE_CURRENT_SOURCE_DIR}/control_flow/*.cc

View File

@ -66,7 +66,7 @@ static const char *const kMSCacheVocabSize = "vocab_size";
static const char *const kMSCacheDeviceSize = "device_cache_size";
static const char *const kMSCacheSerializePath = "serialize_path";
// config
#ifdef SERVER_INFERENCE
#ifdef BFC_MEMORY
static const char *const kConfigServerInference = "server_inference";
static const char *const kConfigNUMANodeId = "numa_node_id";
#endif

View File

@ -27,7 +27,7 @@
#include <sys/types.h>
#include <sys/param.h>
#endif
#ifdef SERVER_INFERENCE
#ifdef BFC_MEMORY
#include <sys/sysinfo.h>
#endif
@ -182,7 +182,7 @@ size_t GetMaxMallocSize() {
return max_malloc_size;
}
#ifdef SERVER_INFERENCE
#ifdef BFC_MEMORY
int64_t GetFreeMemory() {
struct sysinfo info;
auto ret = sysinfo(&info);

View File

@ -43,7 +43,7 @@ uint64_t GetTimeUs();
bool IsSupportSDot();
size_t GetMaxMallocSize();
#ifdef SERVER_INFERENCE
#ifdef BFC_MEMORY
int64_t GetFreeMemory();
#endif

View File

@ -689,7 +689,7 @@ lite::LiteSession *ModelImpl::CreateLiteSession(lite::InnerContext *context) {
delete context;
return nullptr;
}
#ifdef SERVER_INFERENCE
#ifdef BFC_MEMORY
auto iter = config_info_.find(lite::kConfigServerInference);
if (iter != config_info_.end()) {
auto numa_iter = iter->second.find(lite::kConfigNUMANodeId);

View File

@ -63,9 +63,11 @@ Status ModelWorker::Init(const char *model_buf, size_t size, const std::shared_p
int node_id) {
model_ = std::make_shared<Model>();
mindspore::ModelType model_type = kMindIR_Lite;
#ifdef BFC_MEMORY
if (node_id != -1) {
model_->UpdateConfig(lite::kConfigServerInference, {lite::kConfigNUMANodeId, std::to_string(node_id)});
}
#endif
auto status = model_->Build(model_buf, size, model_type, model_context);
if (status != kSuccess) {
MS_LOG(ERROR) << "model build failed in ModelPool Init";

View File

@ -139,7 +139,7 @@ int InnerContext::Init() {
}
if (this->allocator == nullptr) {
#ifdef SERVER_INFERENCE
#ifdef BFC_MEMORY
this->allocator = std::make_shared<DynamicMemAllocator>(node_id_);
#else
this->allocator = mindspore::Allocator::Create();

View File

@ -20,7 +20,7 @@
#include <string>
#include <unordered_map>
#include "include/context.h"
#ifdef SERVER_INFERENCE
#ifdef BFC_MEMORY
#include "src/runtime/dynamic_mem_allocator.h"
#else
#include "src/runtime/inner_allocator.h"
@ -86,7 +86,7 @@ struct InnerContext : public Context {
void ReplaceLinkInfoSenderWithNewOne(void *new_sender, void *old_sender);
#ifdef SERVER_INFERENCE
#ifdef BFC_MEMORY
/// \brief Set NUMA node id.
///
/// \param[in] node Define the NUMA node id.
@ -110,7 +110,7 @@ struct InnerContext : public Context {
bool device_and_pkg_support_fp16_ = false;
#ifdef SERVER_INFERENCE
#ifdef BFC_MEMORY
int node_id_ = -1;
#endif

View File

@ -28,7 +28,7 @@
#include "src/common/graph_util.h"
#include "src/common/file_utils.h"
#include "src/tensor.h"
#ifdef SERVER_INFERENCE
#ifdef SHARING_MODEL_WEIGHT
#include "src/pack_weight_manager.h"
#endif
#ifdef ENABLE_V0
@ -108,7 +108,7 @@ int LiteModel::ConvertAttrToTensors() {
#endif
void LiteModel::Free() {
#ifdef SERVER_INFERENCE
#ifdef SHARING_MODEL_WEIGHT
lite::PackWeightManager::GetInstance()->DeleteSavedModelPtr(this);
#endif
if (this->buf != nullptr) {
@ -603,7 +603,7 @@ Model *ImportFromBuffer(const char *model_buf, size_t size, bool take_buf) {
MS_LOG(ERROR) << "new model fail!";
return nullptr;
}
#ifdef SERVER_INFERENCE
#ifdef SHARING_MODEL_WEIGHT
lite::PackWeightManager::GetInstance()->StoreLiteModel(model_buf, model);
#endif
auto status = model->ConstructModel(model_buf, size, take_buf);

View File

@ -16,7 +16,7 @@
#include "src/lite_session.h"
#include <set>
#ifdef SERVER_INFERENCE
#ifdef SHARING_MODEL_WEIGHT
#include "src/pack_weight_manager.h"
#endif
#ifndef RUNTIME_PASS_CLIP
@ -40,9 +40,6 @@
#include "src/lite_model.h"
#include "src/weight_decoder.h"
#include "src/runtime/runtime_allocator.h"
#ifdef SERVER_INFERENCE
#include "src/runtime/dynamic_mem_allocator.h"
#endif
#include "src/lite_kernel_util.h"
#ifndef CUSTOM_KERNEL_REGISTRY_CLIP
#include "src/registry/register_kernel_impl.h"
@ -666,7 +663,7 @@ void LiteSession::FreePackOpWeight(const std::vector<kernel::LiteKernel *> &kern
}
}
}
#ifdef SERVER_INFERENCE
#ifdef SHARING_MODEL_WEIGHT
int LiteSession::IniPackWeightData(Model *model) {
auto lite_model = reinterpret_cast<LiteModel *>(model);
auto kernel_num = model->all_nodes_.size();
@ -709,7 +706,7 @@ int LiteSession::CompileGraph(Model *model) {
is_running_.store(false);
return ret;
}
#ifdef SERVER_INFERENCE
#ifdef SHARING_MODEL_WEIGHT
ret = IniPackWeightData(model);
if (ret != RET_OK) {
MS_LOG(ERROR) << "IniPackWeightData failed.";
@ -1818,7 +1815,7 @@ const char *lite::LiteSession::LoadModelByPath(const std::string &file, mindspor
delete[] model_buf;
model_buf = nullptr;
}
#ifdef SERVER_INFERENCE
#ifdef SHARING_MODEL_WEIGHT
lite::PackWeightManager::GetInstance()->InitWeightManagerByPath(file, model_buf);
#endif
return lite_buf;
@ -1842,7 +1839,7 @@ const char *lite::LiteSession::LoadModelByPath(const std::string &file, mindspor
delete[] model_buf;
model_buf = nullptr;
}
#ifdef SERVER_INFERENCE
#ifdef SHARING_MODEL_WEIGHT
lite::PackWeightManager::GetInstance()->InitWeightManagerByPath(file, model_buf);
#endif
return lite_buf;

View File

@ -119,7 +119,7 @@ class LiteSession : public session::LiteSession {
const std::vector<kernel::LiteKernel *> &kernels,
const std::unordered_map<Tensor *, Tensor *> &isolate_input_map = std::unordered_map<Tensor *, Tensor *>());
static void FreePackOpWeight(const std::vector<kernel::LiteKernel *> &kernels);
#ifdef SERVER_INFERENCE
#ifdef SHARING_MODEL_WEIGHT
int IniPackWeightData(Model *model);
#endif

View File

@ -13,7 +13,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifdef SERVER_INFERENCE
#ifdef SHARING_MODEL_WEIGHT
#include "src/pack_weight_manager.h"
namespace mindspore::lite {
namespace {

View File

@ -41,7 +41,7 @@
#include "include/mpi_sys.h"
#include "include/mpi_vb.h"
#endif
#ifdef SERVER_INFERENCE
#ifdef PARALLEL_INFERENCE
#include <thread>
#endif
namespace mindspore {
@ -51,7 +51,7 @@ constexpr int kFrequencyDefault = 3;
constexpr int kPercentageDivisor = 100;
constexpr int kDumpInputsAndOutputs = 0;
constexpr int kDumpOutputs = 2;
#ifdef SERVER_INFERENCE
#ifdef PARALLEL_INFERENCE
constexpr int kMaxRequestNum = 200;
#endif
namespace lite {
@ -221,7 +221,7 @@ int BenchmarkUnifiedApi::LoadInput() {
}
int BenchmarkUnifiedApi::GenerateInputData() {
#ifdef SERVER_INFERENCE
#ifdef PARALLEL_INFERENCE
if (flags_->enable_parallel_predict_) {
std::vector<MSTensor> inputs;
for (size_t i = 0; i < ms_inputs_for_api_.size(); i++) {
@ -298,7 +298,7 @@ void BenchmarkUnifiedApi::UpdateConfigInfo() {
}
int BenchmarkUnifiedApi::ReadInputFile() {
#ifdef SERVER_INFERENCE
#ifdef PARALLEL_INFERENCE
if (flags_->enable_parallel_predict_) {
std::vector<MSTensor> inputs;
for (size_t i = 0; i < ms_inputs_for_api_.size(); i++) {
@ -486,7 +486,7 @@ int BenchmarkUnifiedApi::InitMSContext(const std::shared_ptr<mindspore::Context>
return RET_OK;
}
#ifdef SERVER_INFERENCE
#ifdef PARALLEL_INFERENCE
int BenchmarkUnifiedApi::CompareOutputForModelPool(std::vector<mindspore::MSTensor> *outputs) {
if (outputs->empty()) {
MS_LOG(ERROR) << "outputs is empty.";
@ -897,7 +897,7 @@ int BenchmarkUnifiedApi::MarkAccuracy() {
int BenchmarkUnifiedApi::PrintInputData() {
for (size_t i = 0; i < ms_inputs_for_api_.size(); i++) {
mindspore::MSTensor input;
#ifdef SERVER_INFERENCE
#ifdef PARALLEL_INFERENCE
if (flags_->enable_parallel_predict_) {
input = all_inputs_[0][i];
} else {
@ -947,7 +947,7 @@ int BenchmarkUnifiedApi::PrintInputData() {
}
return RET_OK;
}
#ifdef SERVER_INFERENCE
#ifdef PARALLEL_INFERENCE
int BenchmarkUnifiedApi::RunModelPool(std::shared_ptr<mindspore::Context> context) {
if (flags_->warm_up_loop_count_ > kMaxRequestNum) {
MS_LOG(WARNING) << "in parallel predict warm up loop count should less than" << kMaxRequestNum;
@ -1113,7 +1113,7 @@ int BenchmarkUnifiedApi::RunBenchmark() {
}
UpdateConfigInfo();
#ifdef SERVER_INFERENCE
#ifdef PARALLEL_INFERENCE
if (flags_->enable_parallel_predict_) {
status = RunModelPool(context);
if (status != RET_OK) {

View File

@ -42,7 +42,7 @@
#ifdef ENABLE_OPENGL_TEXTURE
#include "tools/common/opengl_util.h"
#endif
#ifdef SERVER_INFERENCE
#ifdef PARALLEL_INFERENCE
#include "include/api/model_parallel_runner.h"
#endif
@ -86,7 +86,7 @@ class MS_API BenchmarkUnifiedApi : public BenchmarkBase {
int GetDataTypeByTensorName(const std::string &tensor_name) override;
int CompareOutput() override;
#ifdef SERVER_INFERENCE
#ifdef PARALLEL_INFERENCE
int CompareOutputForModelPool(std::vector<mindspore::MSTensor> *outputs);
#endif
int CompareOutputByCosineDistance(float cosine_distance_threshold);
@ -100,7 +100,7 @@ class MS_API BenchmarkUnifiedApi : public BenchmarkBase {
int InitPrintTensorDataCallbackParameter() override;
int PrintInputData();
#ifdef SERVER_INFERENCE
#ifdef PARALLEL_INFERENCE
int RunModelPool(std::shared_ptr<mindspore::Context> context);
#endif

View File

@ -125,13 +125,25 @@ set(LITE_SRC ${API_SRC}
${SRC_DIR}/huffman_decode.cc
${SRC_DIR}/delegate/tensorrt/distribution/distribution_base.cc
)
if(MSLITE_ENABLE_SERVER_INFERENCE)
set(LITE_SRC
if(MSLITE_ENABLE_BFC_MEMORY)
set(LITE_SRC
${LITE_SRC}
${SRC_DIR}/pack_weight_manager.cc
${SRC_DIR}/runtime/dynamic_mem_allocator.cc
${SRC_DIR}/runtime/dynamic_mem_manager.cc
${SRC_DIR}/runtime/numa_adapter.cc
)
endif()
if(MSLITE_ENABLE_SHARING_MODEL_WEIGHT)
set(LITE_SRC
${LITE_SRC}
${SRC_DIR}/pack_weight_manager.cc
)
endif()
if(MSLITE_ENABLE_DYNAMIC_THREAD_DISTRIBUTE)
set(LITE_SRC
${LITE_SRC}
${SRC_DIR}/thread_cost_model.cc
)
endif()