support big model: read tensor data for build by buf
commit 368aa3a3d0 (parent df9b900c04)
@@ -65,6 +65,9 @@ static const char *const kMSCacheModelPath = "cache_model_path";
 static const char *const kMSCacheVocabSize = "vocab_size";
 static const char *const kMSCacheDeviceSize = "device_cache_size";
 static const char *const kMSCacheSerializePath = "serialize_path";
+// weight path
+static const char *const kWeight = "weight";
+static const char *const kWeightPath = "weight_path";
 }  // namespace lite
 }  // namespace mindspore
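The two new keys name the config section ("weight") and the entry ("weight_path") that the later hunks in this commit read back. A minimal sketch of how a caller could fill that section through the RunnerConfig used in the benchmark change further down; the included header and the weight file location are assumptions, not part of this commit:

  #include <map>
  #include <memory>
  #include <string>
  #include "include/api/model_parallel_runner.h"  // assumed header for RunnerConfig

  // Sketch: hand an external weight file to the runner via the new config section.
  // SetConfigInfo(kWeight, config) is the same call the benchmark hunk below uses.
  void SetWeightPath(const std::shared_ptr<mindspore::RunnerConfig> &runner_config) {
    std::map<std::string, std::string> weight_section;
    weight_section["weight_path"] = "/data/models/big_model_weights.bin";  // hypothetical path
    runner_config->SetConfigInfo("weight", weight_section);                // section name matches kWeight
  }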
@@ -83,6 +83,11 @@ Status ModelWorker::ResizeInit() {
   std::vector<std::vector<int64_t>> new_input_shape;
   for (size_t input_idx = 0; input_idx < inputs.size(); input_idx++) {
     new_input_shape.push_back(inputs[input_idx].Shape());
+    for (size_t i = 1; i < new_input_shape.size(); i++) {
+      if (new_input_shape[input_idx][i] == -1) {
+        return kSuccess;
+      }
+    }
     if (new_input_shape[input_idx][0] == -1) {
       // only support resize for batch dim
       new_input_shape[input_idx][0] = kNumInitBatch;
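The added inner loop makes ResizeInit return kSuccess early as soon as any non-batch dimension is dynamic, so only inputs whose sole dynamic axis is the batch axis get pinned to kNumInitBatch. Note the loop bound in the hunk is new_input_shape.size(); iterating the dimensions of the current input's shape appears to be the intent. A standalone sketch of that intent, with a made-up shape and batch value:

  #include <cstddef>
  #include <cstdint>
  #include <vector>

  // Sketch (shape values and the batch constant are placeholders): skip resizing when
  // any axis other than axis 0 is dynamic; otherwise pin the batch axis.
  bool PinBatchIfOnlyBatchIsDynamic(std::vector<int64_t> *shape, int64_t init_batch) {
    for (size_t i = 1; i < shape->size(); i++) {
      if ((*shape)[i] == -1) {
        return false;  // e.g. {-1, -1, 224, 224}: leave the shape alone, as the hunk returns kSuccess
      }
    }
    if ((*shape)[0] == -1) {
      (*shape)[0] = init_batch;  // e.g. {-1, 3, 224, 224} becomes {init_batch, 3, 224, 224}
    }
    return true;
  }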
@@ -504,7 +504,8 @@ bool LiteModel::CheckQuantAllInit(
 Model *ImportFromPath(const char *model_path) { return LiteImportFromPath(model_path); }

-Model *ImportFromBuffer(const char *model_buf, size_t size, bool take_buf, mindspore::ModelType model_type) {
+Model *ImportFromBuffer(const char *model_buf, size_t size, bool take_buf, mindspore::ModelType model_type,
+                        const std::string &path) {
   auto model_loader = mindspore::infer::ModelLoaderRegistry::GetInstance()->GetModelLoader(model_type);
   if (model_loader != nullptr) {
     MS_LOG(INFO) << "import model from model loader";
@@ -516,7 +517,7 @@ Model *ImportFromBuffer(const char *model_buf, size_t size, bool take_buf, minds
   }

   MS_LOG(INFO) << "import model from lite model";
-  auto *model = new (std::nothrow) LiteModel();
+  auto *model = new (std::nothrow) LiteModel(path);
   if (model == nullptr) {
     MS_LOG(ERROR) << "new model fail!";
     return nullptr;
@@ -322,7 +322,8 @@ class LiteModel : public Model {
 };

 Model *ImportFromBuffer(const char *model_buf, size_t size, bool take_buf,
-                        mindspore::ModelType model_type = mindspore::ModelType::kMindIR_Lite);
+                        mindspore::ModelType model_type = mindspore::ModelType::kMindIR_Lite,
+                        const std::string &path = "");
 LiteModel *LiteImportFromPath(const char *model_path);
 Model *ImportFromPath(const char *model_path);
 }  // namespace lite
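Because the new parameter defaults to "", existing call sites of ImportFromBuffer keep compiling unchanged; only callers that know where the model file or its weights live need the extra argument. A hedged sketch (the included header, buffer variables, and file path are placeholders):

  #include <cstddef>
  #include <string>
  #include "src/runtime/lite_model.h"  // assumed header for lite::ImportFromBuffer

  mindspore::lite::Model *LoadFromBuffer(const char *model_buf, size_t buf_size) {
    // Old-style call still compiles: the new path parameter defaults to "".
    // return mindspore::lite::ImportFromBuffer(model_buf, buf_size, true, mindspore::ModelType::kMindIR_Lite);

    // New-style call: also hands over a file the model can read tensor data back from.
    return mindspore::lite::ImportFromBuffer(model_buf, buf_size, true, mindspore::ModelType::kMindIR_Lite,
                                             "/data/models/big_model.ms");  // hypothetical model path
  }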
@@ -1706,6 +1706,20 @@ int lite::LiteSession::LoadModelAndCompileByBuf(const char *model_buf, mindspore
   return RET_OK;
 }

+std::string lite::LiteSession::ParseWeightPath() {
+  std::string weight_path = "";
+  if (config_info_ != nullptr) {
+    auto ms_weight = config_info_->find(kWeight);
+    if (ms_weight != config_info_->end()) {
+      auto ms_weight_iter = ms_weight->second;
+      if (ms_weight_iter.find(kWeightPath) != ms_weight_iter.end()) {
+        weight_path = ms_weight_iter[kWeightPath];
+      }
+    }
+  }
+  return weight_path;
+}
+
 int lite::LiteSession::LoadModelAndCompileByBuf(const char *model_buf, mindspore::ModelType model_type,
                                                 const size_t &buf_size,
                                                 const std::shared_ptr<mindspore::Context> &ms_context) {
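ParseWeightPath walks a nested map keyed first by the section name (kWeight) and then by the entry name (kWeightPath). A minimal sketch of the layout it expects; the exact type of config_info_ is assumed from the lookups in the hunk above:

  #include <map>
  #include <string>

  // Assumed layout of the session config: section name ("weight") -> { entry ("weight_path") -> file path }.
  std::map<std::string, std::map<std::string, std::string>> MakeExampleConfig() {
    std::map<std::string, std::map<std::string, std::string>> config_info;
    config_info["weight"]["weight_path"] = "/data/models/big_model_weights.bin";  // hypothetical path
    return config_info;  // with this entry present the lookup yields the path; otherwise it yields ""
  }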
@@ -1716,7 +1730,8 @@ int lite::LiteSession::LoadModelAndCompileByBuf(const char *model_buf, mindspore
     MS_LOG(ERROR) << "Invalid model_buf";
     return RET_ERROR;
   }
-  auto *model = lite::ImportFromBuffer(lite_buf, lite_buf_size, true, model_type);
+  auto weight_path = ParseWeightPath();
+  auto *model = lite::ImportFromBuffer(lite_buf, lite_buf_size, true, model_type, weight_path);
   if (model == nullptr) {
     MS_LOG(ERROR) << "Import model failed";
     return RET_ERROR;
@@ -1743,7 +1758,7 @@ int lite::LiteSession::LoadModelAndCompileByPath(const std::string &model_path,
     MS_LOG(ERROR) << "Read model file failed";
     return RET_ERROR;
   }
-  auto *model = lite::ImportFromBuffer(model_buf, model_size, true, model_type);
+  auto *model = lite::ImportFromBuffer(model_buf, model_size, true, model_type, model_path);
   if (model == nullptr) {
     MS_LOG(ERROR) << "Import model failed";
     return RET_ERROR;
@@ -1768,7 +1783,7 @@ int lite::LiteSession::LoadModelAndCompileByPath(const std::string &model_path,
     MS_LOG(ERROR) << "Read model file failed";
     return RET_ERROR;
   }
-  auto *model = lite::ImportFromBuffer(model_buf, model_size, true, model_type);
+  auto *model = lite::ImportFromBuffer(model_buf, model_size, true, model_type, model_path);
   if (model == nullptr) {
     MS_LOG(ERROR) << "Import model failed";
     delete[] model_buf;
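Read together, the three call-site updates above give ImportFromBuffer a file location in both flows: LoadModelAndCompileByBuf only has an in-memory buffer, so it forwards whatever ParseWeightPath() finds in the session config (possibly an empty string), while the two LoadModelAndCompileByPath sites reuse the model file's own path, so tensor data can be read back from the file rather than kept entirely in the buffer, which is the "big model" case named in the commit title.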
@@ -143,6 +143,7 @@ class LiteSession {
                                const std::vector<kernel::KernelExec *> &kernels,
                                const std::unordered_map<Tensor *, Tensor *> &isolate_input_map = std::unordered_map<Tensor *, Tensor *>());
   static void FreePackOpWeight(const std::vector<kernel::KernelExec *> &kernels);
+  std::string ParseWeightPath();

  private:
   int PreCheck(Model *model);
@@ -953,6 +953,20 @@ void BenchmarkUnifiedApi::ModelParallelRunnerRun(int task_num, int parallel_idx)
   }
 }

+int BenchmarkUnifiedApi::AddConfigInfo(const std::shared_ptr<RunnerConfig> &runner_config) {
+  auto env = std::getenv("BENCHMARK_WEIGHT_PATH");
+  if (env == nullptr) {
+    return RET_OK;
+  }
+  auto weight_path = std::string(env);
+  if (weight_path != "") {
+    std::map<std::string, std::string> config;
+    config[kWeightPath] = weight_path;
+    runner_config->SetConfigInfo(kWeight, config);
+  }
+  return RET_OK;
+}
+
 int BenchmarkUnifiedApi::ParallelInference(std::shared_ptr<mindspore::Context> context) {
   if (flags_->warm_up_loop_count_ > kMaxRequestNum || flags_->parallel_num_ > kMaxRequestNum) {
     MS_LOG(WARNING) << "in parallel predict warm up loop count should less than" << kMaxRequestNum;
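For benchmarking, the external weight path comes from the environment rather than a command-line flag: when BENCHMARK_WEIGHT_PATH is set, AddConfigInfo stores it under the "weight"/"weight_path" section of the RunnerConfig, which is exactly the entry ParseWeightPath reads on the session side. A minimal sketch of driving it from code; in practice the variable would simply be exported in the shell before launching the benchmark, and the path below is hypothetical:

  #include <cstdlib>

  // Normally set by the user's shell; shown programmatically only for illustration (POSIX setenv).
  void SetBenchmarkWeightPathForTest() {
    setenv("BENCHMARK_WEIGHT_PATH", "/data/models/big_model_weights.bin", 1);  // hypothetical path
    // AddConfigInfo() then copies the value into runner_config before model_runner_.Init() runs.
  }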
@@ -965,6 +979,8 @@ int BenchmarkUnifiedApi::ParallelInference(std::shared_ptr<mindspore::Context> c
   auto runner_config = std::make_shared<RunnerConfig>();
   runner_config->SetContext(context);
   runner_config->SetWorkersNum(flags_->workers_num_);
+  auto status = AddConfigInfo(runner_config);
+  MS_CHECK_FALSE_MSG(status != kSuccess, RET_ERROR, "add config info for parallel predict failed.");
   auto model_init_start = GetTimeUs();
   auto ret = model_runner_.Init(flags_->model_file_, runner_config);
   MS_CHECK_FALSE_MSG(ret != kSuccess, RET_ERROR, "model pool init failed.");
@@ -974,13 +990,13 @@ int BenchmarkUnifiedApi::ParallelInference(std::shared_ptr<mindspore::Context> c
   ms_inputs_for_api_ = model_runner_.GetInputs();
   MS_CHECK_FALSE_MSG(ms_inputs_for_api_.empty(), RET_ERROR, "model pool input is empty.");
   for (int i = 0; i < flags_->parallel_num_ + flags_->warm_up_loop_count_; i++) {
-    auto status = LoadInput();
+    status = LoadInput();
     MS_CHECK_FALSE_MSG(status != RET_OK, status, "Generate input data error");
     std::vector<MSTensor> output;
     all_outputs_.push_back(output);
   }
   if (!flags_->benchmark_data_file_.empty()) {
-    auto status = PrintInputData();
+    status = PrintInputData();
     MS_CHECK_FALSE_MSG(status != RET_OK, status, "PrintInputData error ");
     status = ReadCalibData();
     MS_CHECK_FALSE_MSG(status != RET_OK, status, "ReadCalibData error ");
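In this last ParallelInference hunk, the two `auto status = ...` initializations become plain assignments because `status` is now introduced once by the AddConfigInfo call earlier in the function; reusing that variable avoids shadowing it in the inner scopes.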
@@ -99,6 +99,7 @@ class MS_API BenchmarkUnifiedApi : public BenchmarkBase {
   void ModelParallelRunnerWarmUp(int index);
   void ModelParallelRunnerRun(int task_num, int parallel_idx);
   int ParallelInference(std::shared_ptr<mindspore::Context> context);
+  int AddConfigInfo(const std::shared_ptr<RunnerConfig> &runner_config);
 #endif

  template <typename T>