From 278d9b3eed4058ffeac7129baa4a0da7ae357488 Mon Sep 17 00:00:00 2001 From: yangruoqi713 Date: Thu, 15 Jul 2021 10:20:48 +0800 Subject: [PATCH] [MSLITE][DEVELOP] sync r1.3 fix bug: delegate model input & output, affinity mode value, npu memory leak --- mindspore/lite/src/cxx_api/context.cc | 8 ++++++-- mindspore/lite/src/cxx_api/converters.cc | 5 +++++ mindspore/lite/src/cxx_api/converters.h | 4 ++++ mindspore/lite/src/delegate/npu/npu_graph.cc | 8 ++++++++ mindspore/lite/src/inner_context.cc | 1 + mindspore/lite/src/lite_session.cc | 1 + mindspore/lite/src/mindrt_executor.cc | 15 ++++++++++++--- mindspore/lite/src/scheduler.cc | 6 ++++-- mindspore/lite/src/scheduler.h | 2 ++ 9 files changed, 43 insertions(+), 7 deletions(-) diff --git a/mindspore/lite/src/cxx_api/context.cc b/mindspore/lite/src/cxx_api/context.cc index b6905dcf248..f3fc38f1f2c 100644 --- a/mindspore/lite/src/cxx_api/context.cc +++ b/mindspore/lite/src/cxx_api/context.cc @@ -39,7 +39,7 @@ struct Context::Data { int32_t thread_num = 2; bool enable_parallel_ = false; std::vector affinity_core_list_; - int affinity_mode_ = 2; + int affinity_mode_ = 0; std::shared_ptr delegate = nullptr; }; @@ -80,6 +80,7 @@ void Context::SetThreadNum(int32_t thread_num) { } data_->thread_num = thread_num; } + int32_t Context::GetThreadNum() const { if (data_ == nullptr) { MS_LOG(ERROR) << "Invalid context."; @@ -111,9 +112,9 @@ void Context::SetThreadAffinity(int mode) { return; } data_->affinity_mode_ = mode; - return; } + int Context::GetThreadAffinityMode() const { if (data_ == nullptr) { MS_LOG(ERROR) << "Invalid context."; @@ -131,6 +132,7 @@ void Context::SetThreadAffinity(const std::vector &core_list) { return; } + std::vector Context::GetThreadAffinityCoreList() const { if (data_ == nullptr) { MS_LOG(ERROR) << "Invalid context."; @@ -221,6 +223,7 @@ void CPUDeviceInfo::SetEnableFP16(bool is_fp16) { } data_->params[kModelOptionCpuEnableFP16] = is_fp16; } + bool CPUDeviceInfo::GetEnableFP16() const { if (data_ == nullptr) { MS_LOG(ERROR) << "Invalid context."; @@ -251,6 +254,7 @@ void KirinNPUDeviceInfo::SetFrequency(int frequency) { } data_->params[kModelOptionKirinNpuFrequency] = frequency; } + int KirinNPUDeviceInfo::GetFrequency() const { if (data_ == nullptr) { MS_LOG(ERROR) << "Invalid context."; diff --git a/mindspore/lite/src/cxx_api/converters.cc b/mindspore/lite/src/cxx_api/converters.cc index 422e57af8a0..7fd3fa4d7fb 100644 --- a/mindspore/lite/src/cxx_api/converters.cc +++ b/mindspore/lite/src/cxx_api/converters.cc @@ -60,6 +60,11 @@ Status A2L_ConvertContext(Context *a_context, lite::Context *l_context) { cpu_context->SetAllocator(l_context->allocator); } + if (!IsAffinityModeValid(a_context->GetThreadAffinityMode())) { + MS_LOG(ERROR) + << "Invalid affinity mode, only supports 0: no affinities, 1: big cores first, 2: little cores first."; + return kLiteInputParamInvalid; + } lite::CpuBindMode mode = A2L_ConvertAffinityMode(a_context->GetThreadAffinityMode()); lite::DeviceInfo cpu_info = {0}; diff --git a/mindspore/lite/src/cxx_api/converters.h b/mindspore/lite/src/cxx_api/converters.h index b5af0339d1e..8fd984a79cf 100644 --- a/mindspore/lite/src/cxx_api/converters.h +++ b/mindspore/lite/src/cxx_api/converters.h @@ -55,6 +55,10 @@ inline lite::CpuBindMode A2L_ConvertAffinityMode(int affinity_mode) { } } +inline bool IsAffinityModeValid(int affinity_mode) { + return affinity_mode >= lite::NO_BIND && affinity_mode <= lite::MID_CPU; +} + Status A2L_ConvertContext(Context *a_context, lite::Context *l_context); Status A2L_ConvertConfig(const TrainCfg *a_train_cfg, lite::TrainCfg *l_train_cfg); diff --git a/mindspore/lite/src/delegate/npu/npu_graph.cc b/mindspore/lite/src/delegate/npu/npu_graph.cc index 712c5f95a42..1e0e82e6f28 100644 --- a/mindspore/lite/src/delegate/npu/npu_graph.cc +++ b/mindspore/lite/src/delegate/npu/npu_graph.cc @@ -22,6 +22,14 @@ #include "src/delegate/npu/transpose_kernel.h" namespace mindspore { NPUGraph::~NPUGraph() { + for (int i = 0; i < all_kernels_.size(); i++) { + for (auto output : all_kernels_[i]->outputs()) { + if (find(outputs_.begin(), outputs_.end(), output) != outputs_.end()) { + free(output.MutableData()); + output.SetData(nullptr); + } + } + } for (auto *kernel : all_kernels_) { delete kernel; } diff --git a/mindspore/lite/src/inner_context.cc b/mindspore/lite/src/inner_context.cc index f959525a8c5..36b833d3487 100644 --- a/mindspore/lite/src/inner_context.cc +++ b/mindspore/lite/src/inner_context.cc @@ -30,6 +30,7 @@ InnerContext::InnerContext(const Context *context) { this->allocator = context->allocator; this->thread_num_ = context->thread_num_; this->enable_parallel_ = context->enable_parallel_; + this->affinity_core_list_ = context->affinity_core_list_; SetContextDevice(context); #if defined(ENABLE_ARM) && defined(ENABLE_FP16) CpuInfo cpu_info; diff --git a/mindspore/lite/src/lite_session.cc b/mindspore/lite/src/lite_session.cc index ce754efd8f3..1779644fe0f 100644 --- a/mindspore/lite/src/lite_session.cc +++ b/mindspore/lite/src/lite_session.cc @@ -710,6 +710,7 @@ void LiteSession::BindThread(bool if_bind) { } LiteSession::~LiteSession() { + delegate_.reset(); bool expected = false; if (!is_running_.compare_exchange_strong(expected, true)) { MS_LOG(ERROR) << "Not support multi-threading"; diff --git a/mindspore/lite/src/mindrt_executor.cc b/mindspore/lite/src/mindrt_executor.cc index b424b7616f8..f6841f78d1d 100644 --- a/mindspore/lite/src/mindrt_executor.cc +++ b/mindspore/lite/src/mindrt_executor.cc @@ -112,7 +112,12 @@ void MindrtExecutor::TransferGraphOutput() { reinterpret_cast(dst_tensor->data_c()), dst_tensor->ElementsNum()); } else { dst_tensor->set_data(src_tensor->data()); - src_tensor->set_data(nullptr); + if (src_tensor->own_data() == true && src_tensor->allocator() == nullptr) { + dst_tensor->set_own_data(false); + src_tensor->IncRefCount(); + } else { + src_tensor->set_data(nullptr); + } } src_tensor->DecRefCount(); } @@ -128,8 +133,12 @@ void MindrtExecutor::FreeOutputTensor() { } else { if (dst_tensor->data_type() == src_tensor->data_type()) { /* user set graph-output-tensor from outside */ - src_tensor->set_data(dst_tensor->data()); - src_tensor->set_own_data(false); + if (dst_tensor->data() == nullptr || dst_tensor->own_data() == false) { + src_tensor->set_own_data(true); + } else { + src_tensor->set_data(dst_tensor->data()); + src_tensor->set_own_data(false); + } src_tensor->set_allocator(nullptr); } } diff --git a/mindspore/lite/src/scheduler.cc b/mindspore/lite/src/scheduler.cc index 2fe97939e3d..581c378c89e 100644 --- a/mindspore/lite/src/scheduler.cc +++ b/mindspore/lite/src/scheduler.cc @@ -172,9 +172,11 @@ int Scheduler::ReplaceDelegateKernels(std::vector *dst_ker kernels.push_back((*dst_kernels)[i]->kernel()); } + ms_inputs_ = LiteTensorsToMSTensors(inputs_); + ms_outputs_ = LiteTensorsToMSTensors(outputs_); auto schema_version = static_cast(VersionManager::GetInstance()->GetSchemaVersion()); - DelegateModel *model = new (std::nothrow) DelegateModel( - &kernels, LiteTensorsToMSTensors(inputs_), LiteTensorsToMSTensors(outputs_), primitives_, schema_version); + DelegateModel *model = + new (std::nothrow) DelegateModel(&kernels, ms_inputs_, ms_outputs_, primitives_, schema_version); if (model == nullptr) { MS_LOG(ERROR) << "New delegate model failed."; return RET_NULL_PTR; diff --git a/mindspore/lite/src/scheduler.h b/mindspore/lite/src/scheduler.h index 5d2b0143c42..ccc8e7ca62a 100644 --- a/mindspore/lite/src/scheduler.h +++ b/mindspore/lite/src/scheduler.h @@ -124,6 +124,8 @@ class Scheduler { std::vector *src_tensors_; const std::vector &inputs_; const std::vector &outputs_; + std::vector ms_inputs_; + std::vector ms_outputs_; std::vector graph_output_node_indexes_; std::map op_parameters_; bool is_train_session_ = false;