forked from mindspore-Ecosystem/mindspore
[MSLITE][DEVELOP] sync r1.3 fix bug: delegate model input & output, affinity mode value, npu memory leak
This commit is contained in:
parent
8494d56dc0
commit
278d9b3eed
|
@ -39,7 +39,7 @@ struct Context::Data {
|
|||
int32_t thread_num = 2;
|
||||
bool enable_parallel_ = false;
|
||||
std::vector<int32_t> affinity_core_list_;
|
||||
int affinity_mode_ = 2;
|
||||
int affinity_mode_ = 0;
|
||||
std::shared_ptr<Delegate> delegate = nullptr;
|
||||
};
|
||||
|
||||
|
@ -80,6 +80,7 @@ void Context::SetThreadNum(int32_t thread_num) {
|
|||
}
|
||||
data_->thread_num = thread_num;
|
||||
}
|
||||
|
||||
int32_t Context::GetThreadNum() const {
|
||||
if (data_ == nullptr) {
|
||||
MS_LOG(ERROR) << "Invalid context.";
|
||||
|
@ -111,9 +112,9 @@ void Context::SetThreadAffinity(int mode) {
|
|||
return;
|
||||
}
|
||||
data_->affinity_mode_ = mode;
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
int Context::GetThreadAffinityMode() const {
|
||||
if (data_ == nullptr) {
|
||||
MS_LOG(ERROR) << "Invalid context.";
|
||||
|
@ -131,6 +132,7 @@ void Context::SetThreadAffinity(const std::vector<int> &core_list) {
|
|||
|
||||
return;
|
||||
}
|
||||
|
||||
std::vector<int32_t> Context::GetThreadAffinityCoreList() const {
|
||||
if (data_ == nullptr) {
|
||||
MS_LOG(ERROR) << "Invalid context.";
|
||||
|
@ -221,6 +223,7 @@ void CPUDeviceInfo::SetEnableFP16(bool is_fp16) {
|
|||
}
|
||||
data_->params[kModelOptionCpuEnableFP16] = is_fp16;
|
||||
}
|
||||
|
||||
bool CPUDeviceInfo::GetEnableFP16() const {
|
||||
if (data_ == nullptr) {
|
||||
MS_LOG(ERROR) << "Invalid context.";
|
||||
|
@ -251,6 +254,7 @@ void KirinNPUDeviceInfo::SetFrequency(int frequency) {
|
|||
}
|
||||
data_->params[kModelOptionKirinNpuFrequency] = frequency;
|
||||
}
|
||||
|
||||
int KirinNPUDeviceInfo::GetFrequency() const {
|
||||
if (data_ == nullptr) {
|
||||
MS_LOG(ERROR) << "Invalid context.";
|
||||
|
|
|
@ -60,6 +60,11 @@ Status A2L_ConvertContext(Context *a_context, lite::Context *l_context) {
|
|||
cpu_context->SetAllocator(l_context->allocator);
|
||||
}
|
||||
|
||||
if (!IsAffinityModeValid(a_context->GetThreadAffinityMode())) {
|
||||
MS_LOG(ERROR)
|
||||
<< "Invalid affinity mode, only supports 0: no affinities, 1: big cores first, 2: little cores first.";
|
||||
return kLiteInputParamInvalid;
|
||||
}
|
||||
lite::CpuBindMode mode = A2L_ConvertAffinityMode(a_context->GetThreadAffinityMode());
|
||||
|
||||
lite::DeviceInfo cpu_info = {0};
|
||||
|
|
|
@ -55,6 +55,10 @@ inline lite::CpuBindMode A2L_ConvertAffinityMode(int affinity_mode) {
|
|||
}
|
||||
}
|
||||
|
||||
// Reports whether `affinity_mode` is an acceptable thread-affinity mode.
// Returns true when the value lies in the inclusive range
// [lite::NO_BIND, lite::MID_CPU], and false otherwise.
// NOTE(review): the check assumes the accepted enumerators are contiguous
// between NO_BIND and MID_CPU — confirm against the lite::CpuBindMode
// declaration.
inline bool IsAffinityModeValid(int affinity_mode) {
|
||||
return affinity_mode >= lite::NO_BIND && affinity_mode <= lite::MID_CPU;
|
||||
}
|
||||
|
||||
Status A2L_ConvertContext(Context *a_context, lite::Context *l_context);
|
||||
|
||||
Status A2L_ConvertConfig(const TrainCfg *a_train_cfg, lite::TrainCfg *l_train_cfg);
|
||||
|
|
|
@ -22,6 +22,14 @@
|
|||
#include "src/delegate/npu/transpose_kernel.h"
|
||||
namespace mindspore {
|
||||
NPUGraph::~NPUGraph() {
|
||||
for (int i = 0; i < all_kernels_.size(); i++) {
|
||||
for (auto output : all_kernels_[i]->outputs()) {
|
||||
if (find(outputs_.begin(), outputs_.end(), output) != outputs_.end()) {
|
||||
free(output.MutableData());
|
||||
output.SetData(nullptr);
|
||||
}
|
||||
}
|
||||
}
|
||||
for (auto *kernel : all_kernels_) {
|
||||
delete kernel;
|
||||
}
|
||||
|
|
|
@ -30,6 +30,7 @@ InnerContext::InnerContext(const Context *context) {
|
|||
this->allocator = context->allocator;
|
||||
this->thread_num_ = context->thread_num_;
|
||||
this->enable_parallel_ = context->enable_parallel_;
|
||||
this->affinity_core_list_ = context->affinity_core_list_;
|
||||
SetContextDevice(context);
|
||||
#if defined(ENABLE_ARM) && defined(ENABLE_FP16)
|
||||
CpuInfo cpu_info;
|
||||
|
|
|
@ -710,6 +710,7 @@ void LiteSession::BindThread(bool if_bind) {
|
|||
}
|
||||
|
||||
LiteSession::~LiteSession() {
|
||||
delegate_.reset();
|
||||
bool expected = false;
|
||||
if (!is_running_.compare_exchange_strong(expected, true)) {
|
||||
MS_LOG(ERROR) << "Not support multi-threading";
|
||||
|
|
|
@ -112,7 +112,12 @@ void MindrtExecutor::TransferGraphOutput() {
|
|||
reinterpret_cast<float *>(dst_tensor->data_c()), dst_tensor->ElementsNum());
|
||||
} else {
|
||||
dst_tensor->set_data(src_tensor->data());
|
||||
src_tensor->set_data(nullptr);
|
||||
if (src_tensor->own_data() == true && src_tensor->allocator() == nullptr) {
|
||||
dst_tensor->set_own_data(false);
|
||||
src_tensor->IncRefCount();
|
||||
} else {
|
||||
src_tensor->set_data(nullptr);
|
||||
}
|
||||
}
|
||||
src_tensor->DecRefCount();
|
||||
}
|
||||
|
@ -128,8 +133,12 @@ void MindrtExecutor::FreeOutputTensor() {
|
|||
} else {
|
||||
if (dst_tensor->data_type() == src_tensor->data_type()) {
|
||||
/* user set graph-output-tensor from outside */
|
||||
src_tensor->set_data(dst_tensor->data());
|
||||
src_tensor->set_own_data(false);
|
||||
if (dst_tensor->data() == nullptr || dst_tensor->own_data() == false) {
|
||||
src_tensor->set_own_data(true);
|
||||
} else {
|
||||
src_tensor->set_data(dst_tensor->data());
|
||||
src_tensor->set_own_data(false);
|
||||
}
|
||||
src_tensor->set_allocator(nullptr);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -172,9 +172,11 @@ int Scheduler::ReplaceDelegateKernels(std::vector<kernel::LiteKernel *> *dst_ker
|
|||
kernels.push_back((*dst_kernels)[i]->kernel());
|
||||
}
|
||||
|
||||
ms_inputs_ = LiteTensorsToMSTensors(inputs_);
|
||||
ms_outputs_ = LiteTensorsToMSTensors(outputs_);
|
||||
auto schema_version = static_cast<SchemaVersion>(VersionManager::GetInstance()->GetSchemaVersion());
|
||||
DelegateModel *model = new (std::nothrow) DelegateModel(
|
||||
&kernels, LiteTensorsToMSTensors(inputs_), LiteTensorsToMSTensors(outputs_), primitives_, schema_version);
|
||||
DelegateModel *model =
|
||||
new (std::nothrow) DelegateModel(&kernels, ms_inputs_, ms_outputs_, primitives_, schema_version);
|
||||
if (model == nullptr) {
|
||||
MS_LOG(ERROR) << "New delegate model failed.";
|
||||
return RET_NULL_PTR;
|
||||
|
|
|
@ -124,6 +124,8 @@ class Scheduler {
|
|||
std::vector<Tensor *> *src_tensors_;
|
||||
const std::vector<Tensor *> &inputs_;
|
||||
const std::vector<Tensor *> &outputs_;
|
||||
std::vector<mindspore::MSTensor> ms_inputs_;
|
||||
std::vector<mindspore::MSTensor> ms_outputs_;
|
||||
std::vector<size_t> graph_output_node_indexes_;
|
||||
std::map<int, OpParameter *> op_parameters_;
|
||||
bool is_train_session_ = false;
|
||||
|
|
Loading…
Reference in New Issue