!23301 optimize allocator for lite inference

Merge pull request !23301 from ling/sr
This commit is contained in:
i-robot 2021-09-24 09:46:51 +00:00 committed by Gitee
commit 8d7483186f
15 changed files with 575 additions and 102 deletions

View File

@ -92,6 +92,7 @@ set(LITE_SRC
${CMAKE_CURRENT_SOURCE_DIR}/common/prim_util.cc
${CMAKE_CURRENT_SOURCE_DIR}/common/tensor_util.cc
${CMAKE_CURRENT_SOURCE_DIR}/runtime/inner_allocator.cc
${CMAKE_CURRENT_SOURCE_DIR}/runtime/optimize_allocator.cc
${CMAKE_CURRENT_SOURCE_DIR}/runtime/infer_manager.cc
${CMAKE_CURRENT_SOURCE_DIR}/tensor.cc
${CMAKE_CURRENT_SOURCE_DIR}/ms_tensor.cc

View File

@ -141,7 +141,7 @@ int LiteOpActor::IsolateInputData(std::vector<std::shared_ptr<LiteOpActor>> *act
for (LiteQuantParam quant : old_tensor->quant_params()) {
new_tensor->AddQuantParam(quant);
}
isolate_input_map_.insert(std::make_pair(new_tensor, old_tensor));
isolate_input_map_->insert(std::make_pair(new_tensor, old_tensor));
ReplaceNodeInTensor(kernel_, old_tensor, new_tensor);
/* set subgraph input for copy data */
kernel_->set_in_tensor(new_tensor, i);
@ -149,7 +149,10 @@ int LiteOpActor::IsolateInputData(std::vector<std::shared_ptr<LiteOpActor>> *act
return RET_OK;
}
int LiteOpActor::LiteActorInit(std::vector<std::shared_ptr<LiteOpActor>> *actors) {
int LiteOpActor::LiteActorInit(std::vector<std::shared_ptr<LiteOpActor>> *actors,
std::unordered_map<Tensor *, Tensor *> *input_map) {
isolate_input_map_ = input_map;
/* Init output arrow */
auto ret = CompileArrow();
if (ret != RET_OK) {
@ -175,7 +178,7 @@ int LiteOpActor::LiteActorInit(std::vector<std::shared_ptr<LiteOpActor>> *actors
int LiteOpActor::ResizeGraphInput(const std::vector<mindspore::tensor::MSTensor *> &inputs,
const std::vector<std::vector<int>> &dims) {
for (auto map : isolate_input_map_) {
for (auto map : *isolate_input_map_) {
auto isolate_tensor = map.first;
auto src_tensor = map.second;
for (size_t i = 0; i < inputs.size(); i++) {

View File

@ -51,11 +51,6 @@ class LiteOpActor : public OpActor<lite::Tensor> {
#endif
}
~LiteOpActor() override {
for (auto map : isolate_input_map_) {
auto isolate_input_tensor = map.first;
isolate_input_tensor->set_data(nullptr);
delete isolate_input_tensor;
}
delete call_node_;
delete partial_node_;
}
@ -69,7 +64,8 @@ class LiteOpActor : public OpActor<lite::Tensor> {
}
return ret;
}
int LiteActorInit(std::vector<std::shared_ptr<LiteOpActor>> *actors);
int LiteActorInit(std::vector<std::shared_ptr<LiteOpActor>> *actors,
std::unordered_map<Tensor *, Tensor *> *input_map);
int ResizeGraphInput(const std::vector<mindspore::tensor::MSTensor *> &inputs,
const std::vector<std::vector<int>> &dims);
@ -93,7 +89,7 @@ class LiteOpActor : public OpActor<lite::Tensor> {
std::unordered_map<kernel::LiteKernel *, AID> subgraph_to_actor_{};
std::vector<OpDataPtr<Tensor>> outputs_data_{};
std::vector<Tensor *> inputs_data_{};
std::unordered_map<Tensor *, Tensor *> isolate_input_map_{}; /* <calculate-tensor, src-input-tensor> */
std::unordered_map<Tensor *, Tensor *> *isolate_input_map_ = nullptr; /* real obj in session */
private:
void ReplaceNodeInTensor(kernel::LiteKernel *kernel, Tensor *old_tensor, Tensor *new_tensor);

View File

@ -31,6 +31,7 @@
#include "src/kernel_registry.h"
#include "src/lite_model.h"
#include "src/weight_decoder.h"
#include "src/runtime/optimize_allocator.h"
#ifdef ENABLE_MINDRT
#include "src/mindrt_executor.h"
#endif
@ -430,7 +431,7 @@ int LiteSession::IsolateOutputTensor() {
}
src_tensor->set_ref_count(1);
graph_output_map_.insert(std::make_pair(new_tensor, src_tensor));
isolate_graph_output_map_.insert(std::make_pair(new_tensor, src_tensor));
/* set new tensor for calculate */
for (auto subgraph : kernels_) {
@ -471,6 +472,8 @@ int LiteSession::IsolateOutputTensor() {
}
void LiteSession::FreePackOpWeight(const std::vector<kernel::LiteKernel *> &kernels) {
// For reducing runtime RAM
// free pack-op weight because pack-op will not access the origin weight at runtime
for (auto *kernel : kernels) {
MS_ASSERT(kernel != nullptr);
if (kernel->subgraph_type() == kernel::kNotSubGraph) {
@ -493,29 +496,14 @@ void LiteSession::FreePackOpWeight(const std::vector<kernel::LiteKernel *> &kern
}
int LiteSession::CompileGraph(Model *model) {
bool expected = false;
if (!is_running_.compare_exchange_strong(expected, true)) {
MS_LOG(ERROR) << "Not support multi-threading";
return RET_ERROR;
}
// model.MetaGraph ==> kernels
if (model == nullptr) {
MS_LOG(ERROR) << "The input model is nullptr.";
auto ret = PreCheck(model);
if (ret != RET_OK) {
MS_LOG(ERROR) << "schedule check failed: " << ret;
is_running_.store(false);
return RET_PARAM_INVALID;
}
if (model->buf == nullptr) {
MS_LOG(ERROR) << "The input model buf is nullptr.";
is_running_.store(false);
return RET_PARAM_INVALID;
}
if (!reinterpret_cast<LiteModel *>(model)->ModelVerify()) {
MS_LOG(ERROR) << "wrong model input, please check";
is_running_.store(false);
return RET_ERROR;
return ret;
}
auto ret = ConvertTensors(model);
ret = ConvertTensors(model);
if (ret != RET_OK) {
MS_LOG(ERROR) << "ConvertTensors failed: " << ret;
is_running_.store(false);
@ -523,14 +511,10 @@ int LiteSession::CompileGraph(Model *model) {
}
InitGraphInputTensors(model);
InitGraphOutputTensors(model);
#ifndef ENABLE_FP16
if (context_->GetCpuInfo().enable_float16_) {
MS_LOG(WARNING) << unsupport_fp16_log;
}
#endif
// scheduler kernels
Scheduler scheduler(context_, ms_context_, model, &tensors_, inputs_, outputs_, is_train_session_, execution_plan_,
delegate_, delegate_device_type_);
Scheduler scheduler(context_, ms_context_, model, &tensors_, inputs_, outputs_, is_train_session_, &is_infershape_,
&is_control_flow_, execution_plan_, delegate_, delegate_device_type_);
scheduler.SetupSchedulerCb(std::move(sched_cb_));
ret = scheduler.Schedule(&kernels_);
if (ret != RET_OK) {
@ -552,33 +536,22 @@ int LiteSession::CompileGraph(Model *model) {
return RET_OK;
}
#ifdef ENABLE_MINDRT
ret = IsolateOutputTensor();
ret = InitExecutor();
if (ret != RET_OK) {
MS_LOG(ERROR) << "Isolate output tensor failed.";
is_running_.store(false);
return ret;
}
executor_ = new (std::nothrow) MindrtExecutor(&graph_output_map_);
#else
executor_ = new (std::nothrow) Executor();
#endif
if (executor_ == nullptr) {
MS_LOG(ERROR) << "New Executor failed";
is_running_.store(false);
return RET_ERROR;
}
ret = executor_->Prepare(this->kernels_, this->inputs_, this->outputs_, context_);
if (ret != RET_OK) {
MS_LOG(ERROR) << "Prepare executor failed: " << ret;
MS_LOG(ERROR) << "InitExecutor failed: " << ret;
is_running_.store(false);
return ret;
}
// For reducing runtime RAM, free packop weight because packop will pack weight and will not access to origin weight
FreePackOpWeight(kernels_);
ret = OptimizeRuntimeAllocator();
if (ret != RET_OK) {
MS_LOG(ERROR) << "OptimizeRuntimeAllocator failed.";
is_running_.store(false);
return ret;
}
is_running_.store(false);
return RET_OK;
}
@ -824,19 +797,25 @@ LiteSession::~LiteSession() {
tensor = nullptr;
}
for (auto item : graph_output_map_) {
for (auto item : isolate_graph_output_map_) {
auto isolate_output_tensor = item.first;
isolate_output_tensor->set_data(nullptr);
delete isolate_output_tensor;
isolate_output_tensor = nullptr;
}
for (auto map : isolate_input_map_) {
auto isolate_input_tensor = map.first;
isolate_input_tensor->set_data(nullptr);
delete isolate_input_tensor;
}
// Tensor * in input_map and output_map are freed in tensors_
input_map_.clear();
output_node_map_.clear();
output_tensor_map_.clear();
input_vec_.clear();
graph_output_map_.clear();
isolate_graph_output_map_.clear();
delete this->executor_;
this->executor_ = nullptr;
@ -986,6 +965,157 @@ int LiteSession::Resize(const std::vector<mindspore::tensor::MSTensor *> &inputs
return RET_OK;
}
int LiteSession::PreCheck(Model *model) {
bool expected = false;
if (!is_running_.compare_exchange_strong(expected, true)) {
MS_LOG(ERROR) << "Not support multi-threading";
return RET_ERROR;
}
if (model == nullptr) {
MS_LOG(ERROR) << "The input model is nullptr.";
return RET_PARAM_INVALID;
}
if (model->buf == nullptr) {
MS_LOG(ERROR) << "The input model buf is nullptr.";
return RET_PARAM_INVALID;
}
if (!reinterpret_cast<LiteModel *>(model)->ModelVerify()) {
MS_LOG(ERROR) << "wrong model input, please check";
return RET_ERROR;
}
#ifndef ENABLE_FP16
if (context_->GetCpuInfo().enable_float16_) {
MS_LOG(WARNING) << unsupport_fp16_log;
}
#endif
return RET_OK;
}
int LiteSession::InitExecutor() {
int ret = RET_OK;
#ifdef ENABLE_MINDRT
ret = IsolateOutputTensor();
if (ret != RET_OK) {
MS_LOG(ERROR) << "Isolate output tensor failed.";
return ret;
}
executor_ = new (std::nothrow) MindrtExecutor(&isolate_graph_output_map_, &isolate_input_map_);
#else
executor_ = new (std::nothrow) Executor();
#endif
if (executor_ == nullptr) {
MS_LOG(ERROR) << "New Executor failed";
return RET_ERROR;
}
ret = executor_->Prepare(kernels_, inputs_, outputs_, context_);
if (ret != RET_OK) {
MS_LOG(ERROR) << "Prepare executor failed: " << ret;
return ret;
}
return RET_OK;
}
int LiteSession::OptimizeRuntimeAllocator() {
if (is_infershape_ != RET_OK) {
MS_LOG(ERROR) << "Not support opt allocator in runtime-infershape.";
return RET_OK;
}
if (is_control_flow_ == true) {
MS_LOG(ERROR) << "Not support opt allocator in control flow model.";
return RET_OK;
}
AllocatorPtr default_allocator = context_->allocator;
OptAllocatorPtr optimize_allocator = std::make_shared<OptimizeAllocator>();
std::unordered_map<lite::Tensor *, int> ref_count;
for (auto subgraph : kernels_) {
if (subgraph->desc().arch != kernel::KERNEL_ARCH::kCPU) {
continue;
}
for (auto in_tensor : subgraph->in_tensors()) {
auto iter = isolate_input_map_.find(in_tensor);
if (isolate_input_map_.end() == iter) break;
auto src_t = iter->second;
if (src_t->data_type() == in_tensor->data_type()) {
in_tensor->set_allocator(src_t->allocator());
ref_count[src_t] += in_tensor->init_ref_count();
continue;
}
if (src_t->allocator() == default_allocator) {
src_t->set_allocator(optimize_allocator);
ref_count[src_t] = src_t->init_ref_count();
optimize_allocator->MallocTensorData(src_t);
}
if (ref_count[in_tensor]-- <= 0) {
optimize_allocator->FreeTensorData(in_tensor);
}
}
auto kernel_list = reinterpret_cast<kernel::SubGraphKernel *>(subgraph)->nodes();
for (auto kernel : kernel_list) {
/* malloc for output */
for (auto tensor : kernel->out_tensors()) {
if (tensor->IsGraphOutput() == true) {
continue;
}
if (tensor->allocator() != default_allocator) {
continue;
}
tensor->set_allocator(optimize_allocator);
ref_count[tensor] = tensor->init_ref_count();
optimize_allocator->MallocTensorData(tensor);
}
/* free input after run */
for (auto tensor : kernel->in_tensors()) {
if (tensor->allocator() != optimize_allocator) {
continue;
}
if (ref_count[tensor]-- <= 0) {
optimize_allocator->FreeTensorData(tensor);
}
}
}
}
auto ret = OptAllocatorSetData(optimize_allocator);
if (ret != RET_OK) {
MS_LOG(ERROR) << "using optimize allocator failed.";
return ret;
}
return RET_OK;
}
int LiteSession::OptAllocatorSetData(OptAllocatorPtr opt_allocator) {
void *data = opt_allocator->MallocOptData();
if (data == nullptr) {
MS_LOG(ERROR) << "malloc optimize data failed.";
return RET_ERROR;
}
int8_t *int8_data = reinterpret_cast<int8_t *>(data);
auto offset_map = opt_allocator->GetOffsetMap();
for (auto tensor : tensors_) {
if (tensor->allocator() != opt_allocator) {
continue;
}
auto offset_iter = offset_map.find(tensor);
if (offset_iter == offset_map.end()) {
return RET_ERROR;
}
tensor->set_data(int8_data + offset_iter->second);
}
return RET_OK;
}
int LiteSession::InitGPURuntime() {
if (context_->IsCpuEnabled()) {
CpuBindMode cpu_bind_mode = context_->GetCpuDeviceInfo()->cpu_bind_mode_;

View File

@ -28,6 +28,7 @@
#include "include/lite_session.h"
#include "include/model.h"
#include "src/inner_context.h"
#include "src/runtime/optimize_allocator.h"
#include "schema/model_generated.h"
#include "src/executor.h"
#include "src/tensor.h"
@ -125,12 +126,19 @@ class LiteSession : public session::LiteSession {
static void FreePackOpWeight(const std::vector<kernel::LiteKernel *> &kernels);
private:
int PreCheck(Model *model);
int InitExecutor();
void ResetInputsShape(const std::vector<std::vector<int>> &dims);
int InitGPURuntime();
bool IsIsolatedSubGraph(kernel::LiteKernel *kernel);
int OptimizeRuntimeAllocator();
int OptAllocatorSetData(OptAllocatorPtr opt_allocator);
protected:
InnerContext *context_ = nullptr;
mindspore::Context *ms_context_ = nullptr;
@ -150,7 +158,11 @@ class LiteSession : public session::LiteSession {
std::vector<std::string> output_tensor_names_;
// graph output tensor name -- output tensor
std::unordered_map<std::string, mindspore::tensor::MSTensor *> output_tensor_map_;
std::unordered_map<Tensor *, Tensor *> graph_output_map_; /* <calculate-tensor, graph-output-tensor> */
// graph isolate tensors
std::unordered_map<Tensor *, Tensor *> isolate_graph_output_map_; /* <calculate-tensor, graph-output-tensor> */
std::unordered_map<Tensor *, Tensor *> isolate_input_map_; /* <calculate-tensor, src-input-tensor> */
Executor *executor_ = nullptr;
Model *model_ = nullptr;
std::atomic<bool> is_running_ = {false};
@ -159,6 +171,8 @@ class LiteSession : public session::LiteSession {
#if GPU_OPENCL
opencl::OpenCLRuntimeInnerWrapper *opencl_runtime_wrapper_{nullptr};
#endif
int is_infershape_{RET_ERROR};
bool is_control_flow_ = false;
std::unique_ptr<SchedulerCb> sched_cb_;
std::shared_ptr<Delegate> delegate_ = nullptr;
int delegate_device_type_ = -1; // -1: not specified; 0: CPU; 1: GPU; 2: NPU

View File

@ -55,13 +55,13 @@ int MindrtExecutor::PrepareOutputData(const std::vector<kernel::LiteKernel *> &k
continue;
}
auto current_output_map =
std::find_if(output_tensor_map_->begin(), output_tensor_map_->end(), [&](const auto output_map_tensor) {
std::find_if(isolate_output_map_->begin(), isolate_output_map_->end(), [&](const auto output_map_tensor) {
if (graph_output_tensor == output_map_tensor.second) {
return true;
}
return false;
});
MS_ASSERT(current_output_map != output_tensor_map_->end());
MS_ASSERT(current_output_map != isolate_output_map_->end());
Tensor *subgraph_output_tensor = current_output_map->first;
for (size_t j = 0; j < kernels.size(); ++j) {
@ -120,7 +120,7 @@ int MindrtExecutor::Prepare(const std::vector<kernel::LiteKernel *> &kernels, co
}
for (auto actor : op_actors_) {
ret = actor->LiteActorInit(&op_actors_);
ret = actor->LiteActorInit(&op_actors_, isolate_input_map_);
if (ret != RET_OK) {
MS_LOG(ERROR) << "LiteActorInit failed, actor aid: " << actor->GetAID();
return ret;
@ -131,7 +131,7 @@ int MindrtExecutor::Prepare(const std::vector<kernel::LiteKernel *> &kernels, co
}
void MindrtExecutor::TransferGraphOutput() {
for (auto tensor_map : *output_tensor_map_) {
for (auto tensor_map : *isolate_output_map_) {
auto dst_tensor = tensor_map.second;
auto src_tensor = tensor_map.first;
dst_tensor->set_shape(src_tensor->shape());
@ -151,7 +151,7 @@ void MindrtExecutor::TransferGraphOutput() {
}
void MindrtExecutor::FreeOutputTensor() {
for (auto tensor_map : *output_tensor_map_) {
for (auto tensor_map : *isolate_output_map_) {
auto src_tensor = tensor_map.first;
auto dst_tensor = tensor_map.second;
if (dst_tensor->allocator() != nullptr) {

View File

@ -29,7 +29,9 @@
namespace mindspore::lite {
class MindrtExecutor : public Executor {
public:
explicit MindrtExecutor(std::unordered_map<Tensor *, Tensor *> *output_map) : output_tensor_map_(output_map) {}
explicit MindrtExecutor(std::unordered_map<Tensor *, Tensor *> *output_map,
std::unordered_map<Tensor *, Tensor *> *input_map)
: isolate_output_map_(output_map), isolate_input_map_(input_map) {}
virtual ~MindrtExecutor() { MindrtTerminate(op_actors_); }
int Prepare(const std::vector<kernel::LiteKernel *> &kernels, const std::vector<Tensor *> &inputs,
@ -52,7 +54,8 @@ class MindrtExecutor : public Executor {
std::vector<std::shared_ptr<LiteOpActor>> op_actors_;
std::vector<OpDataPtr<Tensor>> input_data_;
std::vector<OpDataPtr<Tensor>> output_data_;
std::unordered_map<Tensor *, Tensor *> *output_tensor_map_;
std::unordered_map<Tensor *, Tensor *> *isolate_output_map_;
std::unordered_map<Tensor *, Tensor *> *isolate_input_map_;
};
} // namespace mindspore::lite

View File

@ -0,0 +1,102 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "src/runtime/optimize_allocator.h"
namespace mindspore {
OptimizeAllocator::OptimizeAllocator(size_t aligned_size) {
aligned_size_ = aligned_size;
return;
}
OptimizeAllocator::~OptimizeAllocator() {
if (data_ != nullptr) {
free(data_);
data_ = nullptr;
}
}
void *OptimizeAllocator::MallocOptData() {
if (data_ == nullptr) {
data_ = malloc(total_size_);
}
return data_;
}
size_t OptimizeAllocator::FindMinFree(size_t size) {
size_t min_size = total_size_;
size_t min_addr = total_size_;
for (auto const &itr : free_list_) {
if (itr.second >= size && min_size > itr.second) {
min_size = itr.second;
min_addr = itr.first;
}
}
return min_addr;
}
void OptimizeAllocator::FreeTensorData(lite::Tensor *tensor) {
size_t offset = offset_map_[tensor];
free_list_[offset] = used_list_[offset];
used_list_.erase(offset);
size_t length = free_list_[offset];
size_t post_offset = offset + length;
auto post_iter = free_list_.find(post_offset);
if (post_iter != free_list_.end()) {
size_t post_length = post_iter->second;
free_list_[offset] = length + post_length;
free_list_.erase(post_offset);
}
auto pre_iter = free_list_.lower_bound(offset);
if (pre_iter != free_list_.begin()) {
pre_iter--;
size_t pre_offset = pre_iter->first;
if ((pre_offset + free_list_[pre_offset]) == offset) {
free_list_[pre_offset] = free_list_[pre_offset] + free_list_[offset];
free_list_.erase(offset);
}
}
}
void OptimizeAllocator::MallocTensorData(lite::Tensor *tensor) {
size_t size = tensor->Size();
size_t offset = FindMinFree(size);
if (offset >= total_size_) {
if (free_list_.empty()) {
offset = total_size_;
} else {
offset = free_list_.rbegin()->first;
if (offset + free_list_[offset] < total_size_) {
offset = total_size_;
} else {
free_list_.erase(offset);
}
}
if (offset + size > total_size_) {
total_size_ = offset + size;
}
} else {
if (free_list_[offset] > size) {
free_list_[offset + size] = free_list_[offset] - size;
}
free_list_.erase(offset);
}
used_list_[offset] = size;
offset_map_[tensor] = offset;
}
} // namespace mindspore
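The planning strategy above boils down to: give every tensor an offset inside one logical arena, reuse the smallest free block that fits (best fit), grow the arena only when nothing fits, and merge freed blocks with their neighbours so larger tensors can land there later. Below is a minimal, self-contained C++ sketch of that idea, not part of the patch: it uses plain integer ids instead of lite::Tensor, the OffsetPlanner / MallocTensor / FreeTensor / OffsetOf names are invented for illustration, and it omits the predecessor merge and the alignment handling of the real OptimizeAllocator.

#include <cstdint>
#include <cstdio>
#include <map>
#include <unordered_map>

class OffsetPlanner {
 public:
  // Reserve `size` bytes for tensor `id`, reusing the smallest fitting free block.
  void MallocTensor(int id, size_t size) {
    size_t offset = FindMinFree(size);
    if (offset >= total_size_) {
      offset = total_size_;  // nothing fits: place at the tail
      total_size_ += size;   // and grow the arena
    } else {
      if (free_list_[offset] > size) {
        free_list_[offset + size] = free_list_[offset] - size;  // split the block
      }
      free_list_.erase(offset);
    }
    used_list_[offset] = size;
    offset_map_[id] = offset;
  }

  // Return tensor `id`'s block to the free list, merging with an adjacent successor.
  void FreeTensor(int id) {
    size_t offset = offset_map_[id];
    free_list_[offset] = used_list_[offset];
    used_list_.erase(offset);
    auto post = free_list_.find(offset + free_list_[offset]);
    if (post != free_list_.end()) {
      free_list_[offset] += post->second;
      free_list_.erase(post);
    }
  }

  size_t OffsetOf(int id) const { return offset_map_.at(id); }
  size_t TotalSize() const { return total_size_; }

 private:
  // Smallest free block that can hold `size`; total_size_ doubles as a "not found" sentinel.
  size_t FindMinFree(size_t size) const {
    size_t best_offset = total_size_;
    size_t best_size = SIZE_MAX;
    for (auto const &block : free_list_) {
      if (block.second >= size && block.second < best_size) {
        best_size = block.second;
        best_offset = block.first;
      }
    }
    return best_offset;
  }

  size_t total_size_ = 0;
  std::unordered_map<int, size_t> offset_map_; /* tensor id -> offset */
  std::map<size_t, size_t> free_list_;         /* offset -> size */
  std::map<size_t, size_t> used_list_;         /* offset -> size */
};

int main() {
  OffsetPlanner planner;
  planner.MallocTensor(0, 64);  // first intermediate tensor -> offset 0
  planner.MallocTensor(1, 64);  // second tensor while the first is still alive -> offset 64
  planner.FreeTensor(0);        // first tensor consumed for the last time
  planner.MallocTensor(2, 64);  // third tensor reuses the freed slot at offset 0
  std::printf("tensor 2 offset = %zu, arena = %zu bytes\n", planner.OffsetOf(2), planner.TotalSize());
  return 0;
}

With this call sequence the planner reports offset 0 for tensor 2 and a 128-byte arena, so three 64-byte tensors share 128 bytes; the session then performs one real allocation of that size (MallocOptData) and binds each tensor to base + offset, which is what OptAllocatorSetData does in lite_session.cc.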

View File

@ -0,0 +1,61 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_SRC_RUNTIME_OPTIMIZE_ALLOCATOR_H_
#define MINDSPORE_LITE_SRC_RUNTIME_OPTIMIZE_ALLOCATOR_H_
#include <memory>
#include <map>
#include <unordered_map>
#include "include/api/allocator.h"
#include "include/errorcode.h"
#include "src/tensor.h"
namespace mindspore {
class OptimizeAllocator : public Allocator {
public:
explicit OptimizeAllocator(size_t aligned_size = 32);
~OptimizeAllocator() override;
public:
void *Malloc(size_t size) override { return nullptr; }
void Free(void *ptr) override { return; }
int RefCount(void *ptr) override { return lite::RET_OK; }
int SetRefCount(void *ptr, int ref_count) override { return lite::RET_OK; }
int IncRefCount(void *ptr, int ref_count) override { return lite::RET_OK; }
int DecRefCount(void *ptr, int ref_count) override { return lite::RET_OK; }
public:
void MallocTensorData(lite::Tensor *tensor);
void FreeTensorData(lite::Tensor *tensor);
void *MallocOptData();
const std::unordered_map<lite::Tensor *, size_t> &GetOffsetMap() const { return offset_map_; }
private:
size_t FindMinFree(size_t size);
private:
void *data_ = nullptr;
size_t aligned_size_ = 32;
size_t total_size_ = 0;
std::unordered_map<lite::Tensor *, size_t> offset_map_;
std::map<size_t, size_t> free_list_; /* offset, size */
std::map<size_t, size_t> used_list_; /* offset, size */
};
using OptAllocatorPtr = std::shared_ptr<OptimizeAllocator>;
} // namespace mindspore
#endif // MINDSPORE_LITE_SRC_RUNTIME_OPTIMIZE_ALLOCATOR_H_

View File

@ -241,15 +241,17 @@ int Scheduler::InitKernels(std::vector<kernel::LiteKernel *> dst_kernels) {
}
int Scheduler::SchedulePreProcess() {
schema_version_ = reinterpret_cast<LiteModel *>(src_model_)->GetSchemaVersion();
this->graph_output_node_indexes_ = GetGraphOutputNodes(src_model_);
int infershape_ret = InferSubGraphShape(kMainSubGraphIndex);
if (infershape_ret != RET_OK && infershape_ret != RET_INFER_INVALID) {
*is_infershape_ = InferSubGraphShape(kMainSubGraphIndex);
if (*is_infershape_ != RET_OK && *is_infershape_ != RET_INFER_INVALID) {
MS_LOG(ERROR) << "op infer shape failed.";
return infershape_ret;
return *is_infershape_;
}
if (context_->enable_parallel_ && infershape_ret != RET_INFER_INVALID) {
if (context_->enable_parallel_ && *is_infershape_ != RET_INFER_INVALID) {
#ifndef AUTO_PARALLEL_CLIP
auto search_sub_graph =
SearchSubGraph(context_, src_model_, src_tensors_, &op_parameters_, &graph_output_node_indexes_);
@ -275,6 +277,21 @@ int Scheduler::CheckCpuValid(std::vector<kernel::LiteKernel *> *dst_kernels) {
return RET_OK;
}
int Scheduler::ConstructSubGraphs(std::vector<kernel::LiteKernel *> *dst_kernels) {
#ifndef CONTROLFLOW_TENSORLIST_CLIP
if (IsControlFlowParttern(*dst_kernels)) {
*is_control_flow_ = true;
return ConstructControlFlowMainGraph(dst_kernels);
}
#endif
*is_control_flow_ = false;
auto src_kernel = *dst_kernels;
dst_kernels->clear();
std::map<const kernel::LiteKernel *, bool> is_kernel_finish;
return ConstructNormalSubGraphs(src_kernel, dst_kernels, &is_kernel_finish);
}
int Scheduler::Schedule(std::vector<kernel::LiteKernel *> *dst_kernels) {
int check_input_ret = CheckInputParam(dst_kernels);
if (check_input_ret != RET_OK) {
@ -282,8 +299,6 @@ int Scheduler::Schedule(std::vector<kernel::LiteKernel *> *dst_kernels) {
return check_input_ret;
}
schema_version_ = reinterpret_cast<LiteModel *>(src_model_)->GetSchemaVersion();
int ret = SchedulePreProcess();
if (ret != RET_OK) {
return ret;
@ -307,7 +322,6 @@ int Scheduler::Schedule(std::vector<kernel::LiteKernel *> *dst_kernels) {
MS_LOG(ERROR) << "Repalce delegate kernels failed.";
return ret;
}
context_->thread_pool()->SetSpinCountMinValue();
#endif
ret = CheckCpuValid(dst_kernels);
@ -322,26 +336,11 @@ int Scheduler::Schedule(std::vector<kernel::LiteKernel *> *dst_kernels) {
RuntimePass(context_, dst_kernels, src_tensors_);
#endif
#ifndef CONTROLFLOW_TENSORLIST_CLIP
if (IsControlFlowParttern(*dst_kernels)) {
ret = ConstructControlFlowMainGraph(dst_kernels);
if (ret != RET_OK) {
MS_LOG(ERROR) << "ConstructControlFlowMainGraph failed.";
return ret;
}
} else {
#endif
auto src_kernel = *dst_kernels;
dst_kernels->clear();
std::map<const kernel::LiteKernel *, bool> is_kernel_finish;
ret = ConstructSubGraphs(src_kernel, dst_kernels, &is_kernel_finish);
if (ret != RET_OK) {
MS_LOG(ERROR) << "ConstructSubGraphs failed.";
return ret;
}
#ifndef CONTROLFLOW_TENSORLIST_CLIP
ret = ConstructSubGraphs(dst_kernels);
if (ret != RET_OK) {
MS_LOG(ERROR) << "ConstructSubGraphs failed.";
return ret;
}
#endif
ret = InitKernels(*dst_kernels);
if (ret != RET_OK) {
@ -457,6 +456,9 @@ int Scheduler::InitDelegateKernels(std::vector<kernel::LiteKernel *> *dst_kernel
return RET_OK;
}
/* set delegate spin count */
context_->thread_pool()->SetSpinCountMinValue();
/* external delegate */
if (delegate_device_type_ == -1) {
auto ret = ReplaceDelegateKernels(dst_kernels);
@ -1521,9 +1523,9 @@ kernel::LiteKernel *FindAllSubGraphKernels(const std::vector<kernel::LiteKernel
}
} // namespace
int Scheduler::ConstructSubGraphs(std::vector<kernel::LiteKernel *> src_kernel,
std::vector<kernel::LiteKernel *> *dst_kernel,
std::map<const kernel::LiteKernel *, bool> *is_kernel_finish) {
int Scheduler::ConstructNormalSubGraphs(std::vector<kernel::LiteKernel *> src_kernel,
std::vector<kernel::LiteKernel *> *dst_kernel,
std::map<const kernel::LiteKernel *, bool> *is_kernel_finish) {
if (src_kernel.empty()) {
return RET_OK;
}

View File

@ -41,9 +41,9 @@ class Scheduler {
public:
Scheduler(const InnerContext *ctx, const mindspore::Context *ms_ctx, Model *src_model,
std::vector<Tensor *> *src_tensors, const std::vector<Tensor *> &input_tensors,
const std::vector<Tensor *> &output_tensors, bool is_train_session,
std::map<std::string, TypeId> *executions, std::shared_ptr<Delegate> delegate = nullptr,
int delegate_device_type = -1)
const std::vector<Tensor *> &output_tensors, bool is_train_session, int *is_infershape,
bool *is_control_flow, std::map<std::string, TypeId> *executions,
std::shared_ptr<Delegate> delegate = nullptr, int delegate_device_type = -1)
: context_(ctx),
ms_context_(ms_ctx),
src_model_(src_model),
@ -51,6 +51,8 @@ class Scheduler {
inputs_(input_tensors),
outputs_(output_tensors),
is_train_session_(is_train_session),
is_control_flow_(is_control_flow),
is_infershape_(is_infershape),
delegate_(delegate),
delegate_device_type_(delegate_device_type),
execution_plan_(executions) {}
@ -102,8 +104,12 @@ class Scheduler {
// find in_kernels_ and out_kernels of kernel, sub_graph and nodes_ in sub_graph
static void FindAllInoutKernels(const std::vector<kernel::LiteKernel *> &kernels);
// vector<LiteKernel/SubGraphKernel> --> vector<SubGraphKernel>
int ConstructSubGraphs(std::vector<kernel::LiteKernel *> src_kernel, std::vector<kernel::LiteKernel *> *dst_kernel,
std::map<const kernel::LiteKernel *, bool> *sinked_kernel_map);
int ConstructNormalSubGraphs(std::vector<kernel::LiteKernel *> src_kernel,
std::vector<kernel::LiteKernel *> *dst_kernel,
std::map<const kernel::LiteKernel *, bool> *sinked_kernel_map);
int ConstructSubGraphs(std::vector<kernel::LiteKernel *> *dst_kernel);
// create subgraph_kernel from a vector of kernel
std::vector<kernel::LiteKernel *> ScheduleMainSubGraphToKernels();
kernel::LiteKernel *SchedulePartialToSubGraphKernel(const int &subgraph_index);
@ -147,6 +153,8 @@ class Scheduler {
std::vector<size_t> graph_output_node_indexes_;
std::map<int, OpParameter *> op_parameters_;
bool is_train_session_ = false;
bool *is_control_flow_ = nullptr;
int *is_infershape_ = nullptr;
std::unique_ptr<SchedulerCb> sched_cb_;
std::map<kernel::Kernel *, const schema::Primitive *> primitives_;
std::shared_ptr<Delegate> delegate_ = nullptr;

View File

@ -22,6 +22,7 @@ file(GLOB_RECURSE TEST_UT_SRC
${TEST_DIR}/ut/src/registry/registry_custom_op_test.cc
${TEST_DIR}/ut/src/runtime/runtime_pass_tests.cc
${TEST_DIR}/st/multiple_device_test.cc
${TEST_DIR}/st/optimize_allocator_test.cc
${TEST_DIR}/st/mindrt_parallel_runtime_test.cc
${TEST_DIR}/st/mix_data_type_test.cc
${TEST_DIR}/ut/nnacl/infer/*.cc

View File

@ -95,5 +95,8 @@ echo 'run custom delegate st test'
echo 'runtime pass'
./lite-test --gtest_filter="RuntimePass.*"
echo 'Optimize Allocator'
./lite-test --gtest_filter="OptAllocator.*"
echo 'Runtime config file test'
./lite-test --gtest_filter="MixDataTypeTest.Config1"

View File

@ -0,0 +1,148 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either address or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "common/common_test.h"
#include "schema/inner/model_generated.h"
#include "src/lite_session.h"
#include "src/sub_graph_kernel.h"
#include "ir/dtype/type_id.h"
#include "include/version.h"
#include "include/model.h"
namespace mindspore {
class OptAllocator : public mindspore::CommonTest {
public:
OptAllocator() = default;
};
void CreateModel1(mindspore::schema::MetaGraphT *meta_graph) {
meta_graph->name = "graph";
meta_graph->version = mindspore::lite::Version();
/* cos
* / \
* sin |
* \ /
* add
* |
* */
auto cos = std::make_unique<mindspore::schema::CNodeT>();
cos->inputIndex = {0};
cos->outputIndex = {1};
cos->primitive = std::make_unique<mindspore::schema::PrimitiveT>();
cos->primitive->value.type = mindspore::schema::PrimitiveType_Cos;
auto cos_primitive = new mindspore::schema::CosT;
cos->primitive->value.value = cos_primitive;
cos->name = "cos";
auto sin = std::make_unique<mindspore::schema::CNodeT>();
sin->inputIndex = {1};
sin->outputIndex = {2};
sin->primitive = std::make_unique<mindspore::schema::PrimitiveT>();
sin->primitive->value.type = mindspore::schema::PrimitiveType_Sin;
auto sin_primitive = new mindspore::schema::SinT;
sin->primitive->value.value = sin_primitive;
sin->name = "sin";
auto add = std::make_unique<mindspore::schema::CNodeT>();
add->inputIndex = {1, 2};
add->outputIndex = {3};
add->primitive = std::make_unique<mindspore::schema::PrimitiveT>();
add->primitive->value.type = mindspore::schema::PrimitiveType_AddFusion;
auto add_primitive = new mindspore::schema::AddFusionT;
add->primitive->value.value = add_primitive;
add->name = "add";
/* tensors */
auto tensor0 = std::make_unique<mindspore::schema::TensorT>();
tensor0->nodeType = mindspore::lite::NodeType_ValueNode;
tensor0->format = mindspore::schema::Format_NHWC;
tensor0->dataType = mindspore::TypeId::kNumberTypeFloat32;
tensor0->dims = {4};
tensor0->offset = -1;
tensor0->name = "input";
auto tensor1 = std::make_unique<mindspore::schema::TensorT>();
tensor1->nodeType = mindspore::lite::NodeType_ValueNode;
tensor1->format = mindspore::schema::Format_NHWC;
tensor1->dataType = mindspore::TypeId::kNumberTypeFloat32;
tensor1->dims = {4};
tensor1->offset = -1;
tensor1->name = "cos";
auto tensor2 = std::make_unique<mindspore::schema::TensorT>();
tensor2->nodeType = mindspore::lite::NodeType_ValueNode;
tensor2->format = mindspore::schema::Format_NHWC;
tensor2->dataType = mindspore::TypeId::kNumberTypeFloat32;
tensor2->dims = {4};
tensor2->offset = -1;
tensor2->name = "sin";
auto tensor3 = std::make_unique<mindspore::schema::TensorT>();
tensor3->nodeType = mindspore::lite::NodeType_ValueNode;
tensor3->format = mindspore::schema::Format_NHWC;
tensor3->dataType = mindspore::TypeId::kNumberTypeFloat32;
tensor3->dims = {4};
tensor3->offset = -1;
tensor3->name = "add";
meta_graph->nodes.emplace_back(std::move(cos));
meta_graph->nodes.emplace_back(std::move(sin));
meta_graph->nodes.emplace_back(std::move(add));
meta_graph->allTensors.emplace_back(std::move(tensor0));
meta_graph->allTensors.emplace_back(std::move(tensor1));
meta_graph->allTensors.emplace_back(std::move(tensor2));
meta_graph->allTensors.emplace_back(std::move(tensor3));
meta_graph->inputIndex = {0};
meta_graph->outputIndex = {3};
}
TEST_F(OptAllocator, OptAllocator1) {
auto meta_graph = std::make_shared<mindspore::schema::MetaGraphT>();
CreateModel1(meta_graph.get());
flatbuffers::FlatBufferBuilder builder(1024);
auto offset = mindspore::schema::MetaGraph::Pack(builder, meta_graph.get());
builder.Finish(offset);
mindspore::schema::FinishMetaGraphBuffer(builder, offset);
size_t size = builder.GetSize();
const char *content = reinterpret_cast<char *>(builder.GetBufferPointer());
auto context = std::make_shared<mindspore::lite::Context>();
auto *lite_session = mindspore::session::LiteSession::CreateSession(content, size, context.get());
ASSERT_NE(lite_session, nullptr);
auto input = lite_session->GetInputs().front();
std::vector<float> in_data = {1.0, 2.0, 3.0, 4.0};
memcpy(input->MutableData(), in_data.data(), input->Size());
auto ret = lite_session->RunGraph();
ASSERT_EQ(mindspore::lite::RET_OK, ret);
/* check output */
void *out_data = lite_session->GetOutputs().begin()->second->MutableData();
float *fp32_data = reinterpret_cast<float *>(out_data);
ASSERT_LE(fabs(fp32_data[0] - (1.054698)), 0.01);
ASSERT_LE(fabs(fp32_data[1] - (-0.820386)), 0.01);
ASSERT_LE(fabs(fp32_data[2] - (-1.826014)), 0.01);
ASSERT_LE(fabs(fp32_data[3] - (-1.261727)), 0.01);
delete lite_session;
}
} // namespace mindspore

View File

@ -159,6 +159,7 @@ set(LITE_SRC
${SRC_DIR}/common/prim_util.cc
${SRC_DIR}/common/tensor_util.cc
${SRC_DIR}/runtime/inner_allocator.cc
${SRC_DIR}/runtime/optimize_allocator.cc
${SRC_DIR}/runtime/infer_manager.cc
${SRC_DIR}/runtime/runtime_pass.cc
${SRC_DIR}/inner_context.cc