!23301 optimize allocator for lite inference
Merge pull request !23301 from ling/sr
commit 8d7483186f
@@ -92,6 +92,7 @@ set(LITE_SRC
     ${CMAKE_CURRENT_SOURCE_DIR}/common/prim_util.cc
     ${CMAKE_CURRENT_SOURCE_DIR}/common/tensor_util.cc
     ${CMAKE_CURRENT_SOURCE_DIR}/runtime/inner_allocator.cc
+    ${CMAKE_CURRENT_SOURCE_DIR}/runtime/optimize_allocator.cc
     ${CMAKE_CURRENT_SOURCE_DIR}/runtime/infer_manager.cc
     ${CMAKE_CURRENT_SOURCE_DIR}/tensor.cc
     ${CMAKE_CURRENT_SOURCE_DIR}/ms_tensor.cc
@@ -141,7 +141,7 @@ int LiteOpActor::IsolateInputData(std::vector<std::shared_ptr<LiteOpActor>> *act
     for (LiteQuantParam quant : old_tensor->quant_params()) {
       new_tensor->AddQuantParam(quant);
     }
-    isolate_input_map_.insert(std::make_pair(new_tensor, old_tensor));
+    isolate_input_map_->insert(std::make_pair(new_tensor, old_tensor));
     ReplaceNodeInTensor(kernel_, old_tensor, new_tensor);
     /* set subgraph input for copy data */
     kernel_->set_in_tensor(new_tensor, i);
@@ -149,7 +149,10 @@ int LiteOpActor::IsolateInputData(std::vector<std::shared_ptr<LiteOpActor>> *act
   return RET_OK;
 }
 
-int LiteOpActor::LiteActorInit(std::vector<std::shared_ptr<LiteOpActor>> *actors) {
+int LiteOpActor::LiteActorInit(std::vector<std::shared_ptr<LiteOpActor>> *actors,
+                               std::unordered_map<Tensor *, Tensor *> *input_map) {
+  isolate_input_map_ = input_map;
+
   /* Init output arrow */
   auto ret = CompileArrow();
   if (ret != RET_OK) {
@@ -175,7 +178,7 @@ int LiteOpActor::LiteActorInit(std::vector<std::shared_ptr<LiteOpActor>> *actors
 
 int LiteOpActor::ResizeGraphInput(const std::vector<mindspore::tensor::MSTensor *> &inputs,
                                   const std::vector<std::vector<int>> &dims) {
-  for (auto map : isolate_input_map_) {
+  for (auto map : *isolate_input_map_) {
     auto isolate_tensor = map.first;
     auto src_tensor = map.second;
     for (size_t i = 0; i < inputs.size(); i++) {
@@ -51,11 +51,6 @@ class LiteOpActor : public OpActor<lite::Tensor> {
 #endif
   }
   ~LiteOpActor() override {
-    for (auto map : isolate_input_map_) {
-      auto isolate_input_tensor = map.first;
-      isolate_input_tensor->set_data(nullptr);
-      delete isolate_input_tensor;
-    }
     delete call_node_;
     delete partial_node_;
   }
@@ -69,7 +64,8 @@ class LiteOpActor : public OpActor<lite::Tensor> {
     }
     return ret;
   }
-  int LiteActorInit(std::vector<std::shared_ptr<LiteOpActor>> *actors);
+  int LiteActorInit(std::vector<std::shared_ptr<LiteOpActor>> *actors,
+                    std::unordered_map<Tensor *, Tensor *> *input_map);
   int ResizeGraphInput(const std::vector<mindspore::tensor::MSTensor *> &inputs,
                        const std::vector<std::vector<int>> &dims);
 
@@ -93,7 +89,7 @@ class LiteOpActor : public OpActor<lite::Tensor> {
   std::unordered_map<kernel::LiteKernel *, AID> subgraph_to_actor_{};
   std::vector<OpDataPtr<Tensor>> outputs_data_{};
   std::vector<Tensor *> inputs_data_{};
-  std::unordered_map<Tensor *, Tensor *> isolate_input_map_{}; /* <calculate-tensor, src-input-tensor> */
+  std::unordered_map<Tensor *, Tensor *> *isolate_input_map_ = nullptr; /* real obj in session */
 
  private:
   void ReplaceNodeInTensor(kernel::LiteKernel *kernel, Tensor *old_tensor, Tensor *new_tensor);
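Note on the hunks above: LiteOpActor previously owned its own isolate_input_map_ and freed the isolated input tensors in its destructor. After this change the actor only keeps a pointer to a map that lives in LiteSession (handed down through MindrtExecutor and LiteActorInit, as shown in later hunks), and the session destructor releases those tensors instead. A minimal sketch of the new wiring, with names taken from this diff and the surrounding code omitted:

  // owned by LiteSession
  std::unordered_map<Tensor *, Tensor *> isolate_input_map_;
  // the session passes the map by pointer when it builds the mindrt executor
  executor_ = new (std::nothrow) MindrtExecutor(&isolate_graph_output_map_, &isolate_input_map_);
  // MindrtExecutor::Prepare then hands the same pointer to every actor
  ret = actor->LiteActorInit(&op_actors_, isolate_input_map_);  // the actor stores the pointer only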
@@ -31,6 +31,7 @@
 #include "src/kernel_registry.h"
 #include "src/lite_model.h"
 #include "src/weight_decoder.h"
+#include "src/runtime/optimize_allocator.h"
 #ifdef ENABLE_MINDRT
 #include "src/mindrt_executor.h"
 #endif
@@ -430,7 +431,7 @@ int LiteSession::IsolateOutputTensor() {
     }
     src_tensor->set_ref_count(1);
 
-    graph_output_map_.insert(std::make_pair(new_tensor, src_tensor));
+    isolate_graph_output_map_.insert(std::make_pair(new_tensor, src_tensor));
 
     /* set new tensor for calculate */
     for (auto subgraph : kernels_) {
@@ -471,6 +472,8 @@ int LiteSession::IsolateOutputTensor() {
 }
 
 void LiteSession::FreePackOpWeight(const std::vector<kernel::LiteKernel *> &kernels) {
+  // For reducing runtime RAM
+  // free pack-op weight because pack-op will not access origin weight in runtime
   for (auto *kernel : kernels) {
     MS_ASSERT(kernel != nullptr);
     if (kernel->subgraph_type() == kernel::kNotSubGraph) {
@@ -493,29 +496,14 @@ void LiteSession::FreePackOpWeight(const std::vector<kernel::LiteKernel *> &kern
 }
 
 int LiteSession::CompileGraph(Model *model) {
-  bool expected = false;
-  if (!is_running_.compare_exchange_strong(expected, true)) {
-    MS_LOG(ERROR) << "Not support multi-threading";
-    return RET_ERROR;
-  }
-  // model.MetaGraph ==> kernels
-  if (model == nullptr) {
-    MS_LOG(ERROR) << "The input model is nullptr.";
-    is_running_.store(false);
-    return RET_PARAM_INVALID;
-  }
-  if (model->buf == nullptr) {
-    MS_LOG(ERROR) << "The input model buf is nullptr.";
-    is_running_.store(false);
-    return RET_PARAM_INVALID;
-  }
-  if (!reinterpret_cast<LiteModel *>(model)->ModelVerify()) {
-    MS_LOG(ERROR) << "wrong model input, please check";
-    is_running_.store(false);
-    return RET_ERROR;
-  }
+  auto ret = PreCheck(model);
+  if (ret != RET_OK) {
+    MS_LOG(ERROR) << "schedule check failed: " << ret;
+    is_running_.store(false);
+    return ret;
+  }
 
-  auto ret = ConvertTensors(model);
+  ret = ConvertTensors(model);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "ConvertTensors failed: " << ret;
     is_running_.store(false);
@@ -523,14 +511,10 @@ int LiteSession::CompileGraph(Model *model) {
   }
   InitGraphInputTensors(model);
   InitGraphOutputTensors(model);
-#ifndef ENABLE_FP16
-  if (context_->GetCpuInfo().enable_float16_) {
-    MS_LOG(WARNING) << unsupport_fp16_log;
-  }
-#endif
+
   // scheduler kernels
-  Scheduler scheduler(context_, ms_context_, model, &tensors_, inputs_, outputs_, is_train_session_, execution_plan_,
-                      delegate_, delegate_device_type_);
+  Scheduler scheduler(context_, ms_context_, model, &tensors_, inputs_, outputs_, is_train_session_, &is_infershape_,
+                      &is_control_flow_, execution_plan_, delegate_, delegate_device_type_);
   scheduler.SetupSchedulerCb(std::move(sched_cb_));
   ret = scheduler.Schedule(&kernels_);
   if (ret != RET_OK) {
@@ -552,33 +536,22 @@ int LiteSession::CompileGraph(Model *model) {
     return RET_OK;
   }
 
-#ifdef ENABLE_MINDRT
-  ret = IsolateOutputTensor();
+  ret = InitExecutor();
   if (ret != RET_OK) {
-    MS_LOG(ERROR) << "Isolate output tensor failed.";
+    MS_LOG(ERROR) << "InitExecutor failed: " << ret;
     is_running_.store(false);
     return ret;
   }
-  executor_ = new (std::nothrow) MindrtExecutor(&graph_output_map_);
-#else
-  executor_ = new (std::nothrow) Executor();
-#endif
-  if (executor_ == nullptr) {
-    MS_LOG(ERROR) << "New Executor failed";
-    is_running_.store(false);
-    return RET_ERROR;
-  }
-
-  ret = executor_->Prepare(this->kernels_, this->inputs_, this->outputs_, context_);
-  if (ret != RET_OK) {
-    MS_LOG(ERROR) << "Prepare executor failed: " << ret;
-    is_running_.store(false);
-    return ret;
-  }
 
-  // For reducing runtime RAM, free packop weight because packop will pack weight and will not access to origin weight
   FreePackOpWeight(kernels_);
 
+  ret = OptimizeRuntimeAllocator();
+  if (ret != RET_OK) {
+    MS_LOG(ERROR) << "OptimizeRuntimeAllocator failed.";
+    is_running_.store(false);
+    return ret;
+  }
+
   is_running_.store(false);
   return RET_OK;
 }
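With the hunks above, CompileGraph no longer inlines the model checks and the executor setup: they move into PreCheck() and InitExecutor() (both added further down in this diff), and a new OptimizeRuntimeAllocator() step runs after the pack weights are freed. The resulting flow, condensed from the replacement lines (error handling elided, not a literal copy):

  int LiteSession::CompileGraph(Model *model) {
    auto ret = PreCheck(model);            // multi-thread guard + model/buffer verification
    ret = ConvertTensors(model);
    InitGraphInputTensors(model);
    InitGraphOutputTensors(model);
    Scheduler scheduler(..., &is_infershape_, &is_control_flow_, ...);  // scheduler reports status back
    ret = scheduler.Schedule(&kernels_);
    ret = InitExecutor();                  // IsolateOutputTensor + MindrtExecutor/Executor + Prepare
    FreePackOpWeight(kernels_);
    ret = OptimizeRuntimeAllocator();      // plan a shared buffer for intermediate CPU tensors
    is_running_.store(false);
    return RET_OK;
  }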
@@ -824,19 +797,25 @@ LiteSession::~LiteSession() {
     tensor = nullptr;
   }
 
-  for (auto item : graph_output_map_) {
+  for (auto item : isolate_graph_output_map_) {
     auto isolate_output_tensor = item.first;
     isolate_output_tensor->set_data(nullptr);
     delete isolate_output_tensor;
     isolate_output_tensor = nullptr;
   }
 
+  for (auto map : isolate_input_map_) {
+    auto isolate_input_tensor = map.first;
+    isolate_input_tensor->set_data(nullptr);
+    delete isolate_input_tensor;
+  }
+
   // Tensor * in input_map output_map are freed in tensors
   input_map_.clear();
   output_node_map_.clear();
   output_tensor_map_.clear();
   input_vec_.clear();
-  graph_output_map_.clear();
+  isolate_graph_output_map_.clear();
 
   delete this->executor_;
   this->executor_ = nullptr;
@@ -986,6 +965,157 @@ int LiteSession::Resize(const std::vector<mindspore::tensor::MSTensor *> &inputs
   return RET_OK;
 }
 
+int LiteSession::PreCheck(Model *model) {
+  bool expected = false;
+  if (!is_running_.compare_exchange_strong(expected, true)) {
+    MS_LOG(ERROR) << "Not support multi-threading";
+    return RET_ERROR;
+  }
+  if (model == nullptr) {
+    MS_LOG(ERROR) << "The input model is nullptr.";
+    return RET_PARAM_INVALID;
+  }
+  if (model->buf == nullptr) {
+    MS_LOG(ERROR) << "The input model buf is nullptr.";
+    return RET_PARAM_INVALID;
+  }
+  if (!reinterpret_cast<LiteModel *>(model)->ModelVerify()) {
+    MS_LOG(ERROR) << "wrong model input, please check";
+    return RET_ERROR;
+  }
+
+#ifndef ENABLE_FP16
+  if (context_->GetCpuInfo().enable_float16_) {
+    MS_LOG(WARNING) << unsupport_fp16_log;
+  }
+#endif
+  return RET_OK;
+}
+
+int LiteSession::InitExecutor() {
+  int ret = RET_OK;
+#ifdef ENABLE_MINDRT
+  ret = IsolateOutputTensor();
+  if (ret != RET_OK) {
+    MS_LOG(ERROR) << "Isolate output tensor failed.";
+    return ret;
+  }
+  executor_ = new (std::nothrow) MindrtExecutor(&isolate_graph_output_map_, &isolate_input_map_);
+#else
+  executor_ = new (std::nothrow) Executor();
+#endif
+  if (executor_ == nullptr) {
+    MS_LOG(ERROR) << "New Executor failed";
+    return RET_ERROR;
+  }
+
+  ret = executor_->Prepare(kernels_, inputs_, outputs_, context_);
+  if (ret != RET_OK) {
+    MS_LOG(ERROR) << "Prepare executor failed: " << ret;
+    return ret;
+  }
+  return RET_OK;
+}
+
+int LiteSession::OptimizeRuntimeAllocator() {
+  return RET_OK;
+
+  if (is_infershape_ != RET_OK) {
+    MS_LOG(ERROR) << "Not support opt allocator in runtime-infershape.";
+    return RET_OK;
+  }
+  if (is_control_flow_ == true) {
+    MS_LOG(ERROR) << "Not support opt allocator in control flow model.";
+    return RET_OK;
+  }
+
+  AllocatorPtr default_allocator = context_->allocator;
+  OptAllocatorPtr optimize_allocator = std::make_shared<OptimizeAllocator>();
+  std::unordered_map<lite::Tensor *, int> ref_count;
+
+  for (auto subgraph : kernels_) {
+    if (subgraph->desc().arch != kernel::KERNEL_ARCH::kCPU) {
+      continue;
+    }
+
+    for (auto in_tensor : subgraph->in_tensors()) {
+      auto iter = isolate_input_map_.find(in_tensor);
+      if (isolate_input_map_.end() == iter) break;
+      auto src_t = iter->second;
+
+      if (src_t->data_type() == in_tensor->data_type()) {
+        in_tensor->set_allocator(src_t->allocator());
+        ref_count[src_t] += in_tensor->init_ref_count();
+        continue;
+      }
+
+      if (src_t->allocator() == default_allocator) {
+        src_t->set_allocator(optimize_allocator);
+        ref_count[src_t] = src_t->init_ref_count();
+        optimize_allocator->MallocTensorData(src_t);
+      }
+      if (ref_count[in_tensor]-- <= 0) {
+        optimize_allocator->FreeTensorData(in_tensor);
+      }
+    }
+
+    auto kernel_list = reinterpret_cast<kernel::SubGraphKernel *>(subgraph)->nodes();
+    for (auto kernel : kernel_list) {
+      /* malloc for output */
+      for (auto tensor : kernel->out_tensors()) {
+        if (tensor->IsGraphOutput() == true) {
+          continue;
+        }
+        if (tensor->allocator() != default_allocator) {
+          continue;
+        }
+        tensor->set_allocator(optimize_allocator);
+        ref_count[tensor] = tensor->init_ref_count();
+        optimize_allocator->MallocTensorData(tensor);
+      }
+
+      /* free input after run */
+      for (auto tensor : kernel->in_tensors()) {
+        if (tensor->allocator() != optimize_allocator) {
+          continue;
+        }
+        if (ref_count[tensor]-- <= 0) {
+          optimize_allocator->FreeTensorData(tensor);
+        }
+      }
+    }
+  }
+
+  auto ret = OptAllocatorSetData(optimize_allocator);
+  if (ret != RET_OK) {
+    MS_LOG(ERROR) << "using optimize allocator failed.";
+    return ret;
+  }
+  return RET_OK;
+}
+
+int LiteSession::OptAllocatorSetData(OptAllocatorPtr opt_allocator) {
+  void *data = opt_allocator->MallocOptData();
+  if (data == nullptr) {
+    MS_LOG(ERROR) << "malloc optimize data failed.";
+    return RET_ERROR;
+  }
+  int8_t *int8_data = reinterpret_cast<int8_t *>(data);
+  auto offset_map = opt_allocator->GetOffsetMap();
+
+  for (auto tensor : tensors_) {
+    if (tensor->allocator() != opt_allocator) {
+      continue;
+    }
+    auto offset_iter = offset_map.find(tensor);
+    if (offset_iter == offset_map.end()) {
+      return RET_ERROR;
+    }
+    tensor->set_data(int8_data + offset_iter->second);
+  }
+  return RET_OK;
+}
+
 int LiteSession::InitGPURuntime() {
   if (context_->IsCpuEnabled()) {
     CpuBindMode cpu_bind_mode = context_->GetCpuDeviceInfo()->cpu_bind_mode_;
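The new OptimizeRuntimeAllocator/OptAllocatorSetData pair works in two phases. At compile time it replays the kernels in execution order, reserving an offset for each intermediate CPU tensor when it is produced and releasing that offset once the tensor's init_ref_count consumers have been counted down, so OptimizeAllocator only records a layout. Afterwards OptAllocatorSetData makes a single MallocOptData call and binds every planned tensor to base + offset. Note that the early return RET_OK; at the top of OptimizeRuntimeAllocator leaves the feature switched off in this commit, and graph outputs, non-CPU subgraphs, control-flow models and failed-infershape models are excluded. A compressed view of the two phases as they appear above:

  // Phase 1 (planning, no real allocation), per kernel in execution order:
  //   for each out_tensor: optimize_allocator->MallocTensorData(out_tensor);   // reserve an offset
  //   for each in_tensor:  if (ref_count[in_tensor]-- <= 0)
  //                          optimize_allocator->FreeTensorData(in_tensor);    // offset becomes reusable
  // Phase 2 (binding, one allocation for the whole plan):
  //   int8_t *base = reinterpret_cast<int8_t *>(opt_allocator->MallocOptData());
  //   tensor->set_data(base + offset_map[tensor]);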
@@ -28,6 +28,7 @@
 #include "include/lite_session.h"
 #include "include/model.h"
 #include "src/inner_context.h"
+#include "src/runtime/optimize_allocator.h"
 #include "schema/model_generated.h"
 #include "src/executor.h"
 #include "src/tensor.h"
@@ -125,12 +126,19 @@ class LiteSession : public session::LiteSession {
   static void FreePackOpWeight(const std::vector<kernel::LiteKernel *> &kernels);
 
  private:
+  int PreCheck(Model *model);
+
+  int InitExecutor();
+
   void ResetInputsShape(const std::vector<std::vector<int>> &dims);
 
   int InitGPURuntime();
 
   bool IsIsolatedSubGraph(kernel::LiteKernel *kernel);
 
+  int OptimizeRuntimeAllocator();
+  int OptAllocatorSetData(OptAllocatorPtr opt_allocator);
+
  protected:
   InnerContext *context_ = nullptr;
   mindspore::Context *ms_context_ = nullptr;
@@ -150,7 +158,11 @@ class LiteSession : public session::LiteSession {
   std::vector<std::string> output_tensor_names_;
   // graph output tensor name -- output tensor
   std::unordered_map<std::string, mindspore::tensor::MSTensor *> output_tensor_map_;
-  std::unordered_map<Tensor *, Tensor *> graph_output_map_; /* <calculate-tensor, graph-output-tensor> */
+
+  // graph isolate tensors
+  std::unordered_map<Tensor *, Tensor *> isolate_graph_output_map_; /* <calculate-tensor, graph-output-tensor> */
+  std::unordered_map<Tensor *, Tensor *> isolate_input_map_;        /* <calculate-tensor, src-input-tensor> */
+
   Executor *executor_ = nullptr;
   Model *model_ = nullptr;
   std::atomic<bool> is_running_ = {false};
@@ -159,6 +171,8 @@ class LiteSession : public session::LiteSession {
 #if GPU_OPENCL
   opencl::OpenCLRuntimeInnerWrapper *opencl_runtime_wrapper_{nullptr};
 #endif
+  int is_infershape_{RET_ERROR};
+  bool is_control_flow_ = false;
   std::unique_ptr<SchedulerCb> sched_cb_;
   std::shared_ptr<Delegate> delegate_ = nullptr;
   int delegate_device_type_ = -1;  // -1: not specified; 0: CPU; 1: GPU; 2: NPU
@@ -55,13 +55,13 @@ int MindrtExecutor::PrepareOutputData(const std::vector<kernel::LiteKernel *> &k
       continue;
     }
     auto current_output_map =
-      std::find_if(output_tensor_map_->begin(), output_tensor_map_->end(), [&](const auto output_map_tensor) {
+      std::find_if(isolate_output_map_->begin(), isolate_output_map_->end(), [&](const auto output_map_tensor) {
         if (graph_output_tensor == output_map_tensor.second) {
           return true;
         }
         return false;
       });
-    MS_ASSERT(current_output_map != output_tensor_map_->end());
+    MS_ASSERT(current_output_map != isolate_output_map_->end());
     Tensor *subgraph_output_tensor = current_output_map->first;
 
     for (size_t j = 0; j < kernels.size(); ++j) {
@@ -120,7 +120,7 @@ int MindrtExecutor::Prepare(const std::vector<kernel::LiteKernel *> &kernels, co
   }
 
   for (auto actor : op_actors_) {
-    ret = actor->LiteActorInit(&op_actors_);
+    ret = actor->LiteActorInit(&op_actors_, isolate_input_map_);
     if (ret != RET_OK) {
       MS_LOG(ERROR) << "LiteActorInit failed, actor aid: " << actor->GetAID();
       return ret;
@@ -131,7 +131,7 @@ int MindrtExecutor::Prepare(const std::vector<kernel::LiteKernel *> &kernels, co
 }
 
 void MindrtExecutor::TransferGraphOutput() {
-  for (auto tensor_map : *output_tensor_map_) {
+  for (auto tensor_map : *isolate_output_map_) {
     auto dst_tensor = tensor_map.second;
     auto src_tensor = tensor_map.first;
     dst_tensor->set_shape(src_tensor->shape());
@@ -151,7 +151,7 @@ void MindrtExecutor::TransferGraphOutput() {
 }
 
 void MindrtExecutor::FreeOutputTensor() {
-  for (auto tensor_map : *output_tensor_map_) {
+  for (auto tensor_map : *isolate_output_map_) {
     auto src_tensor = tensor_map.first;
     auto dst_tensor = tensor_map.second;
     if (dst_tensor->allocator() != nullptr) {
@@ -29,7 +29,9 @@
 namespace mindspore::lite {
 class MindrtExecutor : public Executor {
  public:
-  explicit MindrtExecutor(std::unordered_map<Tensor *, Tensor *> *output_map) : output_tensor_map_(output_map) {}
+  explicit MindrtExecutor(std::unordered_map<Tensor *, Tensor *> *output_map,
+                          std::unordered_map<Tensor *, Tensor *> *input_map)
+      : isolate_output_map_(output_map), isolate_input_map_(input_map) {}
   virtual ~MindrtExecutor() { MindrtTerminate(op_actors_); }
 
   int Prepare(const std::vector<kernel::LiteKernel *> &kernels, const std::vector<Tensor *> &inputs,
@@ -52,7 +54,8 @@ class MindrtExecutor : public Executor {
   std::vector<std::shared_ptr<LiteOpActor>> op_actors_;
   std::vector<OpDataPtr<Tensor>> input_data_;
   std::vector<OpDataPtr<Tensor>> output_data_;
-  std::unordered_map<Tensor *, Tensor *> *output_tensor_map_;
+  std::unordered_map<Tensor *, Tensor *> *isolate_output_map_;
+  std::unordered_map<Tensor *, Tensor *> *isolate_input_map_;
 };
 
 }  // namespace mindspore::lite
@@ -0,0 +1,102 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "src/runtime/optimize_allocator.h"
+
+namespace mindspore {
+OptimizeAllocator::OptimizeAllocator(size_t aligned_size) {
+  aligned_size_ = aligned_size;
+  return;
+}
+
+OptimizeAllocator::~OptimizeAllocator() {
+  if (data_ == nullptr) {
+    free(data_);
+    data_ = nullptr;
+  }
+}
+
+void *OptimizeAllocator::MallocOptData() {
+  if (data_ == nullptr) {
+    data_ = malloc(total_size_);
+  }
+  return data_;
+}
+
+size_t OptimizeAllocator::FindMinFree(size_t size) {
+  size_t min_size = total_size_;
+  size_t min_addr = total_size_;
+  for (auto const &itr : free_list_) {
+    if (itr.second >= size && min_size > itr.second) {
+      min_size = itr.second;
+      min_addr = itr.first;
+    }
+  }
+  return min_addr;
+}
+
+void OptimizeAllocator::FreeTensorData(lite::Tensor *tensor) {
+  size_t offset = offset_map_[tensor];
+  free_list_[offset] = used_list_[offset];
+  used_list_.erase(offset);
+
+  size_t length = free_list_[offset];
+
+  size_t post_offset = offset + length;
+  auto post_iter = free_list_.find(post_offset);
+  if (post_iter != free_list_.end()) {
+    size_t post_length = post_iter->second;
+    free_list_[offset] = length + post_length;
+    free_list_.erase(post_offset);
+  }
+
+  auto pre_iter = free_list_.lower_bound(offset);
+  if (pre_iter != free_list_.begin()) {
+    pre_iter--;
+    size_t pre_offset = pre_iter->first;
+    if ((pre_offset + free_list_[pre_offset]) == offset) {
+      free_list_[pre_offset] = free_list_[pre_offset] + length;
+      free_list_.erase(offset);
+    }
+  }
+}
+
+void OptimizeAllocator::MallocTensorData(lite::Tensor *tensor) {
+  size_t size = tensor->Size();
+  size_t offset = FindMinFree(size);
+
+  if (offset > total_size_) {
+    if (free_list_.empty()) {
+      offset = total_size_;
+    } else {
+      offset = free_list_.rbegin()->first;
+      if (offset + free_list_[offset] < total_size_) {
+        offset = total_size_;
+      } else {
+        free_list_.erase(offset);
+      }
+    }
+  } else {
+    if (free_list_[offset] > size) {
+      free_list_[offset + size] = free_list_[offset] - size;
+    }
+    free_list_.erase(offset);
+  }
+
+  used_list_[offset] = size;
+  offset_map_[tensor] = offset;
+}
+}  // namespace mindspore
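The allocator above never touches real memory while planning: FindMinFree is a best-fit scan over free_list_ (smallest free block that still fits), MallocTensorData either reuses such a block or places the tensor at the end of the arena, and FreeTensorData returns a block to free_list_ and merges it with adjacent free blocks on both sides. Two details worth flagging in this hunk as extracted: the destructor frees data_ only when it is already nullptr (the condition looks inverted), and no update to total_size_ is visible here, so either that bookkeeping lives outside this excerpt or it was adjusted in a follow-up.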
@@ -0,0 +1,61 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MINDSPORE_LITE_SRC_RUNTIME_OPTIMIZE_ALLOCATOR_H_
+#define MINDSPORE_LITE_SRC_RUNTIME_OPTIMIZE_ALLOCATOR_H_
+
+#include <memory>
+#include <map>
+#include <unordered_map>
+#include "include/api/allocator.h"
+#include "include/errorcode.h"
+#include "src/tensor.h"
+
+namespace mindspore {
+class OptimizeAllocator : public Allocator {
+ public:
+  explicit OptimizeAllocator(size_t aligned_size = 32);
+  ~OptimizeAllocator() override;
+
+ public:
+  void *Malloc(size_t size) override { return nullptr; }
+  void Free(void *ptr) override { return; }
+  int RefCount(void *ptr) override { return lite::RET_OK; }
+  int SetRefCount(void *ptr, int ref_count) override { return lite::RET_OK; }
+  int IncRefCount(void *ptr, int ref_count) override { return lite::RET_OK; }
+  int DecRefCount(void *ptr, int ref_count) override { return lite::RET_OK; }
+
+ public:
+  void MallocTensorData(lite::Tensor *tensor);
+  void FreeTensorData(lite::Tensor *tensor);
+  void *MallocOptData();
+  const std::unordered_map<lite::Tensor *, size_t> &GetOffsetMap() const { return offset_map_; }
+
+ private:
+  size_t FindMinFree(size_t size);
+
+ private:
+  void *data_ = nullptr;
+  size_t total_size_;
+  std::unordered_map<lite::Tensor *, size_t> offset_map_;
+  std::map<size_t, size_t> free_list_; /* offset, size */
+  std::map<size_t, size_t> used_list_; /* offset, size */
+};
+
+using OptAllocatorPtr = std::shared_ptr<OptimizeAllocator>;
+}  // namespace mindspore
+
+#endif  // MINDSPORE_LITE_SRC_RUNTIME_OPTIMIZE_ALLOCATOR_H_
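The Allocator overrides above are deliberate no-ops: memory is planned ahead of time, not requested at run time. To see the planning behaviour in isolation, the following is a small self-contained sketch of the same best-fit-with-coalescing idea using plain sizes instead of lite::Tensor objects; it mirrors the free/used bookkeeping declared above but is not the class itself.

  #include <cstdint>
  #include <cstdio>
  #include <iterator>
  #include <map>

  // Toy offset planner: best-fit reuse of freed ranges, merge neighbours on free.
  struct Planner {
    std::map<size_t, size_t> free_list, used_list; /* offset -> size */
    size_t total = 0;

    size_t Plan(size_t size) {  // analogue of MallocTensorData
      size_t best = SIZE_MAX, best_size = SIZE_MAX;
      for (auto &f : free_list) {
        if (f.second >= size && f.second < best_size) { best = f.first; best_size = f.second; }
      }
      size_t offset;
      if (best == SIZE_MAX) {  // nothing fits: grow the arena
        offset = total;
        total += size;
      } else {  // split the chosen free block
        offset = best;
        if (best_size > size) free_list[best + size] = best_size - size;
        free_list.erase(best);
      }
      used_list[offset] = size;
      return offset;
    }

    void Release(size_t offset) {  // analogue of FreeTensorData
      size_t len = used_list[offset];
      used_list.erase(offset);
      free_list[offset] = len;
      auto next = free_list.find(offset + len);  // merge with the following free block
      if (next != free_list.end()) { free_list[offset] += next->second; free_list.erase(next); }
      auto it = free_list.find(offset);
      if (it != free_list.begin()) {  // merge with the preceding free block
        auto prev = std::prev(it);
        if (prev->first + prev->second == offset) { prev->second += it->second; free_list.erase(it); }
      }
    }
  };

  int main() {
    Planner p;
    size_t a = p.Plan(64), b = p.Plan(128);  // a = 0, b = 64
    p.Release(a);                            // [0, 64) becomes reusable
    size_t c = p.Plan(64);                   // best fit reuses offset 0
    std::printf("a=%zu b=%zu c=%zu arena=%zu\n", a, b, c, p.total);  // a=0 b=64 c=0 arena=192
    return 0;
  }

Under this plan, three tensors share a 192-byte arena instead of 256 bytes, which is the whole point of the runtime allocator.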
@@ -241,15 +241,17 @@ int Scheduler::InitKernels(std::vector<kernel::LiteKernel *> dst_kernels) {
 }
 
 int Scheduler::SchedulePreProcess() {
+  schema_version_ = reinterpret_cast<LiteModel *>(src_model_)->GetSchemaVersion();
+
   this->graph_output_node_indexes_ = GetGraphOutputNodes(src_model_);
 
-  int infershape_ret = InferSubGraphShape(kMainSubGraphIndex);
-  if (infershape_ret != RET_OK && infershape_ret != RET_INFER_INVALID) {
+  *is_infershape_ = InferSubGraphShape(kMainSubGraphIndex);
+  if (*is_infershape_ != RET_OK && *is_infershape_ != RET_INFER_INVALID) {
     MS_LOG(ERROR) << "op infer shape failed.";
-    return infershape_ret;
+    return *is_infershape_;
   }
 
-  if (context_->enable_parallel_ && infershape_ret != RET_INFER_INVALID) {
+  if (context_->enable_parallel_ && *is_infershape_ != RET_INFER_INVALID) {
 #ifndef AUTO_PARALLEL_CLIP
     auto search_sub_graph =
       SearchSubGraph(context_, src_model_, src_tensors_, &op_parameters_, &graph_output_node_indexes_);
@@ -275,6 +277,21 @@ int Scheduler::CheckCpuValid(std::vector<kernel::LiteKernel *> *dst_kernels) {
   return RET_OK;
 }
 
+int Scheduler::ConstructSubGraphs(std::vector<kernel::LiteKernel *> *dst_kernels) {
+#ifndef CONTROLFLOW_TENSORLIST_CLIP
+  if (IsControlFlowParttern(*dst_kernels)) {
+    *is_control_flow_ = true;
+    return ConstructControlFlowMainGraph(dst_kernels);
+  }
+#endif
+
+  *is_control_flow_ = false;
+  auto src_kernel = *dst_kernels;
+  dst_kernels->clear();
+  std::map<const kernel::LiteKernel *, bool> is_kernel_finish;
+  return ConstructNormalSubGraphs(src_kernel, dst_kernels, &is_kernel_finish);
+}
+
 int Scheduler::Schedule(std::vector<kernel::LiteKernel *> *dst_kernels) {
   int check_input_ret = CheckInputParam(dst_kernels);
   if (check_input_ret != RET_OK) {
@@ -282,8 +299,6 @@ int Scheduler::Schedule(std::vector<kernel::LiteKernel *> *dst_kernels) {
     return check_input_ret;
   }
 
-  schema_version_ = reinterpret_cast<LiteModel *>(src_model_)->GetSchemaVersion();
-
   int ret = SchedulePreProcess();
   if (ret != RET_OK) {
     return ret;
@@ -307,7 +322,6 @@ int Scheduler::Schedule(std::vector<kernel::LiteKernel *> *dst_kernels) {
       MS_LOG(ERROR) << "Repalce delegate kernels failed.";
       return ret;
     }
-    context_->thread_pool()->SetSpinCountMinValue();
 #endif
 
   ret = CheckCpuValid(dst_kernels);
@@ -322,26 +336,11 @@ int Scheduler::Schedule(std::vector<kernel::LiteKernel *> *dst_kernels) {
   RuntimePass(context_, dst_kernels, src_tensors_);
 #endif
 
-#ifndef CONTROLFLOW_TENSORLIST_CLIP
-  if (IsControlFlowParttern(*dst_kernels)) {
-    ret = ConstructControlFlowMainGraph(dst_kernels);
-    if (ret != RET_OK) {
-      MS_LOG(ERROR) << "ConstructControlFlowMainGraph failed.";
-      return ret;
-    }
-  } else {
-#endif
-    auto src_kernel = *dst_kernels;
-    dst_kernels->clear();
-    std::map<const kernel::LiteKernel *, bool> is_kernel_finish;
-    ret = ConstructSubGraphs(src_kernel, dst_kernels, &is_kernel_finish);
-    if (ret != RET_OK) {
-      MS_LOG(ERROR) << "ConstructSubGraphs failed.";
-      return ret;
-    }
-#ifndef CONTROLFLOW_TENSORLIST_CLIP
-  }
-#endif
+  ret = ConstructSubGraphs(dst_kernels);
+  if (ret != RET_OK) {
+    MS_LOG(ERROR) << "ConstructSubGraphs failed.";
+    return ret;
+  }
 
   ret = InitKernels(*dst_kernels);
   if (ret != RET_OK) {
@@ -457,6 +456,9 @@ int Scheduler::InitDelegateKernels(std::vector<kernel::LiteKernel *> *dst_kernel
     return RET_OK;
   }
 
+  /* set delegate spin count */
+  context_->thread_pool()->SetSpinCountMinValue();
+
   /* external delegate */
   if (delegate_device_type_ == -1) {
     auto ret = ReplaceDelegateKernels(dst_kernels);
@@ -1521,9 +1523,9 @@ kernel::LiteKernel *FindAllSubGraphKernels(const std::vector<kernel::LiteKernel
   }
 }  // namespace
 
-int Scheduler::ConstructSubGraphs(std::vector<kernel::LiteKernel *> src_kernel,
-                                  std::vector<kernel::LiteKernel *> *dst_kernel,
-                                  std::map<const kernel::LiteKernel *, bool> *is_kernel_finish) {
+int Scheduler::ConstructNormalSubGraphs(std::vector<kernel::LiteKernel *> src_kernel,
+                                        std::vector<kernel::LiteKernel *> *dst_kernel,
+                                        std::map<const kernel::LiteKernel *, bool> *is_kernel_finish) {
   if (src_kernel.empty()) {
     return RET_OK;
   }
@@ -41,9 +41,9 @@ class Scheduler {
  public:
   Scheduler(const InnerContext *ctx, const mindspore::Context *ms_ctx, Model *src_model,
             std::vector<Tensor *> *src_tensors, const std::vector<Tensor *> &input_tensors,
-            const std::vector<Tensor *> &output_tensors, bool is_train_session,
-            std::map<std::string, TypeId> *executions, std::shared_ptr<Delegate> delegate = nullptr,
-            int delegate_device_type = -1)
+            const std::vector<Tensor *> &output_tensors, bool is_train_session, int *is_infershape,
+            bool *is_control_flow, std::map<std::string, TypeId> *executions,
+            std::shared_ptr<Delegate> delegate = nullptr, int delegate_device_type = -1)
       : context_(ctx),
         ms_context_(ms_ctx),
         src_model_(src_model),
@@ -51,6 +51,8 @@ class Scheduler {
         inputs_(input_tensors),
         outputs_(output_tensors),
         is_train_session_(is_train_session),
+        is_control_flow_(is_control_flow),
+        is_infershape_(is_infershape),
         delegate_(delegate),
         delegate_device_type_(delegate_device_type),
         execution_plan_(executions) {}
@@ -102,8 +104,12 @@ class Scheduler {
   // find in_kernels_ and out_kernels of kernel, sub_graph and nodes_ in sub_graph
   static void FindAllInoutKernels(const std::vector<kernel::LiteKernel *> &kernels);
   // vector<LiteKernel/SubGraphKernel> --> vector<SubGraphKernel>
-  int ConstructSubGraphs(std::vector<kernel::LiteKernel *> src_kernel, std::vector<kernel::LiteKernel *> *dst_kernel,
-                         std::map<const kernel::LiteKernel *, bool> *sinked_kernel_map);
+  int ConstructNormalSubGraphs(std::vector<kernel::LiteKernel *> src_kernel,
+                               std::vector<kernel::LiteKernel *> *dst_kernel,
+                               std::map<const kernel::LiteKernel *, bool> *sinked_kernel_map);
+
+  int ConstructSubGraphs(std::vector<kernel::LiteKernel *> *dst_kernel);
+
   // create subgraph_kernel from a vector of kernel
   std::vector<kernel::LiteKernel *> ScheduleMainSubGraphToKernels();
   kernel::LiteKernel *SchedulePartialToSubGraphKernel(const int &subgraph_index);
@@ -147,6 +153,8 @@ class Scheduler {
   std::vector<size_t> graph_output_node_indexes_;
   std::map<int, OpParameter *> op_parameters_;
   bool is_train_session_ = false;
+  bool *is_control_flow_ = nullptr;
+  int *is_infershape_ = nullptr;
   std::unique_ptr<SchedulerCb> sched_cb_;
   std::map<kernel::Kernel *, const schema::Primitive *> primitives_;
   std::shared_ptr<Delegate> delegate_ = nullptr;
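The scheduler changes above are plumbing for the allocator: Scheduler now takes int *is_infershape and bool *is_control_flow, fills them in SchedulePreProcess and in the new single-argument ConstructSubGraphs wrapper, and LiteSession uses those two flags to skip OptimizeRuntimeAllocator when shapes could not be inferred statically or when the model contains control flow. The original three-argument ConstructSubGraphs is renamed to ConstructNormalSubGraphs, so the subgraph construction behaviour itself is unchanged.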
@@ -22,6 +22,7 @@ file(GLOB_RECURSE TEST_UT_SRC
     ${TEST_DIR}/ut/src/registry/registry_custom_op_test.cc
     ${TEST_DIR}/ut/src/runtime/runtime_pass_tests.cc
     ${TEST_DIR}/st/multiple_device_test.cc
+    ${TEST_DIR}/st/optimize_allocator_test.cc
    ${TEST_DIR}/st/mindrt_parallel_runtime_test.cc
     ${TEST_DIR}/st/mix_data_type_test.cc
     ${TEST_DIR}/ut/nnacl/infer/*.cc
@@ -95,5 +95,8 @@ echo 'run custom delegate st test'
 echo 'runtime pass'
 ./lite-test --gtest_filter="RuntimePass.*"
 
+echo 'Optimize Allocator'
+./lite-test --gtest_filter="OptAllocator.*"
+
 echo 'Runtime config file test'
 ./lite-test --gtest_filter="MixDataTypeTest.Config1"
@@ -0,0 +1,148 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "common/common_test.h"
+#include "schema/inner/model_generated.h"
+#include "src/lite_session.h"
+#include "src/sub_graph_kernel.h"
+#include "ir/dtype/type_id.h"
+#include "include/version.h"
+#include "include/model.h"
+
+namespace mindspore {
+class OptAllocator : public mindspore::CommonTest {
+ public:
+  OptAllocator() = default;
+};
+
+void CreateModel1(mindspore::schema::MetaGraphT *meta_graph) {
+  meta_graph->name = "graph";
+  meta_graph->version = mindspore::lite::Version();
+
+  /* cos
+   *  /  \
+   * sin  |
+   *   \  /
+   *    add
+   *     |
+   * */
+
+  auto cos = std::make_unique<mindspore::schema::CNodeT>();
+  cos->inputIndex = {0};
+  cos->outputIndex = {1};
+  cos->primitive = std::make_unique<mindspore::schema::PrimitiveT>();
+  cos->primitive->value.type = mindspore::schema::PrimitiveType_Cos;
+  auto cos_primitive = new mindspore::schema::CosT;
+  cos->primitive->value.value = cos_primitive;
+  cos->name = "cos";
+
+  auto sin = std::make_unique<mindspore::schema::CNodeT>();
+  sin->inputIndex = {1};
+  sin->outputIndex = {2};
+  sin->primitive = std::make_unique<mindspore::schema::PrimitiveT>();
+  sin->primitive->value.type = mindspore::schema::PrimitiveType_Sin;
+  auto sin_primitive = new mindspore::schema::SinT;
+  sin->primitive->value.value = sin_primitive;
+  sin->name = "sin";
+
+  auto add = std::make_unique<mindspore::schema::CNodeT>();
+  add->inputIndex = {1, 2};
+  add->outputIndex = {3};
+  add->primitive = std::make_unique<mindspore::schema::PrimitiveT>();
+  add->primitive->value.type = mindspore::schema::PrimitiveType_AddFusion;
+  auto add_primitive = new mindspore::schema::AddFusionT;
+  add->primitive->value.value = add_primitive;
+  add->name = "add";
+
+  /* tensors */
+  auto tensor0 = std::make_unique<mindspore::schema::TensorT>();
+  tensor0->nodeType = mindspore::lite::NodeType_ValueNode;
+  tensor0->format = mindspore::schema::Format_NHWC;
+  tensor0->dataType = mindspore::TypeId::kNumberTypeFloat32;
+  tensor0->dims = {4};
+  tensor0->offset = -1;
+  tensor0->name = "input";
+
+  auto tensor1 = std::make_unique<mindspore::schema::TensorT>();
+  tensor1->nodeType = mindspore::lite::NodeType_ValueNode;
+  tensor1->format = mindspore::schema::Format_NHWC;
+  tensor1->dataType = mindspore::TypeId::kNumberTypeFloat32;
+  tensor1->dims = {4};
+  tensor1->offset = -1;
+  tensor1->name = "cos";
+
+  auto tensor2 = std::make_unique<mindspore::schema::TensorT>();
+  tensor2->nodeType = mindspore::lite::NodeType_ValueNode;
+  tensor2->format = mindspore::schema::Format_NHWC;
+  tensor2->dataType = mindspore::TypeId::kNumberTypeFloat32;
+  tensor2->dims = {4};
+  tensor2->offset = -1;
+  tensor2->name = "sin";
+
+  auto tensor3 = std::make_unique<mindspore::schema::TensorT>();
+  tensor3->nodeType = mindspore::lite::NodeType_ValueNode;
+  tensor3->format = mindspore::schema::Format_NHWC;
+  tensor3->dataType = mindspore::TypeId::kNumberTypeFloat32;
+  tensor3->dims = {4};
+  tensor3->offset = -1;
+  tensor3->name = "add";
+
+  meta_graph->nodes.emplace_back(std::move(cos));
+  meta_graph->nodes.emplace_back(std::move(sin));
+  meta_graph->nodes.emplace_back(std::move(add));
+
+  meta_graph->allTensors.emplace_back(std::move(tensor0));
+  meta_graph->allTensors.emplace_back(std::move(tensor1));
+  meta_graph->allTensors.emplace_back(std::move(tensor2));
+  meta_graph->allTensors.emplace_back(std::move(tensor3));
+
+  meta_graph->inputIndex = {0};
+  meta_graph->outputIndex = {3};
+}
+
+TEST_F(OptAllocator, OptAllocator1) {
+  auto meta_graph = std::make_shared<mindspore::schema::MetaGraphT>();
+  CreateModel1(meta_graph.get());
+
+  flatbuffers::FlatBufferBuilder builder(1024);
+  auto offset = mindspore::schema::MetaGraph::Pack(builder, meta_graph.get());
+  builder.Finish(offset);
+  mindspore::schema::FinishMetaGraphBuffer(builder, offset);
+  size_t size = builder.GetSize();
+  const char *content = reinterpret_cast<char *>(builder.GetBufferPointer());
+
+  auto context = std::make_shared<mindspore::lite::Context>();
+  auto *lite_session = mindspore::session::LiteSession::CreateSession(content, size, context.get());
+  ASSERT_NE(lite_session, nullptr);
+
+  auto input = lite_session->GetInputs().front();
+  std::vector<float> in_data = {1.0, 2.0, 3.0, 4.0};
+  memcpy(input->MutableData(), in_data.data(), input->Size());
+
+  auto ret = lite_session->RunGraph();
+  ASSERT_EQ(mindspore::lite::RET_OK, ret);
+
+  /* checkout output */
+  void *out_data = lite_session->GetOutputs().begin()->second->MutableData();
+  float *fp32_data = reinterpret_cast<float *>(out_data);
+
+  ASSERT_LE(fabs(fp32_data[0] - (1.054698)), 0.01);
+  ASSERT_LE(fabs(fp32_data[1] - (-0.820386)), 0.01);
+  ASSERT_LE(fabs(fp32_data[2] - (-1.826014)), 0.01);
+  ASSERT_LE(fabs(fp32_data[3] - (-1.261727)), 0.01);
+
+  delete lite_session;
+}
+}  // namespace mindspore
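The expected values in the ASSERT_LE checks follow directly from the graph: tensor 1 is cos(x), tensor 2 is sin(cos(x)), and the output adds the two. A quick standalone check of the constants (not part of the test, just reproducing the numbers):

  #include <cmath>
  #include <cstdio>

  int main() {
    const float in[4] = {1.0f, 2.0f, 3.0f, 4.0f};
    for (float x : in) {
      float c = std::cos(x);       // "cos" node, tensor 1
      float s = std::sin(c);       // "sin" node, tensor 2 (consumes tensor 1)
      std::printf("%f\n", c + s);  // "add" node, tensor 3
    }
    return 0;
  }
  // prints roughly 1.054698, -0.820386, -1.826018, -1.261727,
  // matching the four tolerances asserted in OptAllocator.OptAllocator1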
@@ -159,6 +159,7 @@ set(LITE_SRC
     ${SRC_DIR}/common/prim_util.cc
     ${SRC_DIR}/common/tensor_util.cc
     ${SRC_DIR}/runtime/inner_allocator.cc
+    ${SRC_DIR}/runtime/optimize_allocator.cc
     ${SRC_DIR}/runtime/infer_manager.cc
     ${SRC_DIR}/runtime/runtime_pass.cc
     ${SRC_DIR}/inner_context.cc