diff --git a/mindspore/lite/src/lite_kernel.cc b/mindspore/lite/src/lite_kernel.cc index 6227f71ce47..1211299c784 100644 --- a/mindspore/lite/src/lite_kernel.cc +++ b/mindspore/lite/src/lite_kernel.cc @@ -45,7 +45,7 @@ void LiteKernel::FreeWorkspace() { bool LiteKernel::IsReady(const std::vector &scope_tensors) { return std::all_of(this->in_tensors().begin(), this->in_tensors().end(), [&](lite::Tensor *kernel_in_tensor) { if (IsContain(scope_tensors, kernel_in_tensor)) { - return (kernel_in_tensor->IsConst() || kernel_in_tensor->ref_count() >= 1); + return (kernel_in_tensor->IsConst() || kernel_in_tensor->IsGraphInput() || kernel_in_tensor->ref_count() >= 1); } else { return true; } @@ -54,7 +54,7 @@ bool LiteKernel::IsReady(const std::vector &scope_tensors) { void LiteKernel::InitOutTensorInitRefCount() { for (auto *tensor : this->out_tensors_) { - int init_ref_count = 0; + size_t init_ref_count = 0; for (auto *post_kernel : this->out_kernels_) { init_ref_count += std::count_if(post_kernel->in_tensors_.begin(), post_kernel->in_tensors_.end(), @@ -81,7 +81,7 @@ int LiteKernel::DecOutTensorRefCount() { int LiteKernel::FreeInWorkTensor() const { for (auto &in_tensor : this->in_tensors_) { MS_ASSERT(in_tensor != nullptr); - if (in_tensor->IsConst()) { + if (in_tensor->IsConst() || in_tensor->IsGraphInput()) { continue; } MS_ASSERT(in_tensor->ref_count() > 0); @@ -220,19 +220,18 @@ void LiteKernel::FindInoutKernels(const std::vector &scope } } -std::vector LiteKernelUtil::SubgraphInputKernels( - const std::vector &kernels) { - std::set input_kernels; +std::vector LiteKernelUtil::SubgraphInputNodes(const std::vector &kernels) { + std::set input_nodes; for (const auto &kernel : kernels) { // if kernel has no pre-kernel, kernel is a graph input, it must be a subgraph input if (kernel->in_kernels().empty() && !kernel->in_tensors().empty()) { - input_kernels.insert(kernel); + input_nodes.insert(kernel); continue; } auto all_input_tensors = kernel->in_tensors(); // remove all const tensor from input tensors for (auto iter = all_input_tensors.begin(); iter != all_input_tensors.end();) { - if ((*iter)->IsConst()) { + if ((*iter)->IsConst() || (*iter)->IsGraphInput()) { iter = all_input_tensors.erase(iter); } else { iter++; @@ -249,83 +248,76 @@ std::vector LiteKernelUtil::SubgraphInputKernels( } // if some input tensor is not from kernel in subgraph if (!all_input_tensors.empty()) { - input_kernels.insert(kernel); + input_nodes.insert(kernel); } } std::vector result; - result.insert(result.end(), input_kernels.begin(), input_kernels.end()); + result.insert(result.end(), input_nodes.begin(), input_nodes.end()); return result; } -std::vector LiteKernelUtil::SubgraphOutputKernels( +std::vector LiteKernelUtil::SubgraphOutputNodes( const std::vector &kernels) { - std::set output_kernels; + std::set output_nodes; // if kernel has no post-kernel, kernel is a graph output, it must be a subgraph output for (const auto &kernel : kernels) { if (kernel->is_model_output() || (kernel->out_kernels().empty() && !kernel->out_tensors().empty())) { - output_kernels.insert(kernel); + output_nodes.insert(kernel); continue; } for (const auto &output : kernel->out_kernels()) { auto out_kernel_in_graph = std::find(kernels.begin(), kernels.end(), output); if (out_kernel_in_graph == kernels.end()) { - output_kernels.insert(kernel); + output_nodes.insert(kernel); break; } } } std::vector result; - result.insert(result.end(), output_kernels.begin(), output_kernels.end()); + result.insert(result.end(), output_nodes.begin(), output_nodes.end()); return result; } std::vector LiteKernelUtil::SubgraphInputTensors(const std::vector &kernels) { - std::vector input_tensors; - std::vector input_kernels = SubgraphInputKernels(kernels); - for (const auto &input_kernel : input_kernels) { - auto &outer_in_kernels = input_kernel->in_kernels(); - auto &in_kernel_in_tensors = input_kernel->in_tensors(); - if (outer_in_kernels.empty()) { - for (auto &in_kernel_in_tensor : in_kernel_in_tensors) { - if (!in_kernel_in_tensor->IsConst()) { - if (!IsContain(input_tensors, in_kernel_in_tensor)) { - input_tensors.push_back(in_kernel_in_tensor); - } - } + std::set input_tensors; + std::vector input_nodes = SubgraphInputNodes(kernels); + for (const auto &input_node : input_nodes) { + auto &in_node_in_kernels = input_node->in_kernels(); + auto &in_node_in_tensors = input_node->in_tensors(); + for (auto &in_node_in_tensor : in_node_in_tensors) { + if (in_node_in_tensor->IsGraphInput()) { + input_tensors.insert(in_node_in_tensor); } - continue; } - for (auto outer_in_kernel : outer_in_kernels) { - auto iter = std::find(kernels.begin(), kernels.end(), outer_in_kernel); + for (auto in_node_in_kernel : in_node_in_kernels) { + auto iter = std::find(kernels.begin(), kernels.end(), in_node_in_kernel); if (iter != kernels.end()) { continue; } - auto &outer_in_kernel_out_tensors = outer_in_kernel->out_tensors(); - for (auto in_kernel_in_tensor : in_kernel_in_tensors) { + auto &outer_in_kernel_out_tensors = in_node_in_kernel->out_tensors(); + for (auto in_node_in_tensor : in_node_in_tensors) { auto outer_in_kernel_out_tensors_iter = - std::find(outer_in_kernel_out_tensors.begin(), outer_in_kernel_out_tensors.end(), in_kernel_in_tensor); + std::find(outer_in_kernel_out_tensors.begin(), outer_in_kernel_out_tensors.end(), in_node_in_tensor); if (outer_in_kernel_out_tensors_iter != outer_in_kernel_out_tensors.end()) { - if (!IsContain(input_tensors, in_kernel_in_tensor)) { - input_tensors.emplace_back(in_kernel_in_tensor); - } + input_tensors.insert(in_node_in_tensor); } } } } - return input_tensors; + std::vector result; + result.insert(result.end(), input_tensors.begin(), input_tensors.end()); + return result; } std::vector LiteKernelUtil::SubgraphOutputTensors(const std::vector &kernels) { - std::vector output_tensors; - std::vector output_kernels = SubgraphOutputKernels(kernels); - for (const auto &output_kernel : output_kernels) { + std::set output_tensors; + std::vector output_nodes = SubgraphOutputNodes(kernels); + for (const auto &output_kernel : output_nodes) { auto &outer_out_kernels = output_kernel->out_kernels(); auto &out_kernel_out_tensors = output_kernel->out_tensors(); if (outer_out_kernels.empty()) { for (auto out_kernel_out_tensor : out_kernel_out_tensors) { - if (!IsContain(output_tensors, out_kernel_out_tensor)) { - output_tensors.push_back(out_kernel_out_tensor); - } + output_tensors.insert(out_kernel_out_tensor); } continue; } @@ -339,14 +331,14 @@ std::vector LiteKernelUtil::SubgraphOutputTensors(const std::vec auto outer_out_kernel_in_tensors_iter = std::find(outer_out_kernel_in_tensors.begin(), outer_out_kernel_in_tensors.end(), out_kernel_out_tensor); if (outer_out_kernel_in_tensors_iter != outer_out_kernel_in_tensors.end()) { - if (!IsContain(output_tensors, out_kernel_out_tensor)) { - output_tensors.emplace_back(out_kernel_out_tensor); - } + output_tensors.insert(out_kernel_out_tensor); } } } } - return output_tensors; + std::vector result; + result.insert(result.end(), output_tensors.begin(), output_tensors.end()); + return result; } int LiteKernelUtil::TopologicalSortKernels(std::vector *kernels) { diff --git a/mindspore/lite/src/lite_kernel.h b/mindspore/lite/src/lite_kernel.h index ad84737c90e..9e79e7ac4d5 100644 --- a/mindspore/lite/src/lite_kernel.h +++ b/mindspore/lite/src/lite_kernel.h @@ -56,7 +56,6 @@ enum SubGraphType { kNotSubGraph = 0, kCpuFP32SubGraph, kCpuFP16SubGraph, kGpuSu class LiteKernel { public: LiteKernel() = default; - // parameter should be deleted or freed by caller, and should be deleted or freed after LiteKernel is deleted LiteKernel(OpParameter *parameter, std::vector in_tensors, std::vector out_tensors, const lite::InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) : op_parameter_(parameter), @@ -214,9 +213,9 @@ typedef LiteKernel *(*KernelCreator)(const std::vector &inputs, class LiteKernelUtil { public: - static std::vector SubgraphInputKernels(const std::vector &kernels); + static std::vector SubgraphInputNodes(const std::vector &kernels); - static std::vector SubgraphOutputKernels(const std::vector &kernels); + static std::vector SubgraphOutputNodes(const std::vector &kernels); static std::vector SubgraphInputTensors(const std::vector &kernels); diff --git a/mindspore/lite/src/lite_session.cc b/mindspore/lite/src/lite_session.cc index e1019b5de07..96c16b7c92e 100644 --- a/mindspore/lite/src/lite_session.cc +++ b/mindspore/lite/src/lite_session.cc @@ -148,9 +148,6 @@ lite::Tensor *LiteSession::ConvertTensor(const schema::Tensor &src_tensor) { } else { dst_tensor = new (std::nothrow) Tensor(TypeId(src_tensor.dataType()), shape, src_tensor.format(), src_category); } - if (dst_tensor == nullptr) { - return nullptr; - } return dst_tensor; } @@ -158,6 +155,8 @@ int LiteSession::ConvertTensors(const lite::Model *model) { MS_ASSERT(model != nullptr); copyed_tensor_idxes_.clear(); uint32_t tensor_count = model->all_tensors_.size(); + MS_ASSERT(!model->sub_graphs_.empty()); + auto model_input_indices = model->sub_graphs_.front()->input_indices_; for (uint32_t i = 0; i < tensor_count; ++i) { auto *src_tensor = model->all_tensors_[i]; if (src_tensor == nullptr) { @@ -176,6 +175,9 @@ int LiteSession::ConvertTensors(const lite::Model *model) { return ret; } ConvertTensorsQuantParam(src_tensor, dst_tensor); + if (IsContain(model_input_indices, i)) { + dst_tensor->set_category(Tensor::GRAPH_INPUT); + } this->tensors_.emplace_back(dst_tensor); } return RET_OK; @@ -329,6 +331,9 @@ void LiteSession::InitGraphInOutTensors(const lite::Model *model) { InitGraphOutputNodeMap(model); InitGraphOutputTensorNames(model); InitGraphOutputTensorMap(model); + for (auto *tensor : this->inputs_) { + tensor->set_category(Tensor::Category::GRAPH_INPUT); + } } int LiteSession::CompileGraph(Model *model) { @@ -398,11 +403,6 @@ int LiteSession::PrepareKernels(Model *model) { // find in_kernels and out_kernels for subgraphs for (auto kernel : this->kernels_) { kernel->FindInoutKernels(this->kernels_); - auto ret = kernel->Prepare(); - if (ret != RET_OK) { - MS_LOG(ERROR) << "Prepare kernel " << kernel->name() << " failed: " << ret; - return ret; - } auto sub_graph = reinterpret_cast(kernel); MS_ASSERT(sub_graph != nullptr); auto kernel_in_subgraph = sub_graph->nodes(); @@ -417,6 +417,13 @@ int LiteSession::PrepareKernels(Model *model) { kernel->InitOutTensorInitRefCount(); } AdjustModelOutputTensorInitRefCount(model); + for (auto kernel : this->kernels_) { + auto ret = kernel->Prepare(); + if (ret != RET_OK) { + MS_LOG(ERROR) << "Prepare kernel " << kernel->name() << " failed: " << ret; + return ret; + } + } return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/arm/base/merge.cc b/mindspore/lite/src/runtime/kernel/arm/base/merge.cc index ae3a25477d5..ad54decf621 100644 --- a/mindspore/lite/src/runtime/kernel/arm/base/merge.cc +++ b/mindspore/lite/src/runtime/kernel/arm/base/merge.cc @@ -28,7 +28,7 @@ namespace mindspore::kernel { int MergeCPUKernel::FreeInWorkTensor() const { for (auto &in_tensor : this->in_tensors_) { MS_ASSERT(in_tensor != nullptr); - if (in_tensor->IsConst()) { + if (in_tensor->IsConst() || in_tensor->IsGraphInput()) { continue; } if (in_tensor->ref_count() > 0) { @@ -50,11 +50,13 @@ bool MergeCPUKernel::IsReady(const std::vector &scope_tensors) { MS_ASSERT(in_tensors().size() == 2 * out_tensors().size()); return std::all_of(this->in_tensors().begin(), this->in_tensors().begin() + in_tensors().size() / 2, [&](lite::Tensor *kernel_in_tensor) { - return kernel_in_tensor->IsConst() || kernel_in_tensor->ref_count() >= 1; + return kernel_in_tensor->IsConst() || kernel_in_tensor->IsGraphInput() || + kernel_in_tensor->ref_count() >= 1; }) || std::all_of(this->in_tensors().begin() + in_tensors().size() / 2, this->in_tensors().end(), [&](lite::Tensor *kernel_in_tensor) { - return kernel_in_tensor->IsConst() || kernel_in_tensor->ref_count() >= 1; + return kernel_in_tensor->IsConst() || kernel_in_tensor->IsGraphInput() || + kernel_in_tensor->ref_count() >= 1; }); } diff --git a/mindspore/lite/src/runtime/kernel/opencl/opencl_subgraph.cc b/mindspore/lite/src/runtime/kernel/opencl/opencl_subgraph.cc index cad53923c51..cea8eb7fb5c 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/opencl_subgraph.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/opencl_subgraph.cc @@ -223,19 +223,7 @@ int OpenCLSubGraph::Init() { nodes_.insert(nodes_.end(), out_convert_ops_.begin(), out_convert_ops_.end()); GetInOutNodes(); UpdateTensorDataType(); - - ret = SubGraphKernel::Prepare(); - if (ret != RET_OK) { - MS_LOG(ERROR) << "OpenCL prepare fail"; - return ret; - } - auto opencl_exec = reinterpret_cast(executor_); - // If tuning_mode is DEFAULT, just malloc memory for reuse. - ret = opencl_exec->RunOrTune(in_tensors_, out_tensors_, nodes_, allocator_, nullptr, nullptr, true); - if (ret != RET_OK) { - MS_LOG(ERROR) << "Run opencl executor failed: " << ret; - return ret; - } + Fusion(); return RET_OK; } @@ -307,10 +295,16 @@ int OpenCLSubGraph::Prepare() { MS_LOG(ERROR) << "Create OpenCLExecutor fail"; return RET_ERROR; } - Fusion(); - auto ret = Init(); + auto ret = SubGraphKernel::Prepare(); if (ret != RET_OK) { - MS_LOG(ERROR) << "OpenCL subgraph init fail"; + MS_LOG(ERROR) << "OpenCL prepare fail"; + return ret; + } + auto opencl_exec = reinterpret_cast(executor_); + // If tuning_mode is DEFAULT, just malloc memory for reuse. + ret = opencl_exec->RunOrTune(in_tensors_, out_tensors_, nodes_, allocator_, nullptr, nullptr, true); + if (ret != RET_OK) { + MS_LOG(ERROR) << "Run opencl executor failed: " << ret; return ret; } return RET_OK; diff --git a/mindspore/lite/src/runtime/opencl/opencl_executor.cc b/mindspore/lite/src/runtime/opencl/opencl_executor.cc index 0c6f9c76f8c..046e76f0f78 100644 --- a/mindspore/lite/src/runtime/opencl/opencl_executor.cc +++ b/mindspore/lite/src/runtime/opencl/opencl_executor.cc @@ -79,6 +79,11 @@ int OpenCLExecutor::RunOrTune(std::vector &inputs, std::vectorname(); return ret; } + ret = kernel->PostProcess(); + if (ret != RET_OK) { + MS_LOG(ERROR) << "PostProcess kernel failed, name: " << kernel->name(); + return ret; + } } else { ret = kernel->Run(); if (ret != RET_OK) { diff --git a/mindspore/lite/src/scheduler.cc b/mindspore/lite/src/scheduler.cc index 3282708f1c5..d52fd701ade 100644 --- a/mindspore/lite/src/scheduler.cc +++ b/mindspore/lite/src/scheduler.cc @@ -375,6 +375,13 @@ int Scheduler::ConstructSubGraphs(std::vector *kernels) { } kernels->emplace_back(subgraph); } + for (auto *subgraph : *kernels) { + auto ret = subgraph->Init(); + if (ret != RET_OK) { + MS_LOG(ERROR) << "Init SubGraph failed: " << ret; + return ret; + } + } return RET_OK; } bool Scheduler::MergeOpIsReady(const kernel::LiteKernel *kernel, @@ -407,12 +414,16 @@ kernel::SubGraphKernel *Scheduler::CreateSubGraphKernel(const std::vector input_tensors = kernel::LiteKernelUtil::SubgraphInputTensors(kernels); std::vector output_tensors = kernel::LiteKernelUtil::SubgraphOutputTensors(kernels); - std::vector input_kernels = kernel::LiteKernelUtil::SubgraphInputKernels(kernels); - std::vector output_kernels = kernel::LiteKernelUtil::SubgraphOutputKernels(kernels); + std::vector input_kernels = kernel::LiteKernelUtil::SubgraphInputNodes(kernels); + std::vector output_kernels = kernel::LiteKernelUtil::SubgraphOutputNodes(kernels); if (type == kernel::kGpuSubGraph) { #if SUPPORT_GPU auto sub_kernel = new (std::nothrow) kernel::OpenCLSubGraph(input_tensors, output_tensors, input_kernels, output_kernels, kernels, context_); + if (sub_kernel == nullptr) { + MS_LOG(ERROR) << "Create OpenCLSubGraph failed"; + return nullptr; + } return sub_kernel; #else return nullptr; diff --git a/mindspore/lite/src/sub_graph_kernel.h b/mindspore/lite/src/sub_graph_kernel.h index d829c104702..f91a95c173d 100644 --- a/mindspore/lite/src/sub_graph_kernel.h +++ b/mindspore/lite/src/sub_graph_kernel.h @@ -102,6 +102,8 @@ class SubGraphKernel : public LiteKernel { void InitOutTensorInitRefCount() override; + int Init() override { return mindspore::lite::RET_OK; } + std::string ToString() const override; std::vector nodes() { return this->nodes_; } @@ -146,7 +148,7 @@ class CpuFp32SubGraph : public CpuSubGraph { } ~CpuFp32SubGraph() override = default; - int Init() override { return mindspore::lite::RET_ERROR; } + int Init() override { return CpuSubGraph::Init(); } int PreProcess() override { return CpuSubGraph::PreProcess(); } int Run() override { return CpuSubGraph::Run(); } int Run(const KernelCallBack &before, const KernelCallBack &after) override { @@ -166,7 +168,7 @@ class CpuFp16SubGraph : public CpuSubGraph { } ~CpuFp16SubGraph() override = default; - int Init() override { return mindspore::lite::RET_ERROR; } + int Init() override { return CpuSubGraph::Init(); } int PreProcess() override; int Run() override { return CpuSubGraph::Run(); } int Run(const KernelCallBack &before, const KernelCallBack &after) override { diff --git a/mindspore/lite/src/tensor.cc b/mindspore/lite/src/tensor.cc index 23d798ed868..6b919093b6f 100644 --- a/mindspore/lite/src/tensor.cc +++ b/mindspore/lite/src/tensor.cc @@ -328,12 +328,6 @@ void *Tensor::MutableData() { return this->data_; } -bool Tensor::IsConst() { - return (this->category_ == CONST_TENSOR || this->category_ == CONST_SCALAR) && this->data_ != nullptr; -} - -bool Tensor::IsScalar() { return this->category_ == CONST_SCALAR && this->data_ != nullptr; } - void Tensor::AddQuantParam(const QuantArg &quant_arg) { this->quant_params_.push_back(quant_arg); } std::vector Tensor::quant_params() const { return this->quant_params_; } diff --git a/mindspore/lite/src/tensor.h b/mindspore/lite/src/tensor.h index 6a1c8a33895..c146d044037 100644 --- a/mindspore/lite/src/tensor.h +++ b/mindspore/lite/src/tensor.h @@ -45,7 +45,8 @@ class Tensor : public mindspore::tensor::MSTensor { enum Category { CONST_TENSOR, // weight tensor CONST_SCALAR, // weight scalar - VAR // activation tensor + VAR, // activation tensor + GRAPH_INPUT, }; Tensor() = default; @@ -102,11 +103,13 @@ class Tensor : public mindspore::tensor::MSTensor { virtual void set_data(void *data) { this->data_ = data; } - Category category() { return this->category_; } + Category category() const { return this->category_; } + + void set_category(Category category) { this->category_ = category; } void set_format(schema::Format format) { this->format_ = format; } - schema::Format format() { return this->format_; } + schema::Format format() const { return this->format_; } size_t ref_count() const { return this->ref_count_; } @@ -130,9 +133,13 @@ class Tensor : public mindspore::tensor::MSTensor { void set_quant_clusters(const std::vector &clusters); - bool IsConst(); + bool IsConst() const { + return (this->category_ == CONST_TENSOR || this->category_ == CONST_SCALAR) && this->data_ != nullptr; + } - bool IsScalar(); + bool IsScalar() const { return this->category_ == CONST_SCALAR && this->data_ != nullptr; } + + bool IsGraphInput() const { return this->category_ == GRAPH_INPUT; } void Prepare() { if (allocator_ != nullptr) { diff --git a/mindspore/lite/test/ut/src/utils_test.cc b/mindspore/lite/test/ut/src/utils_test.cc index d2a358f2a2e..6d7e8e352a5 100644 --- a/mindspore/lite/test/ut/src/utils_test.cc +++ b/mindspore/lite/test/ut/src/utils_test.cc @@ -56,9 +56,9 @@ TEST_F(UtilsTest, TestSubgraph) { std::vector kernels = {kernel0.get(), kernel1.get(), kernel2.get()}; - auto input_kernels = kernel::LiteKernelUtil::SubgraphInputKernels(kernels); + auto input_kernels = kernel::LiteKernelUtil::SubgraphInputNodes(kernels); ASSERT_EQ(input_kernels.size(), 1); - auto output_kernels = kernel::LiteKernelUtil::SubgraphOutputKernels(kernels); + auto output_kernels = kernel::LiteKernelUtil::SubgraphOutputNodes(kernels); ASSERT_EQ(output_kernels.size(), 1); auto input_tensors = kernel::LiteKernelUtil::SubgraphInputTensors(kernels); ASSERT_EQ(input_tensors.size(), 2);