From d9a33df7bc7f036c68c2cff1185c33c3a2b3d7ba Mon Sep 17 00:00:00 2001
From: wandongdong
Date: Thu, 29 Oct 2020 02:32:23 -0700
Subject: [PATCH] fix multi subgraph bug

---
 .../kernel/opencl/subgraph_opencl_kernel.cc | 175 +++++++++++++-----
 .../kernel/opencl/subgraph_opencl_kernel.h  |  13 ++
 mindspore/lite/src/scheduler.cc             |   1 -
 mindspore/lite/test/models_fp16_gpu.cfg     |   1 +
 mindspore/lite/test/models_fp32_gpu.cfg     |   1 +
 5 files changed, 142 insertions(+), 49 deletions(-)

diff --git a/mindspore/lite/src/runtime/kernel/opencl/subgraph_opencl_kernel.cc b/mindspore/lite/src/runtime/kernel/opencl/subgraph_opencl_kernel.cc
index c02eff91a12..5182458ea35 100644
--- a/mindspore/lite/src/runtime/kernel/opencl/subgraph_opencl_kernel.cc
+++ b/mindspore/lite/src/runtime/kernel/opencl/subgraph_opencl_kernel.cc
@@ -26,6 +26,71 @@ using mindspore::lite::RET_ERROR;
 using mindspore::lite::RET_OK;
 
 SubGraphOpenCLKernel::~SubGraphOpenCLKernel() { UnInit(); }
+int SubGraphOpenCLKernel::ReplaceOutTensorAndKernelToNull(
+  const std::vector<lite::Tensor *> &in_tensors, const std::vector<std::vector<kernel::LiteKernel *>> &in_kernels,
+  OpenCLMemType mem_type) {
+  for (size_t i = 0; i < in_tensors.size(); ++i) {
+    for (auto &jv : in_kernels.at(i)) {
+      auto tensors = (mem_type == OpenCLMemType::IMG) ? jv->in_tensors() : jv->out_tensors();
+      auto ft = std::find_if(tensors.begin(), tensors.end(),
+                             [&in_tensors, &i](lite::Tensor *kv) { return kv == in_tensors.at(i); });
+      if (ft != tensors.end()) {
+        *ft = nullptr;
+      }
+      auto kernels = (mem_type == OpenCLMemType::IMG) ? jv->in_kernels() : jv->out_kernels();
+      std::replace_if(
+        kernels.begin(), kernels.end(),
+        [this, &in_tensors, &i](kernel::LiteKernel *kv) {
+          return std::find_if(kv->in_tensors().begin(), kv->in_tensors().end(),
+                              [&in_tensors, &i](lite::Tensor *xv) { return xv == in_tensors.at(i); }) !=
+                   kv->in_tensors().end() &&
+                 this->nodes_set_.count(kv) == 0;
+        },
+        nullptr);
+      if (mem_type == OpenCLMemType::IMG) {
+        jv->set_in_tensors(tensors);
+        jv->SetInKernel(kernels);
+      } else {
+        jv->set_out_tensors(tensors);
+        jv->SetOutKernel(kernels);
+      }
+    }
+  }
+  return RET_OK;
+}
+int SubGraphOpenCLKernel::ReplaceOutTensorAndKernelToConvert(const lite::Tensor *in_tensor,
+                                                             const std::vector<kernel::LiteKernel *> &in_kernels,
+                                                             lite::Tensor *new_tensor,
+                                                             kernel::LiteKernel *in_convert_op,
+                                                             OpenCLMemType mem_type) {
+  auto in_opencl_op = reinterpret_cast<OpenCLKernel *>(in_convert_op);
+  for (auto &iv : in_kernels) {
+    auto kernels = (mem_type == OpenCLMemType::IMG) ? iv->in_kernels() : iv->out_kernels();
+    auto fk = std::find_if(kernels.begin(), kernels.end(), [&](kernel::LiteKernel *kv) { return kv == nullptr; });
+    if (fk != kernels.end()) {
+      *fk = in_convert_op;
+    } else {
+      kernels.emplace_back(in_convert_op);
+    }
+    auto tensors = (mem_type == OpenCLMemType::IMG) ? iv->in_tensors() : iv->out_tensors();
+    auto ft = std::find_if(tensors.begin(), tensors.end(), [&](lite::Tensor *kv) { return kv == nullptr; });
+    if (ft != tensors.end()) {
+      *ft = new_tensor;
+    } else {
+      tensors.emplace_back(new_tensor);
+    }
+    if (mem_type == OpenCLMemType::IMG) {
+      iv->SetInKernel(kernels);
+      iv->set_in_tensors(tensors);
+      in_opencl_op->AddOutKernel(iv);
+    } else {
+      iv->SetOutKernel(kernels);
+      iv->set_out_tensors(tensors);
+      in_convert_op->AddInKernel(iv);
+    }
+  }
+  return RET_OK;
+}
 int SubGraphOpenCLKernel::GenToFormatOp(const std::vector<lite::Tensor *> &in_tensors,
                                         const std::vector<std::vector<kernel::LiteKernel *>> &in_kernels,
                                         std::vector<lite::Tensor *> *out_tensors,
@@ -36,17 +101,9 @@ int SubGraphOpenCLKernel::GenToFormatOp(const std::vector<lite::Tensor *> &in_te
   out_convert_ops->clear();
   MS_ASSERT(in_tensors.size() == to_kernels.size());
   MS_ASSERT(in_tensors.size() == from_kernels.size());
-  for (auto &iv : in_kernels) {
-    for (auto &jv : iv) {
-      if (mem_type == OpenCLMemType::IMG) {
-        jv->set_in_tensors({});
-        jv->SetInKernel({});
-      } else {
-        jv->set_out_tensors({});
-        jv->SetOutKernel({});
-      }
-    }
-  }
+
+  ReplaceOutTensorAndKernelToNull(in_tensors, in_kernels, mem_type);
+
   for (size_t i = 0; i < in_tensors.size(); ++i) {
     auto dst_format = (mem_type == OpenCLMemType::IMG) ? schema::Format::Format_NHWC4 : schema::Format::Format_NHWC;
     auto src_format = (mem_type == OpenCLMemType::IMG) ? schema::Format::Format_NHWC : schema::Format::Format_NHWC4;
@@ -100,28 +157,23 @@ int SubGraphOpenCLKernel::GenToFormatOp(const std::vector<lite::Tensor *> &in_te
       parameter = nullptr;
       return RET_ERROR;
     }
-    auto in_opencl_op = reinterpret_cast<OpenCLKernel *>(in_convert_op);
-    if (mem_type == OpenCLMemType::IMG) {
-      for (auto &iv : in_kernels[i]) {
-        in_opencl_op->AddOutKernel(iv);
-        auto kernels = iv->in_kernels();
-        kernels.emplace_back(in_convert_op);
-        iv->SetInKernel(kernels);
+
+    ReplaceOutTensorAndKernelToConvert(in_tensors.at(i), in_kernels.at(i), new_tensor, in_convert_op, mem_type);
+
+    // replace in_tensor of inner kernel which use out tensor
+    if (mem_type == OpenCLMemType::BUF) {
+      std::vector<std::vector<kernel::LiteKernel *>> loop_kernels;
+      GetKernelFromToTensor(in_tensors, nodes_, &loop_kernels, true);
+      for (auto &iv : loop_kernels[i]) {
         auto tensors = iv->in_tensors();
-        tensors.emplace_back(new_tensor);
-        iv->set_in_tensors(tensors);
-      }
-    } else {
-      for (auto &iv : in_kernels[i]) {
-        auto kernels = iv->out_kernels();
-        kernels.emplace_back(in_convert_op);
-        iv->SetOutKernel(kernels);
-        auto tensors = iv->out_tensors();
-        tensors.emplace_back(new_tensor);
-        iv->set_out_tensors(tensors);
-        in_convert_op->AddInKernel(iv);
+        auto jv = std::find(tensors.begin(), tensors.end(), in_tensors.at(i));
+        if (jv != tensors.end()) {
+          *jv = new_tensor;
+          iv->set_in_tensors(tensors);
+        }
       }
     }
+
     out_convert_ops->emplace_back(in_convert_op);
   }
   return RET_OK;
@@ -137,21 +189,23 @@ int SubGraphOpenCLKernel::Init() {
     tensor->set_allocator(allocator_);
   }
 
+  GetInOutNodes();
+
   std::vector<std::vector<kernel::LiteKernel *>> from_kernels_;
-  GetKernelFromToTensor(in_tensors_, in_kernels_, &from_kernels_, true);
+  GetKernelFromToTensor(in_tensors_, in_nodes_, &from_kernels_, true);
   int ret = GenToFormatOp(in_tensors_, from_kernels_, &in_convert_tensors_, &in_parameters_, &in_convert_ops_,
                           OpenCLMemType::IMG);
   if (ret != RET_OK) {
-    return RET_ERROR;
+    return ret;
   }
   nodes_.insert(nodes_.begin(), in_convert_ops_.begin(), in_convert_ops_.end());
 
   std::vector<std::vector<kernel::LiteKernel *>> to_kernels_;
-  GetKernelFromToTensor(out_tensors_, out_kernels_, &to_kernels_, false);
+  GetKernelFromToTensor(out_tensors_, out_nodes_, &to_kernels_, false);
   ret = GenToFormatOp(out_tensors_, to_kernels_,
                       &out_convert_tensors_, &out_parameters_, &out_convert_ops_, OpenCLMemType::BUF);
   if (ret != RET_OK) {
-    return RET_ERROR;
+    return ret;
   }
   nodes_.insert(nodes_.end(), out_convert_ops_.begin(), out_convert_ops_.end());
 
@@ -214,20 +268,6 @@ int SubGraphOpenCLKernel::MallocTensorWithReuse() {
       MS_LOG(WARNING) << "DecOutTensorRefCount for kernel" << kernel->name() << " failed";
     }
   }
-  for (auto kernel : in_convert_ops_) {
-    MS_ASSERT(nullptr != kernel);
-    auto ret = kernel->DecOutTensorRefCount();
-    if (0 != ret) {
-      MS_LOG(WARNING) << "DecOutTensorRefCount for kernel" << kernel->name() << " failed";
-    }
-  }
-  for (auto kernel : out_convert_ops_) {
-    MS_ASSERT(nullptr != kernel);
-    auto ret = kernel->DecOutTensorRefCount();
-    if (0 != ret) {
-      MS_LOG(WARNING) << "DecOutTensorRefCount for kernel" << kernel->name() << " failed";
-    }
-  }
   return RET_OK;
 }
 
@@ -254,6 +294,45 @@ int SubGraphOpenCLKernel::GetKernelFromToTensor(const std::vector<lite::Tensor *
   return RET_OK;
 }
 
+int SubGraphOpenCLKernel::GetInOutNodes() {
+  std::vector<std::set<lite::Tensor *>> ksets_in;
+  std::vector<std::set<lite::Tensor *>> ksets_out;
+  for (auto jv : nodes_) {
+    std::set<lite::Tensor *> kset;
+    kset.insert(jv->in_tensors().begin(), jv->in_tensors().end());
+    ksets_in.emplace_back(kset);
+
+    kset.clear();
+    kset.insert(jv->out_tensors().begin(), jv->out_tensors().end());
+    ksets_out.emplace_back(kset);
+  }
+  for (size_t j = 0; j < nodes_.size(); ++j) {
+    if (std::find_if(in_tensors_.begin(), in_tensors_.end(),
+                     [&ksets_in, &j](lite::Tensor *val) { return ksets_in[j].count(val); }) != in_tensors_.end()) {
+      in_nodes_.emplace_back(nodes_.at(j));
+    }
+    if (std::find_if(out_tensors_.begin(), out_tensors_.end(),
+                     [&ksets_out, &j](lite::Tensor *val) { return ksets_out[j].count(val); }) != out_tensors_.end()) {
+      out_nodes_.emplace_back(nodes_.at(j));
+    }
+  }
+  return RET_OK;
+}
+
+int SubGraphOpenCLKernel::Prepare() {
+  auto ret = Init();
+  if (ret != RET_OK) {
+    MS_LOG(ERROR) << "OpenCL subgraph init fail";
+    return ret;
+  }
+  ret = SubGraphKernel::Prepare();
+  if (ret != RET_OK) {
+    MS_LOG(ERROR) << "OpenCL prepare fail";
+    return ret;
+  }
+  return RET_OK;
+}
+
 int SubGraphOpenCLKernel::UnInit() {
   for (const auto &tensor : in_convert_tensors_) {
     delete tensor;
diff --git a/mindspore/lite/src/runtime/kernel/opencl/subgraph_opencl_kernel.h b/mindspore/lite/src/runtime/kernel/opencl/subgraph_opencl_kernel.h
index b74141bf5f6..02b7fb2ba7e 100644
--- a/mindspore/lite/src/runtime/kernel/opencl/subgraph_opencl_kernel.h
+++ b/mindspore/lite/src/runtime/kernel/opencl/subgraph_opencl_kernel.h
@@ -17,6 +17,7 @@
 #ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_OPENCL_KERNEL_SUBGRAPH_OPENCL_KENEL_H_
 #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_OPENCL_KERNEL_SUBGRAPH_OPENCL_KENEL_H_
 
+#include <set>
 #include <vector>
 #include "src/runtime/kernel/opencl/opencl_kernel.h"
 #include "src/runtime/opencl/opencl_allocator.h"
@@ -41,11 +42,13 @@ class SubGraphOpenCLKernel : public SubGraphKernel {
     subgraph_type_ = kGpuSubGraph;
     this->name_ = "GpuSubGraph";
     this->executor_ = new lite::opencl::OpenCLExecutor();
+    nodes_set_.insert(nodes.begin(), nodes.end());
   }
   ~SubGraphOpenCLKernel() override;
 
   int PreProcess() override { return mindspore::lite::RET_OK; }
   int PostProcess() override { return mindspore::lite::RET_OK; }
+  int Prepare() override;
   int Init() override;
   int InferShape();
   int ReSize() override;
@@ -56,6 +59,13 @@ class SubGraphOpenCLKernel : public SubGraphKernel {
   int UnInit();
   int UpdateTensorDataType();
   int MallocTensorWithReuse();
+  int ReplaceOutTensorAndKernelToNull(const std::vector<lite::Tensor *> &in_tensors,
+                                      const std::vector<std::vector<kernel::LiteKernel *>> &in_kernels,
+                                      OpenCLMemType mem_type);
+  int ReplaceOutTensorAndKernelToConvert(const lite::Tensor *in_tensor,
+                                         const std::vector<kernel::LiteKernel *> &in_kernels, lite::Tensor *new_tensor,
+                                         kernel::LiteKernel *in_convert_op, OpenCLMemType mem_type);
+  int GetInOutNodes();
   int GenToFormatOp(const std::vector<lite::Tensor *> &in_tensors,
                     const std::vector<std::vector<kernel::LiteKernel *>> &in_kernels,
                     std::vector<lite::Tensor *> *out_tensors, std::vector<OpParameter *> *out_parameters,
@@ -70,6 +80,9 @@ class SubGraphOpenCLKernel : public SubGraphKernel {
   std::vector<OpParameter *> out_parameters_;
   std::vector<kernel::LiteKernel *> in_convert_ops_;
   std::vector<kernel::LiteKernel *> out_convert_ops_;
+  std::vector<kernel::LiteKernel *> in_nodes_;
+  std::vector<kernel::LiteKernel *> out_nodes_;
+  std::set<kernel::LiteKernel *> nodes_set_;
   lite::opencl::OpenCLRuntimeWrapper ocl_runtime_wrap_;
   lite::opencl::OpenCLRuntime *ocl_runtime_{nullptr};
 };
diff --git a/mindspore/lite/src/scheduler.cc b/mindspore/lite/src/scheduler.cc
index 8bfdcb5c69a..9cb9d7fe0f8 100644
--- a/mindspore/lite/src/scheduler.cc
+++ b/mindspore/lite/src/scheduler.cc
@@ -234,7 +234,6 @@ kernel::SubGraphKernel *Scheduler::CreateSubGraphKernel(const std::vector<kernel
-    sub_kernel->Init();
     return sub_kernel;
 #else
     return nullptr;
diff --git a/mindspore/lite/test/models_fp16_gpu.cfg b/mindspore/lite/test/models_fp16_gpu.cfg
index ab68ceb7a68..2dc60b454e4 100644
--- a/mindspore/lite/test/models_fp16_gpu.cfg
+++ b/mindspore/lite/test/models_fp16_gpu.cfg
@@ -1,5 +1,6 @@
 mobilenet_v1_1.0_224.tflite
 mobilenet_v2_1.0_224.tflite
+mtk_age_gender_fp16.tflite
 mtk_isface.tflite
 mtk_landmark.tflite
 mtk_new_detect.tflite
diff --git a/mindspore/lite/test/models_fp32_gpu.cfg b/mindspore/lite/test/models_fp32_gpu.cfg
index 116b2f78a8b..545506c7d36 100644
--- a/mindspore/lite/test/models_fp32_gpu.cfg
+++ b/mindspore/lite/test/models_fp32_gpu.cfg
@@ -7,6 +7,7 @@ hiai_cn_recognize_modify_padv2.tflite
 hiai_cv_focusShootOCRModel_08.tflite
 hiai_model_normalize_object_scene_ps_20200519.tflite
 inception_v3.tflite
+mtk_age_gender_fp16.tflite
 mtk_isface.tflite
 mtk_landmark.tflite
 mtk_new_detect.tflite