!7795 fix bug in sub_graph_kernel

Merge pull request !7795 from hangq/master
mindspore-ci-bot 2020-10-30 17:35:37 +08:00 committed by Gitee
commit 7887c6b7b1
5 changed files with 76 additions and 50 deletions

src/lite_kernel.cc

@@ -16,6 +16,7 @@
#include "src/lite_kernel.h"
#include <algorithm>
#include <queue>
#include "src/tensor.h"
namespace mindspore::kernel {
@@ -120,19 +121,19 @@ std::string LiteKernel::ToString() const {
std::ostringstream oss;
oss << "LiteKernel: " << this->name_;
oss << ", Type: " << this->type_str();
oss << std::endl << this->in_tensors_.size() << " InputTensors:";
oss << ", " << this->in_tensors_.size() << " InputTensors:";
for (auto tensor : in_tensors_) {
oss << " " << tensor << ":" << tensor->ToString();
oss << " " << tensor;
}
oss << std::endl << this->out_tensors_.size() << " OutputTensors:";
oss << ", " << this->out_tensors_.size() << " OutputTensors:";
for (auto tensor : out_tensors_) {
oss << " " << tensor << ":" << tensor->ToString();
oss << " " << tensor;
}
oss << std::endl << this->in_kernels_.size() << " InputKernels:";
oss << ", " << this->in_kernels_.size() << " InputKernels:";
for (auto in_kernel : in_kernels_) {
oss << " " << in_kernel->name_;
}
oss << std::endl << this->out_kernels_.size() << " OutputKernels:";
oss << ", " << this->out_kernels_.size() << " OutputKernels:";
for (auto out_kernel : out_kernels_) {
oss << " " << out_kernel->name_;
}
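
Note: the net effect of the hunk above is that a kernel's summary collapses from a multi-line dump, with every tensor expanded through tensor->ToString(), into a single comma-separated line that prints only tensor addresses, roughly of this shape (names and addresses illustrative):

LiteKernel: conv1, Type: Conv2D, 2 InputTensors: 0x55e0... 0x55e1..., 1 OutputTensors: 0x55e2..., 1 InputKernels: pad1, 1 OutputKernels: relu1

Keeping each kernel's summary to one line matters once SubGraphKernel::ToString (last file below) prints every input/output kernel's full summary on its own line.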
@@ -239,6 +240,42 @@ std::vector<lite::Tensor *> LiteKernelUtil::SubgraphOutputTensors(const std::vec
return output_tensors;
}
int LiteKernelUtil::TopologicalSortKernels(std::vector<kernel::LiteKernel *> *kernels) {
auto old_kernels = *kernels;
kernels->clear();
std::queue<kernel::LiteKernel *> kernel_queue;
for (auto kernel : old_kernels) {
if (kernel->in_kernels().empty()) {
kernel_queue.push(kernel);
kernels->emplace_back(kernel);
}
}
while (!kernel_queue.empty()) {
auto cur_kernel = kernel_queue.front();
kernel_queue.pop();
MS_ASSERT(cur_kernel != nullptr);
auto next_kernels = cur_kernel->out_kernels();
for (auto next_kernel : next_kernels) {
auto in_kernels = next_kernel->in_kernels();
if (lite::IsContain(*kernels, const_cast<kernel::LiteKernel *>(next_kernel))) {
MS_LOG(ERROR) << "TopologicalSortKernels failed, loop exist";
return RET_ERROR;
}
if (std::all_of(in_kernels.begin(), in_kernels.end(), [&](const kernel::LiteKernel *in_kernel) {
return lite::IsContain(*kernels, const_cast<kernel::LiteKernel *>(in_kernel));
})) {
kernel_queue.push(next_kernel);
kernels->emplace_back(next_kernel);
}
}
}
if (kernels->size() != old_kernels.size()) {
MS_LOG(ERROR) << "TopologicalSortKernels failed, kernels size before sort: " << old_kernels.size()
<< ", kernels size after sort: " << kernels->size();
return RET_ERROR;
}
return RET_OK;
}
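
Note: the new TopologicalSortKernels above is a Kahn-style breadth-first sort: it seeds the queue with kernels that have no input kernels, then admits a successor only once every one of its input kernels is already in the sorted list; if the sorted list comes up short, some kernel never became ready, i.e. the graph has a cycle. Below is a minimal standalone sketch of the same scheme, with a generic Node type and a hypothetical Contains helper standing in for lite::IsContain (unlike the committed code, the sketch treats an already-sorted successor as reachable via another path rather than as a loop, and reports cycles only through the final size check):

#include <algorithm>
#include <queue>
#include <vector>

struct Node {
  std::vector<Node *> in;   // predecessor nodes
  std::vector<Node *> out;  // successor nodes
};

// Hypothetical helper mirroring lite::IsContain.
static bool Contains(const std::vector<Node *> &v, Node *n) {
  return std::find(v.begin(), v.end(), n) != v.end();
}

// Kahn-style topological sort; returns false if the graph contains a cycle.
static bool TopoSort(std::vector<Node *> *nodes) {
  auto old_nodes = *nodes;
  nodes->clear();
  std::queue<Node *> ready;
  for (auto *n : old_nodes) {
    if (n->in.empty()) {  // sources go first
      ready.push(n);
      nodes->push_back(n);
    }
  }
  while (!ready.empty()) {
    auto *cur = ready.front();
    ready.pop();
    for (auto *next : cur->out) {
      if (Contains(*nodes, next)) continue;  // already sorted via another path
      // a node becomes ready only when all of its predecessors are sorted
      if (std::all_of(next->in.begin(), next->in.end(),
                      [&](Node *p) { return Contains(*nodes, p); })) {
        ready.push(next);
        nodes->push_back(next);
      }
    }
  }
  // any shortfall means some node never became ready, i.e. a cycle
  return nodes->size() == old_nodes.size();
}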
void LiteKernelUtil::InitIOKernels(std::vector<kernel::LiteKernel *> &kernels) {
for (auto *kernel : kernels) {
// clean io kernels

src/lite_kernel.h

@@ -202,6 +202,8 @@ class LiteKernelUtil {
static std::vector<lite::Tensor *> SubgraphOutputTensors(const std::vector<kernel::LiteKernel *> &kernels);
static int TopologicalSortKernels(std::vector<kernel::LiteKernel *> *kernels);
static void InitTensorRefCount(std::vector<kernel::LiteKernel *> &kernels);
static int SetInput(LiteKernel &kernelMod, std::vector<lite::Tensor *> inputs);

src/scheduler.cc

@@ -38,17 +38,21 @@ int Scheduler::Schedule(const lite::Model *model, std::vector<Tensor *> *tensors
int ret = InferShape(model, tensors);
if (ret != RET_OK) {
MS_LOG(ERROR) << "op infer shape failed.";
return RET_ERROR;
return ret;
}
ret = InitOp2Kernel(model, tensors, kernels);
ret = BuildKernels(model, tensors, kernels);
if (ret != RET_OK) {
MS_LOG(ERROR) << "init op to kernel failed.";
return RET_ERROR;
return ret;
}
kernel::LiteKernelUtil::InitIOKernels(*kernels);
ConstructSubGraphs(kernels);
ret = ConstructSubGraphs(kernels);
if (ret != RET_OK) {
MS_LOG(ERROR) << "ConstructSubGraphs failed.";
return ret;
}
kernel::LiteKernelUtil::InitIOKernels(*kernels);
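
Note: two behavioral fixes in the Schedule hunk above: each stage now forwards its own status code instead of collapsing everything to RET_ERROR, and the previously ignored result of ConstructSubGraphs is checked. The pattern in miniature (the status values and stage stubs are illustrative, not the scheduler's real ones):

enum { RET_OK = 0, RET_ERROR = -1, RET_INFER_INVALID = -2 };  // illustrative codes

static int InferShape() { return RET_INFER_INVALID; }  // stub: simulate a failure
static int BuildKernels() { return RET_OK; }            // stub
static int ConstructSubGraphs() { return RET_OK; }      // stub

static int Schedule() {
  int ret = InferShape();
  if (ret != RET_OK) return ret;  // was: return RET_ERROR (the cause was lost)
  ret = BuildKernels();
  if (ret != RET_OK) return ret;
  ret = ConstructSubGraphs();     // was: called without checking the result
  if (ret != RET_OK) return ret;
  return RET_OK;
}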
@@ -129,8 +133,8 @@ int Scheduler::InferShape(const lite::Model *model, std::vector<Tensor *> *tenso
return RET_OK;
}
int Scheduler::InitOp2Kernel(const lite::Model *model, std::vector<Tensor *> *tensors,
std::vector<kernel::LiteKernel *> *kernels) {
int Scheduler::BuildKernels(const lite::Model *model, std::vector<Tensor *> *tensors,
std::vector<kernel::LiteKernel *> *kernels) {
MS_ASSERT(model != nullptr);
MS_ASSERT(tensors != nullptr);
uint32_t kernelCount = model->nodes_.size();
@@ -194,7 +198,7 @@ int Scheduler::ConstructSubGraphs(std::vector<kernel::LiteKernel *> *kernels) {
std::vector<kernel::LiteKernel *> sub_kernels;
std::queue<kernel::LiteKernel *> kernel_queue;
kernel_queue.emplace(head_kernel);
auto cur_sub_graph_type = this->GetKernelSubGraphType(head_kernel);
auto cur_sub_graph_type = mindspore::lite::Scheduler::GetKernelSubGraphType(head_kernel);
while (!kernel_queue.empty()) {
auto cur_kernel = kernel_queue.front();
kernel_queue.pop();
@@ -202,7 +206,7 @@ int Scheduler::ConstructSubGraphs(std::vector<kernel::LiteKernel *> *kernels) {
sub_kernels.emplace_back(cur_kernel);
auto post_kernels = cur_kernel->out_kernels();
for (auto post_kernel : post_kernels) {
if (cur_sub_graph_type == this->GetKernelSubGraphType(post_kernel)) {
if (cur_sub_graph_type == mindspore::lite::Scheduler::GetKernelSubGraphType(post_kernel)) {
auto post_kernel_inputs = post_kernel->in_kernels();
if (std::all_of(post_kernel_inputs.begin(), post_kernel_inputs.end(),
[&](kernel::LiteKernel *kernel) { return is_kernel_sinked[kernel]; })) {
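
Note: ConstructSubGraphs (context above) grows one subgraph at a time with a breadth-first walk from a head kernel, pulling in successors only when they share the head's subgraph type (CPU fp32 vs. fp16, etc.) and all of their input kernels have already been sunk into a subgraph. A hedged sketch of that grouping pass; Kernel, SubGraphType, and CollectSubGraph are illustrative stand-ins, not the scheduler's real types:

#include <map>
#include <queue>
#include <vector>

enum SubGraphType { kCpuFP32SubGraph, kCpuFP16SubGraph, kGpuSubGraph };

struct Kernel {
  SubGraphType subgraph_type;
  std::vector<Kernel *> in;   // input kernels
  std::vector<Kernel *> out;  // output kernels
};

// Collect one subgraph: BFS from head, absorbing successors of the same
// subgraph type whose input kernels have all been scheduled already.
static std::vector<Kernel *> CollectSubGraph(Kernel *head, std::map<Kernel *, bool> *sinked) {
  std::vector<Kernel *> sub_kernels;
  std::queue<Kernel *> q;
  q.push(head);
  while (!q.empty()) {
    auto *cur = q.front();
    q.pop();
    (*sinked)[cur] = true;
    sub_kernels.push_back(cur);
    for (auto *post : cur->out) {
      if (post->subgraph_type != head->subgraph_type || (*sinked)[post]) {
        continue;  // different backend, or already placed in a subgraph
      }
      bool all_inputs_sinked = true;
      for (auto *in : post->in) {
        all_inputs_sinked &= (*sinked)[in];
      }
      if (all_inputs_sinked) {
        q.push(post);
      }
    }
  }
  return sub_kernels;
}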

src/scheduler.h

@@ -37,8 +37,8 @@ class Scheduler {
kernel::LiteKernel *ScheduleNode(const std::vector<Tensor *> &in_tensors, const std::vector<Tensor *> &out_tensors,
const mindspore::lite::PrimitiveC *primitive, const Model::Node *cnode);
int InitOp2Kernel(const lite::Model *model, std::vector<Tensor *> *tensors,
std::vector<kernel::LiteKernel *> *kernels);
int BuildKernels(const lite::Model *model, std::vector<Tensor *> *tensors,
std::vector<kernel::LiteKernel *> *kernels);
static int InferShape(const lite::Model *model, std::vector<Tensor *> *tensors);

src/sub_graph_kernel.cc

@@ -44,23 +44,23 @@ int SubGraphKernel::Prepare() {
std::string SubGraphKernel::ToString() const {
std::ostringstream oss;
oss << "===============================================" << std::endl << "Subgraph type : " << this->subgraph_type_;
oss << std::endl << this->in_tensors_.size() << " InputTensors:";
oss << std::endl << this->in_tensors_.size() << " Subgraph inputTensors:";
for (auto tensor : in_tensors_) {
oss << " " << tensor << ":" << tensor->ToString();
oss << " " << tensor;
}
oss << std::endl << this->out_tensors_.size() << " OutputTensors:";
oss << std::endl << this->out_tensors_.size() << " Subgraph outputTensors:";
for (auto tensor : out_tensors_) {
oss << " " << tensor << ":" << tensor->ToString();
oss << " " << tensor;
}
oss << std::endl << "input kernels :";
oss << std::endl << "Subgraph input kernels :" << std::endl;
for (auto kernel : this->in_kernels_) {
oss << " " << kernel->ToString();
oss << " " << kernel->ToString() << std::endl;
}
oss << std::endl << "output kernels :";
oss << std::endl << "Subgraph output kernels :" << std::endl;
for (auto kernel : this->out_kernels_) {
oss << " " << kernel->ToString();
oss << " " << kernel->ToString() << std::endl;
}
oss << std::endl << nodes_.size() << " nodes :";
oss << std::endl << nodes_.size() << " nodes in subgraph :";
for (auto kernel : this->nodes_) {
oss << " " << kernel->name();
}
@@ -178,36 +178,18 @@ int CpuFp16SubGraph::PreProcess() {
}
int CpuFp16SubGraph::PostProcess() {
auto fp16_to_fp32_cast_func = kernel::Float16CastUtil::GetInstance()->float16_to_float32_func_;
auto fp16_to_fp32_cast_func = Float16CastUtil::GetInstance()->float16_to_float32_func_;
if (fp16_to_fp32_cast_func == nullptr) {
MS_LOG(ERROR) << "Can not find cast fp16 to fp32 func";
return RET_ERROR;
}
for (auto tensor : this->out_tensors_) {
if (tensor->data_type() == kNumberTypeFloat16) {
void *float16_data = nullptr;
if (this->context_ != nullptr && this->context_->allocator != nullptr) {
float16_data = this->context_->allocator->Malloc(tensor->Size());
} else {
float16_data = malloc(tensor->Size());
}
if (float16_data == nullptr) {
MS_LOG(ERROR) << "malloc data failed";
return RET_ERROR;
}
memcpy(float16_data, tensor->data_c(), tensor->Size());
auto ret = tensor->FreeData();
if (RET_OK != ret) {
MS_LOG(ERROR) << "free data failed";
if (this->context_ != nullptr && this->context_->allocator != nullptr) {
this->context_->allocator->Free(float16_data);
} else {
free(float16_data);
}
return RET_ERROR;
}
auto float16_data = tensor->data_c();
MS_ASSERT(float16_data != nullptr);
tensor->set_data(nullptr);
tensor->set_data_type(TypeId::kNumberTypeFloat32);
ret = tensor->MallocData();
auto ret = tensor->MallocData();
if (RET_OK != ret) {
MS_LOG(ERROR) << "malloc data failed";
if (this->context_ != nullptr && this->context_->allocator != nullptr) {
@@ -217,9 +199,10 @@ int CpuFp16SubGraph::PostProcess() {
}
return RET_ERROR;
}
MS_ASSERT(tensor->data_c() != nullptr);
fp16_to_fp32_cast_func(float16_data, tensor->data_c(), tensor->ElementsNum());
if (this->context_ != nullptr && this->context_->allocator != nullptr) {
this->context_->allocator->Free(float16_data);
if (tensor->allocator() != nullptr) {
tensor->allocator()->Free(float16_data);
} else {
free(float16_data);
}
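
Note: the PostProcess rewrite above drops the old malloc-plus-memcpy round trip. Instead of copying the fp16 output aside, it detaches the tensor's existing buffer, retypes the tensor to fp32, allocates a fresh buffer via MallocData, casts element-wise, and finally frees the detached fp16 buffer through the allocator that owns it. A minimal sketch of that detach-convert-release pattern; the Tensor struct and CastFunc here are simplified stand-ins for lite::Tensor and the Float16CastUtil function pointer:

#include <cstdlib>

// Simplified stand-in for lite::Tensor (illustrative, not the real API).
struct Tensor {
  void *data = nullptr;
  int elements = 0;
  bool MallocData() {  // allocate an fp32 buffer for `elements` values
    data = std::malloc(static_cast<size_t>(elements) * sizeof(float));
    return data != nullptr;
  }
};

// Matches the shape of the fp16->fp32 cast function used above.
using CastFunc = void (*)(const void *src, void *dst, int n);

static bool ConvertOutputInPlace(Tensor *t, CastFunc fp16_to_fp32) {
  void *fp16_data = t->data;  // detach: the tensor no longer owns this buffer
  t->data = nullptr;
  if (!t->MallocData()) {     // fresh fp32 buffer, sized for the new type
    std::free(fp16_data);
    return false;
  }
  fp16_to_fp32(fp16_data, t->data, t->elements);
  std::free(fp16_data);       // the real code frees via tensor->allocator() if set
  return true;
}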