forked from mindspore-Ecosystem/mindspore
!7795 fix bug in sub_graph_kernel
Merge pull request !7795 from hangq/master
This commit is contained in:
commit
7887c6b7b1
|
@ -16,6 +16,7 @@
|
|||
|
||||
#include "src/lite_kernel.h"
|
||||
#include <algorithm>
|
||||
#include <queue>
|
||||
#include "src/tensor.h"
|
||||
|
||||
namespace mindspore::kernel {
|
||||
|
@ -120,19 +121,19 @@ std::string LiteKernel::ToString() const {
|
|||
std::ostringstream oss;
|
||||
oss << "LiteKernel: " << this->name_;
|
||||
oss << ", Type: " << this->type_str();
|
||||
oss << std::endl << this->in_tensors_.size() << " InputTensors:";
|
||||
oss << ", " << this->in_tensors_.size() << " InputTensors:";
|
||||
for (auto tensor : in_tensors_) {
|
||||
oss << " " << tensor << ":" << tensor->ToString();
|
||||
oss << " " << tensor;
|
||||
}
|
||||
oss << std::endl << this->out_tensors_.size() << " OutputTensors:";
|
||||
oss << ", " << this->out_tensors_.size() << " OutputTensors:";
|
||||
for (auto tensor : out_tensors_) {
|
||||
oss << " " << tensor << ":" << tensor->ToString();
|
||||
oss << " " << tensor;
|
||||
}
|
||||
oss << std::endl << this->in_kernels_.size() << " InputKernels:";
|
||||
oss << ", " << this->in_kernels_.size() << " InputKernels:";
|
||||
for (auto in_kernel : in_kernels_) {
|
||||
oss << " " << in_kernel->name_;
|
||||
}
|
||||
oss << std::endl << this->out_kernels_.size() << " OutputKernels:";
|
||||
oss << ", " << this->out_kernels_.size() << " OutputKernels:";
|
||||
for (auto out_kernel : out_kernels_) {
|
||||
oss << " " << out_kernel->name_;
|
||||
}
|
||||
|
@ -239,6 +240,42 @@ std::vector<lite::Tensor *> LiteKernelUtil::SubgraphOutputTensors(const std::vec
|
|||
return output_tensors;
|
||||
}
|
||||
|
||||
// Reorders *kernels in place so that every kernel appears after all of the kernels listed in its
// in_kernels(). The traversal is breadth-first: a kernel is emitted when it is dequeued, and a
// successor is enqueued only once every one of its input kernels has already been emitted.
//
// @param kernels  list to sort; it is cleared and refilled. When RET_ERROR is returned the list holds
//                 only the kernels emitted so far.
// @return RET_OK on success. RET_ERROR when a successor has already been emitted (reported as a loop)
//         or when the number of emitted kernels differs from the number of kernels passed in.
int LiteKernelUtil::TopologicalSortKernels(std::vector<kernel::LiteKernel *> *kernels) {
  MS_ASSERT(kernels != nullptr);
  auto old_kernels = *kernels;
  kernels->clear();
  std::queue<kernel::LiteKernel *> kernel_queue;
  // Seed the queue with kernels that have no input kernels. They are NOT appended to *kernels here:
  // every kernel, source or not, is appended exactly once when it is dequeued below. Appending the
  // sources up front would let a kernel fed by two sources look "ready" while one source is still
  // waiting in the queue, so it would be enqueued twice.
  for (auto kernel : old_kernels) {
    MS_ASSERT(kernel != nullptr);
    if (kernel->in_kernels().empty()) {
      kernel_queue.push(kernel);
    }
  }
  while (!kernel_queue.empty()) {
    auto cur_kernel = kernel_queue.front();
    kernel_queue.pop();
    MS_ASSERT(cur_kernel != nullptr);
    // Emit the kernel now; previously non-source kernels were queued but never emitted, which made
    // the size check at the end fail for every graph containing a kernel with inputs.
    kernels->emplace_back(cur_kernel);
    auto next_kernels = cur_kernel->out_kernels();
    for (auto next_kernel : next_kernels) {
      auto in_kernels = next_kernel->in_kernels();
      // A successor that was emitted before one of its producers means the in/out links are cyclic
      // or inconsistent.
      if (lite::IsContain(*kernels, const_cast<kernel::LiteKernel *>(next_kernel))) {
        MS_LOG(ERROR) << "TopologicalSortKernels failed, loop exist";
        return RET_ERROR;
      }
      // Enqueue the successor only when cur_kernel was the last of its inputs still missing.
      if (std::all_of(in_kernels.begin(), in_kernels.end(), [&](const kernel::LiteKernel *in_kernel) {
            return lite::IsContain(*kernels, const_cast<kernel::LiteKernel *>(in_kernel));
          })) {
        kernel_queue.push(next_kernel);
      }
    }
  }
  // Kernels that never became ready (e.g. part of a cycle, or depending on a kernel that was never
  // emitted) are missing from the result; report that as a failure.
  if (kernels->size() != old_kernels.size()) {
    MS_LOG(ERROR) << "TopologicalSortKernels failed, kernels size before sort: " << old_kernels.size()
                  << ", kernels size after sort: " << kernels->size();
    return RET_ERROR;
  }
  return RET_OK;
}
|
||||
|
||||
void LiteKernelUtil::InitIOKernels(std::vector<kernel::LiteKernel *> &kernels) {
|
||||
for (auto *kernel : kernels) {
|
||||
// clean io kernels
|
||||
|
|
|
@ -202,6 +202,8 @@ class LiteKernelUtil {
|
|||
|
||||
static std::vector<lite::Tensor *> SubgraphOutputTensors(const std::vector<kernel::LiteKernel *> &kernels);
|
||||
|
||||
static int TopologicalSortKernels(std::vector<kernel::LiteKernel *> *kernels);
|
||||
|
||||
static void InitTensorRefCount(std::vector<kernel::LiteKernel *> &kernels);
|
||||
|
||||
static int SetInput(LiteKernel &kernelMod, std::vector<lite::Tensor *> inputs);
|
||||
|
|
|
@ -38,17 +38,21 @@ int Scheduler::Schedule(const lite::Model *model, std::vector<Tensor *> *tensors
|
|||
int ret = InferShape(model, tensors);
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "op infer shape failed.";
|
||||
return RET_ERROR;
|
||||
return ret;
|
||||
}
|
||||
ret = InitOp2Kernel(model, tensors, kernels);
|
||||
ret = BuildKernels(model, tensors, kernels);
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "init op to kernel failed.";
|
||||
return RET_ERROR;
|
||||
return ret;
|
||||
}
|
||||
|
||||
kernel::LiteKernelUtil::InitIOKernels(*kernels);
|
||||
|
||||
ConstructSubGraphs(kernels);
|
||||
ret = ConstructSubGraphs(kernels);
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "ConstructSubGraphs failed.";
|
||||
return ret;
|
||||
}
|
||||
|
||||
kernel::LiteKernelUtil::InitIOKernels(*kernels);
|
||||
|
||||
|
@ -129,8 +133,8 @@ int Scheduler::InferShape(const lite::Model *model, std::vector<Tensor *> *tenso
|
|||
return RET_OK;
|
||||
}
|
||||
|
||||
int Scheduler::InitOp2Kernel(const lite::Model *model, std::vector<Tensor *> *tensors,
|
||||
std::vector<kernel::LiteKernel *> *kernels) {
|
||||
int Scheduler::BuildKernels(const lite::Model *model, std::vector<Tensor *> *tensors,
|
||||
std::vector<kernel::LiteKernel *> *kernels) {
|
||||
MS_ASSERT(model != nullptr);
|
||||
MS_ASSERT(tensors != nullptr);
|
||||
uint32_t kernelCount = model->nodes_.size();
|
||||
|
@ -194,7 +198,7 @@ int Scheduler::ConstructSubGraphs(std::vector<kernel::LiteKernel *> *kernels) {
|
|||
std::vector<kernel::LiteKernel *> sub_kernels;
|
||||
std::queue<kernel::LiteKernel *> kernel_queue;
|
||||
kernel_queue.emplace(head_kernel);
|
||||
auto cur_sub_graph_type = this->GetKernelSubGraphType(head_kernel);
|
||||
auto cur_sub_graph_type = mindspore::lite::Scheduler::GetKernelSubGraphType(head_kernel);
|
||||
while (!kernel_queue.empty()) {
|
||||
auto cur_kernel = kernel_queue.front();
|
||||
kernel_queue.pop();
|
||||
|
@ -202,7 +206,7 @@ int Scheduler::ConstructSubGraphs(std::vector<kernel::LiteKernel *> *kernels) {
|
|||
sub_kernels.emplace_back(cur_kernel);
|
||||
auto post_kernels = cur_kernel->out_kernels();
|
||||
for (auto post_kernel : post_kernels) {
|
||||
if (cur_sub_graph_type == this->GetKernelSubGraphType(post_kernel)) {
|
||||
if (cur_sub_graph_type == mindspore::lite::Scheduler::GetKernelSubGraphType(post_kernel)) {
|
||||
auto post_kernel_inputs = post_kernel->in_kernels();
|
||||
if (std::all_of(post_kernel_inputs.begin(), post_kernel_inputs.end(),
|
||||
[&](kernel::LiteKernel *kernel) { return is_kernel_sinked[kernel]; })) {
|
||||
|
|
|
@ -37,8 +37,8 @@ class Scheduler {
|
|||
kernel::LiteKernel *ScheduleNode(const std::vector<Tensor *> &in_tensors, const std::vector<Tensor *> &out_tensors,
|
||||
const mindspore::lite::PrimitiveC *primitive, const Model::Node *cnode);
|
||||
|
||||
int InitOp2Kernel(const lite::Model *model, std::vector<Tensor *> *tensors,
|
||||
std::vector<kernel::LiteKernel *> *kernels);
|
||||
int BuildKernels(const lite::Model *model, std::vector<Tensor *> *tensors,
|
||||
std::vector<kernel::LiteKernel *> *kernels);
|
||||
|
||||
static int InferShape(const lite::Model *model, std::vector<Tensor *> *tensors);
|
||||
|
||||
|
|
|
@ -44,23 +44,23 @@ int SubGraphKernel::Prepare() {
|
|||
std::string SubGraphKernel::ToString() const {
|
||||
std::ostringstream oss;
|
||||
oss << "===============================================" << std::endl << "Subgraph type : " << this->subgraph_type_;
|
||||
oss << std::endl << this->in_tensors_.size() << " InputTensors:";
|
||||
oss << std::endl << this->in_tensors_.size() << "Subgraph inputTensors:";
|
||||
for (auto tensor : in_tensors_) {
|
||||
oss << " " << tensor << ":" << tensor->ToString();
|
||||
oss << " " << tensor;
|
||||
}
|
||||
oss << std::endl << this->out_tensors_.size() << " OutputTensors:";
|
||||
oss << std::endl << this->out_tensors_.size() << "Subgraph outputTensors:";
|
||||
for (auto tensor : out_tensors_) {
|
||||
oss << " " << tensor << ":" << tensor->ToString();
|
||||
oss << " " << tensor;
|
||||
}
|
||||
oss << std::endl << "input kernels :";
|
||||
oss << std::endl << "Subgraph input kernels :" << std::endl;
|
||||
for (auto kernel : this->in_kernels_) {
|
||||
oss << " " << kernel->ToString();
|
||||
oss << " " << kernel->ToString() << std::endl;
|
||||
}
|
||||
oss << std::endl << "output kernels :";
|
||||
oss << std::endl << "Subgraph output kernels :" << std::endl;
|
||||
for (auto kernel : this->out_kernels_) {
|
||||
oss << " " << kernel->ToString();
|
||||
oss << " " << kernel->ToString() << std::endl;
|
||||
}
|
||||
oss << std::endl << nodes_.size() << " nodes :";
|
||||
oss << std::endl << nodes_.size() << " nodes in subgraph :";
|
||||
for (auto kernel : this->nodes_) {
|
||||
oss << " " << kernel->name();
|
||||
}
|
||||
|
@ -178,36 +178,18 @@ int CpuFp16SubGraph::PreProcess() {
|
|||
}
|
||||
|
||||
int CpuFp16SubGraph::PostProcess() {
|
||||
auto fp16_to_fp32_cast_func = kernel::Float16CastUtil::GetInstance()->float16_to_float32_func_;
|
||||
auto fp16_to_fp32_cast_func = Float16CastUtil::GetInstance()->float16_to_float32_func_;
|
||||
if (fp16_to_fp32_cast_func == nullptr) {
|
||||
MS_LOG(ERROR) << "Can not find cast fp16 to fp32 func";
|
||||
return RET_ERROR;
|
||||
}
|
||||
for (auto tensor : this->out_tensors_) {
|
||||
if (tensor->data_type() == kNumberTypeFloat16) {
|
||||
void *float16_data = nullptr;
|
||||
if (this->context_ != nullptr && this->context_->allocator != nullptr) {
|
||||
float16_data = this->context_->allocator->Malloc(tensor->Size());
|
||||
} else {
|
||||
float16_data = malloc(tensor->Size());
|
||||
}
|
||||
if (float16_data == nullptr) {
|
||||
MS_LOG(ERROR) << "malloc data failed";
|
||||
return RET_ERROR;
|
||||
}
|
||||
memcpy(float16_data, tensor->data_c(), tensor->Size());
|
||||
auto ret = tensor->FreeData();
|
||||
if (RET_OK != ret) {
|
||||
MS_LOG(ERROR) << "free data failed";
|
||||
if (this->context_ != nullptr && this->context_->allocator != nullptr) {
|
||||
this->context_->allocator->Free(float16_data);
|
||||
} else {
|
||||
free(float16_data);
|
||||
}
|
||||
return RET_ERROR;
|
||||
}
|
||||
auto float16_data = tensor->data_c();
|
||||
MS_ASSERT(float16_data != nullptr);
|
||||
tensor->set_data(nullptr);
|
||||
tensor->set_data_type(TypeId::kNumberTypeFloat32);
|
||||
ret = tensor->MallocData();
|
||||
auto ret = tensor->MallocData();
|
||||
if (RET_OK != ret) {
|
||||
MS_LOG(ERROR) << "malloc data failed";
|
||||
if (this->context_ != nullptr && this->context_->allocator != nullptr) {
|
||||
|
@ -217,9 +199,10 @@ int CpuFp16SubGraph::PostProcess() {
|
|||
}
|
||||
return RET_ERROR;
|
||||
}
|
||||
MS_ASSERT(tensor->data_c() != nullptr);
|
||||
fp16_to_fp32_cast_func(float16_data, tensor->data_c(), tensor->ElementsNum());
|
||||
if (this->context_ != nullptr && this->context_->allocator != nullptr) {
|
||||
this->context_->allocator->Free(float16_data);
|
||||
if (tensor->allocator() != nullptr) {
|
||||
tensor->allocator()->Free(float16_data);
|
||||
} else {
|
||||
free(float16_data);
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue