forked from mindspore-Ecosystem/mindspore
[MSLITE][DEVELOP] fix bug of npu, add npu testcase
This commit is contained in:
parent
9e27ca929a
commit
7d1ae1d5c3
|
@ -38,10 +38,28 @@ int NPUExecutor::Prepare(const std::vector<kernel::LiteKernel *> &kernels) {
|
||||||
}
|
}
|
||||||
|
|
||||||
bool IsSameShapeTensor(Tensor *tensor, std::shared_ptr<hiai::AiTensor> npu_tensor) {
|
bool IsSameShapeTensor(Tensor *tensor, std::shared_ptr<hiai::AiTensor> npu_tensor) {
|
||||||
return tensor->Batch() == npu_tensor->GetTensorDimension().GetNumber() &&
|
if (tensor->shape().size() == 4) {
|
||||||
tensor->Channel() == npu_tensor->GetTensorDimension().GetChannel() &&
|
return tensor->Batch() == npu_tensor->GetTensorDimension().GetNumber() &&
|
||||||
tensor->Height() == npu_tensor->GetTensorDimension().GetHeight() &&
|
tensor->Channel() == npu_tensor->GetTensorDimension().GetChannel() &&
|
||||||
tensor->Width() == npu_tensor->GetTensorDimension().GetWidth();
|
tensor->Height() == npu_tensor->GetTensorDimension().GetHeight() &&
|
||||||
|
tensor->Width() == npu_tensor->GetTensorDimension().GetWidth();
|
||||||
|
}
|
||||||
|
if (tensor->shape().size() > 4) {
|
||||||
|
MS_LOG(ERROR) << "Npu doesn't support input tensor dims greater than 4";
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
std::vector<int> npu_shape;
|
||||||
|
auto dim = tensor->shape().size();
|
||||||
|
if (dim > 0) {
|
||||||
|
npu_shape.push_back(npu_tensor->GetTensorDimension().GetNumber());
|
||||||
|
}
|
||||||
|
if (dim > 1) {
|
||||||
|
npu_shape.push_back(npu_tensor->GetTensorDimension().GetChannel());
|
||||||
|
}
|
||||||
|
if (dim > 2) {
|
||||||
|
npu_shape.push_back(npu_tensor->GetTensorDimension().GetWidth());
|
||||||
|
}
|
||||||
|
return npu_shape == tensor->shape();
|
||||||
}
|
}
|
||||||
|
|
||||||
int NPUExecutor::Run(const std::vector<Tensor *> &in_tensors, const std::vector<Tensor *> &out_tensors,
|
int NPUExecutor::Run(const std::vector<Tensor *> &in_tensors, const std::vector<Tensor *> &out_tensors,
|
||||||
|
@ -49,10 +67,11 @@ int NPUExecutor::Run(const std::vector<Tensor *> &in_tensors, const std::vector<
|
||||||
const std::vector<kernel::LiteKernel *> &kernels, Allocator *allocator,
|
const std::vector<kernel::LiteKernel *> &kernels, Allocator *allocator,
|
||||||
const KernelCallBack &before, const KernelCallBack &after) {
|
const KernelCallBack &before, const KernelCallBack &after) {
|
||||||
hiai::AiContext context;
|
hiai::AiContext context;
|
||||||
|
std::vector<bool> inputs_visited(in_tensors.size(), false);
|
||||||
for (int i = 0; i < npu_input_tensors_.size(); ++i) {
|
for (int i = 0; i < npu_input_tensors_.size(); ++i) {
|
||||||
int index = 0;
|
int index = 0;
|
||||||
for (; index < in_tensors.size(); index++) {
|
for (; index < in_tensors.size(); index++) {
|
||||||
if (IsSameShapeTensor(in_tensors[index], npu_input_tensors_[i])) {
|
if (!inputs_visited[index] && IsSameShapeTensor(in_tensors[index], npu_input_tensors_[i])) {
|
||||||
void *data = in_tensors[index]->data_c();
|
void *data = in_tensors[index]->data_c();
|
||||||
if (data == nullptr) {
|
if (data == nullptr) {
|
||||||
MS_LOG(ERROR) << model_name_ << " Inputs data is nullptr";
|
MS_LOG(ERROR) << model_name_ << " Inputs data is nullptr";
|
||||||
|
@ -60,6 +79,7 @@ int NPUExecutor::Run(const std::vector<Tensor *> &in_tensors, const std::vector<
|
||||||
}
|
}
|
||||||
|
|
||||||
memcpy(npu_input_tensors_[i]->GetBuffer(), data, in_tensors[index]->Size());
|
memcpy(npu_input_tensors_[i]->GetBuffer(), data, in_tensors[index]->Size());
|
||||||
|
inputs_visited[index] = true;
|
||||||
in_tensors[index]->set_ref_count(in_tensors[index]->ref_count() - 1);
|
in_tensors[index]->set_ref_count(in_tensors[index]->ref_count() - 1);
|
||||||
if (in_tensors[index]->ref_count() <= 0) {
|
if (in_tensors[index]->ref_count() <= 0) {
|
||||||
in_tensors[index]->FreeData();
|
in_tensors[index]->FreeData();
|
||||||
|
@ -85,33 +105,14 @@ int NPUExecutor::Run(const std::vector<Tensor *> &in_tensors, const std::vector<
|
||||||
return RET_ERROR;
|
return RET_ERROR;
|
||||||
}
|
}
|
||||||
|
|
||||||
// For an output kernel of the entire model whose format is NCHW, the output tensor must be converted from NCHW to NHWC.
|
|
||||||
std::vector<Tensor *> trans_tensors;
|
|
||||||
for (auto kernel : out_kernels) {
|
|
||||||
if (kernel->out_kernels().empty() && npu_trans_nodes.find(kernel->Type()) != npu_trans_nodes.end()) {
|
|
||||||
for (int i = 0; i < kernel->out_tensors().size(); ++i) {
|
|
||||||
trans_tensors.push_back(kernel->out_tensors()[i]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
for (int i = 0; i < npu_output_tensors_.size(); ++i) {
|
for (int i = 0; i < npu_output_tensors_.size(); ++i) {
|
||||||
void *data = out_tensors[i]->MutableData();
|
void *data = out_tensors[i]->MutableData();
|
||||||
if (data == nullptr) {
|
if (data == nullptr) {
|
||||||
MS_LOG(ERROR) << "Malloc buffer failed.";
|
MS_LOG(ERROR) << "Malloc buffer failed.";
|
||||||
return RET_ERROR;
|
return RET_ERROR;
|
||||||
}
|
}
|
||||||
|
memcpy(data, npu_output_tensors_[i]->GetBuffer(), npu_output_tensors_[i]->GetSize());
|
||||||
if (std::find(trans_tensors.begin(), trans_tensors.end(), out_tensors[i]) != trans_tensors.end()) {
|
out_tensors[i]->ResetRefCount();
|
||||||
// Change data&tensor shape nc->nh
|
|
||||||
PackNCHWToNHWCFp32(npu_output_tensors_[i]->GetBuffer(), data,
|
|
||||||
npu_output_tensors_[i]->GetTensorDimension().GetNumber(),
|
|
||||||
npu_output_tensors_[i]->GetTensorDimension().GetWidth() *
|
|
||||||
npu_output_tensors_[i]->GetTensorDimension().GetHeight(),
|
|
||||||
npu_output_tensors_[i]->GetTensorDimension().GetChannel());
|
|
||||||
} else {
|
|
||||||
memcpy(data, npu_output_tensors_[i]->GetBuffer(), npu_output_tensors_[i]->GetSize());
|
|
||||||
out_tensors[i]->ResetRefCount();
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
return RET_OK;
|
return RET_OK;
|
||||||
}
|
}
|
||||||
|
|
|
@ -29,9 +29,8 @@ bool CheckFusion(kernel::LiteKernel *kernel) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
auto post_flag =
|
auto post_flag =
|
||||||
std::all_of(kernel->out_kernels().begin(), kernel->out_kernels().end(), [](const kernel::LiteKernel *out_kernel) {
|
std::all_of(kernel->out_kernels().begin(), kernel->out_kernels().end(),
|
||||||
return NPUPassUtils::IsNhwc2Nchw(out_kernel) && (!out_kernel->out_kernels().empty());
|
[](const kernel::LiteKernel *out_kernel) { return NPUPassUtils::IsNhwc2Nchw(out_kernel); });
|
||||||
});
|
|
||||||
return post_flag;
|
return post_flag;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -41,15 +40,11 @@ bool CheckFormatFusion(kernel::LiteKernel *kernel) {
|
||||||
}
|
}
|
||||||
if (NPUPassUtils::IsNhwc2Nchw(kernel)) {
|
if (NPUPassUtils::IsNhwc2Nchw(kernel)) {
|
||||||
return std::all_of(kernel->out_kernels().begin(), kernel->out_kernels().end(),
|
return std::all_of(kernel->out_kernels().begin(), kernel->out_kernels().end(),
|
||||||
[](const kernel::LiteKernel *kernel) {
|
[](const kernel::LiteKernel *kernel) { return NPUPassUtils::IsNchw2Nhwc(kernel); });
|
||||||
return NPUPassUtils::IsNchw2Nhwc(kernel) && (!kernel->out_kernels().empty());
|
|
||||||
});
|
|
||||||
}
|
}
|
||||||
if (NPUPassUtils::IsNchw2Nhwc(kernel)) {
|
if (NPUPassUtils::IsNchw2Nhwc(kernel)) {
|
||||||
return std::all_of(kernel->out_kernels().begin(), kernel->out_kernels().end(),
|
return std::all_of(kernel->out_kernels().begin(), kernel->out_kernels().end(),
|
||||||
[](const kernel::LiteKernel *kernel) {
|
[](const kernel::LiteKernel *kernel) { return NPUPassUtils::IsNhwc2Nchw(kernel); });
|
||||||
return NPUPassUtils::IsNhwc2Nchw(kernel) && (!kernel->out_kernels().empty());
|
|
||||||
});
|
|
||||||
}
|
}
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
@ -92,32 +87,32 @@ void NPUFusionPass::UpdatePreKernels(kernel::LiteKernel *cur_kernel) {
|
||||||
}
|
}
|
||||||
|
|
||||||
void NPUFusionPass::UpdatePostKernels(kernel::LiteKernel *cur_kernel) {
|
void NPUFusionPass::UpdatePostKernels(kernel::LiteKernel *cur_kernel) {
|
||||||
|
auto cur_out_kernels = cur_kernel->out_kernels();
|
||||||
for (auto out_kernel : cur_kernel->out_kernels()) {
|
for (auto out_kernel : cur_kernel->out_kernels()) {
|
||||||
// graph out kernel
|
// graph out kernel
|
||||||
if (out_kernel->out_kernels().empty()) {
|
if (out_kernel->out_kernels().empty()) {
|
||||||
continue;
|
cur_out_kernels.erase(find(cur_out_kernels.begin(), cur_out_kernels.end(), out_kernel));
|
||||||
}
|
} else {
|
||||||
auto post_kernel = out_kernel->out_kernels()[0];
|
auto post_kernel = out_kernel->out_kernels()[0];
|
||||||
|
auto post_in_kernels = post_kernel->in_kernels();
|
||||||
|
for (size_t i = 0; i < post_in_kernels.size(); i++) {
|
||||||
|
if (post_in_kernels[i] == out_kernel) {
|
||||||
|
post_in_kernels[i] = cur_kernel;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
post_kernel->set_in_kernels(post_in_kernels);
|
||||||
|
|
||||||
auto post_in_kernels = post_kernel->in_kernels();
|
for (size_t i = 0; i < cur_out_kernels.size(); i++) {
|
||||||
for (size_t i = 0; i < post_in_kernels.size(); i++) {
|
if (cur_out_kernels[i] == out_kernel) {
|
||||||
if (post_in_kernels[i] == out_kernel) {
|
cur_out_kernels[i] = post_kernel;
|
||||||
post_in_kernels[i] = cur_kernel;
|
break;
|
||||||
break;
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
post_kernel->set_in_kernels(post_in_kernels);
|
|
||||||
|
|
||||||
auto cur_out_kernels = cur_kernel->out_kernels();
|
|
||||||
for (size_t i = 0; i < cur_out_kernels.size(); i++) {
|
|
||||||
if (cur_out_kernels[i] == out_kernel) {
|
|
||||||
cur_out_kernels[i] = post_kernel;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
cur_kernel->set_out_kernels(cur_out_kernels);
|
|
||||||
RemoveAndFreeKernel(out_kernel);
|
RemoveAndFreeKernel(out_kernel);
|
||||||
}
|
}
|
||||||
|
cur_kernel->set_out_kernels(cur_out_kernels);
|
||||||
}
|
}
|
||||||
|
|
||||||
void UpdatePreTensors(kernel::LiteKernel *cur_kernel) {
|
void UpdatePreTensors(kernel::LiteKernel *cur_kernel) {
|
||||||
|
@ -145,6 +140,9 @@ void UpdatePostTensors(kernel::LiteKernel *cur_kernel) {
|
||||||
auto tensor = cur_kernel->out_tensors()[0];
|
auto tensor = cur_kernel->out_tensors()[0];
|
||||||
for (auto out_kernel : cur_kernel->out_kernels()) {
|
for (auto out_kernel : cur_kernel->out_kernels()) {
|
||||||
auto out_tensor = out_kernel->out_tensors()[0];
|
auto out_tensor = out_kernel->out_tensors()[0];
|
||||||
|
if (out_kernel->out_kernels().empty()) {
|
||||||
|
cur_kernel->set_out_tensors({out_kernel->out_tensors()[0]});
|
||||||
|
}
|
||||||
for (auto post_kernel : out_kernel->out_kernels()) {
|
for (auto post_kernel : out_kernel->out_kernels()) {
|
||||||
auto tensors_vec = post_kernel->in_tensors();
|
auto tensors_vec = post_kernel->in_tensors();
|
||||||
for (int i = 0; i < tensors_vec.size(); i++) {
|
for (int i = 0; i < tensors_vec.size(); i++) {
|
||||||
|
@ -197,6 +195,10 @@ int NPUFusionPass::FormatFusion(kernel::LiteKernel *kernel) {
|
||||||
auto in_tensor = kernel->in_tensors()[0];
|
auto in_tensor = kernel->in_tensors()[0];
|
||||||
std::vector<kernel::LiteKernel *> pre_insert_kernels;
|
std::vector<kernel::LiteKernel *> pre_insert_kernels;
|
||||||
for (const auto &trans_kernel : kernel->out_kernels()) {
|
for (const auto &trans_kernel : kernel->out_kernels()) {
|
||||||
|
if (trans_kernel->out_kernels().empty()) {
|
||||||
|
// kernel is a trans kernel, so its input kernel count and input tensor count must both be 1
|
||||||
|
kernel->in_kernels()[0]->set_out_tensors({trans_kernel->out_tensors()[0]});
|
||||||
|
}
|
||||||
for (const auto &post_kernel : trans_kernel->out_kernels()) {
|
for (const auto &post_kernel : trans_kernel->out_kernels()) {
|
||||||
// update tensor
|
// update tensor
|
||||||
auto tensors_vec = post_kernel->in_tensors();
|
auto tensors_vec = post_kernel->in_tensors();
|
||||||
|
@ -218,8 +220,8 @@ int NPUFusionPass::FormatFusion(kernel::LiteKernel *kernel) {
|
||||||
}
|
}
|
||||||
post_kernel->set_in_kernels(post_in_kernels);
|
post_kernel->set_in_kernels(post_in_kernels);
|
||||||
pre_insert_kernels.push_back(post_kernel);
|
pre_insert_kernels.push_back(post_kernel);
|
||||||
RemoveAndFreeKernel(trans_kernel);
|
|
||||||
}
|
}
|
||||||
|
RemoveAndFreeKernel(trans_kernel);
|
||||||
}
|
}
|
||||||
pre_kernel->set_out_kernels(pre_insert_kernels);
|
pre_kernel->set_out_kernels(pre_insert_kernels);
|
||||||
RemoveAndFreeKernel(kernel);
|
RemoveAndFreeKernel(kernel);
|
||||||
|
|
|
@ -42,7 +42,7 @@ int PadNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs, const
|
||||||
int size = static_cast<int>(pad_->GetPaddings().size() / 2);
|
int size = static_cast<int>(pad_->GetPaddings().size() / 2);
|
||||||
ge::TensorDesc padding_tensor_desc(ge::Shape({size, 2}), ge::FORMAT_NCHW, ge::DT_INT32);
|
ge::TensorDesc padding_tensor_desc(ge::Shape({size, 2}), ge::FORMAT_NCHW, ge::DT_INT32);
|
||||||
ge::TensorPtr padding_tensor = std::make_shared<hiai::Tensor>(padding_tensor_desc);
|
ge::TensorPtr padding_tensor = std::make_shared<hiai::Tensor>(padding_tensor_desc);
|
||||||
padding_tensor->SetData(reinterpret_cast<uint8_t *>(pad_->GetPaddings().data()), size * sizeof(int));
|
padding_tensor->SetData(reinterpret_cast<uint8_t *>(pad_->GetPaddings().data()), 2 * size * sizeof(int));
|
||||||
auto paddings = new hiai::op::Const(name_ + "paddings");
|
auto paddings = new hiai::op::Const(name_ + "paddings");
|
||||||
paddings->set_attr_value(padding_tensor);
|
paddings->set_attr_value(padding_tensor);
|
||||||
|
|
||||||
|
|
|
@ -24,6 +24,10 @@ using mindspore::schema::PrimitiveType_SoftMax;
|
||||||
namespace mindspore::kernel {
|
namespace mindspore::kernel {
|
||||||
int SoftmaxNPUKernel::IsSupport(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs,
|
int SoftmaxNPUKernel::IsSupport(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs,
|
||||||
OpParameter *opParameter) {
|
OpParameter *opParameter) {
|
||||||
|
if (inputs[0]->shape().size() > 4) {
|
||||||
|
MS_LOG(ERROR) << "Npu softmax only supports tensor'dim less than 4.";
|
||||||
|
return RET_ERROR;
|
||||||
|
}
|
||||||
return RET_OK;
|
return RET_OK;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -67,3 +67,4 @@ PoseNet_dla_17_x512
|
||||||
ml_location_scene_division
|
ml_location_scene_division
|
||||||
ml_tabel_recog
|
ml_tabel_recog
|
||||||
ml_text_division
|
ml_text_division
|
||||||
|
6c_seg_nomean_20200610
|
||||||
|
|
|
@ -1,4 +1,5 @@
|
||||||
mobilenet_v2_1.0_224.tflite 2.5
|
mobilenet_v2_1.0_224.tflite 2.5
|
||||||
squeezenet.tflite 2.5
|
squeezenet.tflite 2.5
|
||||||
inception_v3.tflite 1
|
inception_v3.tflite 1
|
||||||
|
6c_seg_nomean_20200610 1.5
|
||||||
porseg_tmp.onnx 1 2
|
porseg_tmp.onnx 1 2
|
||||||
|
|
Loading…
Reference in New Issue