forked from mindspore-Ecosystem/mindspore
run mobilenet_v2 success
This commit is contained in:
parent a3d4dded12
commit d45b5b5126
@@ -95,6 +95,8 @@ class LiteKernel {
 
   virtual int Init() { return mindspore::lite::RET_ERROR; }
 
+  OpParameter *op_parameter() { return op_parameter_; }
+
   std::string name() const { return this->name_; }
 
   virtual int Train() {
@@ -479,12 +479,6 @@ int LiteSession::Init(const Context *context) {
     is_running_.store(false);
     return ret;
   }
-  ret = InitNPURuntime();
-  if (ret != RET_OK) {
-    MS_LOG(ERROR) << "Init NPU runtime failed.";
-    is_running_.store(false);
-    return ret;
-  }
   executor_ = new (std::nothrow) Executor();
   if (nullptr == executor_) {
     MS_LOG(ERROR) << "New Executor failed";
@@ -661,18 +655,6 @@ int LiteSession::Resize(const std::vector<mindspore::tensor::MSTensor *> &inputs
   return RET_OK;
 }
 
-int LiteSession::InitNPURuntime() {
-#if SUPPORT_NPU
-  if (this->context_->IsNpuEnabled()) {
-    if (mindspore::lite::NPUManager::GetInstance()->InitClient() != RET_OK) {
-      MS_LOG(ERROR) << "NPU client init error.";
-      return RET_ERROR;
-    }
-  }
-#endif
-  return RET_OK;
-}
-
 int LiteSession::InitGPURuntime() {
 #if SUPPORT_GPU
   if (this->context_->IsGpuEnabled()) {
@@ -103,8 +103,6 @@ class LiteSession : public session::LiteSession {
  private:
   void ResetInputsShape(const std::vector<std::vector<int>> &dims);
 
-  int InitNPURuntime();
-
   int InitGPURuntime();
 
  protected:
@@ -17,10 +17,9 @@
 #include "src/runtime/agent/npu/npu_executor.h"
 #include "include/errorcode.h"
 #include "src/runtime/agent/npu/npu_manager.h"
-#include "nnacl/pack.h"
 namespace mindspore::lite {
 int NPUExecutor::Prepare(const std::vector<kernel::LiteKernel *> &kernels) {
-  this->client_ = mindspore::lite::NPUManager::GetInstance()->GetClient();
+  this->client_ = mindspore::lite::NPUManager::GetInstance()->GetClient(model_name_);
   if (this->client_ == nullptr) {
     MS_LOG(ERROR) << "client is nullptr.";
     return RET_ERROR;
@@ -33,9 +32,8 @@ int NPUExecutor::Prepare(const std::vector<kernel::LiteKernel *> &kernels) {
 }
 
 int NPUExecutor::Run(const std::vector<Tensor *> &in_tensors, const std::vector<Tensor *> &out_tensors,
-                     const std::vector<kernel::LiteKernel *> &kernels, const std::vector<bool> &inputs_nhwc2nchw,
-                     const std::vector<bool> &outputs_nchw2nhwc, Allocator *allocator, const KernelCallBack &before,
-                     const KernelCallBack &after) {
+                     const std::vector<kernel::LiteKernel *> &kernels, Allocator *allocator,
+                     const KernelCallBack &before, const KernelCallBack &after) {
   hiai::AiContext context;
   for (int i = 0; i < npu_input_tensors_.size(); ++i) {
     void *data = in_tensors[i]->data_c();
@@ -43,13 +41,8 @@ int NPUExecutor::Run(const std::vector<Tensor *> &in_tensors, const std::vector<
       MS_LOG(ERROR) << model_name_ << " inputs data is nullptr";
       return RET_ERROR;
     }
-    if (inputs_nhwc2nchw[i]) {
-      PackNHWCToNCHWFp32(data, npu_input_tensors_[i]->GetBuffer(), in_tensors[i]->Batch(),
-                         in_tensors[i]->Width() * in_tensors[i]->Height(), in_tensors[i]->Channel());
-    } else {
-      memcpy(npu_input_tensors_[i]->GetBuffer(), data, in_tensors[i]->Size());
-    }
+    memcpy(npu_input_tensors_[i]->GetBuffer(), data, in_tensors[i]->Size());
   }
   context.AddPara("model_name", model_name_);
   if (this->client_ == nullptr) {
     MS_LOG(ERROR) << "NPU client is nullptr";
@@ -68,12 +61,7 @@ int NPUExecutor::Run(const std::vector<Tensor *> &in_tensors, const std::vector<
       MS_LOG(ERROR) << "Malloc buffer failed.";
       return RET_ERROR;
     }
-    if (outputs_nchw2nhwc[i]) {
-      PackNCHWToNHWCFp32(npu_output_tensors_[i]->GetBuffer(), data, out_tensors[i]->Batch(),
-                         out_tensors[i]->Width() * out_tensors[i]->Height(), out_tensors[i]->Channel());
-    } else {
-      memcpy(data, npu_output_tensors_[i]->GetBuffer(), npu_output_tensors_[i]->GetSize());
-    }
+    memcpy(data, npu_output_tensors_[i]->GetBuffer(), npu_output_tensors_[i]->GetSize());
     out_tensors[i]->ResetRefCount();
   }
   return RET_OK;
@@ -32,8 +32,7 @@ class NPUExecutor : public Executor {
   int Prepare(const std::vector<kernel::LiteKernel *> &kernels) override;
 
   int Run(const std::vector<Tensor *> &in_tensors, const std::vector<Tensor *> &out_tensors,
-          const std::vector<kernel::LiteKernel *> &kernels, const std::vector<bool> &inputs_nhwc2nchw,
-          const std::vector<bool> &outputs_nchw2nhwc, Allocator *allocator = nullptr,
+          const std::vector<kernel::LiteKernel *> &kernels, Allocator *allocator = nullptr,
           const KernelCallBack &before = nullptr, const KernelCallBack &after = nullptr);
 
  private:
@@ -0,0 +1,224 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include "src/runtime/agent/npu/npu_fusion_pass.h"
#include <vector>
#include "src/lite_kernel.h"
#include "nnacl/concat_parameter.h"

namespace mindspore::lite {
bool CheckFusion(kernel::LiteKernel *kernel) {
  auto pre_flag =
    std::all_of(kernel->in_kernels().begin(), kernel->in_kernels().end(), [](const kernel::LiteKernel *kernel) {
      return kernel->Type() == schema::PrimitiveType_Nchw2Nhwc && kernel->out_kernels().size() == 1;
    });
  if (!pre_flag) {
    return false;
  }
  auto post_flag =
    std::all_of(kernel->out_kernels().begin(), kernel->out_kernels().end(), [](const kernel::LiteKernel *kernel) {
      return kernel->Type() == schema::PrimitiveType_Nhwc2Nchw && kernel->in_kernels().size() == 1;
    });
  return post_flag;
}

void NPUFusionPass::UpdatePreKernels(kernel::LiteKernel *cur_kernel) {
  for (auto in_kernel : cur_kernel->in_kernels()) {
    auto pre_kernel = in_kernel->in_kernels()[0];

    auto pre_out_kernels = pre_kernel->out_kernels();
    for (size_t i = 0; i < pre_out_kernels.size(); i++) {
      if (pre_out_kernels[i] == in_kernel) {
        pre_out_kernels[i] = cur_kernel;
        break;
      }
    }
    pre_kernel->set_out_kernels(pre_out_kernels);

    auto cur_in_kernels = cur_kernel->in_kernels();
    for (size_t i = 0; i < cur_in_kernels.size(); i++) {
      if (cur_in_kernels[i] == in_kernel) {
        cur_in_kernels[i] = pre_kernel;
        break;
      }
    }
    cur_kernel->set_in_kernels(cur_in_kernels);
    kernels->erase(find(kernels->begin(), kernels->end(), in_kernel));
  }
}

void NPUFusionPass::UpdatePostKernels(kernel::LiteKernel *cur_kernel) {
  for (auto out_kernel : cur_kernel->out_kernels()) {
    auto post_kernel = out_kernel->out_kernels()[0];

    auto post_in_kernels = post_kernel->in_kernels();
    for (size_t i = 0; i < post_in_kernels.size(); i++) {
      if (post_in_kernels[i] == out_kernel) {
        post_in_kernels[i] = cur_kernel;
        break;
      }
    }
    post_kernel->set_in_kernels(post_in_kernels);

    auto cur_out_kernels = cur_kernel->out_kernels();
    for (size_t i = 0; i < cur_out_kernels.size(); i++) {
      if (cur_out_kernels[i] == out_kernel) {
        cur_out_kernels[i] = post_kernel;
        break;
      }
    }
    cur_kernel->set_out_kernels(cur_out_kernels);
    kernels->erase(find(kernels->begin(), kernels->end(), out_kernel));
  }
}

void UpdatePreTensors(kernel::LiteKernel *cur_kernel) {
  auto tensors_vec = cur_kernel->in_tensors();
  for (auto in_kernel : cur_kernel->in_kernels()) {
    lite::Tensor *cur_tensor = nullptr;
    auto in_tensor = in_kernel->in_tensors()[0];
    auto out_tensor = in_kernel->out_tensors()[0];
    auto pre_kernel = in_kernel->in_kernels()[0];
    for (size_t i = 0; i < pre_kernel->out_tensors().size(); i++) {
      if (pre_kernel->out_tensors()[i] == in_tensor) {
        cur_tensor = pre_kernel->out_tensors()[i];
      }
    }
    for (size_t i = 0; i < tensors_vec.size(); i++) {
      if (tensors_vec[i] == out_tensor) {
        tensors_vec[i] = cur_tensor;
      }
    }
  }
  cur_kernel->set_in_tensors(tensors_vec);
}

void UpdatePostTensors(kernel::LiteKernel *cur_kernel) {
  auto tensors_vec = cur_kernel->out_tensors();
  for (auto out_kernel : cur_kernel->out_kernels()) {
    auto in_tensor = out_kernel->in_tensors()[0];
    auto out_tensor = out_kernel->out_tensors()[0];
    auto post_kernel = out_kernel->out_kernels()[0];
    lite::Tensor *cur_tensor = nullptr;
    for (size_t i = 0; i < post_kernel->in_tensors().size(); i++) {
      if (post_kernel->in_tensors()[i] == out_tensor) {
        cur_tensor = post_kernel->in_tensors()[i];
      }
    }
    for (size_t i = 0; i < tensors_vec.size(); i++) {
      if (tensors_vec[i] == in_tensor) {
        tensors_vec[i] = cur_tensor;
      }
    }
  }
  cur_kernel->set_out_tensors(tensors_vec);
}

int TransFormAxis(int axis) {
  switch (axis) {
    case 0:
      return 0;
    case 1:
      return 2;
    case 2:
      return 3;
    case 3:
    case -1:
      return 1;
    default:
      return -2;
  }
}

int NPUFusionPass::AddFusion(kernel::LiteKernel *kernel) {
  if (!CheckFusion(kernel)) {
    return RET_OK;
  }
  UpdatePreTensors(kernel);
  UpdatePostTensors(kernel);
  UpdatePreKernels(kernel);
  UpdatePostKernels(kernel);
  return RET_OK;
}

int NPUFusionPass::ConcatFusion(kernel::LiteKernel *kernel) {
  if (!CheckFusion(kernel)) {
    return RET_OK;
  }
  UpdatePreTensors(kernel);
  UpdatePostTensors(kernel);
  UpdatePreKernels(kernel);
  UpdatePostKernels(kernel);
  auto concat_param = reinterpret_cast<ConcatParameter *>(kernel->op_parameter());
  concat_param->axis_ = TransFormAxis(concat_param->axis_);
  return RET_OK;
}

int NPUFusionPass::FormatFusion(kernel::LiteKernel *kernel) {
  if (kernel->out_kernels().empty()) {
    return RET_OK;
  }
  if (!std::all_of(kernel->out_kernels().begin(), kernel->out_kernels().end(), [](const kernel::LiteKernel *kernel) {
        return kernel->Type() == schema::PrimitiveType_Nhwc2Nchw;
      })) {
    return RET_OK;
  }
  auto pre_kernel = kernel->in_kernels()[0];

  auto pre_out_kernels = pre_kernel->out_kernels();
  for (size_t i = 0; i < pre_out_kernels.size(); i++) {
    if (pre_out_kernels[i] == kernel) {
      pre_out_kernels.erase(pre_out_kernels.begin() + i);
      break;
    }
  }
  for (const auto &nc2nh : kernel->out_kernels()) {
    for (const auto &post_kernel : nc2nh->out_kernels()) {
      auto post_in_kernels = post_kernel->in_kernels();
      for (size_t i = 0; i < post_in_kernels.size(); i++) {
        if (post_in_kernels[i] == nc2nh) {
          post_in_kernels[i] = pre_kernel;
          break;
        }
      }
      post_kernel->set_in_kernels(post_in_kernels);
      pre_out_kernels.push_back(post_kernel);
    }
    kernels->erase(find(kernels->begin(), kernels->end(), nc2nh));
  }
  pre_kernel->set_out_kernels(pre_out_kernels);
  kernels->erase(find(kernels->begin(), kernels->end(), kernel));
  return RET_OK;
}

int NPUFusionPass::Fusion() {
  for (auto kernel : *kernels) {
    switch (kernel->Type()) {
      case schema::PrimitiveType_Concat:
        ConcatFusion(kernel);
        continue;
      case schema::PrimitiveType_Add:
        AddFusion(kernel);
        continue;
      case schema::PrimitiveType_Nchw2Nhwc:
        FormatFusion(kernel);
        continue;
      default:
        continue;
    }
  }
  return RET_OK;
}
}  // namespace mindspore::lite
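Note: the axis remapping done by TransFormAxis in ConcatFusion above corresponds to translating an axis index from NHWC layout to NCHW layout. A minimal standalone sketch of the same mapping, with the function and variable names below chosen for illustration only (not part of the patch):

#include <cassert>

// Mirror of the TransFormAxis switch shown above: an axis that refers to an
// NHWC-laid-out tensor is translated to the equivalent axis in NCHW layout.
// NHWC axes: 0=N, 1=H, 2=W, 3=C  ->  NCHW axes: 0=N, 2=H, 3=W, 1=C.
int TransFormAxisSketch(int axis) {
  switch (axis) {
    case 0: return 0;    // batch stays in place
    case 1: return 2;    // height moves behind channel
    case 2: return 3;    // width moves to the last position
    case 3:
    case -1: return 1;   // channel (or "last axis") becomes axis 1
    default: return -2;  // unknown axis, flagged as invalid
  }
}

int main() {
  // Concatenating along channels: axis 3 (or -1) in NHWC is axis 1 in NCHW.
  assert(TransFormAxisSketch(3) == 1);
  assert(TransFormAxisSketch(-1) == 1);
  // Concatenating along height: axis 1 in NHWC is axis 2 in NCHW.
  assert(TransFormAxisSketch(1) == 2);
  return 0;
}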
@@ -0,0 +1,40 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_FUSION_PASS_H_
#define MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_FUSION_PASS_H_
#include <vector>
#include "src/lite_kernel.h"
#include "src/ops/primitive_c.h"
namespace mindspore::lite {
class NPUFusionPass {
 public:
  explicit NPUFusionPass(std::vector<kernel::LiteKernel *> *dst_kernels) { kernels = dst_kernels; }
  ~NPUFusionPass() = default;
  int Fusion();

 protected:
  int ConcatFusion(kernel::LiteKernel *kernel);
  int AddFusion(kernel::LiteKernel *kernel);
  int FormatFusion(kernel::LiteKernel *kernel);
  void UpdatePreKernels(kernel::LiteKernel *kernel);
  void UpdatePostKernels(kernel::LiteKernel *kernel);

 private:
  std::vector<kernel::LiteKernel *> *kernels;
};
}  // namespace mindspore::lite
#endif  // MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_FUSION_PASS_H_
@@ -15,21 +15,59 @@
  */

 #include "src/runtime/agent/npu/npu_manager.h"
+#include <sys/system_properties.h>
 #include <sys/fcntl.h>
 #include <unistd.h>
 #include "include/hiai_ir_build.h"
 #include "include/HiAiModelManagerService.h"
 #include "include/errorcode.h"
-#include "include/graph/op/all_ops.h"
 #include "src/common/file_utils.h"
 
 namespace mindspore::lite {
-bool NPUManager::IsSupportNPU() {
-  if (!is_npu_check_executor) {
-    CheckSupportNPU();
+#define MAX_MODEL_NUM 20
+int NPUManager::CompareVersion(const string &version1, const string &version2) {
+  std::istringstream iss1(version1);
+  std::istringstream iss2(version2);
+  string string1;
+  string string2;
+  while (!iss1.eof() || !iss2.eof()) {
+    getline(iss1, string1, '.');
+    getline(iss2, string2, '.');
+    if (stoi(string1) > stoi(string2)) return 1;
+    if (stoi(string1) < stoi(string2)) return -1;
+    string1 = string2 = "0";
   }
-  if (is_support_npu) {
+  return 0;
+}
+
+bool NPUManager::CheckEMUIVersion() {
+  char emui[128] = {0x00};
+  __system_property_get("ro.build.version.emui", emui);
+  std::string emui_str = emui;
+  int pos = emui_str.find('_');
+  if (pos != std::string::npos) {
+    auto version = emui_str.substr(pos + 1);
+    int ret = CompareVersion(version, "11.0.0");
+    if (ret < 0) {
+      return false;
+    }
+  }
+  return true;
+}
+
+bool NPUManager::CheckDDKVersion() {
+  auto client = std::make_shared<hiai::AiModelMngerClient>();
+  if (client->GetVersion() != nullptr) {
+    std::string version = client->GetVersion();
+    int ret = CompareVersion(version, "100.330.010.011");
+    if (ret < 0) {
+      return false;
+    }
+  }
+  return true;
+}
+bool NPUManager::IsSupportNPU() {
+  if (IsKirinChip() && CheckEMUIVersion() && CheckDDKVersion()) {
     MS_LOG(INFO) << "The current device support NPU.";
     return true;
   } else {
@@ -38,36 +76,6 @@ bool NPUManager::IsSupportNPU() {
   }
 }
 
-std::string NPUManager::GetExecutorPath() {
-  std::string executor_path;
-  char cmdline[1024] = {0};
-  int fd = open("/proc/self/cmdline", O_RDONLY);
-  if (fd >= 0) {
-    char ch;
-    int i = 0;
-    while (read(fd, &ch, sizeof(ch)) > 0 && !isspace(ch)) {
-      if (':' == ch) {
-        break;
-      }
-      cmdline[i] = ch;
-      i++;
-    }
-    close(fd);
-  }
-  executor_path = std::string(cmdline);
-  if (executor_path.empty()) {
-    executor_path = "./";
-  }
-  // android
-  if (executor_path.substr(0, 11) == "/data/data/") {
-    executor_path = executor_path + '/';
-  } else {
-    // Linux
-    executor_path = executor_path.substr(0, executor_path.rfind('/')) + "/";
-  }
-  return executor_path;
-}
-
 bool NPUManager::IsKirinChip() {
   std::ifstream cpu_info("/proc/cpuinfo");
   if (!(cpu_info.good() && cpu_info.is_open())) {
@@ -96,86 +104,6 @@ bool NPUManager::IsKirinChip() {
   return false;
 }
 
-bool WriteToOMFile(domi::ModelBufferData om_model_buff, const std::string &om_file_path) {
-  FILE *fp;
-  fp = fopen(om_file_path.c_str(), "wb");
-  if (fp == nullptr) {
-    MS_LOG(ERROR) << om_file_path.c_str() << " open failed.";
-    return false;
-  }
-
-  auto write_size = (uint32_t)fwrite(om_model_buff.data, 1, om_model_buff.length, fp);
-  if (write_size != om_model_buff.length) {
-    fclose(fp);
-    MS_LOG(ERROR) << "Write om file failed.";
-    return false;
-  }
-  fclose(fp);
-  return true;
-}
-
-bool NPUManager::CheckOmBuildIr(const std::string &path) {
-  // build test om model
-  std::shared_ptr<hiai::op::Add> add_op(new (std::nothrow) hiai::op::Add("add"));
-  if (add_op == nullptr) {
-    MS_LOG(ERROR) << "new add_op failed.";
-    return false;
-  }
-  ge::TensorDesc desc(ge::Shape({1}), ge::FORMAT_NCHW, ge::DT_FLOAT);
-  std::shared_ptr<hiai::op::Data> data = std::make_shared<hiai::op::Data>("data");
-  data->update_input_desc_x(desc);
-  add_op->set_input_x1(*data);
-  add_op->set_input_x2(*data);
-  domi::HiaiIrBuild ir_build;
-  ge::Graph ir_graph("graph");
-  std::vector<ge::Operator> inputs{*data, *data};
-  std::vector<ge::Operator> outputs{*add_op};
-  ir_graph.SetInputs(inputs).SetOutputs(outputs);
-  ge::Model om_model("test_model", "test_version");
-  om_model.SetGraph(ir_graph);
-
-  domi::ModelBufferData om_model_buff;
-  if (!ir_build.CreateModelBuff(om_model, om_model_buff)) {
-    MS_LOG(ERROR) << "Create model buffer failed.";
-    return false;
-  }
-  if (!ir_build.BuildIRModel(om_model, om_model_buff)) {
-    MS_LOG(ERROR) << "Build IR model failed.";
-    return false;
-  }
-
-  // save test om model
-  remove(path.c_str());
-  bool ret = WriteToOMFile(om_model_buff, path);
-  ir_build.ReleaseModelBuff(om_model_buff);
-  return ret;
-}
-
-void NPUManager::CheckSupportNPU() {
-  is_npu_check_executor = true;
-  std::string path_string = GetExecutorPath();
-
-  std::string test_model_path = path_string + "/mindspore_lite_test_npu.om";
-  std::ifstream ifs(test_model_path);
-  if (ifs.good() && ifs.is_open()) {
-    ifs.close();
-    is_support_npu = true;
-    return;
-  }
-  if (!IsKirinChip()) {
-    MS_LOG(ERROR) << "The current device chip NOT SUPPORT NPU";
-    is_support_npu = false;
-    return;
-  }
-
-  if (!CheckOmBuildIr(test_model_path)) {
-    MS_LOG(ERROR) << "Build OM IR error.";
-    is_support_npu = false;
-    return;
-  }
-  is_support_npu = true;
-}
-
 int NPUManager::AddModel(void *model_buf, uint32_t size, const std::string &model_name, int frequency) {
   hiai::MemBuffer *buffer = mc_builder_->InputMemBufferCreate(model_buf, size);
   if (buffer == nullptr) {
@@ -188,33 +116,42 @@ int NPUManager::AddModel(void *model_buf, uint32_t size, const std::string &mode
   model_desc_.push_back(desc);
   mc_builder_->MemBufferDestroy(buffer);
 
+  model_map_.insert({model_name, index_});
   index_++;
   return RET_OK;
 }
 
-int NPUManager::InitClient() {
-  this->client_ = std::make_shared<hiai::AiModelMngerClient>();
-  if (this->client_ == nullptr) {
-    return RET_ERROR;
-  }
-  int ret = this->client_->Init(nullptr);
-  if (ret != hiai::AI_SUCCESS) {
-    return RET_ERROR;
-  }
-  mc_builder_ = std::make_shared<hiai::AiModelBuilder>(this->client_);
-  return RET_OK;
-}
-
 int NPUManager::LoadOMModel() {
-  int ret = this->client_->Load(model_desc_);
+  for (int i = 0; i < index_ / MAX_MODEL_NUM + 1; i++) {
+    auto client = std::make_shared<hiai::AiModelMngerClient>();
+    if (client == nullptr) {
+      MS_LOG(ERROR) << "NPU client is nullptr.";
+      return RET_ERROR;
+    }
+    int ret = client->Init(nullptr);
+    if (ret != hiai::AI_SUCCESS) {
+      MS_LOG(ERROR) << "NPU client init failed. code is " << ret;
+      return RET_ERROR;
+    }
+    mc_builder_ = std::make_shared<hiai::AiModelBuilder>(client);
+
+    vector<std::shared_ptr<hiai::AiModelDescription>> desc(model_desc_.begin() + i * MAX_MODEL_NUM,
+                                                           ((i + 1) * MAX_MODEL_NUM > index_)
+                                                             ? model_desc_.begin() + index_
+                                                             : model_desc_.begin() + (i + 1) * MAX_MODEL_NUM);
+    ret = client->Load(desc);
     if (ret != hiai::AI_SUCCESS) {
       MS_LOG(ERROR) << "Client load model failed." << ret;
       return RET_ERROR;
     }
+    clients_.push_back(client);
+  }
   return RET_OK;
 }
 
-std::shared_ptr<hiai::AiModelMngerClient> NPUManager::GetClient() { return client_; }
+std::shared_ptr<hiai::AiModelMngerClient> NPUManager::GetClient(const std::string &model_name) {
+  return clients_[model_map_[model_name] / MAX_MODEL_NUM];
+}
 
-int NPUManager::index() { return index_; }
+int NPUManager::index() const { return index_; }
 }  // namespace mindspore::lite
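Note: the reworked LoadOMModel above splits the registered model descriptions into batches of MAX_MODEL_NUM (20) and creates one HiAI client per batch, while GetClient(model_name) maps a model back to its batch through model_map_. A minimal sketch of just that index arithmetic, with the names below (kMaxModelNum, BatchOf, model_map) and the sample model count being illustrative rather than part of the patch:

#include <cassert>
#include <string>
#include <unordered_map>
#include <vector>

constexpr int kMaxModelNum = 20;  // mirrors MAX_MODEL_NUM in the patch

// Which client batch a model with the given insertion index belongs to.
int BatchOf(int model_index) { return model_index / kMaxModelNum; }

int main() {
  std::unordered_map<std::string, int> model_map;  // model name -> insertion index
  int index = 0;
  for (int i = 0; i < 45; ++i) {
    model_map.insert({"model_" + std::to_string(i), index});
    index++;
  }
  // 45 models -> indices 0..44 -> batches 0, 1 and 2, i.e. index / 20 + 1 = 3 clients.
  int client_count = index / kMaxModelNum + 1;
  assert(client_count == 3);
  assert(BatchOf(model_map["model_0"]) == 0);
  assert(BatchOf(model_map["model_19"]) == 0);
  assert(BatchOf(model_map["model_20"]) == 1);
  assert(BatchOf(model_map["model_44"]) == 2);
  return 0;
}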
@@ -14,15 +14,21 @@
  * limitations under the License.
  */

-#ifndef MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_UTILS_H_
-#define MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_UTILS_H_
+#ifndef MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_MANAGER_H_
+#define MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_MANAGER_H_
 #include <string>
 #include <memory>
 #include <vector>
+#include <unordered_map>
+#include <set>
+#include "schema/model_generated.h"
 #include "include/HiAiModelManagerService.h"
 
 namespace mindspore::lite {
+static std::set<mindspore::schema::PrimitiveType> npu_trans_nodes = {
+  schema::PrimitiveType_Conv2D, schema::PrimitiveType_DeConv2D,
+  schema::PrimitiveType_DepthwiseConv2D, schema::PrimitiveType_DeDepthwiseConv2D,
+  schema::PrimitiveType_Resize, schema::PrimitiveType_Pooling};
 class NPUManager {
  public:
   static NPUManager *GetInstance() {
@@ -32,8 +38,6 @@ class NPUManager {
 
   bool IsSupportNPU();
 
-  int InitClient();
-
   // provide to subgraph to add model.
   int AddModel(void *model_buf, uint32_t size, const std::string &model_name, int frequency);
 
@@ -41,18 +45,18 @@ class NPUManager {
   int LoadOMModel();
 
   // provide to executor.
-  std::shared_ptr<hiai::AiModelMngerClient> GetClient();
+  std::shared_ptr<hiai::AiModelMngerClient> GetClient(const std::string &model_name);
 
-  int index();
+  int index() const;
 
  private:
-  void CheckSupportNPU();
-
   bool IsKirinChip();
 
-  bool CheckOmBuildIr(const std::string &path);
+  bool CheckEMUIVersion();
 
-  std::string GetExecutorPath();
+  bool CheckDDKVersion();
+
+  int CompareVersion(const std::string &version1, const std::string &version2);
 
  private:
   int index_ = 0;
@@ -61,12 +65,14 @@ class NPUManager {
 
   bool is_support_npu = false;
 
-  std::shared_ptr<hiai::AiModelMngerClient> client_ = nullptr;
+  std::vector<std::shared_ptr<hiai::AiModelMngerClient>> clients_;
 
   std::vector<std::shared_ptr<hiai::AiModelDescription>> model_desc_;
 
   std::shared_ptr<hiai::AiModelBuilder> mc_builder_ = nullptr;
 
+  std::unordered_map<std::string, int> model_map_;
 };
 
 }  // namespace mindspore::lite
-#endif  // MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_UTILS_H_
+#endif  // MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_MANAGER_H_
@@ -0,0 +1,102 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "src/kernel_registry.h"
#include "src/ops/nhwc2nchw.h"
#include "src/ops/nchw2nhwc.h"
#include "src/runtime/agent/npu/npu_pass_utils.h"
namespace mindspore::lite {
using kernel::KERNEL_ARCH::kCPU;
using kernel::KERNEL_ARCH::kNPU;
PrimitiveC *NPUPassUtils::CreateNchw2NhwcPrimitive() {
  flatbuffers::FlatBufferBuilder fbb(1024);
  auto val_offset = schema::CreateNchw2Nhwc(fbb);
  auto prim_offset = schema::CreatePrimitive(fbb, schema::PrimitiveType_Nchw2Nhwc, val_offset.o);
  fbb.Finish(prim_offset);
  auto buf = fbb.GetBufferPointer();
  if (buf == nullptr) {
    MS_LOG(ERROR) << "GetBufferPointer return nullptr";
    fbb.Clear();
    return nullptr;
  }
  auto primitive_buf = reinterpret_cast<char *>(malloc(fbb.GetSize()));
  if (primitive_buf == nullptr) {
    MS_LOG(ERROR) << "Malloc primitive_buf_ failed.";
    fbb.Clear();
    return nullptr;
  }
  memcpy(primitive_buf, buf, fbb.GetSize());
  auto *primitive = PrimitiveC::NewPrimitiveC<Nchw2Nhwc>(flatbuffers::GetRoot<schema::Primitive>(primitive_buf));
  free(primitive_buf);
  fbb.Clear();
  return primitive;
}

PrimitiveC *NPUPassUtils::CreateNhwc2NchwPrimitive() {
  flatbuffers::FlatBufferBuilder fbb(1024);
  auto val_offset = schema::CreateNhwc2Nchw(fbb);
  auto prim_offset = schema::CreatePrimitive(fbb, schema::PrimitiveType_Nhwc2Nchw, val_offset.o);
  fbb.Finish(prim_offset);
  auto buf = fbb.GetBufferPointer();
  if (buf == nullptr) {
    MS_LOG(ERROR) << "GetBufferPointer return nullptr";
    fbb.Clear();
    return nullptr;
  }
  auto primitive_buf = reinterpret_cast<char *>(malloc(fbb.GetSize()));
  if (primitive_buf == nullptr) {
    MS_LOG(ERROR) << "Malloc primitive_buf_ failed.";
    fbb.Clear();
    return nullptr;
  }
  memcpy(primitive_buf, buf, fbb.GetSize());
  auto *primitive = PrimitiveC::NewPrimitiveC<Nhwc2Nchw>(flatbuffers::GetRoot<schema::Primitive>(primitive_buf));
  free(primitive_buf);
  fbb.Clear();
  return primitive;
}

kernel::LiteKernel *NPUPassUtils::CreateNchw2NhwcKernel(const std::vector<Tensor *> &in_tensors,
                                                        const std::vector<Tensor *> &out_tensors,
                                                        const InnerContext *ctx, const std::string &name) {
  kernel::KernelKey key{kCPU, kNumberTypeFloat32, schema::PrimitiveType_Nchw2Nhwc};
  auto nchw2nhwc_primitive = CreateNchw2NhwcPrimitive();
  auto *nchw2nhwc_kernel =
    KernelRegistry::GetInstance()->GetKernel(in_tensors, out_tensors, nchw2nhwc_primitive, ctx, key);
  nchw2nhwc_kernel->set_name(name);
  return nchw2nhwc_kernel;
}

kernel::LiteKernel *NPUPassUtils::CreateNhwc2NchwKernel(const std::vector<Tensor *> &in_tensors,
                                                        const std::vector<Tensor *> &out_tensors,
                                                        const InnerContext *ctx, const std::string &name) {
  kernel::KernelKey key{kCPU, kNumberTypeFloat32, schema::PrimitiveType_Nhwc2Nchw};
  auto nhwc2nchw_primitive = CreateNhwc2NchwPrimitive();
  auto *nhwc2nchw_kernel =
    KernelRegistry::GetInstance()->GetKernel(in_tensors, out_tensors, nhwc2nchw_primitive, ctx, key);
  nhwc2nchw_kernel->set_name(name);
  return nhwc2nchw_kernel;
}

void NPUPassUtils::UpdateKernel(kernel::LiteKernel *kernel, const std::vector<kernel::LiteKernel *> &in_kernels,
                                const std::vector<kernel::LiteKernel *> &out_kernels,
                                const std::vector<Tensor *> &in_tensors, const std::vector<Tensor *> &out_tensors) {
  kernel->set_in_tensors(in_tensors);
  kernel->set_out_tensors(out_tensors);
  kernel->set_in_kernels(in_kernels);
  kernel->set_out_kernels(out_kernels);
}
}  // namespace mindspore::lite
@@ -0,0 +1,44 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_PASS_UTILS_H_
#define MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_PASS_UTILS_H_
#include <vector>
#include <string>
#include "src/ops/primitive_c.h"
#include "src/lite_kernel.h"
namespace mindspore::lite {
class NPUPassUtils {
 public:
  static kernel::LiteKernel *CreateNchw2NhwcKernel(const std::vector<Tensor *> &in_tensors,
                                                   const std::vector<Tensor *> &out_tensors, const InnerContext *ctx,
                                                   const std::string &name);

  static kernel::LiteKernel *CreateNhwc2NchwKernel(const std::vector<Tensor *> &in_tensors,
                                                   const std::vector<Tensor *> &out_tensors, const InnerContext *ctx,
                                                   const std::string &name);

  static void UpdateKernel(kernel::LiteKernel *kernel, const std::vector<kernel::LiteKernel *> &in_kernels,
                           const std::vector<kernel::LiteKernel *> &out_kernels,
                           const std::vector<Tensor *> &in_tensors, const std::vector<Tensor *> &out_tensors);

 private:
  static PrimitiveC *CreateNchw2NhwcPrimitive();

  static PrimitiveC *CreateNhwc2NchwPrimitive();
};
}  // namespace mindspore::lite
#endif  // MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_PASS_UTILS_H_
@@ -0,0 +1,201 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include "src/runtime/agent/npu/npu_transform_pass.h"
#include <vector>
#include "src/lite_kernel.h"
#include "src/runtime/agent/npu/npu_manager.h"
#include "src/runtime/agent/npu/npu_pass_utils.h"
namespace mindspore::lite {
using kernel::KERNEL_ARCH::kCPU;
using kernel::KERNEL_ARCH::kNPU;
int NPUTransformPass::UpdateNH2NCTransNodePreKernel(kernel::LiteKernel *kernel, kernel::LiteKernel *trans_kernel,
                                                    kernel::LiteKernel *after_kernel) {
  std::vector<kernel::LiteKernel *> out_kernels;

  for (auto out_kernel : kernel->out_kernels()) {
    if (out_kernel == after_kernel) {
      out_kernels.push_back(trans_kernel);
    } else {
      out_kernels.push_back(out_kernel);
    }
  }
  NPUPassUtils::UpdateKernel(kernel, kernel->in_kernels(), out_kernels, kernel->in_tensors(), kernel->out_tensors());
  return RET_OK;
}

int NPUTransformPass::UpdateNH2NCTransNodeAfterKernel(kernel::LiteKernel *kernel, kernel::LiteKernel *trans_kernel,
                                                      kernel::LiteKernel *before_kernel) {
  std::vector<lite::Tensor *> cur_kernel_in_tensors = {trans_kernel->out_tensors()[0]};
  for (int i = 1; i < kernel->in_tensors().size(); i++) {
    cur_kernel_in_tensors.push_back(kernel->in_tensors()[i]);
  }
  std::vector<kernel::LiteKernel *> cur_in_kernels = {trans_kernel};
  for (int i = 0; i < kernel->in_kernels().size(); i++) {
    auto in_kernel = kernel->in_kernels()[i];
    if (in_kernel != kernel) {
      cur_in_kernels.push_back(in_kernel);
    }
  }
  NPUPassUtils::UpdateKernel(kernel, cur_in_kernels, kernel->out_kernels(), cur_kernel_in_tensors,
                             kernel->out_tensors());
  return RET_OK;
}

int NPUTransformPass::InsertPreNode(const InnerContext *context, std::vector<kernel::LiteKernel *>::iterator it,
                                    std::vector<kernel::LiteKernel *> *all_kernels,
                                    std::vector<Tensor *> *all_tensors) {
  auto kernel = *it;
  bool is_input_kernel = kernel->in_kernels().empty();
  if (is_input_kernel || kernel->in_kernels()[0]->desc().arch != kNPU ||
      npu_trans_nodes.find(kernel->in_kernels()[0]->Type()) == npu_trans_nodes.end()) {
    kernel::LiteKernel *before_kernel = nullptr;
    if (!is_input_kernel) {
      before_kernel = kernel->in_kernels()[0];
    }
    // Create pre transform kernel out tensors.
    std::vector<int> shapes{kernel->in_tensors()[0]->shape()[0], kernel->in_tensors()[0]->shape()[3],
                            kernel->in_tensors()[0]->shape()[1], kernel->in_tensors()[0]->shape()[2]};
    auto tensor = new Tensor(kernel->in_tensors()[0]->data_type(), shapes, schema::Format_NCHW, Tensor::VAR);
    std::vector<Tensor *> pre_trans_out_tensors = {tensor};
    all_tensors->push_back(pre_trans_out_tensors[0]);
    // Replace the output tensor of the previous node
    auto name = kernel->name() + "_pre_trans" + "_Nhwc2Nchw_" + std::to_string(total++);
    auto *pre_trans_kernel =
      NPUPassUtils::CreateNhwc2NchwKernel({kernel->in_tensors()[0]}, pre_trans_out_tensors, context, name);
    // Insert Nhwc2Nchw into the front of the current queue
    all_kernels->push_back(pre_trans_kernel);
    // Replace the output kernel of the previous node
    std::vector<kernel::LiteKernel *> pre_trans_in_kernel;
    if (is_input_kernel) {
      pre_trans_in_kernel = {};
    } else {
      pre_trans_in_kernel = {before_kernel};
    }
    NPUPassUtils::UpdateKernel(pre_trans_kernel, pre_trans_in_kernel, {kernel}, {kernel->in_tensors()[0]},
                               pre_trans_out_tensors);

    if (before_kernel != nullptr) {
      UpdateNH2NCTransNodePreKernel(before_kernel, pre_trans_kernel, kernel);
    }
    UpdateNH2NCTransNodeAfterKernel(kernel, pre_trans_kernel, before_kernel);
  }
  return RET_OK;
}

int NPUTransformPass::InsertPostNode(const InnerContext *context, std::vector<kernel::LiteKernel *>::iterator it,
                                     std::vector<kernel::LiteKernel *> *all_kernels,
                                     std::vector<Tensor *> *all_tensors) {
  auto kernel = *it;
  // Single output multiple references
  for (int i = 0; i < kernel->out_kernels().size(); i++) {
    auto next_kernel = kernel->out_kernels().at(i);
    if (next_kernel->desc().arch == kNPU && npu_trans_nodes.find(next_kernel->Type()) != npu_trans_nodes.end()) {
      continue;
    }
    // Change format the output of the current kernel nhwc->nchw
    auto shapes = {kernel->out_tensors()[0]->shape()[0], kernel->out_tensors()[0]->shape()[1],
                   kernel->out_tensors()[0]->shape()[2], kernel->out_tensors()[0]->shape()[3]};
    auto tensor = new Tensor(kernel->out_tensors()[0]->data_type(), shapes, schema::Format_NHWC, Tensor::VAR);
    std::vector<Tensor *> post_trans_out_tensors = {tensor};
    all_tensors->push_back(post_trans_out_tensors[0]);
    // Use the output tensor of the current node as the input tensor of the post-conversion operator
    auto name = kernel->name() + "_post_trans" + "_Nchw2Nhwc" + std::to_string(total++);
    auto *post_trans_kernel =
      NPUPassUtils::CreateNchw2NhwcKernel(kernel->out_tensors(), post_trans_out_tensors, context, name);
    // Replace the input tensor of the next node
    NPUPassUtils::UpdateKernel(post_trans_kernel, {kernel}, {next_kernel}, kernel->out_tensors(),
                               post_trans_out_tensors);
    // Directly insert in the back, will not affect the topological sort
    all_kernels->push_back(post_trans_kernel);
    UpdateNC2NHTransNodePreKernel(kernel, post_trans_kernel, next_kernel);
    UpdateNC2NHTransNodeAfterKernel(kernel, post_trans_kernel, next_kernel);
  }
  return RET_OK;
}

int NPUTransformPass::UpdateNC2NHTransNodePreKernel(kernel::LiteKernel *kernel, kernel::LiteKernel *trans_kernel,
                                                    kernel::LiteKernel *next_kernel) {
  std::vector<kernel::LiteKernel *> cur_out_kernels;
  for (auto out_kernel : kernel->out_kernels()) {
    if (out_kernel == next_kernel) {
      cur_out_kernels.push_back(trans_kernel);
    } else {
      cur_out_kernels.push_back(out_kernel);
    }
  }
  auto kernel_out_tensor = kernel->out_tensors()[0];
  // Change format the output of the current kernel nhwc->nchw
  std::vector<int> kernel_out_new_shapes = {kernel_out_tensor->shape()[0], kernel_out_tensor->shape()[3],
                                            kernel_out_tensor->shape()[1], kernel_out_tensor->shape()[2]};
  kernel_out_tensor->set_format(schema::Format_NCHW);
  kernel_out_tensor->set_shape(kernel_out_new_shapes);
  NPUPassUtils::UpdateKernel(kernel, kernel->in_kernels(), cur_out_kernels, kernel->in_tensors(), {kernel_out_tensor});
  return RET_OK;
}

int NPUTransformPass::UpdateNC2NHTransNodeAfterKernel(kernel::LiteKernel *kernel, kernel::LiteKernel *trans_kernel,
                                                      kernel::LiteKernel *next_kernel) {
  std::vector<Tensor *> next_in_tensors;
  for (auto next_in_tensor : next_kernel->in_tensors()) {
    if (next_in_tensor != kernel->out_tensors()[0]) {
      next_in_tensors.push_back(next_in_tensor);
    } else {
      next_in_tensors.push_back(trans_kernel->out_tensors()[0]);
    }
  }
  next_kernel->set_in_tensors(next_in_tensors);
  std::vector<kernel::LiteKernel *> next_in_kernels;
  for (auto in_kernel : next_kernel->in_kernels()) {
    if (in_kernel == kernel) {
      next_in_kernels.push_back(trans_kernel);
    } else {
      next_in_kernels.push_back(in_kernel);
    }
  }
  NPUPassUtils::UpdateKernel(next_kernel, next_in_kernels, next_kernel->out_kernels(), next_in_tensors,
                             next_kernel->out_tensors());

  return RET_OK;
}

int NPUTransformPass::FormatTransformPass(const InnerContext *context, std::vector<kernel::LiteKernel *> *all_kernels,
                                          std::vector<Tensor *> *all_tensors) {
  if (context->IsNpuEnabled()) {
    std::vector<kernel::LiteKernel *> new_kernels;

    for (auto it = all_kernels->begin(); it != all_kernels->end(); it++) {
      auto kernel = *it;
      if (kernel->desc().arch != kNPU) {
        new_kernels.push_back(kernel);
        continue;
      }
      if (npu_trans_nodes.find(kernel->Type()) != npu_trans_nodes.end()) {
        InsertPreNode(context, it, &new_kernels, all_tensors);
        new_kernels.push_back(kernel);
        InsertPostNode(context, it, &new_kernels, all_tensors);
      } else {
        new_kernels.push_back(kernel);
      }
    }
    all_kernels->clear();
    for (int i = 0; i < new_kernels.size(); i++) {
      all_kernels->push_back(new_kernels[i]);
    }
  }
  return RET_OK;
}

}  // namespace mindspore::lite
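Note: InsertPreNode above builds the output tensor of the inserted Nhwc2Nchw kernel by permuting the NHWC input shape into NCHW order ({shape[0], shape[3], shape[1], shape[2]}). A minimal standalone sketch of that permutation; the helper name and sample shape below are illustrative, not part of the patch:

#include <cassert>
#include <vector>

// NHWC {N, H, W, C} -> NCHW {N, C, H, W}, matching the shape built for the
// inserted pre-transpose tensor in InsertPreNode.
std::vector<int> NhwcShapeToNchw(const std::vector<int> &shape) {
  return {shape[0], shape[3], shape[1], shape[2]};
}

int main() {
  std::vector<int> nhwc = {1, 224, 224, 3};  // a typical mobilenet_v2 input shape
  std::vector<int> nchw = NhwcShapeToNchw(nhwc);
  assert((nchw == std::vector<int>{1, 3, 224, 224}));
  return 0;
}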
@@ -0,0 +1,51 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_TRANSFORM_PASS_H_
#define MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_TRANSFORM_PASS_H_
#include <vector>
#include "src/lite_kernel.h"
#include "src/ops/primitive_c.h"
namespace mindspore::lite {
class NPUTransformPass {
 public:
  int FormatTransformPass(const InnerContext *context, std::vector<kernel::LiteKernel *> *all_kernels,
                          std::vector<Tensor *> *all_tensors);

 private:
  int UpdateNH2NCTransNodePreKernel(kernel::LiteKernel *kernel, kernel::LiteKernel *trans_kernel,
                                    kernel::LiteKernel *after_kernel);

  int UpdateNH2NCTransNodeAfterKernel(kernel::LiteKernel *kernel, kernel::LiteKernel *trans_kernel,
                                      kernel::LiteKernel *before_kernel);

  int UpdateNC2NHTransNodePreKernel(kernel::LiteKernel *kernel, kernel::LiteKernel *trans_kernel,
                                    kernel::LiteKernel *after_kernel);

  int UpdateNC2NHTransNodeAfterKernel(kernel::LiteKernel *kernel, kernel::LiteKernel *trans_kernel,
                                      kernel::LiteKernel *next_kernel);

  int InsertPreNode(const InnerContext *context, std::vector<kernel::LiteKernel *>::iterator it,
                    std::vector<kernel::LiteKernel *> *all_kernels, std::vector<Tensor *> *all_tensors);

  int InsertPostNode(const InnerContext *context, std::vector<kernel::LiteKernel *>::iterator it,
                     std::vector<kernel::LiteKernel *> *all_kernels, std::vector<Tensor *> *all_tensors);

 private:
  int total = 0;
};
}  // namespace mindspore::lite
#endif  // MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_TRANSFORM_PASS_H_
@@ -24,7 +24,6 @@
 #include "include/graph/model.h"
 #include "include/hiai_ir_build.h"
 #include "include/HiAiModelManagerType.h"
-#include "include/context.h"
 #include "include/version.h"
 #include "src/common/utils.h"
 #include "src/runtime/agent/npu/npu_converter_utils.h"
@@ -34,10 +33,6 @@ namespace mindspore::kernel {
 using mindspore::lite::RET_ERROR;
 using mindspore::lite::RET_OK;
 
-std::set<schema::PrimitiveType> trans_nodes = {schema::PrimitiveType_Conv2D, schema::PrimitiveType_DeConv2D,
-                                               schema::PrimitiveType_DepthwiseConv2D,
-                                               schema::PrimitiveType_DeDepthwiseConv2D, schema::PrimitiveType_Resize};
-
 domi::ModelBufferData *SubGraphNpuKernel::BuildIRModel() {
   ge::Graph graph("NPUGraph");
 
@@ -75,8 +70,7 @@ domi::ModelBufferData *SubGraphNpuKernel::BuildIRModel() {
 }
 
 int SubGraphNpuKernel::Run() {
-  return reinterpret_cast<lite::NPUExecutor *>(this->executor_)
-    ->Run(in_tensors_, out_tensors_, nodes_, inputs_nhwc2nchw_, outputs_nchw2nhwc_);
+  return reinterpret_cast<lite::NPUExecutor *>(this->executor_)->Run(in_tensors_, out_tensors_, nodes_);
 }
 
 int SubGraphNpuKernel::BuildNPUInputOp() {
@@ -88,21 +82,7 @@ int SubGraphNpuKernel::BuildNPUInputOp() {
     if (IsSubGraphInputTensor(in_tensor)) {
       auto tensor_name = node->name() + "_" + std::to_string(count++);
       hiai::op::Data *data;
-      if (trans_nodes.find(node->Type()) != trans_nodes.end()) {
-        auto shape = in_tensor->shape();
-        data = new (std::nothrow) hiai::op::Data(tensor_name);
-        if (data == nullptr) {
-          MS_LOG(ERROR) << "New data failed.";
-          return RET_ERROR;
-        }
-        ge::TensorDesc tensor_desc(lite::ConverterToNPUShape({shape[0], shape[3], shape[1], shape[2]}),
-                                   ge::FORMAT_NCHW, lite::ConverterToNPUDataType(in_tensor->data_type()));
-        data->update_input_desc_x(tensor_desc);
-        inputs_nhwc2nchw_.push_back(true);
-      } else {
-        data = mindspore::lite::ConverterToNPUData(in_tensor, tensor_name);
-        inputs_nhwc2nchw_.push_back(false);
-      }
+      data = mindspore::lite::ConverterToNPUData(in_tensor, tensor_name);
       subgraph_input_op_.push_back(*data);
       node_input_op.push_back(data);
       continue;
@@ -132,7 +112,7 @@ int SubGraphNpuKernel::BuildNPUInputOp() {
 
       // weight tensor
       if (is_weight_tensor) {
-        if (trans_nodes.find(node->Type()) == trans_nodes.end()) {
+        if (lite::npu_trans_nodes.find(node->Type()) == lite::npu_trans_nodes.end()) {
          auto name = node->name() + "_" + std::to_string(count++);
          auto weight_const = new (std::nothrow) hiai::op::Const(node->name() + "_" + std::to_string(count++));
          if (weight_const == nullptr) {
@@ -162,11 +142,6 @@ std::vector<ge::Operator> SubGraphNpuKernel::GetNPUNodes(const vector<kernel::Li
   ops.reserve(nodes.size());
   for (int i = 0; i < nodes.size(); i++) {
     ops.push_back(*reinterpret_cast<NPUKernel *>(nodes[i])->GetNPUOp());
-    if (trans_nodes.find(schema::PrimitiveType(nodes[i]->GetPrimitive()->Type())) != trans_nodes.end()) {
-      outputs_nchw2nhwc_.push_back(true);
-    } else {
-      outputs_nchw2nhwc_.push_back(false);
-    }
   }
   return ops;
 }
@@ -69,10 +69,6 @@ class SubGraphNpuKernel : public SubGraphKernel {
   std::string GetOMModelName();
 
  private:
-  std::vector<bool> inputs_nhwc2nchw_;
-
-  std::vector<bool> outputs_nchw2nhwc_;
-
   domi::ModelBufferData *model_buffer_data_;
 
   std::vector<ge::Operator> subgraph_input_op_;
@@ -16,6 +16,7 @@

 #include "src/runtime/kernel/npu/convolution_base_npu.h"
 #include "src/runtime/agent/npu/npu_converter_utils.h"
+#include "nnacl/pack.h"

 namespace mindspore::kernel {
 ConvolutionBaseNPUKernel::~ConvolutionBaseNPUKernel() {
@@ -39,14 +40,27 @@ int ConvolutionBaseNPUKernel::InitWeightBiasConst(const std::vector<lite::Tensor
     MS_LOG(ERROR) << "New weight const failed.";
     return RET_ERROR;
   }
-  auto weight_shape = inputs[1]->shape();
-  inputs[1]->set_shape({weight_shape[0], weight_shape[3], weight_shape[1], weight_shape[2]});
-  inputs[1]->set_format(schema::Format_NCHW);
-  auto weight_tensor = mindspore::lite::ConverterToNPUTensor(inputs[1]);
-  weight_->set_attr_value(weight_tensor);
-
-  inputs[1]->set_shape(weight_shape);
-  inputs[1]->set_format(schema::Format_NHWC);
+  auto w_shape = inputs[1]->shape();
+  auto nhwc_data = inputs[1]->data_c();
+  auto nchw_data = reinterpret_cast<float *>(malloc(inputs[1]->ElementsNum() * sizeof(float)));
+  if (nchw_data == nullptr) {
+    MS_LOG(ERROR) << "Malloc buffer failed.";
+    return RET_ERROR;
+  }
+  PackNHWCToNCHWFp32(nhwc_data, nchw_data, w_shape[0], w_shape[1] * w_shape[2], w_shape[3]);
+
+  std::shared_ptr<ge::Tensor> weight_tensor = std::shared_ptr<ge::Tensor>(new (std::nothrow) ge::Tensor());
+  if (weight_tensor == nullptr) {
+    MS_LOG(ERROR) << "new weight_tensor failed.";
+    return RET_ERROR;
+  }
+  ge::TensorDesc tensor_desc(lite::ConverterToNPUShape({w_shape[0], w_shape[3], w_shape[1], w_shape[2]}),
+                             ge::FORMAT_NCHW, lite::ConverterToNPUDataType(inputs[1]->data_type()));
+  weight_tensor->SetTensorDesc(tensor_desc);
+  weight_tensor->SetData(reinterpret_cast<const uint8_t *>(nchw_data), inputs[1]->Size());
+
+  weight_->set_attr_value(weight_tensor);
+  free(nchw_data);

   if (inputs.size() >= 3) {
     bias_ = new (std::nothrow) hiai::op::Const(name_ + "_b");
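Note (not part of the patch): the rewritten InitWeightBiasConst no longer flips the weight tensor's shape and format in place. It repacks the NHWC weight data into a temporary NCHW buffer, attaches that buffer to a ge::Tensor carrying an NCHW descriptor, and frees the buffer once weight_ owns the value. A sketch of the element mapping that PackNHWCToNCHWFp32 is assumed to perform, with plane = H * W (illustrative only, not the nnacl implementation):

// Assumed semantics of PackNHWCToNCHWFp32(src, dst, batch, plane, channel):
// the element at NHWC position (b, p, c) moves to NCHW position (b, c, p).
void PackNHWCToNCHWFp32Sketch(const float *src, float *dst, int batch, int plane, int channel) {
  for (int b = 0; b < batch; ++b) {
    const float *src_b = src + b * plane * channel;
    float *dst_b = dst + b * plane * channel;
    for (int p = 0; p < plane; ++p) {
      for (int c = 0; c < channel; ++c) {
        dst_b[c * plane + p] = src_b[p * channel + c];
      }
    }
  }
}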
@@ -17,17 +17,18 @@
 #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_NPU_CONVOLUTION_BASE_NPU_H_

 #include <vector>
+#include <memory>
 #include "include/graph/op/all_ops.h"
 #include "src/runtime/kernel/npu/transpose_base_npu.h"
 #include "nnacl/conv_parameter.h"

 namespace mindspore::kernel {
-class ConvolutionBaseNPUKernel : public TransposeBaseNPUKernel {
+class ConvolutionBaseNPUKernel : public NPUKernel {
  public:
   ConvolutionBaseNPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
                            const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx,
                            const mindspore::lite::PrimitiveC *primitive)
-      : TransposeBaseNPUKernel(parameter, inputs, outputs, ctx, primitive) {}
+      : NPUKernel(parameter, inputs, outputs, ctx, primitive) {}
   ~ConvolutionBaseNPUKernel() override;

  protected:
@@ -25,7 +25,7 @@ using mindspore::schema::PrimitiveType_DepthwiseConv2D;
 namespace mindspore::kernel {
 int ConvolutionDepthwiseNPUKernel::IsSupport(const std::vector<lite::Tensor *> &inputs,
                                              const std::vector<lite::Tensor *> &outputs, OpParameter *opParameter) {
-  return RET_ERROR;
+  return RET_OK;
 }

 int ConvolutionDepthwiseNPUKernel::SetConvDwParam() {
@@ -49,19 +49,13 @@ int ConvolutionDepthwiseNPUKernel::SetConvDwParam() {
 int ConvolutionDepthwiseNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs,
                                                 const std::vector<lite::Tensor *> &outputs,
                                                 const std::vector<ge::Operator *> &npu_inputs) {
-  auto ret = SetPreTranspose(npu_inputs[0]);
-  if (ret != RET_OK) {
-    MS_LOG(ERROR) << "New pre transpose npu operator (NHWC -> NCHW) for op " << name_ << " failed.";
-    return RET_ERROR;
-  }
-
   // set conv attr param
   conv_dw_ = new (std::nothrow) hiai::op::ConvolutionDepthwise(name_ + "_conv_depthwise");
   if (conv_dw_ == nullptr) {
     MS_LOG(ERROR) << "New convolution depthwise operator for op " << name_ << " failed.";
     return RET_ERROR;
   }
-  ret = SetConvDwParam();
+  auto ret = SetConvDwParam();
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "Set npu op parameter for convolution depthwise op " << name_ << " failed.";
     return RET_ERROR;
@@ -76,7 +70,7 @@ int ConvolutionDepthwiseNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *
   if (inputs.size() == 3) {
     conv_dw_->set_input_bias(*bias_);
   }
-  conv_dw_->set_input_x(*pre_trans_);
+  conv_dw_->set_input_x(*npu_inputs[0]);

   if (conv_param_->act_type_ != ActType_No) {
     ret = SetActivation(conv_dw_, conv_param_->act_type_);
@@ -85,20 +79,16 @@ int ConvolutionDepthwiseNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *
       return RET_ERROR;
     }
   }
-
-  if (conv_param_->act_type_ == ActType_No) {
-    ret = SetPostTranspose(conv_dw_);
-  } else {
-    ret = SetPostTranspose(act_);
-  }
-  if (ret != RET_OK) {
-    MS_LOG(ERROR) << "New post transpose npu operator (NCHW -> NHWC) for op " << name_ << " failed.";
-    return RET_ERROR;
-  }
   return RET_OK;
 }

-ge::Operator *mindspore::kernel::ConvolutionDepthwiseNPUKernel::GetNPUOp() { return post_trans_; }
+ge::Operator *mindspore::kernel::ConvolutionDepthwiseNPUKernel::GetNPUOp() {
+  if (conv_param_->act_type_ == ActType_No) {
+    return conv_dw_;
+  } else {
+    return act_;
+  }
+}

 ConvolutionDepthwiseNPUKernel::~ConvolutionDepthwiseNPUKernel() {
   if (conv_dw_ != nullptr) {
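Note (not part of the patch): with the per-kernel pre/post transposes removed, the depthwise convolution kernel now feeds npu_inputs[0] straight into the HiAI op, and GetNPUOp() hands back the last op it built: the activation op when one was appended, otherwise the compute op itself. The convolution and pooling hunks below apply the same pattern. A condensed sketch of the selection, assuming the activation op pointer is null when no activation was appended:

// Returns the op that downstream consumers should connect to.
ge::Operator *SelectOutputOp(ge::Operator *compute_op, ge::Operator *act_op) {
  return act_op != nullptr ? act_op : compute_op;
}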
@@ -24,7 +24,7 @@ using mindspore::schema::PrimitiveType_Conv2D;
 namespace mindspore::kernel {
 int ConvolutionNPUKernel::IsSupport(const std::vector<lite::Tensor *> &inputs,
                                     const std::vector<lite::Tensor *> &outputs, OpParameter *opParameter) {
-  return RET_ERROR;
+  return RET_OK;
 }

 int ConvolutionNPUKernel::SetConvParam() {
@@ -49,19 +49,13 @@ int ConvolutionNPUKernel::SetConvParam() {
 int ConvolutionNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs,
                                        const std::vector<lite::Tensor *> &outputs,
                                        const std::vector<ge::Operator *> &npu_inputs) {
-  auto ret = SetPreTranspose(npu_inputs[0]);
-  if (ret != RET_OK) {
-    MS_LOG(ERROR) << "New pre transpose npu operator (NHWC -> NCHW) for op " << name_ << " failed.";
-    return RET_ERROR;
-  }
-
   // set conv attr param
   conv_ = new (std::nothrow) hiai::op::Convolution(name_ + "_conv");
   if (conv_ == nullptr) {
     MS_LOG(ERROR) << "New convolution operator for convolution op " << name_ << " failed.";
     return RET_ERROR;
   }
-  ret = SetConvParam();
+  auto ret = SetConvParam();
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "Set npu op parameter for convolution op " << name_ << " failed.";
     return RET_ERROR;
@@ -76,7 +70,7 @@ int ConvolutionNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs
   if (inputs.size() == 3) {
     conv_->set_input_bias(*bias_);
   }
-  conv_->set_input_x(*pre_trans_);
+  conv_->set_input_x(*npu_inputs[0]);

   if (conv_param_->act_type_ != ActType_No) {
     ret = SetActivation(conv_, conv_param_->act_type_);
@@ -85,20 +79,16 @@ int ConvolutionNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs
       return RET_ERROR;
     }
   }
-
-  if (conv_param_->act_type_ == ActType_No) {
-    ret = SetPostTranspose(conv_);
-  } else {
-    ret = SetPostTranspose(act_);
-  }
-  if (ret != RET_OK) {
-    MS_LOG(ERROR) << "New post transpose npu operator (NCHW -> NHWC) for op " << name_ << " failed.";
-    return RET_ERROR;
-  }
   return RET_OK;
 }

-ge::Operator *mindspore::kernel::ConvolutionNPUKernel::GetNPUOp() { return post_trans_; }
+ge::Operator *mindspore::kernel::ConvolutionNPUKernel::GetNPUOp() {
+  if (conv_param_->act_type_ == ActType_No) {
+    return conv_;
+  } else {
+    return act_;
+  }
+}

 ConvolutionNPUKernel::~ConvolutionNPUKernel() {
   if (conv_ != nullptr) {
@@ -62,23 +62,17 @@ int PoolingNPUKernel::SetPoolingParam() {
 int PoolingNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs,
                                    const std::vector<lite::Tensor *> &outputs,
                                    const std::vector<ge::Operator *> &npu_inputs) {
-  auto ret = SetPreTranspose(npu_inputs[0]);
-  if (ret != RET_OK) {
-    MS_LOG(ERROR) << "New pre transpose npu operator (NHWC -> NCHW) for op " << name_ << " failed.";
-    return RET_ERROR;
-  }
-
   pooling_ = new (std::nothrow) hiai::op::PoolingD(name_ + "_pooling");
   if (pooling_ == nullptr) {
     MS_LOG(ERROR) << "New pooling npu operator for op " << name_ << " failed.";
     return RET_ERROR;
   }
-  ret = SetPoolingParam();
+  auto ret = SetPoolingParam();
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "Set npu op parameter for convolution op " << name_ << " failed.";
     return RET_ERROR;
   }
-  pooling_->set_input_x(*pre_trans_);
+  pooling_->set_input_x(*npu_inputs[0]);

   if (pooling_param_->act_type_ != ActType_No) {
     ret = SetActivation(pooling_, pooling_param_->act_type_);
@@ -87,20 +81,16 @@ int PoolingNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs,
       return RET_ERROR;
     }
   }
-
-  if (pooling_param_->act_type_ == ActType_No) {
-    ret = SetPostTranspose(pooling_);
-  } else {
-    ret = SetPostTranspose(act_);
-  }
-  if (ret != RET_OK) {
-    MS_LOG(ERROR) << "New post transpose npu operator (NCHW -> NHWC) for op " << name_ << " failed.";
-    return RET_ERROR;
-  }
   return RET_OK;
 }

-ge::Operator *mindspore::kernel::PoolingNPUKernel::GetNPUOp() { return post_trans_; }
+ge::Operator *mindspore::kernel::PoolingNPUKernel::GetNPUOp() {
+  if (pooling_param_->act_type_ == ActType_No) {
+    return pooling_;
+  } else {
+    return act_;
+  }
+}

 PoolingNPUKernel::~PoolingNPUKernel() {
   if (pooling_ != nullptr) {
@@ -36,12 +36,6 @@ int ResizeNPUKernel::IsSupport(const std::vector<lite::Tensor *> &inputs, const

 int ResizeNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs,
                                   const std::vector<ge::Operator *> &npu_inputs) {
-  auto ret = SetPreTranspose(npu_inputs[0]);
-  if (ret != RET_OK) {
-    MS_LOG(ERROR) << "New pre transpose npu operator (NHWC -> NCHW) for op " << name_ << " failed.";
-    return RET_ERROR;
-  }
-
   ge::TensorDesc sizeTensorDesc(ge::Shape({2}), ge::FORMAT_NCHW, ge::DT_INT32);
   ge::TensorPtr sizeTensor = std::make_shared<hiai::Tensor>(sizeTensorDesc);
   vector<int32_t> dataValue = {static_cast<int32_t>(new_height_), static_cast<int32_t>(new_width_)};
@@ -55,7 +49,7 @@ int ResizeNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs, con
       return RET_ERROR;
     }
     op->set_attr_align_corners(align_corners_);
-    op->set_input_x(*pre_trans_);
+    op->set_input_x(*npu_inputs[0]);
     op->set_input_size(*out_size);
     op->set_attr_half_pixel_centers(preserve_aspect_ratio_);
     op_ = op;
@@ -66,21 +60,14 @@ int ResizeNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs, con
       return RET_ERROR;
     }
     op->set_attr_align_corners(align_corners_);
-    op->set_input_x(*pre_trans_);
+    op->set_input_x(*npu_inputs[0]);
     op->set_input_size(*out_size);
     op_ = op;
   }

-  ret = SetPostTranspose(op_);
-  if (ret != RET_OK) {
-    MS_LOG(ERROR) << "New post transpose npu operator (NCHW -> NHWC) for op " << name_ << " failed.";
-    return RET_ERROR;
-  }
-
   return RET_OK;
 }

-ge::Operator *mindspore::kernel::ResizeNPUKernel::GetNPUOp() { return this->post_trans_; }
+ge::Operator *mindspore::kernel::ResizeNPUKernel::GetNPUOp() { return this->op_; }

 ResizeNPUKernel::~ResizeNPUKernel() {
   if (op_ != nullptr) {
@@ -24,12 +24,12 @@
 #include "include/graph/op/all_ops.h"
 #include "src/runtime/kernel/npu/transpose_base_npu.h"
 namespace mindspore::kernel {
-class ResizeNPUKernel : public TransposeBaseNPUKernel {
+class ResizeNPUKernel : public NPUKernel {
  public:
   ResizeNPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
                   const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx,
                   const mindspore::lite::PrimitiveC *primitive)
-      : TransposeBaseNPUKernel(parameter, inputs, outputs, ctx, primitive) {
+      : NPUKernel(parameter, inputs, outputs, ctx, primitive) {
     auto resize_parameter = reinterpret_cast<ResizeParameter *>(parameter);
     method_ = resize_parameter->method_;
     new_height_ = resize_parameter->new_height_;
@@ -33,6 +33,8 @@
 #if SUPPORT_NPU
 #include "src/runtime/agent/npu/subgraph_npu_kernel.h"
 #include "src/runtime/agent/npu/npu_manager.h"
+#include "src/runtime/agent/npu/npu_transform_pass.h"
+#include "src/runtime/agent/npu/npu_fusion_pass.h"
 #endif
 namespace mindspore::lite {
 using kernel::KERNEL_ARCH::kCPU;
@@ -63,6 +65,11 @@ int Scheduler::Schedule(std::vector<kernel::LiteKernel *> *dst_kernels) {
     return ret;
   }
   FindAllInoutKernels(*dst_kernels);
+  ret = RunPass(dst_kernels);
+  if (ret != RET_OK) {
+    MS_LOG(ERROR) << "Schedule run pass failed.";
+    return ret;
+  }
   ret = ConstructSubGraphs(dst_kernels);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "ConstructSubGraphs failed.";
@@ -514,4 +521,25 @@ void Scheduler::FindAllInoutKernels(const std::vector<kernel::LiteKernel *> &ker
     kernel->FindInoutKernels(kernels);
   }
 }
+
+int Scheduler::RunPass(std::vector<kernel::LiteKernel *> *dst_kernels) {
+  int ret = RET_OK;
+#if SUPPORT_NPU
+  auto transform_pass = new NPUTransformPass;
+  ret = transform_pass->FormatTransformPass(context_, dst_kernels, &src_tensors_);
+  delete transform_pass;
+  if (ret != RET_OK) {
+    MS_LOG(ERROR) << "Run npu format transform pass failed.";
+    return ret;
+  }
+  auto fusion_pass = new NPUFusionPass(dst_kernels);
+  ret = fusion_pass->Fusion();
+  if (ret != RET_OK) {
+    MS_LOG(ERROR) << "Run npu fussion transform pass failed.";
+    return ret;
+  }
+  delete fusion_pass;
+#endif
+  return ret;
+}
 }  // namespace mindspore::lite
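Note (not part of the patch): in the RunPass added above, fusion_pass is deleted only on the success path, so the early return after a failed Fusion() leaks it. A sketch of the same pass sequence with scoped ownership; the class names and calls are taken from the hunk above, their exact signatures are otherwise assumed:

#include <memory>
// Inside Scheduler::RunPass, after `int ret = RET_OK;`
#if SUPPORT_NPU
  auto transform_pass = std::make_unique<NPUTransformPass>();
  ret = transform_pass->FormatTransformPass(context_, dst_kernels, &src_tensors_);
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "Run npu format transform pass failed.";
    return ret;
  }
  auto fusion_pass = std::make_unique<NPUFusionPass>(dst_kernels);
  ret = fusion_pass->Fusion();
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "Run npu fusion pass failed.";
    return ret;
  }
  // Both passes are destroyed automatically on every exit path.
#endif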
@@ -77,6 +77,8 @@ class Scheduler {

   static kernel::SubGraphType GetKernelSubGraphType(const kernel::LiteKernel *kernel);

+  int RunPass(std::vector<kernel::LiteKernel *> *dst_kernels);
+
  protected:
   const InnerContext *context_ = nullptr;
   Model *src_model_ = nullptr;