forked from mindspore-Ecosystem/mindspore
run mobilenet_v2 success
parent a3d4dded12
commit d45b5b5126
src/lite_kernel.h
@@ -95,6 +95,8 @@ class LiteKernel {
   virtual int Init() { return mindspore::lite::RET_ERROR; }

+  OpParameter *op_parameter() { return op_parameter_; }
+
   std::string name() const { return this->name_; }

   virtual int Train() {
src/lite_session.cc
@@ -479,12 +479,6 @@ int LiteSession::Init(const Context *context) {
     is_running_.store(false);
     return ret;
   }
-  ret = InitNPURuntime();
-  if (ret != RET_OK) {
-    MS_LOG(ERROR) << "Init NPU runtime failed.";
-    is_running_.store(false);
-    return ret;
-  }
   executor_ = new (std::nothrow) Executor();
   if (nullptr == executor_) {
     MS_LOG(ERROR) << "New Executor failed";
@@ -661,18 +655,6 @@ int LiteSession::Resize(const std::vector<mindspore::tensor::MSTensor *> &inputs
   return RET_OK;
 }

-int LiteSession::InitNPURuntime() {
-#if SUPPORT_NPU
-  if (this->context_->IsNpuEnabled()) {
-    if (mindspore::lite::NPUManager::GetInstance()->InitClient() != RET_OK) {
-      MS_LOG(ERROR) << "NPU client init error.";
-      return RET_ERROR;
-    }
-  }
-#endif
-  return RET_OK;
-}
-
 int LiteSession::InitGPURuntime() {
 #if SUPPORT_GPU
   if (this->context_->IsGpuEnabled()) {
src/lite_session.h
@@ -103,8 +103,6 @@ class LiteSession : public session::LiteSession {
  private:
   void ResetInputsShape(const std::vector<std::vector<int>> &dims);

-  int InitNPURuntime();
-
   int InitGPURuntime();

  protected:
src/runtime/agent/npu/npu_executor.cc
@@ -17,10 +17,9 @@
 #include "src/runtime/agent/npu/npu_executor.h"
 #include "include/errorcode.h"
 #include "src/runtime/agent/npu/npu_manager.h"
-#include "nnacl/pack.h"
 namespace mindspore::lite {
 int NPUExecutor::Prepare(const std::vector<kernel::LiteKernel *> &kernels) {
-  this->client_ = mindspore::lite::NPUManager::GetInstance()->GetClient();
+  this->client_ = mindspore::lite::NPUManager::GetInstance()->GetClient(model_name_);
   if (this->client_ == nullptr) {
     MS_LOG(ERROR) << "client is nullptr.";
     return RET_ERROR;
@@ -33,9 +32,8 @@ int NPUExecutor::Prepare(const std::vector<kernel::LiteKernel *> &kernels) {
 }

 int NPUExecutor::Run(const std::vector<Tensor *> &in_tensors, const std::vector<Tensor *> &out_tensors,
-                     const std::vector<kernel::LiteKernel *> &kernels, const std::vector<bool> &inputs_nhwc2nchw,
-                     const std::vector<bool> &outputs_nchw2nhwc, Allocator *allocator, const KernelCallBack &before,
-                     const KernelCallBack &after) {
+                     const std::vector<kernel::LiteKernel *> &kernels, Allocator *allocator,
+                     const KernelCallBack &before, const KernelCallBack &after) {
   hiai::AiContext context;
   for (int i = 0; i < npu_input_tensors_.size(); ++i) {
     void *data = in_tensors[i]->data_c();
@@ -43,12 +41,7 @@ int NPUExecutor::Run(const std::vector<Tensor *> &in_tensors, const std::vector<
       MS_LOG(ERROR) << model_name_ << " inputs data is nullptr";
       return RET_ERROR;
     }
-    if (inputs_nhwc2nchw[i]) {
-      PackNHWCToNCHWFp32(data, npu_input_tensors_[i]->GetBuffer(), in_tensors[i]->Batch(),
-                         in_tensors[i]->Width() * in_tensors[i]->Height(), in_tensors[i]->Channel());
-    } else {
-      memcpy(npu_input_tensors_[i]->GetBuffer(), data, in_tensors[i]->Size());
-    }
+    memcpy(npu_input_tensors_[i]->GetBuffer(), data, in_tensors[i]->Size());
   }
   context.AddPara("model_name", model_name_);
   if (this->client_ == nullptr) {
@@ -68,12 +61,7 @@ int NPUExecutor::Run(const std::vector<Tensor *> &in_tensors, const std::vector<
       MS_LOG(ERROR) << "Malloc buffer failed.";
       return RET_ERROR;
     }
-    if (outputs_nchw2nhwc[i]) {
-      PackNCHWToNHWCFp32(npu_output_tensors_[i]->GetBuffer(), data, out_tensors[i]->Batch(),
-                         out_tensors[i]->Width() * out_tensors[i]->Height(), out_tensors[i]->Channel());
-    } else {
-      memcpy(data, npu_output_tensors_[i]->GetBuffer(), npu_output_tensors_[i]->GetSize());
-    }
+    memcpy(data, npu_output_tensors_[i]->GetBuffer(), npu_output_tensors_[i]->GetSize());
     out_tensors[i]->ResetRefCount();
   }
   return RET_OK;
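The branches deleted above delegated layout conversion to nnacl's PackNHWCToNCHWFp32 / PackNCHWToNHWCFp32; after this commit the executor copies buffers verbatim and the transform/fusion passes added below arrange layouts instead. For reference, a minimal standalone sketch of what such a pack does, assuming plane = H * W as the old call sites computed it:

#include <cassert>

// NHWC element (p, c) of one batch maps to NCHW element (c, p), p in [0, H*W).
void PackNHWCToNCHW(const float *src, float *dst, int batch, int plane, int channel) {
  for (int b = 0; b < batch; ++b) {
    const float *src_b = src + b * plane * channel;
    float *dst_b = dst + b * plane * channel;
    for (int p = 0; p < plane; ++p) {
      for (int c = 0; c < channel; ++c) {
        dst_b[c * plane + p] = src_b[p * channel + c];  // gather one channel plane
      }
    }
  }
}

int main() {
  const float nhwc[6] = {0, 1, 2, 3, 4, 5};  // batch 1, plane 2, channel 3
  float nchw[6] = {0};
  PackNHWCToNCHW(nhwc, nchw, 1, 2, 3);
  assert(nchw[1] == 3 && nchw[2] == 1 && nchw[5] == 5);  // yields {0, 3, 1, 4, 2, 5}
  return 0;
}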
src/runtime/agent/npu/npu_executor.h
@@ -32,8 +32,7 @@ class NPUExecutor : public Executor {
   int Prepare(const std::vector<kernel::LiteKernel *> &kernels) override;

   int Run(const std::vector<Tensor *> &in_tensors, const std::vector<Tensor *> &out_tensors,
-          const std::vector<kernel::LiteKernel *> &kernels, const std::vector<bool> &inputs_nhwc2nchw,
-          const std::vector<bool> &outputs_nchw2nhwc, Allocator *allocator = nullptr,
+          const std::vector<kernel::LiteKernel *> &kernels, Allocator *allocator = nullptr,
           const KernelCallBack &before = nullptr, const KernelCallBack &after = nullptr);

  private:
src/runtime/agent/npu/npu_fusion_pass.cc (new file)
@@ -0,0 +1,224 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include "src/runtime/agent/npu/npu_fusion_pass.h"
#include <vector>
#include "src/lite_kernel.h"
#include "nnacl/concat_parameter.h"

namespace mindspore::lite {
bool CheckFusion(kernel::LiteKernel *kernel) {
  auto pre_flag =
    std::all_of(kernel->in_kernels().begin(), kernel->in_kernels().end(), [](const kernel::LiteKernel *kernel) {
      return kernel->Type() == schema::PrimitiveType_Nchw2Nhwc && kernel->out_kernels().size() == 1;
    });
  if (!pre_flag) {
    return false;
  }
  auto post_flag =
    std::all_of(kernel->out_kernels().begin(), kernel->out_kernels().end(), [](const kernel::LiteKernel *kernel) {
      return kernel->Type() == schema::PrimitiveType_Nhwc2Nchw && kernel->in_kernels().size() == 1;
    });
  return post_flag;
}

void NPUFusionPass::UpdatePreKernels(kernel::LiteKernel *cur_kernel) {
  for (auto in_kernel : cur_kernel->in_kernels()) {
    auto pre_kernel = in_kernel->in_kernels()[0];

    auto pre_out_kernels = pre_kernel->out_kernels();
    for (size_t i = 0; i < pre_out_kernels.size(); i++) {
      if (pre_out_kernels[i] == in_kernel) {
        pre_out_kernels[i] = cur_kernel;
        break;
      }
    }
    pre_kernel->set_out_kernels(pre_out_kernels);

    auto cur_in_kernels = cur_kernel->in_kernels();
    for (size_t i = 0; i < cur_in_kernels.size(); i++) {
      if (cur_in_kernels[i] == in_kernel) {
        cur_in_kernels[i] = pre_kernel;
        break;
      }
    }
    cur_kernel->set_in_kernels(cur_in_kernels);
    kernels->erase(find(kernels->begin(), kernels->end(), in_kernel));
  }
}

void NPUFusionPass::UpdatePostKernels(kernel::LiteKernel *cur_kernel) {
  for (auto out_kernel : cur_kernel->out_kernels()) {
    auto post_kernel = out_kernel->out_kernels()[0];

    auto post_in_kernels = post_kernel->in_kernels();
    for (size_t i = 0; i < post_in_kernels.size(); i++) {
      if (post_in_kernels[i] == out_kernel) {
        post_in_kernels[i] = cur_kernel;
        break;
      }
    }
    post_kernel->set_in_kernels(post_in_kernels);

    auto cur_out_kernels = cur_kernel->out_kernels();
    for (size_t i = 0; i < cur_out_kernels.size(); i++) {
      if (cur_out_kernels[i] == out_kernel) {
        cur_out_kernels[i] = post_kernel;
        break;
      }
    }
    cur_kernel->set_out_kernels(cur_out_kernels);
    kernels->erase(find(kernels->begin(), kernels->end(), out_kernel));
  }
}

void UpdatePreTensors(kernel::LiteKernel *cur_kernel) {
  auto tensors_vec = cur_kernel->in_tensors();
  for (auto in_kernel : cur_kernel->in_kernels()) {
    lite::Tensor *cur_tensor = nullptr;
    auto in_tensor = in_kernel->in_tensors()[0];
    auto out_tensor = in_kernel->out_tensors()[0];
    auto pre_kernel = in_kernel->in_kernels()[0];
    for (size_t i = 0; i < pre_kernel->out_tensors().size(); i++) {
      if (pre_kernel->out_tensors()[i] == in_tensor) {
        cur_tensor = pre_kernel->out_tensors()[i];
      }
    }
    for (size_t i = 0; i < tensors_vec.size(); i++) {
      if (tensors_vec[i] == out_tensor) {
        tensors_vec[i] = cur_tensor;
      }
    }
  }
  cur_kernel->set_in_tensors(tensors_vec);
}

void UpdatePostTensors(kernel::LiteKernel *cur_kernel) {
  auto tensors_vec = cur_kernel->out_tensors();
  for (auto out_kernel : cur_kernel->out_kernels()) {
    auto in_tensor = out_kernel->in_tensors()[0];
    auto out_tensor = out_kernel->out_tensors()[0];
    auto post_kernel = out_kernel->out_kernels()[0];
    lite::Tensor *cur_tensor = nullptr;
    for (size_t i = 0; i < post_kernel->in_tensors().size(); i++) {
      if (post_kernel->in_tensors()[i] == out_tensor) {
        cur_tensor = post_kernel->in_tensors()[i];
      }
    }
    for (size_t i = 0; i < tensors_vec.size(); i++) {
      if (tensors_vec[i] == in_tensor) {
        tensors_vec[i] = cur_tensor;
      }
    }
  }
  cur_kernel->set_out_tensors(tensors_vec);
}

int TransFormAxis(int axis) {
  switch (axis) {
    case 0:
      return 0;
    case 1:
      return 2;
    case 2:
      return 3;
    case 3:
    case -1:
      return 1;
    default:
      return -2;
  }
}

int NPUFusionPass::AddFusion(kernel::LiteKernel *kernel) {
  if (!CheckFusion(kernel)) {
    return RET_OK;
  }
  UpdatePreTensors(kernel);
  UpdatePostTensors(kernel);
  UpdatePreKernels(kernel);
  UpdatePostKernels(kernel);
  return RET_OK;
}

int NPUFusionPass::ConcatFusion(kernel::LiteKernel *kernel) {
  if (!CheckFusion(kernel)) {
    return RET_OK;
  }
  UpdatePreTensors(kernel);
  UpdatePostTensors(kernel);
  UpdatePreKernels(kernel);
  UpdatePostKernels(kernel);
  auto concat_param = reinterpret_cast<ConcatParameter *>(kernel->op_parameter());
  concat_param->axis_ = TransFormAxis(concat_param->axis_);
  return RET_OK;
}

int NPUFusionPass::FormatFusion(kernel::LiteKernel *kernel) {
  if (kernel->out_kernels().empty()) {
    return RET_OK;
  }
  if (!std::all_of(kernel->out_kernels().begin(), kernel->out_kernels().end(), [](const kernel::LiteKernel *kernel) {
        return kernel->Type() == schema::PrimitiveType_Nhwc2Nchw;
      })) {
    return RET_OK;
  }
  auto pre_kernel = kernel->in_kernels()[0];

  auto pre_out_kernels = pre_kernel->out_kernels();
  for (size_t i = 0; i < pre_out_kernels.size(); i++) {
    if (pre_out_kernels[i] == kernel) {
      pre_out_kernels.erase(pre_out_kernels.begin() + i);
      break;
    }
  }
  for (const auto &nc2nh : kernel->out_kernels()) {
    for (const auto &post_kernel : nc2nh->out_kernels()) {
      auto post_in_kernels = post_kernel->in_kernels();
      for (size_t i = 0; i < post_in_kernels.size(); i++) {
        if (post_in_kernels[i] == nc2nh) {
          post_in_kernels[i] = pre_kernel;
          break;
        }
      }
      post_kernel->set_in_kernels(post_in_kernels);
      pre_out_kernels.push_back(post_kernel);
    }
    kernels->erase(find(kernels->begin(), kernels->end(), nc2nh));
  }
  pre_kernel->set_out_kernels(pre_out_kernels);
  kernels->erase(find(kernels->begin(), kernels->end(), kernel));
  return RET_OK;
}

int NPUFusionPass::Fusion() {
  for (auto kernel : *kernels) {
    switch (kernel->Type()) {
      case schema::PrimitiveType_Concat:
        ConcatFusion(kernel);
        continue;
      case schema::PrimitiveType_Add:
        AddFusion(kernel);
        continue;
      case schema::PrimitiveType_Nchw2Nhwc:
        FormatFusion(kernel);
        continue;
      default:
        continue;
    }
  }
  return RET_OK;
}
}  // namespace mindspore::lite
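The pass removes Nchw2Nhwc/Nhwc2Nchw pairs that sandwich an Add or Concat, letting the op consume NCHW data directly; Concat additionally gets its axis remapped by TransFormAxis. A standalone restatement of that mapping with a few checks (demo code, not part of the commit):

#include <cassert>

// NHWC axis -> NCHW axis: N(0)->0, H(1)->2, W(2)->3, C(3 or -1)->1;
// anything else -> -2 as an "unsupported" sentinel.
int TransFormAxisDemo(int axis) {
  switch (axis) {
    case 0: return 0;
    case 1: return 2;
    case 2: return 3;
    case 3:
    case -1: return 1;
    default: return -2;
  }
}

int main() {
  assert(TransFormAxisDemo(3) == 1);   // channel concat: NHWC axis 3 -> NCHW axis 1
  assert(TransFormAxisDemo(-1) == 1);  // -1 also means "last axis", i.e. channels
  assert(TransFormAxisDemo(1) == 2);   // height moves from axis 1 to axis 2
  assert(TransFormAxisDemo(4) == -2);  // out-of-range axes are rejected
  return 0;
}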
src/runtime/agent/npu/npu_fusion_pass.h (new file)
@@ -0,0 +1,40 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_FUSION_PASS_H_
#define MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_FUSION_PASS_H_
#include <vector>
#include "src/lite_kernel.h"
#include "src/ops/primitive_c.h"
namespace mindspore::lite {
class NPUFusionPass {
 public:
  explicit NPUFusionPass(std::vector<kernel::LiteKernel *> *dst_kernels) { kernels = dst_kernels; }
  ~NPUFusionPass() = default;
  int Fusion();

 protected:
  int ConcatFusion(kernel::LiteKernel *kernel);
  int AddFusion(kernel::LiteKernel *kernel);
  int FormatFusion(kernel::LiteKernel *kernel);
  void UpdatePreKernels(kernel::LiteKernel *kernel);
  void UpdatePostKernels(kernel::LiteKernel *kernel);

 private:
  std::vector<kernel::LiteKernel *> *kernels;
};
}  // namespace mindspore::lite
#endif  // MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_FUSION_PASS_H_
src/runtime/agent/npu/npu_manager.cc
@@ -15,21 +15,59 @@
  */

 #include "src/runtime/agent/npu/npu_manager.h"
 #include <sys/system_properties.h>
 #include <sys/fcntl.h>
 #include <unistd.h>
 #include "include/hiai_ir_build.h"
 #include "include/HiAiModelManagerService.h"
 #include "include/errorcode.h"
 #include "include/graph/op/all_ops.h"
 #include "src/common/file_utils.h"

 namespace mindspore::lite {

-bool NPUManager::IsSupportNPU() {
-  if (!is_npu_check_executor) {
-    CheckSupportNPU();
-  }
-  if (is_support_npu) {
+#define MAX_MODEL_NUM 20
+int NPUManager::CompareVersion(const string &version1, const string &version2) {
+  std::istringstream iss1(version1);
+  std::istringstream iss2(version2);
+  string string1;
+  string string2;
+  while (!iss1.eof() || !iss2.eof()) {
+    getline(iss1, string1, '.');
+    getline(iss2, string2, '.');
+    if (stoi(string1) > stoi(string2)) return 1;
+    if (stoi(string1) < stoi(string2)) return -1;
+    string1 = string2 = "0";
+  }
+  return 0;
+}
+
+bool NPUManager::CheckEMUIVersion() {
+  char emui[128] = {0x00};
+  __system_property_get("ro.build.version.emui", emui);
+  std::string emui_str = emui;
+  int pos = emui_str.find('_');
+  if (pos != std::string::npos) {
+    auto version = emui_str.substr(pos + 1);
+    int ret = CompareVersion(version, "11.0.0");
+    if (ret < 0) {
+      return false;
+    }
+  }
+  return true;
+}
+
+bool NPUManager::CheckDDKVersion() {
+  auto client = std::make_shared<hiai::AiModelMngerClient>();
+  if (client->GetVersion() != nullptr) {
+    std::string version = client->GetVersion();
+    int ret = CompareVersion(version, "100.330.010.011");
+    if (ret < 0) {
+      return false;
+    }
+  }
+  return true;
+}
+
+bool NPUManager::IsSupportNPU() {
+  if (IsKirinChip() && CheckEMUIVersion() && CheckDDKVersion()) {
+    MS_LOG(INFO) << "The current device support NPU.";
+    return true;
+  } else {
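CompareVersion compares dotted version strings segment by segment as integers, treating missing segments as 0; CheckEMUIVersion feeds it the substring after '_' in the EMUI property (a value shaped like "EmotionUI_11.0.0" is assumed here for illustration). An equivalent standalone sketch, assuming numeric segments:

#include <sstream>
#include <string>

// Returns 1 if v1 > v2, -1 if v1 < v2, 0 if equal; the shorter version is
// padded with "0" segments, so "11.0" == "11.0.0".
int CompareVersionDemo(const std::string &v1, const std::string &v2) {
  std::istringstream iss1(v1), iss2(v2);
  std::string s1, s2;
  while (!iss1.eof() || !iss2.eof()) {
    s1 = "0";
    s2 = "0";
    std::getline(iss1, s1, '.');  // leaves "0" in place once the stream is exhausted
    std::getline(iss2, s2, '.');
    if (std::stoi(s1) > std::stoi(s2)) return 1;
    if (std::stoi(s1) < std::stoi(s2)) return -1;
  }
  return 0;  // e.g. CompareVersionDemo("100.330.010.011", "100.330.10.11") == 0
}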
@@ -38,36 +76,6 @@ bool NPUManager::IsSupportNPU() {
   }
 }

-std::string NPUManager::GetExecutorPath() {
-  std::string executor_path;
-  char cmdline[1024] = {0};
-  int fd = open("/proc/self/cmdline", O_RDONLY);
-  if (fd >= 0) {
-    char ch;
-    int i = 0;
-    while (read(fd, &ch, sizeof(ch)) > 0 && !isspace(ch)) {
-      if (':' == ch) {
-        break;
-      }
-      cmdline[i] = ch;
-      i++;
-    }
-    close(fd);
-  }
-  executor_path = std::string(cmdline);
-  if (executor_path.empty()) {
-    executor_path = "./";
-  }
-  // android
-  if (executor_path.substr(0, 11) == "/data/data/") {
-    executor_path = executor_path + '/';
-  } else {
-    // Linux
-    executor_path = executor_path.substr(0, executor_path.rfind('/')) + "/";
-  }
-  return executor_path;
-}
-
 bool NPUManager::IsKirinChip() {
   std::ifstream cpu_info("/proc/cpuinfo");
   if (!(cpu_info.good() && cpu_info.is_open())) {
@@ -96,86 +104,6 @@ bool NPUManager::IsKirinChip() {
   return false;
 }

-bool WriteToOMFile(domi::ModelBufferData om_model_buff, const std::string &om_file_path) {
-  FILE *fp;
-  fp = fopen(om_file_path.c_str(), "wb");
-  if (fp == nullptr) {
-    MS_LOG(ERROR) << om_file_path.c_str() << " open failed.";
-    return false;
-  }
-
-  auto write_size = (uint32_t)fwrite(om_model_buff.data, 1, om_model_buff.length, fp);
-  if (write_size != om_model_buff.length) {
-    fclose(fp);
-    MS_LOG(ERROR) << "Write om file failed.";
-    return false;
-  }
-  fclose(fp);
-  return true;
-}
-
-bool NPUManager::CheckOmBuildIr(const std::string &path) {
-  // build test om model
-  std::shared_ptr<hiai::op::Add> add_op(new (std::nothrow) hiai::op::Add("add"));
-  if (add_op == nullptr) {
-    MS_LOG(ERROR) << "new add_op failed.";
-    return false;
-  }
-  ge::TensorDesc desc(ge::Shape({1}), ge::FORMAT_NCHW, ge::DT_FLOAT);
-  std::shared_ptr<hiai::op::Data> data = std::make_shared<hiai::op::Data>("data");
-  data->update_input_desc_x(desc);
-  add_op->set_input_x1(*data);
-  add_op->set_input_x2(*data);
-  domi::HiaiIrBuild ir_build;
-  ge::Graph ir_graph("graph");
-  std::vector<ge::Operator> inputs{*data, *data};
-  std::vector<ge::Operator> outputs{*add_op};
-  ir_graph.SetInputs(inputs).SetOutputs(outputs);
-  ge::Model om_model("test_model", "test_version");
-  om_model.SetGraph(ir_graph);
-
-  domi::ModelBufferData om_model_buff;
-  if (!ir_build.CreateModelBuff(om_model, om_model_buff)) {
-    MS_LOG(ERROR) << "Create model buffer failed.";
-    return false;
-  }
-  if (!ir_build.BuildIRModel(om_model, om_model_buff)) {
-    MS_LOG(ERROR) << "Build IR model failed.";
-    return false;
-  }
-
-  // save test om model
-  remove(path.c_str());
-  bool ret = WriteToOMFile(om_model_buff, path);
-  ir_build.ReleaseModelBuff(om_model_buff);
-  return ret;
-}
-
-void NPUManager::CheckSupportNPU() {
-  is_npu_check_executor = true;
-  std::string path_string = GetExecutorPath();
-
-  std::string test_model_path = path_string + "/mindspore_lite_test_npu.om";
-  std::ifstream ifs(test_model_path);
-  if (ifs.good() && ifs.is_open()) {
-    ifs.close();
-    is_support_npu = true;
-    return;
-  }
-  if (!IsKirinChip()) {
-    MS_LOG(ERROR) << "The current device chip NOT SUPPORT NPU";
-    is_support_npu = false;
-    return;
-  }
-
-  if (!CheckOmBuildIr(test_model_path)) {
-    MS_LOG(ERROR) << "Build OM IR error.";
-    is_support_npu = false;
-    return;
-  }
-  is_support_npu = true;
-}
-
 int NPUManager::AddModel(void *model_buf, uint32_t size, const std::string &model_name, int frequency) {
   hiai::MemBuffer *buffer = mc_builder_->InputMemBufferCreate(model_buf, size);
   if (buffer == nullptr) {
@@ -188,33 +116,42 @@ int NPUManager::AddModel(void *model_buf, uint32_t size, const std::string &mode
   model_desc_.push_back(desc);
   mc_builder_->MemBufferDestroy(buffer);

+  model_map_.insert({model_name, index_});
   index_++;
   return RET_OK;
 }

-int NPUManager::InitClient() {
-  this->client_ = std::make_shared<hiai::AiModelMngerClient>();
-  if (this->client_ == nullptr) {
-    return RET_ERROR;
-  }
-  int ret = this->client_->Init(nullptr);
-  if (ret != hiai::AI_SUCCESS) {
-    return RET_ERROR;
-  }
-  mc_builder_ = std::make_shared<hiai::AiModelBuilder>(this->client_);
-  return RET_OK;
-}
-
 int NPUManager::LoadOMModel() {
-  int ret = this->client_->Load(model_desc_);
-  if (ret != hiai::AI_SUCCESS) {
-    MS_LOG(ERROR) << "Client load model failed." << ret;
-    return RET_ERROR;
+  for (int i = 0; i < index_ / MAX_MODEL_NUM + 1; i++) {
+    auto client = std::make_shared<hiai::AiModelMngerClient>();
+    if (client == nullptr) {
+      MS_LOG(ERROR) << "NPU client is nullptr.";
+      return RET_ERROR;
+    }
+    int ret = client->Init(nullptr);
+    if (ret != hiai::AI_SUCCESS) {
+      MS_LOG(ERROR) << "NPU client init failed. code is " << ret;
+      return RET_ERROR;
+    }
+    mc_builder_ = std::make_shared<hiai::AiModelBuilder>(client);
+
+    vector<std::shared_ptr<hiai::AiModelDescription>> desc(model_desc_.begin() + i * MAX_MODEL_NUM,
+                                                           ((i + 1) * MAX_MODEL_NUM > index_)
+                                                             ? model_desc_.begin() + index_
+                                                             : model_desc_.begin() + (i + 1) * MAX_MODEL_NUM);
+    ret = client->Load(desc);
+    if (ret != hiai::AI_SUCCESS) {
+      MS_LOG(ERROR) << "Client load model failed." << ret;
+      return RET_ERROR;
+    }
+    clients_.push_back(client);
   }
   return RET_OK;
 }

-std::shared_ptr<hiai::AiModelMngerClient> NPUManager::GetClient() { return client_; }
+std::shared_ptr<hiai::AiModelMngerClient> NPUManager::GetClient(const std::string &model_name) {
+  return clients_[model_map_[model_name] / MAX_MODEL_NUM];
+}

-int NPUManager::index() { return index_; }
+int NPUManager::index() const { return index_; }
 }  // namespace mindspore::lite
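LoadOMModel shards the accumulated model descriptors across HiAI clients, at most MAX_MODEL_NUM (20) per client, and GetClient maps a model back to its client by integer division of the index recorded in AddModel. The slicing arithmetic as a standalone sketch (the model count here is illustrative):

#include <algorithm>
#include <iostream>

int main() {
  const int kMaxModelNum = 20;  // mirrors MAX_MODEL_NUM above
  int index = 45;               // hypothetical number of registered models
  for (int i = 0; i < index / kMaxModelNum + 1; ++i) {
    int begin = i * kMaxModelNum;
    int end = std::min((i + 1) * kMaxModelNum, index);
    // Model j in [begin, end) is later found via clients_[j / kMaxModelNum].
    std::cout << "client " << i << " loads models [" << begin << ", " << end << ")\n";
  }
  // Prints: client 0 -> [0, 20), client 1 -> [20, 40), client 2 -> [40, 45)
  return 0;
}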
src/runtime/agent/npu/npu_manager.h
@@ -14,15 +14,21 @@
  * limitations under the License.
  */

-#ifndef MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_UTILS_H_
-#define MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_UTILS_H_
+#ifndef MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_MANAGER_H_
+#define MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_MANAGER_H_
 #include <string>
 #include <memory>
 #include <vector>
+#include <unordered_map>
+#include <set>
 #include "schema/model_generated.h"
 #include "include/HiAiModelManagerService.h"

 namespace mindspore::lite {

+static std::set<mindspore::schema::PrimitiveType> npu_trans_nodes = {
+  schema::PrimitiveType_Conv2D,          schema::PrimitiveType_DeConv2D,
+  schema::PrimitiveType_DepthwiseConv2D, schema::PrimitiveType_DeDepthwiseConv2D,
+  schema::PrimitiveType_Resize,          schema::PrimitiveType_Pooling};
 class NPUManager {
  public:
   static NPUManager *GetInstance() {
@@ -32,8 +38,6 @@ class NPUManager {

   bool IsSupportNPU();

-  int InitClient();
-
   // provide to subgraph to add model.
   int AddModel(void *model_buf, uint32_t size, const std::string &model_name, int frequency);
@@ -41,18 +45,18 @@ class NPUManager {
   int LoadOMModel();

   // provide to executor.
-  std::shared_ptr<hiai::AiModelMngerClient> GetClient();
+  std::shared_ptr<hiai::AiModelMngerClient> GetClient(const std::string &model_name);

-  int index();
+  int index() const;

  private:
-  void CheckSupportNPU();
-
   bool IsKirinChip();

-  bool CheckOmBuildIr(const std::string &path);
+  bool CheckEMUIVersion();

-  std::string GetExecutorPath();
+  bool CheckDDKVersion();
+
+  int CompareVersion(const std::string &version1, const std::string &version2);

  private:
   int index_ = 0;
@@ -61,12 +65,14 @@ class NPUManager {

   bool is_support_npu = false;

-  std::shared_ptr<hiai::AiModelMngerClient> client_ = nullptr;
+  std::vector<std::shared_ptr<hiai::AiModelMngerClient>> clients_;

   std::vector<std::shared_ptr<hiai::AiModelDescription>> model_desc_;

   std::shared_ptr<hiai::AiModelBuilder> mc_builder_ = nullptr;

+  std::unordered_map<std::string, int> model_map_;
 };

 }  // namespace mindspore::lite

-#endif  // MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_UTILS_H_
+#endif  // MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_MANAGER_H_
src/runtime/agent/npu/npu_pass_utils.cc (new file)
@@ -0,0 +1,102 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "src/kernel_registry.h"
#include "src/ops/nhwc2nchw.h"
#include "src/ops/nchw2nhwc.h"
#include "src/runtime/agent/npu/npu_pass_utils.h"
namespace mindspore::lite {
using kernel::KERNEL_ARCH::kCPU;
using kernel::KERNEL_ARCH::kNPU;
PrimitiveC *NPUPassUtils::CreateNchw2NhwcPrimitive() {
  flatbuffers::FlatBufferBuilder fbb(1024);
  auto val_offset = schema::CreateNchw2Nhwc(fbb);
  auto prim_offset = schema::CreatePrimitive(fbb, schema::PrimitiveType_Nchw2Nhwc, val_offset.o);
  fbb.Finish(prim_offset);
  auto buf = fbb.GetBufferPointer();
  if (buf == nullptr) {
    MS_LOG(ERROR) << "GetBufferPointer return nullptr";
    fbb.Clear();
    return nullptr;
  }
  auto primitive_buf = reinterpret_cast<char *>(malloc(fbb.GetSize()));
  if (primitive_buf == nullptr) {
    MS_LOG(ERROR) << "Malloc primitive_buf_ failed.";
    fbb.Clear();
    return nullptr;
  }
  memcpy(primitive_buf, buf, fbb.GetSize());
  auto *primitive = PrimitiveC::NewPrimitiveC<Nchw2Nhwc>(flatbuffers::GetRoot<schema::Primitive>(primitive_buf));
  free(primitive_buf);
  fbb.Clear();
  return primitive;
}

PrimitiveC *NPUPassUtils::CreateNhwc2NchwPrimitive() {
  flatbuffers::FlatBufferBuilder fbb(1024);
  auto val_offset = schema::CreateNhwc2Nchw(fbb);
  auto prim_offset = schema::CreatePrimitive(fbb, schema::PrimitiveType_Nhwc2Nchw, val_offset.o);
  fbb.Finish(prim_offset);
  auto buf = fbb.GetBufferPointer();
  if (buf == nullptr) {
    MS_LOG(ERROR) << "GetBufferPointer return nullptr";
    fbb.Clear();
    return nullptr;
  }
  auto primitive_buf = reinterpret_cast<char *>(malloc(fbb.GetSize()));
  if (primitive_buf == nullptr) {
    MS_LOG(ERROR) << "Malloc primitive_buf_ failed.";
    fbb.Clear();
    return nullptr;
  }
  memcpy(primitive_buf, buf, fbb.GetSize());
  auto *primitive = PrimitiveC::NewPrimitiveC<Nhwc2Nchw>(flatbuffers::GetRoot<schema::Primitive>(primitive_buf));
  free(primitive_buf);
  fbb.Clear();
  return primitive;
}

kernel::LiteKernel *NPUPassUtils::CreateNchw2NhwcKernel(const std::vector<Tensor *> &in_tensors,
                                                        const std::vector<Tensor *> &out_tensors,
                                                        const InnerContext *ctx, const std::string &name) {
  kernel::KernelKey key{kCPU, kNumberTypeFloat32, schema::PrimitiveType_Nchw2Nhwc};
  auto nchw2nhwc_primitive = CreateNchw2NhwcPrimitive();
  auto *nchw2nhwc_kernel =
    KernelRegistry::GetInstance()->GetKernel(in_tensors, out_tensors, nchw2nhwc_primitive, ctx, key);
  nchw2nhwc_kernel->set_name(name);
  return nchw2nhwc_kernel;
}

kernel::LiteKernel *NPUPassUtils::CreateNhwc2NchwKernel(const std::vector<Tensor *> &in_tensors,
                                                        const std::vector<Tensor *> &out_tensors,
                                                        const InnerContext *ctx, const std::string &name) {
  kernel::KernelKey key{kCPU, kNumberTypeFloat32, schema::PrimitiveType_Nhwc2Nchw};
  auto nhwc2nchw_primitive = CreateNhwc2NchwPrimitive();
  auto *nhwc2nchw_kernel =
    KernelRegistry::GetInstance()->GetKernel(in_tensors, out_tensors, nhwc2nchw_primitive, ctx, key);
  nhwc2nchw_kernel->set_name(name);
  return nhwc2nchw_kernel;
}

void NPUPassUtils::UpdateKernel(kernel::LiteKernel *kernel, const std::vector<kernel::LiteKernel *> &in_kernels,
                                const std::vector<kernel::LiteKernel *> &out_kernels,
                                const std::vector<Tensor *> &in_tensors, const std::vector<Tensor *> &out_tensors) {
  kernel->set_in_tensors(in_tensors);
  kernel->set_out_tensors(out_tensors);
  kernel->set_in_kernels(in_kernels);
  kernel->set_out_kernels(out_kernels);
}
}  // namespace mindspore::lite
src/runtime/agent/npu/npu_pass_utils.h (new file)
@@ -0,0 +1,44 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_PASS_UTILS_H_
#define MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_PASS_UTILS_H_
#include <vector>
#include <string>
#include "src/ops/primitive_c.h"
#include "src/lite_kernel.h"
namespace mindspore::lite {
class NPUPassUtils {
 public:
  static kernel::LiteKernel *CreateNchw2NhwcKernel(const std::vector<Tensor *> &in_tensors,
                                                   const std::vector<Tensor *> &out_tensors, const InnerContext *ctx,
                                                   const std::string &name);

  static kernel::LiteKernel *CreateNhwc2NchwKernel(const std::vector<Tensor *> &in_tensors,
                                                   const std::vector<Tensor *> &out_tensors, const InnerContext *ctx,
                                                   const std::string &name);

  static void UpdateKernel(kernel::LiteKernel *kernel, const std::vector<kernel::LiteKernel *> &in_kernels,
                           const std::vector<kernel::LiteKernel *> &out_kernels,
                           const std::vector<Tensor *> &in_tensors, const std::vector<Tensor *> &out_tensors);

 private:
  static PrimitiveC *CreateNchw2NhwcPrimitive();

  static PrimitiveC *CreateNhwc2NchwPrimitive();
};
}  // namespace mindspore::lite
#endif  // MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_PASS_UTILS_H_
src/runtime/agent/npu/npu_transform_pass.cc (new file)
@@ -0,0 +1,201 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include "src/runtime/agent/npu/npu_transform_pass.h"
#include <vector>
#include "src/lite_kernel.h"
#include "src/runtime/agent/npu/npu_manager.h"
#include "src/runtime/agent/npu/npu_pass_utils.h"
namespace mindspore::lite {
using kernel::KERNEL_ARCH::kCPU;
using kernel::KERNEL_ARCH::kNPU;
int NPUTransformPass::UpdateNH2NCTransNodePreKernel(kernel::LiteKernel *kernel, kernel::LiteKernel *trans_kernel,
                                                    kernel::LiteKernel *after_kernel) {
  std::vector<kernel::LiteKernel *> out_kernels;

  for (auto out_kernel : kernel->out_kernels()) {
    if (out_kernel == after_kernel) {
      out_kernels.push_back(trans_kernel);
    } else {
      out_kernels.push_back(out_kernel);
    }
  }
  NPUPassUtils::UpdateKernel(kernel, kernel->in_kernels(), out_kernels, kernel->in_tensors(), kernel->out_tensors());
  return RET_OK;
}

int NPUTransformPass::UpdateNH2NCTransNodeAfterKernel(kernel::LiteKernel *kernel, kernel::LiteKernel *trans_kernel,
                                                      kernel::LiteKernel *before_kernel) {
  std::vector<lite::Tensor *> cur_kernel_in_tensors = {trans_kernel->out_tensors()[0]};
  for (int i = 1; i < kernel->in_tensors().size(); i++) {
    cur_kernel_in_tensors.push_back(kernel->in_tensors()[i]);
  }
  std::vector<kernel::LiteKernel *> cur_in_kernels = {trans_kernel};
  for (int i = 0; i < kernel->in_kernels().size(); i++) {
    auto in_kernel = kernel->in_kernels()[i];
    if (in_kernel != kernel) {
      cur_in_kernels.push_back(in_kernel);
    }
  }
  NPUPassUtils::UpdateKernel(kernel, cur_in_kernels, kernel->out_kernels(), cur_kernel_in_tensors,
                             kernel->out_tensors());
  return RET_OK;
}

int NPUTransformPass::InsertPreNode(const InnerContext *context, std::vector<kernel::LiteKernel *>::iterator it,
                                    std::vector<kernel::LiteKernel *> *all_kernels,
                                    std::vector<Tensor *> *all_tensors) {
  auto kernel = *it;
  bool is_input_kernel = kernel->in_kernels().empty();
  if (is_input_kernel || kernel->in_kernels()[0]->desc().arch != kNPU ||
      npu_trans_nodes.find(kernel->in_kernels()[0]->Type()) == npu_trans_nodes.end()) {
    kernel::LiteKernel *before_kernel = nullptr;
    if (!is_input_kernel) {
      before_kernel = kernel->in_kernels()[0];
    }
    // Create pre transform kernel out tensors.
    std::vector<int> shapes{kernel->in_tensors()[0]->shape()[0], kernel->in_tensors()[0]->shape()[3],
                            kernel->in_tensors()[0]->shape()[1], kernel->in_tensors()[0]->shape()[2]};
    auto tensor = new Tensor(kernel->in_tensors()[0]->data_type(), shapes, schema::Format_NCHW, Tensor::VAR);
    std::vector<Tensor *> pre_trans_out_tensors = {tensor};
    all_tensors->push_back(pre_trans_out_tensors[0]);
    // Replace the output tensor of the previous node
    auto name = kernel->name() + "_pre_trans" + "_Nhwc2Nchw_" + std::to_string(total++);
    auto *pre_trans_kernel =
      NPUPassUtils::CreateNhwc2NchwKernel({kernel->in_tensors()[0]}, pre_trans_out_tensors, context, name);
    // Insert Nhwc2Nchw into the front of the current queue
    all_kernels->push_back(pre_trans_kernel);
    // Replace the output kernel of the previous node
    std::vector<kernel::LiteKernel *> pre_trans_in_kernel;
    if (is_input_kernel) {
      pre_trans_in_kernel = {};
    } else {
      pre_trans_in_kernel = {before_kernel};
    }
    NPUPassUtils::UpdateKernel(pre_trans_kernel, pre_trans_in_kernel, {kernel}, {kernel->in_tensors()[0]},
                               pre_trans_out_tensors);

    if (before_kernel != nullptr) {
      UpdateNH2NCTransNodePreKernel(before_kernel, pre_trans_kernel, kernel);
    }
    UpdateNH2NCTransNodeAfterKernel(kernel, pre_trans_kernel, before_kernel);
  }
  return RET_OK;
}

int NPUTransformPass::InsertPostNode(const InnerContext *context, std::vector<kernel::LiteKernel *>::iterator it,
                                     std::vector<kernel::LiteKernel *> *all_kernels,
                                     std::vector<Tensor *> *all_tensors) {
  auto kernel = *it;
  // Single output multiple references
  for (int i = 0; i < kernel->out_kernels().size(); i++) {
    auto next_kernel = kernel->out_kernels().at(i);
    if (next_kernel->desc().arch == kNPU && npu_trans_nodes.find(next_kernel->Type()) != npu_trans_nodes.end()) {
      continue;
    }
    // Change format the output of the current kernel nhwc->nchw
    auto shapes = {kernel->out_tensors()[0]->shape()[0], kernel->out_tensors()[0]->shape()[1],
                   kernel->out_tensors()[0]->shape()[2], kernel->out_tensors()[0]->shape()[3]};
    auto tensor = new Tensor(kernel->out_tensors()[0]->data_type(), shapes, schema::Format_NHWC, Tensor::VAR);
    std::vector<Tensor *> post_trans_out_tensors = {tensor};
    all_tensors->push_back(post_trans_out_tensors[0]);
    // Use the output tensor of the current node as the input tensor of the post-conversion operator
    auto name = kernel->name() + "_post_trans" + "_Nchw2Nhwc" + std::to_string(total++);
    auto *post_trans_kernel =
      NPUPassUtils::CreateNchw2NhwcKernel(kernel->out_tensors(), post_trans_out_tensors, context, name);
    // Replace the input tensor of the next node
    NPUPassUtils::UpdateKernel(post_trans_kernel, {kernel}, {next_kernel}, kernel->out_tensors(),
                               post_trans_out_tensors);
    // Directly insert in the back, will not affect the topological sort
    all_kernels->push_back(post_trans_kernel);
    UpdateNC2NHTransNodePreKernel(kernel, post_trans_kernel, next_kernel);
    UpdateNC2NHTransNodeAfterKernel(kernel, post_trans_kernel, next_kernel);
  }
  return RET_OK;
}

int NPUTransformPass::UpdateNC2NHTransNodePreKernel(kernel::LiteKernel *kernel, kernel::LiteKernel *trans_kernel,
                                                    kernel::LiteKernel *next_kernel) {
  std::vector<kernel::LiteKernel *> cur_out_kernels;
  for (auto out_kernel : kernel->out_kernels()) {
    if (out_kernel == next_kernel) {
      cur_out_kernels.push_back(trans_kernel);
    } else {
      cur_out_kernels.push_back(out_kernel);
    }
  }
  auto kernel_out_tensor = kernel->out_tensors()[0];
  // Change format the output of the current kernel nhwc->nchw
  std::vector<int> kernel_out_new_shapes = {kernel_out_tensor->shape()[0], kernel_out_tensor->shape()[3],
                                            kernel_out_tensor->shape()[1], kernel_out_tensor->shape()[2]};
  kernel_out_tensor->set_format(schema::Format_NCHW);
  kernel_out_tensor->set_shape(kernel_out_new_shapes);
  NPUPassUtils::UpdateKernel(kernel, kernel->in_kernels(), cur_out_kernels, kernel->in_tensors(), {kernel_out_tensor});
  return RET_OK;
}

int NPUTransformPass::UpdateNC2NHTransNodeAfterKernel(kernel::LiteKernel *kernel, kernel::LiteKernel *trans_kernel,
                                                      kernel::LiteKernel *next_kernel) {
  std::vector<Tensor *> next_in_tensors;
  for (auto next_in_tensor : next_kernel->in_tensors()) {
    if (next_in_tensor != kernel->out_tensors()[0]) {
      next_in_tensors.push_back(next_in_tensor);
    } else {
      next_in_tensors.push_back(trans_kernel->out_tensors()[0]);
    }
  }
  next_kernel->set_in_tensors(next_in_tensors);
  std::vector<kernel::LiteKernel *> next_in_kernels;
  for (auto in_kernel : next_kernel->in_kernels()) {
    if (in_kernel == kernel) {
      next_in_kernels.push_back(trans_kernel);
    } else {
      next_in_kernels.push_back(in_kernel);
    }
  }
  NPUPassUtils::UpdateKernel(next_kernel, next_in_kernels, next_kernel->out_kernels(), next_in_tensors,
                             next_kernel->out_tensors());

  return RET_OK;
}

int NPUTransformPass::FormatTransformPass(const InnerContext *context, std::vector<kernel::LiteKernel *> *all_kernels,
                                          std::vector<Tensor *> *all_tensors) {
  if (context->IsNpuEnabled()) {
    std::vector<kernel::LiteKernel *> new_kernels;

    for (auto it = all_kernels->begin(); it != all_kernels->end(); it++) {
      auto kernel = *it;
      if (kernel->desc().arch != kNPU) {
        new_kernels.push_back(kernel);
        continue;
      }
      if (npu_trans_nodes.find(kernel->Type()) != npu_trans_nodes.end()) {
        InsertPreNode(context, it, &new_kernels, all_tensors);
        new_kernels.push_back(kernel);
        InsertPostNode(context, it, &new_kernels, all_tensors);
      } else {
        new_kernels.push_back(kernel);
      }
    }
    all_kernels->clear();
    for (int i = 0; i < new_kernels.size(); i++) {
      all_kernels->push_back(new_kernels[i]);
    }
  }
  return RET_OK;
}
}  // namespace mindspore::lite
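InsertPreNode derives the NCHW tensor shape for the inserted Nhwc2Nchw kernel by permuting the NHWC shape, while InsertPostNode keeps NHWC order for the Nchw2Nhwc output. The permutation as a standalone check:

#include <cassert>
#include <vector>

// {n, h, w, c} -> {n, c, h, w}, exactly as the `shapes` vector above is built.
std::vector<int> Nhwc2NchwShape(const std::vector<int> &s) {
  return {s[0], s[3], s[1], s[2]};
}

int main() {
  // A 1x224x224x3 NHWC input becomes 1x3x224x224 in NCHW.
  assert((Nhwc2NchwShape({1, 224, 224, 3}) == std::vector<int>{1, 3, 224, 224}));
  return 0;
}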
src/runtime/agent/npu/npu_transform_pass.h (new file)
@@ -0,0 +1,51 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_TRANSFORM_PASS_H_
#define MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_TRANSFORM_PASS_H_
#include <vector>
#include "src/lite_kernel.h"
#include "src/ops/primitive_c.h"
namespace mindspore::lite {
class NPUTransformPass {
 public:
  int FormatTransformPass(const InnerContext *context, std::vector<kernel::LiteKernel *> *all_kernels,
                          std::vector<Tensor *> *all_tensors);

 private:
  int UpdateNH2NCTransNodePreKernel(kernel::LiteKernel *kernel, kernel::LiteKernel *trans_kernel,
                                    kernel::LiteKernel *after_kernel);

  int UpdateNH2NCTransNodeAfterKernel(kernel::LiteKernel *kernel, kernel::LiteKernel *trans_kernel,
                                      kernel::LiteKernel *before_kernel);

  int UpdateNC2NHTransNodePreKernel(kernel::LiteKernel *kernel, kernel::LiteKernel *trans_kernel,
                                    kernel::LiteKernel *after_kernel);

  int UpdateNC2NHTransNodeAfterKernel(kernel::LiteKernel *kernel, kernel::LiteKernel *trans_kernel,
                                      kernel::LiteKernel *next_kernel);

  int InsertPreNode(const InnerContext *context, std::vector<kernel::LiteKernel *>::iterator it,
                    std::vector<kernel::LiteKernel *> *all_kernels, std::vector<Tensor *> *all_tensors);

  int InsertPostNode(const InnerContext *context, std::vector<kernel::LiteKernel *>::iterator it,
                     std::vector<kernel::LiteKernel *> *all_kernels, std::vector<Tensor *> *all_tensors);

 private:
  int total = 0;
};
}  // namespace mindspore::lite
#endif  // MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_TRANSFORM_PASS_H_
src/runtime/agent/npu/subgraph_npu_kernel.cc
@@ -24,7 +24,6 @@
 #include "include/graph/model.h"
 #include "include/hiai_ir_build.h"
 #include "include/HiAiModelManagerType.h"
 #include "include/context.h"
 #include "include/version.h"
 #include "src/common/utils.h"
 #include "src/runtime/agent/npu/npu_converter_utils.h"
@@ -34,10 +33,6 @@ namespace mindspore::kernel {
 using mindspore::lite::RET_ERROR;
 using mindspore::lite::RET_OK;

-std::set<schema::PrimitiveType> trans_nodes = {schema::PrimitiveType_Conv2D, schema::PrimitiveType_DeConv2D,
-                                               schema::PrimitiveType_DepthwiseConv2D,
-                                               schema::PrimitiveType_DeDepthwiseConv2D, schema::PrimitiveType_Resize};
-
 domi::ModelBufferData *SubGraphNpuKernel::BuildIRModel() {
   ge::Graph graph("NPUGraph");
@@ -75,8 +70,7 @@ domi::ModelBufferData *SubGraphNpuKernel::BuildIRModel() {
 }

 int SubGraphNpuKernel::Run() {
-  return reinterpret_cast<lite::NPUExecutor *>(this->executor_)
-    ->Run(in_tensors_, out_tensors_, nodes_, inputs_nhwc2nchw_, outputs_nchw2nhwc_);
+  return reinterpret_cast<lite::NPUExecutor *>(this->executor_)->Run(in_tensors_, out_tensors_, nodes_);
 }

 int SubGraphNpuKernel::BuildNPUInputOp() {
@@ -88,21 +82,7 @@ int SubGraphNpuKernel::BuildNPUInputOp() {
     if (IsSubGraphInputTensor(in_tensor)) {
       auto tensor_name = node->name() + "_" + std::to_string(count++);
       hiai::op::Data *data;
-      if (trans_nodes.find(node->Type()) != trans_nodes.end()) {
-        auto shape = in_tensor->shape();
-        data = new (std::nothrow) hiai::op::Data(tensor_name);
-        if (data == nullptr) {
-          MS_LOG(ERROR) << "New data failed.";
-          return RET_ERROR;
-        }
-        ge::TensorDesc tensor_desc(lite::ConverterToNPUShape({shape[0], shape[3], shape[1], shape[2]}),
-                                   ge::FORMAT_NCHW, lite::ConverterToNPUDataType(in_tensor->data_type()));
-        data->update_input_desc_x(tensor_desc);
-        inputs_nhwc2nchw_.push_back(true);
-      } else {
-        data = mindspore::lite::ConverterToNPUData(in_tensor, tensor_name);
-        inputs_nhwc2nchw_.push_back(false);
-      }
+      data = mindspore::lite::ConverterToNPUData(in_tensor, tensor_name);
       subgraph_input_op_.push_back(*data);
       node_input_op.push_back(data);
       continue;
@@ -132,7 +112,7 @@ int SubGraphNpuKernel::BuildNPUInputOp() {

     // weight tensor
     if (is_weight_tensor) {
-      if (trans_nodes.find(node->Type()) == trans_nodes.end()) {
+      if (lite::npu_trans_nodes.find(node->Type()) == lite::npu_trans_nodes.end()) {
         auto name = node->name() + "_" + std::to_string(count++);
         auto weight_const = new (std::nothrow) hiai::op::Const(node->name() + "_" + std::to_string(count++));
         if (weight_const == nullptr) {
@@ -162,11 +142,6 @@ std::vector<ge::Operator> SubGraphNpuKernel::GetNPUNodes(const vector<kernel::Li
   ops.reserve(nodes.size());
   for (int i = 0; i < nodes.size(); i++) {
     ops.push_back(*reinterpret_cast<NPUKernel *>(nodes[i])->GetNPUOp());
-    if (trans_nodes.find(schema::PrimitiveType(nodes[i]->GetPrimitive()->Type())) != trans_nodes.end()) {
-      outputs_nchw2nhwc_.push_back(true);
-    } else {
-      outputs_nchw2nhwc_.push_back(false);
-    }
   }
   return ops;
 }
src/runtime/agent/npu/subgraph_npu_kernel.h
@@ -69,10 +69,6 @@ class SubGraphNpuKernel : public SubGraphKernel {
   std::string GetOMModelName();

  private:
-  std::vector<bool> inputs_nhwc2nchw_;
-
-  std::vector<bool> outputs_nchw2nhwc_;
-
   domi::ModelBufferData *model_buffer_data_;

   std::vector<ge::Operator> subgraph_input_op_;
src/runtime/kernel/npu/convolution_base_npu.cc
@@ -16,6 +16,7 @@

 #include "src/runtime/kernel/npu/convolution_base_npu.h"
 #include "src/runtime/agent/npu/npu_converter_utils.h"
+#include "nnacl/pack.h"

 namespace mindspore::kernel {
 ConvolutionBaseNPUKernel::~ConvolutionBaseNPUKernel() {
@@ -39,14 +40,27 @@ int ConvolutionBaseNPUKernel::InitWeightBiasConst(const std::vector<lite::Tensor
     MS_LOG(ERROR) << "New weight const failed.";
     return RET_ERROR;
   }
-  auto weight_shape = inputs[1]->shape();
-  inputs[1]->set_shape({weight_shape[0], weight_shape[3], weight_shape[1], weight_shape[2]});
-  inputs[1]->set_format(schema::Format_NCHW);
-  auto weight_tensor = mindspore::lite::ConverterToNPUTensor(inputs[1]);
-  weight_->set_attr_value(weight_tensor);
-  inputs[1]->set_shape(weight_shape);
-  inputs[1]->set_format(schema::Format_NHWC);
+  auto w_shape = inputs[1]->shape();
+  auto nhwc_data = inputs[1]->data_c();
+  auto nchw_data = reinterpret_cast<float *>(malloc(inputs[1]->ElementsNum() * sizeof(float)));
+  if (nchw_data == nullptr) {
+    MS_LOG(ERROR) << "Malloc buffer failed.";
+    return RET_ERROR;
+  }
+  PackNHWCToNCHWFp32(nhwc_data, nchw_data, w_shape[0], w_shape[1] * w_shape[2], w_shape[3]);
+
+  std::shared_ptr<ge::Tensor> weight_tensor = std::shared_ptr<ge::Tensor>(new (std::nothrow) ge::Tensor());
+  if (weight_tensor == nullptr) {
+    MS_LOG(ERROR) << "new weight_tensor failed.";
+    return RET_ERROR;
+  }
+  ge::TensorDesc tensor_desc(lite::ConverterToNPUShape({w_shape[0], w_shape[3], w_shape[1], w_shape[2]}),
+                             ge::FORMAT_NCHW, lite::ConverterToNPUDataType(inputs[1]->data_type()));
+  weight_tensor->SetTensorDesc(tensor_desc);
+  weight_tensor->SetData(reinterpret_cast<const uint8_t *>(nchw_data), inputs[1]->Size());
+
+  weight_->set_attr_value(weight_tensor);
+  free(nchw_data);

   if (inputs.size() >= 3) {
     bias_ = new (std::nothrow) hiai::op::Const(name_ + "_b");
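The weight is now repacked on the host: the NHWC-laid-out weight {O, H, W, I} goes through PackNHWCToNCHWFp32 with batch = O, plane = H * W and channel = I, producing {O, I, H, W} for the NCHW const. A tiny standalone check of that index mapping, using the same loop nest as the earlier pack sketch:

#include <cassert>
#include <vector>

int main() {
  const int o = 1, h = 2, w = 2, c = 3;  // tiny OHWI weight
  std::vector<float> src(o * h * w * c), dst(src.size());
  for (size_t i = 0; i < src.size(); ++i) src[i] = static_cast<float>(i);
  // batch = o, plane = h * w, channel = c
  for (int b = 0; b < o; ++b)
    for (int p = 0; p < h * w; ++p)
      for (int k = 0; k < c; ++k)
        dst[(b * c + k) * h * w + p] = src[(b * h * w + p) * c + k];
  // OHWI element (0,1,0,2), value 8, lands at OIHW offset (0*3+2)*4 + 2 = 10.
  assert(dst[10] == 8.0f);
  return 0;
}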
src/runtime/kernel/npu/convolution_base_npu.h
@@ -17,17 +17,18 @@
 #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_NPU_CONVOLUTION_BASE_NPU_H_

 #include <vector>
+#include <memory>
 #include "include/graph/op/all_ops.h"
 #include "src/runtime/kernel/npu/transpose_base_npu.h"
 #include "nnacl/conv_parameter.h"

 namespace mindspore::kernel {
-class ConvolutionBaseNPUKernel : public TransposeBaseNPUKernel {
+class ConvolutionBaseNPUKernel : public NPUKernel {
  public:
   ConvolutionBaseNPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
                            const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx,
                            const mindspore::lite::PrimitiveC *primitive)
-      : TransposeBaseNPUKernel(parameter, inputs, outputs, ctx, primitive) {}
+      : NPUKernel(parameter, inputs, outputs, ctx, primitive) {}
   ~ConvolutionBaseNPUKernel() override;

  protected:
src/runtime/kernel/npu/convolution_depthwise_npu.cc
@@ -25,7 +25,7 @@ using mindspore::schema::PrimitiveType_DepthwiseConv2D;
 namespace mindspore::kernel {
 int ConvolutionDepthwiseNPUKernel::IsSupport(const std::vector<lite::Tensor *> &inputs,
                                              const std::vector<lite::Tensor *> &outputs, OpParameter *opParameter) {
-  return RET_ERROR;
+  return RET_OK;
 }

 int ConvolutionDepthwiseNPUKernel::SetConvDwParam() {
@@ -49,19 +49,13 @@ int ConvolutionDepthwiseNPUKernel::SetConvDwParam() {
 int ConvolutionDepthwiseNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs,
                                                 const std::vector<lite::Tensor *> &outputs,
                                                 const std::vector<ge::Operator *> &npu_inputs) {
-  auto ret = SetPreTranspose(npu_inputs[0]);
-  if (ret != RET_OK) {
-    MS_LOG(ERROR) << "New pre transpose npu operator (NHWC -> NCHW) for op " << name_ << " failed.";
-    return RET_ERROR;
-  }
-
   // set conv attr param
   conv_dw_ = new (std::nothrow) hiai::op::ConvolutionDepthwise(name_ + "_conv_depthwise");
   if (conv_dw_ == nullptr) {
     MS_LOG(ERROR) << "New convolution depthwise operator for op " << name_ << " failed.";
     return RET_ERROR;
   }
-  ret = SetConvDwParam();
+  auto ret = SetConvDwParam();
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "Set npu op parameter for convolution depthwise op " << name_ << " failed.";
     return RET_ERROR;
@@ -76,7 +70,7 @@ int ConvolutionDepthwiseNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *
   if (inputs.size() == 3) {
     conv_dw_->set_input_bias(*bias_);
   }
-  conv_dw_->set_input_x(*pre_trans_);
+  conv_dw_->set_input_x(*npu_inputs[0]);

   if (conv_param_->act_type_ != ActType_No) {
     ret = SetActivation(conv_dw_, conv_param_->act_type_);
@@ -85,20 +79,16 @@ int ConvolutionDepthwiseNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *
       return RET_ERROR;
     }
   }
-
-  if (conv_param_->act_type_ == ActType_No) {
-    ret = SetPostTranspose(conv_dw_);
-  } else {
-    ret = SetPostTranspose(act_);
-  }
-  if (ret != RET_OK) {
-    MS_LOG(ERROR) << "New post transpose npu operator (NCHW -> NHWC) for op " << name_ << " failed.";
-    return RET_ERROR;
-  }
   return RET_OK;
 }

-ge::Operator *mindspore::kernel::ConvolutionDepthwiseNPUKernel::GetNPUOp() { return post_trans_; }
+ge::Operator *mindspore::kernel::ConvolutionDepthwiseNPUKernel::GetNPUOp() {
+  if (conv_param_->act_type_ == ActType_No) {
+    return conv_dw_;
+  } else {
+    return act_;
+  }
+}

 ConvolutionDepthwiseNPUKernel::~ConvolutionDepthwiseNPUKernel() {
   if (conv_dw_ != nullptr) {
|
@ -24,7 +24,7 @@ using mindspore::schema::PrimitiveType_Conv2D;
|
|||
namespace mindspore::kernel {
|
||||
int ConvolutionNPUKernel::IsSupport(const std::vector<lite::Tensor *> &inputs,
|
||||
const std::vector<lite::Tensor *> &outputs, OpParameter *opParameter) {
|
||||
return RET_ERROR;
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
int ConvolutionNPUKernel::SetConvParam() {
|
||||
|
@@ -49,19 +49,13 @@ int ConvolutionNPUKernel::SetConvParam() {
 int ConvolutionNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs,
                                        const std::vector<lite::Tensor *> &outputs,
                                        const std::vector<ge::Operator *> &npu_inputs) {
-  auto ret = SetPreTranspose(npu_inputs[0]);
-  if (ret != RET_OK) {
-    MS_LOG(ERROR) << "New pre transpose npu operator (NHWC -> NCHW) for op " << name_ << " failed.";
-    return RET_ERROR;
-  }
-
   // set conv attr param
   conv_ = new (std::nothrow) hiai::op::Convolution(name_ + "_conv");
   if (conv_ == nullptr) {
     MS_LOG(ERROR) << "New convolution operator for convolution op " << name_ << " failed.";
     return RET_ERROR;
   }
-  ret = SetConvParam();
+  auto ret = SetConvParam();
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "Set npu op parameter for convolution op " << name_ << " failed.";
     return RET_ERROR;
@@ -76,7 +70,7 @@ int ConvolutionNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs
   if (inputs.size() == 3) {
     conv_->set_input_bias(*bias_);
   }
-  conv_->set_input_x(*pre_trans_);
+  conv_->set_input_x(*npu_inputs[0]);

   if (conv_param_->act_type_ != ActType_No) {
     ret = SetActivation(conv_, conv_param_->act_type_);
@@ -85,20 +79,16 @@ int ConvolutionNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs
       return RET_ERROR;
     }
   }
-
-  if (conv_param_->act_type_ == ActType_No) {
-    ret = SetPostTranspose(conv_);
-  } else {
-    ret = SetPostTranspose(act_);
-  }
-  if (ret != RET_OK) {
-    MS_LOG(ERROR) << "New post transpose npu operator (NCHW -> NHWC) for op " << name_ << " failed.";
-    return RET_ERROR;
-  }
   return RET_OK;
 }

-ge::Operator *mindspore::kernel::ConvolutionNPUKernel::GetNPUOp() { return post_trans_; }
+ge::Operator *mindspore::kernel::ConvolutionNPUKernel::GetNPUOp() {
+  if (conv_param_->act_type_ == ActType_No) {
+    return conv_;
+  } else {
+    return act_;
+  }
+}

 ConvolutionNPUKernel::~ConvolutionNPUKernel() {
   if (conv_ != nullptr) {
src/runtime/kernel/npu/pooling_npu.cc
@@ -62,23 +62,17 @@ int PoolingNPUKernel::SetPoolingParam() {
 int PoolingNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs,
                                    const std::vector<lite::Tensor *> &outputs,
                                    const std::vector<ge::Operator *> &npu_inputs) {
-  auto ret = SetPreTranspose(npu_inputs[0]);
-  if (ret != RET_OK) {
-    MS_LOG(ERROR) << "New pre transpose npu operator (NHWC -> NCHW) for op " << name_ << " failed.";
-    return RET_ERROR;
-  }
-
   pooling_ = new (std::nothrow) hiai::op::PoolingD(name_ + "_pooling");
   if (pooling_ == nullptr) {
     MS_LOG(ERROR) << "New pooling npu operator for op " << name_ << " failed.";
     return RET_ERROR;
   }
-  ret = SetPoolingParam();
+  auto ret = SetPoolingParam();
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "Set npu op parameter for convolution op " << name_ << " failed.";
     return RET_ERROR;
   }
-  pooling_->set_input_x(*pre_trans_);
+  pooling_->set_input_x(*npu_inputs[0]);

   if (pooling_param_->act_type_ != ActType_No) {
     ret = SetActivation(pooling_, pooling_param_->act_type_);
@@ -87,20 +81,16 @@ int PoolingNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs,
       return RET_ERROR;
     }
   }
-
-  if (pooling_param_->act_type_ == ActType_No) {
-    ret = SetPostTranspose(pooling_);
-  } else {
-    ret = SetPostTranspose(act_);
-  }
-  if (ret != RET_OK) {
-    MS_LOG(ERROR) << "New post transpose npu operator (NCHW -> NHWC) for op " << name_ << " failed.";
-    return RET_ERROR;
-  }
   return RET_OK;
 }

-ge::Operator *mindspore::kernel::PoolingNPUKernel::GetNPUOp() { return post_trans_; }
+ge::Operator *mindspore::kernel::PoolingNPUKernel::GetNPUOp() {
+  if (pooling_param_->act_type_ == ActType_No) {
+    return pooling_;
+  } else {
+    return act_;
+  }
+}

 PoolingNPUKernel::~PoolingNPUKernel() {
   if (pooling_ != nullptr) {
src/runtime/kernel/npu/resize_npu.cc
@@ -36,12 +36,6 @@ int ResizeNPUKernel::IsSupport(const std::vector<lite::Tensor *> &inputs, const

 int ResizeNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs,
                                   const std::vector<ge::Operator *> &npu_inputs) {
-  auto ret = SetPreTranspose(npu_inputs[0]);
-  if (ret != RET_OK) {
-    MS_LOG(ERROR) << "New pre transpose npu operator (NHWC -> NCHW) for op " << name_ << " failed.";
-    return RET_ERROR;
-  }
-
   ge::TensorDesc sizeTensorDesc(ge::Shape({2}), ge::FORMAT_NCHW, ge::DT_INT32);
   ge::TensorPtr sizeTensor = std::make_shared<hiai::Tensor>(sizeTensorDesc);
   vector<int32_t> dataValue = {static_cast<int32_t>(new_height_), static_cast<int32_t>(new_width_)};
@@ -55,7 +49,7 @@ int ResizeNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs, con
       return RET_ERROR;
     }
     op->set_attr_align_corners(align_corners_);
-    op->set_input_x(*pre_trans_);
+    op->set_input_x(*npu_inputs[0]);
     op->set_input_size(*out_size);
     op->set_attr_half_pixel_centers(preserve_aspect_ratio_);
     op_ = op;
@@ -66,21 +60,14 @@ int ResizeNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs, con
       return RET_ERROR;
     }
     op->set_attr_align_corners(align_corners_);
-    op->set_input_x(*pre_trans_);
+    op->set_input_x(*npu_inputs[0]);
     op->set_input_size(*out_size);
     op_ = op;
   }

-  ret = SetPostTranspose(op_);
-  if (ret != RET_OK) {
-    MS_LOG(ERROR) << "New post transpose npu operator (NCHW -> NHWC) for op " << name_ << " failed.";
-    return RET_ERROR;
-  }
-
   return RET_OK;
 }

-ge::Operator *mindspore::kernel::ResizeNPUKernel::GetNPUOp() { return this->post_trans_; }
+ge::Operator *mindspore::kernel::ResizeNPUKernel::GetNPUOp() { return this->op_; }

 ResizeNPUKernel::~ResizeNPUKernel() {
   if (op_ != nullptr) {
src/runtime/kernel/npu/resize_npu.h
@@ -24,12 +24,12 @@
 #include "include/graph/op/all_ops.h"
 #include "src/runtime/kernel/npu/transpose_base_npu.h"
 namespace mindspore::kernel {
-class ResizeNPUKernel : public TransposeBaseNPUKernel {
+class ResizeNPUKernel : public NPUKernel {
  public:
   ResizeNPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
                   const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx,
                   const mindspore::lite::PrimitiveC *primitive)
-      : TransposeBaseNPUKernel(parameter, inputs, outputs, ctx, primitive) {
+      : NPUKernel(parameter, inputs, outputs, ctx, primitive) {
     auto resize_parameter = reinterpret_cast<ResizeParameter *>(parameter);
     method_ = resize_parameter->method_;
     new_height_ = resize_parameter->new_height_;
src/scheduler.cc
@@ -33,6 +33,8 @@
 #if SUPPORT_NPU
 #include "src/runtime/agent/npu/subgraph_npu_kernel.h"
 #include "src/runtime/agent/npu/npu_manager.h"
+#include "src/runtime/agent/npu/npu_transform_pass.h"
+#include "src/runtime/agent/npu/npu_fusion_pass.h"
 #endif
 namespace mindspore::lite {
 using kernel::KERNEL_ARCH::kCPU;
@@ -63,6 +65,11 @@ int Scheduler::Schedule(std::vector<kernel::LiteKernel *> *dst_kernels) {
     return ret;
   }
   FindAllInoutKernels(*dst_kernels);
+  ret = RunPass(dst_kernels);
+  if (ret != RET_OK) {
+    MS_LOG(ERROR) << "Schedule run pass failed.";
+    return ret;
+  }
   ret = ConstructSubGraphs(dst_kernels);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "ConstructSubGraphs failed.";
@@ -514,4 +521,25 @@ void Scheduler::FindAllInoutKernels(const std::vector<kernel::LiteKernel *> &ker
     kernel->FindInoutKernels(kernels);
   }
 }
+
+int Scheduler::RunPass(std::vector<kernel::LiteKernel *> *dst_kernels) {
+  int ret = RET_OK;
+#if SUPPORT_NPU
+  auto transform_pass = new NPUTransformPass;
+  ret = transform_pass->FormatTransformPass(context_, dst_kernels, &src_tensors_);
+  delete transform_pass;
+  if (ret != RET_OK) {
+    MS_LOG(ERROR) << "Run npu format transform pass failed.";
+    return ret;
+  }
+  auto fusion_pass = new NPUFusionPass(dst_kernels);
+  ret = fusion_pass->Fusion();
+  if (ret != RET_OK) {
+    MS_LOG(ERROR) << "Run npu fussion transform pass failed.";
+    return ret;
+  }
+  delete fusion_pass;
+#endif
+  return ret;
+}
 }  // namespace mindspore::lite
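RunPass chains the two NPU graph passes and stops at the first failure. The generic shape of that pipeline, as a standalone sketch with stand-in types (not lite APIs):

#include <functional>
#include <iostream>
#include <vector>

struct Kernel {};  // stand-in for kernel::LiteKernel

// Each pass edits the kernel list in place; the pipeline aborts on the first
// non-zero return, mirroring RunPass above.
int RunPipeline(std::vector<Kernel *> *kernels,
                const std::vector<std::function<int(std::vector<Kernel *> *)>> &passes) {
  for (const auto &pass : passes) {
    int ret = pass(kernels);
    if (ret != 0) {
      std::cerr << "pass failed: " << ret << '\n';
      return ret;
    }
  }
  return 0;
}

int main() {
  std::vector<Kernel *> kernels;
  auto transform = [](std::vector<Kernel *> *) { return 0; };  // stand-in for NPUTransformPass
  auto fusion = [](std::vector<Kernel *> *) { return 0; };     // stand-in for NPUFusionPass
  return RunPipeline(&kernels, {transform, fusion});
}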
src/scheduler.h
@@ -77,6 +77,8 @@ class Scheduler {

   static kernel::SubGraphType GetKernelSubGraphType(const kernel::LiteKernel *kernel);

+  int RunPass(std::vector<kernel::LiteKernel *> *dst_kernels);
+
  protected:
   const InnerContext *context_ = nullptr;
   Model *src_model_ = nullptr;