Run mobilenet_v2 successfully

yeyunpeng 2020-12-20 08:34:56 +08:00
parent a3d4dded12
commit d45b5b5126
24 changed files with 852 additions and 305 deletions

View File

@ -95,6 +95,8 @@ class LiteKernel {
virtual int Init() { return mindspore::lite::RET_ERROR; }
OpParameter *op_parameter() { return op_parameter_; }
std::string name() const { return this->name_; }
virtual int Train() {

View File

@ -479,12 +479,6 @@ int LiteSession::Init(const Context *context) {
is_running_.store(false);
return ret;
}
ret = InitNPURuntime();
if (ret != RET_OK) {
MS_LOG(ERROR) << "Init NPU runtime failed.";
is_running_.store(false);
return ret;
}
executor_ = new (std::nothrow) Executor();
if (nullptr == executor_) {
MS_LOG(ERROR) << "New Executor failed";
@ -661,18 +655,6 @@ int LiteSession::Resize(const std::vector<mindspore::tensor::MSTensor *> &inputs
return RET_OK;
}
int LiteSession::InitNPURuntime() {
#if SUPPORT_NPU
if (this->context_->IsNpuEnabled()) {
if (mindspore::lite::NPUManager::GetInstance()->InitClient() != RET_OK) {
MS_LOG(ERROR) << "NPU client init error.";
return RET_ERROR;
}
}
#endif
return RET_OK;
}
int LiteSession::InitGPURuntime() {
#if SUPPORT_GPU
if (this->context_->IsGpuEnabled()) {

View File

@ -103,8 +103,6 @@ class LiteSession : public session::LiteSession {
private:
void ResetInputsShape(const std::vector<std::vector<int>> &dims);
int InitNPURuntime();
int InitGPURuntime();
protected:

View File

@ -17,10 +17,9 @@
#include "src/runtime/agent/npu/npu_executor.h"
#include "include/errorcode.h"
#include "src/runtime/agent/npu/npu_manager.h"
#include "nnacl/pack.h"
namespace mindspore::lite {
int NPUExecutor::Prepare(const std::vector<kernel::LiteKernel *> &kernels) {
this->client_ = mindspore::lite::NPUManager::GetInstance()->GetClient();
this->client_ = mindspore::lite::NPUManager::GetInstance()->GetClient(model_name_);
if (this->client_ == nullptr) {
MS_LOG(ERROR) << "client is nullptr.";
return RET_ERROR;
@ -33,9 +32,8 @@ int NPUExecutor::Prepare(const std::vector<kernel::LiteKernel *> &kernels) {
}
int NPUExecutor::Run(const std::vector<Tensor *> &in_tensors, const std::vector<Tensor *> &out_tensors,
const std::vector<kernel::LiteKernel *> &kernels, const std::vector<bool> &inputs_nhwc2nchw,
const std::vector<bool> &outputs_nchw2nhwc, Allocator *allocator, const KernelCallBack &before,
const KernelCallBack &after) {
const std::vector<kernel::LiteKernel *> &kernels, Allocator *allocator,
const KernelCallBack &before, const KernelCallBack &after) {
hiai::AiContext context;
for (int i = 0; i < npu_input_tensors_.size(); ++i) {
void *data = in_tensors[i]->data_c();
@ -43,12 +41,7 @@ int NPUExecutor::Run(const std::vector<Tensor *> &in_tensors, const std::vector<
MS_LOG(ERROR) << model_name_ << " inputs data is nullptr";
return RET_ERROR;
}
if (inputs_nhwc2nchw[i]) {
PackNHWCToNCHWFp32(data, npu_input_tensors_[i]->GetBuffer(), in_tensors[i]->Batch(),
in_tensors[i]->Width() * in_tensors[i]->Height(), in_tensors[i]->Channel());
} else {
memcpy(npu_input_tensors_[i]->GetBuffer(), data, in_tensors[i]->Size());
}
memcpy(npu_input_tensors_[i]->GetBuffer(), data, in_tensors[i]->Size());
}
context.AddPara("model_name", model_name_);
if (this->client_ == nullptr) {
@ -68,12 +61,7 @@ int NPUExecutor::Run(const std::vector<Tensor *> &in_tensors, const std::vector<
MS_LOG(ERROR) << "Malloc buffer failed.";
return RET_ERROR;
}
if (outputs_nchw2nhwc[i]) {
PackNCHWToNHWCFp32(npu_output_tensors_[i]->GetBuffer(), data, out_tensors[i]->Batch(),
out_tensors[i]->Width() * out_tensors[i]->Height(), out_tensors[i]->Channel());
} else {
memcpy(data, npu_output_tensors_[i]->GetBuffer(), npu_output_tensors_[i]->GetSize());
}
memcpy(data, npu_output_tensors_[i]->GetBuffer(), npu_output_tensors_[i]->GetSize());
out_tensors[i]->ResetRefCount();
}
return RET_OK;

View File

@ -32,8 +32,7 @@ class NPUExecutor : public Executor {
int Prepare(const std::vector<kernel::LiteKernel *> &kernels) override;
int Run(const std::vector<Tensor *> &in_tensors, const std::vector<Tensor *> &out_tensors,
const std::vector<kernel::LiteKernel *> &kernels, const std::vector<bool> &inputs_nhwc2nchw,
const std::vector<bool> &outputs_nchw2nhwc, Allocator *allocator = nullptr,
const std::vector<kernel::LiteKernel *> &kernels, Allocator *allocator = nullptr,
const KernelCallBack &before = nullptr, const KernelCallBack &after = nullptr);
private:

View File

@ -0,0 +1,224 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "src/runtime/agent/npu/npu_fusion_pass.h"
#include <vector>
#include "src/lite_kernel.h"
#include "nnacl/concat_parameter.h"
namespace mindspore::lite {
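// Fusion applies only when every input kernel is a Nchw2Nhwc transpose with a single output
// and every output kernel is a Nhwc2Nchw transpose with a single input.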
bool CheckFusion(kernel::LiteKernel *kernel) {
auto pre_flag =
std::all_of(kernel->in_kernels().begin(), kernel->in_kernels().end(), [](const kernel::LiteKernel *kernel) {
return kernel->Type() == schema::PrimitiveType_Nchw2Nhwc && kernel->out_kernels().size() == 1;
});
if (!pre_flag) {
return false;
}
auto post_flag =
std::all_of(kernel->out_kernels().begin(), kernel->out_kernels().end(), [](const kernel::LiteKernel *kernel) {
return kernel->Type() == schema::PrimitiveType_Nhwc2Nchw && kernel->in_kernels().size() == 1;
});
return post_flag;
}
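// Bypass the Nchw2Nhwc kernels in front of cur_kernel: link cur_kernel directly to their producers
// and erase the transpose kernels from the kernel list.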
void NPUFusionPass::UpdatePreKernels(kernel::LiteKernel *cur_kernel) {
for (auto in_kernel : cur_kernel->in_kernels()) {
auto pre_kernel = in_kernel->in_kernels()[0];
auto pre_out_kernels = pre_kernel->out_kernels();
for (size_t i = 0; i < pre_out_kernels.size(); i++) {
if (pre_out_kernels[i] == in_kernel) {
pre_out_kernels[i] = cur_kernel;
break;
}
}
pre_kernel->set_out_kernels(pre_out_kernels);
auto cur_in_kernels = cur_kernel->in_kernels();
for (size_t i = 0; i < cur_in_kernels.size(); i++) {
if (cur_in_kernels[i] == in_kernel) {
cur_in_kernels[i] = pre_kernel;
break;
}
}
cur_kernel->set_in_kernels(cur_in_kernels);
kernels->erase(find(kernels->begin(), kernels->end(), in_kernel));
}
}
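// Symmetric to UpdatePreKernels: bypass the Nhwc2Nchw kernels behind cur_kernel and erase them from the kernel list.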
void NPUFusionPass::UpdatePostKernels(kernel::LiteKernel *cur_kernel) {
for (auto out_kernel : cur_kernel->out_kernels()) {
auto post_kernel = out_kernel->out_kernels()[0];
auto post_in_kernels = post_kernel->in_kernels();
for (size_t i = 0; i < post_in_kernels.size(); i++) {
if (post_in_kernels[i] == out_kernel) {
post_in_kernels[i] = cur_kernel;
break;
}
}
post_kernel->set_in_kernels(post_in_kernels);
auto cur_out_kernels = cur_kernel->out_kernels();
for (size_t i = 0; i < cur_out_kernels.size(); i++) {
if (cur_out_kernels[i] == out_kernel) {
cur_out_kernels[i] = post_kernel;
break;
}
}
cur_kernel->set_out_kernels(cur_out_kernels);
kernels->erase(find(kernels->begin(), kernels->end(), out_kernel));
}
}
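// Rewire cur_kernel's input tensors to the tensors produced in front of the removed input transposes.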
void UpdatePreTensors(kernel::LiteKernel *cur_kernel) {
auto tensors_vec = cur_kernel->in_tensors();
for (auto in_kernel : cur_kernel->in_kernels()) {
lite::Tensor *cur_tensor = nullptr;
auto in_tensor = in_kernel->in_tensors()[0];
auto out_tensor = in_kernel->out_tensors()[0];
auto pre_kernel = in_kernel->in_kernels()[0];
for (size_t i = 0; i < pre_kernel->out_tensors().size(); i++) {
if (pre_kernel->out_tensors()[i] == in_tensor) {
cur_tensor = pre_kernel->out_tensors()[i];
}
}
for (size_t i = 0; i < tensors_vec.size(); i++) {
if (tensors_vec[i] == out_tensor) {
tensors_vec[i] = cur_tensor;
}
}
}
cur_kernel->set_in_tensors(tensors_vec);
}
void UpdatePostTensors(kernel::LiteKernel *cur_kernel) {
auto tensors_vec = cur_kernel->out_tensors();
for (auto out_kernel : cur_kernel->out_kernels()) {
auto in_tensor = out_kernel->in_tensors()[0];
auto out_tensor = out_kernel->out_tensors()[0];
auto post_kernel = out_kernel->out_kernels()[0];
lite::Tensor *cur_tensor = nullptr;
for (size_t i = 0; i < post_kernel->in_tensors().size(); i++) {
if (post_kernel->in_tensors()[i] == out_tensor) {
cur_tensor = post_kernel->in_tensors()[i];
}
}
for (size_t i = 0; i < tensors_vec.size(); i++) {
if (tensors_vec[i] == in_tensor) {
tensors_vec[i] = cur_tensor;
}
}
}
cur_kernel->set_out_tensors(tensors_vec);
}
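// Map an NHWC axis index to the corresponding NCHW axis: N->0, H->2, W->3, C (3 or -1)->1; anything else maps to -2.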
int TransFormAxis(int axis) {
switch (axis) {
case 0:
return 0;
case 1:
return 2;
case 2:
return 3;
case 3:
case -1:
return 1;
default:
return -2;
}
}
int NPUFusionPass::AddFusion(kernel::LiteKernel *kernel) {
if (!CheckFusion(kernel)) {
return RET_OK;
}
UpdatePreTensors(kernel);
UpdatePostTensors(kernel);
UpdatePreKernels(kernel);
UpdatePostKernels(kernel);
return RET_OK;
}
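// Concat additionally remaps its axis parameter, since the fused kernel now works on NCHW data.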
int NPUFusionPass::ConcatFusion(kernel::LiteKernel *kernel) {
if (!CheckFusion(kernel)) {
return RET_OK;
}
UpdatePreTensors(kernel);
UpdatePostTensors(kernel);
UpdatePreKernels(kernel);
UpdatePostKernels(kernel);
auto concat_param = reinterpret_cast<ConcatParameter *>(kernel->op_parameter());
concat_param->axis_ = TransFormAxis(concat_param->axis_);
return RET_OK;
}
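// Remove a Nchw2Nhwc kernel whose consumers are all Nhwc2Nchw kernels: connect its producer directly
// to the consumers of those transposes and erase both layers of transpose kernels.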
int NPUFusionPass::FormatFusion(kernel::LiteKernel *kernel) {
if (kernel->out_kernels().empty()) {
return RET_OK;
}
if (!std::all_of(kernel->out_kernels().begin(), kernel->out_kernels().end(), [](const kernel::LiteKernel *kernel) {
return kernel->Type() == schema::PrimitiveType_Nhwc2Nchw;
})) {
return RET_OK;
}
auto pre_kernel = kernel->in_kernels()[0];
auto pre_out_kernels = pre_kernel->out_kernels();
for (size_t i = 0; i < pre_out_kernels.size(); i++) {
if (pre_out_kernels[i] == kernel) {
pre_out_kernels.erase(pre_out_kernels.begin() + i);
break;
}
}
for (const auto &nc2nh : kernel->out_kernels()) {
for (const auto &post_kernel : nc2nh->out_kernels()) {
auto post_in_kernels = post_kernel->in_kernels();
for (size_t i = 0; i < post_in_kernels.size(); i++) {
if (post_in_kernels[i] == nc2nh) {
post_in_kernels[i] = pre_kernel;
break;
}
}
post_kernel->set_in_kernels(post_in_kernels);
pre_out_kernels.push_back(post_kernel);
}
kernels->erase(find(kernels->begin(), kernels->end(), nc2nh));
}
pre_kernel->set_out_kernels(pre_out_kernels);
kernels->erase(find(kernels->begin(), kernels->end(), kernel));
return RET_OK;
}
int NPUFusionPass::Fusion() {
for (auto kernel : *kernels) {
switch (kernel->Type()) {
case schema::PrimitiveType_Concat:
ConcatFusion(kernel);
continue;
case schema::PrimitiveType_Add:
AddFusion(kernel);
continue;
case schema::PrimitiveType_Nchw2Nhwc:
FormatFusion(kernel);
continue;
default:
continue;
}
}
return RET_OK;
}
} // namespace mindspore::lite

View File

@ -0,0 +1,40 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_FUSION_PASS_H_
#define MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_FUSION_PASS_H_
#include <vector>
#include "src/lite_kernel.h"
#include "src/ops/primitive_c.h"
namespace mindspore::lite {
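// Graph pass that removes redundant Nchw2Nhwc / Nhwc2Nchw transpose kernels left between NPU ops
// after the transform pass (Concat and Add fusion, plus plain format fusion).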
class NPUFusionPass {
public:
explicit NPUFusionPass(std::vector<kernel::LiteKernel *> *dst_kernels) { kernels = dst_kernels; }
~NPUFusionPass() = default;
int Fusion();
protected:
int ConcatFusion(kernel::LiteKernel *kernel);
int AddFusion(kernel::LiteKernel *kernel);
int FormatFusion(kernel::LiteKernel *kernel);
void UpdatePreKernels(kernel::LiteKernel *kernel);
void UpdatePostKernels(kernel::LiteKernel *kernel);
private:
std::vector<kernel::LiteKernel *> *kernels;
};
} // namespace mindspore::lite
#endif // MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_FUSION_PASS_H_

View File

@ -15,21 +15,59 @@
*/
#include "src/runtime/agent/npu/npu_manager.h"
#include <sys/system_properties.h>
#include <sys/fcntl.h>
#include <unistd.h>
#include "include/hiai_ir_build.h"
#include "include/HiAiModelManagerService.h"
#include "include/errorcode.h"
#include "include/graph/op/all_ops.h"
#include "src/common/file_utils.h"
namespace mindspore::lite {
bool NPUManager::IsSupportNPU() {
if (!is_npu_check_executor) {
CheckSupportNPU();
#define MAX_MODEL_NUM 20
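// Compare two dot-separated version strings field by field: returns 1 if version1 is newer, -1 if older, 0 if equal.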
int NPUManager::CompareVersion(const string &version1, const string &version2) {
std::istringstream iss1(version1);
std::istringstream iss2(version2);
string string1;
string string2;
while (!iss1.eof() || !iss2.eof()) {
getline(iss1, string1, '.');
getline(iss2, string2, '.');
if (stoi(string1) > stoi(string2)) return 1;
if (stoi(string1) < stoi(string2)) return -1;
string1 = string2 = "0";
}
if (is_support_npu) {
return 0;
}
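// NPU is only used when the EMUI version is at least 11.0.0.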
bool NPUManager::CheckEMUIVersion() {
char emui[128] = {0x00};
__system_property_get("ro.build.version.emui", emui);
std::string emui_str = emui;
int pos = emui_str.find('_');
if (pos != std::string::npos) {
auto version = emui_str.substr(pos + 1);
int ret = CompareVersion(version, "11.0.0");
if (ret < 0) {
return false;
}
}
return true;
}
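// NPU is only used when the HiAI version reported by AiModelMngerClient::GetVersion() is at least 100.330.010.011.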
bool NPUManager::CheckDDKVersion() {
auto client = std::make_shared<hiai::AiModelMngerClient>();
if (client->GetVersion() != nullptr) {
std::string version = client->GetVersion();
int ret = CompareVersion(version, "100.330.010.011");
if (ret < 0) {
return false;
}
}
return true;
}
bool NPUManager::IsSupportNPU() {
if (IsKirinChip() && CheckEMUIVersion() && CheckDDKVersion()) {
MS_LOG(INFO) << "The current device supports NPU.";
return true;
} else {
@ -38,36 +76,6 @@ bool NPUManager::IsSupportNPU() {
}
}
std::string NPUManager::GetExecutorPath() {
std::string executor_path;
char cmdline[1024] = {0};
int fd = open("/proc/self/cmdline", O_RDONLY);
if (fd >= 0) {
char ch;
int i = 0;
while (read(fd, &ch, sizeof(ch)) > 0 && !isspace(ch)) {
if (':' == ch) {
break;
}
cmdline[i] = ch;
i++;
}
close(fd);
}
executor_path = std::string(cmdline);
if (executor_path.empty()) {
executor_path = "./";
}
// android
if (executor_path.substr(0, 11) == "/data/data/") {
executor_path = executor_path + '/';
} else {
// Linux
executor_path = executor_path.substr(0, executor_path.rfind('/')) + "/";
}
return executor_path;
}
bool NPUManager::IsKirinChip() {
std::ifstream cpu_info("/proc/cpuinfo");
if (!(cpu_info.good() && cpu_info.is_open())) {
@ -96,86 +104,6 @@ bool NPUManager::IsKirinChip() {
return false;
}
bool WriteToOMFile(domi::ModelBufferData om_model_buff, const std::string &om_file_path) {
FILE *fp;
fp = fopen(om_file_path.c_str(), "wb");
if (fp == nullptr) {
MS_LOG(ERROR) << om_file_path.c_str() << " open failed.";
return false;
}
auto write_size = (uint32_t)fwrite(om_model_buff.data, 1, om_model_buff.length, fp);
if (write_size != om_model_buff.length) {
fclose(fp);
MS_LOG(ERROR) << "Write om file failed.";
return false;
}
fclose(fp);
return true;
}
bool NPUManager::CheckOmBuildIr(const std::string &path) {
// build test om model
std::shared_ptr<hiai::op::Add> add_op(new (std::nothrow) hiai::op::Add("add"));
if (add_op == nullptr) {
MS_LOG(ERROR) << "new add_op failed.";
return false;
}
ge::TensorDesc desc(ge::Shape({1}), ge::FORMAT_NCHW, ge::DT_FLOAT);
std::shared_ptr<hiai::op::Data> data = std::make_shared<hiai::op::Data>("data");
data->update_input_desc_x(desc);
add_op->set_input_x1(*data);
add_op->set_input_x2(*data);
domi::HiaiIrBuild ir_build;
ge::Graph ir_graph("graph");
std::vector<ge::Operator> inputs{*data, *data};
std::vector<ge::Operator> outputs{*add_op};
ir_graph.SetInputs(inputs).SetOutputs(outputs);
ge::Model om_model("test_model", "test_version");
om_model.SetGraph(ir_graph);
domi::ModelBufferData om_model_buff;
if (!ir_build.CreateModelBuff(om_model, om_model_buff)) {
MS_LOG(ERROR) << "Create model buffer failed.";
return false;
}
if (!ir_build.BuildIRModel(om_model, om_model_buff)) {
MS_LOG(ERROR) << "Build IR model failed.";
return false;
}
// save test om model
remove(path.c_str());
bool ret = WriteToOMFile(om_model_buff, path);
ir_build.ReleaseModelBuff(om_model_buff);
return ret;
}
void NPUManager::CheckSupportNPU() {
is_npu_check_executor = true;
std::string path_string = GetExecutorPath();
std::string test_model_path = path_string + "/mindspore_lite_test_npu.om";
std::ifstream ifs(test_model_path);
if (ifs.good() && ifs.is_open()) {
ifs.close();
is_support_npu = true;
return;
}
if (!IsKirinChip()) {
MS_LOG(ERROR) << "The current device chip NOT SUPPORT NPU";
is_support_npu = false;
return;
}
if (!CheckOmBuildIr(test_model_path)) {
MS_LOG(ERROR) << "Build OM IR error.";
is_support_npu = false;
return;
}
is_support_npu = true;
}
int NPUManager::AddModel(void *model_buf, uint32_t size, const std::string &model_name, int frequency) {
hiai::MemBuffer *buffer = mc_builder_->InputMemBufferCreate(model_buf, size);
if (buffer == nullptr) {
@ -188,33 +116,42 @@ int NPUManager::AddModel(void *model_buf, uint32_t size, const std::string &mode
model_desc_.push_back(desc);
mc_builder_->MemBufferDestroy(buffer);
model_map_.insert({model_name, index_});
index_++;
return RET_OK;
}
int NPUManager::InitClient() {
this->client_ = std::make_shared<hiai::AiModelMngerClient>();
if (this->client_ == nullptr) {
return RET_ERROR;
}
int ret = this->client_->Init(nullptr);
if (ret != hiai::AI_SUCCESS) {
return RET_ERROR;
}
mc_builder_ = std::make_shared<hiai::AiModelBuilder>(this->client_);
return RET_OK;
}
int NPUManager::LoadOMModel() {
int ret = this->client_->Load(model_desc_);
if (ret != hiai::AI_SUCCESS) {
MS_LOG(ERROR) << "Client load model failed." << ret;
return RET_ERROR;
for (int i = 0; i < index_ / MAX_MODEL_NUM + 1; i++) {
auto client = std::make_shared<hiai::AiModelMngerClient>();
if (client == nullptr) {
MS_LOG(ERROR) << "NPU client is nullptr.";
return RET_ERROR;
}
int ret = client->Init(nullptr);
if (ret != hiai::AI_SUCCESS) {
MS_LOG(ERROR) << "NPU client init failed. code is " << ret;
return RET_ERROR;
}
mc_builder_ = std::make_shared<hiai::AiModelBuilder>(client);
vector<std::shared_ptr<hiai::AiModelDescription>> desc(model_desc_.begin() + i * MAX_MODEL_NUM,
((i + 1) * MAX_MODEL_NUM > index_)
? model_desc_.begin() + index_
: model_desc_.begin() + (i + 1) * MAX_MODEL_NUM);
ret = client->Load(desc);
if (ret != hiai::AI_SUCCESS) {
MS_LOG(ERROR) << "Client load model failed." << ret;
return RET_ERROR;
}
clients_.push_back(client);
}
return RET_OK;
}
std::shared_ptr<hiai::AiModelMngerClient> NPUManager::GetClient() { return client_; }
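// Models are loaded in groups of MAX_MODEL_NUM, one client per group; return the client that holds the given model.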
std::shared_ptr<hiai::AiModelMngerClient> NPUManager::GetClient(const std::string &model_name) {
return clients_[model_map_[model_name] / MAX_MODEL_NUM];
}
int NPUManager::index() { return index_; }
int NPUManager::index() const { return index_; }
} // namespace mindspore::lite

View File

@ -14,15 +14,21 @@
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_UTILS_H_
#define MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_UTILS_H_
#ifndef MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_MANAGER_H_
#define MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_MANAGER_H_
#include <string>
#include <memory>
#include <vector>
#include <unordered_map>
#include <set>
#include "schema/model_generated.h"
#include "include/HiAiModelManagerService.h"
namespace mindspore::lite {
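// Ops whose NPU kernels consume and produce NCHW data; the NPU transform pass inserts format transpose kernels around them.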
static std::set<mindspore::schema::PrimitiveType> npu_trans_nodes = {
schema::PrimitiveType_Conv2D, schema::PrimitiveType_DeConv2D,
schema::PrimitiveType_DepthwiseConv2D, schema::PrimitiveType_DeDepthwiseConv2D,
schema::PrimitiveType_Resize, schema::PrimitiveType_Pooling};
class NPUManager {
public:
static NPUManager *GetInstance() {
@ -32,8 +38,6 @@ class NPUManager {
bool IsSupportNPU();
int InitClient();
// provide to subgraph to add model.
int AddModel(void *model_buf, uint32_t size, const std::string &model_name, int frequency);
@ -41,18 +45,18 @@ class NPUManager {
int LoadOMModel();
// provide to executor.
std::shared_ptr<hiai::AiModelMngerClient> GetClient();
std::shared_ptr<hiai::AiModelMngerClient> GetClient(const std::string &model_name);
int index();
int index() const;
private:
void CheckSupportNPU();
bool IsKirinChip();
bool CheckOmBuildIr(const std::string &path);
bool CheckEMUIVersion();
std::string GetExecutorPath();
bool CheckDDKVersion();
int CompareVersion(const std::string &version1, const std::string &version2);
private:
int index_ = 0;
@ -61,12 +65,14 @@ class NPUManager {
bool is_support_npu = false;
std::shared_ptr<hiai::AiModelMngerClient> client_ = nullptr;
std::vector<std::shared_ptr<hiai::AiModelMngerClient>> clients_;
std::vector<std::shared_ptr<hiai::AiModelDescription>> model_desc_;
std::shared_ptr<hiai::AiModelBuilder> mc_builder_ = nullptr;
std::unordered_map<std::string, int> model_map_;
};
} // namespace mindspore::lite
#endif // MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_UTILS_H_
#endif // MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_MANAGER_H_

View File

@ -0,0 +1,102 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "src/kernel_registry.h"
#include "src/ops/nhwc2nchw.h"
#include "src/ops/nchw2nhwc.h"
#include "src/runtime/agent/npu/npu_pass_utils.h"
namespace mindspore::lite {
using kernel::KERNEL_ARCH::kCPU;
using kernel::KERNEL_ARCH::kNPU;
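// Build a standalone Nchw2Nhwc primitive by serializing it with flatbuffers and parsing the buffer back into a PrimitiveC.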
PrimitiveC *NPUPassUtils::CreateNchw2NhwcPrimitive() {
flatbuffers::FlatBufferBuilder fbb(1024);
auto val_offset = schema::CreateNchw2Nhwc(fbb);
auto prim_offset = schema::CreatePrimitive(fbb, schema::PrimitiveType_Nchw2Nhwc, val_offset.o);
fbb.Finish(prim_offset);
auto buf = fbb.GetBufferPointer();
if (buf == nullptr) {
MS_LOG(ERROR) << "GetBufferPointer return nullptr";
fbb.Clear();
return nullptr;
}
auto primitive_buf = reinterpret_cast<char *>(malloc(fbb.GetSize()));
if (primitive_buf == nullptr) {
MS_LOG(ERROR) << "Malloc primitive_buf_ failed.";
fbb.Clear();
return nullptr;
}
memcpy(primitive_buf, buf, fbb.GetSize());
auto *primitive = PrimitiveC::NewPrimitiveC<Nchw2Nhwc>(flatbuffers::GetRoot<schema::Primitive>(primitive_buf));
free(primitive_buf);
fbb.Clear();
return primitive;
}
PrimitiveC *NPUPassUtils::CreateNhwc2NchwPrimitive() {
flatbuffers::FlatBufferBuilder fbb(1024);
auto val_offset = schema::CreateNhwc2Nchw(fbb);
auto prim_offset = schema::CreatePrimitive(fbb, schema::PrimitiveType_Nhwc2Nchw, val_offset.o);
fbb.Finish(prim_offset);
auto buf = fbb.GetBufferPointer();
if (buf == nullptr) {
MS_LOG(ERROR) << "GetBufferPointer return nullptr";
fbb.Clear();
return nullptr;
}
auto primitive_buf = reinterpret_cast<char *>(malloc(fbb.GetSize()));
if (primitive_buf == nullptr) {
MS_LOG(ERROR) << "Malloc primitive_buf_ failed.";
fbb.Clear();
return nullptr;
}
memcpy(primitive_buf, buf, fbb.GetSize());
auto *primitive = PrimitiveC::NewPrimitiveC<Nhwc2Nchw>(flatbuffers::GetRoot<schema::Primitive>(primitive_buf));
free(primitive_buf);
fbb.Clear();
return primitive;
}
kernel::LiteKernel *NPUPassUtils::CreateNchw2NhwcKernel(const std::vector<Tensor *> &in_tensors,
const std::vector<Tensor *> &out_tensors,
const InnerContext *ctx, const std::string &name) {
kernel::KernelKey key{kCPU, kNumberTypeFloat32, schema::PrimitiveType_Nchw2Nhwc};
auto nchw2nhwc_primitive = CreateNchw2NhwcPrimitive();
auto *nchw2nhwc_kernel =
KernelRegistry::GetInstance()->GetKernel(in_tensors, out_tensors, nchw2nhwc_primitive, ctx, key);
nchw2nhwc_kernel->set_name(name);
return nchw2nhwc_kernel;
}
kernel::LiteKernel *NPUPassUtils::CreateNhwc2NchwKernel(const std::vector<Tensor *> &in_tensors,
const std::vector<Tensor *> &out_tensors,
const InnerContext *ctx, const std::string &name) {
kernel::KernelKey key{kCPU, kNumberTypeFloat32, schema::PrimitiveType_Nhwc2Nchw};
auto nhwc2nchw_primitive = CreateNhwc2NchwPrimitive();
auto *nhwc2nchw_kernel =
KernelRegistry::GetInstance()->GetKernel(in_tensors, out_tensors, nhwc2nchw_primitive, ctx, key);
nhwc2nchw_kernel->set_name(name);
return nhwc2nchw_kernel;
}
void NPUPassUtils::UpdateKernel(kernel::LiteKernel *kernel, const std::vector<kernel::LiteKernel *> &in_kernels,
const std::vector<kernel::LiteKernel *> &out_kernels,
const std::vector<Tensor *> &in_tensors, const std::vector<Tensor *> &out_tensors) {
kernel->set_in_tensors(in_tensors);
kernel->set_out_tensors(out_tensors);
kernel->set_in_kernels(in_kernels);
kernel->set_out_kernels(out_kernels);
}
} // namespace mindspore::lite

View File

@ -0,0 +1,44 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_PASS_UTILS_H_
#define MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_PASS_UTILS_H_
#include <vector>
#include <string>
#include "src/ops/primitive_c.h"
#include "src/lite_kernel.h"
namespace mindspore::lite {
class NPUPassUtils {
public:
static kernel::LiteKernel *CreateNchw2NhwcKernel(const std::vector<Tensor *> &in_tensors,
const std::vector<Tensor *> &out_tensors, const InnerContext *ctx,
const std::string &name);
static kernel::LiteKernel *CreateNhwc2NchwKernel(const std::vector<Tensor *> &in_tensors,
const std::vector<Tensor *> &out_tensors, const InnerContext *ctx,
const std::string &name);
static void UpdateKernel(kernel::LiteKernel *kernel, const std::vector<kernel::LiteKernel *> &in_kernels,
const std::vector<kernel::LiteKernel *> &out_kernels,
const std::vector<Tensor *> &in_tensors, const std::vector<Tensor *> &out_tensors);
private:
static PrimitiveC *CreateNchw2NhwcPrimitive();
static PrimitiveC *CreateNhwc2NchwPrimitive();
};
} // namespace mindspore::lite
#endif // MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_PASS_UTILS_H_

View File

@ -0,0 +1,201 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "src/runtime/agent/npu/npu_transform_pass.h"
#include <vector>
#include "src/lite_kernel.h"
#include "src/runtime/agent/npu/npu_manager.h"
#include "src/runtime/agent/npu/npu_pass_utils.h"
namespace mindspore::lite {
using kernel::KERNEL_ARCH::kCPU;
using kernel::KERNEL_ARCH::kNPU;
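// The transform pass inserts a Nhwc2Nchw kernel in front of, and a Nchw2Nhwc kernel behind, every NPU kernel
// listed in npu_trans_nodes; the helpers below rewire the neighbouring kernels and tensors accordingly.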
int NPUTransformPass::UpdateNH2NCTransNodePreKernel(kernel::LiteKernel *kernel, kernel::LiteKernel *trans_kernel,
kernel::LiteKernel *after_kernel) {
std::vector<kernel::LiteKernel *> out_kernels;
for (auto out_kernel : kernel->out_kernels()) {
if (out_kernel == after_kernel) {
out_kernels.push_back(trans_kernel);
} else {
out_kernels.push_back(out_kernel);
}
}
NPUPassUtils::UpdateKernel(kernel, kernel->in_kernels(), out_kernels, kernel->in_tensors(), kernel->out_tensors());
return RET_OK;
}
int NPUTransformPass::UpdateNH2NCTransNodeAfterKernel(kernel::LiteKernel *kernel, kernel::LiteKernel *trans_kernel,
kernel::LiteKernel *before_kernel) {
std::vector<lite::Tensor *> cur_kernel_in_tensors = {trans_kernel->out_tensors()[0]};
for (int i = 1; i < kernel->in_tensors().size(); i++) {
cur_kernel_in_tensors.push_back(kernel->in_tensors()[i]);
}
std::vector<kernel::LiteKernel *> cur_in_kernels = {trans_kernel};
for (int i = 0; i < kernel->in_kernels().size(); i++) {
auto in_kernel = kernel->in_kernels()[i];
if (in_kernel != kernel) {
cur_in_kernels.push_back(in_kernel);
}
}
NPUPassUtils::UpdateKernel(kernel, cur_in_kernels, kernel->out_kernels(), cur_kernel_in_tensors,
kernel->out_tensors());
return RET_OK;
}
int NPUTransformPass::InsertPreNode(const InnerContext *context, std::vector<kernel::LiteKernel *>::iterator it,
std::vector<kernel::LiteKernel *> *all_kernels,
std::vector<Tensor *> *all_tensors) {
auto kernel = *it;
bool is_input_kernel = kernel->in_kernels().empty();
if (is_input_kernel || kernel->in_kernels()[0]->desc().arch != kNPU ||
npu_trans_nodes.find(kernel->in_kernels()[0]->Type()) == npu_trans_nodes.end()) {
kernel::LiteKernel *before_kernel = nullptr;
if (!is_input_kernel) {
before_kernel = kernel->in_kernels()[0];
}
// Create pre transform kernel out tensors.
std::vector<int> shapes{kernel->in_tensors()[0]->shape()[0], kernel->in_tensors()[0]->shape()[3],
kernel->in_tensors()[0]->shape()[1], kernel->in_tensors()[0]->shape()[2]};
auto tensor = new Tensor(kernel->in_tensors()[0]->data_type(), shapes, schema::Format_NCHW, Tensor::VAR);
std::vector<Tensor *> pre_trans_out_tensors = {tensor};
all_tensors->push_back(pre_trans_out_tensors[0]);
// Replace the output tensor of the previous node
auto name = kernel->name() + "_pre_trans" + "_Nhwc2Nchw_" + std::to_string(total++);
auto *pre_trans_kernel =
NPUPassUtils::CreateNhwc2NchwKernel({kernel->in_tensors()[0]}, pre_trans_out_tensors, context, name);
// Insert Nhwc2Nchw into the front of the current queue
all_kernels->push_back(pre_trans_kernel);
// Replace the output kernel of the previous node
std::vector<kernel::LiteKernel *> pre_trans_in_kernel;
if (is_input_kernel) {
pre_trans_in_kernel = {};
} else {
pre_trans_in_kernel = {before_kernel};
}
NPUPassUtils::UpdateKernel(pre_trans_kernel, pre_trans_in_kernel, {kernel}, {kernel->in_tensors()[0]},
pre_trans_out_tensors);
if (before_kernel != nullptr) {
UpdateNH2NCTransNodePreKernel(before_kernel, pre_trans_kernel, kernel);
}
UpdateNH2NCTransNodeAfterKernel(kernel, pre_trans_kernel, before_kernel);
}
return RET_OK;
}
int NPUTransformPass::InsertPostNode(const InnerContext *context, std::vector<kernel::LiteKernel *>::iterator it,
std::vector<kernel::LiteKernel *> *all_kernels,
std::vector<Tensor *> *all_tensors) {
auto kernel = *it;
// A single output can be referenced by multiple kernels, so handle each consumer separately.
for (int i = 0; i < kernel->out_kernels().size(); i++) {
auto next_kernel = kernel->out_kernels().at(i);
if (next_kernel->desc().arch == kNPU && npu_trans_nodes.find(next_kernel->Type()) != npu_trans_nodes.end()) {
continue;
}
// Create the output tensor of the inserted Nchw2Nhwc kernel; it keeps the NHWC shape of the current output.
auto shapes = {kernel->out_tensors()[0]->shape()[0], kernel->out_tensors()[0]->shape()[1],
kernel->out_tensors()[0]->shape()[2], kernel->out_tensors()[0]->shape()[3]};
auto tensor = new Tensor(kernel->out_tensors()[0]->data_type(), shapes, schema::Format_NHWC, Tensor::VAR);
std::vector<Tensor *> post_trans_out_tensors = {tensor};
all_tensors->push_back(post_trans_out_tensors[0]);
// Use the output tensor of the current node as the input tensor of the post-conversion operator
auto name = kernel->name() + "_post_trans" + "_Nchw2Nhwc" + std::to_string(total++);
auto *post_trans_kernel =
NPUPassUtils::CreateNchw2NhwcKernel(kernel->out_tensors(), post_trans_out_tensors, context, name);
// Replace the input tensor of the next node
NPUPassUtils::UpdateKernel(post_trans_kernel, {kernel}, {next_kernel}, kernel->out_tensors(),
post_trans_out_tensors);
// Insert directly at the back; this does not break the topological order.
all_kernels->push_back(post_trans_kernel);
UpdateNC2NHTransNodePreKernel(kernel, post_trans_kernel, next_kernel);
UpdateNC2NHTransNodeAfterKernel(kernel, post_trans_kernel, next_kernel);
}
return RET_OK;
}
int NPUTransformPass::UpdateNC2NHTransNodePreKernel(kernel::LiteKernel *kernel, kernel::LiteKernel *trans_kernel,
kernel::LiteKernel *next_kernel) {
std::vector<kernel::LiteKernel *> cur_out_kernels;
for (auto out_kernel : kernel->out_kernels()) {
if (out_kernel == next_kernel) {
cur_out_kernels.push_back(trans_kernel);
} else {
cur_out_kernels.push_back(out_kernel);
}
}
auto kernel_out_tensor = kernel->out_tensors()[0];
// Change the format of the current kernel's output tensor from NHWC to NCHW.
std::vector<int> kernel_out_new_shapes = {kernel_out_tensor->shape()[0], kernel_out_tensor->shape()[3],
kernel_out_tensor->shape()[1], kernel_out_tensor->shape()[2]};
kernel_out_tensor->set_format(schema::Format_NCHW);
kernel_out_tensor->set_shape(kernel_out_new_shapes);
NPUPassUtils::UpdateKernel(kernel, kernel->in_kernels(), cur_out_kernels, kernel->in_tensors(), {kernel_out_tensor});
return RET_OK;
}
int NPUTransformPass::UpdateNC2NHTransNodeAfterKernel(kernel::LiteKernel *kernel, kernel::LiteKernel *trans_kernel,
kernel::LiteKernel *next_kernel) {
std::vector<Tensor *> next_in_tensors;
for (auto next_in_tensor : next_kernel->in_tensors()) {
if (next_in_tensor != kernel->out_tensors()[0]) {
next_in_tensors.push_back(next_in_tensor);
} else {
next_in_tensors.push_back(trans_kernel->out_tensors()[0]);
}
}
next_kernel->set_in_tensors(next_in_tensors);
std::vector<kernel::LiteKernel *> next_in_kernels;
for (auto in_kernel : next_kernel->in_kernels()) {
if (in_kernel == kernel) {
next_in_kernels.push_back(trans_kernel);
} else {
next_in_kernels.push_back(in_kernel);
}
}
NPUPassUtils::UpdateKernel(next_kernel, next_in_kernels, next_kernel->out_kernels(), next_in_tensors,
next_kernel->out_tensors());
return RET_OK;
}
int NPUTransformPass::FormatTransformPass(const InnerContext *context, std::vector<kernel::LiteKernel *> *all_kernels,
std::vector<Tensor *> *all_tensors) {
if (context->IsNpuEnabled()) {
std::vector<kernel::LiteKernel *> new_kernels;
for (auto it = all_kernels->begin(); it != all_kernels->end(); it++) {
auto kernel = *it;
if (kernel->desc().arch != kNPU) {
new_kernels.push_back(kernel);
continue;
}
if (npu_trans_nodes.find(kernel->Type()) != npu_trans_nodes.end()) {
InsertPreNode(context, it, &new_kernels, all_tensors);
new_kernels.push_back(kernel);
InsertPostNode(context, it, &new_kernels, all_tensors);
} else {
new_kernels.push_back(kernel);
}
}
all_kernels->clear();
for (int i = 0; i < new_kernels.size(); i++) {
all_kernels->push_back(new_kernels[i]);
}
}
return RET_OK;
}
} // namespace mindspore::lite

View File

@ -0,0 +1,51 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_TRANSFORM_PASS_H_
#define MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_TRANSFORM_PASS_H_
#include <vector>
#include "src/lite_kernel.h"
#include "src/ops/primitive_c.h"
namespace mindspore::lite {
class NPUTransformPass {
public:
int FormatTransformPass(const InnerContext *context, std::vector<kernel::LiteKernel *> *all_kernels,
std::vector<Tensor *> *all_tensors);
private:
int UpdateNH2NCTransNodePreKernel(kernel::LiteKernel *kernel, kernel::LiteKernel *trans_kernel,
kernel::LiteKernel *after_kernel);
int UpdateNH2NCTransNodeAfterKernel(kernel::LiteKernel *kernel, kernel::LiteKernel *trans_kernel,
kernel::LiteKernel *before_kernel);
int UpdateNC2NHTransNodePreKernel(kernel::LiteKernel *kernel, kernel::LiteKernel *trans_kernel,
kernel::LiteKernel *after_kernel);
int UpdateNC2NHTransNodeAfterKernel(kernel::LiteKernel *kernel, kernel::LiteKernel *trans_kernel,
kernel::LiteKernel *next_kernel);
int InsertPreNode(const InnerContext *context, std::vector<kernel::LiteKernel *>::iterator it,
std::vector<kernel::LiteKernel *> *all_kernels, std::vector<Tensor *> *all_tensors);
int InsertPostNode(const InnerContext *context, std::vector<kernel::LiteKernel *>::iterator it,
std::vector<kernel::LiteKernel *> *all_kernels, std::vector<Tensor *> *all_tensors);
private:
int total = 0;
};
} // namespace mindspore::lite
#endif // MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_TRANSFORM_PASS_H_

View File

@ -24,7 +24,6 @@
#include "include/graph/model.h"
#include "include/hiai_ir_build.h"
#include "include/HiAiModelManagerType.h"
#include "include/context.h"
#include "include/version.h"
#include "src/common/utils.h"
#include "src/runtime/agent/npu/npu_converter_utils.h"
@ -34,10 +33,6 @@ namespace mindspore::kernel {
using mindspore::lite::RET_ERROR;
using mindspore::lite::RET_OK;
std::set<schema::PrimitiveType> trans_nodes = {schema::PrimitiveType_Conv2D, schema::PrimitiveType_DeConv2D,
schema::PrimitiveType_DepthwiseConv2D,
schema::PrimitiveType_DeDepthwiseConv2D, schema::PrimitiveType_Resize};
domi::ModelBufferData *SubGraphNpuKernel::BuildIRModel() {
ge::Graph graph("NPUGraph");
@ -75,8 +70,7 @@ domi::ModelBufferData *SubGraphNpuKernel::BuildIRModel() {
}
int SubGraphNpuKernel::Run() {
return reinterpret_cast<lite::NPUExecutor *>(this->executor_)
->Run(in_tensors_, out_tensors_, nodes_, inputs_nhwc2nchw_, outputs_nchw2nhwc_);
return reinterpret_cast<lite::NPUExecutor *>(this->executor_)->Run(in_tensors_, out_tensors_, nodes_);
}
int SubGraphNpuKernel::BuildNPUInputOp() {
@ -88,21 +82,7 @@ int SubGraphNpuKernel::BuildNPUInputOp() {
if (IsSubGraphInputTensor(in_tensor)) {
auto tensor_name = node->name() + "_" + std::to_string(count++);
hiai::op::Data *data;
if (trans_nodes.find(node->Type()) != trans_nodes.end()) {
auto shape = in_tensor->shape();
data = new (std::nothrow) hiai::op::Data(tensor_name);
if (data == nullptr) {
MS_LOG(ERROR) << "New data failed.";
return RET_ERROR;
}
ge::TensorDesc tensor_desc(lite::ConverterToNPUShape({shape[0], shape[3], shape[1], shape[2]}),
ge::FORMAT_NCHW, lite::ConverterToNPUDataType(in_tensor->data_type()));
data->update_input_desc_x(tensor_desc);
inputs_nhwc2nchw_.push_back(true);
} else {
data = mindspore::lite::ConverterToNPUData(in_tensor, tensor_name);
inputs_nhwc2nchw_.push_back(false);
}
data = mindspore::lite::ConverterToNPUData(in_tensor, tensor_name);
subgraph_input_op_.push_back(*data);
node_input_op.push_back(data);
continue;
@ -132,7 +112,7 @@ int SubGraphNpuKernel::BuildNPUInputOp() {
// weight tensor
if (is_weight_tensor) {
if (trans_nodes.find(node->Type()) == trans_nodes.end()) {
if (lite::npu_trans_nodes.find(node->Type()) == lite::npu_trans_nodes.end()) {
auto name = node->name() + "_" + std::to_string(count++);
auto weight_const = new (std::nothrow) hiai::op::Const(node->name() + "_" + std::to_string(count++));
if (weight_const == nullptr) {
@ -162,11 +142,6 @@ std::vector<ge::Operator> SubGraphNpuKernel::GetNPUNodes(const vector<kernel::Li
ops.reserve(nodes.size());
for (int i = 0; i < nodes.size(); i++) {
ops.push_back(*reinterpret_cast<NPUKernel *>(nodes[i])->GetNPUOp());
if (trans_nodes.find(schema::PrimitiveType(nodes[i]->GetPrimitive()->Type())) != trans_nodes.end()) {
outputs_nchw2nhwc_.push_back(true);
} else {
outputs_nchw2nhwc_.push_back(false);
}
}
return ops;
}

View File

@ -69,10 +69,6 @@ class SubGraphNpuKernel : public SubGraphKernel {
std::string GetOMModelName();
private:
std::vector<bool> inputs_nhwc2nchw_;
std::vector<bool> outputs_nchw2nhwc_;
domi::ModelBufferData *model_buffer_data_;
std::vector<ge::Operator> subgraph_input_op_;

View File

@ -16,6 +16,7 @@
#include "src/runtime/kernel/npu/convolution_base_npu.h"
#include "src/runtime/agent/npu/npu_converter_utils.h"
#include "nnacl/pack.h"
namespace mindspore::kernel {
ConvolutionBaseNPUKernel::~ConvolutionBaseNPUKernel() {
@ -39,14 +40,27 @@ int ConvolutionBaseNPUKernel::InitWeightBiasConst(const std::vector<lite::Tensor
MS_LOG(ERROR) << "New weight const failed.";
return RET_ERROR;
}
auto weight_shape = inputs[1]->shape();
inputs[1]->set_shape({weight_shape[0], weight_shape[3], weight_shape[1], weight_shape[2]});
inputs[1]->set_format(schema::Format_NCHW);
auto weight_tensor = mindspore::lite::ConverterToNPUTensor(inputs[1]);
weight_->set_attr_value(weight_tensor);
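// Repack the NHWC weight data into a temporary NCHW buffer and pass it to the NPU const op as an NCHW tensor.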
auto w_shape = inputs[1]->shape();
auto nhwc_data = inputs[1]->data_c();
auto nchw_data = reinterpret_cast<float *>(malloc(inputs[1]->ElementsNum() * sizeof(float)));
if (nchw_data == nullptr) {
MS_LOG(ERROR) << "Malloc buffer failed.";
return RET_ERROR;
}
PackNHWCToNCHWFp32(nhwc_data, nchw_data, w_shape[0], w_shape[1] * w_shape[2], w_shape[3]);
inputs[1]->set_shape(weight_shape);
inputs[1]->set_format(schema::Format_NHWC);
std::shared_ptr<ge::Tensor> weight_tensor = std::shared_ptr<ge::Tensor>(new (std::nothrow) ge::Tensor());
if (weight_tensor == nullptr) {
MS_LOG(ERROR) << "new weight_tensor failed.";
return RET_ERROR;
}
ge::TensorDesc tensor_desc(lite::ConverterToNPUShape({w_shape[0], w_shape[3], w_shape[1], w_shape[2]}),
ge::FORMAT_NCHW, lite::ConverterToNPUDataType(inputs[1]->data_type()));
weight_tensor->SetTensorDesc(tensor_desc);
weight_tensor->SetData(reinterpret_cast<const uint8_t *>(nchw_data), inputs[1]->Size());
weight_->set_attr_value(weight_tensor);
free(nchw_data);
if (inputs.size() >= 3) {
bias_ = new (std::nothrow) hiai::op::Const(name_ + "_b");

View File

@ -17,17 +17,18 @@
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_NPU_CONVOLUTION_BASE_NPU_H_
#include <vector>
#include <memory>
#include "include/graph/op/all_ops.h"
#include "src/runtime/kernel/npu/transpose_base_npu.h"
#include "nnacl/conv_parameter.h"
namespace mindspore::kernel {
class ConvolutionBaseNPUKernel : public TransposeBaseNPUKernel {
class ConvolutionBaseNPUKernel : public NPUKernel {
public:
ConvolutionBaseNPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx,
const mindspore::lite::PrimitiveC *primitive)
: TransposeBaseNPUKernel(parameter, inputs, outputs, ctx, primitive) {}
: NPUKernel(parameter, inputs, outputs, ctx, primitive) {}
~ConvolutionBaseNPUKernel() override;
protected:

View File

@ -25,7 +25,7 @@ using mindspore::schema::PrimitiveType_DepthwiseConv2D;
namespace mindspore::kernel {
int ConvolutionDepthwiseNPUKernel::IsSupport(const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs, OpParameter *opParameter) {
return RET_ERROR;
return RET_OK;
}
int ConvolutionDepthwiseNPUKernel::SetConvDwParam() {
@ -49,19 +49,13 @@ int ConvolutionDepthwiseNPUKernel::SetConvDwParam() {
int ConvolutionDepthwiseNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs,
const std::vector<ge::Operator *> &npu_inputs) {
auto ret = SetPreTranspose(npu_inputs[0]);
if (ret != RET_OK) {
MS_LOG(ERROR) << "New pre transpose npu operator (NHWC -> NCHW) for op " << name_ << " failed.";
return RET_ERROR;
}
// set conv attr param
conv_dw_ = new (std::nothrow) hiai::op::ConvolutionDepthwise(name_ + "_conv_depthwise");
if (conv_dw_ == nullptr) {
MS_LOG(ERROR) << "New convolution depthwise operator for op " << name_ << " failed.";
return RET_ERROR;
}
ret = SetConvDwParam();
auto ret = SetConvDwParam();
if (ret != RET_OK) {
MS_LOG(ERROR) << "Set npu op parameter for convolution depthwise op " << name_ << " failed.";
return RET_ERROR;
@ -76,7 +70,7 @@ int ConvolutionDepthwiseNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *
if (inputs.size() == 3) {
conv_dw_->set_input_bias(*bias_);
}
conv_dw_->set_input_x(*pre_trans_);
conv_dw_->set_input_x(*npu_inputs[0]);
if (conv_param_->act_type_ != ActType_No) {
ret = SetActivation(conv_dw_, conv_param_->act_type_);
@ -85,20 +79,16 @@ int ConvolutionDepthwiseNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *
return RET_ERROR;
}
}
if (conv_param_->act_type_ == ActType_No) {
ret = SetPostTranspose(conv_dw_);
} else {
ret = SetPostTranspose(act_);
}
if (ret != RET_OK) {
MS_LOG(ERROR) << "New post transpose npu operator (NCHW -> NHWC) for op " << name_ << " failed.";
return RET_ERROR;
}
return RET_OK;
}
ge::Operator *mindspore::kernel::ConvolutionDepthwiseNPUKernel::GetNPUOp() { return post_trans_; }
ge::Operator *mindspore::kernel::ConvolutionDepthwiseNPUKernel::GetNPUOp() {
if (conv_param_->act_type_ == ActType_No) {
return conv_dw_;
} else {
return act_;
}
}
ConvolutionDepthwiseNPUKernel::~ConvolutionDepthwiseNPUKernel() {
if (conv_dw_ != nullptr) {

View File

@ -24,7 +24,7 @@ using mindspore::schema::PrimitiveType_Conv2D;
namespace mindspore::kernel {
int ConvolutionNPUKernel::IsSupport(const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs, OpParameter *opParameter) {
return RET_ERROR;
return RET_OK;
}
int ConvolutionNPUKernel::SetConvParam() {
@ -49,19 +49,13 @@ int ConvolutionNPUKernel::SetConvParam() {
int ConvolutionNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs,
const std::vector<ge::Operator *> &npu_inputs) {
auto ret = SetPreTranspose(npu_inputs[0]);
if (ret != RET_OK) {
MS_LOG(ERROR) << "New pre transpose npu operator (NHWC -> NCHW) for op " << name_ << " failed.";
return RET_ERROR;
}
// set conv attr param
conv_ = new (std::nothrow) hiai::op::Convolution(name_ + "_conv");
if (conv_ == nullptr) {
MS_LOG(ERROR) << "New convolution operator for convolution op " << name_ << " failed.";
return RET_ERROR;
}
ret = SetConvParam();
auto ret = SetConvParam();
if (ret != RET_OK) {
MS_LOG(ERROR) << "Set npu op parameter for convolution op " << name_ << " failed.";
return RET_ERROR;
@ -76,7 +70,7 @@ int ConvolutionNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs
if (inputs.size() == 3) {
conv_->set_input_bias(*bias_);
}
conv_->set_input_x(*pre_trans_);
conv_->set_input_x(*npu_inputs[0]);
if (conv_param_->act_type_ != ActType_No) {
ret = SetActivation(conv_, conv_param_->act_type_);
@ -85,20 +79,16 @@ int ConvolutionNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs
return RET_ERROR;
}
}
if (conv_param_->act_type_ == ActType_No) {
ret = SetPostTranspose(conv_);
} else {
ret = SetPostTranspose(act_);
}
if (ret != RET_OK) {
MS_LOG(ERROR) << "New post transpose npu operator (NCHW -> NHWC) for op " << name_ << " failed.";
return RET_ERROR;
}
return RET_OK;
}
ge::Operator *mindspore::kernel::ConvolutionNPUKernel::GetNPUOp() { return post_trans_; }
ge::Operator *mindspore::kernel::ConvolutionNPUKernel::GetNPUOp() {
if (conv_param_->act_type_ == ActType_No) {
return conv_;
} else {
return act_;
}
}
ConvolutionNPUKernel::~ConvolutionNPUKernel() {
if (conv_ != nullptr) {

View File

@ -62,23 +62,17 @@ int PoolingNPUKernel::SetPoolingParam() {
int PoolingNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs,
const std::vector<ge::Operator *> &npu_inputs) {
auto ret = SetPreTranspose(npu_inputs[0]);
if (ret != RET_OK) {
MS_LOG(ERROR) << "New pre transpose npu operator (NHWC -> NCHW) for op " << name_ << " failed.";
return RET_ERROR;
}
pooling_ = new (std::nothrow) hiai::op::PoolingD(name_ + "_pooling");
if (pooling_ == nullptr) {
MS_LOG(ERROR) << "New pooling npu operator for op " << name_ << " failed.";
return RET_ERROR;
}
ret = SetPoolingParam();
auto ret = SetPoolingParam();
if (ret != RET_OK) {
MS_LOG(ERROR) << "Set npu op parameter for convolution op " << name_ << " failed.";
return RET_ERROR;
}
pooling_->set_input_x(*pre_trans_);
pooling_->set_input_x(*npu_inputs[0]);
if (pooling_param_->act_type_ != ActType_No) {
ret = SetActivation(pooling_, pooling_param_->act_type_);
@ -87,20 +81,16 @@ int PoolingNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs,
return RET_ERROR;
}
}
if (pooling_param_->act_type_ == ActType_No) {
ret = SetPostTranspose(pooling_);
} else {
ret = SetPostTranspose(act_);
}
if (ret != RET_OK) {
MS_LOG(ERROR) << "New post transpose npu operator (NCHW -> NHWC) for op " << name_ << " failed.";
return RET_ERROR;
}
return RET_OK;
}
ge::Operator *mindspore::kernel::PoolingNPUKernel::GetNPUOp() { return post_trans_; }
ge::Operator *mindspore::kernel::PoolingNPUKernel::GetNPUOp() {
if (pooling_param_->act_type_ == ActType_No) {
return pooling_;
} else {
return act_;
}
}
PoolingNPUKernel::~PoolingNPUKernel() {
if (pooling_ != nullptr) {

View File

@ -36,12 +36,6 @@ int ResizeNPUKernel::IsSupport(const std::vector<lite::Tensor *> &inputs, const
int ResizeNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs,
const std::vector<ge::Operator *> &npu_inputs) {
auto ret = SetPreTranspose(npu_inputs[0]);
if (ret != RET_OK) {
MS_LOG(ERROR) << "New pre transpose npu operator (NHWC -> NCHW) for op " << name_ << " failed.";
return RET_ERROR;
}
ge::TensorDesc sizeTensorDesc(ge::Shape({2}), ge::FORMAT_NCHW, ge::DT_INT32);
ge::TensorPtr sizeTensor = std::make_shared<hiai::Tensor>(sizeTensorDesc);
vector<int32_t> dataValue = {static_cast<int32_t>(new_height_), static_cast<int32_t>(new_width_)};
@ -55,7 +49,7 @@ int ResizeNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs, con
return RET_ERROR;
}
op->set_attr_align_corners(align_corners_);
op->set_input_x(*pre_trans_);
op->set_input_x(*npu_inputs[0]);
op->set_input_size(*out_size);
op->set_attr_half_pixel_centers(preserve_aspect_ratio_);
op_ = op;
@ -66,21 +60,14 @@ int ResizeNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs, con
return RET_ERROR;
}
op->set_attr_align_corners(align_corners_);
op->set_input_x(*pre_trans_);
op->set_input_x(*npu_inputs[0]);
op->set_input_size(*out_size);
op_ = op;
}
ret = SetPostTranspose(op_);
if (ret != RET_OK) {
MS_LOG(ERROR) << "New post transpose npu operator (NCHW -> NHWC) for op " << name_ << " failed.";
return RET_ERROR;
}
return RET_OK;
}
ge::Operator *mindspore::kernel::ResizeNPUKernel::GetNPUOp() { return this->post_trans_; }
ge::Operator *mindspore::kernel::ResizeNPUKernel::GetNPUOp() { return this->op_; }
ResizeNPUKernel::~ResizeNPUKernel() {
if (op_ != nullptr) {

View File

@ -24,12 +24,12 @@
#include "include/graph/op/all_ops.h"
#include "src/runtime/kernel/npu/transpose_base_npu.h"
namespace mindspore::kernel {
class ResizeNPUKernel : public TransposeBaseNPUKernel {
class ResizeNPUKernel : public NPUKernel {
public:
ResizeNPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx,
const mindspore::lite::PrimitiveC *primitive)
: TransposeBaseNPUKernel(parameter, inputs, outputs, ctx, primitive) {
: NPUKernel(parameter, inputs, outputs, ctx, primitive) {
auto resize_parameter = reinterpret_cast<ResizeParameter *>(parameter);
method_ = resize_parameter->method_;
new_height_ = resize_parameter->new_height_;

View File

@ -33,6 +33,8 @@
#if SUPPORT_NPU
#include "src/runtime/agent/npu/subgraph_npu_kernel.h"
#include "src/runtime/agent/npu/npu_manager.h"
#include "src/runtime/agent/npu/npu_transform_pass.h"
#include "src/runtime/agent/npu/npu_fusion_pass.h"
#endif
namespace mindspore::lite {
using kernel::KERNEL_ARCH::kCPU;
@ -63,6 +65,11 @@ int Scheduler::Schedule(std::vector<kernel::LiteKernel *> *dst_kernels) {
return ret;
}
FindAllInoutKernels(*dst_kernels);
ret = RunPass(dst_kernels);
if (ret != RET_OK) {
MS_LOG(ERROR) << "Schedule run pass failed.";
return ret;
}
ret = ConstructSubGraphs(dst_kernels);
if (ret != RET_OK) {
MS_LOG(ERROR) << "ConstructSubGraphs failed.";
@ -514,4 +521,25 @@ void Scheduler::FindAllInoutKernels(const std::vector<kernel::LiteKernel *> &ker
kernel->FindInoutKernels(kernels);
}
}
int Scheduler::RunPass(std::vector<kernel::LiteKernel *> *dst_kernels) {
int ret = RET_OK;
#if SUPPORT_NPU
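// Rewrite tensor formats around NPU kernels first, then fuse away the transpose pairs that become redundant.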
auto transform_pass = new NPUTransformPass;
ret = transform_pass->FormatTransformPass(context_, dst_kernels, &src_tensors_);
delete transform_pass;
if (ret != RET_OK) {
MS_LOG(ERROR) << "Run npu format transform pass failed.";
return ret;
}
auto fusion_pass = new NPUFusionPass(dst_kernels);
ret = fusion_pass->Fusion();
delete fusion_pass;
if (ret != RET_OK) {
MS_LOG(ERROR) << "Run npu fusion pass failed.";
return ret;
}
#endif
return ret;
}
} // namespace mindspore::lite

View File

@ -77,6 +77,8 @@ class Scheduler {
static kernel::SubGraphType GetKernelSubGraphType(const kernel::LiteKernel *kernel);
int RunPass(std::vector<kernel::LiteKernel *> *dst_kernels);
protected:
const InnerContext *context_ = nullptr;
Model *src_model_ = nullptr;