!10269 npu solve memory leak

From: @yeyunpeng2020
Reviewed-by: 
Signed-off-by:
This commit is contained in:
mindspore-ci-bot 2020-12-21 22:42:13 +08:00 committed by Gitee
commit 8c7b616992
48 changed files with 497 additions and 361 deletions

View File

@ -30,6 +30,7 @@
#include "src/runtime/kernel/arm/base/dequant.h"
#if SUPPORT_NPU
#include "src/runtime/agent/npu/npu_manager.h"
#include "src/runtime/agent/npu/optimizer/npu_pass_manager.h"
#endif
namespace mindspore {
@ -366,7 +367,7 @@ int LiteSession::CompileGraph(Model *model) {
return ret;
}
// scheduler kernels
Scheduler scheduler(context_, model, tensors_);
Scheduler scheduler(context_, model, &tensors_);
ret = scheduler.Schedule(&kernels_);
if (ret != RET_OK) {
MS_LOG(ERROR) << "Schedule kernels failed: " << ret;
@ -537,6 +538,10 @@ LiteSession::~LiteSession() {
delete this->context_;
delete this->executor_;
this->executor_ = nullptr;
#if SUPPORT_NPU
mindspore::lite::NPUPassManager::GetInstance()->Clear();
mindspore::lite::NPUManager::GetInstance()->Reset();
#endif
is_running_.store(false);
}

View File

@ -1,9 +1,8 @@
include_directories(${DDK_PATH})
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/)
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/kernel)
file(GLOB_RECURSE NPU_RUNTIME_SRC
${CMAKE_CURRENT_SOURCE_DIR}/*.cc
${CMAKE_CURRENT_SOURCE_DIR}/../../kernel/npu/*.cc
${CMAKE_CURRENT_SOURCE_DIR}/optimizer/*.cc
)
add_library(hiai SHARED IMPORTED)
set_target_properties(hiai PROPERTIES IMPORTED_LOCATION

View File

@ -17,6 +17,7 @@
#include "src/runtime/agent/npu/npu_executor.h"
#include "include/errorcode.h"
#include "src/runtime/agent/npu/npu_manager.h"
#include "nnacl/pack.h"
namespace mindspore::lite {
int NPUExecutor::Prepare(const std::vector<kernel::LiteKernel *> &kernels) {
this->client_ = mindspore::lite::NPUManager::GetInstance()->GetClient(model_name_);
@ -32,6 +33,7 @@ int NPUExecutor::Prepare(const std::vector<kernel::LiteKernel *> &kernels) {
}
int NPUExecutor::Run(const std::vector<Tensor *> &in_tensors, const std::vector<Tensor *> &out_tensors,
const std::vector<kernel::LiteKernel *> &out_kernels,
const std::vector<kernel::LiteKernel *> &kernels, Allocator *allocator,
const KernelCallBack &before, const KernelCallBack &after) {
hiai::AiContext context;
@ -63,15 +65,33 @@ int NPUExecutor::Run(const std::vector<Tensor *> &in_tensors, const std::vector<
return RET_ERROR;
}
// For the output kernel of the entire model, and the format is nchw, the output tensor needs to be nchw TO nhwc.
std::vector<Tensor *> trans_tensors;
for (auto kernel : out_kernels) {
if (kernel->out_kernels().empty() && npu_trans_nodes.find(kernel->Type()) != npu_trans_nodes.end()) {
for (int i = 0; i < kernel->out_tensors().size(); ++i) {
trans_tensors.push_back(kernel->out_tensors()[i]);
}
}
}
for (int i = 0; i < npu_output_tensors_.size(); ++i) {
void *data = out_tensors[i]->MutableData();
if (data == nullptr) {
MS_LOG(ERROR) << "Malloc buffer failed.";
return RET_ERROR;
}
if (std::find(trans_tensors.begin(), trans_tensors.end(), out_tensors[i]) != trans_tensors.end()) {
// Change data&tensor shape nc->nh
PackNCHWToNHWCFp32(npu_output_tensors_[i]->GetBuffer(), data, out_tensors[i]->Batch(),
out_tensors[i]->Width() * out_tensors[i]->Height(), out_tensors[i]->Channel());
out_tensors[i]->set_shape({out_tensors[i]->shape()[0], out_tensors[i]->shape()[2], out_tensors[i]->shape()[3],
out_tensors[i]->shape()[1]});
} else {
memcpy(data, npu_output_tensors_[i]->GetBuffer(), npu_output_tensors_[i]->GetSize());
out_tensors[i]->ResetRefCount();
}
}
return RET_OK;
}

View File

@ -32,8 +32,9 @@ class NPUExecutor : public Executor {
int Prepare(const std::vector<kernel::LiteKernel *> &kernels) override;
int Run(const std::vector<Tensor *> &in_tensors, const std::vector<Tensor *> &out_tensors,
const std::vector<kernel::LiteKernel *> &kernels, Allocator *allocator = nullptr,
const KernelCallBack &before = nullptr, const KernelCallBack &after = nullptr);
const std::vector<kernel::LiteKernel *> &out_kernels, const std::vector<kernel::LiteKernel *> &kernels,
Allocator *allocator = nullptr, const KernelCallBack &before = nullptr,
const KernelCallBack &after = nullptr);
private:
int GetIOTensorVec();

View File

@ -55,6 +55,26 @@ bool NPUManager::CheckEMUIVersion() {
return true;
}
void NPUManager::Reset() {
index_ = 0;
domi::HiaiIrBuild ir_build;
for (const auto &model_map : models_) {
auto model = model_map.second;
if (!model->is_freed) {
ir_build.ReleaseModelBuff(*model->model_buffer_data_);
model->model_buffer_data_ = nullptr;
model->is_freed = true;
model->desc_.reset();
model->desc_ = nullptr;
}
}
models_.clear();
for (auto client : clients_) {
client.reset();
}
clients_.clear();
}
bool NPUManager::CheckDDKVersion() {
auto client = std::make_shared<hiai::AiModelMngerClient>();
if (client->GetVersion() != nullptr) {
@ -104,54 +124,102 @@ bool NPUManager::IsKirinChip() {
return false;
}
int NPUManager::AddModel(void *model_buf, uint32_t size, const std::string &model_name, int frequency) {
hiai::MemBuffer *buffer = mc_builder_->InputMemBufferCreate(model_buf, size);
if (buffer == nullptr) {
MS_LOG(ERROR) << "MemBuffer is null.";
return RET_ERROR;
}
int NPUManager::AddModel(domi::ModelBufferData *model_buffer_data, const std::string &model_name, int frequency) {
auto model = new SubGraphModel(index_, model_name, model_buffer_data, frequency);
auto desc = std::make_shared<hiai::AiModelDescription>(model_name, frequency, 0, 0, 0);
desc->SetModelBuffer(buffer->GetMemBufferData(), buffer->GetMemBufferSize());
model_desc_.push_back(desc);
mc_builder_->MemBufferDestroy(buffer);
model_map_.insert({model_name, index_});
model->desc_ = desc;
models_.insert({model_name, model});
index_++;
return RET_OK;
}
int NPUManager::LoadOMModel() {
for (int i = 0; i < index_ / MAX_MODEL_NUM + 1; i++) {
std::shared_ptr<hiai::AiModelMngerClient> NPUManager::CreateAiModelMngerClient() {
auto client = std::make_shared<hiai::AiModelMngerClient>();
if (client == nullptr) {
MS_LOG(ERROR) << "NPU client is nullptr.";
return RET_ERROR;
return nullptr;
}
int ret = client->Init(nullptr);
if (ret != hiai::AI_SUCCESS) {
MS_LOG(ERROR) << "NPU client init failed. code is " << ret;
return nullptr;
}
return client;
}
int NPUManager::LoadOMModel() {
std::vector<std::shared_ptr<hiai::AiModelDescription>> models_desc;
std::shared_ptr<hiai::AiModelMngerClient> client = nullptr;
std::shared_ptr<hiai::AiModelBuilder> mc_builder = nullptr;
int total = 0;
for (const auto &model_map : models_) {
if (total % MAX_MODEL_NUM == 0) {
client = CreateAiModelMngerClient();
if (client == nullptr) {
MS_LOG(ERROR) << "Create Client failed.";
return RET_ERROR;
}
mc_builder_ = std::make_shared<hiai::AiModelBuilder>(client);
mc_builder = std::make_shared<hiai::AiModelBuilder>(client);
if (mc_builder == nullptr) {
MS_LOG(ERROR) << "Create AiModelBuilder failed.";
return RET_ERROR;
}
}
total++;
auto model = model_map.second;
if (model->is_loaded && model->is_freed) {
continue;
}
models_desc.push_back(model->desc_);
vector<std::shared_ptr<hiai::AiModelDescription>> desc(model_desc_.begin() + i * MAX_MODEL_NUM,
((i + 1) * MAX_MODEL_NUM > index_)
? model_desc_.begin() + index_
: model_desc_.begin() + (i + 1) * MAX_MODEL_NUM);
ret = client->Load(desc);
if (ret != hiai::AI_SUCCESS) {
MS_LOG(ERROR) << "Client load model failed." << ret;
auto buffer = mc_builder->InputMemBufferCreate(model->model_buffer_data_->data, model->model_buffer_data_->length);
if (buffer == nullptr) {
MS_LOG(ERROR) << "NPU input memory buffer create failed.";
return RET_ERROR;
}
clients_.push_back(client);
model->desc_->SetModelBuffer(model->model_buffer_data_->data, model->model_buffer_data_->length);
if (models_desc.size() == MAX_MODEL_NUM) {
auto ret = LoadModel(client, models_desc);
if (ret != RET_ERROR) {
MS_LOG(ERROR) << "Client load model failed.";
return RET_ERROR;
}
models_desc.clear();
}
}
if (!models_desc.empty()) {
auto ret = LoadModel(client, models_desc);
if (ret != RET_OK) {
MS_LOG(ERROR) << "Client load model failed.";
return RET_ERROR;
}
models_desc.clear();
}
return RET_OK;
}
std::shared_ptr<hiai::AiModelMngerClient> NPUManager::GetClient(const std::string &model_name) {
return clients_[model_map_[model_name] / MAX_MODEL_NUM];
return models_[model_name]->client_;
}
int NPUManager::index() const { return index_; }
// Loads one batch of model descriptions through `client` and records, for each
// description, that its registered model is loaded and which client owns it.
// On success the client is also appended to clients_.
// Returns RET_OK on success; RET_ERROR if the client is null, the HiAI load
// fails, or a description does not match any registered model.
int NPUManager::LoadModel(const std::shared_ptr<hiai::AiModelMngerClient> &client,
                          std::vector<std::shared_ptr<hiai::AiModelDescription>> desc_list) {
  if (client == nullptr) {
    MS_LOG(ERROR) << "NPU client is nullptr.";
    return RET_ERROR;
  }
  auto ret = client->Load(desc_list);
  if (ret != hiai::AI_SUCCESS) {
    MS_LOG(ERROR) << "Client load model failed." << ret;
    return RET_ERROR;
  }

  for (const auto &desc : desc_list) {
    // Purely informational, so it must not be reported at ERROR level.
    MS_LOG(DEBUG) << desc->GetName();
    auto it = models_.find(desc->GetName());
    if (it == models_.end() || it->second == nullptr) {
      // Dereferencing end() would be undefined behaviour; fail loudly instead.
      MS_LOG(ERROR) << "Loaded model is not registered: " << desc->GetName();
      return RET_ERROR;
    }
    it->second->is_loaded = true;
    it->second->client_ = client;
  }

  this->clients_.push_back(client);
  return RET_OK;
}
} // namespace mindspore::lite

View File

@ -18,9 +18,11 @@
#define MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_MANAGER_H_
#include <string>
#include <memory>
#include <utility>
#include <vector>
#include <unordered_map>
#include <set>
#include "include/hiai_ir_build.h"
#include "schema/model_generated.h"
#include "include/HiAiModelManagerService.h"
@ -29,17 +31,34 @@ static std::set<mindspore::schema::PrimitiveType> npu_trans_nodes = {
schema::PrimitiveType_Conv2D, schema::PrimitiveType_DeConv2D,
schema::PrimitiveType_DepthwiseConv2D, schema::PrimitiveType_DeDepthwiseConv2D,
schema::PrimitiveType_Resize, schema::PrimitiveType_Pooling};
struct SubGraphModel {
public:
SubGraphModel(int index, std::string model_name, domi::ModelBufferData *model_buffer_data, int frequency)
: index_(index), model_name_(std::move(model_name)), model_buffer_data_(model_buffer_data) {
std::cout << model_name;
}
bool is_freed = false;
bool is_loaded = false;
int index_;
std::string model_name_;
domi::ModelBufferData *model_buffer_data_;
std::shared_ptr<hiai::AiModelMngerClient> client_;
std::shared_ptr<hiai::AiModelDescription> desc_;
};
class NPUManager {
public:
static NPUManager *GetInstance() {
static NPUManager npuManager;
return &npuManager;
static NPUManager manager;
return &manager;
}
~NPUManager() { Reset(); }
bool IsSupportNPU();
// provide to subgraph to add model.
int AddModel(void *model_buf, uint32_t size, const std::string &model_name, int frequency);
int AddModel(domi::ModelBufferData *model_buffer_data, const std::string &model_name, int frequency);
// scheduler to load om model.
int LoadOMModel();
@ -49,6 +68,11 @@ class NPUManager {
int index() const;
void Reset();
int LoadModel(const std::shared_ptr<hiai::AiModelMngerClient> &client,
std::vector<std::shared_ptr<hiai::AiModelDescription>> desc_list);
private:
bool IsKirinChip();
@ -58,16 +82,12 @@ class NPUManager {
int CompareVersion(const std::string &version1, const std::string &version2);
std::shared_ptr<hiai::AiModelMngerClient> CreateAiModelMngerClient();
private:
int index_ = 0;
std::unordered_map<std::string, SubGraphModel *> models_;
std::vector<std::shared_ptr<hiai::AiModelMngerClient>> clients_;
std::vector<std::shared_ptr<hiai::AiModelDescription>> model_desc_;
std::shared_ptr<hiai::AiModelBuilder> mc_builder_ = nullptr;
std::unordered_map<std::string, int> model_map_;
};
} // namespace mindspore::lite

View File

@ -13,8 +13,8 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "src/runtime/agent/npu/npu_add_transform_pass.h"
#include "src/runtime/agent/npu/npu_pass_utils.h"
#include "src/runtime/agent/npu/optimizer/npu_add_transform_pass.h"
#include "src/runtime/agent/npu/optimizer/npu_pass_utils.h"
namespace mindspore::lite {
using kernel::KERNEL_ARCH::kNPU;
int NPUAddTransformPass::UpdateNH2NCTransNodePreKernel(kernel::LiteKernel *kernel, kernel::LiteKernel *trans_kernel,
@ -56,9 +56,11 @@ int NPUAddTransformPass::InsertNode(const InnerContext *context, std::vector<ker
auto nh2nc_name = kernel->name() + "_nh2nc_" + std::to_string(total++);
auto *nh2nc_kernel = NPUPassUtils::CreateNhwc2NchwKernel(kernel->out_tensors(), nh2nc_tensors, context, nh2nc_name);
all_kernels->push_back(nh2nc_kernel);
insert_primitive_.push_back(nh2nc_kernel->GetPrimitive());
auto nc2nh_name = kernel->name() + "_nc2nh_" + std::to_string(total++);
auto *nc2nh_kernel = NPUPassUtils::CreateNchw2NhwcKernel(nh2nc_tensors, nc2nh_tensors, context, nc2nh_name);
all_kernels->push_back(nc2nh_kernel);
insert_primitive_.push_back(nc2nh_kernel->GetPrimitive());
NPUPassUtils::UpdateKernel(nh2nc_kernel, {kernel}, {nc2nh_kernel}, kernel->out_tensors(), nh2nc_tensors);
NPUPassUtils::UpdateKernel(nc2nh_kernel, {nh2nc_kernel}, {out_kernel}, nh2nc_tensors, nc2nh_tensors);
UpdateNH2NCTransNodePreKernel(kernel, nh2nc_kernel, out_kernel);
@ -91,12 +93,11 @@ int NPUAddTransformPass::UpdateNC2NHTransNodeAfterKernel(kernel::LiteKernel *ker
return RET_OK;
}
int NPUAddTransformPass::Run(const InnerContext *context, std::vector<kernel::LiteKernel *> *all_kernels,
std::vector<Tensor *> *all_tensors) {
if (context->IsNpuEnabled()) {
int NPUAddTransformPass::Run() {
if (context_->IsNpuEnabled()) {
std::vector<kernel::LiteKernel *> new_kernels;
for (auto it = all_kernels->begin(); it != all_kernels->end(); it++) {
for (auto it = all_kernels_->begin(); it != all_kernels_->end(); it++) {
auto kernel = *it;
new_kernels.push_back(kernel);
if (kernel->desc().arch != kNPU) {
@ -110,14 +111,14 @@ int NPUAddTransformPass::Run(const InnerContext *context, std::vector<kernel::Li
}
}
if (kernel->out_kernels().size() != sum) {
InsertNode(context, it, &new_kernels, all_tensors);
InsertNode(context_, it, &new_kernels, all_tensors_);
}
}
}
all_kernels->clear();
all_kernels_->clear();
for (int i = 0; i < new_kernels.size(); i++) {
all_kernels->push_back(new_kernels[i]);
all_kernels_->push_back(new_kernels[i]);
}
}
return RET_OK;

View File

@ -14,16 +14,29 @@
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_ADD_TRANSFORM_PASS_H_
#define MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_ADD_TRANSFORM_PASS_H_
#ifndef MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_OPTIMIZER_NPU_ADD_TRANSFORM_PASS_H_
#define MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_OPTIMIZER_NPU_ADD_TRANSFORM_PASS_H_
#include <vector>
#include "src/lite_kernel.h"
#include "src/ops/primitive_c.h"
#include "src/runtime/agent/npu/optimizer/npu_base_pass.h"
namespace mindspore::lite {
class NPUAddTransformPass {
class NPUAddTransformPass : public NPUBasePass {
public:
int Run(const InnerContext *context, std::vector<kernel::LiteKernel *> *all_kernels,
std::vector<Tensor *> *all_tensors);
// Stores the context and the kernel/tensor lists this pass operates on when
// Run() is called. The pointers are kept as-is and are not owned by the pass.
explicit NPUAddTransformPass(const InnerContext *context, std::vector<kernel::LiteKernel *> *all_kernels,
                             std::vector<Tensor *> *all_tensors) {
  context_ = context;
  all_kernels_ = all_kernels;
  all_tensors_ = all_tensors;
  // The name is reported when a pass fails, so it must identify this class
  // (it was previously copy-pasted as "NPUConcatTransformPass").
  name_ = "NPUAddTransformPass";
}
~NPUAddTransformPass() override {
for (auto primitive : insert_primitive_) {
delete primitive;
}
insert_primitive_.clear();
}
int Run() override;
private:
int UpdateNH2NCTransNodePreKernel(kernel::LiteKernel *kernel, kernel::LiteKernel *trans_kernel,
@ -37,6 +50,10 @@ class NPUAddTransformPass {
private:
int total = 0;
const InnerContext *context_;
std::vector<kernel::LiteKernel *> *all_kernels_;
std::vector<const PrimitiveC *> insert_primitive_;
std::vector<Tensor *> *all_tensors_;
};
} // namespace mindspore::lite
#endif // MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_ADD_TRANSFORM_PASS_H_
#endif // MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_OPTIMIZER_NPU_ADD_TRANSFORM_PASS_H_

View File

@ -0,0 +1,34 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_OPTIMIZER_NPU_BASE_PASS_H_
#define MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_OPTIMIZER_NPU_BASE_PASS_H_
#include <string>
namespace mindspore::lite {
// Abstract base class for NPU graph passes. A concrete pass sets name_ and
// implements Run().
class NPUBasePass {
 public:
  // Executes the pass and returns its status code.
  virtual int Run() = 0;

  // Virtual so that a pass can be destroyed through a base-class pointer.
  virtual ~NPUBasePass() = default;

  // Returns the pass name; const-qualified so it can also be queried through a
  // const reference or pointer.
  std::string name() const { return name_; }

 protected:
  // Human-readable pass name, filled in by the derived class.
  std::string name_;
};
} // namespace mindspore::lite
#endif // MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_OPTIMIZER_NPU_BASE_PASS_H_

View File

@ -13,8 +13,8 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "src/runtime/agent/npu/npu_concat_transform_pass.h"
#include "src/runtime/agent/npu/npu_pass_utils.h"
#include "src/runtime/agent/npu/optimizer/npu_concat_transform_pass.h"
#include "src/runtime/agent/npu/optimizer/npu_pass_utils.h"
namespace mindspore::lite {
using kernel::KERNEL_ARCH::kNPU;
int NPUConcatTransformPass::UpdateNH2NCTransNodePreKernel(kernel::LiteKernel *kernel, kernel::LiteKernel *trans_kernel,
@ -54,9 +54,11 @@ int NPUConcatTransformPass::InsertNode(const InnerContext *context, std::vector<
auto nh2nc_name = kernel->name() + "_nh2nc_" + std::to_string(total++);
auto *nh2nc_kernel = NPUPassUtils::CreateNhwc2NchwKernel(kernel->out_tensors(), nh2nc_tensors, context, nh2nc_name);
all_kernels->push_back(nh2nc_kernel);
insert_primitive_.push_back(nh2nc_kernel->GetPrimitive());
auto nc2nh_name = kernel->name() + "_nc2nh_" + std::to_string(total++);
auto *nc2nh_kernel = NPUPassUtils::CreateNchw2NhwcKernel(nh2nc_tensors, nc2nh_tensors, context, nc2nh_name);
all_kernels->push_back(nc2nh_kernel);
insert_primitive_.push_back(nc2nh_kernel->GetPrimitive());
NPUPassUtils::UpdateKernel(nh2nc_kernel, {kernel}, {nc2nh_kernel}, kernel->out_tensors(), nh2nc_tensors);
NPUPassUtils::UpdateKernel(nc2nh_kernel, {nh2nc_kernel}, {out_kernel}, nh2nc_tensors, nc2nh_tensors);
UpdateNH2NCTransNodePreKernel(kernel, nh2nc_kernel, out_kernel);
@ -90,12 +92,11 @@ int NPUConcatTransformPass::UpdateNC2NHTransNodeAfterKernel(kernel::LiteKernel *
return RET_OK;
}
int NPUConcatTransformPass::Run(const InnerContext *context, std::vector<kernel::LiteKernel *> *all_kernels,
std::vector<Tensor *> *all_tensors) {
if (context->IsNpuEnabled()) {
int NPUConcatTransformPass::Run() {
if (context_->IsNpuEnabled()) {
std::vector<kernel::LiteKernel *> new_kernels;
for (auto it = all_kernels->begin(); it != all_kernels->end(); it++) {
for (auto it = all_kernels_->begin(); it != all_kernels_->end(); it++) {
auto kernel = *it;
if (kernel->desc().arch != kNPU) {
new_kernels.push_back(kernel);
@ -109,15 +110,15 @@ int NPUConcatTransformPass::Run(const InnerContext *context, std::vector<kernel:
}
}
if (kernel->out_kernels().size() != sum) {
InsertNode(context, it, &new_kernels, all_tensors);
InsertNode(context_, it, &new_kernels, all_tensors_);
}
}
new_kernels.push_back(kernel);
}
all_kernels->clear();
all_kernels_->clear();
for (int i = 0; i < new_kernels.size(); i++) {
all_kernels->push_back(new_kernels[i]);
all_kernels_->push_back(new_kernels[i]);
}
}
return RET_OK;

View File

@ -14,16 +14,29 @@
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_CONCAT_TRANSFORM_PASS_H_
#define MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_CONCAT_TRANSFORM_PASS_H_
#ifndef MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_OPTIMIZER_NPU_CONCAT_TRANSFORM_PASS_H_
#define MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_OPTIMIZER_NPU_CONCAT_TRANSFORM_PASS_H_
#include <vector>
#include "src/lite_kernel.h"
#include "src/ops/primitive_c.h"
#include "src/runtime/agent/npu/optimizer/npu_base_pass.h"
namespace mindspore::lite {
class NPUConcatTransformPass {
class NPUConcatTransformPass : public NPUBasePass {
public:
int Run(const InnerContext *context, std::vector<kernel::LiteKernel *> *all_kernels,
std::vector<Tensor *> *all_tensors);
explicit NPUConcatTransformPass(const InnerContext *context, std::vector<kernel::LiteKernel *> *all_kernels,
std::vector<Tensor *> *all_tensors) {
context_ = context;
all_kernels_ = all_kernels;
all_tensors_ = all_tensors;
name_ = "NPUConcatTransformPass";
}
~NPUConcatTransformPass() override {
for (auto primitive : insert_primitive_) {
delete primitive;
}
insert_primitive_.clear();
}
int Run() override;
private:
int UpdateNH2NCTransNodePreKernel(kernel::LiteKernel *kernel, kernel::LiteKernel *trans_kernel,
@ -37,6 +50,10 @@ class NPUConcatTransformPass {
private:
int total = 0;
const InnerContext *context_;
std::vector<kernel::LiteKernel *> *all_kernels_;
std::vector<Tensor *> *all_tensors_;
std::vector<const PrimitiveC *> insert_primitive_;
};
} // namespace mindspore::lite
#endif // MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_CONCAT_TRANSFORM_PASS_H_
#endif // MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_OPTIMIZER_NPU_CONCAT_TRANSFORM_PASS_H_

View File

@ -13,7 +13,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "src/runtime/agent/npu/npu_fusion_pass.h"
#include "src/runtime/agent/npu/optimizer/npu_fusion_pass.h"
#include <vector>
#include "src/lite_kernel.h"
#include "nnacl/concat_parameter.h"
@ -198,7 +198,7 @@ int NPUFusionPass::FormatFusion(kernel::LiteKernel *kernel) {
return RET_OK;
}
int NPUFusionPass::Fusion() {
int NPUFusionPass::Run() {
for (auto kernel : *kernels) {
switch (kernel->Type()) {
case schema::PrimitiveType_Concat:

View File

@ -14,17 +14,23 @@
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_FUSION_PASS_H_
#define MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_FUSION_PASS_H_
#ifndef MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_OPTIMIZER_NPU_FUSION_PASS_H_
#define MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_OPTIMIZER_NPU_FUSION_PASS_H_
#include <vector>
#include "src/lite_kernel.h"
#include "src/ops/primitive_c.h"
#include "src/runtime/agent/npu/optimizer/npu_base_pass.h"
namespace mindspore::lite {
class NPUFusionPass {
class NPUFusionPass : public NPUBasePass {
public:
explicit NPUFusionPass(std::vector<kernel::LiteKernel *> *dst_kernels) { kernels = dst_kernels; }
~NPUFusionPass() = default;
int Fusion();
explicit NPUFusionPass(std::vector<kernel::LiteKernel *> *dst_kernels) {
kernels = dst_kernels;
name_ = "NPUFusionPass";
}
~NPUFusionPass() override = default;
int Run() override;
protected:
int ConcatFusion(kernel::LiteKernel *kernel);
@ -37,4 +43,4 @@ class NPUFusionPass {
std::vector<kernel::LiteKernel *> *kernels;
};
} // namespace mindspore::lite
#endif // MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_FUSION_PASS_H_
#endif // MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_OPTIMIZER_NPU_FUSION_PASS_H_

View File

@ -0,0 +1,39 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "src/runtime/agent/npu/optimizer/npu_pass_manager.h"
#include "include/errorcode.h"
#include "src/common/log_adapter.h"
namespace mindspore::lite {
// Appends a pass to the execution list. The manager deletes registered passes
// in Clear(), so the caller hands over ownership.
// A null pass is rejected because Run() dereferences every registered entry.
void NPUPassManager::AddPass(NPUBasePass *pass) {
  if (pass == nullptr) {
    MS_LOG(ERROR) << "NPU pass is nullptr, it will not be added.";
    return;
  }
  all_pass_.push_back(pass);
}
int NPUPassManager::Run() {
for (auto pass : all_pass_) {
auto ret = pass->Run();
if (ret != RET_OK) {
MS_LOG(ERROR) << "NPU Pass Run failed. Pass name is:" << pass->name();
return ret;
}
}
return RET_OK;
}
// Destroys every registered pass, in registration order, and empties the list.
void NPUPassManager::Clear() {
  for (size_t idx = 0; idx < all_pass_.size(); ++idx) {
    delete all_pass_[idx];
  }
  all_pass_.clear();
}
} // namespace mindspore::lite

View File

@ -0,0 +1,41 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_OPTIMIZER_NPU_PASS_MANAGER_H_
#define MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_OPTIMIZER_NPU_PASS_MANAGER_H_
#include <vector>
#include "src/runtime/agent/npu/optimizer/npu_base_pass.h"
namespace mindspore::lite {
// Process-wide registry of NPU passes. Passes are added with AddPass(),
// executed with Run() and deleted by Clear().
class NPUPassManager {
 public:
  // Returns the single shared instance (function-local static).
  static NPUPassManager *GetInstance() {
    static NPUPassManager pass_manager;
    return &pass_manager;
  }

  NPUPassManager() = default;

  // The manager deletes the raw pointers it stores, so a copy would delete the
  // same passes twice; copying is therefore disabled.
  NPUPassManager(const NPUPassManager &) = delete;
  NPUPassManager &operator=(const NPUPassManager &) = delete;

  // Deletes any passes that are still registered.
  ~NPUPassManager() { Clear(); }

  // Registers a pass; the manager takes ownership of the pointer.
  void AddPass(NPUBasePass *pass);

  // Runs the registered passes; see the implementation for the return value.
  int Run();

  // Deletes all registered passes and empties the list.
  void Clear();

 private:
  // Registered passes, in registration order.
  std::vector<NPUBasePass *> all_pass_;
};
} // namespace mindspore::lite
#endif // MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_OPTIMIZER_NPU_PASS_MANAGER_H_

View File

@ -17,7 +17,7 @@
#include "src/kernel_registry.h"
#include "src/ops/nhwc2nchw.h"
#include "src/ops/nchw2nhwc.h"
#include "src/runtime/agent/npu/npu_pass_utils.h"
#include "src/runtime/agent/npu/optimizer/npu_pass_utils.h"
namespace mindspore::lite {
using kernel::KERNEL_ARCH::kCPU;
using kernel::KERNEL_ARCH::kNPU;
@ -34,7 +34,7 @@ PrimitiveC *NPUPassUtils::CreateNchw2NhwcPrimitive() {
}
auto primitive_buf = reinterpret_cast<char *>(malloc(fbb.GetSize()));
if (primitive_buf == nullptr) {
MS_LOG(ERROR) << "Malloc primitive_buf_ failed.";
MS_LOG(ERROR) << "Malloc primitive buffer failed.";
fbb.Clear();
return nullptr;
}
@ -58,7 +58,7 @@ PrimitiveC *NPUPassUtils::CreateNhwc2NchwPrimitive() {
}
auto primitive_buf = reinterpret_cast<char *>(malloc(fbb.GetSize()));
if (primitive_buf == nullptr) {
MS_LOG(ERROR) << "Malloc primitive_buf_ failed.";
MS_LOG(ERROR) << "Malloc primitive buffer failed.";
fbb.Clear();
return nullptr;
}

View File

@ -14,8 +14,8 @@
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_PASS_UTILS_H_
#define MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_PASS_UTILS_H_
#ifndef MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_OPTIMIZER_NPU_PASS_UTILS_H_
#define MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_OPTIMIZER_NPU_PASS_UTILS_H_
#include <vector>
#include <string>
#include "src/ops/primitive_c.h"
@ -41,4 +41,4 @@ class NPUPassUtils {
static PrimitiveC *CreateNhwc2NchwPrimitive();
};
} // namespace mindspore::lite
#endif // MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_PASS_UTILS_H_
#endif // MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_OPTIMIZER_NPU_PASS_UTILS_H_

View File

@ -13,11 +13,11 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "src/runtime/agent/npu/npu_transform_pass.h"
#include "src/runtime/agent/npu/optimizer/npu_transform_pass.h"
#include <vector>
#include "src/lite_kernel.h"
#include "src/runtime/agent/npu/npu_manager.h"
#include "src/runtime/agent/npu/npu_pass_utils.h"
#include "src/runtime/agent/npu/optimizer/npu_pass_utils.h"
namespace mindspore::lite {
using kernel::KERNEL_ARCH::kCPU;
using kernel::KERNEL_ARCH::kNPU;
@ -77,6 +77,7 @@ int NPUTransformPass::InsertPreNode(const InnerContext *context, std::vector<ker
NPUPassUtils::CreateNhwc2NchwKernel({kernel->in_tensors()[0]}, pre_trans_out_tensors, context, name);
// Insert Nhwc2Nchw into the front of the current queue
all_kernels->push_back(pre_trans_kernel);
insert_primitive_.push_back(pre_trans_kernel->GetPrimitive());
// Replace the output kernel of the previous node
std::vector<kernel::LiteKernel *> pre_trans_in_kernel;
if (is_input_kernel) {
@ -99,6 +100,10 @@ int NPUTransformPass::InsertPostNode(const InnerContext *context, std::vector<ke
std::vector<kernel::LiteKernel *> *all_kernels,
std::vector<Tensor *> *all_tensors) {
auto kernel = *it;
// Model output does not insert operator
if (kernel->out_kernels().empty()) {
return RET_OK;
}
// Single output multiple references
for (int i = 0; i < kernel->out_kernels().size(); i++) {
auto next_kernel = kernel->out_kernels().at(i);
@ -118,6 +123,7 @@ int NPUTransformPass::InsertPostNode(const InnerContext *context, std::vector<ke
// Replace the input tensor of the next node
NPUPassUtils::UpdateKernel(post_trans_kernel, {kernel}, {next_kernel}, kernel->out_tensors(),
post_trans_out_tensors);
insert_primitive_.push_back(post_trans_kernel->GetPrimitive());
// Directly insert in the back, will not affect the topological sort
all_kernels->push_back(post_trans_kernel);
UpdateNC2NHTransNodePreKernel(kernel, post_trans_kernel, next_kernel);
@ -171,28 +177,27 @@ int NPUTransformPass::UpdateNC2NHTransNodeAfterKernel(kernel::LiteKernel *kernel
return RET_OK;
}
int NPUTransformPass::FormatTransformPass(const InnerContext *context, std::vector<kernel::LiteKernel *> *all_kernels,
std::vector<Tensor *> *all_tensors) {
if (context->IsNpuEnabled()) {
int NPUTransformPass::Run() {
if (context_->IsNpuEnabled()) {
std::vector<kernel::LiteKernel *> new_kernels;
for (auto it = all_kernels->begin(); it != all_kernels->end(); it++) {
for (auto it = all_kernels_->begin(); it != all_kernels_->end(); it++) {
auto kernel = *it;
if (kernel->desc().arch != kNPU) {
new_kernels.push_back(kernel);
continue;
}
if (npu_trans_nodes.find(kernel->Type()) != npu_trans_nodes.end()) {
InsertPreNode(context, it, &new_kernels, all_tensors);
InsertPreNode(context_, it, &new_kernels, all_tensors_);
new_kernels.push_back(kernel);
InsertPostNode(context, it, &new_kernels, all_tensors);
InsertPostNode(context_, it, &new_kernels, all_tensors_);
} else {
new_kernels.push_back(kernel);
}
}
all_kernels->clear();
all_kernels_->clear();
for (int i = 0; i < new_kernels.size(); i++) {
all_kernels->push_back(new_kernels[i]);
all_kernels_->push_back(new_kernels[i]);
}
}
return RET_OK;

View File

@ -14,16 +14,30 @@
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_TRANSFORM_PASS_H_
#define MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_TRANSFORM_PASS_H_
#ifndef MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_OPTIMIZER_NPU_TRANSFORM_PASS_H_
#define MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_OPTIMIZER_NPU_TRANSFORM_PASS_H_
#include <vector>
#include "src/lite_kernel.h"
#include "src/ops/primitive_c.h"
#include "src/runtime/agent/npu/optimizer/npu_base_pass.h"
namespace mindspore::lite {
class NPUTransformPass {
class NPUTransformPass : public NPUBasePass {
public:
int FormatTransformPass(const InnerContext *context, std::vector<kernel::LiteKernel *> *all_kernels,
std::vector<Tensor *> *all_tensors);
int Run() override;
explicit NPUTransformPass(const InnerContext *context, std::vector<kernel::LiteKernel *> *all_kernels,
std::vector<Tensor *> *all_tensors) {
context_ = context;
all_kernels_ = all_kernels;
all_tensors_ = all_tensors;
name_ = "NPUTransformPass";
}
~NPUTransformPass() override {
for (auto primitive : insert_primitive_) {
delete primitive;
}
insert_primitive_.clear();
}
private:
int UpdateNH2NCTransNodePreKernel(kernel::LiteKernel *kernel, kernel::LiteKernel *trans_kernel,
@ -46,6 +60,10 @@ class NPUTransformPass {
private:
int total = 0;
const InnerContext *context_;
std::vector<kernel::LiteKernel *> *all_kernels_;
std::vector<Tensor *> *all_tensors_;
std::vector<const PrimitiveC *> insert_primitive_;
};
} // namespace mindspore::lite
#endif // MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_TRANSFORM_PASS_H_
#endif // MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_OPTIMIZER_NPU_TRANSFORM_PASS_H_

View File

@ -70,7 +70,7 @@ domi::ModelBufferData *SubGraphNpuKernel::BuildIRModel() {
}
int SubGraphNpuKernel::Run() {
return reinterpret_cast<lite::NPUExecutor *>(this->executor_)->Run(in_tensors_, out_tensors_, nodes_);
return reinterpret_cast<lite::NPUExecutor *>(this->executor_)->Run(in_tensors_, out_tensors_, out_kernels_, nodes_);
}
int SubGraphNpuKernel::BuildNPUInputOp() {
@ -159,17 +159,17 @@ int SubGraphNpuKernel::BuildNPUOutputOp() {
std::string SubGraphNpuKernel::GetOMModelName() { return this->name_ + ".om"; }
int SubGraphNpuKernel::Init() {
if (!isCompiled_) {
model_buffer_data_ = BuildIRModel();
if (model_buffer_data_ == nullptr) {
if (!is_compiled_) {
auto model_buffer_data = BuildIRModel();
if (model_buffer_data == nullptr) {
MS_LOG(ERROR) << "Build IR model failed.";
return RET_ERROR;
}
name_ = "kNpuSubGraph" + std::to_string(mindspore::lite::NPUManager::GetInstance()->index());
mindspore::lite::NPUManager::GetInstance()->AddModel(model_buffer_data_->data, model_buffer_data_->length,
GetOMModelName(), context_->GetNpuInfo().frequency_);
mindspore::lite::NPUManager::GetInstance()->AddModel(model_buffer_data, GetOMModelName(),
context_->GetNpuInfo().frequency_);
executor_ = new (std::nothrow) mindspore::lite::NPUExecutor(GetOMModelName());
@ -177,7 +177,7 @@ int SubGraphNpuKernel::Init() {
MS_LOG(ERROR) << "Create NPUExecutor failed.";
return RET_ERROR;
}
isCompiled_ = true;
is_compiled_ = true;
}
return RET_OK;
}

View File

@ -69,9 +69,7 @@ class SubGraphNpuKernel : public SubGraphKernel {
std::string GetOMModelName();
private:
bool isCompiled_ = false;
domi::ModelBufferData *model_buffer_data_;
bool is_compiled_ = false;
std::vector<ge::Operator> subgraph_input_op_;

View File

@ -42,17 +42,10 @@ using mindspore::schema::PrimitiveType_Sub;
namespace mindspore::kernel {
int ArithmeticNPUKernel::IsSupport(const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs, OpParameter *opParameter) {
if (primitive_->Type() == PrimitiveType_Mul || primitive_->Type() == PrimitiveType_Div) {
if (primitive_->Type() == PrimitiveType_Mul || primitive_->Type() == PrimitiveType_Div ||
primitive_->Type() == PrimitiveType_Add || primitive_->Type() == PrimitiveType_Sub) {
if (inputs[0]->shape() != inputs[1]->shape()) {
MS_LOG(WARNING) << "For the two inputs, the corresponding dimensions must have the same value."
<< " shape 1 is:" << inputs[0]->shape() << " shape 2 is:" << inputs[1]->shape();
return RET_ERROR;
}
}
if (primitive_->Type() == PrimitiveType_Add || primitive_->Type() == PrimitiveType_Sub) {
if (inputs[0]->shape().size() != inputs[1]->shape().size()) {
MS_LOG(WARNING)
<< "For the two inputs, the corresponding dimensions must have the same value, or one of them is 1."
MS_LOG(WARNING) << name_ << " for the two inputs, the corresponding dimensions must have the same value."
<< " shape 1 is:" << inputs[0]->shape() << " shape 2 is:" << inputs[1]->shape();
return RET_ERROR;
}

View File

@ -35,8 +35,8 @@ int CastNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs, const
return RET_ERROR;
}
op_->set_input_x(*npu_inputs[0]);
op_->set_attr_dst_dtype(lite::ConverterToNPUDataType(dst_type_));
op_->set_attr_src_dtype(lite::ConverterToNPUDataType(src_type_));
op_->set_attr_dst_dtype(lite::ConverterToNPUDataType(static_cast<TypeId>(cast_parameter_->dst_type_)));
op_->set_attr_src_dtype(lite::ConverterToNPUDataType(static_cast<TypeId>(cast_parameter_->src_type_)));
return RET_OK;
}

View File

@ -27,9 +27,7 @@ class CastNPUKernel : public NPUKernel {
const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx,
const mindspore::lite::PrimitiveC *primitive)
: NPUKernel(parameter, inputs, outputs, ctx, primitive) {
auto cast_parameter = reinterpret_cast<CastParameter *>(parameter);
dst_type_ = static_cast<TypeId>(cast_parameter->dst_type_);
src_type_ = static_cast<TypeId>(cast_parameter->src_type_);
cast_parameter_ = reinterpret_cast<CastParameter *>(parameter);
}
~CastNPUKernel() override;
@ -41,8 +39,7 @@ class CastNPUKernel : public NPUKernel {
private:
hiai::op::CastT *op_ = nullptr;
TypeId dst_type_;
TypeId src_type_;
CastParameter *cast_parameter_;
};
} // namespace mindspore::kernel
#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_NPU_CAST_NPU_H_

View File

@ -18,8 +18,8 @@
#include <vector>
#include <memory>
#include "src/runtime/kernel/npu/npu_kernel.h"
#include "include/graph/op/all_ops.h"
#include "src/runtime/kernel/npu/transpose_base_npu.h"
#include "nnacl/conv_parameter.h"
namespace mindspore::kernel {

View File

@ -30,7 +30,7 @@ int ConvolutionNPUKernel::IsSupport(const std::vector<lite::Tensor *> &inputs,
int ConvolutionNPUKernel::SetConvParam() {
conv_->set_attr_strides(ge::AttrValue::LIST_INT({conv_param_->stride_h_, conv_param_->stride_w_}));
conv_->set_attr_dilations(ge::AttrValue::LIST_INT({conv_param_->dilation_h_, conv_param_->dilation_w_}));
conv_->set_attr_groups(1);
conv_->set_attr_groups(conv_param_->group_);
if (conv_param_->pad_mode_ == Pad_Same) {
conv_->set_attr_pad_mode(ge::AttrValue::STR{"SAME"});

View File

@ -41,7 +41,7 @@ int GatherNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs, con
op_->set_input_x(*npu_inputs[0]);
op_->set_input_indices(*npu_inputs[1]);
op_->set_attr_axis(axis_);
op_->set_attr_axis(gather_parameter_->axis_);
return RET_OK;
}

View File

@ -27,8 +27,7 @@ class GatherNPUKernel : public NPUKernel {
const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx,
const mindspore::lite::PrimitiveC *primitive)
: NPUKernel(parameter, inputs, outputs, ctx, primitive) {
auto gather_parameter = reinterpret_cast<GatherParameter *>(parameter);
axis_ = gather_parameter->axis_;
gather_parameter_ = reinterpret_cast<GatherParameter *>(parameter);
}
~GatherNPUKernel() override;
@ -40,7 +39,7 @@ class GatherNPUKernel : public NPUKernel {
private:
hiai::op::GatherV2D *op_ = nullptr;
int axis_;
GatherParameter *gather_parameter_;
};
} // namespace mindspore::kernel
#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_NPU_GATHER_NPU_H_

View File

@ -33,8 +33,8 @@ int MatMulNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs, con
op_->set_input_x1(*npu_inputs[0]);
op_->set_input_x2(*npu_inputs[1]);
op_->set_attr_transpose_x1(a_transpose_);
op_->set_attr_transpose_x2(b_transpose_);
op_->set_attr_transpose_x1(matmul_parameter_->a_transpose_);
op_->set_attr_transpose_x2(matmul_parameter_->b_transpose_);
return RET_OK;
}

View File

@ -28,9 +28,7 @@ class MatMulNPUKernel : public NPUKernel {
const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx,
const mindspore::lite::PrimitiveC *primitive)
: NPUKernel(parameter, inputs, outputs, ctx, primitive) {
auto matmul_parameter = reinterpret_cast<MatMulParameter *>(parameter);
a_transpose_ = matmul_parameter->a_transpose_;
b_transpose_ = matmul_parameter->b_transpose_;
matmul_parameter_ = reinterpret_cast<MatMulParameter *>(parameter);
}
~MatMulNPUKernel() override;
@ -42,8 +40,7 @@ class MatMulNPUKernel : public NPUKernel {
private:
hiai::op::MatMul *op_ = nullptr;
bool a_transpose_ = false;
bool b_transpose_ = false;
MatMulParameter *matmul_parameter_;
};
} // namespace mindspore::kernel
#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_NPU_MATMUL_NPU_H_

View File

@ -53,6 +53,11 @@ kernel::LiteKernel *NPUKernelCreator(const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs, OpParameter *opParameter,
const lite::InnerContext *ctx, const kernel::KernelKey &desc,
const mindspore::lite::PrimitiveC *primitive) {
if (!primitive->infer_flag()) {
MS_LOG(ERROR) << "NPU does not support runtime inference shape";
return nullptr;
}
auto *kernel = new (std::nothrow) T(opParameter, inputs, outputs, ctx, primitive);
if (kernel == nullptr) {
MS_LOG(ERROR) << "kernel " << opParameter->name_ << "is nullptr.";

View File

@ -25,7 +25,7 @@ using mindspore::schema::PrimitiveType_Pad;
namespace mindspore::kernel {
int PadNPUKernel::IsSupport(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs,
OpParameter *opParameter) {
if (padding_mode_ != schema::PaddingMode_CONSTANT) {
if (pad_->GetPaddingMode() != schema::PaddingMode_CONSTANT) {
MS_LOG(WARNING) << "NPU only support CONSTANT padding mode";
return RET_ERROR;
}
@ -39,16 +39,16 @@ int PadNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs, const
MS_LOG(ERROR) << name_ << " op is nullptr";
return RET_ERROR;
}
int size = static_cast<int>(paddings_.size() / 2);
int size = static_cast<int>(pad_->GetPaddings().size() / 2);
ge::TensorDesc padding_tensor_desc(ge::Shape({size, 2}), ge::FORMAT_NCHW, ge::DT_INT32);
ge::TensorPtr padding_tensor = std::make_shared<hiai::Tensor>(padding_tensor_desc);
padding_tensor->SetData(reinterpret_cast<uint8_t *>(paddings_.data()), size * sizeof(int));
padding_tensor->SetData(reinterpret_cast<uint8_t *>(pad_->GetPaddings().data()), size * sizeof(int));
auto paddings = new hiai::op::Const(name_ + "paddings");
paddings->set_attr_value(padding_tensor);
ge::TensorDesc constant_values_tensor_desc(ge::Shape({1}), ge::FORMAT_NCHW, ge::DT_FLOAT);
ge::TensorPtr constant_values_tensor = std::make_shared<hiai::Tensor>(constant_values_tensor_desc);
vector<float> constant_values_data_value = {constant_value_};
vector<float> constant_values_data_value = {pad_->GetConstantValue()};
constant_values_tensor->SetData(reinterpret_cast<uint8_t *>(constant_values_data_value.data()), 1 * sizeof(float));
auto constant = new hiai::op::Const(name_ + "constant");
constant->set_attr_value(constant_values_tensor);

View File

@ -28,10 +28,7 @@ class PadNPUKernel : public NPUKernel {
const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx,
const mindspore::lite::PrimitiveC *primitive)
: NPUKernel(parameter, inputs, outputs, ctx, primitive) {
auto pad = reinterpret_cast<const mindspore::lite::Pad *>(primitive);
constant_value_ = pad->GetConstantValue();
paddings_ = pad->GetPaddings();
padding_mode_ = pad->GetPaddingMode();
pad_ = reinterpret_cast<const mindspore::lite::Pad *>(primitive);
}
~PadNPUKernel() override;
@ -43,9 +40,7 @@ class PadNPUKernel : public NPUKernel {
private:
hiai::op::PadV2 *op_ = nullptr;
std::vector<int> paddings_;
int padding_mode_;
float constant_value_;
const mindspore::lite::Pad *pad_;
};
} // namespace mindspore::kernel
#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_NPU_PAD_NPU_H_

View File

@ -27,8 +27,9 @@ using mindspore::schema::PrimitiveType_Resize;
namespace mindspore::kernel {
int ResizeNPUKernel::IsSupport(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs,
OpParameter *opParameter) {
if (method_ != schema::ResizeMethod_LINEAR || method_ == schema::ResizeMethod_NEAREST) {
MS_LOG(WARNING) << "Unsupported resize method type:" << method_;
if (resize_parameter_->method_ != schema::ResizeMethod_LINEAR ||
resize_parameter_->method_ == schema::ResizeMethod_NEAREST) {
MS_LOG(WARNING) << "Unsupported resize method type:" << resize_parameter_->method_;
return RET_ERROR;
}
return RET_OK;
@ -38,20 +39,21 @@ int ResizeNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs, con
const std::vector<ge::Operator *> &npu_inputs) {
ge::TensorDesc sizeTensorDesc(ge::Shape({2}), ge::FORMAT_NCHW, ge::DT_INT32);
ge::TensorPtr sizeTensor = std::make_shared<hiai::Tensor>(sizeTensorDesc);
vector<int32_t> dataValue = {static_cast<int32_t>(new_height_), static_cast<int32_t>(new_width_)};
vector<int32_t> dataValue = {static_cast<int32_t>(resize_parameter_->new_height_),
static_cast<int32_t>(resize_parameter_->new_width_)};
sizeTensor->SetData(reinterpret_cast<uint8_t *>(dataValue.data()), 2 * sizeof(int32_t));
auto out_size = new (std::nothrow) hiai::op::Const(name_ + "_size");
out_size->set_attr_value(sizeTensor);
if (method_ == schema::ResizeMethod_LINEAR) {
if (resize_parameter_->method_ == schema::ResizeMethod_LINEAR) {
auto op = new (std::nothrow) hiai::op::ResizeBilinearV2(name_);
if (op == nullptr) {
MS_LOG(ERROR) << " op is nullptr.";
return RET_ERROR;
}
op->set_attr_align_corners(align_corners_);
op->set_attr_align_corners(resize_parameter_->align_corners_);
op->set_input_x(*npu_inputs[0]);
op->set_input_size(*out_size);
op->set_attr_half_pixel_centers(preserve_aspect_ratio_);
op->set_attr_half_pixel_centers(resize_parameter_->preserve_aspect_ratio_);
op_ = op;
} else {
auto op = new (std::nothrow) hiai::op::ResizeNearestNeighborV2(name_);
@ -59,7 +61,7 @@ int ResizeNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs, con
MS_LOG(ERROR) << " op is nullptr.";
return RET_ERROR;
}
op->set_attr_align_corners(align_corners_);
op->set_attr_align_corners(resize_parameter_->align_corners_);
op->set_input_x(*npu_inputs[0]);
op->set_input_size(*out_size);
op_ = op;

View File

@ -22,7 +22,6 @@
#include "nnacl/arithmetic_common.h"
#include "src/runtime/kernel/npu/npu_kernel.h"
#include "include/graph/op/all_ops.h"
#include "src/runtime/kernel/npu/transpose_base_npu.h"
namespace mindspore::kernel {
class ResizeNPUKernel : public NPUKernel {
public:
@ -30,12 +29,7 @@ class ResizeNPUKernel : public NPUKernel {
const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx,
const mindspore::lite::PrimitiveC *primitive)
: NPUKernel(parameter, inputs, outputs, ctx, primitive) {
auto resize_parameter = reinterpret_cast<ResizeParameter *>(parameter);
method_ = resize_parameter->method_;
new_height_ = resize_parameter->new_height_;
new_width_ = resize_parameter->new_width_;
align_corners_ = resize_parameter->align_corners_;
preserve_aspect_ratio_ = resize_parameter->preserve_aspect_ratio_;
resize_parameter_ = reinterpret_cast<ResizeParameter *>(parameter);
}
~ResizeNPUKernel() override;
@ -48,11 +42,7 @@ class ResizeNPUKernel : public NPUKernel {
private:
ge::Operator *op_ = nullptr;
int method_;
int64_t new_height_;
int64_t new_width_;
bool align_corners_;
bool preserve_aspect_ratio_;
ResizeParameter *resize_parameter_;
};
} // namespace mindspore::kernel
#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_NPU_RESIZE_NPU_H_

View File

@ -34,7 +34,7 @@ int ScaleNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs, cons
MS_LOG(ERROR) << name_ << " op is nullptr";
return RET_ERROR;
}
op_->set_attr_axis(this->axis_);
op_->set_attr_axis(scale_parameter_->axis_);
op_->set_input_x(*npu_inputs[0]);
op_->set_input_scale(*npu_inputs[1]);
op_->set_input_bias(*npu_inputs[2]);

View File

@ -27,7 +27,7 @@ class ScaleNPUKernel : public NPUKernel {
const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx,
const mindspore::lite::PrimitiveC *primitive)
: NPUKernel(parameter, inputs, outputs, ctx, primitive) {
axis_ = reinterpret_cast<ScaleParameter *>(parameter)->axis_;
scale_parameter_ = reinterpret_cast<ScaleParameter *>(parameter);
}
~ScaleNPUKernel() override;
@ -39,7 +39,7 @@ class ScaleNPUKernel : public NPUKernel {
private:
hiai::op::Scale *op_ = nullptr;
int axis_;
ScaleParameter *scale_parameter_;
};
} // namespace mindspore::kernel
#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_NPU_Scale_NPU_H_

View File

@ -35,10 +35,10 @@ int SoftmaxNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs,
MS_LOG(ERROR) << name_ << " op is nullptr";
return RET_ERROR;
}
if (axis_ == -1) {
if (softmax_parameter_->axis_ == -1) {
op_->set_attr_axis(inputs[0]->shape().size() - 1);
} else {
op_->set_attr_axis(axis_);
op_->set_attr_axis(softmax_parameter_->axis_);
}
op_->set_input_x(*npu_inputs[0]);
return RET_OK;

View File

@ -27,8 +27,7 @@ class SoftmaxNPUKernel : public NPUKernel {
const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx,
const mindspore::lite::PrimitiveC *primitive)
: NPUKernel(parameter, inputs, outputs, ctx, primitive) {
auto softmax_parameter = reinterpret_cast<SoftmaxParameter *>(parameter);
axis_ = softmax_parameter->axis_;
softmax_parameter_ = reinterpret_cast<SoftmaxParameter *>(parameter);
}
~SoftmaxNPUKernel() override;
@ -40,7 +39,7 @@ class SoftmaxNPUKernel : public NPUKernel {
private:
hiai::op::Softmax *op_ = nullptr;
int axis_;
SoftmaxParameter *softmax_parameter_;
};
} // namespace mindspore::kernel
#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_NPU_SOFTMAX_NPU_H_

View File

@ -35,25 +35,25 @@ int SplitNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs, cons
MS_LOG(ERROR) << name_ << " op is nullptr";
return RET_ERROR;
}
int size = size_splits_.size();
int size = split_->size_splits().size();
ge::TensorDesc size_splits_tensor_desc(ge::Shape({size}), ge::FORMAT_NCHW, ge::DT_INT32);
ge::TensorPtr size_splits_tensor = std::make_shared<hiai::Tensor>(size_splits_tensor_desc);
size_splits_tensor->SetData(reinterpret_cast<uint8_t *>(size_splits_.data()), size * sizeof(int));
size_splits_tensor->SetData(reinterpret_cast<uint8_t *>(split_->size_splits().data()), size * sizeof(int));
auto size_splits = new hiai::op::Const(name_ + "_size");
size_splits->set_attr_value(size_splits_tensor);
ge::TensorDesc split_dim_tensor_desc(ge::Shape({1}), ge::FORMAT_NCHW, ge::DT_INT32);
ge::TensorPtr split_dim_tensor = std::make_shared<hiai::Tensor>(split_dim_tensor_desc);
vector<int32_t> split_dim_data_value = {split_dim_};
vector<int32_t> split_dim_data_value = {split_->GetSplitDim()};
split_dim_tensor->SetData(reinterpret_cast<uint8_t *>(split_dim_data_value.data()), 1 * sizeof(int));
auto split_dim = new hiai::op::Const(name_ + "_dim");
split_dim->set_attr_value(split_dim_tensor);
op_->set_input_x(*npu_inputs[0]);
op_->set_attr_num_split(num_split_);
op_->set_attr_num_split(split_->GetNumberSplit());
op_->set_input_split_dim(*split_dim);
op_->set_input_size_splits(*size_splits);
op_->create_dynamic_output_y(num_split_);
op_->create_dynamic_output_y(split_->GetNumberSplit());
return RET_OK;
}

View File

@ -27,10 +27,7 @@ class SplitNPUKernel : public NPUKernel {
const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx,
const mindspore::lite::PrimitiveC *primitive)
: NPUKernel(parameter, inputs, outputs, ctx, primitive) {
auto split = reinterpret_cast<const mindspore::lite::Split *>(primitive);
num_split_ = split->GetNumberSplit();
size_splits_ = split->GetSizeSplit();
split_dim_ = split->GetSplitDim();
split_ = reinterpret_cast<const mindspore::lite::Split *>(primitive);
}
~SplitNPUKernel() override;
@ -42,9 +39,7 @@ class SplitNPUKernel : public NPUKernel {
private:
hiai::op::SplitV *op_ = nullptr;
int num_split_;
std::vector<int> size_splits_;
int split_dim_;
const mindspore::lite::Split *split_;
};
} // namespace mindspore::kernel
#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_NPU_SPLIT_NPU_H_

View File

@ -59,11 +59,11 @@ int StridedSliceNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &input
} else {
op_->set_input_strides(*npu_inputs[3]);
}
op_->set_attr_begin_mask(begin_mask_);
op_->set_attr_ellipsis_mask(ellipsis_mask_);
op_->set_attr_end_mask(end_mask_);
op_->set_attr_shrink_axis_mask(shrink_axis_mask_);
op_->set_attr_new_axis_mask(new_axis_mask_);
op_->set_attr_begin_mask(strided_slice_->GetBeginMask());
op_->set_attr_ellipsis_mask(strided_slice_->GetEllipsisMask());
op_->set_attr_end_mask(strided_slice_->GetEndMask());
op_->set_attr_shrink_axis_mask(strided_slice_->GetShrinkAxisMask());
op_->set_attr_new_axis_mask(strided_slice_->GetNewAxisMask());
return RET_OK;
}

View File

@ -28,12 +28,7 @@ class StridedSliceNPUKernel : public NPUKernel {
const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx,
const mindspore::lite::PrimitiveC *primitive)
: NPUKernel(parameter, inputs, outputs, ctx, primitive) {
auto strided_slice = reinterpret_cast<const mindspore::lite::StridedSlice *>(primitive);
begin_mask_ = strided_slice->GetBeginMask();
end_mask_ = strided_slice->GetEndMask();
ellipsis_mask_ = strided_slice->GetEllipsisMask();
new_axis_mask_ = strided_slice->GetNewAxisMask();
shrink_axis_mask_ = strided_slice->GetShrinkAxisMask();
strided_slice_ = reinterpret_cast<const mindspore::lite::StridedSlice *>(primitive);
}
~StridedSliceNPUKernel() override;
@ -45,11 +40,7 @@ class StridedSliceNPUKernel : public NPUKernel {
private:
hiai::op::StridedSlice *op_ = nullptr;
int begin_mask_;
int end_mask_;
int ellipsis_mask_;
int new_axis_mask_;
int shrink_axis_mask_;
const mindspore::lite::StridedSlice *strided_slice_;
};
} // namespace mindspore::kernel
#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_NPU_STRIDEDSLICE_NPU_H_

View File

@ -1,54 +0,0 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "src/runtime/kernel/npu/transpose_base_npu.h"
namespace mindspore::kernel {
// Releases the pre- and post-transpose Permute operators held by this kernel.
TransposeBaseNPUKernel::~TransposeBaseNPUKernel() {
  // `delete` on a null pointer is a no-op, so no explicit null guards are needed.
  delete pre_trans_;
  pre_trans_ = nullptr;

  delete post_trans_;
  post_trans_ = nullptr;
}
/**
 * Creates the pre-transpose Permute operator (NHWC -> NCHW) and connects `input` to it.
 *
 * @param input Operator used as the Permute input; must not be nullptr.
 * @return RET_OK on success; RET_ERROR if `input` is nullptr or the Permute operator cannot be allocated.
 */
int TransposeBaseNPUKernel::SetPreTranspose(const ge::Operator *input) {
  // Reject a null input up front: it is dereferenced below, and checking before the
  // allocation means a bad argument never leaves a half-configured operator behind.
  if (input == nullptr) {
    MS_LOG(ERROR) << "Input operator of pre transpose for op " << name_ << " is nullptr.";
    return RET_ERROR;
  }
  auto *permute = new (std::nothrow) hiai::op::Permute(name_ + "_pre_transpose");
  if (permute == nullptr) {
    MS_LOG(ERROR) << "New pre transpose npu operator (NHWC -> NCHW) for op " << name_ << " failed.";
    return RET_ERROR;
  }
  // input permute: NHWC -> NCHW
  permute->set_input_x(*input);
  permute->set_attr_order(ge::AttrValue::LIST_INT({0, 3, 1, 2}));
  // Publish only a fully configured operator.
  // NOTE(review): a pre_trans_ created by an earlier call is overwritten here without being
  // freed — confirm callers invoke this at most once per kernel.
  pre_trans_ = permute;
  return RET_OK;
}
/**
 * Creates the post-transpose Permute operator (NCHW -> NHWC) and connects `input` to it.
 *
 * @param input Operator used as the Permute input; must not be nullptr.
 * @return RET_OK on success; RET_ERROR if `input` is nullptr or the Permute operator cannot be allocated.
 */
int TransposeBaseNPUKernel::SetPostTranspose(const ge::Operator *input) {
  // Reject a null input up front: it is dereferenced below, and checking before the
  // allocation means a bad argument never leaves a half-configured operator behind.
  if (input == nullptr) {
    MS_LOG(ERROR) << "Input operator of post transpose for op " << name_ << " is nullptr.";
    return RET_ERROR;
  }
  auto *permute = new (std::nothrow) hiai::op::Permute(name_ + "_post_transpose");
  if (permute == nullptr) {
    MS_LOG(ERROR) << "New post transpose operator (NCHW -> NHWC) for op " << name_ << " failed.";
    return RET_ERROR;
  }
  // permute: NCHW -> NHWC
  permute->set_input_x(*input);
  permute->set_attr_order(ge::AttrValue::LIST_INT({0, 2, 3, 1}));
  // Publish only a fully configured operator.
  // NOTE(review): a post_trans_ created by an earlier call is overwritten here without being
  // freed — confirm callers invoke this at most once per kernel.
  post_trans_ = permute;
  return RET_OK;
}
} // namespace mindspore::kernel

View File

@ -1,41 +0,0 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_NPU_TRANSPOSE_BASE_H_
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_NPU_TRANSPOSE_BASE_H_
#include <vector>
#include "include/graph/op/all_ops.h"
#include "include/graph/compatible/all_ops.h"
#include "src/runtime/kernel/npu/npu_kernel.h"
#include "nnacl/op_base.h"
namespace mindspore::kernel {
// NPU kernel base that holds an optional pre-transpose and post-transpose Permute operator.
// Both pointers start as nullptr and are filled in by SetPreTranspose / SetPostTranspose.
class TransposeBaseNPUKernel : public NPUKernel {
 public:
  TransposeBaseNPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
                         const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx,
                         const mindspore::lite::PrimitiveC *primitive)
      : NPUKernel(parameter, inputs, outputs, ctx, primitive) {}
  ~TransposeBaseNPUKernel() override;

  // The destructor deletes pre_trans_ and post_trans_, so a member-wise copy would leave two
  // kernels deleting the same operators. Copying is therefore disabled.
  TransposeBaseNPUKernel(const TransposeBaseNPUKernel &) = delete;
  TransposeBaseNPUKernel &operator=(const TransposeBaseNPUKernel &) = delete;

 protected:
  // Builds the Permute operator placed in front of the kernel's own operator, fed by `input`.
  int SetPreTranspose(const ge::Operator *input);
  // Builds the Permute operator placed behind the kernel's own operator, fed by `input`.
  int SetPostTranspose(const ge::Operator *input);

  hiai::op::Permute *pre_trans_ = nullptr;   // set by SetPreTranspose
  hiai::op::Permute *post_trans_ = nullptr;  // set by SetPostTranspose
};
} // namespace mindspore::kernel
#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_NPU_TRANSPOSE_BASE_H_

View File

@ -30,7 +30,7 @@ int TransposeNPUKernel::IsSupport(const std::vector<lite::Tensor *> &inputs, con
MS_LOG(ERROR) << "Unsupported conjugate transpose.";
return RET_ERROR;
}
return RET_OK;
return RET_ERROR;
}
int TransposeNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs,
@ -57,6 +57,4 @@ TransposeNPUKernel::~TransposeNPUKernel() {
}
REG_KERNEL(kNPU, kNumberTypeFloat32, PrimitiveType_Transpose, NPUKernelCreator<TransposeNPUKernel>)
// REG_KERNEL(kNPU, kNumberTypeFloat32, PrimitiveType_Nhwc2Nchw, NPUKernelCreator<TransposeNPUKernel>)
// REG_KERNEL(kNPU, kNumberTypeFloat32, PrimitiveType_Nchw2Nhwc, NPUKernelCreator<TransposeNPUKernel>)
} // namespace mindspore::kernel

View File

@ -34,10 +34,11 @@
#if SUPPORT_NPU
#include "src/runtime/agent/npu/subgraph_npu_kernel.h"
#include "src/runtime/agent/npu/npu_manager.h"
#include "src/runtime/agent/npu/npu_transform_pass.h"
#include "src/runtime/agent/npu/npu_fusion_pass.h"
#include "src/runtime/agent/npu/npu_add_transform_pass.h"
#include "src/runtime/agent/npu/npu_concat_transform_pass.h"
#include "src/runtime/agent/npu/optimizer/npu_pass_manager.h"
#include "src/runtime/agent/npu/optimizer/npu_transform_pass.h"
#include "src/runtime/agent/npu/optimizer/npu_fusion_pass.h"
#include "src/runtime/agent/npu/optimizer/npu_add_transform_pass.h"
#include "src/runtime/agent/npu/optimizer/npu_concat_transform_pass.h"
#endif
namespace mindspore::lite {
using kernel::KERNEL_ARCH::kCPU;
@ -89,12 +90,12 @@ void Scheduler::FindNodeInoutTensors(const lite::Model::Node &node, std::vector<
auto in_size = node.input_indices_.size();
inputs->reserve(in_size);
for (size_t j = 0; j < in_size; ++j) {
inputs->emplace_back(src_tensors_.at(node.input_indices_[j]));
inputs->emplace_back(src_tensors_->at(node.input_indices_[j]));
}
auto out_size = node.output_indices_.size();
outputs->reserve(out_size);
for (size_t j = 0; j < out_size; ++j) {
outputs->emplace_back(src_tensors_.at(node.output_indices_[j]));
outputs->emplace_back(src_tensors_->at(node.output_indices_[j]));
}
}
@ -303,11 +304,11 @@ int Scheduler::ScheduleSubGraphToKernels(size_t subgraph_index, std::vector<kern
}
if (in_tensors != nullptr) {
std::transform(subgraph->input_indices_.begin(), subgraph->input_indices_.end(), std::back_inserter(*in_tensors),
[&](const uint32_t index) { return this->src_tensors_.at(index); });
[&](const uint32_t index) { return this->src_tensors_->at(index); });
}
if (out_tensors != nullptr) {
std::transform(subgraph->output_indices_.begin(), subgraph->output_indices_.end(), std::back_inserter(*out_tensors),
[&](const uint32_t index) { return this->src_tensors_.at(index); });
[&](const uint32_t index) { return this->src_tensors_->at(index); });
}
return RET_OK;
}
@ -567,37 +568,16 @@ void Scheduler::FindAllInoutKernels(const std::vector<kernel::LiteKernel *> &ker
int Scheduler::RunPass(std::vector<kernel::LiteKernel *> *dst_kernels) {
int ret = RET_OK;
#if SUPPORT_NPU
auto transform_pass = new NPUTransformPass;
ret = transform_pass->FormatTransformPass(context_, dst_kernels, &src_tensors_);
delete transform_pass;
if (ret != RET_OK) {
MS_LOG(ERROR) << "Run npu format transform pass failed.";
return ret;
}
auto add_format_pass = new NPUAddTransformPass;
ret = add_format_pass->Run(context_, dst_kernels, &src_tensors_);
if (ret != RET_OK) {
MS_LOG(ERROR) << "Run npu add op insert transform pass failed.";
return ret;
}
delete add_format_pass;
auto concat_format_pass = new NPUConcatTransformPass;
ret = concat_format_pass->Run(context_, dst_kernels, &src_tensors_);
if (ret != RET_OK) {
MS_LOG(ERROR) << "Run npu concat op insert transform pass failed.";
return ret;
}
delete concat_format_pass;
auto transform_pass = new NPUTransformPass(context_, dst_kernels, src_tensors_);
mindspore::lite::NPUPassManager::GetInstance()->AddPass(transform_pass);
auto add_format_pass = new NPUAddTransformPass(context_, dst_kernels, src_tensors_);
mindspore::lite::NPUPassManager::GetInstance()->AddPass(add_format_pass);
auto concat_format_pass = new NPUConcatTransformPass(context_, dst_kernels, src_tensors_);
mindspore::lite::NPUPassManager::GetInstance()->AddPass(concat_format_pass);
auto fusion_pass = new NPUFusionPass(dst_kernels);
ret = fusion_pass->Fusion();
if (ret != RET_OK) {
MS_LOG(ERROR) << "Run npu fussion transform pass failed.";
return ret;
}
delete fusion_pass;
mindspore::lite::NPUPassManager::GetInstance()->AddPass(fusion_pass);
ret = mindspore::lite::NPUPassManager::GetInstance()->Run();
#endif
return ret;
}

View File

@ -28,8 +28,8 @@
namespace mindspore::lite {
class Scheduler {
public:
Scheduler(const InnerContext *ctx, Model *src_model, std::vector<Tensor *> src_tensors)
: context_(ctx), src_model_(src_model), src_tensors_(std::move(src_tensors)) {}
Scheduler(const InnerContext *ctx, Model *src_model, std::vector<Tensor *> *src_tensors)
: context_(ctx), src_model_(src_model), src_tensors_(src_tensors) {}
~Scheduler() = default;
int Schedule(std::vector<kernel::LiteKernel *> *dst_kernels);
@ -85,7 +85,7 @@ class Scheduler {
protected:
const InnerContext *context_ = nullptr;
Model *src_model_ = nullptr;
std::vector<Tensor *> src_tensors_;
std::vector<Tensor *> *src_tensors_;
std::vector<size_t> graph_output_node_indexes_;
};
} // namespace mindspore::lite