From 7d61b0f82665aa0fd36f6346fa263b5e9a197d42 Mon Sep 17 00:00:00 2001
From: yeyunpeng
Date: Mon, 21 Dec 2020 16:37:41 +0800
Subject: [PATCH] NPU: fix memory leaks

---
 mindspore/lite/src/lite_session.cc            |   7 +-
 .../lite/src/runtime/agent/npu/CMakeLists.txt |   3 +-
 .../src/runtime/agent/npu/npu_executor.cc     |  24 ++-
 .../lite/src/runtime/agent/npu/npu_executor.h |   5 +-
 .../lite/src/runtime/agent/npu/npu_manager.cc | 144 +++++++++++++-----
 .../lite/src/runtime/agent/npu/npu_manager.h  |  40 +++--
 .../{ => optimizer}/npu_add_transform_pass.cc |  19 +--
 .../{ => optimizer}/npu_add_transform_pass.h  |  29 +++-
 .../agent/npu/optimizer/npu_base_pass.h       |  34 +++++
 .../npu_concat_transform_pass.cc              |  19 +--
 .../npu_concat_transform_pass.h               |  29 +++-
 .../npu/{ => optimizer}/npu_fusion_pass.cc    |   4 +-
 .../npu/{ => optimizer}/npu_fusion_pass.h     |  20 ++-
 .../agent/npu/optimizer/npu_pass_manager.cc   |  39 +++++
 .../agent/npu/optimizer/npu_pass_manager.h    |  41 +++++
 .../npu/{ => optimizer}/npu_pass_utils.cc     |   6 +-
 .../npu/{ => optimizer}/npu_pass_utils.h      |   6 +-
 .../npu/{ => optimizer}/npu_transform_pass.cc |  25 +--
 .../npu/{ => optimizer}/npu_transform_pass.h  |  30 +++-
 .../runtime/agent/npu/subgraph_npu_kernel.cc  |  14 +-
 .../runtime/agent/npu/subgraph_npu_kernel.h   |   4 +-
 .../src/runtime/kernel/npu/arithmetic_npu.cc  |  13 +-
 .../lite/src/runtime/kernel/npu/cast_npu.cc   |   4 +-
 .../lite/src/runtime/kernel/npu/cast_npu.h    |   7 +-
 .../runtime/kernel/npu/convolution_base_npu.h |   2 +-
 .../src/runtime/kernel/npu/convolution_npu.cc |   2 +-
 .../lite/src/runtime/kernel/npu/gather_npu.cc |   2 +-
 .../lite/src/runtime/kernel/npu/gather_npu.h  |   5 +-
 .../lite/src/runtime/kernel/npu/matmul_npu.cc |   4 +-
 .../lite/src/runtime/kernel/npu/matmul_npu.h  |   7 +-
 .../lite/src/runtime/kernel/npu/npu_kernel.h  |   5 +
 .../lite/src/runtime/kernel/npu/pad_npu.cc    |   8 +-
 .../lite/src/runtime/kernel/npu/pad_npu.h     |   9 +-
 .../lite/src/runtime/kernel/npu/resize_npu.cc |  16 +-
 .../lite/src/runtime/kernel/npu/resize_npu.h  |  14 +-
 .../lite/src/runtime/kernel/npu/scale_npu.cc  |   2 +-
 .../lite/src/runtime/kernel/npu/scale_npu.h   |   4 +-
 .../src/runtime/kernel/npu/softmax_npu.cc     |   4 +-
 .../lite/src/runtime/kernel/npu/softmax_npu.h |   5 +-
 .../lite/src/runtime/kernel/npu/split_npu.cc  |  10 +-
 .../lite/src/runtime/kernel/npu/split_npu.h   |   9 +-
 .../runtime/kernel/npu/strided_slice_npu.cc   |  10 +-
 .../runtime/kernel/npu/strided_slice_npu.h    |  13 +-
 .../runtime/kernel/npu/transpose_base_npu.cc  |  54 ------
 .../runtime/kernel/npu/transpose_base_npu.h   |  41 -----
 .../src/runtime/kernel/npu/transpose_npu.cc   |   4 +-
 mindspore/lite/src/scheduler.cc               |  56 +++----
 mindspore/lite/src/scheduler.h                |   6 +-
 48 files changed, 497 insertions(+), 361 deletions(-)
 rename mindspore/lite/src/runtime/agent/npu/{ => optimizer}/npu_add_transform_pass.cc (90%)
 rename mindspore/lite/src/runtime/agent/npu/{ => optimizer}/npu_add_transform_pass.h (57%)
 create mode 100644 mindspore/lite/src/runtime/agent/npu/optimizer/npu_base_pass.h
 rename mindspore/lite/src/runtime/agent/npu/{ => optimizer}/npu_concat_transform_pass.cc (90%)
 rename mindspore/lite/src/runtime/agent/npu/{ => optimizer}/npu_concat_transform_pass.h (56%)
 rename mindspore/lite/src/runtime/agent/npu/{ => optimizer}/npu_fusion_pass.cc (98%)
 rename mindspore/lite/src/runtime/agent/npu/{ => optimizer}/npu_fusion_pass.h (71%)
 create mode 100644 mindspore/lite/src/runtime/agent/npu/optimizer/npu_pass_manager.cc
 create mode 100644 mindspore/lite/src/runtime/agent/npu/optimizer/npu_pass_manager.h
 rename mindspore/lite/src/runtime/agent/npu/{ => optimizer}/npu_pass_utils.cc (96%)
 rename mindspore/lite/src/runtime/agent/npu/{ => optimizer}/npu_pass_utils.h (88%)
 rename mindspore/lite/src/runtime/agent/npu/{ => optimizer}/npu_transform_pass.cc (92%)
 rename mindspore/lite/src/runtime/agent/npu/{ => optimizer}/npu_transform_pass.h (65%)
 delete mode 100644 mindspore/lite/src/runtime/kernel/npu/transpose_base_npu.cc
 delete mode 100644 mindspore/lite/src/runtime/kernel/npu/transpose_base_npu.h

diff --git a/mindspore/lite/src/lite_session.cc b/mindspore/lite/src/lite_session.cc
index dca4d394ee4..e2c6b0e30aa 100644
--- a/mindspore/lite/src/lite_session.cc
+++ b/mindspore/lite/src/lite_session.cc
@@ -30,6 +30,7 @@
 #include "src/runtime/kernel/arm/base/dequant.h"
 #if SUPPORT_NPU
 #include "src/runtime/agent/npu/npu_manager.h"
+#include "src/runtime/agent/npu/optimizer/npu_pass_manager.h"
 #endif

 namespace mindspore {
@@ -366,7 +367,7 @@ int LiteSession::CompileGraph(Model *model) {
     return ret;
   }
   // scheduler kernels
-  Scheduler scheduler(context_, model, tensors_);
+  Scheduler scheduler(context_, model, &tensors_);
   ret = scheduler.Schedule(&kernels_);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "Schedule kernels failed: " << ret;
@@ -537,6 +538,10 @@ LiteSession::~LiteSession() {
   delete this->context_;
   delete this->executor_;
   this->executor_ = nullptr;
+#if SUPPORT_NPU
+  mindspore::lite::NPUPassManager::GetInstance()->Clear();
+  mindspore::lite::NPUManager::GetInstance()->Reset();
+#endif
   is_running_.store(false);
 }

diff --git a/mindspore/lite/src/runtime/agent/npu/CMakeLists.txt b/mindspore/lite/src/runtime/agent/npu/CMakeLists.txt
index 63c883d9bd8..6971dfa3f55 100644
--- a/mindspore/lite/src/runtime/agent/npu/CMakeLists.txt
+++ b/mindspore/lite/src/runtime/agent/npu/CMakeLists.txt
@@ -1,9 +1,8 @@
 include_directories(${DDK_PATH})
-include_directories(${CMAKE_CURRENT_SOURCE_DIR}/)
-include_directories(${CMAKE_CURRENT_SOURCE_DIR}/kernel)
 file(GLOB_RECURSE NPU_RUNTIME_SRC
     ${CMAKE_CURRENT_SOURCE_DIR}/*.cc
     ${CMAKE_CURRENT_SOURCE_DIR}/../../kernel/npu/*.cc
+    ${CMAKE_CURRENT_SOURCE_DIR}/optimizer/*.cc
     )
 add_library(hiai SHARED IMPORTED)
 set_target_properties(hiai PROPERTIES IMPORTED_LOCATION

diff --git a/mindspore/lite/src/runtime/agent/npu/npu_executor.cc b/mindspore/lite/src/runtime/agent/npu/npu_executor.cc
index 082a18966bd..39640df1dbe 100644
--- a/mindspore/lite/src/runtime/agent/npu/npu_executor.cc
+++ b/mindspore/lite/src/runtime/agent/npu/npu_executor.cc
@@ -17,6 +17,7 @@
 #include "src/runtime/agent/npu/npu_executor.h"
 #include "include/errorcode.h"
 #include "src/runtime/agent/npu/npu_manager.h"
+#include "nnacl/pack.h"
 namespace mindspore::lite {
 int NPUExecutor::Prepare(const std::vector<kernel::LiteKernel *> &kernels) {
   this->client_ = mindspore::lite::NPUManager::GetInstance()->GetClient(model_name_);
@@ -32,6 +33,7 @@ int NPUExecutor::Prepare(const std::vector<kernel::LiteKernel *> &kernels) {
 }

 int NPUExecutor::Run(const std::vector<Tensor *> &in_tensors, const std::vector<Tensor *> &out_tensors,
+                     const std::vector<kernel::LiteKernel *> &out_kernels,
                      const std::vector<kernel::LiteKernel *> &kernels, Allocator *allocator,
                      const KernelCallBack &before, const KernelCallBack &after) {
   hiai::AiContext context;
@@ -63,14 +65,32 @@ int NPUExecutor::Run(const std::vector<Tensor *> &in_tensors, const std::vector<Tensor *> &out_tensors,
     return RET_ERROR;
   }

+  // For output kernels of the whole model whose format is NCHW, the output tensor
+  // needs to be converted from NCHW to NHWC.
+  std::vector<Tensor *> trans_tensors;
+  for (auto kernel : out_kernels) {
+    if (kernel->out_kernels().empty() && npu_trans_nodes.find(kernel->Type()) != npu_trans_nodes.end()) {
+      for (int i = 0; i < kernel->out_tensors().size(); ++i) {
+        trans_tensors.push_back(kernel->out_tensors()[i]);
+      }
+    }
+  }
   for (int i = 0; i < npu_output_tensors_.size(); ++i) {
     void *data = out_tensors[i]->MutableData();
     if (data == nullptr) {
       MS_LOG(ERROR) << "Malloc buffer failed.";
       return RET_ERROR;
     }
-    memcpy(data, npu_output_tensors_[i]->GetBuffer(), npu_output_tensors_[i]->GetSize());
-    out_tensors[i]->ResetRefCount();
+    if (std::find(trans_tensors.begin(), trans_tensors.end(), out_tensors[i]) != trans_tensors.end()) {
+      // Convert the data and the tensor shape from NCHW to NHWC.
+      PackNCHWToNHWCFp32(npu_output_tensors_[i]->GetBuffer(), data, out_tensors[i]->Batch(),
+                         out_tensors[i]->Width() * out_tensors[i]->Height(), out_tensors[i]->Channel());
+      out_tensors[i]->set_shape({out_tensors[i]->shape()[0], out_tensors[i]->shape()[2], out_tensors[i]->shape()[3],
+                                 out_tensors[i]->shape()[1]});
+    } else {
+      memcpy(data, npu_output_tensors_[i]->GetBuffer(), npu_output_tensors_[i]->GetSize());
+      out_tensors[i]->ResetRefCount();
+    }
   }
   return RET_OK;
 }

diff --git a/mindspore/lite/src/runtime/agent/npu/npu_executor.h b/mindspore/lite/src/runtime/agent/npu/npu_executor.h
index 899239e313e..95a82c79c09 100644
--- a/mindspore/lite/src/runtime/agent/npu/npu_executor.h
+++ b/mindspore/lite/src/runtime/agent/npu/npu_executor.h
@@ -32,8 +32,9 @@ class NPUExecutor : public Executor {
   int Prepare(const std::vector<kernel::LiteKernel *> &kernels) override;

   int Run(const std::vector<Tensor *> &in_tensors, const std::vector<Tensor *> &out_tensors,
-          const std::vector<kernel::LiteKernel *> &kernels, Allocator *allocator = nullptr,
-          const KernelCallBack &before = nullptr, const KernelCallBack &after = nullptr);
+          const std::vector<kernel::LiteKernel *> &out_kernels, const std::vector<kernel::LiteKernel *> &kernels,
+          Allocator *allocator = nullptr, const KernelCallBack &before = nullptr,
+          const KernelCallBack &after = nullptr);

  private:
   int GetIOTensorVec();
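// The repacking routine above comes from nnacl/pack.h. The sketch below shows what
// PackNCHWToNHWCFp32 is expected to do with the (batch, plane, channel) arguments
// used at the call site; the body is illustrative only, not the nnacl implementation.
// --- illustrative sketch (not part of the patch) ---
#include <cstddef>

void PackNCHWToNHWCFp32Sketch(const float *src, float *dst, int batch, int plane, int channel) {
  // `plane` is H * W, matching the Width() * Height() argument above.
  for (int b = 0; b < batch; ++b) {
    const float *src_batch = src + static_cast<size_t>(b) * plane * channel;
    float *dst_batch = dst + static_cast<size_t>(b) * plane * channel;
    for (int c = 0; c < channel; ++c) {
      for (int p = 0; p < plane; ++p) {
        // NCHW index (c, p) maps to NHWC index (p, c).
        dst_batch[p * channel + c] = src_batch[c * plane + p];
      }
    }
  }
}
// --- end sketch ---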
diff --git a/mindspore/lite/src/runtime/agent/npu/npu_manager.cc b/mindspore/lite/src/runtime/agent/npu/npu_manager.cc
index c4dff4e56bb..fc0cc9e0ef5 100644
--- a/mindspore/lite/src/runtime/agent/npu/npu_manager.cc
+++ b/mindspore/lite/src/runtime/agent/npu/npu_manager.cc
@@ -55,6 +55,26 @@ bool NPUManager::CheckEMUIVersion() {
   return true;
 }

+void NPUManager::Reset() {
+  index_ = 0;
+  domi::HiaiIrBuild ir_build;
+  for (const auto &model_map : models_) {
+    auto model = model_map.second;
+    if (!model->is_freed) {
+      ir_build.ReleaseModelBuff(*model->model_buffer_data_);
+      model->model_buffer_data_ = nullptr;
+      model->is_freed = true;
+      model->desc_.reset();
+    }
+    delete model;
+  }
+  models_.clear();
+  clients_.clear();
+}
+
 bool NPUManager::CheckDDKVersion() {
   auto client = std::make_shared<hiai::AiModelMngerClient>();
   if (client->GetVersion() != nullptr) {
@@ -104,54 +124,102 @@ bool NPUManager::IsKirinChip() {
   return false;
 }

-int NPUManager::AddModel(void *model_buf, uint32_t size, const std::string &model_name, int frequency) {
-  hiai::MemBuffer *buffer = mc_builder_->InputMemBufferCreate(model_buf, size);
-  if (buffer == nullptr) {
-    MS_LOG(ERROR) << "MemBuffer is null.";
-    return RET_ERROR;
-  }
-
+int NPUManager::AddModel(domi::ModelBufferData *model_buffer_data, const std::string &model_name, int frequency) {
+  auto model = new SubGraphModel(index_, model_name, model_buffer_data, frequency);
   auto desc = std::make_shared<hiai::AiModelDescription>(model_name, frequency, 0, 0, 0);
-  desc->SetModelBuffer(buffer->GetMemBufferData(), buffer->GetMemBufferSize());
-  model_desc_.push_back(desc);
-  mc_builder_->MemBufferDestroy(buffer);
-
-  model_map_.insert({model_name, index_});
+  model->desc_ = desc;
+  models_.insert({model_name, model});
   index_++;
   return RET_OK;
 }
-
-int NPUManager::LoadOMModel() {
-  for (int i = 0; i < index_ / MAX_MODEL_NUM + 1; i++) {
-    auto client = std::make_shared<hiai::AiModelMngerClient>();
-    if (client == nullptr) {
-      MS_LOG(ERROR) << "NPU client is nullptr.";
-      return RET_ERROR;
-    }
-    int ret = client->Init(nullptr);
-    if (ret != hiai::AI_SUCCESS) {
-      MS_LOG(ERROR) << "NPU client init failed. code is " << ret;
-      return RET_ERROR;
-    }
-    mc_builder_ = std::make_shared<hiai::AiModelBuilder>(client);
-
-    vector<std::shared_ptr<hiai::AiModelDescription>> desc(model_desc_.begin() + i * MAX_MODEL_NUM,
-                                                           ((i + 1) * MAX_MODEL_NUM > index_)
-                                                             ? model_desc_.begin() + index_
-                                                             : model_desc_.begin() + (i + 1) * MAX_MODEL_NUM);
-    ret = client->Load(desc);
-    if (ret != hiai::AI_SUCCESS) {
-      MS_LOG(ERROR) << "Client load model failed." << ret;
-      return RET_ERROR;
-    }
-    clients_.push_back(client);
+std::shared_ptr<hiai::AiModelMngerClient> NPUManager::CreateAiModelMngerClient() {
+  auto client = std::make_shared<hiai::AiModelMngerClient>();
+  if (client == nullptr) {
+    MS_LOG(ERROR) << "NPU client is nullptr.";
+    return nullptr;
   }
+  int ret = client->Init(nullptr);
+  if (ret != hiai::AI_SUCCESS) {
+    MS_LOG(ERROR) << "NPU client init failed. code is " << ret;
+    return nullptr;
+  }
+  return client;
+}

+int NPUManager::LoadOMModel() {
+  std::vector<std::shared_ptr<hiai::AiModelDescription>> models_desc;
+  std::shared_ptr<hiai::AiModelMngerClient> client = nullptr;
+  std::shared_ptr<hiai::AiModelBuilder> mc_builder = nullptr;
+  int total = 0;
+  for (const auto &model_map : models_) {
+    if (total % MAX_MODEL_NUM == 0) {
+      client = CreateAiModelMngerClient();
+      if (client == nullptr) {
+        MS_LOG(ERROR) << "Create Client failed.";
+        return RET_ERROR;
+      }
+      mc_builder = std::make_shared<hiai::AiModelBuilder>(client);
+      if (mc_builder == nullptr) {
+        MS_LOG(ERROR) << "Create AiModelBuilder failed.";
+        return RET_ERROR;
+      }
+    }
+    total++;
+    auto model = model_map.second;
+    if (model->is_loaded && model->is_freed) {
+      continue;
+    }
+    models_desc.push_back(model->desc_);
+
+    auto buffer = mc_builder->InputMemBufferCreate(model->model_buffer_data_->data, model->model_buffer_data_->length);
+    if (buffer == nullptr) {
+      MS_LOG(ERROR) << "NPU input memory buffer create failed.";
+      return RET_ERROR;
+    }
+    model->desc_->SetModelBuffer(model->model_buffer_data_->data, model->model_buffer_data_->length);
+    if (models_desc.size() == MAX_MODEL_NUM) {
+      auto ret = LoadModel(client, models_desc);
+      if (ret != RET_OK) {
+        MS_LOG(ERROR) << "Client load model failed.";
+        return RET_ERROR;
+      }
+      models_desc.clear();
+    }
+  }
+
+  if (!models_desc.empty()) {
+    auto ret = LoadModel(client, models_desc);
+    if (ret != RET_OK) {
+      MS_LOG(ERROR) << "Client load model failed.";
+      return RET_ERROR;
+    }
+    models_desc.clear();
+  }
+
   return RET_OK;
 }

 std::shared_ptr<hiai::AiModelMngerClient> NPUManager::GetClient(const std::string &model_name) {
-  return clients_[model_map_[model_name] / MAX_MODEL_NUM];
+  return models_[model_name]->client_;
 }

 int NPUManager::index() const { return index_; }
+
+int NPUManager::LoadModel(const std::shared_ptr<hiai::AiModelMngerClient> &client,
+                          std::vector<std::shared_ptr<hiai::AiModelDescription>> desc_list) {
+  auto ret = client->Load(desc_list);
+  if (ret != hiai::AI_SUCCESS) {
+    MS_LOG(ERROR) << "Client load model failed. " << ret;
+    return RET_ERROR;
+  }
+
+  for (const auto &desc : desc_list) {
+    auto it = models_.find(desc->GetName());
+    it->second->is_loaded = true;
+    it->second->client_ = client;
+  }
+
+  this->clients_.push_back(client);
+  return RET_OK;
+}
 }  // namespace mindspore::lite

diff --git a/mindspore/lite/src/runtime/agent/npu/npu_manager.h b/mindspore/lite/src/runtime/agent/npu/npu_manager.h
index d2d4b011bf2..d7a8728a00f 100644
--- a/mindspore/lite/src/runtime/agent/npu/npu_manager.h
+++ b/mindspore/lite/src/runtime/agent/npu/npu_manager.h
@@ -18,9 +18,11 @@
 #define MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_MANAGER_H_
 #include
 #include
+#include
 #include
 #include
 #include
+#include "include/hiai_ir_build.h"
 #include "schema/model_generated.h"
 #include "include/HiAiModelManagerService.h"

@@ -29,17 +31,34 @@ static std::set<schema::PrimitiveType> npu_trans_nodes = {
   schema::PrimitiveType_Conv2D,          schema::PrimitiveType_DeConv2D,
   schema::PrimitiveType_DepthwiseConv2D, schema::PrimitiveType_DeDepthwiseConv2D,
   schema::PrimitiveType_Resize,          schema::PrimitiveType_Pooling};
+struct SubGraphModel {
+ public:
+  SubGraphModel(int index, std::string model_name, domi::ModelBufferData *model_buffer_data, int frequency)
+      : index_(index), model_name_(std::move(model_name)), model_buffer_data_(model_buffer_data) {}
+
+  bool is_freed = false;
+  bool is_loaded = false;
+  int index_;
+  std::string model_name_;
+  domi::ModelBufferData *model_buffer_data_;
+  std::shared_ptr<hiai::AiModelMngerClient> client_;
+  std::shared_ptr<hiai::AiModelDescription> desc_;
+};
 class NPUManager {
  public:
   static NPUManager *GetInstance() {
-    static NPUManager npuManager;
-    return &npuManager;
+    static NPUManager manager;
+    return &manager;
   }

+  ~NPUManager() { Reset(); }
+
   bool IsSupportNPU();

   // provide to subgraph to add model.
-  int AddModel(void *model_buf, uint32_t size, const std::string &model_name, int frequency);
+  int AddModel(domi::ModelBufferData *model_buffer_data, const std::string &model_name, int frequency);

   // scheduler to load om model.
   int LoadOMModel();
@@ -49,6 +68,11 @@ class NPUManager {

   int index() const;

+  void Reset();
+
+  int LoadModel(const std::shared_ptr<hiai::AiModelMngerClient> &client,
+                std::vector<std::shared_ptr<hiai::AiModelDescription>> desc_list);
+
  private:
   bool IsKirinChip();

@@ -58,16 +82,12 @@ class NPUManager {

   int CompareVersion(const std::string &version1, const std::string &version2);

+  std::shared_ptr<hiai::AiModelMngerClient> CreateAiModelMngerClient();
+
  private:
   int index_ = 0;
-
+  std::unordered_map<std::string, SubGraphModel *> models_;
   std::vector<std::shared_ptr<hiai::AiModelMngerClient>> clients_;
-
-  std::vector<std::shared_ptr<hiai::AiModelDescription>> model_desc_;
-
-  std::shared_ptr<hiai::AiModelBuilder> mc_builder_ = nullptr;
-
-  std::unordered_map<std::string, int> model_map_;
 };
 }  // namespace mindspore::lite
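// Taken together, the manager changes replace client-index arithmetic with per-model
// bookkeeping, so buffers and clients are released exactly once. A sketch of the
// assumed lifecycle (identifiers are from this patch; error handling elided, and
// `model_buffer_data` is assumed to come from SubGraphNpuKernel::BuildIRModel()):
// --- illustrative sketch (not part of the patch) ---
auto *manager = mindspore::lite::NPUManager::GetInstance();
manager->AddModel(model_buffer_data, "kNpuSubGraph0.om", /*frequency=*/3);  // manager takes over the buffer
manager->LoadOMModel();                                // loads in batches of MAX_MODEL_NUM per client
auto client = manager->GetClient("kNpuSubGraph0.om");  // per-model lookup, no index arithmetic
// ... run inference through NPUExecutor ...
manager->Reset();  // frees each ModelBufferData via domi::HiaiIrBuild::ReleaseModelBuff
// --- end sketch ---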
diff --git a/mindspore/lite/src/runtime/agent/npu/npu_add_transform_pass.cc b/mindspore/lite/src/runtime/agent/npu/optimizer/npu_add_transform_pass.cc
similarity index 90%
rename from mindspore/lite/src/runtime/agent/npu/npu_add_transform_pass.cc
rename to mindspore/lite/src/runtime/agent/npu/optimizer/npu_add_transform_pass.cc
index 6ae717cb480..349703c4afa 100644
--- a/mindspore/lite/src/runtime/agent/npu/npu_add_transform_pass.cc
+++ b/mindspore/lite/src/runtime/agent/npu/optimizer/npu_add_transform_pass.cc
@@ -13,8 +13,8 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-#include "src/runtime/agent/npu/npu_add_transform_pass.h"
-#include "src/runtime/agent/npu/npu_pass_utils.h"
+#include "src/runtime/agent/npu/optimizer/npu_add_transform_pass.h"
+#include "src/runtime/agent/npu/optimizer/npu_pass_utils.h"
 namespace mindspore::lite {
 using kernel::KERNEL_ARCH::kNPU;
 int NPUAddTransformPass::UpdateNH2NCTransNodePreKernel(kernel::LiteKernel *kernel, kernel::LiteKernel *trans_kernel,
@@ -56,9 +56,11 @@ int NPUAddTransformPass::InsertNode(const InnerContext *context, std::vector<kernel::LiteKernel *>::iterator it,
   auto nh2nc_name = kernel->name() + "_nh2nc_" + std::to_string(total++);
   auto *nh2nc_kernel = NPUPassUtils::CreateNhwc2NchwKernel(kernel->out_tensors(), nh2nc_tensors, context, nh2nc_name);
   all_kernels->push_back(nh2nc_kernel);
+  insert_primitive_.push_back(nh2nc_kernel->GetPrimitive());
   auto nc2nh_name = kernel->name() + "_nc2nh_" + std::to_string(total++);
   auto *nc2nh_kernel = NPUPassUtils::CreateNchw2NhwcKernel(nh2nc_tensors, nc2nh_tensors, context, nc2nh_name);
   all_kernels->push_back(nc2nh_kernel);
+  insert_primitive_.push_back(nc2nh_kernel->GetPrimitive());
   NPUPassUtils::UpdateKernel(nh2nc_kernel, {kernel}, {nc2nh_kernel}, kernel->out_tensors(), nh2nc_tensors);
   NPUPassUtils::UpdateKernel(nc2nh_kernel, {nh2nc_kernel}, {out_kernel}, nh2nc_tensors, nc2nh_tensors);
   UpdateNH2NCTransNodePreKernel(kernel, nh2nc_kernel, out_kernel);
@@ -91,12 +93,11 @@ int NPUAddTransformPass::UpdateNC2NHTransNodeAfterKernel(kernel::LiteKernel *kernel,
   return RET_OK;
 }

-int NPUAddTransformPass::Run(const InnerContext *context, std::vector<kernel::LiteKernel *> *all_kernels,
-                             std::vector<Tensor *> *all_tensors) {
-  if (context->IsNpuEnabled()) {
+int NPUAddTransformPass::Run() {
+  if (context_->IsNpuEnabled()) {
     std::vector<kernel::LiteKernel *> new_kernels;

-    for (auto it = all_kernels->begin(); it != all_kernels->end(); it++) {
+    for (auto it = all_kernels_->begin(); it != all_kernels_->end(); it++) {
       auto kernel = *it;
       new_kernels.push_back(kernel);
       if (kernel->desc().arch != kNPU) {
@@ -110,14 +111,14 @@ int NPUAddTransformPass::Run() {
       if (kernel->out_kernels().size() != sum) {
-        InsertNode(context, it, &new_kernels, all_tensors);
+        InsertNode(context_, it, &new_kernels, all_tensors_);
       }
     }

-    all_kernels->clear();
+    all_kernels_->clear();
     for (int i = 0; i < new_kernels.size(); i++) {
-      all_kernels->push_back(new_kernels[i]);
+      all_kernels_->push_back(new_kernels[i]);
     }
   }
   return RET_OK;

diff --git a/mindspore/lite/src/runtime/agent/npu/npu_add_transform_pass.h b/mindspore/lite/src/runtime/agent/npu/optimizer/npu_add_transform_pass.h
similarity index 57%
rename from mindspore/lite/src/runtime/agent/npu/npu_add_transform_pass.h
rename to mindspore/lite/src/runtime/agent/npu/optimizer/npu_add_transform_pass.h
index bc7ff39f1b9..e77f3e93ef5 100644
--- a/mindspore/lite/src/runtime/agent/npu/npu_add_transform_pass.h
+++ b/mindspore/lite/src/runtime/agent/npu/optimizer/npu_add_transform_pass.h
@@ -14,16 +14,29 @@
  * limitations under the License.
  */
-#ifndef MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_ADD_TRANSFORM_PASS_H_
-#define MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_ADD_TRANSFORM_PASS_H_
+#ifndef MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_OPTIMIZER_NPU_ADD_TRANSFORM_PASS_H_
+#define MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_OPTIMIZER_NPU_ADD_TRANSFORM_PASS_H_
 #include <vector>
 #include "src/lite_kernel.h"
 #include "src/ops/primitive_c.h"
+#include "src/runtime/agent/npu/optimizer/npu_base_pass.h"
 namespace mindspore::lite {
-class NPUAddTransformPass {
+class NPUAddTransformPass : public NPUBasePass {
  public:
-  int Run(const InnerContext *context, std::vector<kernel::LiteKernel *> *all_kernels,
-          std::vector<Tensor *> *all_tensors);
+  explicit NPUAddTransformPass(const InnerContext *context, std::vector<kernel::LiteKernel *> *all_kernels,
+                               std::vector<Tensor *> *all_tensors) {
+    context_ = context;
+    all_kernels_ = all_kernels;
+    all_tensors_ = all_tensors;
+    name_ = "NPUAddTransformPass";
+  }
+  ~NPUAddTransformPass() override {
+    for (auto primitive : insert_primitive_) {
+      delete primitive;
+    }
+    insert_primitive_.clear();
+  }
+  int Run() override;

  private:
   int UpdateNH2NCTransNodePreKernel(kernel::LiteKernel *kernel, kernel::LiteKernel *trans_kernel,
@@ -37,6 +50,10 @@ class NPUAddTransformPass : public NPUBasePass {

  private:
   int total = 0;
+  const InnerContext *context_;
+  std::vector<kernel::LiteKernel *> *all_kernels_;
+  std::vector<const PrimitiveC *> insert_primitive_;
+  std::vector<Tensor *> *all_tensors_;
 };
 }  // namespace mindspore::lite
-#endif  // MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_ADD_TRANSFORM_PASS_H_
+#endif  // MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_OPTIMIZER_NPU_ADD_TRANSFORM_PASS_H_
diff --git a/mindspore/lite/src/runtime/agent/npu/optimizer/npu_base_pass.h b/mindspore/lite/src/runtime/agent/npu/optimizer/npu_base_pass.h
new file mode 100644
index 00000000000..5087cad99dd
--- /dev/null
+++ b/mindspore/lite/src/runtime/agent/npu/optimizer/npu_base_pass.h
@@ -0,0 +1,34 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_OPTIMIZER_NPU_BASE_PASS_H_
+#define MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_OPTIMIZER_NPU_BASE_PASS_H_
+#include <string>
+namespace mindspore::lite {
+class NPUBasePass {
+ public:
+  virtual int Run() = 0;
+
+  virtual ~NPUBasePass() = default;
+
+  std::string name() { return name_; }
+
+ protected:
+  std::string name_;
+};
+}  // namespace mindspore::lite
+
+#endif  // MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_OPTIMIZER_NPU_BASE_PASS_H_

diff --git a/mindspore/lite/src/runtime/agent/npu/npu_concat_transform_pass.cc b/mindspore/lite/src/runtime/agent/npu/optimizer/npu_concat_transform_pass.cc
similarity index 90%
rename from mindspore/lite/src/runtime/agent/npu/npu_concat_transform_pass.cc
rename to mindspore/lite/src/runtime/agent/npu/optimizer/npu_concat_transform_pass.cc
index bf35aa22157..8eb32399616 100644
--- a/mindspore/lite/src/runtime/agent/npu/npu_concat_transform_pass.cc
+++ b/mindspore/lite/src/runtime/agent/npu/optimizer/npu_concat_transform_pass.cc
@@ -13,8 +13,8 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-#include "src/runtime/agent/npu/npu_concat_transform_pass.h"
-#include "src/runtime/agent/npu/npu_pass_utils.h"
+#include "src/runtime/agent/npu/optimizer/npu_concat_transform_pass.h"
+#include "src/runtime/agent/npu/optimizer/npu_pass_utils.h"
 namespace mindspore::lite {
 using kernel::KERNEL_ARCH::kNPU;
 int NPUConcatTransformPass::UpdateNH2NCTransNodePreKernel(kernel::LiteKernel *kernel, kernel::LiteKernel *trans_kernel,
@@ -54,9 +54,11 @@ int NPUConcatTransformPass::InsertNode(const InnerContext *context, std::vector<kernel::LiteKernel *>::iterator it,
   auto nh2nc_name = kernel->name() + "_nh2nc_" + std::to_string(total++);
   auto *nh2nc_kernel = NPUPassUtils::CreateNhwc2NchwKernel(kernel->out_tensors(), nh2nc_tensors, context, nh2nc_name);
   all_kernels->push_back(nh2nc_kernel);
+  insert_primitive_.push_back(nh2nc_kernel->GetPrimitive());
   auto nc2nh_name = kernel->name() + "_nc2nh_" + std::to_string(total++);
   auto *nc2nh_kernel = NPUPassUtils::CreateNchw2NhwcKernel(nh2nc_tensors, nc2nh_tensors, context, nc2nh_name);
   all_kernels->push_back(nc2nh_kernel);
+  insert_primitive_.push_back(nc2nh_kernel->GetPrimitive());
   NPUPassUtils::UpdateKernel(nh2nc_kernel, {kernel}, {nc2nh_kernel}, kernel->out_tensors(), nh2nc_tensors);
   NPUPassUtils::UpdateKernel(nc2nh_kernel, {nh2nc_kernel}, {out_kernel}, nh2nc_tensors, nc2nh_tensors);
   UpdateNH2NCTransNodePreKernel(kernel, nh2nc_kernel, out_kernel);
@@ -90,12 +92,11 @@ int NPUConcatTransformPass::UpdateNC2NHTransNodeAfterKernel(kernel::LiteKernel *kernel,
   return RET_OK;
 }

-int NPUConcatTransformPass::Run(const InnerContext *context, std::vector<kernel::LiteKernel *> *all_kernels,
-                                std::vector<Tensor *> *all_tensors) {
-  if (context->IsNpuEnabled()) {
+int NPUConcatTransformPass::Run() {
+  if (context_->IsNpuEnabled()) {
     std::vector<kernel::LiteKernel *> new_kernels;

-    for (auto it = all_kernels->begin(); it != all_kernels->end(); it++) {
+    for (auto it = all_kernels_->begin(); it != all_kernels_->end(); it++) {
       auto kernel = *it;
       if (kernel->desc().arch != kNPU) {
         new_kernels.push_back(kernel);
@@ -109,15 +110,15 @@ int NPUConcatTransformPass::Run() {
       if (kernel->out_kernels().size() != sum) {
-        InsertNode(context, it, &new_kernels, all_tensors);
+        InsertNode(context_, it, &new_kernels, all_tensors_);
       }
       new_kernels.push_back(kernel);
     }

-    all_kernels->clear();
+    all_kernels_->clear();
     for (int i = 0; i < new_kernels.size(); i++) {
-      all_kernels->push_back(new_kernels[i]);
+      all_kernels_->push_back(new_kernels[i]);
     }
   }
   return RET_OK;

diff --git a/mindspore/lite/src/runtime/agent/npu/npu_concat_transform_pass.h b/mindspore/lite/src/runtime/agent/npu/optimizer/npu_concat_transform_pass.h
similarity index 56%
rename from mindspore/lite/src/runtime/agent/npu/npu_concat_transform_pass.h
rename to mindspore/lite/src/runtime/agent/npu/optimizer/npu_concat_transform_pass.h
index 7a15879cd04..50fa3846e5d 100644
--- a/mindspore/lite/src/runtime/agent/npu/npu_concat_transform_pass.h
+++ b/mindspore/lite/src/runtime/agent/npu/optimizer/npu_concat_transform_pass.h
@@ -14,16 +14,29 @@
  * limitations under the License.
  */
-#ifndef MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_CONCAT_TRANSFORM_PASS_H_
-#define MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_CONCAT_TRANSFORM_PASS_H_
+#ifndef MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_OPTIMIZER_NPU_CONCAT_TRANSFORM_PASS_H_
+#define MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_OPTIMIZER_NPU_CONCAT_TRANSFORM_PASS_H_
 #include <vector>
 #include "src/lite_kernel.h"
 #include "src/ops/primitive_c.h"
+#include "src/runtime/agent/npu/optimizer/npu_base_pass.h"
 namespace mindspore::lite {
-class NPUConcatTransformPass {
+class NPUConcatTransformPass : public NPUBasePass {
  public:
-  int Run(const InnerContext *context, std::vector<kernel::LiteKernel *> *all_kernels,
-          std::vector<Tensor *> *all_tensors);
+  explicit NPUConcatTransformPass(const InnerContext *context, std::vector<kernel::LiteKernel *> *all_kernels,
+                                  std::vector<Tensor *> *all_tensors) {
+    context_ = context;
+    all_kernels_ = all_kernels;
+    all_tensors_ = all_tensors;
+    name_ = "NPUConcatTransformPass";
+  }
+  ~NPUConcatTransformPass() override {
+    for (auto primitive : insert_primitive_) {
+      delete primitive;
+    }
+    insert_primitive_.clear();
+  }
+  int Run() override;

  private:
   int UpdateNH2NCTransNodePreKernel(kernel::LiteKernel *kernel, kernel::LiteKernel *trans_kernel,
@@ -37,6 +50,10 @@ class NPUConcatTransformPass : public NPUBasePass {

  private:
   int total = 0;
+  const InnerContext *context_;
+  std::vector<kernel::LiteKernel *> *all_kernels_;
+  std::vector<Tensor *> *all_tensors_;
+  std::vector<const PrimitiveC *> insert_primitive_;
 };
 }  // namespace mindspore::lite
-#endif  // MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_CONCAT_TRANSFORM_PASS_H_
+#endif  // MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_OPTIMIZER_NPU_CONCAT_TRANSFORM_PASS_H_
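// Both transform passes now implement the NPUBasePass interface introduced above.
// A hypothetical minimal subclass, only to show the contract; it is not part of the
// patch, and a real pass would also take the kernel/tensor lists in its constructor:
// --- illustrative sketch (not part of the patch) ---
#include <string>
#include "src/runtime/agent/npu/optimizer/npu_base_pass.h"

namespace mindspore::lite {
class NPUNoopPass : public NPUBasePass {
 public:
  NPUNoopPass() { name_ = "NPUNoopPass"; }  // name_ is reported by the pass manager on failure
  ~NPUNoopPass() override = default;
  int Run() override {
    // Inspect or rewrite the kernel list here; return RET_OK (0) on success.
    return 0;
  }
};
}  // namespace mindspore::lite
// --- end sketch ---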
diff --git a/mindspore/lite/src/runtime/agent/npu/npu_fusion_pass.cc b/mindspore/lite/src/runtime/agent/npu/optimizer/npu_fusion_pass.cc
similarity index 98%
rename from mindspore/lite/src/runtime/agent/npu/npu_fusion_pass.cc
rename to mindspore/lite/src/runtime/agent/npu/optimizer/npu_fusion_pass.cc
index ab408dbcde2..2cea2677320 100644
--- a/mindspore/lite/src/runtime/agent/npu/npu_fusion_pass.cc
+++ b/mindspore/lite/src/runtime/agent/npu/optimizer/npu_fusion_pass.cc
@@ -13,7 +13,7 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-#include "src/runtime/agent/npu/npu_fusion_pass.h"
+#include "src/runtime/agent/npu/optimizer/npu_fusion_pass.h"
 #include <vector>
 #include "src/lite_kernel.h"
 #include "nnacl/concat_parameter.h"
@@ -198,7 +198,7 @@ int NPUFusionPass::FormatFusion(kernel::LiteKernel *kernel) {
   return RET_OK;
 }

-int NPUFusionPass::Fusion() {
+int NPUFusionPass::Run() {
   for (auto kernel : *kernels) {
     switch (kernel->Type()) {
       case schema::PrimitiveType_Concat:

diff --git a/mindspore/lite/src/runtime/agent/npu/npu_fusion_pass.h b/mindspore/lite/src/runtime/agent/npu/optimizer/npu_fusion_pass.h
similarity index 71%
rename from mindspore/lite/src/runtime/agent/npu/npu_fusion_pass.h
rename to mindspore/lite/src/runtime/agent/npu/optimizer/npu_fusion_pass.h
index 53d77984c7b..f31ff54a64d 100644
--- a/mindspore/lite/src/runtime/agent/npu/npu_fusion_pass.h
+++ b/mindspore/lite/src/runtime/agent/npu/optimizer/npu_fusion_pass.h
@@ -14,17 +14,23 @@
  * limitations under the License.
  */
-#ifndef MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_FUSION_PASS_H_
-#define MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_FUSION_PASS_H_
+#ifndef MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_OPTIMIZER_NPU_FUSION_PASS_H_
+#define MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_OPTIMIZER_NPU_FUSION_PASS_H_
 #include <vector>
 #include "src/lite_kernel.h"
 #include "src/ops/primitive_c.h"
+#include "src/runtime/agent/npu/optimizer/npu_base_pass.h"
 namespace mindspore::lite {
-class NPUFusionPass {
+class NPUFusionPass : public NPUBasePass {
  public:
-  explicit NPUFusionPass(std::vector<kernel::LiteKernel *> *dst_kernels) { kernels = dst_kernels; }
-  ~NPUFusionPass() = default;
-  int Fusion();
+  explicit NPUFusionPass(std::vector<kernel::LiteKernel *> *dst_kernels) {
+    kernels = dst_kernels;
+    name_ = "NPUFusionPass";
+  }
+
+  ~NPUFusionPass() override = default;
+
+  int Run() override;

  protected:
   int ConcatFusion(kernel::LiteKernel *kernel);
@@ -37,4 +43,4 @@ class NPUFusionPass : public NPUBasePass {
   std::vector<kernel::LiteKernel *> *kernels;
 };
 }  // namespace mindspore::lite
-#endif  // MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_FUSION_PASS_H_
+#endif  // MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_OPTIMIZER_NPU_FUSION_PASS_H_

diff --git a/mindspore/lite/src/runtime/agent/npu/optimizer/npu_pass_manager.cc b/mindspore/lite/src/runtime/agent/npu/optimizer/npu_pass_manager.cc
new file mode 100644
index 00000000000..ab555c599df
--- /dev/null
+++ b/mindspore/lite/src/runtime/agent/npu/optimizer/npu_pass_manager.cc
@@ -0,0 +1,39 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "src/runtime/agent/npu/optimizer/npu_pass_manager.h"
+#include "include/errorcode.h"
+#include "src/common/log_adapter.h"
+namespace mindspore::lite {
+void NPUPassManager::AddPass(NPUBasePass *pass) { all_pass_.push_back(pass); }
+
+int NPUPassManager::Run() {
+  for (auto pass : all_pass_) {
+    auto ret = pass->Run();
+    if (ret != RET_OK) {
+      MS_LOG(ERROR) << "NPU pass run failed. Pass name: " << pass->name();
+      return ret;
+    }
+  }
+  return RET_OK;
+}
+
+void NPUPassManager::Clear() {
+  for (auto pass : all_pass_) {
+    delete pass;
+  }
+  all_pass_.clear();
+}
+}  // namespace mindspore::lite

diff --git a/mindspore/lite/src/runtime/agent/npu/optimizer/npu_pass_manager.h b/mindspore/lite/src/runtime/agent/npu/optimizer/npu_pass_manager.h
new file mode 100644
index 00000000000..2f0a56282d5
--- /dev/null
+++ b/mindspore/lite/src/runtime/agent/npu/optimizer/npu_pass_manager.h
@@ -0,0 +1,41 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_OPTIMIZER_NPU_PASS_MANAGER_H_
+#define MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_OPTIMIZER_NPU_PASS_MANAGER_H_
+#include <vector>
+#include "src/runtime/agent/npu/optimizer/npu_base_pass.h"
+namespace mindspore::lite {
+class NPUPassManager {
+ public:
+  static NPUPassManager *GetInstance() {
+    static NPUPassManager pass_manager;
+    return &pass_manager;
+  }
+
+  ~NPUPassManager() { Clear(); }
+
+  void AddPass(NPUBasePass *pass);
+
+  int Run();
+
+  void Clear();
+
+ private:
+  std::vector<NPUBasePass *> all_pass_;
+};
+}  // namespace mindspore::lite
+#endif  // MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_OPTIMIZER_NPU_PASS_MANAGER_H_

diff --git a/mindspore/lite/src/runtime/agent/npu/npu_pass_utils.cc b/mindspore/lite/src/runtime/agent/npu/optimizer/npu_pass_utils.cc
similarity index 96%
rename from mindspore/lite/src/runtime/agent/npu/npu_pass_utils.cc
rename to mindspore/lite/src/runtime/agent/npu/optimizer/npu_pass_utils.cc
index 4daaff3c32f..06fb85e6c66 100644
--- a/mindspore/lite/src/runtime/agent/npu/npu_pass_utils.cc
+++ b/mindspore/lite/src/runtime/agent/npu/optimizer/npu_pass_utils.cc
@@ -17,7 +17,7 @@
 #include "src/kernel_registry.h"
 #include "src/ops/nhwc2nchw.h"
 #include "src/ops/nchw2nhwc.h"
-#include "src/runtime/agent/npu/npu_pass_utils.h"
+#include "src/runtime/agent/npu/optimizer/npu_pass_utils.h"
 namespace mindspore::lite {
 using kernel::KERNEL_ARCH::kCPU;
 using kernel::KERNEL_ARCH::kNPU;
@@ -34,7 +34,7 @@ PrimitiveC *NPUPassUtils::CreateNchw2NhwcPrimitive() {
   }
   auto primitive_buf = reinterpret_cast<char *>(malloc(fbb.GetSize()));
   if (primitive_buf == nullptr) {
-    MS_LOG(ERROR) << "Malloc primitive_buf_ failed.";
+    MS_LOG(ERROR) << "Malloc primitive buffer failed.";
     fbb.Clear();
     return nullptr;
   }
@@ -58,7 +58,7 @@ PrimitiveC *NPUPassUtils::CreateNhwc2NchwPrimitive() {
   }
   auto primitive_buf = reinterpret_cast<char *>(malloc(fbb.GetSize()));
   if (primitive_buf == nullptr) {
-    MS_LOG(ERROR) << "Malloc primitive_buf_ failed.";
+    MS_LOG(ERROR) << "Malloc primitive buffer failed.";
     fbb.Clear();
     return nullptr;
   }
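// How the pieces are presumably wired together by the scheduler; the scheduler.cc
// hunk is not shown in this excerpt, so this is an assumed usage sketch with
// placeholder variables (`context`, `kernels`, `tensors`):
// --- illustrative sketch (not part of the patch) ---
auto *pass_manager = mindspore::lite::NPUPassManager::GetInstance();
pass_manager->AddPass(new mindspore::lite::NPUTransformPass(context, &kernels, &tensors));
pass_manager->AddPass(new mindspore::lite::NPUAddTransformPass(context, &kernels, &tensors));
pass_manager->AddPass(new mindspore::lite::NPUConcatTransformPass(context, &kernels, &tensors));
auto ret = pass_manager->Run();  // runs the passes in registration order
// LiteSession::~LiteSession() later calls Clear(), which deletes every pass and,
// through the pass destructors, the primitives the passes inserted.
// --- end sketch ---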
diff --git a/mindspore/lite/src/runtime/agent/npu/npu_pass_utils.h b/mindspore/lite/src/runtime/agent/npu/optimizer/npu_pass_utils.h
similarity index 88%
rename from mindspore/lite/src/runtime/agent/npu/npu_pass_utils.h
rename to mindspore/lite/src/runtime/agent/npu/optimizer/npu_pass_utils.h
index c1ae241abe9..2f843d1de06 100644
--- a/mindspore/lite/src/runtime/agent/npu/npu_pass_utils.h
+++ b/mindspore/lite/src/runtime/agent/npu/optimizer/npu_pass_utils.h
@@ -14,8 +14,8 @@
  * limitations under the License.
  */
-#ifndef MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_PASS_UTILS_H_
-#define MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_PASS_UTILS_H_
+#ifndef MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_OPTIMIZER_NPU_PASS_UTILS_H_
+#define MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_OPTIMIZER_NPU_PASS_UTILS_H_
 #include <string>
 #include <vector>
 #include "src/ops/primitive_c.h"
@@ -41,4 +41,4 @@ class NPUPassUtils {
   static PrimitiveC *CreateNhwc2NchwPrimitive();
 };
 }  // namespace mindspore::lite
-#endif  // MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_PASS_UTILS_H_
+#endif  // MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_OPTIMIZER_NPU_PASS_UTILS_H_

diff --git a/mindspore/lite/src/runtime/agent/npu/npu_transform_pass.cc b/mindspore/lite/src/runtime/agent/npu/optimizer/npu_transform_pass.cc
similarity index 92%
rename from mindspore/lite/src/runtime/agent/npu/npu_transform_pass.cc
rename to mindspore/lite/src/runtime/agent/npu/optimizer/npu_transform_pass.cc
index 232d7d0c6cb..918843c6653 100644
--- a/mindspore/lite/src/runtime/agent/npu/npu_transform_pass.cc
+++ b/mindspore/lite/src/runtime/agent/npu/optimizer/npu_transform_pass.cc
@@ -13,11 +13,11 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-#include "src/runtime/agent/npu/npu_transform_pass.h"
+#include "src/runtime/agent/npu/optimizer/npu_transform_pass.h"
 #include <vector>
 #include "src/lite_kernel.h"
 #include "src/runtime/agent/npu/npu_manager.h"
-#include "src/runtime/agent/npu/npu_pass_utils.h"
+#include "src/runtime/agent/npu/optimizer/npu_pass_utils.h"
 namespace mindspore::lite {
 using kernel::KERNEL_ARCH::kCPU;
 using kernel::KERNEL_ARCH::kNPU;
@@ -77,6 +77,7 @@ int NPUTransformPass::InsertPreNode(const InnerContext *context, std::vector<kernel::LiteKernel *>::iterator it,
     NPUPassUtils::CreateNhwc2NchwKernel({kernel->in_tensors()[0]}, pre_trans_out_tensors, context, name);
   // Insert Nhwc2Nchw into the front of the current queue
   all_kernels->push_back(pre_trans_kernel);
+  insert_primitive_.push_back(pre_trans_kernel->GetPrimitive());
   // Replace the output kernel of the previous node
   std::vector<kernel::LiteKernel *> pre_trans_in_kernel;
   if (is_input_kernel) {
@@ -99,6 +100,10 @@ int NPUTransformPass::InsertPostNode(const InnerContext *context, std::vector<kernel::LiteKernel *>::iterator it,
                                      std::vector<kernel::LiteKernel *> *all_kernels, std::vector<Tensor *> *all_tensors) {
   auto kernel = *it;
+  // The model output does not need a transpose inserted after it.
+  if (kernel->out_kernels().empty()) {
+    return RET_OK;
+  }
   // Single output multiple references
   for (int i = 0; i < kernel->out_kernels().size(); i++) {
     auto next_kernel = kernel->out_kernels().at(i);
@@ -118,6 +123,7 @@ int NPUTransformPass::InsertPostNode(const InnerContext *context, std::vector<kernel::LiteKernel *>::iterator it,
     auto *post_trans_kernel = NPUPassUtils::CreateNchw2NhwcKernel(kernel->out_tensors(), post_trans_out_tensors);
+    insert_primitive_.push_back(post_trans_kernel->GetPrimitive());
     // Directly insert in the back, will not affect the topological sort
     all_kernels->push_back(post_trans_kernel);
     UpdateNC2NHTransNodePreKernel(kernel, post_trans_kernel, next_kernel);
@@ -171,28 +177,27 @@ int NPUTransformPass::UpdateNC2NHTransNodeAfterKernel(kernel::LiteKernel *kernel,
   return RET_OK;
 }

-int NPUTransformPass::FormatTransformPass(const InnerContext *context, std::vector<kernel::LiteKernel *> *all_kernels,
-                                          std::vector<Tensor *> *all_tensors) {
-  if (context->IsNpuEnabled()) {
+int NPUTransformPass::Run() {
+  if (context_->IsNpuEnabled()) {
     std::vector<kernel::LiteKernel *> new_kernels;

-    for (auto it = all_kernels->begin(); it != all_kernels->end(); it++) {
+    for (auto it = all_kernels_->begin(); it != all_kernels_->end(); it++) {
       auto kernel = *it;
       if (kernel->desc().arch != kNPU) {
         new_kernels.push_back(kernel);
         continue;
       }
       if (npu_trans_nodes.find(kernel->Type()) != npu_trans_nodes.end()) {
-        InsertPreNode(context, it, &new_kernels, all_tensors);
+        InsertPreNode(context_, it, &new_kernels, all_tensors_);
         new_kernels.push_back(kernel);
-        InsertPostNode(context, it, &new_kernels, all_tensors);
+        InsertPostNode(context_, it, &new_kernels, all_tensors_);
       } else {
         new_kernels.push_back(kernel);
       }
     }
-    all_kernels->clear();
+    all_kernels_->clear();
     for (int i = 0; i < new_kernels.size(); i++) {
-      all_kernels->push_back(new_kernels[i]);
+      all_kernels_->push_back(new_kernels[i]);
     }
   }
   return RET_OK;

diff --git a/mindspore/lite/src/runtime/agent/npu/npu_transform_pass.h b/mindspore/lite/src/runtime/agent/npu/optimizer/npu_transform_pass.h
similarity index 65%
rename from mindspore/lite/src/runtime/agent/npu/npu_transform_pass.h
rename to mindspore/lite/src/runtime/agent/npu/optimizer/npu_transform_pass.h
index 34253d29ac0..09dd5626318 100644
--- a/mindspore/lite/src/runtime/agent/npu/npu_transform_pass.h
+++ b/mindspore/lite/src/runtime/agent/npu/optimizer/npu_transform_pass.h
@@ -14,16 +14,30 @@
  * limitations under the License.
  */
-#ifndef MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_TRANSFORM_PASS_H_
-#define MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_TRANSFORM_PASS_H_
+#ifndef MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_OPTIMIZER_NPU_TRANSFORM_PASS_H_
+#define MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_OPTIMIZER_NPU_TRANSFORM_PASS_H_
 #include <vector>
 #include "src/lite_kernel.h"
 #include "src/ops/primitive_c.h"
+#include "src/runtime/agent/npu/optimizer/npu_base_pass.h"
 namespace mindspore::lite {
-class NPUTransformPass {
+class NPUTransformPass : public NPUBasePass {
  public:
-  int FormatTransformPass(const InnerContext *context, std::vector<kernel::LiteKernel *> *all_kernels,
-                          std::vector<Tensor *> *all_tensors);
+  int Run() override;
+
+  explicit NPUTransformPass(const InnerContext *context, std::vector<kernel::LiteKernel *> *all_kernels,
+                            std::vector<Tensor *> *all_tensors) {
+    context_ = context;
+    all_kernels_ = all_kernels;
+    all_tensors_ = all_tensors;
+    name_ = "NPUTransformPass";
+  }
+  ~NPUTransformPass() override {
+    for (auto primitive : insert_primitive_) {
+      delete primitive;
+    }
+    insert_primitive_.clear();
+  }

  private:
   int UpdateNH2NCTransNodePreKernel(kernel::LiteKernel *kernel, kernel::LiteKernel *trans_kernel,
@@ -46,6 +60,10 @@ class NPUTransformPass : public NPUBasePass {

  private:
   int total = 0;
+  const InnerContext *context_;
+  std::vector<kernel::LiteKernel *> *all_kernels_;
+  std::vector<Tensor *> *all_tensors_;
+  std::vector<const PrimitiveC *> insert_primitive_;
 };
 }  // namespace mindspore::lite
-#endif  // MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_TRANSFORM_PASS_H_
+#endif  // MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_OPTIMIZER_NPU_TRANSFORM_PASS_H_
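// The recurring insert_primitive_ vector is the heart of this part of the leak fix:
// each pass records the primitives it creates for inserted transpose kernels and
// deletes them in its destructor. A self-contained sketch of the same ownership
// pattern expressed with std::unique_ptr (a hypothetical alternative with stand-in
// types, not what the patch does):
// --- illustrative sketch (not part of the patch) ---
#include <memory>
#include <vector>

struct Primitive {};  // stand-in for mindspore::lite::PrimitiveC

class OwningPass {
 public:
  Primitive *Create() {
    owned_.push_back(std::make_unique<Primitive>());
    return owned_.back().get();  // raw pointer handed to the kernel; ownership stays here
  }

 private:
  std::vector<std::unique_ptr<Primitive>> owned_;  // freed automatically, no hand-written dtor
};
// --- end sketch ---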
diff --git a/mindspore/lite/src/runtime/agent/npu/subgraph_npu_kernel.cc b/mindspore/lite/src/runtime/agent/npu/subgraph_npu_kernel.cc
index ab46cc70cb8..5fb4f92c2ea 100644
--- a/mindspore/lite/src/runtime/agent/npu/subgraph_npu_kernel.cc
+++ b/mindspore/lite/src/runtime/agent/npu/subgraph_npu_kernel.cc
@@ -70,7 +70,7 @@ domi::ModelBufferData *SubGraphNpuKernel::BuildIRModel() {
 }

 int SubGraphNpuKernel::Run() {
-  return reinterpret_cast<mindspore::lite::NPUExecutor *>(this->executor_)->Run(in_tensors_, out_tensors_, nodes_);
+  return reinterpret_cast<mindspore::lite::NPUExecutor *>(this->executor_)->Run(in_tensors_, out_tensors_, out_kernels_, nodes_);
 }

 int SubGraphNpuKernel::BuildNPUInputOp() {
@@ -159,17 +159,17 @@ int SubGraphNpuKernel::BuildNPUOutputOp() {
 std::string SubGraphNpuKernel::GetOMModelName() { return this->name_ + ".om"; }

 int SubGraphNpuKernel::Init() {
-  if (!isCompiled_) {
-    model_buffer_data_ = BuildIRModel();
-    if (model_buffer_data_ == nullptr) {
+  if (!is_compiled_) {
+    auto model_buffer_data = BuildIRModel();
+    if (model_buffer_data == nullptr) {
       MS_LOG(ERROR) << "Build IR model failed.";
       return RET_ERROR;
     }

     name_ = "kNpuSubGraph" + std::to_string(mindspore::lite::NPUManager::GetInstance()->index());

-    mindspore::lite::NPUManager::GetInstance()->AddModel(model_buffer_data_->data, model_buffer_data_->length,
-                                                         GetOMModelName(), context_->GetNpuInfo().frequency_);
+    mindspore::lite::NPUManager::GetInstance()->AddModel(model_buffer_data, GetOMModelName(),
+                                                         context_->GetNpuInfo().frequency_);

     executor_ = new (std::nothrow) mindspore::lite::NPUExecutor(GetOMModelName());

@@ -177,7 +177,7 @@ int SubGraphNpuKernel::Init() {
       MS_LOG(ERROR) << "Create NPUExecutor failed.";
       return RET_ERROR;
     }
-    isCompiled_ = true;
+    is_compiled_ = true;
   }
   return RET_OK;
 }

diff --git a/mindspore/lite/src/runtime/agent/npu/subgraph_npu_kernel.h b/mindspore/lite/src/runtime/agent/npu/subgraph_npu_kernel.h
index a670f4e23af..eed57206918 100644
--- a/mindspore/lite/src/runtime/agent/npu/subgraph_npu_kernel.h
+++ b/mindspore/lite/src/runtime/agent/npu/subgraph_npu_kernel.h
@@ -69,9 +69,7 @@ class SubGraphNpuKernel : public SubGraphKernel {
   std::string GetOMModelName();

  private:
-  bool isCompiled_ = false;
-
-  domi::ModelBufferData *model_buffer_data_;
+  bool is_compiled_ = false;

   std::vector<ge::Operator> subgraph_input_op_;

diff --git a/mindspore/lite/src/runtime/kernel/npu/arithmetic_npu.cc b/mindspore/lite/src/runtime/kernel/npu/arithmetic_npu.cc
index 54b54a503c7..4b5051c52f8 100644
--- a/mindspore/lite/src/runtime/kernel/npu/arithmetic_npu.cc
+++ b/mindspore/lite/src/runtime/kernel/npu/arithmetic_npu.cc
@@ -42,21 +42,14 @@ using mindspore::schema::PrimitiveType_Sub;
 namespace mindspore::kernel {
 int ArithmeticNPUKernel::IsSupport(const std::vector<lite::Tensor *> &inputs,
                                    const std::vector<lite::Tensor *> &outputs, OpParameter *opParameter) {
-  if (primitive_->Type() == PrimitiveType_Mul || primitive_->Type() == PrimitiveType_Div) {
+  if (primitive_->Type() == PrimitiveType_Mul || primitive_->Type() == PrimitiveType_Div ||
+      primitive_->Type() == PrimitiveType_Add || primitive_->Type() == PrimitiveType_Sub) {
     if (inputs[0]->shape() != inputs[1]->shape()) {
-      MS_LOG(WARNING) << "For the two inputs, the corresponding dimensions must have the same value."
+      MS_LOG(WARNING) << name_ << " for the two inputs, the corresponding dimensions must have the same value."
                       << " shape 1 is:" << inputs[0]->shape() << " shape 2 is:" << inputs[1]->shape();
       return RET_ERROR;
     }
   }
-  if (primitive_->Type() == PrimitiveType_Add || primitive_->Type() == PrimitiveType_Sub) {
-    if (inputs[0]->shape().size() != inputs[1]->shape().size()) {
-      MS_LOG(WARNING)
-        << "For the two inputs, the corresponding dimensions must have the same value, or one of them is 1."
-        << " shape 1 is:" << inputs[0]->shape() << " shape 2 is:" << inputs[1]->shape();
-      return RET_ERROR;
-    }
-  }
   return RET_OK;
 }

diff --git a/mindspore/lite/src/runtime/kernel/npu/cast_npu.cc b/mindspore/lite/src/runtime/kernel/npu/cast_npu.cc
index d60d26d33a8..338694ac011 100644
--- a/mindspore/lite/src/runtime/kernel/npu/cast_npu.cc
+++ b/mindspore/lite/src/runtime/kernel/npu/cast_npu.cc
@@ -35,8 +35,8 @@ int CastNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs,
     return RET_ERROR;
   }
   op_->set_input_x(*npu_inputs[0]);
-  op_->set_attr_dst_dtype(lite::ConverterToNPUDataType(dst_type_));
-  op_->set_attr_src_dtype(lite::ConverterToNPUDataType(src_type_));
+  op_->set_attr_dst_dtype(lite::ConverterToNPUDataType(static_cast<TypeId>(cast_parameter_->dst_type_)));
+  op_->set_attr_src_dtype(lite::ConverterToNPUDataType(static_cast<TypeId>(cast_parameter_->src_type_)));
   return RET_OK;
 }

diff --git a/mindspore/lite/src/runtime/kernel/npu/cast_npu.h b/mindspore/lite/src/runtime/kernel/npu/cast_npu.h
index 4ed5727e99b..9da4e714f8b 100644
--- a/mindspore/lite/src/runtime/kernel/npu/cast_npu.h
+++ b/mindspore/lite/src/runtime/kernel/npu/cast_npu.h
@@ -27,9 +27,7 @@ class CastNPUKernel : public NPUKernel {
                 const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx,
                 const mindspore::lite::PrimitiveC *primitive)
       : NPUKernel(parameter, inputs, outputs, ctx, primitive) {
-    auto cast_parameter = reinterpret_cast<CastParameter *>(parameter);
-    dst_type_ = static_cast<TypeId>(cast_parameter->dst_type_);
-    src_type_ = static_cast<TypeId>(cast_parameter->src_type_);
+    cast_parameter_ = reinterpret_cast<CastParameter *>(parameter);
   }
   ~CastNPUKernel() override;

@@ -41,8 +39,7 @@ class CastNPUKernel : public NPUKernel {

  private:
   hiai::op::CastT *op_ = nullptr;
-  TypeId dst_type_;
-  TypeId src_type_;
+  CastParameter *cast_parameter_;
 };
 }  // namespace mindspore::kernel
 #endif  // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_NPU_CAST_NPU_H_

diff --git a/mindspore/lite/src/runtime/kernel/npu/convolution_base_npu.h b/mindspore/lite/src/runtime/kernel/npu/convolution_base_npu.h
index 88b6bd5aba8..a15163a8ca9 100644
--- a/mindspore/lite/src/runtime/kernel/npu/convolution_base_npu.h
+++ b/mindspore/lite/src/runtime/kernel/npu/convolution_base_npu.h
@@ -18,8 +18,8 @@
 #include <vector>
 #include <memory>
+#include "src/runtime/kernel/npu/npu_kernel.h"
 #include "include/graph/op/all_ops.h"
-#include "src/runtime/kernel/npu/transpose_base_npu.h"
 #include "nnacl/conv_parameter.h"

 namespace mindspore::kernel {

diff --git a/mindspore/lite/src/runtime/kernel/npu/convolution_npu.cc b/mindspore/lite/src/runtime/kernel/npu/convolution_npu.cc
index 3126b13286f..9e02e6e3d90 100644
--- a/mindspore/lite/src/runtime/kernel/npu/convolution_npu.cc
+++ b/mindspore/lite/src/runtime/kernel/npu/convolution_npu.cc
@@ -30,7 +30,7 @@ int ConvolutionNPUKernel::IsSupport(const std::vector<lite::Tensor *> &inputs,
 int ConvolutionNPUKernel::SetConvParam() {
   conv_->set_attr_strides(ge::AttrValue::LIST_INT({conv_param_->stride_h_, conv_param_->stride_w_}));
   conv_->set_attr_dilations(ge::AttrValue::LIST_INT({conv_param_->dilation_h_, conv_param_->dilation_w_}));
-  conv_->set_attr_groups(1);
+  conv_->set_attr_groups(conv_param_->group_);

   if (conv_param_->pad_mode_ == Pad_Same) {
     conv_->set_attr_pad_mode(ge::AttrValue::STR{"SAME"});
diff --git a/mindspore/lite/src/runtime/kernel/npu/gather_npu.cc b/mindspore/lite/src/runtime/kernel/npu/gather_npu.cc
index 2a638c144d9..66440e08ee9 100644
--- a/mindspore/lite/src/runtime/kernel/npu/gather_npu.cc
+++ b/mindspore/lite/src/runtime/kernel/npu/gather_npu.cc
@@ -41,7 +41,7 @@ int GatherNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs,
   op_->set_input_x(*npu_inputs[0]);
   op_->set_input_indices(*npu_inputs[1]);
-  op_->set_attr_axis(axis_);
+  op_->set_attr_axis(gather_parameter_->axis_);
   return RET_OK;
 }

diff --git a/mindspore/lite/src/runtime/kernel/npu/gather_npu.h b/mindspore/lite/src/runtime/kernel/npu/gather_npu.h
index af81d6558db..c1c7717a8f4 100644
--- a/mindspore/lite/src/runtime/kernel/npu/gather_npu.h
+++ b/mindspore/lite/src/runtime/kernel/npu/gather_npu.h
@@ -27,8 +27,7 @@ class GatherNPUKernel : public NPUKernel {
                   const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx,
                   const mindspore::lite::PrimitiveC *primitive)
       : NPUKernel(parameter, inputs, outputs, ctx, primitive) {
-    auto gather_parameter = reinterpret_cast<GatherParameter *>(parameter);
-    axis_ = gather_parameter->axis_;
+    gather_parameter_ = reinterpret_cast<GatherParameter *>(parameter);
   }
   ~GatherNPUKernel() override;

@@ -40,7 +39,7 @@ class GatherNPUKernel : public NPUKernel {

  private:
   hiai::op::GatherV2D *op_ = nullptr;
-  int axis_;
+  GatherParameter *gather_parameter_;
 };
 }  // namespace mindspore::kernel
 #endif  // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_NPU_GATHER_NPU_H_

diff --git a/mindspore/lite/src/runtime/kernel/npu/matmul_npu.cc b/mindspore/lite/src/runtime/kernel/npu/matmul_npu.cc
index f2a74205f8b..ba4cf5b76e1 100644
--- a/mindspore/lite/src/runtime/kernel/npu/matmul_npu.cc
+++ b/mindspore/lite/src/runtime/kernel/npu/matmul_npu.cc
@@ -33,8 +33,8 @@ int MatMulNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs,
   op_->set_input_x1(*npu_inputs[0]);
   op_->set_input_x2(*npu_inputs[1]);

-  op_->set_attr_transpose_x1(a_transpose_);
-  op_->set_attr_transpose_x2(b_transpose_);
+  op_->set_attr_transpose_x1(matmul_parameter_->a_transpose_);
+  op_->set_attr_transpose_x2(matmul_parameter_->b_transpose_);
   return RET_OK;
 }

diff --git a/mindspore/lite/src/runtime/kernel/npu/matmul_npu.h b/mindspore/lite/src/runtime/kernel/npu/matmul_npu.h
index befa0014307..02fc31d3a67 100644
--- a/mindspore/lite/src/runtime/kernel/npu/matmul_npu.h
+++ b/mindspore/lite/src/runtime/kernel/npu/matmul_npu.h
@@ -28,9 +28,7 @@ class MatMulNPUKernel : public NPUKernel {
                   const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx,
                   const mindspore::lite::PrimitiveC *primitive)
       : NPUKernel(parameter, inputs, outputs, ctx, primitive) {
-    auto matmul_parameter = reinterpret_cast<MatMulParameter *>(parameter);
-    a_transpose_ = matmul_parameter->a_transpose_;
-    b_transpose_ = matmul_parameter->b_transpose_;
+    matmul_parameter_ = reinterpret_cast<MatMulParameter *>(parameter);
   }
   ~MatMulNPUKernel() override;

@@ -42,8 +40,7 @@ class MatMulNPUKernel : public NPUKernel {

  private:
   hiai::op::MatMul *op_ = nullptr;
-  bool a_transpose_ = false;
-  bool b_transpose_ = false;
+  MatMulParameter *matmul_parameter_;
 };
 }  // namespace mindspore::kernel
 #endif  // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_NPU_MATMUL_NPU_H_

diff --git a/mindspore/lite/src/runtime/kernel/npu/npu_kernel.h b/mindspore/lite/src/runtime/kernel/npu/npu_kernel.h
index c1b85f4dd54..d87d4539376 100644
--- a/mindspore/lite/src/runtime/kernel/npu/npu_kernel.h
+++ b/mindspore/lite/src/runtime/kernel/npu/npu_kernel.h
@@ -53,6 +53,11 @@ kernel::LiteKernel *NPUKernelCreator(const std::vector<lite::Tensor *> &inputs,
                                      const std::vector<lite::Tensor *> &outputs, OpParameter *opParameter,
                                      const lite::InnerContext *ctx, const kernel::KernelKey &desc,
                                      const mindspore::lite::PrimitiveC *primitive) {
+  if (!primitive->infer_flag()) {
+    MS_LOG(ERROR) << "NPU does not support runtime shape inference.";
+    return nullptr;
+  }
+
   auto *kernel = new (std::nothrow) T(opParameter, inputs, outputs, ctx, primitive);
   if (kernel == nullptr) {
     MS_LOG(ERROR) << "kernel " << opParameter->name_ << " is nullptr.";

diff --git a/mindspore/lite/src/runtime/kernel/npu/pad_npu.cc b/mindspore/lite/src/runtime/kernel/npu/pad_npu.cc
index b3c853ea6e2..3b63ab2c007 100644
--- a/mindspore/lite/src/runtime/kernel/npu/pad_npu.cc
+++ b/mindspore/lite/src/runtime/kernel/npu/pad_npu.cc
@@ -25,7 +25,7 @@ using mindspore::schema::PrimitiveType_Pad;
 namespace mindspore::kernel {
 int PadNPUKernel::IsSupport(const std::vector<lite::Tensor *> &inputs,
                             const std::vector<lite::Tensor *> &outputs, OpParameter *opParameter) {
-  if (padding_mode_ != schema::PaddingMode_CONSTANT) {
+  if (pad_->GetPaddingMode() != schema::PaddingMode_CONSTANT) {
     MS_LOG(WARNING) << "NPU only support CONSTANT padding mode";
     return RET_ERROR;
   }
@@ -39,16 +39,16 @@ int PadNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs,
     MS_LOG(ERROR) << name_ << " op is nullptr";
     return RET_ERROR;
   }
-  int size = static_cast<int>(paddings_.size() / 2);
+  int size = static_cast<int>(pad_->GetPaddings().size() / 2);
   ge::TensorDesc padding_tensor_desc(ge::Shape({size, 2}), ge::FORMAT_NCHW, ge::DT_INT32);
   ge::TensorPtr padding_tensor = std::make_shared<ge::Tensor>(padding_tensor_desc);
-  padding_tensor->SetData(reinterpret_cast<uint8_t *>(paddings_.data()), size * sizeof(int));
+  padding_tensor->SetData(reinterpret_cast<uint8_t *>(pad_->GetPaddings().data()), size * sizeof(int));
   auto paddings = new hiai::op::Const(name_ + "paddings");
   paddings->set_attr_value(padding_tensor);

   ge::TensorDesc constant_values_tensor_desc(ge::Shape({1}), ge::FORMAT_NCHW, ge::DT_FLOAT);
   ge::TensorPtr constant_values_tensor = std::make_shared<ge::Tensor>(constant_values_tensor_desc);
-  vector<float> constant_values_data_value = {constant_value_};
+  vector<float> constant_values_data_value = {pad_->GetConstantValue()};
   constant_values_tensor->SetData(reinterpret_cast<uint8_t *>(constant_values_data_value.data()), 1 * sizeof(float));
   auto constant = new hiai::op::Const(name_ + "constant");
   constant->set_attr_value(constant_values_tensor);

diff --git a/mindspore/lite/src/runtime/kernel/npu/pad_npu.h b/mindspore/lite/src/runtime/kernel/npu/pad_npu.h
index c1bda4e5569..fcb71e877cc 100644
--- a/mindspore/lite/src/runtime/kernel/npu/pad_npu.h
+++ b/mindspore/lite/src/runtime/kernel/npu/pad_npu.h
@@ -28,10 +28,7 @@ class PadNPUKernel : public NPUKernel {
                const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx,
                const mindspore::lite::PrimitiveC *primitive)
       : NPUKernel(parameter, inputs, outputs, ctx, primitive) {
-    auto pad = reinterpret_cast<const mindspore::lite::Pad *>(primitive);
-    constant_value_ = pad->GetConstantValue();
-    paddings_ = pad->GetPaddings();
-    padding_mode_ = pad->GetPaddingMode();
+    pad_ = reinterpret_cast<const mindspore::lite::Pad *>(primitive);
   }
   ~PadNPUKernel() override;

@@ -43,9 +40,7 @@ class PadNPUKernel : public NPUKernel {

  private:
   hiai::op::PadV2 *op_ = nullptr;
-  std::vector<int> paddings_;
-  int padding_mode_;
-  float constant_value_;
+  const mindspore::lite::Pad *pad_;
 };
 }  // namespace mindspore::kernel
 #endif  // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_NPU_PAD_NPU_H_
method type:" << method_; + if (resize_parameter_->method_ != schema::ResizeMethod_LINEAR || + resize_parameter_->method_ == schema::ResizeMethod_NEAREST) { + MS_LOG(WARNING) << "Unsupported resize method type:" << resize_parameter_->method_; return RET_ERROR; } return RET_OK; @@ -38,20 +39,21 @@ int ResizeNPUKernel::SetNPUInputs(const std::vector &inputs, con const std::vector &npu_inputs) { ge::TensorDesc sizeTensorDesc(ge::Shape({2}), ge::FORMAT_NCHW, ge::DT_INT32); ge::TensorPtr sizeTensor = std::make_shared(sizeTensorDesc); - vector dataValue = {static_cast(new_height_), static_cast(new_width_)}; + vector dataValue = {static_cast(resize_parameter_->new_height_), + static_cast(resize_parameter_->new_width_)}; sizeTensor->SetData(reinterpret_cast(dataValue.data()), 2 * sizeof(int32_t)); auto out_size = new (std::nothrow) hiai::op::Const(name_ + "_size"); out_size->set_attr_value(sizeTensor); - if (method_ == schema::ResizeMethod_LINEAR) { + if (resize_parameter_->method_ == schema::ResizeMethod_LINEAR) { auto op = new (std::nothrow) hiai::op::ResizeBilinearV2(name_); if (op == nullptr) { MS_LOG(ERROR) << " op is nullptr."; return RET_ERROR; } - op->set_attr_align_corners(align_corners_); + op->set_attr_align_corners(resize_parameter_->align_corners_); op->set_input_x(*npu_inputs[0]); op->set_input_size(*out_size); - op->set_attr_half_pixel_centers(preserve_aspect_ratio_); + op->set_attr_half_pixel_centers(resize_parameter_->preserve_aspect_ratio_); op_ = op; } else { auto op = new (std::nothrow) hiai::op::ResizeNearestNeighborV2(name_); @@ -59,7 +61,7 @@ int ResizeNPUKernel::SetNPUInputs(const std::vector &inputs, con MS_LOG(ERROR) << " op is nullptr."; return RET_ERROR; } - op->set_attr_align_corners(align_corners_); + op->set_attr_align_corners(resize_parameter_->align_corners_); op->set_input_x(*npu_inputs[0]); op->set_input_size(*out_size); op_ = op; diff --git a/mindspore/lite/src/runtime/kernel/npu/resize_npu.h b/mindspore/lite/src/runtime/kernel/npu/resize_npu.h index 726d8ef6551..80bade7352e 100644 --- a/mindspore/lite/src/runtime/kernel/npu/resize_npu.h +++ b/mindspore/lite/src/runtime/kernel/npu/resize_npu.h @@ -22,7 +22,6 @@ #include "nnacl/arithmetic_common.h" #include "src/runtime/kernel/npu/npu_kernel.h" #include "include/graph/op/all_ops.h" -#include "src/runtime/kernel/npu/transpose_base_npu.h" namespace mindspore::kernel { class ResizeNPUKernel : public NPUKernel { public: @@ -30,12 +29,7 @@ class ResizeNPUKernel : public NPUKernel { const std::vector &outputs, const lite::InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) : NPUKernel(parameter, inputs, outputs, ctx, primitive) { - auto resize_parameter = reinterpret_cast(parameter); - method_ = resize_parameter->method_; - new_height_ = resize_parameter->new_height_; - new_width_ = resize_parameter->new_width_; - align_corners_ = resize_parameter->align_corners_; - preserve_aspect_ratio_ = resize_parameter->preserve_aspect_ratio_; + resize_parameter_ = reinterpret_cast(parameter); } ~ResizeNPUKernel() override; @@ -48,11 +42,7 @@ class ResizeNPUKernel : public NPUKernel { private: ge::Operator *op_ = nullptr; - int method_; - int64_t new_height_; - int64_t new_width_; - bool align_corners_; - bool preserve_aspect_ratio_; + ResizeParameter *resize_parameter_; }; } // namespace mindspore::kernel #endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_NPU_RESIZE_NPU_H_ diff --git a/mindspore/lite/src/runtime/kernel/npu/scale_npu.cc b/mindspore/lite/src/runtime/kernel/npu/scale_npu.cc index c8dd55fa281..ce11a248752 
diff --git a/mindspore/lite/src/runtime/kernel/npu/scale_npu.cc b/mindspore/lite/src/runtime/kernel/npu/scale_npu.cc
index c8dd55fa281..ce11a248752 100644
--- a/mindspore/lite/src/runtime/kernel/npu/scale_npu.cc
+++ b/mindspore/lite/src/runtime/kernel/npu/scale_npu.cc
@@ -34,7 +34,7 @@ int ScaleNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs, cons
     MS_LOG(ERROR) << name_ << " op is nullptr";
     return RET_ERROR;
   }
-  op_->set_attr_axis(this->axis_);
+  op_->set_attr_axis(scale_parameter_->axis_);
   op_->set_input_x(*npu_inputs[0]);
   op_->set_input_scale(*npu_inputs[1]);
   op_->set_input_bias(*npu_inputs[2]);
diff --git a/mindspore/lite/src/runtime/kernel/npu/scale_npu.h b/mindspore/lite/src/runtime/kernel/npu/scale_npu.h
index 156fd0aa749..a09e10651c5 100644
--- a/mindspore/lite/src/runtime/kernel/npu/scale_npu.h
+++ b/mindspore/lite/src/runtime/kernel/npu/scale_npu.h
@@ -27,7 +27,7 @@ class ScaleNPUKernel : public NPUKernel {
                 const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx,
                 const mindspore::lite::PrimitiveC *primitive)
       : NPUKernel(parameter, inputs, outputs, ctx, primitive) {
-    axis_ = reinterpret_cast<ScaleParameter *>(parameter)->axis_;
+    scale_parameter_ = reinterpret_cast<ScaleParameter *>(parameter);
   }
   ~ScaleNPUKernel() override;
 
@@ -39,7 +39,7 @@
 
  private:
   hiai::op::Scale *op_ = nullptr;
-  int axis_;
+  ScaleParameter *scale_parameter_;
 };
 }  // namespace mindspore::kernel
 #endif  // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_NPU_Scale_NPU_H_
diff --git a/mindspore/lite/src/runtime/kernel/npu/softmax_npu.cc b/mindspore/lite/src/runtime/kernel/npu/softmax_npu.cc
index 874b71e0f99..a502a861098 100644
--- a/mindspore/lite/src/runtime/kernel/npu/softmax_npu.cc
+++ b/mindspore/lite/src/runtime/kernel/npu/softmax_npu.cc
@@ -35,10 +35,10 @@ int SoftmaxNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs,
     MS_LOG(ERROR) << name_ << " op is nullptr";
     return RET_ERROR;
   }
-  if (axis_ == -1) {
+  if (softmax_parameter_->axis_ == -1) {
     op_->set_attr_axis(inputs[0]->shape().size() - 1);
   } else {
-    op_->set_attr_axis(axis_);
+    op_->set_attr_axis(softmax_parameter_->axis_);
   }
   op_->set_input_x(*npu_inputs[0]);
   return RET_OK;
diff --git a/mindspore/lite/src/runtime/kernel/npu/softmax_npu.h b/mindspore/lite/src/runtime/kernel/npu/softmax_npu.h
index 1a4718878ea..f4d069e7cb8 100644
--- a/mindspore/lite/src/runtime/kernel/npu/softmax_npu.h
+++ b/mindspore/lite/src/runtime/kernel/npu/softmax_npu.h
@@ -27,8 +27,7 @@ class SoftmaxNPUKernel : public NPUKernel {
                   const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx,
                   const mindspore::lite::PrimitiveC *primitive)
       : NPUKernel(parameter, inputs, outputs, ctx, primitive) {
-    auto softmax_parameter = reinterpret_cast<SoftmaxParameter *>(parameter);
-    axis_ = softmax_parameter->axis_;
+    softmax_parameter_ = reinterpret_cast<SoftmaxParameter *>(parameter);
   }
   ~SoftmaxNPUKernel() override;
 
@@ -40,7 +39,7 @@
 
  private:
   hiai::op::Softmax *op_ = nullptr;
-  int axis_;
+  SoftmaxParameter *softmax_parameter_;
 };
 }  // namespace mindspore::kernel
 #endif  // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_NPU_SOFTMAX_NPU_H_
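The Scale and Softmax kernels apply the same borrowing idea to their OpParameter structs, which the session likewise keeps alive for the kernel's lifetime. One behavior above worth spelling out is the softmax axis normalization: -1 means "last axis", which the HiAI attribute needs as an explicit index. An illustrative helper (ResolveSoftmaxAxis is not part of the patch):

    #include <cstddef>

    // axis == -1 selects the last axis: for a rank-4 NHWC input, that is axis 3.
    int ResolveSoftmaxAxis(int axis, size_t input_rank) {
      return axis == -1 ? static_cast<int>(input_rank) - 1 : axis;
    }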
diff --git a/mindspore/lite/src/runtime/kernel/npu/split_npu.cc b/mindspore/lite/src/runtime/kernel/npu/split_npu.cc
index d49a4f9d834..b63f3d5d119 100644
--- a/mindspore/lite/src/runtime/kernel/npu/split_npu.cc
+++ b/mindspore/lite/src/runtime/kernel/npu/split_npu.cc
@@ -35,25 +35,25 @@ int SplitNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs, cons
     MS_LOG(ERROR) << name_ << " op is nullptr";
     return RET_ERROR;
   }
-  int size = size_splits_.size();
+  int size = split_->size_splits().size();
   ge::TensorDesc size_splits_tensor_desc(ge::Shape({size}), ge::FORMAT_NCHW, ge::DT_INT32);
   ge::TensorPtr size_splits_tensor = std::make_shared<ge::Tensor>(size_splits_tensor_desc);
-  size_splits_tensor->SetData(reinterpret_cast<uint8_t *>(size_splits_.data()), size * sizeof(int));
+  size_splits_tensor->SetData(reinterpret_cast<uint8_t *>(split_->size_splits().data()), size * sizeof(int));
   auto size_splits = new hiai::op::Const(name_ + "_size");
   size_splits->set_attr_value(size_splits_tensor);
 
   ge::TensorDesc split_dim_tensor_desc(ge::Shape({1}), ge::FORMAT_NCHW, ge::DT_INT32);
   ge::TensorPtr split_dim_tensor = std::make_shared<ge::Tensor>(split_dim_tensor_desc);
-  vector<int> split_dim_data_value = {split_dim_};
+  vector<int> split_dim_data_value = {split_->GetSplitDim()};
   split_dim_tensor->SetData(reinterpret_cast<uint8_t *>(split_dim_data_value.data()), 1 * sizeof(int));
   auto split_dim = new hiai::op::Const(name_ + "_dim");
   split_dim->set_attr_value(split_dim_tensor);
 
   op_->set_input_x(*npu_inputs[0]);
-  op_->set_attr_num_split(num_split_);
+  op_->set_attr_num_split(split_->GetNumberSplit());
   op_->set_input_split_dim(*split_dim);
   op_->set_input_size_splits(*size_splits);
-  op_->create_dynamic_output_y(num_split_);
+  op_->create_dynamic_output_y(split_->GetNumberSplit());
   return RET_OK;
 }
diff --git a/mindspore/lite/src/runtime/kernel/npu/split_npu.h b/mindspore/lite/src/runtime/kernel/npu/split_npu.h
index 2f3eb146568..61aa18be613 100644
--- a/mindspore/lite/src/runtime/kernel/npu/split_npu.h
+++ b/mindspore/lite/src/runtime/kernel/npu/split_npu.h
@@ -27,10 +27,7 @@ class SplitNPUKernel : public NPUKernel {
                  const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx,
                  const mindspore::lite::PrimitiveC *primitive)
       : NPUKernel(parameter, inputs, outputs, ctx, primitive) {
-    auto split = reinterpret_cast<const mindspore::lite::Split *>(primitive);
-    num_split_ = split->GetNumberSplit();
-    size_splits_ = split->GetSizeSplit();
-    split_dim_ = split->GetSplitDim();
+    split_ = reinterpret_cast<const mindspore::lite::Split *>(primitive);
   }
   ~SplitNPUKernel() override;
 
@@ -42,9 +39,7 @@
 
  private:
   hiai::op::SplitV *op_ = nullptr;
-  int num_split_;
-  std::vector<int> size_splits_;
-  int split_dim_;
+  const mindspore::lite::Split *split_;
 };
 }  // namespace mindspore::kernel
 #endif  // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_NPU_SPLIT_NPU_H_
diff --git a/mindspore/lite/src/runtime/kernel/npu/strided_slice_npu.cc b/mindspore/lite/src/runtime/kernel/npu/strided_slice_npu.cc
index d645c5b4cc1..747d33efea1 100644
--- a/mindspore/lite/src/runtime/kernel/npu/strided_slice_npu.cc
+++ b/mindspore/lite/src/runtime/kernel/npu/strided_slice_npu.cc
@@ -59,11 +59,11 @@ int StridedSliceNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &input
   } else {
     op_->set_input_strides(*npu_inputs[3]);
   }
-  op_->set_attr_begin_mask(begin_mask_);
-  op_->set_attr_ellipsis_mask(ellipsis_mask_);
-  op_->set_attr_end_mask(end_mask_);
-  op_->set_attr_shrink_axis_mask(shrink_axis_mask_);
-  op_->set_attr_new_axis_mask(new_axis_mask_);
+  op_->set_attr_begin_mask(strided_slice_->GetBeginMask());
+  op_->set_attr_ellipsis_mask(strided_slice_->GetEllipsisMask());
+  op_->set_attr_end_mask(strided_slice_->GetEndMask());
+  op_->set_attr_shrink_axis_mask(strided_slice_->GetShrinkAxisMask());
+  op_->set_attr_new_axis_mask(strided_slice_->GetNewAxisMask());
   return RET_OK;
 }
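Note: expressions like split_->size_splits().data() are only safe if the getter's result lives long enough. If it returns the vector by value, the temporary lasts just to the end of the full expression, so the call above works only because SetData copies the bytes right away. A self-contained sketch of the safer local-copy form; FakeTensor and GetSizeSplits are stand-ins, not code from this patch:

    #include <cstddef>
    #include <cstdint>
    #include <vector>

    struct FakeTensor {  // stand-in for ge::Tensor: this SetData copies immediately
      std::vector<uint8_t> bytes;
      void SetData(const uint8_t *data, size_t size) { bytes.assign(data, data + size); }
    };

    std::vector<int> GetSizeSplits() { return {2, 3}; }  // getter returning by value

    int main() {
      FakeTensor tensor;
      // Bind to a named local first; its lifetime is unambiguous even if a
      // SetData implementation were to keep the pointer instead of copying.
      std::vector<int> size_splits = GetSizeSplits();
      tensor.SetData(reinterpret_cast<const uint8_t *>(size_splits.data()),
                     size_splits.size() * sizeof(int));
      return static_cast<int>(tensor.bytes.size());
    }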
diff --git a/mindspore/lite/src/runtime/kernel/npu/strided_slice_npu.h b/mindspore/lite/src/runtime/kernel/npu/strided_slice_npu.h
index d6d4c0f4638..09de545a3e8 100644
--- a/mindspore/lite/src/runtime/kernel/npu/strided_slice_npu.h
+++ b/mindspore/lite/src/runtime/kernel/npu/strided_slice_npu.h
@@ -28,12 +28,7 @@ class StridedSliceNPUKernel : public NPUKernel {
                         const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx,
                         const mindspore::lite::PrimitiveC *primitive)
       : NPUKernel(parameter, inputs, outputs, ctx, primitive) {
-    auto strided_slice = reinterpret_cast<const mindspore::lite::StridedSlice *>(primitive);
-    begin_mask_ = strided_slice->GetBeginMask();
-    end_mask_ = strided_slice->GetEndMask();
-    ellipsis_mask_ = strided_slice->GetEllipsisMask();
-    new_axis_mask_ = strided_slice->GetNewAxisMask();
-    shrink_axis_mask_ = strided_slice->GetShrinkAxisMask();
+    strided_slice_ = reinterpret_cast<const mindspore::lite::StridedSlice *>(primitive);
   }
   ~StridedSliceNPUKernel() override;
 
@@ -45,11 +40,7 @@
 
  private:
   hiai::op::StridedSlice *op_ = nullptr;
-  int begin_mask_;
-  int end_mask_;
-  int ellipsis_mask_;
-  int new_axis_mask_;
-  int shrink_axis_mask_;
+  const mindspore::lite::StridedSlice *strided_slice_;
 };
 }  // namespace mindspore::kernel
 #endif  // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_NPU_STRIDEDSLICE_NPU_H_
diff --git a/mindspore/lite/src/runtime/kernel/npu/transpose_base_npu.cc b/mindspore/lite/src/runtime/kernel/npu/transpose_base_npu.cc
deleted file mode 100644
index 5d04d2ca305..00000000000
--- a/mindspore/lite/src/runtime/kernel/npu/transpose_base_npu.cc
+++ /dev/null
@@ -1,54 +0,0 @@
-/**
- * Copyright 2020 Huawei Technologies Co., Ltd
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "src/runtime/kernel/npu/transpose_base_npu.h"
-
-namespace mindspore::kernel {
-TransposeBaseNPUKernel::~TransposeBaseNPUKernel() {
-  if (pre_trans_ != nullptr) {
-    delete pre_trans_;
-    pre_trans_ = nullptr;
-  }
-  if (post_trans_ != nullptr) {
-    delete post_trans_;
-    post_trans_ = nullptr;
-  }
-}
-
-int TransposeBaseNPUKernel::SetPreTranspose(const ge::Operator *input) {
-  // input permute: NHWC -> NCHW
-  pre_trans_ = new (std::nothrow) hiai::op::Permute(name_ + "_pre_transpose");
-  if (pre_trans_ == nullptr) {
-    MS_LOG(ERROR) << "New pre transpose npu operator (NHWC -> NCHW) for op " << name_ << " failed.";
-    return RET_ERROR;
-  }
-  pre_trans_->set_input_x(*input);
-  pre_trans_->set_attr_order(ge::AttrValue::LIST_INT({0, 3, 1, 2}));
-  return RET_OK;
-}
-
-int TransposeBaseNPUKernel::SetPostTranspose(const ge::Operator *input) {
-  // permute: NCHW -> NHWC
-  post_trans_ = new (std::nothrow) hiai::op::Permute(name_ + "_post_transpose");
-  if (post_trans_ == nullptr) {
-    MS_LOG(ERROR) << "New post transpose operator (NCHW -> NHWC) for op " << name_ << " failed.";
-    return RET_ERROR;
-  }
-  post_trans_->set_input_x(*input);
-  post_trans_->set_attr_order(ge::AttrValue::LIST_INT({0, 2, 3, 1}));
-  return RET_OK;
-}
-}  // namespace mindspore::kernel
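The deleted base class is where individual NPU kernels used to splice in their own NHWC to NCHW bridges; after this patch the optimizer passes insert such transposes at subgraph boundaries instead, and the deleted destructor's manual cleanup of pre_trans_ and post_trans_ goes away with it. The two permute orders it used are worth reading off once; a small self-contained illustration:

    #include <array>
    #include <cstdio>

    // Orders from the deleted helper: NHWC->NCHW is {0, 3, 1, 2}, NCHW->NHWC is {0, 2, 3, 1}.
    std::array<int, 4> Permute(const std::array<int, 4> &shape, const std::array<int, 4> &order) {
      return {shape[order[0]], shape[order[1]], shape[order[2]], shape[order[3]]};
    }

    int main() {
      std::array<int, 4> nhwc = {1, 224, 224, 3};
      std::array<int, 4> nchw = Permute(nhwc, {0, 3, 1, 2});  // {1, 3, 224, 224}
      std::array<int, 4> back = Permute(nchw, {0, 2, 3, 1});  // {1, 224, 224, 3}
      std::printf("%d %d %d %d\n", back[0], back[1], back[2], back[3]);
      return 0;
    }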
diff --git a/mindspore/lite/src/runtime/kernel/npu/transpose_base_npu.h b/mindspore/lite/src/runtime/kernel/npu/transpose_base_npu.h
deleted file mode 100644
index 3b7c6cdd40a..00000000000
--- a/mindspore/lite/src/runtime/kernel/npu/transpose_base_npu.h
+++ /dev/null
@@ -1,41 +0,0 @@
-/**
- * Copyright 2020 Huawei Technologies Co., Ltd
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_NPU_TRANSPOSE_BASE_H_
-#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_NPU_TRANSPOSE_BASE_H_
-
-#include <vector>
-#include "include/graph/op/all_ops.h"
-#include "include/graph/compatible/all_ops.h"
-#include "src/runtime/kernel/npu/npu_kernel.h"
-#include "nnacl/op_base.h"
-
-namespace mindspore::kernel {
-class TransposeBaseNPUKernel : public NPUKernel {
- public:
-  TransposeBaseNPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
-                         const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx,
-                         const mindspore::lite::PrimitiveC *primitive)
-      : NPUKernel(parameter, inputs, outputs, ctx, primitive) {}
-  ~TransposeBaseNPUKernel() override;
-
- protected:
-  int SetPreTranspose(const ge::Operator *input);
-  int SetPostTranspose(const ge::Operator *input);
-  hiai::op::Permute *pre_trans_ = nullptr;
-  hiai::op::Permute *post_trans_ = nullptr;
-};
-}  // namespace mindspore::kernel
-#endif  // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_NPU_TRANSPOSE_BASE_H_
diff --git a/mindspore/lite/src/runtime/kernel/npu/transpose_npu.cc b/mindspore/lite/src/runtime/kernel/npu/transpose_npu.cc
index 7aa543291ae..259f1147cc4 100644
--- a/mindspore/lite/src/runtime/kernel/npu/transpose_npu.cc
+++ b/mindspore/lite/src/runtime/kernel/npu/transpose_npu.cc
@@ -30,7 +30,7 @@ int TransposeNPUKernel::IsSupport(const std::vector<lite::Tensor *> &inputs, con
     MS_LOG(ERROR) << "Unsupported conjugate transpose.";
     return RET_ERROR;
   }
-  return RET_OK;
+  return RET_ERROR;
 }
 
 int TransposeNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs,
@@ -57,6 +57,4 @@ TransposeNPUKernel::~TransposeNPUKernel() {
 }
 
 REG_KERNEL(kNPU, kNumberTypeFloat32, PrimitiveType_Transpose, NPUKernelCreator<TransposeNPUKernel>)
-// REG_KERNEL(kNPU, kNumberTypeFloat32, PrimitiveType_Nhwc2Nchw, NPUKernelCreator<TransposeNPUKernel>)
-// REG_KERNEL(kNPU, kNumberTypeFloat32, PrimitiveType_Nchw2Nhwc, NPUKernelCreator<TransposeNPUKernel>)
 }  // namespace mindspore::kernel
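With the graph-level passes now responsible for layout handling, IsSupport above unconditionally returns RET_ERROR, which presumably makes the NPU kernel creator reject Transpose so the scheduler falls back to the CPU implementation. A toy illustration of that gating; the real NPUKernelCreator is declared in npu_kernel.h and may differ:

    #include <new>

    struct ToyKernel {
      int IsSupport() const { return -1; }  // mirrors the unconditional RET_ERROR above
    };

    ToyKernel *CreateToyKernel() {
      auto *kernel = new (std::nothrow) ToyKernel();
      if (kernel == nullptr || kernel->IsSupport() != 0) {
        delete kernel;   // reject without leaking the half-built kernel
        return nullptr;  // a null creator result sends the op to another backend
      }
      return kernel;
    }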
diff --git a/mindspore/lite/src/scheduler.cc b/mindspore/lite/src/scheduler.cc
index c46d6c89f28..cd108f22217 100644
--- a/mindspore/lite/src/scheduler.cc
+++ b/mindspore/lite/src/scheduler.cc
@@ -34,10 +34,11 @@
 #if SUPPORT_NPU
 #include "src/runtime/agent/npu/subgraph_npu_kernel.h"
 #include "src/runtime/agent/npu/npu_manager.h"
-#include "src/runtime/agent/npu/npu_transform_pass.h"
-#include "src/runtime/agent/npu/npu_fusion_pass.h"
-#include "src/runtime/agent/npu/npu_add_transform_pass.h"
-#include "src/runtime/agent/npu/npu_concat_transform_pass.h"
+#include "src/runtime/agent/npu/optimizer/npu_pass_manager.h"
+#include "src/runtime/agent/npu/optimizer/npu_transform_pass.h"
+#include "src/runtime/agent/npu/optimizer/npu_fusion_pass.h"
+#include "src/runtime/agent/npu/optimizer/npu_add_transform_pass.h"
+#include "src/runtime/agent/npu/optimizer/npu_concat_transform_pass.h"
 #endif
 namespace mindspore::lite {
 using kernel::KERNEL_ARCH::kCPU;
@@ -89,12 +90,12 @@ void Scheduler::FindNodeInoutTensors(const lite::Model::Node &node, std::vector<
   auto in_size = node.input_indices_.size();
   inputs->reserve(in_size);
   for (size_t j = 0; j < in_size; ++j) {
-    inputs->emplace_back(src_tensors_.at(node.input_indices_[j]));
+    inputs->emplace_back(src_tensors_->at(node.input_indices_[j]));
   }
   auto out_size = node.output_indices_.size();
   outputs->reserve(out_size);
   for (size_t j = 0; j < out_size; ++j) {
-    outputs->emplace_back(src_tensors_.at(node.output_indices_[j]));
+    outputs->emplace_back(src_tensors_->at(node.output_indices_[j]));
   }
 }
@@ -303,11 +304,11 @@ int Scheduler::ScheduleSubGraphToKernels(size_t subgraph_index, std::vector<kern
   if (in_tensors != nullptr) {
     std::transform(subgraph->input_indices_.begin(), subgraph->input_indices_.end(),
                    std::back_inserter(*in_tensors),
-                   [&](const uint32_t index) { return this->src_tensors_.at(index); });
+                   [&](const uint32_t index) { return this->src_tensors_->at(index); });
   }
   if (out_tensors != nullptr) {
     std::transform(subgraph->output_indices_.begin(), subgraph->output_indices_.end(),
                    std::back_inserter(*out_tensors),
-                   [&](const uint32_t index) { return this->src_tensors_.at(index); });
+                   [&](const uint32_t index) { return this->src_tensors_->at(index); });
   }
   return RET_OK;
 }
@@ -567,37 +568,16 @@ void Scheduler::FindAllInoutKernels(const std::vector<kernel::LiteKernel *> &ker
 int Scheduler::RunPass(std::vector<kernel::LiteKernel *> *dst_kernels) {
   int ret = RET_OK;
 #if SUPPORT_NPU
-  auto transform_pass = new NPUTransformPass;
-  ret = transform_pass->FormatTransformPass(context_, dst_kernels, &src_tensors_);
-  delete transform_pass;
-  if (ret != RET_OK) {
-    MS_LOG(ERROR) << "Run npu format transform pass failed.";
-    return ret;
-  }
-
-  auto add_format_pass = new NPUAddTransformPass;
-  ret = add_format_pass->Run(context_, dst_kernels, &src_tensors_);
-  if (ret != RET_OK) {
-    MS_LOG(ERROR) << "Run npu add op insert transform pass failed.";
-    return ret;
-  }
-  delete add_format_pass;
-
-  auto concat_format_pass = new NPUConcatTransformPass;
-  ret = concat_format_pass->Run(context_, dst_kernels, &src_tensors_);
-  if (ret != RET_OK) {
-    MS_LOG(ERROR) << "Run npu concat op insert transform pass failed.";
-    return ret;
-  }
-  delete concat_format_pass;
-
+  auto transform_pass = new NPUTransformPass(context_, dst_kernels, src_tensors_);
+  mindspore::lite::NPUPassManager::GetInstance()->AddPass(transform_pass);
+  auto add_format_pass = new NPUAddTransformPass(context_, dst_kernels, src_tensors_);
+  mindspore::lite::NPUPassManager::GetInstance()->AddPass(add_format_pass);
+  auto concat_format_pass = new NPUConcatTransformPass(context_, dst_kernels, src_tensors_);
+  mindspore::lite::NPUPassManager::GetInstance()->AddPass(concat_format_pass);
   auto fusion_pass = new NPUFusionPass(dst_kernels);
-  ret = fusion_pass->Fusion();
-  if (ret != RET_OK) {
-    MS_LOG(ERROR) << "Run npu fussion transform pass failed.";
-    return ret;
-  }
-  delete fusion_pass;
+  mindspore::lite::NPUPassManager::GetInstance()->AddPass(fusion_pass);
+
+  ret = mindspore::lite::NPUPassManager::GetInstance()->Run();
 #endif
   return ret;
 }
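RunPass previously deleted each pass inline and leaked them on every early-return path: add_format_pass and concat_format_pass were never freed when their Run failed, and earlier passes stayed alive if a later step failed. The rewrite hands ownership to the NPUPassManager singleton, which runs the passes in registration order and can release them all in one place. A minimal sketch of such a manager; the real one lives in optimizer/npu_pass_manager.cc with its base class in optimizer/npu_base_pass.h, and may differ in detail:

    #include <vector>

    class NPUBasePass {  // stand-in for the base class in npu_base_pass.h
     public:
      virtual ~NPUBasePass() = default;
      virtual int Run() = 0;
    };

    class PassManagerSketch {
     public:
      static PassManagerSketch *GetInstance() {
        static PassManagerSketch instance;
        return &instance;
      }
      // Takes ownership: passes are registered once and freed centrally,
      // instead of being deleted (or leaked on early return) at call sites.
      void AddPass(NPUBasePass *pass) { passes_.push_back(pass); }
      int Run() {
        for (auto *pass : passes_) {
          int ret = pass->Run();
          if (ret != 0) {
            return ret;  // stop at the first failing pass
          }
        }
        return 0;
      }
      void Clear() {
        for (auto *pass : passes_) {
          delete pass;
        }
        passes_.clear();
      }

     private:
      PassManagerSketch() = default;
      ~PassManagerSketch() { Clear(); }
      std::vector<NPUBasePass *> passes_;
    };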
diff --git a/mindspore/lite/src/scheduler.h b/mindspore/lite/src/scheduler.h
index eff071ddd6e..bd5c9fac177 100644
--- a/mindspore/lite/src/scheduler.h
+++ b/mindspore/lite/src/scheduler.h
@@ -28,8 +28,8 @@ namespace mindspore::lite {
 class Scheduler {
  public:
-  Scheduler(const InnerContext *ctx, Model *src_model, std::vector<Tensor *> src_tensors)
-      : context_(ctx), src_model_(src_model), src_tensors_(std::move(src_tensors)) {}
+  Scheduler(const InnerContext *ctx, Model *src_model, std::vector<Tensor *> *src_tensors)
+      : context_(ctx), src_model_(src_model), src_tensors_(src_tensors) {}
   ~Scheduler() = default;
 
   int Schedule(std::vector<kernel::LiteKernel *> *dst_kernels);
@@ -85,7 +85,7 @@ class Scheduler {
  protected:
   const InnerContext *context_ = nullptr;
   Model *src_model_ = nullptr;
-  std::vector<Tensor *> src_tensors_;
+  std::vector<Tensor *> *src_tensors_;
   std::vector<size_t> graph_output_node_indexes_;
 };
 }  // namespace mindspore::lite
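The scheduler-side counterpart of the fix: src_tensors_ becomes a pointer to the session's tensor vector rather than a by-value copy. Passes that create tensors, for inserted transposes for example, now append them to the vector the session actually owns and frees, which is presumably where the previously leaked tensors went. A self-contained sketch of the ownership flow, with stand-in types rather than the real Tensor and Scheduler:

    #include <vector>

    struct Tensor {};

    struct Session {  // owns every tensor for the lifetime of the graph
      std::vector<Tensor *> tensors;
      ~Session() {
        for (auto *t : tensors) delete t;
      }
    };

    struct SchedulerSketch {
      explicit SchedulerSketch(std::vector<Tensor *> *src_tensors) : src_tensors_(src_tensors) {}
      void InsertTransposeTensor() {
        // Lands in the session-owned vector, so ~Session() frees it; with the
        // old by-value member, tensors appended here belonged to nobody.
        src_tensors_->push_back(new Tensor());
      }
      std::vector<Tensor *> *src_tensors_;  // borrowed, not owned
    };

    int main() {
      Session session;
      SchedulerSketch scheduler(&session.tensors);
      scheduler.InsertTransposeTensor();
      return 0;  // ~Session() releases the tensor the scheduler created
    }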