forked from mindspore-Ecosystem/mindspore
!10269 npu solve memory leak
From: @yeyunpeng2020 Reviewed-by: Signed-off-by:
This commit is contained in:
commit
8c7b616992
|
@ -30,6 +30,7 @@
|
|||
#include "src/runtime/kernel/arm/base/dequant.h"
|
||||
#if SUPPORT_NPU
|
||||
#include "src/runtime/agent/npu/npu_manager.h"
|
||||
#include "src/runtime/agent/npu/optimizer/npu_pass_manager.h"
|
||||
#endif
|
||||
|
||||
namespace mindspore {
|
||||
|
@ -366,7 +367,7 @@ int LiteSession::CompileGraph(Model *model) {
|
|||
return ret;
|
||||
}
|
||||
// scheduler kernels
|
||||
Scheduler scheduler(context_, model, tensors_);
|
||||
Scheduler scheduler(context_, model, &tensors_);
|
||||
ret = scheduler.Schedule(&kernels_);
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "Schedule kernels failed: " << ret;
|
||||
|
@ -537,6 +538,10 @@ LiteSession::~LiteSession() {
|
|||
delete this->context_;
|
||||
delete this->executor_;
|
||||
this->executor_ = nullptr;
|
||||
#if SUPPORT_NPU
|
||||
mindspore::lite::NPUPassManager::GetInstance()->Clear();
|
||||
mindspore::lite::NPUManager::GetInstance()->Reset();
|
||||
#endif
|
||||
is_running_.store(false);
|
||||
}
|
||||
|
||||
|
|
|
@ -1,9 +1,8 @@
|
|||
include_directories(${DDK_PATH})
|
||||
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/)
|
||||
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/kernel)
|
||||
file(GLOB_RECURSE NPU_RUNTIME_SRC
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/*.cc
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/../../kernel/npu/*.cc
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/optimizer/*.cc
|
||||
)
|
||||
add_library(hiai SHARED IMPORTED)
|
||||
set_target_properties(hiai PROPERTIES IMPORTED_LOCATION
|
||||
|
|
|
@ -17,6 +17,7 @@
|
|||
#include "src/runtime/agent/npu/npu_executor.h"
|
||||
#include "include/errorcode.h"
|
||||
#include "src/runtime/agent/npu/npu_manager.h"
|
||||
#include "nnacl/pack.h"
|
||||
namespace mindspore::lite {
|
||||
int NPUExecutor::Prepare(const std::vector<kernel::LiteKernel *> &kernels) {
|
||||
this->client_ = mindspore::lite::NPUManager::GetInstance()->GetClient(model_name_);
|
||||
|
@ -32,6 +33,7 @@ int NPUExecutor::Prepare(const std::vector<kernel::LiteKernel *> &kernels) {
|
|||
}
|
||||
|
||||
int NPUExecutor::Run(const std::vector<Tensor *> &in_tensors, const std::vector<Tensor *> &out_tensors,
|
||||
const std::vector<kernel::LiteKernel *> &out_kernels,
|
||||
const std::vector<kernel::LiteKernel *> &kernels, Allocator *allocator,
|
||||
const KernelCallBack &before, const KernelCallBack &after) {
|
||||
hiai::AiContext context;
|
||||
|
@ -63,15 +65,33 @@ int NPUExecutor::Run(const std::vector<Tensor *> &in_tensors, const std::vector<
|
|||
return RET_ERROR;
|
||||
}
|
||||
|
||||
// If a kernel is an output of the whole model and produces NCHW data, its output tensor must be converted from NCHW to NHWC.
|
||||
std::vector<Tensor *> trans_tensors;
|
||||
for (auto kernel : out_kernels) {
|
||||
if (kernel->out_kernels().empty() && npu_trans_nodes.find(kernel->Type()) != npu_trans_nodes.end()) {
|
||||
for (int i = 0; i < kernel->out_tensors().size(); ++i) {
|
||||
trans_tensors.push_back(kernel->out_tensors()[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
for (int i = 0; i < npu_output_tensors_.size(); ++i) {
|
||||
void *data = out_tensors[i]->MutableData();
|
||||
if (data == nullptr) {
|
||||
MS_LOG(ERROR) << "Malloc buffer failed.";
|
||||
return RET_ERROR;
|
||||
}
|
||||
|
||||
if (std::find(trans_tensors.begin(), trans_tensors.end(), out_tensors[i]) != trans_tensors.end()) {
|
||||
// Change data&tensor shape nc->nh
|
||||
PackNCHWToNHWCFp32(npu_output_tensors_[i]->GetBuffer(), data, out_tensors[i]->Batch(),
|
||||
out_tensors[i]->Width() * out_tensors[i]->Height(), out_tensors[i]->Channel());
|
||||
out_tensors[i]->set_shape({out_tensors[i]->shape()[0], out_tensors[i]->shape()[2], out_tensors[i]->shape()[3],
|
||||
out_tensors[i]->shape()[1]});
|
||||
} else {
|
||||
memcpy(data, npu_output_tensors_[i]->GetBuffer(), npu_output_tensors_[i]->GetSize());
|
||||
out_tensors[i]->ResetRefCount();
|
||||
}
|
||||
}
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
|
|
|
@ -32,8 +32,9 @@ class NPUExecutor : public Executor {
|
|||
int Prepare(const std::vector<kernel::LiteKernel *> &kernels) override;
|
||||
|
||||
int Run(const std::vector<Tensor *> &in_tensors, const std::vector<Tensor *> &out_tensors,
|
||||
const std::vector<kernel::LiteKernel *> &kernels, Allocator *allocator = nullptr,
|
||||
const KernelCallBack &before = nullptr, const KernelCallBack &after = nullptr);
|
||||
const std::vector<kernel::LiteKernel *> &out_kernels, const std::vector<kernel::LiteKernel *> &kernels,
|
||||
Allocator *allocator = nullptr, const KernelCallBack &before = nullptr,
|
||||
const KernelCallBack &after = nullptr);
|
||||
|
||||
private:
|
||||
int GetIOTensorVec();
|
||||
|
|
|
@ -55,6 +55,26 @@ bool NPUManager::CheckEMUIVersion() {
|
|||
return true;
|
||||
}
|
||||
|
||||
void NPUManager::Reset() {
|
||||
index_ = 0;
|
||||
domi::HiaiIrBuild ir_build;
|
||||
for (const auto &model_map : models_) {
|
||||
auto model = model_map.second;
|
||||
if (!model->is_freed) {
|
||||
ir_build.ReleaseModelBuff(*model->model_buffer_data_);
|
||||
model->model_buffer_data_ = nullptr;
|
||||
model->is_freed = true;
|
||||
model->desc_.reset();
|
||||
model->desc_ = nullptr;
|
||||
}
|
||||
}
|
||||
models_.clear();
|
||||
for (auto client : clients_) {
|
||||
client.reset();
|
||||
}
|
||||
clients_.clear();
|
||||
}
|
||||
|
||||
bool NPUManager::CheckDDKVersion() {
|
||||
auto client = std::make_shared<hiai::AiModelMngerClient>();
|
||||
if (client->GetVersion() != nullptr) {
|
||||
|
@ -104,54 +124,102 @@ bool NPUManager::IsKirinChip() {
|
|||
return false;
|
||||
}
|
||||
|
||||
int NPUManager::AddModel(void *model_buf, uint32_t size, const std::string &model_name, int frequency) {
|
||||
hiai::MemBuffer *buffer = mc_builder_->InputMemBufferCreate(model_buf, size);
|
||||
if (buffer == nullptr) {
|
||||
MS_LOG(ERROR) << "MemBuffer is null.";
|
||||
return RET_ERROR;
|
||||
}
|
||||
|
||||
// Registers a sub-graph model under `model_name` so that LoadOMModel() can
// load it later.
//   model_buffer_data: built OM model buffer; stored as a raw pointer, not copied.
//   model_name:        key of the model; must be unique within the manager.
//   frequency:         forwarded to hiai::AiModelDescription.
// Returns RET_OK on success and RET_ERROR for a null buffer or a duplicate name.
int NPUManager::AddModel(domi::ModelBufferData *model_buffer_data, const std::string &model_name, int frequency) {
  if (model_buffer_data == nullptr) {
    MS_LOG(ERROR) << "Model buffer data is nullptr.";
    return RET_ERROR;
  }
  // unordered_map::insert does nothing when the key already exists; without this
  // check the freshly allocated SubGraphModel would be leaked.
  if (models_.find(model_name) != models_.end()) {
    MS_LOG(ERROR) << "Model " << model_name << " has already been added.";
    return RET_ERROR;
  }
  auto *model = new SubGraphModel(index_, model_name, model_buffer_data, frequency);
  model->desc_ = std::make_shared<hiai::AiModelDescription>(model_name, frequency, 0, 0, 0);
  models_.insert({model_name, model});
  index_++;
  return RET_OK;
}
|
||||
|
||||
int NPUManager::LoadOMModel() {
|
||||
for (int i = 0; i < index_ / MAX_MODEL_NUM + 1; i++) {
|
||||
std::shared_ptr<hiai::AiModelMngerClient> NPUManager::CreateAiModelMngerClient() {
|
||||
auto client = std::make_shared<hiai::AiModelMngerClient>();
|
||||
if (client == nullptr) {
|
||||
MS_LOG(ERROR) << "NPU client is nullptr.";
|
||||
return RET_ERROR;
|
||||
return nullptr;
|
||||
}
|
||||
int ret = client->Init(nullptr);
|
||||
if (ret != hiai::AI_SUCCESS) {
|
||||
MS_LOG(ERROR) << "NPU client init failed. code is " << ret;
|
||||
return nullptr;
|
||||
}
|
||||
return client;
|
||||
}
|
||||
int NPUManager::LoadOMModel() {
|
||||
std::vector<std::shared_ptr<hiai::AiModelDescription>> models_desc;
|
||||
std::shared_ptr<hiai::AiModelMngerClient> client = nullptr;
|
||||
std::shared_ptr<hiai::AiModelBuilder> mc_builder = nullptr;
|
||||
int total = 0;
|
||||
for (const auto &model_map : models_) {
|
||||
if (total % MAX_MODEL_NUM == 0) {
|
||||
client = CreateAiModelMngerClient();
|
||||
if (client == nullptr) {
|
||||
MS_LOG(ERROR) << "Create Client failed.";
|
||||
return RET_ERROR;
|
||||
}
|
||||
mc_builder_ = std::make_shared<hiai::AiModelBuilder>(client);
|
||||
mc_builder = std::make_shared<hiai::AiModelBuilder>(client);
|
||||
if (mc_builder == nullptr) {
|
||||
MS_LOG(ERROR) << "Create AiModelBuilder failed.";
|
||||
return RET_ERROR;
|
||||
}
|
||||
}
|
||||
total++;
|
||||
auto model = model_map.second;
|
||||
if (model->is_loaded && model->is_freed) {
|
||||
continue;
|
||||
}
|
||||
models_desc.push_back(model->desc_);
|
||||
|
||||
vector<std::shared_ptr<hiai::AiModelDescription>> desc(model_desc_.begin() + i * MAX_MODEL_NUM,
|
||||
((i + 1) * MAX_MODEL_NUM > index_)
|
||||
? model_desc_.begin() + index_
|
||||
: model_desc_.begin() + (i + 1) * MAX_MODEL_NUM);
|
||||
ret = client->Load(desc);
|
||||
if (ret != hiai::AI_SUCCESS) {
|
||||
MS_LOG(ERROR) << "Client load model failed." << ret;
|
||||
auto buffer = mc_builder->InputMemBufferCreate(model->model_buffer_data_->data, model->model_buffer_data_->length);
|
||||
if (buffer == nullptr) {
|
||||
MS_LOG(ERROR) << "NPU input memory buffer create failed.";
|
||||
return RET_ERROR;
|
||||
}
|
||||
clients_.push_back(client);
|
||||
model->desc_->SetModelBuffer(model->model_buffer_data_->data, model->model_buffer_data_->length);
|
||||
if (models_desc.size() == MAX_MODEL_NUM) {
|
||||
auto ret = LoadModel(client, models_desc);
|
||||
if (ret != RET_ERROR) {
|
||||
MS_LOG(ERROR) << "Client load model failed.";
|
||||
return RET_ERROR;
|
||||
}
|
||||
models_desc.clear();
|
||||
}
|
||||
}
|
||||
|
||||
if (!models_desc.empty()) {
|
||||
auto ret = LoadModel(client, models_desc);
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "Client load model failed.";
|
||||
return RET_ERROR;
|
||||
}
|
||||
models_desc.clear();
|
||||
}
|
||||
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
std::shared_ptr<hiai::AiModelMngerClient> NPUManager::GetClient(const std::string &model_name) {
|
||||
return clients_[model_map_[model_name] / MAX_MODEL_NUM];
|
||||
return models_[model_name]->client_;
|
||||
}
|
||||
|
||||
// Returns the current value of the manager's model counter (index_).
int NPUManager::index() const {
  const int current_index = index_;
  return current_index;
}
|
||||
|
||||
// Loads the given model descriptions through `client` and records, for every
// description, that the matching registered model is loaded and which client
// owns it. On success the client is appended to clients_.
// Returns RET_OK on success; RET_ERROR when the client is null, the load fails,
// or a description does not correspond to a registered model.
int NPUManager::LoadModel(const std::shared_ptr<hiai::AiModelMngerClient> &client,
                          std::vector<std::shared_ptr<hiai::AiModelDescription>> desc_list) {
  if (client == nullptr) {
    MS_LOG(ERROR) << "NPU client is nullptr.";
    return RET_ERROR;
  }
  auto ret = client->Load(desc_list);
  if (ret != hiai::AI_SUCCESS) {
    MS_LOG(ERROR) << "Client load model failed." << ret;
    return RET_ERROR;
  }

  for (const auto &desc : desc_list) {
    auto it = models_.find(desc->GetName());
    // Dereferencing end() is undefined behaviour, so an unknown name is reported.
    if (it == models_.end() || it->second == nullptr) {
      MS_LOG(ERROR) << "Loaded model " << desc->GetName() << " is not registered in NPUManager.";
      return RET_ERROR;
    }
    // A successful load is not an error; keep the trace at debug level.
    MS_LOG(DEBUG) << "NPU model loaded: " << desc->GetName();
    it->second->is_loaded = true;
    it->second->client_ = client;
  }

  this->clients_.push_back(client);
  return RET_OK;
}
|
||||
} // namespace mindspore::lite
|
||||
|
|
|
@ -18,9 +18,11 @@
|
|||
#define MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_MANAGER_H_
|
||||
#include <string>
|
||||
#include <memory>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
#include <unordered_map>
|
||||
#include <set>
|
||||
#include "include/hiai_ir_build.h"
|
||||
#include "schema/model_generated.h"
|
||||
#include "include/HiAiModelManagerService.h"
|
||||
|
||||
|
@ -29,17 +31,34 @@ static std::set<mindspore::schema::PrimitiveType> npu_trans_nodes = {
|
|||
schema::PrimitiveType_Conv2D, schema::PrimitiveType_DeConv2D,
|
||||
schema::PrimitiveType_DepthwiseConv2D, schema::PrimitiveType_DeDepthwiseConv2D,
|
||||
schema::PrimitiveType_Resize, schema::PrimitiveType_Pooling};
|
||||
struct SubGraphModel {
|
||||
public:
|
||||
SubGraphModel(int index, std::string model_name, domi::ModelBufferData *model_buffer_data, int frequency)
|
||||
: index_(index), model_name_(std::move(model_name)), model_buffer_data_(model_buffer_data) {
|
||||
std::cout << model_name;
|
||||
}
|
||||
|
||||
bool is_freed = false;
|
||||
bool is_loaded = false;
|
||||
int index_;
|
||||
std::string model_name_;
|
||||
domi::ModelBufferData *model_buffer_data_;
|
||||
std::shared_ptr<hiai::AiModelMngerClient> client_;
|
||||
std::shared_ptr<hiai::AiModelDescription> desc_;
|
||||
};
|
||||
class NPUManager {
|
||||
public:
|
||||
static NPUManager *GetInstance() {
|
||||
static NPUManager npuManager;
|
||||
return &npuManager;
|
||||
static NPUManager manager;
|
||||
return &manager;
|
||||
}
|
||||
|
||||
~NPUManager() { Reset(); }
|
||||
|
||||
bool IsSupportNPU();
|
||||
|
||||
// provide to subgraph to add model.
|
||||
int AddModel(void *model_buf, uint32_t size, const std::string &model_name, int frequency);
|
||||
int AddModel(domi::ModelBufferData *model_buffer_data, const std::string &model_name, int frequency);
|
||||
|
||||
// scheduler to load om model.
|
||||
int LoadOMModel();
|
||||
|
@ -49,6 +68,11 @@ class NPUManager {
|
|||
|
||||
int index() const;
|
||||
|
||||
void Reset();
|
||||
|
||||
int LoadModel(const std::shared_ptr<hiai::AiModelMngerClient> &client,
|
||||
std::vector<std::shared_ptr<hiai::AiModelDescription>> desc_list);
|
||||
|
||||
private:
|
||||
bool IsKirinChip();
|
||||
|
||||
|
@ -58,16 +82,12 @@ class NPUManager {
|
|||
|
||||
int CompareVersion(const std::string &version1, const std::string &version2);
|
||||
|
||||
std::shared_ptr<hiai::AiModelMngerClient> CreateAiModelMngerClient();
|
||||
|
||||
private:
|
||||
int index_ = 0;
|
||||
|
||||
std::unordered_map<std::string, SubGraphModel *> models_;
|
||||
std::vector<std::shared_ptr<hiai::AiModelMngerClient>> clients_;
|
||||
|
||||
std::vector<std::shared_ptr<hiai::AiModelDescription>> model_desc_;
|
||||
|
||||
std::shared_ptr<hiai::AiModelBuilder> mc_builder_ = nullptr;
|
||||
|
||||
std::unordered_map<std::string, int> model_map_;
|
||||
};
|
||||
|
||||
} // namespace mindspore::lite
|
||||
|
|
|
@ -13,8 +13,8 @@
|
|||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#include "src/runtime/agent/npu/npu_add_transform_pass.h"
|
||||
#include "src/runtime/agent/npu/npu_pass_utils.h"
|
||||
#include "src/runtime/agent/npu/optimizer/npu_add_transform_pass.h"
|
||||
#include "src/runtime/agent/npu/optimizer/npu_pass_utils.h"
|
||||
namespace mindspore::lite {
|
||||
using kernel::KERNEL_ARCH::kNPU;
|
||||
int NPUAddTransformPass::UpdateNH2NCTransNodePreKernel(kernel::LiteKernel *kernel, kernel::LiteKernel *trans_kernel,
|
||||
|
@ -56,9 +56,11 @@ int NPUAddTransformPass::InsertNode(const InnerContext *context, std::vector<ker
|
|||
auto nh2nc_name = kernel->name() + "_nh2nc_" + std::to_string(total++);
|
||||
auto *nh2nc_kernel = NPUPassUtils::CreateNhwc2NchwKernel(kernel->out_tensors(), nh2nc_tensors, context, nh2nc_name);
|
||||
all_kernels->push_back(nh2nc_kernel);
|
||||
insert_primitive_.push_back(nh2nc_kernel->GetPrimitive());
|
||||
auto nc2nh_name = kernel->name() + "_nc2nh_" + std::to_string(total++);
|
||||
auto *nc2nh_kernel = NPUPassUtils::CreateNchw2NhwcKernel(nh2nc_tensors, nc2nh_tensors, context, nc2nh_name);
|
||||
all_kernels->push_back(nc2nh_kernel);
|
||||
insert_primitive_.push_back(nc2nh_kernel->GetPrimitive());
|
||||
NPUPassUtils::UpdateKernel(nh2nc_kernel, {kernel}, {nc2nh_kernel}, kernel->out_tensors(), nh2nc_tensors);
|
||||
NPUPassUtils::UpdateKernel(nc2nh_kernel, {nh2nc_kernel}, {out_kernel}, nh2nc_tensors, nc2nh_tensors);
|
||||
UpdateNH2NCTransNodePreKernel(kernel, nh2nc_kernel, out_kernel);
|
||||
|
@ -91,12 +93,11 @@ int NPUAddTransformPass::UpdateNC2NHTransNodeAfterKernel(kernel::LiteKernel *ker
|
|||
return RET_OK;
|
||||
}
|
||||
|
||||
int NPUAddTransformPass::Run(const InnerContext *context, std::vector<kernel::LiteKernel *> *all_kernels,
|
||||
std::vector<Tensor *> *all_tensors) {
|
||||
if (context->IsNpuEnabled()) {
|
||||
int NPUAddTransformPass::Run() {
|
||||
if (context_->IsNpuEnabled()) {
|
||||
std::vector<kernel::LiteKernel *> new_kernels;
|
||||
|
||||
for (auto it = all_kernels->begin(); it != all_kernels->end(); it++) {
|
||||
for (auto it = all_kernels_->begin(); it != all_kernels_->end(); it++) {
|
||||
auto kernel = *it;
|
||||
new_kernels.push_back(kernel);
|
||||
if (kernel->desc().arch != kNPU) {
|
||||
|
@ -110,14 +111,14 @@ int NPUAddTransformPass::Run(const InnerContext *context, std::vector<kernel::Li
|
|||
}
|
||||
}
|
||||
if (kernel->out_kernels().size() != sum) {
|
||||
InsertNode(context, it, &new_kernels, all_tensors);
|
||||
InsertNode(context_, it, &new_kernels, all_tensors_);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
all_kernels->clear();
|
||||
all_kernels_->clear();
|
||||
for (int i = 0; i < new_kernels.size(); i++) {
|
||||
all_kernels->push_back(new_kernels[i]);
|
||||
all_kernels_->push_back(new_kernels[i]);
|
||||
}
|
||||
}
|
||||
return RET_OK;
|
|
@ -14,16 +14,29 @@
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_ADD_TRANSFORM_PASS_H_
|
||||
#define MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_ADD_TRANSFORM_PASS_H_
|
||||
#ifndef MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_OPTIMIZER_NPU_ADD_TRANSFORM_PASS_H_
|
||||
#define MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_OPTIMIZER_NPU_ADD_TRANSFORM_PASS_H_
|
||||
#include <vector>
|
||||
#include "src/lite_kernel.h"
|
||||
#include "src/ops/primitive_c.h"
|
||||
#include "src/runtime/agent/npu/optimizer/npu_base_pass.h"
|
||||
namespace mindspore::lite {
|
||||
class NPUAddTransformPass {
|
||||
class NPUAddTransformPass : public NPUBasePass {
|
||||
public:
|
||||
int Run(const InnerContext *context, std::vector<kernel::LiteKernel *> *all_kernels,
|
||||
std::vector<Tensor *> *all_tensors);
|
||||
explicit NPUAddTransformPass(const InnerContext *context, std::vector<kernel::LiteKernel *> *all_kernels,
|
||||
std::vector<Tensor *> *all_tensors) {
|
||||
context_ = context;
|
||||
all_kernels_ = all_kernels;
|
||||
all_tensors_ = all_tensors;
|
||||
name_ = "NPUConcatTransformPass";
|
||||
}
|
||||
// Deletes every primitive recorded in insert_primitive_ and empties the list.
~NPUAddTransformPass() override {
  for (auto iter = insert_primitive_.begin(); iter != insert_primitive_.end(); ++iter) {
    delete *iter;
  }
  insert_primitive_.clear();
}
|
||||
int Run() override;
|
||||
|
||||
private:
|
||||
int UpdateNH2NCTransNodePreKernel(kernel::LiteKernel *kernel, kernel::LiteKernel *trans_kernel,
|
||||
|
@ -37,6 +50,10 @@ class NPUAddTransformPass {
|
|||
|
||||
private:
|
||||
int total = 0;
|
||||
const InnerContext *context_;
|
||||
std::vector<kernel::LiteKernel *> *all_kernels_;
|
||||
std::vector<const PrimitiveC *> insert_primitive_;
|
||||
std::vector<Tensor *> *all_tensors_;
|
||||
};
|
||||
} // namespace mindspore::lite
|
||||
#endif // MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_ADD_TRANSFORM_PASS_H_
|
||||
#endif // MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_OPTIMIZER_NPU_ADD_TRANSFORM_PASS_H_
|
|
@ -0,0 +1,34 @@
|
|||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_OPTIMIZER_NPU_BASE_PASS_H_
|
||||
#define MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_OPTIMIZER_NPU_BASE_PASS_H_
|
||||
#include <string>
|
||||
namespace mindspore::lite {
|
||||
// Abstract base class of the NPU optimizer passes.
class NPUBasePass {
 public:
  // Executes the pass and returns its status code.
  virtual int Run() = 0;

  virtual ~NPUBasePass() = default;

  // Returns the pass name. It is const so that it can also be queried through a
  // const pass object or reference.
  std::string name() const { return name_; }

 protected:
  // Name of the pass; derived classes assign it.
  std::string name_;
};
|
||||
} // namespace mindspore::lite
|
||||
|
||||
#endif // MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_OPTIMIZER_NPU_BASE_PASS_H_
|
|
@ -13,8 +13,8 @@
|
|||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#include "src/runtime/agent/npu/npu_concat_transform_pass.h"
|
||||
#include "src/runtime/agent/npu/npu_pass_utils.h"
|
||||
#include "src/runtime/agent/npu/optimizer/npu_concat_transform_pass.h"
|
||||
#include "src/runtime/agent/npu/optimizer/npu_pass_utils.h"
|
||||
namespace mindspore::lite {
|
||||
using kernel::KERNEL_ARCH::kNPU;
|
||||
int NPUConcatTransformPass::UpdateNH2NCTransNodePreKernel(kernel::LiteKernel *kernel, kernel::LiteKernel *trans_kernel,
|
||||
|
@ -54,9 +54,11 @@ int NPUConcatTransformPass::InsertNode(const InnerContext *context, std::vector<
|
|||
auto nh2nc_name = kernel->name() + "_nh2nc_" + std::to_string(total++);
|
||||
auto *nh2nc_kernel = NPUPassUtils::CreateNhwc2NchwKernel(kernel->out_tensors(), nh2nc_tensors, context, nh2nc_name);
|
||||
all_kernels->push_back(nh2nc_kernel);
|
||||
insert_primitive_.push_back(nh2nc_kernel->GetPrimitive());
|
||||
auto nc2nh_name = kernel->name() + "_nc2nh_" + std::to_string(total++);
|
||||
auto *nc2nh_kernel = NPUPassUtils::CreateNchw2NhwcKernel(nh2nc_tensors, nc2nh_tensors, context, nc2nh_name);
|
||||
all_kernels->push_back(nc2nh_kernel);
|
||||
insert_primitive_.push_back(nc2nh_kernel->GetPrimitive());
|
||||
NPUPassUtils::UpdateKernel(nh2nc_kernel, {kernel}, {nc2nh_kernel}, kernel->out_tensors(), nh2nc_tensors);
|
||||
NPUPassUtils::UpdateKernel(nc2nh_kernel, {nh2nc_kernel}, {out_kernel}, nh2nc_tensors, nc2nh_tensors);
|
||||
UpdateNH2NCTransNodePreKernel(kernel, nh2nc_kernel, out_kernel);
|
||||
|
@ -90,12 +92,11 @@ int NPUConcatTransformPass::UpdateNC2NHTransNodeAfterKernel(kernel::LiteKernel *
|
|||
return RET_OK;
|
||||
}
|
||||
|
||||
int NPUConcatTransformPass::Run(const InnerContext *context, std::vector<kernel::LiteKernel *> *all_kernels,
|
||||
std::vector<Tensor *> *all_tensors) {
|
||||
if (context->IsNpuEnabled()) {
|
||||
int NPUConcatTransformPass::Run() {
|
||||
if (context_->IsNpuEnabled()) {
|
||||
std::vector<kernel::LiteKernel *> new_kernels;
|
||||
|
||||
for (auto it = all_kernels->begin(); it != all_kernels->end(); it++) {
|
||||
for (auto it = all_kernels_->begin(); it != all_kernels_->end(); it++) {
|
||||
auto kernel = *it;
|
||||
if (kernel->desc().arch != kNPU) {
|
||||
new_kernels.push_back(kernel);
|
||||
|
@ -109,15 +110,15 @@ int NPUConcatTransformPass::Run(const InnerContext *context, std::vector<kernel:
|
|||
}
|
||||
}
|
||||
if (kernel->out_kernels().size() != sum) {
|
||||
InsertNode(context, it, &new_kernels, all_tensors);
|
||||
InsertNode(context_, it, &new_kernels, all_tensors_);
|
||||
}
|
||||
}
|
||||
new_kernels.push_back(kernel);
|
||||
}
|
||||
|
||||
all_kernels->clear();
|
||||
all_kernels_->clear();
|
||||
for (int i = 0; i < new_kernels.size(); i++) {
|
||||
all_kernels->push_back(new_kernels[i]);
|
||||
all_kernels_->push_back(new_kernels[i]);
|
||||
}
|
||||
}
|
||||
return RET_OK;
|
|
@ -14,16 +14,29 @@
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_CONCAT_TRANSFORM_PASS_H_
|
||||
#define MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_CONCAT_TRANSFORM_PASS_H_
|
||||
#ifndef MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_OPTIMIZER_NPU_CONCAT_TRANSFORM_PASS_H_
|
||||
#define MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_OPTIMIZER_NPU_CONCAT_TRANSFORM_PASS_H_
|
||||
#include <vector>
|
||||
#include "src/lite_kernel.h"
|
||||
#include "src/ops/primitive_c.h"
|
||||
#include "src/runtime/agent/npu/optimizer/npu_base_pass.h"
|
||||
namespace mindspore::lite {
|
||||
class NPUConcatTransformPass {
|
||||
class NPUConcatTransformPass : public NPUBasePass {
|
||||
public:
|
||||
int Run(const InnerContext *context, std::vector<kernel::LiteKernel *> *all_kernels,
|
||||
std::vector<Tensor *> *all_tensors);
|
||||
explicit NPUConcatTransformPass(const InnerContext *context, std::vector<kernel::LiteKernel *> *all_kernels,
|
||||
std::vector<Tensor *> *all_tensors) {
|
||||
context_ = context;
|
||||
all_kernels_ = all_kernels;
|
||||
all_tensors_ = all_tensors;
|
||||
name_ = "NPUConcatTransformPass";
|
||||
}
|
||||
// Deletes every primitive recorded in insert_primitive_ and empties the list.
~NPUConcatTransformPass() override {
  for (auto iter = insert_primitive_.begin(); iter != insert_primitive_.end(); ++iter) {
    delete *iter;
  }
  insert_primitive_.clear();
}
|
||||
int Run() override;
|
||||
|
||||
private:
|
||||
int UpdateNH2NCTransNodePreKernel(kernel::LiteKernel *kernel, kernel::LiteKernel *trans_kernel,
|
||||
|
@ -37,6 +50,10 @@ class NPUConcatTransformPass {
|
|||
|
||||
private:
|
||||
int total = 0;
|
||||
const InnerContext *context_;
|
||||
std::vector<kernel::LiteKernel *> *all_kernels_;
|
||||
std::vector<Tensor *> *all_tensors_;
|
||||
std::vector<const PrimitiveC *> insert_primitive_;
|
||||
};
|
||||
} // namespace mindspore::lite
|
||||
#endif // MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_CONCAT_TRANSFORM_PASS_H_
|
||||
#endif // MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_OPTIMIZER_NPU_CONCAT_TRANSFORM_PASS_H_
|
|
@ -13,7 +13,7 @@
|
|||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#include "src/runtime/agent/npu/npu_fusion_pass.h"
|
||||
#include "src/runtime/agent/npu/optimizer/npu_fusion_pass.h"
|
||||
#include <vector>
|
||||
#include "src/lite_kernel.h"
|
||||
#include "nnacl/concat_parameter.h"
|
||||
|
@ -198,7 +198,7 @@ int NPUFusionPass::FormatFusion(kernel::LiteKernel *kernel) {
|
|||
return RET_OK;
|
||||
}
|
||||
|
||||
int NPUFusionPass::Fusion() {
|
||||
int NPUFusionPass::Run() {
|
||||
for (auto kernel : *kernels) {
|
||||
switch (kernel->Type()) {
|
||||
case schema::PrimitiveType_Concat:
|
|
@ -14,17 +14,23 @@
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_FUSION_PASS_H_
|
||||
#define MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_FUSION_PASS_H_
|
||||
#ifndef MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_OPTIMIZER_NPU_FUSION_PASS_H_
|
||||
#define MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_OPTIMIZER_NPU_FUSION_PASS_H_
|
||||
#include <vector>
|
||||
#include "src/lite_kernel.h"
|
||||
#include "src/ops/primitive_c.h"
|
||||
#include "src/runtime/agent/npu/optimizer/npu_base_pass.h"
|
||||
namespace mindspore::lite {
|
||||
class NPUFusionPass {
|
||||
class NPUFusionPass : public NPUBasePass {
|
||||
public:
|
||||
explicit NPUFusionPass(std::vector<kernel::LiteKernel *> *dst_kernels) { kernels = dst_kernels; }
|
||||
~NPUFusionPass() = default;
|
||||
int Fusion();
|
||||
// Remembers the kernel list the fusion pass works on and sets the pass name.
explicit NPUFusionPass(std::vector<kernel::LiteKernel *> *dst_kernels) : kernels(dst_kernels) {
  name_ = "NPUFusionPass";
}
|
||||
|
||||
~NPUFusionPass() override = default;
|
||||
|
||||
int Run() override;
|
||||
|
||||
protected:
|
||||
int ConcatFusion(kernel::LiteKernel *kernel);
|
||||
|
@ -37,4 +43,4 @@ class NPUFusionPass {
|
|||
std::vector<kernel::LiteKernel *> *kernels;
|
||||
};
|
||||
} // namespace mindspore::lite
|
||||
#endif // MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_FUSION_PASS_H_
|
||||
#endif // MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_OPTIMIZER_NPU_FUSION_PASS_H_
|
|
@ -0,0 +1,39 @@
|
|||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "src/runtime/agent/npu/optimizer/npu_pass_manager.h"
|
||||
#include "include/errorcode.h"
|
||||
#include "src/common/log_adapter.h"
|
||||
namespace mindspore::lite {
|
||||
|
||||
// Appends `pass` to the list of passes executed by Run(). Registered passes are
// deleted by Clear(). A null pass is rejected because Run() dereferences every
// registered entry.
void NPUPassManager::AddPass(NPUBasePass *pass) {
  if (pass == nullptr) {
    MS_LOG(ERROR) << "NPU pass is nullptr, skip adding it.";
    return;
  }
  all_pass_.push_back(pass);
}
|
||||
int NPUPassManager::Run() {
|
||||
for (auto pass : all_pass_) {
|
||||
auto ret = pass->Run();
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "NPU Pass Run failed. Pass name is:" << pass->name();
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
return RET_OK;
|
||||
}
|
||||
// Destroys every registered pass, in registration order, and empties the list.
void NPUPassManager::Clear() {
  for (auto iter = all_pass_.begin(); iter != all_pass_.end(); ++iter) {
    delete *iter;
  }
  all_pass_.clear();
}
|
||||
} // namespace mindspore::lite
|
|
@ -0,0 +1,41 @@
|
|||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_OPTIMIZER_NPU_PASS_MANAGER_H_
|
||||
#define MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_OPTIMIZER_NPU_PASS_MANAGER_H_
|
||||
#include <vector>
|
||||
#include "src/runtime/agent/npu/optimizer/npu_base_pass.h"
|
||||
namespace mindspore::lite {
|
||||
class NPUPassManager {
|
||||
public:
|
||||
static NPUPassManager *GetInstance() {
|
||||
static NPUPassManager pass_manager;
|
||||
return &pass_manager;
|
||||
}
|
||||
|
||||
~NPUPassManager() { Clear(); }
|
||||
|
||||
void AddPass(NPUBasePass *pass);
|
||||
|
||||
int Run();
|
||||
|
||||
void Clear();
|
||||
|
||||
private:
|
||||
std::vector<NPUBasePass *> all_pass_;
|
||||
};
|
||||
} // namespace mindspore::lite
|
||||
#endif // MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_OPTIMIZER_NPU_PASS_MANAGER_H_
|
|
@ -17,7 +17,7 @@
|
|||
#include "src/kernel_registry.h"
|
||||
#include "src/ops/nhwc2nchw.h"
|
||||
#include "src/ops/nchw2nhwc.h"
|
||||
#include "src/runtime/agent/npu/npu_pass_utils.h"
|
||||
#include "src/runtime/agent/npu/optimizer/npu_pass_utils.h"
|
||||
namespace mindspore::lite {
|
||||
using kernel::KERNEL_ARCH::kCPU;
|
||||
using kernel::KERNEL_ARCH::kNPU;
|
||||
|
@ -34,7 +34,7 @@ PrimitiveC *NPUPassUtils::CreateNchw2NhwcPrimitive() {
|
|||
}
|
||||
auto primitive_buf = reinterpret_cast<char *>(malloc(fbb.GetSize()));
|
||||
if (primitive_buf == nullptr) {
|
||||
MS_LOG(ERROR) << "Malloc primitive_buf_ failed.";
|
||||
MS_LOG(ERROR) << "Malloc primitive buffer failed.";
|
||||
fbb.Clear();
|
||||
return nullptr;
|
||||
}
|
||||
|
@ -58,7 +58,7 @@ PrimitiveC *NPUPassUtils::CreateNhwc2NchwPrimitive() {
|
|||
}
|
||||
auto primitive_buf = reinterpret_cast<char *>(malloc(fbb.GetSize()));
|
||||
if (primitive_buf == nullptr) {
|
||||
MS_LOG(ERROR) << "Malloc primitive_buf_ failed.";
|
||||
MS_LOG(ERROR) << "Malloc primitive buffer failed.";
|
||||
fbb.Clear();
|
||||
return nullptr;
|
||||
}
|
|
@ -14,8 +14,8 @@
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_PASS_UTILS_H_
|
||||
#define MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_PASS_UTILS_H_
|
||||
#ifndef MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_OPTIMIZER_NPU_PASS_UTILS_H_
|
||||
#define MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_OPTIMIZER_NPU_PASS_UTILS_H_
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include "src/ops/primitive_c.h"
|
||||
|
@ -41,4 +41,4 @@ class NPUPassUtils {
|
|||
static PrimitiveC *CreateNhwc2NchwPrimitive();
|
||||
};
|
||||
} // namespace mindspore::lite
|
||||
#endif // MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_PASS_UTILS_H_
|
||||
#endif // MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_OPTIMIZER_NPU_PASS_UTILS_H_
|
|
@ -13,11 +13,11 @@
|
|||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#include "src/runtime/agent/npu/npu_transform_pass.h"
|
||||
#include "src/runtime/agent/npu/optimizer/npu_transform_pass.h"
|
||||
#include <vector>
|
||||
#include "src/lite_kernel.h"
|
||||
#include "src/runtime/agent/npu/npu_manager.h"
|
||||
#include "src/runtime/agent/npu/npu_pass_utils.h"
|
||||
#include "src/runtime/agent/npu/optimizer/npu_pass_utils.h"
|
||||
namespace mindspore::lite {
|
||||
using kernel::KERNEL_ARCH::kCPU;
|
||||
using kernel::KERNEL_ARCH::kNPU;
|
||||
|
@ -77,6 +77,7 @@ int NPUTransformPass::InsertPreNode(const InnerContext *context, std::vector<ker
|
|||
NPUPassUtils::CreateNhwc2NchwKernel({kernel->in_tensors()[0]}, pre_trans_out_tensors, context, name);
|
||||
// Insert Nhwc2Nchw into the front of the current queue
|
||||
all_kernels->push_back(pre_trans_kernel);
|
||||
insert_primitive_.push_back(pre_trans_kernel->GetPrimitive());
|
||||
// Replace the output kernel of the previous node
|
||||
std::vector<kernel::LiteKernel *> pre_trans_in_kernel;
|
||||
if (is_input_kernel) {
|
||||
|
@ -99,6 +100,10 @@ int NPUTransformPass::InsertPostNode(const InnerContext *context, std::vector<ke
|
|||
std::vector<kernel::LiteKernel *> *all_kernels,
|
||||
std::vector<Tensor *> *all_tensors) {
|
||||
auto kernel = *it;
|
||||
// Model output does not insert operator
|
||||
if (kernel->out_kernels().empty()) {
|
||||
return RET_OK;
|
||||
}
|
||||
// Single output multiple references
|
||||
for (int i = 0; i < kernel->out_kernels().size(); i++) {
|
||||
auto next_kernel = kernel->out_kernels().at(i);
|
||||
|
@ -118,6 +123,7 @@ int NPUTransformPass::InsertPostNode(const InnerContext *context, std::vector<ke
|
|||
// Replace the input tensor of the next node
|
||||
NPUPassUtils::UpdateKernel(post_trans_kernel, {kernel}, {next_kernel}, kernel->out_tensors(),
|
||||
post_trans_out_tensors);
|
||||
insert_primitive_.push_back(post_trans_kernel->GetPrimitive());
|
||||
// Directly insert in the back, will not affect the topological sort
|
||||
all_kernels->push_back(post_trans_kernel);
|
||||
UpdateNC2NHTransNodePreKernel(kernel, post_trans_kernel, next_kernel);
|
||||
|
@ -171,28 +177,27 @@ int NPUTransformPass::UpdateNC2NHTransNodeAfterKernel(kernel::LiteKernel *kernel
|
|||
return RET_OK;
|
||||
}
|
||||
|
||||
int NPUTransformPass::FormatTransformPass(const InnerContext *context, std::vector<kernel::LiteKernel *> *all_kernels,
|
||||
std::vector<Tensor *> *all_tensors) {
|
||||
if (context->IsNpuEnabled()) {
|
||||
int NPUTransformPass::Run() {
|
||||
if (context_->IsNpuEnabled()) {
|
||||
std::vector<kernel::LiteKernel *> new_kernels;
|
||||
|
||||
for (auto it = all_kernels->begin(); it != all_kernels->end(); it++) {
|
||||
for (auto it = all_kernels_->begin(); it != all_kernels_->end(); it++) {
|
||||
auto kernel = *it;
|
||||
if (kernel->desc().arch != kNPU) {
|
||||
new_kernels.push_back(kernel);
|
||||
continue;
|
||||
}
|
||||
if (npu_trans_nodes.find(kernel->Type()) != npu_trans_nodes.end()) {
|
||||
InsertPreNode(context, it, &new_kernels, all_tensors);
|
||||
InsertPreNode(context_, it, &new_kernels, all_tensors_);
|
||||
new_kernels.push_back(kernel);
|
||||
InsertPostNode(context, it, &new_kernels, all_tensors);
|
||||
InsertPostNode(context_, it, &new_kernels, all_tensors_);
|
||||
} else {
|
||||
new_kernels.push_back(kernel);
|
||||
}
|
||||
}
|
||||
all_kernels->clear();
|
||||
all_kernels_->clear();
|
||||
for (int i = 0; i < new_kernels.size(); i++) {
|
||||
all_kernels->push_back(new_kernels[i]);
|
||||
all_kernels_->push_back(new_kernels[i]);
|
||||
}
|
||||
}
|
||||
return RET_OK;
|
|
@ -14,16 +14,30 @@
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_TRANSFORM_PASS_H_
|
||||
#define MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_TRANSFORM_PASS_H_
|
||||
#ifndef MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_OPTIMIZER_NPU_TRANSFORM_PASS_H_
|
||||
#define MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_OPTIMIZER_NPU_TRANSFORM_PASS_H_
|
||||
#include <vector>
|
||||
#include "src/lite_kernel.h"
|
||||
#include "src/ops/primitive_c.h"
|
||||
#include "src/runtime/agent/npu/optimizer/npu_base_pass.h"
|
||||
namespace mindspore::lite {
|
||||
class NPUTransformPass {
|
||||
class NPUTransformPass : public NPUBasePass {
|
||||
public:
|
||||
int FormatTransformPass(const InnerContext *context, std::vector<kernel::LiteKernel *> *all_kernels,
|
||||
std::vector<Tensor *> *all_tensors);
|
||||
int Run() override;
|
||||
|
||||
explicit NPUTransformPass(const InnerContext *context, std::vector<kernel::LiteKernel *> *all_kernels,
|
||||
std::vector<Tensor *> *all_tensors) {
|
||||
context_ = context;
|
||||
all_kernels_ = all_kernels;
|
||||
all_tensors_ = all_tensors;
|
||||
name_ = "NPUTransformPass";
|
||||
}
|
||||
// Deletes every primitive recorded in insert_primitive_ (filled by InsertPreNode()
// and InsertPostNode() for the transpose kernels they create), then empties the list.
// NOTE(review): assumes the inserted kernels do not delete their own primitive - confirm.
~NPUTransformPass() override {
  for (size_t idx = 0; idx < insert_primitive_.size(); ++idx) {
    delete insert_primitive_[idx];
  }
  insert_primitive_.clear();
}
|
||||
|
||||
private:
|
||||
int UpdateNH2NCTransNodePreKernel(kernel::LiteKernel *kernel, kernel::LiteKernel *trans_kernel,
|
||||
|
@ -46,6 +60,10 @@ class NPUTransformPass {
|
|||
|
||||
private:
|
||||
int total = 0;
|
||||
const InnerContext *context_;
|
||||
std::vector<kernel::LiteKernel *> *all_kernels_;
|
||||
std::vector<Tensor *> *all_tensors_;
|
||||
std::vector<const PrimitiveC *> insert_primitive_;
|
||||
};
|
||||
} // namespace mindspore::lite
|
||||
#endif // MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_TRANSFORM_PASS_H_
|
||||
#endif // MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_OPTIMIZER_NPU_TRANSFORM_PASS_H_
|
|
@ -70,7 +70,7 @@ domi::ModelBufferData *SubGraphNpuKernel::BuildIRModel() {
|
|||
}
|
||||
|
||||
int SubGraphNpuKernel::Run() {
|
||||
return reinterpret_cast<lite::NPUExecutor *>(this->executor_)->Run(in_tensors_, out_tensors_, nodes_);
|
||||
return reinterpret_cast<lite::NPUExecutor *>(this->executor_)->Run(in_tensors_, out_tensors_, out_kernels_, nodes_);
|
||||
}
|
||||
|
||||
int SubGraphNpuKernel::BuildNPUInputOp() {
|
||||
|
@ -159,17 +159,17 @@ int SubGraphNpuKernel::BuildNPUOutputOp() {
|
|||
// Returns the OM model name of this subgraph: the kernel name followed by ".om".
std::string SubGraphNpuKernel::GetOMModelName() {
  std::string om_model_name = this->name_;
  om_model_name += ".om";
  return om_model_name;
}
|
||||
|
||||
int SubGraphNpuKernel::Init() {
|
||||
if (!isCompiled_) {
|
||||
model_buffer_data_ = BuildIRModel();
|
||||
if (model_buffer_data_ == nullptr) {
|
||||
if (!is_compiled_) {
|
||||
auto model_buffer_data = BuildIRModel();
|
||||
if (model_buffer_data == nullptr) {
|
||||
MS_LOG(ERROR) << "Build IR model failed.";
|
||||
return RET_ERROR;
|
||||
}
|
||||
|
||||
name_ = "kNpuSubGraph" + std::to_string(mindspore::lite::NPUManager::GetInstance()->index());
|
||||
|
||||
mindspore::lite::NPUManager::GetInstance()->AddModel(model_buffer_data_->data, model_buffer_data_->length,
|
||||
GetOMModelName(), context_->GetNpuInfo().frequency_);
|
||||
mindspore::lite::NPUManager::GetInstance()->AddModel(model_buffer_data, GetOMModelName(),
|
||||
context_->GetNpuInfo().frequency_);
|
||||
|
||||
executor_ = new (std::nothrow) mindspore::lite::NPUExecutor(GetOMModelName());
|
||||
|
||||
|
@ -177,7 +177,7 @@ int SubGraphNpuKernel::Init() {
|
|||
MS_LOG(ERROR) << "Create NPUExecutor failed.";
|
||||
return RET_ERROR;
|
||||
}
|
||||
isCompiled_ = true;
|
||||
is_compiled_ = true;
|
||||
}
|
||||
return RET_OK;
|
||||
}
|
||||
|
|
|
@ -69,9 +69,7 @@ class SubGraphNpuKernel : public SubGraphKernel {
|
|||
std::string GetOMModelName();
|
||||
|
||||
private:
|
||||
bool isCompiled_ = false;
|
||||
|
||||
domi::ModelBufferData *model_buffer_data_;
|
||||
bool is_compiled_ = false;
|
||||
|
||||
std::vector<ge::Operator> subgraph_input_op_;
|
||||
|
||||
|
|
|
@ -42,17 +42,10 @@ using mindspore::schema::PrimitiveType_Sub;
|
|||
namespace mindspore::kernel {
|
||||
int ArithmeticNPUKernel::IsSupport(const std::vector<lite::Tensor *> &inputs,
|
||||
const std::vector<lite::Tensor *> &outputs, OpParameter *opParameter) {
|
||||
if (primitive_->Type() == PrimitiveType_Mul || primitive_->Type() == PrimitiveType_Div) {
|
||||
if (primitive_->Type() == PrimitiveType_Mul || primitive_->Type() == PrimitiveType_Div ||
|
||||
primitive_->Type() == PrimitiveType_Add || primitive_->Type() == PrimitiveType_Sub) {
|
||||
if (inputs[0]->shape() != inputs[1]->shape()) {
|
||||
MS_LOG(WARNING) << "For the two inputs, the corresponding dimensions must have the same value."
|
||||
<< " shape 1 is:" << inputs[0]->shape() << " shape 2 is:" << inputs[1]->shape();
|
||||
return RET_ERROR;
|
||||
}
|
||||
}
|
||||
if (primitive_->Type() == PrimitiveType_Add || primitive_->Type() == PrimitiveType_Sub) {
|
||||
if (inputs[0]->shape().size() != inputs[1]->shape().size()) {
|
||||
MS_LOG(WARNING)
|
||||
<< "For the two inputs, the corresponding dimensions must have the same value, or one of them is 1."
|
||||
MS_LOG(WARNING) << name_ << " for the two inputs, the corresponding dimensions must have the same value."
|
||||
<< " shape 1 is:" << inputs[0]->shape() << " shape 2 is:" << inputs[1]->shape();
|
||||
return RET_ERROR;
|
||||
}
|
||||
|
|
|
@ -35,8 +35,8 @@ int CastNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs, const
|
|||
return RET_ERROR;
|
||||
}
|
||||
op_->set_input_x(*npu_inputs[0]);
|
||||
op_->set_attr_dst_dtype(lite::ConverterToNPUDataType(dst_type_));
|
||||
op_->set_attr_src_dtype(lite::ConverterToNPUDataType(src_type_));
|
||||
op_->set_attr_dst_dtype(lite::ConverterToNPUDataType(static_cast<TypeId>(cast_parameter_->dst_type_)));
|
||||
op_->set_attr_src_dtype(lite::ConverterToNPUDataType(static_cast<TypeId>(cast_parameter_->src_type_)));
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
|
|
|
@ -27,9 +27,7 @@ class CastNPUKernel : public NPUKernel {
|
|||
const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx,
|
||||
const mindspore::lite::PrimitiveC *primitive)
|
||||
: NPUKernel(parameter, inputs, outputs, ctx, primitive) {
|
||||
auto cast_parameter = reinterpret_cast<CastParameter *>(parameter);
|
||||
dst_type_ = static_cast<TypeId>(cast_parameter->dst_type_);
|
||||
src_type_ = static_cast<TypeId>(cast_parameter->src_type_);
|
||||
cast_parameter_ = reinterpret_cast<CastParameter *>(parameter);
|
||||
}
|
||||
~CastNPUKernel() override;
|
||||
|
||||
|
@ -41,8 +39,7 @@ class CastNPUKernel : public NPUKernel {
|
|||
|
||||
private:
|
||||
hiai::op::CastT *op_ = nullptr;
|
||||
TypeId dst_type_;
|
||||
TypeId src_type_;
|
||||
CastParameter *cast_parameter_;
|
||||
};
|
||||
} // namespace mindspore::kernel
|
||||
#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_NPU_CAST_NPU_H_
|
||||
|
|
|
@ -18,8 +18,8 @@
|
|||
|
||||
#include <vector>
|
||||
#include <memory>
|
||||
#include "src/runtime/kernel/npu/npu_kernel.h"
|
||||
#include "include/graph/op/all_ops.h"
|
||||
#include "src/runtime/kernel/npu/transpose_base_npu.h"
|
||||
#include "nnacl/conv_parameter.h"
|
||||
|
||||
namespace mindspore::kernel {
|
||||
|
|
|
@ -30,7 +30,7 @@ int ConvolutionNPUKernel::IsSupport(const std::vector<lite::Tensor *> &inputs,
|
|||
int ConvolutionNPUKernel::SetConvParam() {
|
||||
conv_->set_attr_strides(ge::AttrValue::LIST_INT({conv_param_->stride_h_, conv_param_->stride_w_}));
|
||||
conv_->set_attr_dilations(ge::AttrValue::LIST_INT({conv_param_->dilation_h_, conv_param_->dilation_w_}));
|
||||
conv_->set_attr_groups(1);
|
||||
conv_->set_attr_groups(conv_param_->group_);
|
||||
|
||||
if (conv_param_->pad_mode_ == Pad_Same) {
|
||||
conv_->set_attr_pad_mode(ge::AttrValue::STR{"SAME"});
|
||||
|
|
|
@ -41,7 +41,7 @@ int GatherNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs, con
|
|||
|
||||
op_->set_input_x(*npu_inputs[0]);
|
||||
op_->set_input_indices(*npu_inputs[1]);
|
||||
op_->set_attr_axis(axis_);
|
||||
op_->set_attr_axis(gather_parameter_->axis_);
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
|
|
|
@ -27,8 +27,7 @@ class GatherNPUKernel : public NPUKernel {
|
|||
const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx,
|
||||
const mindspore::lite::PrimitiveC *primitive)
|
||||
: NPUKernel(parameter, inputs, outputs, ctx, primitive) {
|
||||
auto gather_parameter = reinterpret_cast<GatherParameter *>(parameter);
|
||||
axis_ = gather_parameter->axis_;
|
||||
gather_parameter_ = reinterpret_cast<GatherParameter *>(parameter);
|
||||
}
|
||||
~GatherNPUKernel() override;
|
||||
|
||||
|
@ -40,7 +39,7 @@ class GatherNPUKernel : public NPUKernel {
|
|||
|
||||
private:
|
||||
hiai::op::GatherV2D *op_ = nullptr;
|
||||
int axis_;
|
||||
GatherParameter *gather_parameter_;
|
||||
};
|
||||
} // namespace mindspore::kernel
|
||||
#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_NPU_GATHER_NPU_H_
|
||||
|
|
|
@ -33,8 +33,8 @@ int MatMulNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs, con
|
|||
op_->set_input_x1(*npu_inputs[0]);
|
||||
op_->set_input_x2(*npu_inputs[1]);
|
||||
|
||||
op_->set_attr_transpose_x1(a_transpose_);
|
||||
op_->set_attr_transpose_x2(b_transpose_);
|
||||
op_->set_attr_transpose_x1(matmul_parameter_->a_transpose_);
|
||||
op_->set_attr_transpose_x2(matmul_parameter_->b_transpose_);
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
|
|
|
@ -28,9 +28,7 @@ class MatMulNPUKernel : public NPUKernel {
|
|||
const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx,
|
||||
const mindspore::lite::PrimitiveC *primitive)
|
||||
: NPUKernel(parameter, inputs, outputs, ctx, primitive) {
|
||||
auto matmul_parameter = reinterpret_cast<MatMulParameter *>(parameter);
|
||||
a_transpose_ = matmul_parameter->a_transpose_;
|
||||
b_transpose_ = matmul_parameter->b_transpose_;
|
||||
matmul_parameter_ = reinterpret_cast<MatMulParameter *>(parameter);
|
||||
}
|
||||
~MatMulNPUKernel() override;
|
||||
|
||||
|
@ -42,8 +40,7 @@ class MatMulNPUKernel : public NPUKernel {
|
|||
|
||||
private:
|
||||
hiai::op::MatMul *op_ = nullptr;
|
||||
bool a_transpose_ = false;
|
||||
bool b_transpose_ = false;
|
||||
MatMulParameter *matmul_parameter_;
|
||||
};
|
||||
} // namespace mindspore::kernel
|
||||
#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_NPU_MATMUL_NPU_H_
|
||||
|
|
|
@ -53,6 +53,11 @@ kernel::LiteKernel *NPUKernelCreator(const std::vector<lite::Tensor *> &inputs,
|
|||
const std::vector<lite::Tensor *> &outputs, OpParameter *opParameter,
|
||||
const lite::InnerContext *ctx, const kernel::KernelKey &desc,
|
||||
const mindspore::lite::PrimitiveC *primitive) {
|
||||
if (!primitive->infer_flag()) {
|
||||
MS_LOG(ERROR) << "NPU does not support runtime inference shape";
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
auto *kernel = new (std::nothrow) T(opParameter, inputs, outputs, ctx, primitive);
|
||||
if (kernel == nullptr) {
|
||||
MS_LOG(ERROR) << "kernel " << opParameter->name_ << "is nullptr.";
|
||||
|
|
|
@ -25,7 +25,7 @@ using mindspore::schema::PrimitiveType_Pad;
|
|||
namespace mindspore::kernel {
|
||||
int PadNPUKernel::IsSupport(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs,
|
||||
OpParameter *opParameter) {
|
||||
if (padding_mode_ != schema::PaddingMode_CONSTANT) {
|
||||
if (pad_->GetPaddingMode() != schema::PaddingMode_CONSTANT) {
|
||||
MS_LOG(WARNING) << "NPU only support CONSTANT padding mode";
|
||||
return RET_ERROR;
|
||||
}
|
||||
|
@ -39,16 +39,16 @@ int PadNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs, const
|
|||
MS_LOG(ERROR) << name_ << " op is nullptr";
|
||||
return RET_ERROR;
|
||||
}
|
||||
int size = static_cast<int>(paddings_.size() / 2);
|
||||
int size = static_cast<int>(pad_->GetPaddings().size() / 2);
|
||||
ge::TensorDesc padding_tensor_desc(ge::Shape({size, 2}), ge::FORMAT_NCHW, ge::DT_INT32);
|
||||
ge::TensorPtr padding_tensor = std::make_shared<hiai::Tensor>(padding_tensor_desc);
|
||||
padding_tensor->SetData(reinterpret_cast<uint8_t *>(paddings_.data()), size * sizeof(int));
|
||||
padding_tensor->SetData(reinterpret_cast<uint8_t *>(pad_->GetPaddings().data()), size * sizeof(int));
|
||||
auto paddings = new hiai::op::Const(name_ + "paddings");
|
||||
paddings->set_attr_value(padding_tensor);
|
||||
|
||||
ge::TensorDesc constant_values_tensor_desc(ge::Shape({1}), ge::FORMAT_NCHW, ge::DT_FLOAT);
|
||||
ge::TensorPtr constant_values_tensor = std::make_shared<hiai::Tensor>(constant_values_tensor_desc);
|
||||
vector<float> constant_values_data_value = {constant_value_};
|
||||
vector<float> constant_values_data_value = {pad_->GetConstantValue()};
|
||||
constant_values_tensor->SetData(reinterpret_cast<uint8_t *>(constant_values_data_value.data()), 1 * sizeof(float));
|
||||
auto constant = new hiai::op::Const(name_ + "constant");
|
||||
constant->set_attr_value(constant_values_tensor);
|
||||
|
|
|
@ -28,10 +28,7 @@ class PadNPUKernel : public NPUKernel {
|
|||
const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx,
|
||||
const mindspore::lite::PrimitiveC *primitive)
|
||||
: NPUKernel(parameter, inputs, outputs, ctx, primitive) {
|
||||
auto pad = reinterpret_cast<const mindspore::lite::Pad *>(primitive);
|
||||
constant_value_ = pad->GetConstantValue();
|
||||
paddings_ = pad->GetPaddings();
|
||||
padding_mode_ = pad->GetPaddingMode();
|
||||
pad_ = reinterpret_cast<const mindspore::lite::Pad *>(primitive);
|
||||
}
|
||||
~PadNPUKernel() override;
|
||||
|
||||
|
@ -43,9 +40,7 @@ class PadNPUKernel : public NPUKernel {
|
|||
|
||||
private:
|
||||
hiai::op::PadV2 *op_ = nullptr;
|
||||
std::vector<int> paddings_;
|
||||
int padding_mode_;
|
||||
float constant_value_;
|
||||
const mindspore::lite::Pad *pad_;
|
||||
};
|
||||
} // namespace mindspore::kernel
|
||||
#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_NPU_PAD_NPU_H_
|
||||
|
|
|
@ -27,8 +27,9 @@ using mindspore::schema::PrimitiveType_Resize;
|
|||
namespace mindspore::kernel {
|
||||
int ResizeNPUKernel::IsSupport(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs,
|
||||
OpParameter *opParameter) {
|
||||
if (method_ != schema::ResizeMethod_LINEAR || method_ == schema::ResizeMethod_NEAREST) {
|
||||
MS_LOG(WARNING) << "Unsupported resize method type:" << method_;
|
||||
if (resize_parameter_->method_ != schema::ResizeMethod_LINEAR ||
|
||||
resize_parameter_->method_ == schema::ResizeMethod_NEAREST) {
|
||||
MS_LOG(WARNING) << "Unsupported resize method type:" << resize_parameter_->method_;
|
||||
return RET_ERROR;
|
||||
}
|
||||
return RET_OK;
|
||||
|
@ -38,20 +39,21 @@ int ResizeNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs, con
|
|||
const std::vector<ge::Operator *> &npu_inputs) {
|
||||
ge::TensorDesc sizeTensorDesc(ge::Shape({2}), ge::FORMAT_NCHW, ge::DT_INT32);
|
||||
ge::TensorPtr sizeTensor = std::make_shared<hiai::Tensor>(sizeTensorDesc);
|
||||
vector<int32_t> dataValue = {static_cast<int32_t>(new_height_), static_cast<int32_t>(new_width_)};
|
||||
vector<int32_t> dataValue = {static_cast<int32_t>(resize_parameter_->new_height_),
|
||||
static_cast<int32_t>(resize_parameter_->new_width_)};
|
||||
sizeTensor->SetData(reinterpret_cast<uint8_t *>(dataValue.data()), 2 * sizeof(int32_t));
|
||||
auto out_size = new (std::nothrow) hiai::op::Const(name_ + "_size");
|
||||
out_size->set_attr_value(sizeTensor);
|
||||
if (method_ == schema::ResizeMethod_LINEAR) {
|
||||
if (resize_parameter_->method_ == schema::ResizeMethod_LINEAR) {
|
||||
auto op = new (std::nothrow) hiai::op::ResizeBilinearV2(name_);
|
||||
if (op == nullptr) {
|
||||
MS_LOG(ERROR) << " op is nullptr.";
|
||||
return RET_ERROR;
|
||||
}
|
||||
op->set_attr_align_corners(align_corners_);
|
||||
op->set_attr_align_corners(resize_parameter_->align_corners_);
|
||||
op->set_input_x(*npu_inputs[0]);
|
||||
op->set_input_size(*out_size);
|
||||
op->set_attr_half_pixel_centers(preserve_aspect_ratio_);
|
||||
op->set_attr_half_pixel_centers(resize_parameter_->preserve_aspect_ratio_);
|
||||
op_ = op;
|
||||
} else {
|
||||
auto op = new (std::nothrow) hiai::op::ResizeNearestNeighborV2(name_);
|
||||
|
@ -59,7 +61,7 @@ int ResizeNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs, con
|
|||
MS_LOG(ERROR) << " op is nullptr.";
|
||||
return RET_ERROR;
|
||||
}
|
||||
op->set_attr_align_corners(align_corners_);
|
||||
op->set_attr_align_corners(resize_parameter_->align_corners_);
|
||||
op->set_input_x(*npu_inputs[0]);
|
||||
op->set_input_size(*out_size);
|
||||
op_ = op;
|
||||
|
|
|
@ -22,7 +22,6 @@
|
|||
#include "nnacl/arithmetic_common.h"
|
||||
#include "src/runtime/kernel/npu/npu_kernel.h"
|
||||
#include "include/graph/op/all_ops.h"
|
||||
#include "src/runtime/kernel/npu/transpose_base_npu.h"
|
||||
namespace mindspore::kernel {
|
||||
class ResizeNPUKernel : public NPUKernel {
|
||||
public:
|
||||
|
@ -30,12 +29,7 @@ class ResizeNPUKernel : public NPUKernel {
|
|||
const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx,
|
||||
const mindspore::lite::PrimitiveC *primitive)
|
||||
: NPUKernel(parameter, inputs, outputs, ctx, primitive) {
|
||||
auto resize_parameter = reinterpret_cast<ResizeParameter *>(parameter);
|
||||
method_ = resize_parameter->method_;
|
||||
new_height_ = resize_parameter->new_height_;
|
||||
new_width_ = resize_parameter->new_width_;
|
||||
align_corners_ = resize_parameter->align_corners_;
|
||||
preserve_aspect_ratio_ = resize_parameter->preserve_aspect_ratio_;
|
||||
resize_parameter_ = reinterpret_cast<ResizeParameter *>(parameter);
|
||||
}
|
||||
~ResizeNPUKernel() override;
|
||||
|
||||
|
@ -48,11 +42,7 @@ class ResizeNPUKernel : public NPUKernel {
|
|||
|
||||
private:
|
||||
ge::Operator *op_ = nullptr;
|
||||
int method_;
|
||||
int64_t new_height_;
|
||||
int64_t new_width_;
|
||||
bool align_corners_;
|
||||
bool preserve_aspect_ratio_;
|
||||
ResizeParameter *resize_parameter_;
|
||||
};
|
||||
} // namespace mindspore::kernel
|
||||
#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_NPU_RESIZE_NPU_H_
|
||||
|
|
|
@ -34,7 +34,7 @@ int ScaleNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs, cons
|
|||
MS_LOG(ERROR) << name_ << " op is nullptr";
|
||||
return RET_ERROR;
|
||||
}
|
||||
op_->set_attr_axis(this->axis_);
|
||||
op_->set_attr_axis(scale_parameter_->axis_);
|
||||
op_->set_input_x(*npu_inputs[0]);
|
||||
op_->set_input_scale(*npu_inputs[1]);
|
||||
op_->set_input_bias(*npu_inputs[2]);
|
||||
|
|
|
@ -27,7 +27,7 @@ class ScaleNPUKernel : public NPUKernel {
|
|||
const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx,
|
||||
const mindspore::lite::PrimitiveC *primitive)
|
||||
: NPUKernel(parameter, inputs, outputs, ctx, primitive) {
|
||||
axis_ = reinterpret_cast<ScaleParameter *>(parameter)->axis_;
|
||||
scale_parameter_ = reinterpret_cast<ScaleParameter *>(parameter);
|
||||
}
|
||||
~ScaleNPUKernel() override;
|
||||
|
||||
|
@ -39,7 +39,7 @@ class ScaleNPUKernel : public NPUKernel {
|
|||
|
||||
private:
|
||||
hiai::op::Scale *op_ = nullptr;
|
||||
int axis_;
|
||||
ScaleParameter *scale_parameter_;
|
||||
};
|
||||
} // namespace mindspore::kernel
|
||||
#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_NPU_Scale_NPU_H_
|
||||
|
|
|
@ -35,10 +35,10 @@ int SoftmaxNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs,
|
|||
MS_LOG(ERROR) << name_ << " op is nullptr";
|
||||
return RET_ERROR;
|
||||
}
|
||||
if (axis_ == -1) {
|
||||
if (softmax_parameter_->axis_ == -1) {
|
||||
op_->set_attr_axis(inputs[0]->shape().size() - 1);
|
||||
} else {
|
||||
op_->set_attr_axis(axis_);
|
||||
op_->set_attr_axis(softmax_parameter_->axis_);
|
||||
}
|
||||
op_->set_input_x(*npu_inputs[0]);
|
||||
return RET_OK;
|
||||
|
|
|
@ -27,8 +27,7 @@ class SoftmaxNPUKernel : public NPUKernel {
|
|||
const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx,
|
||||
const mindspore::lite::PrimitiveC *primitive)
|
||||
: NPUKernel(parameter, inputs, outputs, ctx, primitive) {
|
||||
auto softmax_parameter = reinterpret_cast<SoftmaxParameter *>(parameter);
|
||||
axis_ = softmax_parameter->axis_;
|
||||
softmax_parameter_ = reinterpret_cast<SoftmaxParameter *>(parameter);
|
||||
}
|
||||
~SoftmaxNPUKernel() override;
|
||||
|
||||
|
@ -40,7 +39,7 @@ class SoftmaxNPUKernel : public NPUKernel {
|
|||
|
||||
private:
|
||||
hiai::op::Softmax *op_ = nullptr;
|
||||
int axis_;
|
||||
SoftmaxParameter *softmax_parameter_;
|
||||
};
|
||||
} // namespace mindspore::kernel
|
||||
#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_NPU_SOFTMAX_NPU_H_
|
||||
|
|
|
@ -35,25 +35,25 @@ int SplitNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs, cons
|
|||
MS_LOG(ERROR) << name_ << " op is nullptr";
|
||||
return RET_ERROR;
|
||||
}
|
||||
int size = size_splits_.size();
|
||||
int size = split_->size_splits().size();
|
||||
ge::TensorDesc size_splits_tensor_desc(ge::Shape({size}), ge::FORMAT_NCHW, ge::DT_INT32);
|
||||
ge::TensorPtr size_splits_tensor = std::make_shared<hiai::Tensor>(size_splits_tensor_desc);
|
||||
size_splits_tensor->SetData(reinterpret_cast<uint8_t *>(size_splits_.data()), size * sizeof(int));
|
||||
size_splits_tensor->SetData(reinterpret_cast<uint8_t *>(split_->size_splits().data()), size * sizeof(int));
|
||||
auto size_splits = new hiai::op::Const(name_ + "_size");
|
||||
size_splits->set_attr_value(size_splits_tensor);
|
||||
|
||||
ge::TensorDesc split_dim_tensor_desc(ge::Shape({1}), ge::FORMAT_NCHW, ge::DT_INT32);
|
||||
ge::TensorPtr split_dim_tensor = std::make_shared<hiai::Tensor>(split_dim_tensor_desc);
|
||||
vector<int32_t> split_dim_data_value = {split_dim_};
|
||||
vector<int32_t> split_dim_data_value = {split_->GetSplitDim()};
|
||||
split_dim_tensor->SetData(reinterpret_cast<uint8_t *>(split_dim_data_value.data()), 1 * sizeof(int));
|
||||
auto split_dim = new hiai::op::Const(name_ + "_dim");
|
||||
split_dim->set_attr_value(split_dim_tensor);
|
||||
|
||||
op_->set_input_x(*npu_inputs[0]);
|
||||
op_->set_attr_num_split(num_split_);
|
||||
op_->set_attr_num_split(split_->GetNumberSplit());
|
||||
op_->set_input_split_dim(*split_dim);
|
||||
op_->set_input_size_splits(*size_splits);
|
||||
op_->create_dynamic_output_y(num_split_);
|
||||
op_->create_dynamic_output_y(split_->GetNumberSplit());
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
|
|
|
@ -27,10 +27,7 @@ class SplitNPUKernel : public NPUKernel {
|
|||
const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx,
|
||||
const mindspore::lite::PrimitiveC *primitive)
|
||||
: NPUKernel(parameter, inputs, outputs, ctx, primitive) {
|
||||
auto split = reinterpret_cast<const mindspore::lite::Split *>(primitive);
|
||||
num_split_ = split->GetNumberSplit();
|
||||
size_splits_ = split->GetSizeSplit();
|
||||
split_dim_ = split->GetSplitDim();
|
||||
split_ = reinterpret_cast<const mindspore::lite::Split *>(primitive);
|
||||
}
|
||||
~SplitNPUKernel() override;
|
||||
|
||||
|
@ -42,9 +39,7 @@ class SplitNPUKernel : public NPUKernel {
|
|||
|
||||
private:
|
||||
hiai::op::SplitV *op_ = nullptr;
|
||||
int num_split_;
|
||||
std::vector<int> size_splits_;
|
||||
int split_dim_;
|
||||
const mindspore::lite::Split *split_;
|
||||
};
|
||||
} // namespace mindspore::kernel
|
||||
#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_NPU_SPLIT_NPU_H_
|
||||
|
|
|
@ -59,11 +59,11 @@ int StridedSliceNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &input
|
|||
} else {
|
||||
op_->set_input_strides(*npu_inputs[3]);
|
||||
}
|
||||
op_->set_attr_begin_mask(begin_mask_);
|
||||
op_->set_attr_ellipsis_mask(ellipsis_mask_);
|
||||
op_->set_attr_end_mask(end_mask_);
|
||||
op_->set_attr_shrink_axis_mask(shrink_axis_mask_);
|
||||
op_->set_attr_new_axis_mask(new_axis_mask_);
|
||||
op_->set_attr_begin_mask(strided_slice_->GetBeginMask());
|
||||
op_->set_attr_ellipsis_mask(strided_slice_->GetEllipsisMask());
|
||||
op_->set_attr_end_mask(strided_slice_->GetEndMask());
|
||||
op_->set_attr_shrink_axis_mask(strided_slice_->GetShrinkAxisMask());
|
||||
op_->set_attr_new_axis_mask(strided_slice_->GetNewAxisMask());
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
|
|
|
@ -28,12 +28,7 @@ class StridedSliceNPUKernel : public NPUKernel {
|
|||
const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx,
|
||||
const mindspore::lite::PrimitiveC *primitive)
|
||||
: NPUKernel(parameter, inputs, outputs, ctx, primitive) {
|
||||
auto strided_slice = reinterpret_cast<const mindspore::lite::StridedSlice *>(primitive);
|
||||
begin_mask_ = strided_slice->GetBeginMask();
|
||||
end_mask_ = strided_slice->GetEndMask();
|
||||
ellipsis_mask_ = strided_slice->GetEllipsisMask();
|
||||
new_axis_mask_ = strided_slice->GetNewAxisMask();
|
||||
shrink_axis_mask_ = strided_slice->GetShrinkAxisMask();
|
||||
strided_slice_ = reinterpret_cast<const mindspore::lite::StridedSlice *>(primitive);
|
||||
}
|
||||
~StridedSliceNPUKernel() override;
|
||||
|
||||
|
@ -45,11 +40,7 @@ class StridedSliceNPUKernel : public NPUKernel {
|
|||
|
||||
private:
|
||||
hiai::op::StridedSlice *op_ = nullptr;
|
||||
int begin_mask_;
|
||||
int end_mask_;
|
||||
int ellipsis_mask_;
|
||||
int new_axis_mask_;
|
||||
int shrink_axis_mask_;
|
||||
const mindspore::lite::StridedSlice *strided_slice_;
|
||||
};
|
||||
} // namespace mindspore::kernel
|
||||
#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_NPU_STRIDEDSLICE_NPU_H_
|
||||
|
|
|
@ -1,54 +0,0 @@
|
|||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "src/runtime/kernel/npu/transpose_base_npu.h"
|
||||
|
||||
namespace mindspore::kernel {
|
||||
// Releases the Permute operators allocated by SetPreTranspose() and SetPostTranspose().
TransposeBaseNPUKernel::~TransposeBaseNPUKernel() {
  // Deleting a null pointer is a no-op, so no explicit null checks are required.
  delete pre_trans_;
  pre_trans_ = nullptr;

  delete post_trans_;
  post_trans_ = nullptr;
}
|
||||
|
||||
// Creates the pre-transpose Permute operator (NHWC -> NCHW) and wires `input` into it.
// input: operator feeding the permute; must not be nullptr.
// Returns RET_OK on success, RET_ERROR when `input` is nullptr or the Permute
// operator cannot be allocated.
// NOTE(review): a pre_trans_ left over from an earlier call is overwritten without
// being freed - confirm this is only called once per kernel.
int TransposeBaseNPUKernel::SetPreTranspose(const ge::Operator *input) {
  // Guard the dereference below instead of crashing on a missing input operator.
  if (input == nullptr) {
    MS_LOG(ERROR) << "Input operator of pre transpose for op " << name_ << " is nullptr.";
    return RET_ERROR;
  }
  // input permute: NHWC -> NCHW
  pre_trans_ = new (std::nothrow) hiai::op::Permute(name_ + "_pre_transpose");
  if (pre_trans_ == nullptr) {
    MS_LOG(ERROR) << "New pre transpose npu operator (NHWC -> NCHW) for op " << name_ << " failed.";
    return RET_ERROR;
  }
  pre_trans_->set_input_x(*input);
  pre_trans_->set_attr_order(ge::AttrValue::LIST_INT({0, 3, 1, 2}));
  return RET_OK;
}
|
||||
|
||||
// Creates the post-transpose Permute operator (NCHW -> NHWC) and wires `input` into it.
// input: operator feeding the permute; must not be nullptr.
// Returns RET_OK on success, RET_ERROR when `input` is nullptr or the Permute
// operator cannot be allocated.
// NOTE(review): a post_trans_ left over from an earlier call is overwritten without
// being freed - confirm this is only called once per kernel.
int TransposeBaseNPUKernel::SetPostTranspose(const ge::Operator *input) {
  // Guard the dereference below instead of crashing on a missing input operator.
  if (input == nullptr) {
    MS_LOG(ERROR) << "Input operator of post transpose for op " << name_ << " is nullptr.";
    return RET_ERROR;
  }
  // permute: NCHW -> NHWC
  post_trans_ = new (std::nothrow) hiai::op::Permute(name_ + "_post_transpose");
  if (post_trans_ == nullptr) {
    MS_LOG(ERROR) << "New post transpose operator (NCHW -> NHWC) for op " << name_ << " failed.";
    return RET_ERROR;
  }
  post_trans_->set_input_x(*input);
  post_trans_->set_attr_order(ge::AttrValue::LIST_INT({0, 2, 3, 1}));
  return RET_OK;
}
|
||||
} // namespace mindspore::kernel
|
|
@ -1,41 +0,0 @@
|
|||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_NPU_TRANSPOSE_BASE_H_
|
||||
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_NPU_TRANSPOSE_BASE_H_
|
||||
|
||||
#include <vector>
|
||||
#include "include/graph/op/all_ops.h"
|
||||
#include "include/graph/compatible/all_ops.h"
|
||||
#include "src/runtime/kernel/npu/npu_kernel.h"
|
||||
#include "nnacl/op_base.h"
|
||||
|
||||
namespace mindspore::kernel {
|
||||
class TransposeBaseNPUKernel : public NPUKernel {
|
||||
public:
|
||||
TransposeBaseNPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
|
||||
const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx,
|
||||
const mindspore::lite::PrimitiveC *primitive)
|
||||
: NPUKernel(parameter, inputs, outputs, ctx, primitive) {}
|
||||
~TransposeBaseNPUKernel() override;
|
||||
|
||||
protected:
|
||||
int SetPreTranspose(const ge::Operator *input);
|
||||
int SetPostTranspose(const ge::Operator *input);
|
||||
hiai::op::Permute *pre_trans_ = nullptr;
|
||||
hiai::op::Permute *post_trans_ = nullptr;
|
||||
};
|
||||
} // namespace mindspore::kernel
|
||||
#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_NPU_TRANSPOSE_BASE_H_
|
|
@ -30,7 +30,7 @@ int TransposeNPUKernel::IsSupport(const std::vector<lite::Tensor *> &inputs, con
|
|||
MS_LOG(ERROR) << "Unsupported conjugate transpose.";
|
||||
return RET_ERROR;
|
||||
}
|
||||
return RET_OK;
|
||||
return RET_ERROR;
|
||||
}
|
||||
|
||||
int TransposeNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs,
|
||||
|
@ -57,6 +57,4 @@ TransposeNPUKernel::~TransposeNPUKernel() {
|
|||
}
|
||||
|
||||
REG_KERNEL(kNPU, kNumberTypeFloat32, PrimitiveType_Transpose, NPUKernelCreator<TransposeNPUKernel>)
|
||||
// REG_KERNEL(kNPU, kNumberTypeFloat32, PrimitiveType_Nhwc2Nchw, NPUKernelCreator<TransposeNPUKernel>)
|
||||
// REG_KERNEL(kNPU, kNumberTypeFloat32, PrimitiveType_Nchw2Nhwc, NPUKernelCreator<TransposeNPUKernel>)
|
||||
} // namespace mindspore::kernel
|
||||
|
|
|
@ -34,10 +34,11 @@
|
|||
#if SUPPORT_NPU
|
||||
#include "src/runtime/agent/npu/subgraph_npu_kernel.h"
|
||||
#include "src/runtime/agent/npu/npu_manager.h"
|
||||
#include "src/runtime/agent/npu/npu_transform_pass.h"
|
||||
#include "src/runtime/agent/npu/npu_fusion_pass.h"
|
||||
#include "src/runtime/agent/npu/npu_add_transform_pass.h"
|
||||
#include "src/runtime/agent/npu/npu_concat_transform_pass.h"
|
||||
#include "src/runtime/agent/npu/optimizer/npu_pass_manager.h"
|
||||
#include "src/runtime/agent/npu/optimizer/npu_transform_pass.h"
|
||||
#include "src/runtime/agent/npu/optimizer/npu_fusion_pass.h"
|
||||
#include "src/runtime/agent/npu/optimizer/npu_add_transform_pass.h"
|
||||
#include "src/runtime/agent/npu/optimizer/npu_concat_transform_pass.h"
|
||||
#endif
|
||||
namespace mindspore::lite {
|
||||
using kernel::KERNEL_ARCH::kCPU;
|
||||
|
@ -89,12 +90,12 @@ void Scheduler::FindNodeInoutTensors(const lite::Model::Node &node, std::vector<
|
|||
auto in_size = node.input_indices_.size();
|
||||
inputs->reserve(in_size);
|
||||
for (size_t j = 0; j < in_size; ++j) {
|
||||
inputs->emplace_back(src_tensors_.at(node.input_indices_[j]));
|
||||
inputs->emplace_back(src_tensors_->at(node.input_indices_[j]));
|
||||
}
|
||||
auto out_size = node.output_indices_.size();
|
||||
outputs->reserve(out_size);
|
||||
for (size_t j = 0; j < out_size; ++j) {
|
||||
outputs->emplace_back(src_tensors_.at(node.output_indices_[j]));
|
||||
outputs->emplace_back(src_tensors_->at(node.output_indices_[j]));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -303,11 +304,11 @@ int Scheduler::ScheduleSubGraphToKernels(size_t subgraph_index, std::vector<kern
|
|||
}
|
||||
if (in_tensors != nullptr) {
|
||||
std::transform(subgraph->input_indices_.begin(), subgraph->input_indices_.end(), std::back_inserter(*in_tensors),
|
||||
[&](const uint32_t index) { return this->src_tensors_.at(index); });
|
||||
[&](const uint32_t index) { return this->src_tensors_->at(index); });
|
||||
}
|
||||
if (out_tensors != nullptr) {
|
||||
std::transform(subgraph->output_indices_.begin(), subgraph->output_indices_.end(), std::back_inserter(*out_tensors),
|
||||
[&](const uint32_t index) { return this->src_tensors_.at(index); });
|
||||
[&](const uint32_t index) { return this->src_tensors_->at(index); });
|
||||
}
|
||||
return RET_OK;
|
||||
}
|
||||
|
@ -567,37 +568,16 @@ void Scheduler::FindAllInoutKernels(const std::vector<kernel::LiteKernel *> &ker
|
|||
int Scheduler::RunPass(std::vector<kernel::LiteKernel *> *dst_kernels) {
|
||||
int ret = RET_OK;
|
||||
#if SUPPORT_NPU
|
||||
auto transform_pass = new NPUTransformPass;
|
||||
ret = transform_pass->FormatTransformPass(context_, dst_kernels, &src_tensors_);
|
||||
delete transform_pass;
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "Run npu format transform pass failed.";
|
||||
return ret;
|
||||
}
|
||||
|
||||
auto add_format_pass = new NPUAddTransformPass;
|
||||
ret = add_format_pass->Run(context_, dst_kernels, &src_tensors_);
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "Run npu add op insert transform pass failed.";
|
||||
return ret;
|
||||
}
|
||||
delete add_format_pass;
|
||||
|
||||
auto concat_format_pass = new NPUConcatTransformPass;
|
||||
ret = concat_format_pass->Run(context_, dst_kernels, &src_tensors_);
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "Run npu concat op insert transform pass failed.";
|
||||
return ret;
|
||||
}
|
||||
delete concat_format_pass;
|
||||
|
||||
auto transform_pass = new NPUTransformPass(context_, dst_kernels, src_tensors_);
|
||||
mindspore::lite::NPUPassManager::GetInstance()->AddPass(transform_pass);
|
||||
auto add_format_pass = new NPUAddTransformPass(context_, dst_kernels, src_tensors_);
|
||||
mindspore::lite::NPUPassManager::GetInstance()->AddPass(add_format_pass);
|
||||
auto concat_format_pass = new NPUConcatTransformPass(context_, dst_kernels, src_tensors_);
|
||||
mindspore::lite::NPUPassManager::GetInstance()->AddPass(concat_format_pass);
|
||||
auto fusion_pass = new NPUFusionPass(dst_kernels);
|
||||
ret = fusion_pass->Fusion();
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "Run npu fussion transform pass failed.";
|
||||
return ret;
|
||||
}
|
||||
delete fusion_pass;
|
||||
mindspore::lite::NPUPassManager::GetInstance()->AddPass(fusion_pass);
|
||||
|
||||
ret = mindspore::lite::NPUPassManager::GetInstance()->Run();
|
||||
#endif
|
||||
return ret;
|
||||
}
|
||||
|
|
|
@ -28,8 +28,8 @@
|
|||
namespace mindspore::lite {
|
||||
class Scheduler {
|
||||
public:
|
||||
Scheduler(const InnerContext *ctx, Model *src_model, std::vector<Tensor *> src_tensors)
|
||||
: context_(ctx), src_model_(src_model), src_tensors_(std::move(src_tensors)) {}
|
||||
Scheduler(const InnerContext *ctx, Model *src_model, std::vector<Tensor *> *src_tensors)
|
||||
: context_(ctx), src_model_(src_model), src_tensors_(src_tensors) {}
|
||||
~Scheduler() = default;
|
||||
|
||||
int Schedule(std::vector<kernel::LiteKernel *> *dst_kernels);
|
||||
|
@ -85,7 +85,7 @@ class Scheduler {
|
|||
protected:
|
||||
const InnerContext *context_ = nullptr;
|
||||
Model *src_model_ = nullptr;
|
||||
std::vector<Tensor *> src_tensors_;
|
||||
std::vector<Tensor *> *src_tensors_;
|
||||
std::vector<size_t> graph_output_node_indexes_;
|
||||
};
|
||||
} // namespace mindspore::lite
|
||||
|
|
Loading…
Reference in New Issue