!13867 add Pad and StridedSlice fusion for NPU

From: @zhaozhenlong
Reviewed-by: 
Signed-off-by:
mindspore-ci-bot 2021-03-25 08:45:42 +08:00 committed by Gitee
commit 9b23952fc2
11 changed files with 231 additions and 81 deletions


@@ -18,6 +18,9 @@
#include "src/runtime/agent/npu/optimizer/npu_pass_utils.h"
#include "src/lite_kernel.h"
#include "nnacl/concat_parameter.h"
#include "nnacl/split_parameter.h"
#include "nnacl/pad_parameter.h"
#include "nnacl/strided_slice_parameter.h"
namespace mindspore::lite {
bool CheckFusion(kernel::LiteKernel *kernel) {
@@ -119,7 +122,7 @@ void NPUFusionPass::UpdatePostKernels(kernel::LiteKernel *cur_kernel) {
}
void UpdatePreTensors(kernel::LiteKernel *cur_kernel) {
auto tensors_vec = cur_kernel->in_tensors();
auto tensors_vec = NPUPassUtils::GetNonConstInputs(cur_kernel);
for (auto in_kernel : cur_kernel->in_kernels()) {
lite::Tensor *cur_tensor = nullptr;
auto in_tensor = in_kernel->in_tensors()[0];
@@ -136,6 +139,15 @@ void UpdatePreTensors(kernel::LiteKernel *cur_kernel) {
}
}
}
// add constant inputs back
if (nodes2const_index.find(static_cast<schema::PrimitiveType>(cur_kernel->op_parameter()->type_)) !=
nodes2const_index.end()) {
tensors_vec.resize(cur_kernel->in_tensors().size());
auto const_index = nodes2const_index[static_cast<schema::PrimitiveType>(cur_kernel->op_parameter()->type_)];
for (auto index : const_index) {
tensors_vec[index] = cur_kernel->in_tensors()[index];
}
}
cur_kernel->set_in_tensors(tensors_vec);
}
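
The block above first keeps only the non-const inputs (via GetNonConstInputs) and then copies the constant inputs back at their original indices, as recorded in nodes2const_index. A standalone sketch of that rebuild pattern, with plain strings standing in for tensors (all names here are illustrative, not taken from the commit):

#include <iostream>
#include <set>
#include <string>
#include <vector>

int main() {
  // Inputs of a StridedSlice-like node: activation data plus const begin/end/stride.
  std::vector<std::string> original = {"data", "begin", "end", "stride"};
  std::set<int> const_indices = {1, 2, 3};  // indices that hold constant tensors

  // After fusion only the non-const inputs were re-linked to the new producer.
  std::vector<std::string> rebuilt = {"data_from_fused_pre_kernel"};

  // Restore the constant inputs at their original positions.
  rebuilt.resize(original.size());
  for (int index : const_indices) {
    rebuilt[index] = original[index];
  }

  for (const auto &name : rebuilt) {
    std::cout << name << std::endl;  // data_from_fused_pre_kernel, begin, end, stride
  }
  return 0;
}
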
@@ -275,15 +287,75 @@ int NPUFusionPass::FormatFusion(kernel::LiteKernel *kernel) {
return RET_OK;
}
int NPUFusionPass::SplitFusion(kernel::LiteKernel *kernel) {
UpdateKernel(kernel);
auto split_param = reinterpret_cast<SplitParameter *>(kernel->op_parameter());
split_param->split_dim_ = TransFormAxis(split_param->split_dim_);
return RET_OK;
}
int NPUFusionPass::PadFusion(kernel::LiteKernel *kernel) {
UpdateKernel(kernel);
auto pad_param = reinterpret_cast<PadParameter *>(kernel->op_parameter());
int c1 = pad_param->paddings_[6];
int c2 = pad_param->paddings_[7];
// 0 1 2 3 4 5 6 7
// n n h h w w c c
// n n c c h h w w
pad_param->paddings_[6] = pad_param->paddings_[4];
pad_param->paddings_[7] = pad_param->paddings_[5];
pad_param->paddings_[4] = pad_param->paddings_[2];
pad_param->paddings_[5] = pad_param->paddings_[3];
pad_param->paddings_[2] = c1;
pad_param->paddings_[3] = c2;
return RET_OK;
}
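
To make the index shuffle above concrete, here is a minimal standalone example of the same NHWC-to-NCHW paddings reorder on a plain array (the values are made up for illustration; the real pass mutates PadParameter::paddings_ in place):

#include <array>
#include <cstdio>

int main() {
  // NHWC order: {n_before, n_after, h_before, h_after, w_before, w_after, c_before, c_after}
  std::array<int, 8> paddings = {0, 0, 1, 1, 2, 2, 3, 3};

  // Move the C pair from slots 6/7 to slots 2/3, shifting the H and W pairs back.
  int c_before = paddings[6];
  int c_after = paddings[7];
  paddings[6] = paddings[4];  // w_before
  paddings[7] = paddings[5];  // w_after
  paddings[4] = paddings[2];  // h_before
  paddings[5] = paddings[3];  // h_after
  paddings[2] = c_before;
  paddings[3] = c_after;

  // NCHW order: prints 0 0 3 3 1 1 2 2
  for (int v : paddings) printf("%d ", v);
  printf("\n");
  return 0;
}
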
int NPUFusionPass::StridedSliceFusion(kernel::LiteKernel *kernel) {
// basic requirement: the input is a 4-D NHWC tensor
UpdateKernel(kernel);
auto param = reinterpret_cast<StridedSliceParameter *>(kernel->op_parameter());
auto begin_tensor = kernel->in_tensors().at(1);
int *begin = reinterpret_cast<int *>(begin_tensor->data_c());
(void)NPUPassUtils::AssistDataNHWC2NCHW(begin, 1);
auto end_tensor = kernel->in_tensors().at(2);
int *end = reinterpret_cast<int *>(end_tensor->data_c());
NPUPassUtils::AssistDataNHWC2NCHW(end, 1);
auto stride_tensor = kernel->in_tensors().at(3);
if (kernel->in_tensors().size() == 5) {
stride_tensor = kernel->in_tensors().at(4);
}
int *stride = reinterpret_cast<int *>(stride_tensor->data_c());
NPUPassUtils::AssistDataNHWC2NCHW(stride, 1);
param->begins_mask_ = NPUPassUtils::MaskDataNHWC2NCHW(param->begins_mask_);
param->ends_mask_ = NPUPassUtils::MaskDataNHWC2NCHW(param->ends_mask_);
param->ellipsisMask_ = NPUPassUtils::MaskDataNHWC2NCHW(param->ellipsisMask_);
param->newAxisMask_ = NPUPassUtils::MaskDataNHWC2NCHW(param->newAxisMask_);
param->shrinkAxisMask_ = NPUPassUtils::MaskDataNHWC2NCHW(param->shrinkAxisMask_);
return RET_OK;
}
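
As a concrete illustration of what this fusion does to the auxiliary inputs and bit masks of an NHWC StridedSlice, here is a standalone sketch; the helper names NhwcToNchw and MaskNhwcToNchw are made up for the example and mirror the AssistDataNHWC2NCHW and MaskDataNHWC2NCHW utilities added later in this diff:

#include <array>
#include <cstdio>

// Permute one value per axis from NHWC (n, h, w, c) to NCHW (n, c, h, w) order.
std::array<int, 4> NhwcToNchw(const std::array<int, 4> &v) { return {v[0], v[3], v[1], v[2]}; }

// Remap a 4-bit axis mask (bit i refers to axis i) with the same permutation.
int MaskNhwcToNchw(int mask) {
  std::array<int, 4> bits = {mask & 1, (mask >> 1) & 1, (mask >> 2) & 1, (mask >> 3) & 1};
  bits = NhwcToNchw(bits);
  return bits[0] | (bits[1] << 1) | (bits[2] << 2) | (bits[3] << 3);
}

int main() {
  std::array<int, 4> begin = {0, 2, 4, 1};      // begin values in NHWC axis order
  std::array<int, 4> nchw = NhwcToNchw(begin);  // {0, 1, 2, 4} in NCHW axis order
  printf("%d %d %d %d\n", nchw[0], nchw[1], nchw[2], nchw[3]);
  // begins_mask with only the C axis set: bit 3 in NHWC becomes bit 1 in NCHW.
  printf("%d -> %d\n", 8, MaskNhwcToNchw(8));   // 8 -> 2
  return 0;
}
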
int NPUFusionPass::Run() {
for (size_t i = 0; i < kernels->size(); i++) {
auto kernel = (*kernels)[i];
if (CheckFusion(kernel)) {
switch (kernel->Type()) {
case schema::PrimitiveType_Split:
i -= kernel->in_kernels().size();
SplitFusion(kernel);
continue;
case schema::PrimitiveType_Concat:
i -= kernel->in_kernels().size();
ConcatFusion(kernel);
continue;
case schema::PrimitiveType_PadFusion:
i -= kernel->in_kernels().size();
PadFusion(kernel);
continue;
case schema::PrimitiveType_StridedSlice:
i -= kernel->in_kernels().size();
StridedSliceFusion(kernel);
continue;
case schema::PrimitiveType_AddFusion:
case schema::PrimitiveType_Activation:
case schema::PrimitiveType_Eltwise:


@@ -39,6 +39,9 @@ class NPUFusionPass : public NPUBasePass {
int CommonFusion(kernel::LiteKernel *kernel);
int ConcatFusion(kernel::LiteKernel *kernel);
int FormatFusion(kernel::LiteKernel *kernel);
int SplitFusion(kernel::LiteKernel *kernel);
int PadFusion(kernel::LiteKernel *kernel);
int StridedSliceFusion(kernel::LiteKernel *kernel);
private:
std::vector<kernel::LiteKernel *> *kernels;


@@ -23,8 +23,10 @@ namespace mindspore::lite {
using kernel::KERNEL_ARCH::kNPU;
enum InsertState { InsertNone, PreInsert, PostInsert, BothInsert };
std::set<mindspore::schema::PrimitiveType> npu_insert_nodes = {
schema::PrimitiveType_Concat, schema::PrimitiveType_AddFusion, schema::PrimitiveType_Eltwise,
schema::PrimitiveType_Activation};
schema::PrimitiveType_Concat, schema::PrimitiveType_AddFusion, schema::PrimitiveType_Eltwise,
schema::PrimitiveType_Activation, schema::PrimitiveType_Split, schema::PrimitiveType_PadFusion,
schema::PrimitiveType_StridedSlice};
// The goal of this pass is to minimize the number of subgraphs generated
// by inserting nchw2nhwc or nhwc2nchw before or after an operator (e.g. concat, add, etc.) together with the
// fusion pass. If the inserted transposes cover more than half of the inputs and outputs, we will insert the remaining input
@@ -44,7 +46,7 @@ std::set<mindspore::schema::PrimitiveType> npu_insert_nodes = {
// so we won't insert nc2nh or nh2nc when the op's in kernels and out kernels contain no nc2nh or nh2nc.
// This pass should be run after npu_transform_pass, which inserts transposes for nchw-input-limited ops such as conv2d.
int GetInsertState(kernel::LiteKernel *kernel) {
int NPUInsertTransformPass::GetInsertState(kernel::LiteKernel *kernel) {
// filter out irrelevant kernel
if (npu_insert_nodes.find(kernel->Type()) == npu_insert_nodes.end()) {
return InsertNone;
@@ -52,15 +54,17 @@ int GetInsertState(kernel::LiteKernel *kernel) {
// the current kernel is a target kernel
// use its out kernels to count how many output edges leave the current kernel
std::vector<Tensor *> in_tensors = NPUPassUtils::GetNonConstInputs(kernel);
size_t in_out_tensor_num =
kernel->in_tensors().size() + std::max(kernel->out_kernels().size(), static_cast<size_t>(1));
in_tensors.size() +
std::max(std::max(kernel->out_kernels().size(), static_cast<size_t>(1)), kernel->out_tensors().size());
size_t transpose_input_num = 0;
size_t transpose_output_num = 0;
bool need_pre_insert = false;
bool need_post_insert = false;
// count the input tensors that come from nc2nh kernels and the output tensors that go to nh2nc kernels
for (size_t i = 0; i < kernel->in_tensors().size(); ++i) {
auto in_kernel = NPUPassUtils::KernelInputFromKernel(kernel, i);
for (size_t i = 0; i < in_tensors.size(); ++i) {
auto in_kernel = NPUPassUtils::KernelInputFromKernel(kernel, in_tensors.at(i));
if (NPUPassUtils::IsNchw2Nhwc(in_kernel)) {
transpose_input_num++;
} else {
@@ -81,21 +85,22 @@ int GetInsertState(kernel::LiteKernel *kernel) {
// don't insert anything if the number of transpose tensors is fewer than half of the total inputs and outputs.
// don't insert if all inputs and outputs are transpose tensors; the fusion pass will handle that case.
size_t transpose_tensor_num = transpose_input_num + transpose_output_num;
if (transpose_tensor_num <= in_out_tensor_num / 2 || transpose_tensor_num == in_out_tensor_num) {
if (transpose_tensor_num == 0 || transpose_tensor_num * 2 < in_out_tensor_num ||
transpose_tensor_num == in_out_tensor_num) {
return InsertNone;
}
InsertState ret;
if (need_pre_insert && !need_post_insert) {
return PreInsert;
}
if (need_pre_insert && need_post_insert) {
return BothInsert;
}
if (!need_pre_insert && need_post_insert) {
return PostInsert;
ret = PreInsert;
} else if (need_pre_insert && need_post_insert) {
ret = BothInsert;
} else if (!need_pre_insert && need_post_insert) {
ret = PostInsert;
} else {
ret = InsertNone;
}
return InsertNone;
return ret;
}
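
The early-return condition above boils down to: only insert transposes when at least half, but not all, of the kernel's input/output edges already touch transpose kernels (when all of them do, the fusion pass removes the transposes instead). A standalone sketch of that decision rule, not the MindSpore function itself:

#include <cstddef>
#include <cstdio>

enum InsertState { InsertNone, PreInsert, PostInsert, BothInsert };

InsertState Decide(size_t transpose_num, size_t in_out_num, bool need_pre, bool need_post) {
  // No insertion when none, fewer than half, or all of the edges touch transposes.
  if (transpose_num == 0 || transpose_num * 2 < in_out_num || transpose_num == in_out_num) {
    return InsertNone;
  }
  if (need_pre && need_post) return BothInsert;
  if (need_pre) return PreInsert;
  if (need_post) return PostInsert;
  return InsertNone;
}

int main() {
  // A concat with 3 inputs and 1 output: 2 of its 4 edges touch transposes -> insert on the rest.
  printf("%d\n", Decide(2, 4, true, true));    // 3 (BothInsert)
  // Only 1 of 4 edges touches a transpose -> not worth inserting.
  printf("%d\n", Decide(1, 4, true, false));   // 0 (InsertNone)
  // All 4 edges already touch transposes -> leave them to the fusion pass.
  printf("%d\n", Decide(4, 4, false, false));  // 0 (InsertNone)
  return 0;
}
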
int NPUInsertTransformPass::InsertNode(kernel::LiteKernel *kernel, kernel::LiteKernel *post_kernel,
@@ -200,13 +205,20 @@ int NPUInsertTransformPass::InsertForOutputTensor(kernel::LiteKernel *kernel, ke
int NPUInsertTransformPass::InsertPreNodes(kernel::LiteKernel *kernel,
std::vector<kernel::LiteKernel *> *trans_kernels) {
int ret = RET_OK;
for (size_t i = 0; i < kernel->in_tensors().size(); ++i) {
auto pre_kernel = NPUPassUtils::KernelInputFromKernel(kernel, i);
auto in_tensors = NPUPassUtils::GetNonConstInputs(kernel);
for (auto tensor : in_tensors) {
auto pre_kernel = NPUPassUtils::KernelInputFromKernel(kernel, tensor);
if (NPUPassUtils::IsNchw2Nhwc(pre_kernel)) {
continue;
}
// if this tensor is a graph input, pre_kernel is nullptr.
ret = InsertForInputTensor(kernel, i, pre_kernel, trans_kernels);
auto it = find(kernel->in_tensors().begin(), kernel->in_tensors().end(), tensor);
if (it == kernel->in_tensors().end()) {
MS_LOG(ERROR) << "Find in tensor index error";
return RET_ERROR;
}
size_t index = it - kernel->in_tensors().begin();
ret = InsertForInputTensor(kernel, index, pre_kernel, trans_kernels);
if (ret != RET_OK) {
MS_LOG(ERROR) << "Insert nhwc2nchw kernel and nchw2nhwc kernel before kernel " << kernel->name() << " failed.";
return ret;
@@ -249,59 +261,63 @@ int NPUInsertTransformPass::InsertPostNodes(kernel::LiteKernel *kernel,
int NPUInsertTransformPass::Run() {
std::vector<kernel::LiteKernel *> insert_kernels;
for (size_t i = 0; i < all_kernels_->size(); i++) {
auto kernel = (*all_kernels_)[i];
if (kernel->desc().arch != kNPU) {
continue;
}
auto insert_state = GetInsertState(kernel);
insert_kernels.clear();
// If every output kernel is nhwc2nchw, insert
// modify the loop index by adding post_kernels.size() so we move on to the next kernel in the original vector
switch (insert_state) {
case PreInsert: {
auto ret = InsertPreNodes(kernel, &insert_kernels);
if (ret != RET_OK) {
MS_LOG(ERROR) << "Insert nhwc2nchw kernel and nchw2nhwc kernel before kernel " << kernel->name()
<< " failed.";
return RET_ERROR;
}
all_kernels_->insert(all_kernels_->begin() + i, insert_kernels.begin(), insert_kernels.end());
i += insert_kernels.size();
break;
for (int j = 0; j < 2; ++j) {
for (size_t i = 0; i < all_kernels_->size(); i++) {
auto kernel = (*all_kernels_)[i];
if (kernel->desc().arch != kNPU) {
continue;
}
case PostInsert: {
auto ret = InsertPostNodes(kernel, &insert_kernels);
if (ret != RET_OK) {
MS_LOG(ERROR) << "Insert nhwc2nchw kernel and nchw2nhwc kernel after kernel " << kernel->name() << " failed.";
return RET_ERROR;
auto insert_state = GetInsertState(kernel);
insert_kernels.clear();
// If every output kernel is nhwc2nchw, insert
// modify the loop index by adding post_kernels.size() so we move on to the next kernel in the original vector
switch (insert_state) {
case PreInsert: {
auto ret = InsertPreNodes(kernel, &insert_kernels);
if (ret != RET_OK) {
MS_LOG(ERROR) << "Insert nhwc2nchw kernel and nchw2nhwc kernel before kernel " << kernel->name()
<< " failed.";
return RET_ERROR;
}
all_kernels_->insert(all_kernels_->begin() + i, insert_kernels.begin(), insert_kernels.end());
i += insert_kernels.size();
break;
}
all_kernels_->insert(all_kernels_->begin() + i + 1, insert_kernels.begin(), insert_kernels.end());
i += insert_kernels.size();
break;
}
case BothInsert: {
auto ret = InsertPreNodes(kernel, &insert_kernels);
if (ret != RET_OK) {
MS_LOG(ERROR) << "Insert nhwc2nchw kernel and nchw2nhwc kernel before kernel " << kernel->name()
<< " failed.";
return RET_ERROR;
case PostInsert: {
auto ret = InsertPostNodes(kernel, &insert_kernels);
if (ret != RET_OK) {
MS_LOG(ERROR) << "Insert nhwc2nchw kernel and nchw2nhwc kernel after kernel " << kernel->name()
<< " failed.";
return RET_ERROR;
}
all_kernels_->insert(all_kernels_->begin() + i + 1, insert_kernels.begin(), insert_kernels.end());
i += insert_kernels.size();
break;
}
all_kernels_->insert(all_kernels_->begin() + i, insert_kernels.begin(), insert_kernels.end());
i += insert_kernels.size();
case BothInsert: {
auto ret = InsertPreNodes(kernel, &insert_kernels);
if (ret != RET_OK) {
MS_LOG(ERROR) << "Insert nhwc2nchw kernel and nchw2nhwc kernel before kernel " << kernel->name()
<< " failed.";
return RET_ERROR;
}
all_kernels_->insert(all_kernels_->begin() + i, insert_kernels.begin(), insert_kernels.end());
i += insert_kernels.size();
insert_kernels.clear();
ret = InsertPostNodes(kernel, &insert_kernels);
if (ret != RET_OK) {
MS_LOG(ERROR) << "Insert nhwc2nchw kernel and nchw2nhwc kernel after kernel " << kernel->name() << " failed.";
return RET_ERROR;
insert_kernels.clear();
ret = InsertPostNodes(kernel, &insert_kernels);
if (ret != RET_OK) {
MS_LOG(ERROR) << "Insert nhwc2nchw kernel and nchw2nhwc kernel after kernel " << kernel->name()
<< " failed.";
return RET_ERROR;
}
all_kernels_->insert(all_kernels_->begin() + i + 1, insert_kernels.begin(), insert_kernels.end());
i += insert_kernels.size();
break;
}
all_kernels_->insert(all_kernels_->begin() + i + 1, insert_kernels.begin(), insert_kernels.end());
i += insert_kernels.size();
break;
default:
MS_LOG(DEBUG) << "Insert Nothing on kernel " << kernel->name();
}
default:
MS_LOG(DEBUG) << "Insert Nothing on kernel " << kernel->name();
}
}
return RET_OK;


@@ -34,6 +34,7 @@ class NPUInsertTransformPass : public NPUBasePass {
int Run() override;
private:
int GetInsertState(kernel::LiteKernel *kernel);
int InsertPreNodes(kernel::LiteKernel *kernel, std::vector<kernel::LiteKernel *> *trans_kernels);
int InsertPostNodes(kernel::LiteKernel *kernel, std::vector<kernel::LiteKernel *> *trans_kernels);


@@ -25,7 +25,10 @@
namespace mindspore::lite {
using kernel::KERNEL_ARCH::kCPU;
using kernel::KERNEL_ARCH::kNPU;
std::unordered_map<schema::PrimitiveType, std::set<int>> nodes2const_index{
{schema::PrimitiveType_Split, {1}},
{schema::PrimitiveType_PadFusion, {1}},
{schema::PrimitiveType_StridedSlice, {1, 2, 3}}};
kernel::LiteKernel *NPUPassUtils::CreateNchw2NhwcKernel(const std::vector<Tensor *> &in_tensors,
const std::vector<Tensor *> &out_tensors,
const InnerContext *ctx, const std::string &name) {
@@ -125,8 +128,8 @@ void NPUPassUtils::UpdateNC2NHTransNodePreKernel(kernel::LiteKernel *pre_kernel,
}
std::copy(trans_kernels.begin(), trans_kernels.end(), std::back_inserter(cur_out_kernels));
pre_kernel->set_out_kernels(cur_out_kernels);
// For kernel before trans, the output tensor is used for output tensor of trans, so replace the output tensor with
// the input tensor of trans.
// For kernel before trans, the output tensor is used for output tensor of trans, so replace the output tensor
// with the input tensor of trans.
pre_kernel->set_out_tensors({trans_kernels.at(0)->in_tensors().at(0)});
}
@@ -158,7 +161,7 @@ void NPUPassUtils::UpdateNC2NHTransNodePostKernel(kernel::LiteKernel *kernel, ke
Tensor *old_in_tensor = nullptr;
// find out which input tensor of post_kernel should be updated
for (size_t i = 0; i < post_in_tensors.size(); ++i) {
if (KernelInputFromKernel(post_kernel, i) == kernel) {
if (KernelInputFromKernel(post_kernel, post_in_tensors.at(i)) == kernel) {
old_in_tensor = post_in_tensors.at(i);
break;
}
@@ -219,17 +222,16 @@ bool NPUPassUtils::IsNchw2Nhwc(const kernel::LiteKernel *kernel) {
}
return false;
}
kernel::LiteKernel *NPUPassUtils::KernelInputFromKernel(const kernel::LiteKernel *kernel, size_t in_tensor_index) {
kernel::LiteKernel *NPUPassUtils::KernelInputFromKernel(const kernel::LiteKernel *kernel, Tensor *in_tensor) {
// given the kernel and one of its input tensors, find which kernel outputs this tensor.
// If the input tensor is a graph input, return nullptr.
if (kernel == nullptr) {
return nullptr;
}
auto tensor = kernel->in_tensors().at(in_tensor_index);
auto in_kernels = kernel->in_kernels();
auto output_contain = [tensor](const kernel::LiteKernel *kernel) {
auto output_contain = [in_tensor](const kernel::LiteKernel *kernel) {
auto out_tensors = kernel->out_tensors();
return std::find(out_tensors.begin(), out_tensors.end(), tensor) != out_tensors.end();
return std::find(out_tensors.begin(), out_tensors.end(), in_tensor) != out_tensors.end();
};
auto it = std::find_if(in_kernels.begin(), in_kernels.end(), output_contain);
if (it == in_kernels.end()) {
@@ -238,10 +240,57 @@ kernel::LiteKernel *NPUPassUtils::KernelInputFromKernel(const kernel::LiteKernel
return *it;
}
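
With the signature change above, the helper receives the input tensor itself rather than an index. A standalone sketch of the same producer lookup on toy structs (illustrative only): return the predecessor whose outputs contain the tensor, or nullptr when no predecessor produces it (e.g. a graph input):

#include <algorithm>
#include <cstdio>
#include <string>
#include <vector>

struct Node {
  std::string name;
  std::vector<int> in_tensors;   // tensor ids consumed by this node
  std::vector<int> out_tensors;  // tensor ids produced by this node
  std::vector<Node *> in_nodes;  // predecessor nodes
};

Node *ProducerOf(const Node &node, int tensor_id) {
  auto it = std::find_if(node.in_nodes.begin(), node.in_nodes.end(), [tensor_id](const Node *n) {
    return std::find(n->out_tensors.begin(), n->out_tensors.end(), tensor_id) != n->out_tensors.end();
  });
  return it == node.in_nodes.end() ? nullptr : *it;
}

int main() {
  Node conv{"conv", {0}, {1}, {}};
  Node slice{"slice", {1, 2}, {3}, {&conv}};  // tensor 2 is a const "begin" input
  Node *p1 = ProducerOf(slice, 1);
  Node *p2 = ProducerOf(slice, 2);
  printf("%s\n", p1 != nullptr ? p1->name.c_str() : "no producer");  // conv
  printf("%s\n", p2 != nullptr ? p2->name.c_str() : "no producer");  // no producer
  return 0;
}
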
std::vector<Tensor *> NPUPassUtils::GetNonConstInputs(kernel::LiteKernel *kernel) {
if (kernel == nullptr) {
return std::vector<Tensor *>{};
}
auto type = static_cast<schema::PrimitiveType>(kernel->op_parameter()->type_);
auto it = nodes2const_index.find(type);
if (it != nodes2const_index.end()) {
auto const_input_indices = it->second;
std::vector<Tensor *> non_const_in_tensors;
auto in_tensors = kernel->in_tensors();
for (size_t i = 0; i < in_tensors.size(); ++i) {
if (const_input_indices.find(i) == const_input_indices.end()) {
non_const_in_tensors.push_back(in_tensors[i]);
}
}
return non_const_in_tensors;
}
return kernel->in_tensors();
}
bool NPUPassUtils::Scale4dCase(const kernel::LiteKernel *kernel) {
MS_ASSERT(kernel != nullptr && kernel->op_parameter() != nullptr);
auto scale_param = reinterpret_cast<ScaleParameter *>(kernel->op_parameter());
auto in_tensor = kernel->in_tensors().at(1);
return in_tensor->shape().size() == 1 && (scale_param->axis_ == 3 || scale_param->axis_ == -1);
}
void NPUPassUtils::AssistDataNHWC2NCHW(int *data, size_t unit_size) {
MS_ASSERT(data != nullptr);
for (size_t i = 0; i < unit_size; ++i) {
int c = data[3 * unit_size + i];
// n h w c
// n c h w
data[3 * unit_size + i] = data[2 * unit_size + i];
data[2 * unit_size + i] = data[unit_size + i];
data[unit_size + i] = c;
}
}
int NPUPassUtils::MaskDataNHWC2NCHW(int mask) {
int mask_vec[4];
for (int i = 0; i < 4; ++i) {
mask_vec[i] = (uint32_t)(mask) & (1 << i);
}
AssistDataNHWC2NCHW(mask_vec, 1);
int ret = 0;
for (int i = 0; i < 4; ++i) {
if (mask_vec[i]) {
ret += 1 << i;
}
}
return ret;
}
} // namespace mindspore::lite


@@ -17,9 +17,12 @@
#ifndef MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_OPTIMIZER_NPU_PASS_UTILS_H_
#define MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_OPTIMIZER_NPU_PASS_UTILS_H_
#include <vector>
#include <set>
#include <string>
#include <unordered_map>
#include "src/lite_kernel.h"
namespace mindspore::lite {
extern std::unordered_map<schema::PrimitiveType, std::set<int>> nodes2const_index;
class NPUPassUtils {
public:
static kernel::LiteKernel *CreateNchw2NhwcKernel(const std::vector<Tensor *> &in_tensors,
@@ -52,8 +55,11 @@ class NPUPassUtils {
static bool IsNhwc2Nchw(const kernel::LiteKernel *kernel);
static bool IsNchw2Nhwc(const kernel::LiteKernel *kernel);
static kernel::LiteKernel *KernelInputFromKernel(const kernel::LiteKernel *kernel, size_t in_tensor_index);
static kernel::LiteKernel *KernelInputFromKernel(const kernel::LiteKernel *kernel, Tensor *in_tensor);
static std::vector<Tensor *> GetNonConstInputs(kernel::LiteKernel *kernel);
static bool Scale4dCase(const kernel::LiteKernel *kernel);
static void AssistDataNHWC2NCHW(int *data, size_t unit_size);
static int MaskDataNHWC2NCHW(int mask);
};
} // namespace mindspore::lite
#endif // MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_OPTIMIZER_NPU_PASS_UTILS_H_


@@ -14,7 +14,6 @@
* limitations under the License.
*/
#include "src/runtime/agent/npu/optimizer/npu_transform_pass.h"
#include <set>
#include <vector>
#include "src/lite_kernel.h"
#include "src/runtime/agent/npu/npu_manager.h"
@@ -22,7 +21,7 @@
namespace mindspore::lite {
using kernel::KERNEL_ARCH::kNPU;
static std::set<mindspore::schema::PrimitiveType> npu_trans_nodes = {
std::set<mindspore::schema::PrimitiveType> npu_trans_nodes = {
schema::PrimitiveType_Conv2DFusion, schema::PrimitiveType_Conv2dTransposeFusion, schema::PrimitiveType_Resize,
schema::PrimitiveType_MaxPoolFusion, schema::PrimitiveType_AvgPoolFusion, schema::PrimitiveType_ScaleFusion};


@@ -16,11 +16,14 @@
#ifndef MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_OPTIMIZER_NPU_TRANSFORM_PASS_H_
#define MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_OPTIMIZER_NPU_TRANSFORM_PASS_H_
#include <set>
#include <vector>
#include "src/lite_kernel.h"
#include "src/runtime/agent/npu/optimizer/npu_base_pass.h"
namespace mindspore::lite {
extern std::set<mindspore::schema::PrimitiveType> npu_trans_nodes;
class NPUTransformPass : public NPUBasePass {
public:
int Run() override;


@@ -31,7 +31,7 @@ int PadNPUKernel::IsSupport(const std::vector<lite::Tensor *> &inputs, const std
}
if (inputs.size() >= 2 && inputs[1]->data_c() != nullptr) {
for (int i = 0; i < inputs[1]->ElementsNum(); i++) {
paddings_.push_back(static_cast<int *>(inputs[1]->data_c())[i]);
param_->paddings_[i] = static_cast<int *>(inputs[1]->data_c())[i];
}
} else {
MS_LOG(WARNING) << "NPU axis is attribute.";
@@ -50,7 +50,7 @@ int PadNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs, const
int size = static_cast<int>(param_->padding_length / 2);
ge::TensorDesc padding_tensor_desc(ge::Shape({size, 2}), ge::FORMAT_NCHW, ge::DT_INT32);
ge::TensorPtr padding_tensor = std::make_shared<hiai::Tensor>(padding_tensor_desc);
padding_tensor->SetData(reinterpret_cast<uint8_t *>(paddings_.data()), 2 * size * sizeof(int));
padding_tensor->SetData(reinterpret_cast<uint8_t *>(param_->paddings_), 2 * size * sizeof(int));
hiai_paddings_ = new hiai::op::Const(name_ + "paddings");
hiai_paddings_->set_attr_value(padding_tensor);


@@ -39,7 +39,6 @@ class PadNPUKernel : public NPUKernel {
private:
hiai::op::PadV2 *op_ = nullptr;
PadParameter *param_;
std::vector<int> paddings_;
hiai::op::Const *hiai_paddings_ = nullptr;
hiai::op::Const *hiai_constant_ = nullptr;
};


@@ -77,3 +77,5 @@ ml_video_edit_img_segment_adaptise_pb2tflite.tflite 0.5 2
ml_video_edit_imitate_filter.onnx 200
hdc_mobilenet_1w_class.onnx 20
hdc_age_medium 504
posenet_mobilenet_float_075_1_default_1.tflite 395
nasnet_mobile.tflite 1