!24065 [MSLITE] NPU: support inputs from multiple-output operators.

Merge pull request !24065 from wangshaocong/bugfix_master_npu
i-robot 2021-10-11 07:36:28 +00:00 committed by Gitee
commit fd0f1e3eaa
16 changed files with 141 additions and 14 deletions
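
The core of this PR is a new four-argument SetNPUInputs overload whose index2_multi_out_index map ties an input slot of the current op to a specific output of a multi-output producer (for example, a Split). The mock below is a minimal sketch of the wiring pattern the per-op overrides in this commit share; Operator, WireInputs, and the Split/Relu/Concat graph are illustrative stand-ins, not the HiAI ge::Operator API or code from this commit.

#include <cstdio>
#include <unordered_map>
#include <utility>
#include <vector>

// Stand-in for the HiAI IR operator type; only the SetInput surface used
// by this PR is mocked.
struct Operator {
  explicit Operator(const char *name) : name_(name) {}
  void SetInput(int slot, const Operator &producer, int out_index) {
    std::printf("slot %d <- %s output %d\n", slot, producer.name_, out_index);
  }
  const char *name_;
};

void WireInputs(Operator *op, const std::vector<Operator *> &npu_inputs,
                const std::unordered_map<int, std::pair<Operator *, int>> &index2_multi_out_index) {
  // Inputs fed by a multi-output producer: bind the exact producer output.
  for (const auto &pair : index2_multi_out_index) {
    op->SetInput(pair.first, *pair.second.first, pair.second.second);
  }
  // Every other input comes from a single-output producer: use its output 0.
  for (size_t i = 0; i < npu_inputs.size(); ++i) {
    if (index2_multi_out_index.find(static_cast<int>(i)) == index2_multi_out_index.end()) {
      op->SetInput(static_cast<int>(i), *npu_inputs[i], 0);
    }
  }
}

int main() {
  Operator split("Split"), relu("Relu"), concat("Concat");
  // A two-output Split feeds Concat inputs 0 and 1; a Relu feeds input 2.
  std::vector<Operator *> npu_inputs = {&split, &split, &relu};
  std::unordered_map<int, std::pair<Operator *, int>> index2_multi_out_index = {
      {0, {&split, 0}}, {1, {&split, 1}}};
  WireInputs(&concat, npu_inputs, index2_multi_out_index);
  return 0;
}

ConcatNPUOp::SetNPUInputs and ReshapeNPUOp::SetNPUInputs below follow exactly this two-loop shape; single-input ops such as ActivationNPUOp only need the first entry of the map.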

View File

@@ -95,6 +95,10 @@ int NPUExecutor::Run(const std::vector<mindspore::MSTensor> &in_tensors,
return RET_ERROR;
}
if (npu_output_tensors_.size() != out_tensors.size()) {
MS_LOG(ERROR) << "The output count is not euqal to ms tensor.";
return RET_ERROR;
}
for (size_t i = 0; i < npu_output_tensors_.size(); ++i) {
mindspore::MSTensor out_tensor = out_tensors[i];
auto data = out_tensor.MutableData();

View File

@@ -66,6 +66,21 @@ int ActivationNPUOp::SetNPUInputs(const std::vector<mindspore::MSTensor> &in_ten
return RET_OK;
}
int ActivationNPUOp::SetNPUInputs(
const std::vector<mindspore::MSTensor> &in_tensors, const std::vector<mindspore::MSTensor> &out_tensors,
const std::vector<ge::Operator *> &npu_inputs,
const std::unordered_map<int, std::pair<ge::Operator *, int>> &index2_multi_out_index) {
if (!index2_multi_out_index.empty()) {
auto itr = index2_multi_out_index.begin();
auto in_op = itr->second.first;
MS_CHECK_TRUE_RET(in_op != nullptr, RET_ERROR);
act_->SetInput(itr->first, *in_op, itr->second.second);
} else {
act_->set_input_x(*npu_inputs[0]);
}
return RET_OK;
}
ge::Operator *ActivationNPUOp::GetNPUOp() { return act_; }
ActivationNPUOp::~ActivationNPUOp() {

View File

@@ -18,6 +18,8 @@
#include <vector>
#include <string>
#include <utility>
#include <unordered_map>
#include "include/graph/compatible/all_ops.h"
#include "src/delegate/npu/op/npu_op.h"
namespace mindspore {
@@ -39,6 +41,10 @@ class ActivationNPUOp : public NPUOp {
const std::vector<mindspore::MSTensor> &out_tensors,
const std::vector<ge::Operator *> &npu_inputs) override;
int SetNPUInputs(const std::vector<mindspore::MSTensor> &in_tensors,
const std::vector<mindspore::MSTensor> &out_tensors, const std::vector<ge::Operator *> &npu_inputs,
const std::unordered_map<int, std::pair<ge::Operator *, int>> &index2_multi_out_index) override;
ge::Operator *GetNPUOp() override;
private:

View File

@@ -20,7 +20,8 @@ namespace mindspore {
constexpr int ARITHMETIC_INPUT_NUM = 2;
int ArithmeticNPUOp::IsSupport(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
const std::vector<mindspore::MSTensor> &out_tensors) {
- if (in_tensors[0].Shape().size() != in_tensors[1].Shape().size()) {
+ if (in_tensors[0].Shape().size() != 0 && in_tensors[1].Shape().size() != 0 &&
+ in_tensors[0].Shape().size() != in_tensors[1].Shape().size()) {
MS_LOG(WARNING) << name_ << " for the two inputs, the dimension sizes must be the same."
<< " size 1 is:" << in_tensors[0].Shape().size() << " size 2 is:" << in_tensors[1].Shape().size();
return RET_NOT_SUPPORT;

View File

@@ -46,6 +46,26 @@ int ConcatNPUOp::SetNPUInputs(const std::vector<mindspore::MSTensor> &in_tensors
return RET_OK;
}
int ConcatNPUOp::SetNPUInputs(const std::vector<mindspore::MSTensor> &in_tensors,
const std::vector<mindspore::MSTensor> &out_tensors,
const std::vector<ge::Operator *> &npu_inputs,
const std::unordered_map<int, std::pair<ge::Operator *, int>> &index2_multi_out_index) {
concat_->set_attr_concat_dim(axis_);
concat_->set_attr_N(npu_inputs.size());
concat_->create_dynamic_input_x(npu_inputs.size());
for (auto pair : index2_multi_out_index) {
auto in_op = pair.second.first;
MS_CHECK_TRUE_RET(in_op != nullptr, RET_ERROR);
concat_->SetInput(pair.first, *in_op, pair.second.second);
}
for (int i = 0; i < npu_inputs.size(); ++i) {
if (index2_multi_out_index.find(i) == index2_multi_out_index.end()) {
concat_->SetInput(i, *npu_inputs[i], 0);
}
}
return RET_OK;
}
ge::Operator *ConcatNPUOp::GetNPUOp() { return this->concat_; }
int ConcatNPUOp::HandleAxis() {

View File

@@ -18,6 +18,8 @@
#define MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_NPU_OP_CONCAT_NPU_H_
#include <vector>
#include <string>
#include <utility>
#include <unordered_map>
#include "include/graph/op/all_ops.h"
#include "src/delegate/npu/op/npu_op.h"
namespace mindspore {
@@ -41,6 +43,10 @@ class ConcatNPUOp : public NPUOp {
const std::vector<mindspore::MSTensor> &out_tensors,
const std::vector<ge::Operator *> &npu_inputs) override;
int SetNPUInputs(const std::vector<mindspore::MSTensor> &in_tensors,
const std::vector<mindspore::MSTensor> &out_tensors, const std::vector<ge::Operator *> &npu_inputs,
const std::unordered_map<int, std::pair<ge::Operator *, int>> &index2_multi_out_index) override;
ge::Operator *GetNPUOp() override;
int HandleAxis();

View File

@@ -112,6 +112,36 @@ int ConvolutionNPUOp::SetNPUInputs(const std::vector<mindspore::MSTensor> &in_te
return RET_OK;
}
int ConvolutionNPUOp::SetNPUInputs(
const std::vector<mindspore::MSTensor> &in_tensors, const std::vector<mindspore::MSTensor> &out_tensors,
const std::vector<ge::Operator *> &npu_inputs,
const std::unordered_map<int, std::pair<ge::Operator *, int>> &index2_multi_out_index) {
auto ret = InitWeightConst(in_tensors);
if (ret != RET_OK) {
MS_LOG(ERROR) << "Set weight and bias for convolution op " << name_ << " failed when running npu";
return RET_ERROR;
}
conv_->set_input_filter(*weight_);
if (in_tensors.size() == CONV_INPUT_SIZE) {
ret = InitBiasConst(in_tensors);
if (ret != RET_OK) {
MS_LOG(ERROR) << "Set bias for convolution op " << name_ << " failed when running npu";
return RET_ERROR;
}
conv_->set_input_bias(*bias_);
}
if (!index2_multi_out_index.empty()) {
auto itr = index2_multi_out_index.begin();
auto in_op = itr->second.first;
MS_CHECK_TRUE_RET(in_op != nullptr, RET_ERROR);
conv_->SetInput(itr->first, *in_op, itr->second.second);
} else {
conv_->set_input_x(*npu_inputs[0]);
}
return RET_OK;
}
ge::Operator *ConvolutionNPUOp::GetNPUOp() {
if (act_type_ == schema::ActivationType_NO_ACTIVATION) {
return conv_;

View File

@@ -18,6 +18,8 @@
#include <vector>
#include <string>
#include <utility>
#include <unordered_map>
#include "include/graph/op/all_ops.h"
#include "src/delegate/npu/op/convolution_base_npu.h"
namespace mindspore {
@@ -39,6 +41,10 @@ class ConvolutionNPUOp : public ConvolutionBaseNPUOp {
const std::vector<mindspore::MSTensor> &out_tensors,
const std::vector<ge::Operator *> &npu_inputs) override;
int SetNPUInputs(const std::vector<mindspore::MSTensor> &in_tensors,
const std::vector<mindspore::MSTensor> &out_tensors, const std::vector<ge::Operator *> &npu_inputs,
const std::unordered_map<int, std::pair<ge::Operator *, int>> &index2_multi_out_index) override;
ge::Operator *GetNPUOp() override;
private:

View File

@@ -70,7 +70,8 @@ class NPUOp {
if (index2_multi_out_index.empty()) {
return SetNPUInputs(in_tensors, out_tensors, npu_inputs);
}
- return RET_OK;
+ MS_LOG(ERROR) << "The input operator of npu op: " << this->name() << " has multiple outputs. Override this method.";
+ return RET_ERROR;
}
virtual ge::Operator *GetNPUOp() { return nullptr; }
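
The base-class default above defines the contract for every override in this commit: an empty map falls back to the three-argument path, while a non-empty map on an op without an override now fails fast instead of returning RET_OK with inputs left unwired. A simplified, self-contained mock of that dispatch (not the real NPUOp interface):

#include <cstdio>
#include <unordered_map>
#include <utility>

struct Op {};  // stand-in for ge::Operator
constexpr int RET_OK = 0;
constexpr int RET_ERROR = 1;
using MultiOutMap = std::unordered_map<int, std::pair<Op *, int>>;

struct NpuOpBase {
  virtual ~NpuOpBase() = default;
  virtual int SetNPUInputs() { return RET_OK; }  // the plain three-argument path
  // Default four-argument path: valid only when no multi-output producer
  // feeds this op; subclasses that can consume one must override it.
  virtual int SetNPUInputs(const MultiOutMap &index2_multi_out_index) {
    if (index2_multi_out_index.empty()) {
      return SetNPUInputs();
    }
    std::printf("multi-output producer but no override -> RET_ERROR\n");
    return RET_ERROR;  // the pre-fix default returned RET_OK here
  }
};

Failing loudly here is what forces ops such as Concat and Convolution in this PR to provide real overrides rather than silently building a graph with dangling inputs.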

View File

@@ -50,6 +50,23 @@ int ReshapeNPUOp::SetNPUInputs(const std::vector<mindspore::MSTensor> &in_tensor
return RET_OK;
}
int ReshapeNPUOp::SetNPUInputs(const std::vector<mindspore::MSTensor> &in_tensors,
const std::vector<mindspore::MSTensor> &out_tensors,
const std::vector<ge::Operator *> &npu_inputs,
const std::unordered_map<int, std::pair<ge::Operator *, int>> &index2_multi_out_index) {
for (auto pair : index2_multi_out_index) {
auto in_op = pair.second.first;
MS_CHECK_TRUE_RET(in_op != nullptr, RET_ERROR);
reshape_->SetInput(pair.first, *in_op, pair.second.second);
}
for (int i = 0; i < npu_inputs.size(); ++i) {
if (index2_multi_out_index.find(i) == index2_multi_out_index.end()) {
reshape_->SetInput(i, *npu_inputs[i], 0);
}
}
return RET_OK;
}
ge::Operator *ReshapeNPUOp::GetNPUOp() { return this->reshape_; }
ReshapeNPUOp::~ReshapeNPUOp() {

View File

@@ -18,6 +18,8 @@
#define MINDSPORE_LITE_SRC_DELEGATE_NPU_OP_RESHAPE_NPU_H_
#include <vector>
#include <string>
#include <utility>
#include <unordered_map>
#include "include/graph/op/all_ops.h"
#include "src/delegate/npu/op/npu_op.h"
namespace mindspore {
@@ -39,6 +41,10 @@ class ReshapeNPUOp : public NPUOp {
const std::vector<mindspore::MSTensor> &out_tensors,
const std::vector<ge::Operator *> &npu_inputs) override;
int SetNPUInputs(const std::vector<mindspore::MSTensor> &in_tensors,
const std::vector<mindspore::MSTensor> &out_tensors, const std::vector<ge::Operator *> &npu_inputs,
const std::unordered_map<int, std::pair<ge::Operator *, int>> &index2_multi_out_index) override;
ge::Operator *GetNPUOp() override;
private:

View File

@@ -157,6 +157,9 @@ int UpdatePreTensors(NPUOp *cur_op) {
tensors_vec.resize(cur_op->inputs().size());
auto const_index = nodes2const_index[cur_op->type()];
for (auto index : const_index) {
if (index >= cur_op->inputs().size()) {
continue;
}
tensors_vec[index] = cur_op->inputs()[index];
}
}
@@ -463,6 +466,7 @@ int NPUFusionPass::Run(NPUGraph *subgraph) {
continue;
case schema::PrimitiveType_AddFusion:
case schema::PrimitiveType_MulFusion:
case schema::PrimitiveType_DivFusion:
case schema::PrimitiveType_Activation:
case schema::PrimitiveType_Eltwise:
i -= cur_op->in_ops().size();

View File

@@ -27,7 +27,7 @@ enum InsertState { InsertNone, PreInsert, PostInsert, BothInsert };
std::set<mindspore::schema::PrimitiveType> insert_nodes = {
schema::PrimitiveType_Concat, schema::PrimitiveType_AddFusion, schema::PrimitiveType_Eltwise,
schema::PrimitiveType_Activation, schema::PrimitiveType_Split, schema::PrimitiveType_PadFusion,
- schema::PrimitiveType_StridedSlice, schema::PrimitiveType_MulFusion};
+ schema::PrimitiveType_StridedSlice, schema::PrimitiveType_MulFusion, schema::PrimitiveType_DivFusion};
// the goal of this pass is to minimize the number of subgraphs generated
// by inserting nchw2nhwc or nhwc2nchw before or after the operator (e.g. concat, add, etc.) together with
@@ -136,7 +136,9 @@ int NPUInsertTransformPass::InsertNode(NPUOp *op, NPUOp *post_op, size_t post_in
for (auto i = 0; i < in_tensors.size(); ++i) {
auto in_tensor = in_tensors[i];
auto nhwc_shape = in_tensor.Shape();
- if (nhwc_shape.size() < 4) {
+ if (nhwc_shape.size() == 0) {
+ continue;
+ } else if (nhwc_shape.size() < 4) {
MS_LOG(ERROR) << "nhwc_shape size < " << 4;
return RET_ERROR;
}

View File

@@ -185,10 +185,6 @@ int NPUTransformPass::Run(NPUGraph *subgraph) {
return RET_ERROR;
}
}
- if (op->type() == schema::PrimitiveType_Resize && op->inputs()[0].Shape()[1] > op->outputs()[0].Shape()[1]) {
- i++;
- continue;
- }
// insert pre_ops before op in vector
// modify loop index add (pre_ops.size() + 1) to the post_ops insert location
std::vector<NPUOp *> pre_ops;

View File

@@ -29,7 +29,7 @@ deeplabv3_1_default_1.tflite 2.5
ml_video_edit_person_divison 0.5
ml_video_edit_style_transfer_autoportrait.onnx 9
ml_video_edit_style_transfer_candy.onnx 11
- ml_video_edit_style_transfer_gongnongbing.onnx 10
+ ml_video_edit_style_transfer_gongnongbing.onnx 10.5
ml_video_edit_style_transfer_starry.onnx 11
porseg_tmp.onnx;2 1
ml_video_edit_Mnet 1.5
@@ -134,7 +134,7 @@ hiai_latin_ocr_1.tflite;1:input_0 5.5
hiai_PoseEstimation_Pcm.tflite;1:image 12
#large precision bias error
#hiai_ssd_mobilenetv2_object.tflite;1:image_tensor
- hiai_cv_focusShootOCRModel_02.tflite;1:input_0 4.5
+ hiai_cv_focusShootOCRModel_02.tflite;1:input_0 5
hiai_cv_poseEstimation.tflite;1:Image 37
mtk_model_normalize_object_scene_ps_20200519_f16.tflite;1:input_0 3
#mtk_age_gender_fp16.tflite;1:img
@@ -215,8 +215,8 @@ Q_AADB_HADB_MBV2_model.tflite;1:input_0 2.5
#Q_crnn_ori_v2_405001_notrans_nopre_pb2tflite.tflite;1:input_0
#Q_crnn_screen_slim400w_more_20w_pb2tflite.tflite;1:input_0
Q_dila-small-mix-full-fineturn-390000-nopixel-nosigmoid_tflite.tflite;1:input 2
- Q_focusocr_cn_recog.tflite;1:input_0 6
- Q_focusocr_jk_recog.tflite;1:input_0 4.5
+ Q_focusocr_cn_recog.tflite;1:input_0 8
+ Q_focusocr_jk_recog.tflite;1:input_0 6.5
Q_inception-249970-672-11-16_pb2tflite.tflite;1:input 3
#Q_isface.tflite;1:data
#Q_landmark.tflite;1:img

View File

@@ -17,6 +17,8 @@
#include "tools/optimizer/fusion/squeeze_fusion.h"
#include <memory>
#include "schema/inner/model_generated.h"
#include "ops/squeeze.h"
#include "ops/unsqueeze.h"
#include "tools/optimizer/common/gllo_utils.h"
#include "nnacl/op_base.h"
@@ -78,8 +80,19 @@ const AnfNodePtr SqueezeFusion::Process(const FuncGraphPtr &func_graph, const An
}
auto pre_node = squeeze_node->cast<CNodePtr>()->input(1);
- if (GetCNodePrimitive(unsqueeze_node)->GetAttr(ops::kAxis) ==
- GetCNodePrimitive(unsqueeze_node)->GetAttr(ops::kAxis)) {
+ auto unsqueeze_primitive = GetCNodePrimitive(unsqueeze_node);
+ auto squeeze_primitive = GetCNodePrimitive(squeeze_node);
+ MS_ASSERT(unsqueeze_primitive != nullptr);
+ MS_ASSERT(squeeze_primitive != nullptr);
+ if (unsqueeze_primitive->GetAttr(ops::kAxis) == nullptr || squeeze_primitive->GetAttr(ops::kAxis) == nullptr) {
+ MS_LOG(ERROR) << "The squeeze or unsqueeze node has no axis value.";
+ return nullptr;
+ }
+ auto unsqueeze_prim = utils::cast<std::shared_ptr<mindspore::ops::Unsqueeze>>(unsqueeze_primitive);
+ auto squeeze_prim = utils::cast<std::shared_ptr<mindspore::ops::Squeeze>>(squeeze_primitive);
+ MS_ASSERT(unsqueeze_prim != nullptr);
+ MS_ASSERT(squeeze_prim != nullptr);
+ if (squeeze_prim->get_axis() == unsqueeze_prim->get_axis()) {
auto manager = func_graph->manager();
MS_ASSERT(manager != nullptr);
(void)manager->Replace(unsqueeze_node, act_node);
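
For context on the bug fixed here: the old condition compared GetAttr(ops::kAxis) of the unsqueeze node against itself, and GetAttr returns a shared handle, so even with the right operands, operator== checks object identity rather than axis values. A small self-contained sketch of that pitfall, with std::shared_ptr as a stand-in for MindSpore's ValuePtr:

#include <cstdint>
#include <cstdio>
#include <memory>
#include <vector>

int main() {
  using Axis = std::vector<int64_t>;
  // Two attribute handles holding equal axis values, as the squeeze and
  // unsqueeze primitives would after parsing.
  auto squeeze_axis = std::make_shared<Axis>(Axis{1});
  auto unsqueeze_axis = std::make_shared<Axis>(Axis{1});
  // Handle comparison: pointer identity, false for distinct objects even
  // with equal contents (and trivially true when both sides are the same
  // node, as in the old code).
  std::printf("handle equality: %d\n", static_cast<int>(squeeze_axis == unsqueeze_axis));
  // Value comparison, which the fix performs via get_axis(): true.
  std::printf("value equality:  %d\n", static_cast<int>(*squeeze_axis == *unsqueeze_axis));
  return 0;
}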