forked from mindspore-Ecosystem/mindspore
!24065 [MSLITE] NPU support inputs from multiple output operators.
Merge pull request !24065 from wangshaocong/bugfix_master_npu
This commit is contained in:
commit
fd0f1e3eaa
|
@ -95,6 +95,10 @@ int NPUExecutor::Run(const std::vector<mindspore::MSTensor> &in_tensors,
|
|||
return RET_ERROR;
|
||||
}
|
||||
|
||||
if (npu_output_tensors_.size() != out_tensors.size()) {
|
||||
MS_LOG(ERROR) << "The output count is not euqal to ms tensor.";
|
||||
return RET_ERROR;
|
||||
}
|
||||
for (size_t i = 0; i < npu_output_tensors_.size(); ++i) {
|
||||
mindspore::MSTensor out_tensor = out_tensors[i];
|
||||
auto data = out_tensor.MutableData();
|
||||
|
|
|
@ -66,6 +66,21 @@ int ActivationNPUOp::SetNPUInputs(const std::vector<mindspore::MSTensor> &in_ten
|
|||
return RET_OK;
|
||||
}
|
||||
|
||||
int ActivationNPUOp::SetNPUInputs(
|
||||
const std::vector<mindspore::MSTensor> &in_tensors, const std::vector<mindspore::MSTensor> &out_tensors,
|
||||
const std::vector<ge::Operator *> &npu_inputs,
|
||||
const std::unordered_map<int, std::pair<ge::Operator *, int>> &index2_multi_out_index) {
|
||||
if (!index2_multi_out_index.empty()) {
|
||||
auto itr = index2_multi_out_index.begin();
|
||||
auto in_op = itr->second.first;
|
||||
MS_CHECK_TRUE_RET(in_op != nullptr, RET_ERROR);
|
||||
act_->SetInput(itr->first, *in_op, itr->second.second);
|
||||
} else {
|
||||
act_->set_input_x(*npu_inputs[0]);
|
||||
}
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
// Hands back the operator stored in act_; no ownership transfer is visible here.
ge::Operator *ActivationNPUOp::GetNPUOp() {
  return act_;
}
|
||||
|
||||
ActivationNPUOp::~ActivationNPUOp() {
|
||||
|
|
|
@ -18,6 +18,8 @@
|
|||
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
#include <unordered_map>
|
||||
#include "include/graph/compatible/all_ops.h"
|
||||
#include "src/delegate/npu/op/npu_op.h"
|
||||
namespace mindspore {
|
||||
|
@ -39,6 +41,10 @@ class ActivationNPUOp : public NPUOp {
|
|||
const std::vector<mindspore::MSTensor> &out_tensors,
|
||||
const std::vector<ge::Operator *> &npu_inputs) override;
|
||||
|
||||
int SetNPUInputs(const std::vector<mindspore::MSTensor> &in_tensors,
|
||||
const std::vector<mindspore::MSTensor> &out_tensors, const std::vector<ge::Operator *> &npu_inputs,
|
||||
const std::unordered_map<int, std::pair<ge::Operator *, int>> &index2_multi_out_index) override;
|
||||
|
||||
ge::Operator *GetNPUOp() override;
|
||||
|
||||
private:
|
||||
|
|
|
@ -20,7 +20,8 @@ namespace mindspore {
|
|||
// NOTE(review): presumably the number of inputs an arithmetic op is expected to have; its use is not
// visible in this excerpt -- confirm.
constexpr int ARITHMETIC_INPUT_NUM = 2;
|
||||
int ArithmeticNPUOp::IsSupport(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
|
||||
const std::vector<mindspore::MSTensor> &out_tensors) {
|
||||
if (in_tensors[0].Shape().size() != in_tensors[1].Shape().size()) {
|
||||
if (in_tensors[0].Shape().size() != 0 && in_tensors[1].Shape().size() != 0 &&
|
||||
in_tensors[0].Shape().size() != in_tensors[1].Shape().size()) {
|
||||
MS_LOG(WARNING) << name_ << " for the two inputs, the dimension size must be same."
|
||||
<< " size 1 is:" << in_tensors[0].Shape().size() << " size 2 is:" << in_tensors[1].Shape().size();
|
||||
return RET_NOT_SUPPORT;
|
||||
|
|
|
@ -46,6 +46,26 @@ int ConcatNPUOp::SetNPUInputs(const std::vector<mindspore::MSTensor> &in_tensors
|
|||
return RET_OK;
|
||||
}
|
||||
|
||||
// Configures the concat operator and connects every dynamic input, including inputs that come from
// producers with several outputs.
//
// in_tensors / out_tensors: not read by this overload.
// npu_inputs: one producer operator per input; its size sets attribute N and the dynamic input count.
// index2_multi_out_index: maps an input index of this op to {producer operator, index forwarded to
//   SetInput} -- presumably the producer's output index; confirm against the caller.
// Returns RET_OK on success, RET_ERROR when an operator that would be wired is null.
int ConcatNPUOp::SetNPUInputs(const std::vector<mindspore::MSTensor> &in_tensors,
                              const std::vector<mindspore::MSTensor> &out_tensors,
                              const std::vector<ge::Operator *> &npu_inputs,
                              const std::unordered_map<int, std::pair<ge::Operator *, int>> &index2_multi_out_index) {
  concat_->set_attr_concat_dim(axis_);
  concat_->set_attr_N(npu_inputs.size());
  concat_->create_dynamic_input_x(npu_inputs.size());

  // Inputs listed in the map are wired with their explicit index from the map entry.
  // Iterate by const reference to avoid copying each map entry.
  for (const auto &entry : index2_multi_out_index) {
    ge::Operator *in_op = entry.second.first;
    MS_CHECK_TRUE_RET(in_op != nullptr, RET_ERROR);
    concat_->SetInput(entry.first, *in_op, entry.second.second);
  }

  // Every remaining input is wired with index 0 as the last SetInput argument.
  // size_t index avoids the signed/unsigned comparison against size().
  for (size_t i = 0; i < npu_inputs.size(); ++i) {
    const int input_index = static_cast<int>(i);
    if (index2_multi_out_index.find(input_index) != index2_multi_out_index.end()) {
      continue;
    }
    MS_CHECK_TRUE_RET(npu_inputs[i] != nullptr, RET_ERROR);
    concat_->SetInput(input_index, *npu_inputs[i], 0);
  }
  return RET_OK;
}
|
||||
|
||||
// Hands back the operator stored in concat_.
ge::Operator *ConcatNPUOp::GetNPUOp() {
  return concat_;
}
|
||||
|
||||
int ConcatNPUOp::HandleAxis() {
|
||||
|
|
|
@ -18,6 +18,8 @@
|
|||
#define MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_NPU_OP_CONCAT_NPU_H_
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
#include <unordered_map>
|
||||
#include "include/graph/op/all_ops.h"
|
||||
#include "src/delegate/npu/op/npu_op.h"
|
||||
namespace mindspore {
|
||||
|
@ -41,6 +43,10 @@ class ConcatNPUOp : public NPUOp {
|
|||
const std::vector<mindspore::MSTensor> &out_tensors,
|
||||
const std::vector<ge::Operator *> &npu_inputs) override;
|
||||
|
||||
int SetNPUInputs(const std::vector<mindspore::MSTensor> &in_tensors,
|
||||
const std::vector<mindspore::MSTensor> &out_tensors, const std::vector<ge::Operator *> &npu_inputs,
|
||||
const std::unordered_map<int, std::pair<ge::Operator *, int>> &index2_multi_out_index) override;
|
||||
|
||||
ge::Operator *GetNPUOp() override;
|
||||
|
||||
int HandleAxis();
|
||||
|
|
|
@ -112,6 +112,36 @@ int ConvolutionNPUOp::SetNPUInputs(const std::vector<mindspore::MSTensor> &in_te
|
|||
return RET_OK;
|
||||
}
|
||||
|
||||
int ConvolutionNPUOp::SetNPUInputs(
|
||||
const std::vector<mindspore::MSTensor> &in_tensors, const std::vector<mindspore::MSTensor> &out_tensors,
|
||||
const std::vector<ge::Operator *> &npu_inputs,
|
||||
const std::unordered_map<int, std::pair<ge::Operator *, int>> &index2_multi_out_index) {
|
||||
auto ret = InitWeightConst(in_tensors);
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "Set weight and bias for convolution op " << name_ << " failed when running npu";
|
||||
return RET_ERROR;
|
||||
}
|
||||
conv_->set_input_filter(*weight_);
|
||||
if (in_tensors.size() == CONV_INPUT_SIZE) {
|
||||
ret = InitBiasConst(in_tensors);
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "Set bias for convolution op " << name_ << " failed when running npu";
|
||||
return RET_ERROR;
|
||||
}
|
||||
conv_->set_input_bias(*bias_);
|
||||
}
|
||||
|
||||
if (!index2_multi_out_index.empty()) {
|
||||
auto itr = index2_multi_out_index.begin();
|
||||
auto in_op = itr->second.first;
|
||||
MS_CHECK_TRUE_RET(in_op != nullptr, RET_ERROR);
|
||||
conv_->SetInput(itr->first, *in_op, itr->second.second);
|
||||
} else {
|
||||
conv_->set_input_x(*npu_inputs[0]);
|
||||
}
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
ge::Operator *ConvolutionNPUOp::GetNPUOp() {
|
||||
if (act_type_ == schema::ActivationType_NO_ACTIVATION) {
|
||||
return conv_;
|
||||
|
|
|
@ -18,6 +18,8 @@
|
|||
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
#include <unordered_map>
|
||||
#include "include/graph/op/all_ops.h"
|
||||
#include "src/delegate/npu/op/convolution_base_npu.h"
|
||||
namespace mindspore {
|
||||
|
@ -39,6 +41,10 @@ class ConvolutionNPUOp : public ConvolutionBaseNPUOp {
|
|||
const std::vector<mindspore::MSTensor> &out_tensors,
|
||||
const std::vector<ge::Operator *> &npu_inputs) override;
|
||||
|
||||
int SetNPUInputs(const std::vector<mindspore::MSTensor> &in_tensors,
|
||||
const std::vector<mindspore::MSTensor> &out_tensors, const std::vector<ge::Operator *> &npu_inputs,
|
||||
const std::unordered_map<int, std::pair<ge::Operator *, int>> &index2_multi_out_index) override;
|
||||
|
||||
ge::Operator *GetNPUOp() override;
|
||||
|
||||
private:
|
||||
|
|
|
@ -70,7 +70,8 @@ class NPUOp {
|
|||
if (index2_multi_out_index.empty()) {
|
||||
return SetNPUInputs(in_tensors, out_tensors, npu_inputs);
|
||||
}
|
||||
return RET_OK;
|
||||
MS_LOG(ERROR) << "The input operator of npu op: " << this->name() << " has multiple outputs. Override this method.";
|
||||
return RET_ERROR;
|
||||
}
|
||||
|
||||
// Default implementation: reports no operator (nullptr).
virtual ge::Operator *GetNPUOp() {
  return nullptr;
}
|
||||
|
|
|
@ -50,6 +50,23 @@ int ReshapeNPUOp::SetNPUInputs(const std::vector<mindspore::MSTensor> &in_tensor
|
|||
return RET_OK;
|
||||
}
|
||||
|
||||
// Connects every input of the reshape operator, including inputs that come from producers with
// several outputs.
//
// in_tensors / out_tensors: not read by this overload.
// npu_inputs: one producer operator per input of this op.
// index2_multi_out_index: maps an input index of this op to {producer operator, index forwarded to
//   SetInput} -- presumably the producer's output index; confirm against the caller.
// Returns RET_OK on success, RET_ERROR when an operator that would be wired is null.
int ReshapeNPUOp::SetNPUInputs(const std::vector<mindspore::MSTensor> &in_tensors,
                               const std::vector<mindspore::MSTensor> &out_tensors,
                               const std::vector<ge::Operator *> &npu_inputs,
                               const std::unordered_map<int, std::pair<ge::Operator *, int>> &index2_multi_out_index) {
  // Inputs listed in the map are wired with their explicit index from the map entry.
  // Iterate by const reference to avoid copying each map entry.
  for (const auto &entry : index2_multi_out_index) {
    ge::Operator *in_op = entry.second.first;
    MS_CHECK_TRUE_RET(in_op != nullptr, RET_ERROR);
    reshape_->SetInput(entry.first, *in_op, entry.second.second);
  }

  // Every remaining input is wired with index 0 as the last SetInput argument.
  // size_t index avoids the signed/unsigned comparison against size().
  for (size_t i = 0; i < npu_inputs.size(); ++i) {
    const int input_index = static_cast<int>(i);
    if (index2_multi_out_index.find(input_index) != index2_multi_out_index.end()) {
      continue;
    }
    MS_CHECK_TRUE_RET(npu_inputs[i] != nullptr, RET_ERROR);
    reshape_->SetInput(input_index, *npu_inputs[i], 0);
  }
  return RET_OK;
}
|
||||
|
||||
// Hands back the operator stored in reshape_.
ge::Operator *ReshapeNPUOp::GetNPUOp() {
  return reshape_;
}
|
||||
|
||||
ReshapeNPUOp::~ReshapeNPUOp() {
|
||||
|
|
|
@ -18,6 +18,8 @@
|
|||
#define MINDSPORE_LITE_SRC_DELEGATE_NPU_OP_RESHAPE_NPU_H_
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
#include <unordered_map>
|
||||
#include "include/graph/op/all_ops.h"
|
||||
#include "src/delegate/npu/op/npu_op.h"
|
||||
namespace mindspore {
|
||||
|
@ -39,6 +41,10 @@ class ReshapeNPUOp : public NPUOp {
|
|||
const std::vector<mindspore::MSTensor> &out_tensors,
|
||||
const std::vector<ge::Operator *> &npu_inputs) override;
|
||||
|
||||
int SetNPUInputs(const std::vector<mindspore::MSTensor> &in_tensors,
|
||||
const std::vector<mindspore::MSTensor> &out_tensors, const std::vector<ge::Operator *> &npu_inputs,
|
||||
const std::unordered_map<int, std::pair<ge::Operator *, int>> &index2_multi_out_index) override;
|
||||
|
||||
ge::Operator *GetNPUOp() override;
|
||||
|
||||
private:
|
||||
|
|
|
@ -157,6 +157,9 @@ int UpdatePreTensors(NPUOp *cur_op) {
|
|||
tensors_vec.resize(cur_op->inputs().size());
|
||||
auto const_index = nodes2const_index[cur_op->type()];
|
||||
for (auto index : const_index) {
|
||||
if (index >= cur_op->inputs().size()) {
|
||||
continue;
|
||||
}
|
||||
tensors_vec[index] = cur_op->inputs()[index];
|
||||
}
|
||||
}
|
||||
|
@ -463,6 +466,7 @@ int NPUFusionPass::Run(NPUGraph *subgraph) {
|
|||
continue;
|
||||
case schema::PrimitiveType_AddFusion:
|
||||
case schema::PrimitiveType_MulFusion:
|
||||
case schema::PrimitiveType_DivFusion:
|
||||
case schema::PrimitiveType_Activation:
|
||||
case schema::PrimitiveType_Eltwise:
|
||||
i -= cur_op->in_ops().size();
|
||||
|
|
|
@ -27,7 +27,7 @@ enum InsertState { InsertNone, PreInsert, PostInsert, BothInsert };
|
|||
std::set<mindspore::schema::PrimitiveType> insert_nodes = {
|
||||
schema::PrimitiveType_Concat, schema::PrimitiveType_AddFusion, schema::PrimitiveType_Eltwise,
|
||||
schema::PrimitiveType_Activation, schema::PrimitiveType_Split, schema::PrimitiveType_PadFusion,
|
||||
schema::PrimitiveType_StridedSlice, schema::PrimitiveType_MulFusion};
|
||||
schema::PrimitiveType_StridedSlice, schema::PrimitiveType_MulFusion, schema::PrimitiveType_DivFusion};
|
||||
|
||||
// this pass goal is to minimize subgraphs generated
|
||||
// by inserting nchw2nhwc or nhwc2nchw before or after the operator (e.g. concat, add, etc..) together with
|
||||
|
@ -136,7 +136,9 @@ int NPUInsertTransformPass::InsertNode(NPUOp *op, NPUOp *post_op, size_t post_in
|
|||
for (auto i = 0; i < in_tensors.size(); ++i) {
|
||||
auto in_tensor = in_tensors[i];
|
||||
auto nhwc_shape = in_tensor.Shape();
|
||||
if (nhwc_shape.size() < 4) {
|
||||
if (nhwc_shape.size() == 0) {
|
||||
continue;
|
||||
} else if (nhwc_shape.size() < 4) {
|
||||
MS_LOG(ERROR) << "nhwc_shape size < " << 4;
|
||||
return RET_ERROR;
|
||||
}
|
||||
|
|
|
@ -185,10 +185,6 @@ int NPUTransformPass::Run(NPUGraph *subgraph) {
|
|||
return RET_ERROR;
|
||||
}
|
||||
}
|
||||
if (op->type() == schema::PrimitiveType_Resize && op->inputs()[0].Shape()[1] > op->outputs()[0].Shape()[1]) {
|
||||
i++;
|
||||
continue;
|
||||
}
|
||||
// insert pre_ops before op in vector
|
||||
// modify loop index add (pre_ops.size() + 1) to the post_ops insert location
|
||||
std::vector<NPUOp *> pre_ops;
|
||||
|
|
|
@ -29,7 +29,7 @@ deeplabv3_1_default_1.tflite 2.5
|
|||
ml_video_edit_person_divison 0.5
|
||||
ml_video_edit_style_transfer_autoportrait.onnx 9
|
||||
ml_video_edit_style_transfer_candy.onnx 11
|
||||
ml_video_edit_style_transfer_gongnongbing.onnx 10
|
||||
ml_video_edit_style_transfer_gongnongbing.onnx 10.5
|
||||
ml_video_edit_style_transfer_starry.onnx 11
|
||||
porseg_tmp.onnx;2 1
|
||||
ml_video_edit_Mnet 1.5
|
||||
|
@ -134,7 +134,7 @@ hiai_latin_ocr_1.tflite;1:input_0 5.5
|
|||
hiai_PoseEstimation_Pcm.tflite;1:image 12
|
||||
#large precision bias error
|
||||
#hiai_ssd_mobilenetv2_object.tflite;1:image_tensor
|
||||
hiai_cv_focusShootOCRModel_02.tflite;1:input_0 4.5
|
||||
hiai_cv_focusShootOCRModel_02.tflite;1:input_0 5
|
||||
hiai_cv_poseEstimation.tflite;1:Image 37
|
||||
mtk_model_normalize_object_scene_ps_20200519_f16.tflite;1:input_0 3
|
||||
#mtk_age_gender_fp16.tflite;1:img
|
||||
|
@ -215,8 +215,8 @@ Q_AADB_HADB_MBV2_model.tflite;1:input_0 2.5
|
|||
#Q_crnn_ori_v2_405001_notrans_nopre_pb2tflite.tflite;1:input_0
|
||||
#Q_crnn_screen_slim400w_more_20w_pb2tflite.tflite;1:input_0
|
||||
Q_dila-small-mix-full-fineturn-390000-nopixel-nosigmoid_tflite.tflite;1:input 2
|
||||
Q_focusocr_cn_recog.tflite;1:input_0 6
|
||||
Q_focusocr_jk_recog.tflite;1:input_0 4.5
|
||||
Q_focusocr_cn_recog.tflite;1:input_0 8
|
||||
Q_focusocr_jk_recog.tflite;1:input_0 6.5
|
||||
Q_inception-249970-672-11-16_pb2tflite.tflite;1:input 3
|
||||
#Q_isface.tflite;1:data
|
||||
#Q_landmark.tflite;1:img
|
||||
|
|
|
@ -17,6 +17,8 @@
|
|||
#include "tools/optimizer/fusion/squeeze_fusion.h"
|
||||
#include <memory>
|
||||
#include "schema/inner/model_generated.h"
|
||||
#include "ops/squeeze.h"
|
||||
#include "ops/unsqueeze.h"
|
||||
#include "tools/optimizer/common/gllo_utils.h"
|
||||
#include "nnacl/op_base.h"
|
||||
|
||||
|
@ -78,8 +80,19 @@ const AnfNodePtr SqueezeFusion::Process(const FuncGraphPtr &func_graph, const An
|
|||
}
|
||||
auto pre_node = squeeze_node->cast<CNodePtr>()->input(1);
|
||||
|
||||
if (GetCNodePrimitive(unsqueeze_node)->GetAttr(ops::kAxis) ==
|
||||
GetCNodePrimitive(unsqueeze_node)->GetAttr(ops::kAxis)) {
|
||||
auto unsqueeze_primitive = GetCNodePrimitive(unsqueeze_node);
|
||||
auto squeeze_primitive = GetCNodePrimitive(squeeze_node);
|
||||
MS_ASSERT(unsqueeze_primitive != nullptr);
|
||||
MS_ASSERT(squeeze_primitive != nullptr);
|
||||
if (unsqueeze_primitive->GetAttr(ops::kAxis) == nullptr || squeeze_primitive->GetAttr(ops::kAxis) == nullptr) {
|
||||
MS_LOG(ERROR) << "The squeeze or unsqueeze node has no axis value.";
|
||||
return nullptr;
|
||||
}
|
||||
auto unsqueeze_prim = utils::cast<std::shared_ptr<mindspore::ops::Unsqueeze>>(unsqueeze_primitive);
|
||||
auto squeeze_prim = utils::cast<std::shared_ptr<mindspore::ops::Squeeze>>(squeeze_primitive);
|
||||
MS_ASSERT(unsqueeze_prim != nullptr);
|
||||
MS_ASSERT(squeeze_prim != nullptr);
|
||||
if (squeeze_prim->get_axis() == unsqueeze_prim->get_axis()) {
|
||||
auto manager = func_graph->manager();
|
||||
MS_ASSERT(manager != nullptr);
|
||||
(void)manager->Replace(unsqueeze_node, act_node);
|
||||
|
|
Loading…
Reference in New Issue