reconstruct npu scale op and insert transpose pass

This commit is contained in:
zengxianglong 2022-01-29 01:10:37 +08:00
parent f6e83e0383
commit c564c628b5
34 changed files with 660 additions and 568 deletions

View File

@ -15,7 +15,7 @@
*/
#include "src/delegate/npu/npu_converter_utils.h"
#include "src/common/log_adapter.h"
#include "src/delegate/npu/op/npu_op.h"
namespace mindspore {
#define C4NUM 4
#define C8NUM 8
@ -55,7 +55,7 @@ void Float16ToFloat32(const float16_t *__restrict input, float *__restrict outpu
#endif
ge::Shape ConverterToNPUShape(const std::vector<int64_t> &src_shape, bool is_expand_4d) {
vector<int64_t> shapes;
std::vector<int64_t> shapes;
shapes.reserve(src_shape.size());
for (int i = 0; i < src_shape.size(); i++) {
shapes.push_back(src_shape[i]);
@ -64,8 +64,7 @@ ge::Shape ConverterToNPUShape(const std::vector<int64_t> &src_shape, bool is_exp
if (shapes.size() == 1) {
return ge::Shape({1, shapes[0], 1, 1});
} else {
const int dimension4 = 4;
for (int i = src_shape.size(); i < dimension4; i++) {
for (int i = src_shape.size(); i < NPU_SHAPE_SIZE; i++) {
shapes.push_back(1);
}
}
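As an illustration of the expansion rule above (not part of this diff; assumes NPU_SHAPE_SIZE is 4 and is_expand_4d is set):
// ConverterToNPUShape({5},    true) -> ge::Shape({1, 5, 1, 1})  // a 1-D shape is treated as the channel dim
// ConverterToNPUShape({2, 3}, true) -> ge::Shape({2, 3, 1, 1})  // trailing dims are padded with 1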
@ -204,23 +203,23 @@ int TransFormAxis(int axis) {
void AssistDataNHWC2NCHW(int *data, size_t unit_size) {
MS_ASSERT(data != nullptr);
for (size_t i = 0; i < unit_size; ++i) {
int c = data[3 * unit_size + i];
int org_c = data[NHWC_C * unit_size + i];
// n h w c
// n c h w
data[3 * unit_size + i] = data[2 * unit_size + i];
data[2 * unit_size + i] = data[unit_size + i];
data[unit_size + i] = c;
data[NCHW_W * unit_size + i] = data[NHWC_W * unit_size + i];
data[NCHW_H * unit_size + i] = data[NHWC_H * unit_size + i];
data[NCHW_C * unit_size + i] = org_c;
}
}
int MaskDataNHWC2NCHW(int mask) {
int mask_vec[4];
for (int i = 0; i < 4; ++i) {
int mask_vec[NPU_SHAPE_SIZE];
for (int i = 0; i < NPU_SHAPE_SIZE; ++i) {
mask_vec[i] = (uint32_t)(mask) & (1 << i);
}
AssistDataNHWC2NCHW(mask_vec, 1);
int ret = 0;
for (int i = 0; i < 4; ++i) {
for (int i = 0; i < NPU_SHAPE_SIZE; ++i) {
if (mask_vec[i]) {
ret += 1 << i;
}
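A worked example of the mask permutation (illustrative): NHWC indexes {N, H, W, C} as bits {0, 1, 2, 3} and NCHW as {N, C, H, W}, so the channel bit moves from position 3 to position 1:
// MaskDataNHWC2NCHW(0b1000) == 0b0010  (C: NHWC bit 3 -> NCHW bit 1)
// MaskDataNHWC2NCHW(0b0110) == 0b1100  (H and W each shift up one position)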

View File

@ -27,6 +27,9 @@
#include "include/graph/op/array_defs.h"
#include "include/api/types.h"
#include "include/api/data_type.h"
#include "include/graph/op/all_ops.h"
#include "src/common/log_adapter.h"
#include "nnacl/op_base.h"
namespace mindspore {
enum NCHW_SHAPE { NCHW_INVALID = -1, NCHW_N = 0, NCHW_C = 1, NCHW_H = 2, NCHW_W = 3 };
@ -91,5 +94,29 @@ int TransFormAxis(int axis);
void AssistDataNHWC2NCHW(int *data, size_t unit_size);
int MaskDataNHWC2NCHW(int mask);
template <typename T>
ge::Operator *GetNPUConst(const uint8_t *const_data, const std::vector<int64_t> &shape, const ge::DataType data_type,
std::string name = "const", bool is_expand_4d = false) {
MS_CHECK_TRUE_MSG(const_data != nullptr, nullptr, "Const data can not be nullptr.");
int element_num = 1;
if (!shape.empty()) {
for (size_t i = 0; i < shape.size(); i++) {
MS_CHECK_GT(shape.at(i), 0, nullptr);
MS_CHECK_INT_MUL_NOT_OVERFLOW(element_num, shape.at(i), nullptr);
element_num *= shape.at(i);
}
}
ge::TensorDesc const_tensor_desc(ConverterToNPUShape(shape, is_expand_4d), ge::FORMAT_NCHW, data_type);
ge::TensorPtr const_tensor = std::make_shared<hiai::Tensor>(const_tensor_desc);
const_tensor->SetData(const_data, element_num * sizeof(T));
auto const_op = new (std::nothrow) hiai::op::Const(name);
if (const_op == nullptr) {
MS_LOG(ERROR) << "New Const op failed.";
return const_op;
}
const_op->set_attr_value(const_tensor);
return const_op;
}
} // namespace mindspore
#endif // MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_NPU_NPU_CONVERTER_UITLS_H_
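A minimal usage sketch for the new GetNPUConst helper (illustrative values; the caller owns the returned op and must delete it eventually, as the Scale rework below does via scale_ops_):
std::vector<int32_t> shape_data = {2, 3, 1, 1};
auto *shape_const = GetNPUConst<int32_t>(reinterpret_cast<const uint8_t *>(shape_data.data()),
                                         {static_cast<int64_t>(shape_data.size())}, ge::DT_INT32, "demo_shape");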

View File

@ -52,12 +52,12 @@
#include "src/delegate/npu/op/transpose_npu.h"
#include "src/delegate/npu/op/unsqueeze_npu.h"
#include "src/delegate/npu/op/abs_npu.h"
#include "src/delegate/npu/op/flatten_npu.h"
#include "src/delegate/npu/npu_graph.h"
#include "src/delegate/delegate_utils.h"
#include "src/delegate/npu/pass/npu_transform_pass.h"
#include "src/delegate/npu/pass/npu_insert_transform_pass.h"
#include "src/delegate/npu/pass/npu_fusion_pass.h"
#include "src/delegate/npu/pass/npu_infer_format_pass.h"
using mindspore::lite::RET_ERROR;
using mindspore::lite::RET_OK;
@ -97,13 +97,6 @@ Status NPUDelegate::AddPasses() {
return mindspore::kLiteNullptr;
}
pass_manager_->AddPass(fusion_pass);
auto infer_format_pass = new (std::nothrow) NPUInferFormatPass();
if (infer_format_pass == nullptr) {
MS_LOG(ERROR) << "New NPUInferFormatPass failed.";
return mindspore::kLiteNullptr;
}
pass_manager_->AddPass(infer_format_pass);
return mindspore::kSuccess;
}

View File

@ -16,6 +16,7 @@
#include "src/delegate/npu/npu_executor.h"
#include <unordered_map>
#include <set>
#include "include/errorcode.h"
#include "src/delegate/npu/npu_manager.h"
#include "src/common/log_adapter.h"
@ -73,7 +74,8 @@ bool IsSameShapeTensor(mindspore::MSTensor tensor, const std::shared_ptr<hiai::A
}
int NPUExecutor::Run(const std::vector<mindspore::MSTensor> &in_tensors,
const std::vector<mindspore::MSTensor> &out_tensors, const std::vector<NPUOp *> &in_ops) {
const std::vector<mindspore::MSTensor> &valid_out_tensors,
const std::vector<mindspore::MSTensor> &all_out_tensors, const std::vector<NPUOp *> &out_ops) {
hiai::AiContext context;
for (size_t i = 0; i < npu_input_tensors_.size(); ++i) {
MS_CHECK_TRUE_RET(i < input_relationship_.size() && input_relationship_.at(i) < in_tensors.size(), RET_ERROR);
@ -97,19 +99,32 @@ int NPUExecutor::Run(const std::vector<mindspore::MSTensor> &in_tensors,
return RET_ERROR;
}
if (npu_output_tensors_.size() != out_tensors.size()) {
MS_LOG(ERROR) << "The output count is not euqal to ms tensor.";
// If a multi-output op is a graph output op, all of its output tensors will be treated as graph outputs in the om model.
std::set<schema::PrimitiveType> multi_output_list = {schema::PrimitiveType_Split};
bool has_multi_output_op = false;
for (auto out_op : out_ops) {
if (std::find(multi_output_list.begin(), multi_output_list.end(), out_op->type()) != multi_output_list.end()) {
has_multi_output_op = true;
break;
}
}
if (npu_output_tensors_.size() != all_out_tensors.size() ||
(!has_multi_output_op && npu_output_tensors_.size() != valid_out_tensors.size())) {
MS_LOG(ERROR) << "The output count (" << npu_output_tensors_.size() << ") is not equal to ms tensor ("
<< all_out_tensors.size() << ").";
return RET_ERROR;
}
for (size_t i = 0; i < npu_output_tensors_.size(); ++i) {
mindspore::MSTensor out_tensor = out_tensors[i];
auto data = out_tensor.MutableData();
if (data == nullptr) {
MS_LOG(ERROR) << "For " << model_name_ << ", the output tensor " << out_tensors[i].Name() << " data is nullptr";
return RET_ERROR;
mindspore::MSTensor out_tensor = all_out_tensors[i];
if (std::find(valid_out_tensors.begin(), valid_out_tensors.end(), out_tensor) != valid_out_tensors.end()) {
auto data = out_tensor.MutableData();
if (data == nullptr) {
MS_LOG(ERROR) << "For " << model_name_ << ", the output tensor " << out_tensor.Name() << " data is nullptr";
return RET_ERROR;
}
memcpy(data, npu_output_tensors_[i]->GetBuffer(), npu_output_tensors_[i]->GetSize());
}
memcpy(data, npu_output_tensors_[i]->GetBuffer(), npu_output_tensors_[i]->GetSize());
}
return RET_OK;
}
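An illustrative case for the new output handling: a subgraph ending in a Split whose om model exposes three outputs while only two are MindSpore graph outputs.
// all_out_tensors   = {split_out0, split_out1, split_out2}  // matches npu_output_tensors_ one-to-one
// valid_out_tensors = {split_out0, split_out2}              // only these buffers are copied back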

View File

@ -33,8 +33,8 @@ class NPUExecutor {
~NPUExecutor();
int Prepare();
int Run(const std::vector<mindspore::MSTensor> &in_tensors, const std::vector<mindspore::MSTensor> &out_tensors,
const std::vector<NPUOp *> &in_ops);
int Run(const std::vector<mindspore::MSTensor> &in_tensors, const std::vector<mindspore::MSTensor> &valid_out_tensors,
const std::vector<mindspore::MSTensor> &all_out_tensors, const std::vector<NPUOp *> &out_ops);
void InitInputMappingRelationShip(const std::vector<size_t> &input_index) { input_relationship_ = input_index; }

View File

@ -37,6 +37,7 @@ NPUSubGraph::~NPUSubGraph() {
subgraph_input_ops_.clear();
subgraph_output_ops_.clear();
out_tensor_sorted_.clear();
all_tensors_from_out_ops_.clear();
for (auto op : op_buffer_) {
delete op;
}
@ -61,11 +62,11 @@ void NPUSubGraph::set_input(mindspore::MSTensor in_tensor, int index) {
}
void NPUSubGraph::set_output(mindspore::MSTensor out_tensor, int index) {
MS_ASSERT(index < out_tensor_sorted_.size());
MS_ASSERT(index < outputs_.size());
auto origin_tensor = outputs_[index];
for (size_t i = 0; i < out_tensor_sorted_.size(); i++) {
if (out_tensor_sorted_[i] == origin_tensor) {
out_tensor_sorted_[i] = out_tensor;
for (size_t i = 0; i < all_tensors_from_out_ops_.size(); i++) {
if (all_tensors_from_out_ops_[i] == origin_tensor) {
all_tensors_from_out_ops_[i] = out_tensor;
}
}
outputs_[index] = out_tensor;
@ -146,7 +147,7 @@ std::shared_ptr<domi::ModelBufferData> NPUSubGraph::BuildIRModel() {
return om_model_buff;
}
int NPUSubGraph::Execute() { return executor_->Run(inputs(), out_tensor_sorted_, in_ops_); }
int NPUSubGraph::Execute() { return executor_->Run(inputs(), outputs(), all_tensors_from_out_ops_, out_ops_); }
int NPUSubGraph::BuildNPUInputOp() {
int count = 0;
@ -242,12 +243,9 @@ int NPUSubGraph::BuildNPUOutputOp() {
MS_LOG(ERROR) << "Get NPU operators failed.";
return RET_ERROR;
}
out_tensor_sorted_.resize(outputs().size());
int i = 0;
for (auto node : out_ops_) {
for (const auto &tensor : node->outputs()) {
if (std::find(outputs().begin(), outputs().end(), tensor) != outputs().end())
this->out_tensor_sorted_[i++] = tensor;
all_tensors_from_out_ops_.emplace_back(tensor);
}
}
if (subgraph_output_ops_.empty()) {

View File

@ -73,6 +73,8 @@ class NPUSubGraph : public kernel::Kernel {
std::vector<mindspore::MSTensor> out_tensor_sorted_;
std::vector<mindspore::MSTensor> all_tensors_from_out_ops_;
std::vector<ge::Operator *> op_buffer_;
std::vector<NPUOp *> npu_ops_{};

View File

@ -31,18 +31,9 @@ int ConcatNPUOp::Init(const schema::Primitive *primitive, const std::vector<mind
return RET_ERROR;
}
axis_ = concat_prim->axis();
return RET_OK;
}
int ConcatNPUOp::SetNPUInputs(const std::vector<mindspore::MSTensor> &in_tensors,
const std::vector<mindspore::MSTensor> &out_tensors,
const std::vector<ge::Operator *> &npu_inputs) {
concat_->set_attr_concat_dim(axis_);
concat_->set_attr_N(npu_inputs.size());
concat_->create_dynamic_input_x(npu_inputs.size());
for (int i = 0; i < npu_inputs.size(); ++i) {
concat_->set_dynamic_input_x(i + 1, *npu_inputs[i]);
}
auto input_num = in_tensors.size();
concat_->set_attr_N(input_num);
concat_->create_dynamic_input_x(input_num);
return RET_OK;
}
@ -51,8 +42,6 @@ int ConcatNPUOp::SetNPUInputs(const std::vector<mindspore::MSTensor> &in_tensors
const std::vector<ge::Operator *> &npu_inputs,
const std::unordered_map<int, std::pair<ge::Operator *, int>> &index2_multi_out_index) {
concat_->set_attr_concat_dim(axis_);
concat_->set_attr_N(npu_inputs.size());
concat_->create_dynamic_input_x(npu_inputs.size());
for (auto pair : index2_multi_out_index) {
auto in_op = pair.second.first;
MS_CHECK_TRUE_RET(in_op != nullptr, RET_ERROR);

View File

@ -39,10 +39,6 @@ class ConcatNPUOp : public NPUOp {
int Init(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
const std::vector<mindspore::MSTensor> &out_tensors) override;
int SetNPUInputs(const std::vector<mindspore::MSTensor> &in_tensors,
const std::vector<mindspore::MSTensor> &out_tensors,
const std::vector<ge::Operator *> &npu_inputs) override;
int SetNPUInputs(const std::vector<mindspore::MSTensor> &in_tensors,
const std::vector<mindspore::MSTensor> &out_tensors, const std::vector<ge::Operator *> &npu_inputs,
const std::unordered_map<int, std::pair<ge::Operator *, int>> &index2_multi_out_index) override;

View File

@ -93,27 +93,6 @@ int ConvolutionInt8NPUOp::Init(const schema::Primitive *primitive, const std::ve
return RET_OK;
}
int ConvolutionInt8NPUOp::SetNPUInputs(const std::vector<mindspore::MSTensor> &in_tensors,
const std::vector<mindspore::MSTensor> &out_tensors,
const std::vector<ge::Operator *> &npu_inputs) {
auto ret = InitWeightConst(in_tensors);
if (ret != RET_OK) {
MS_LOG(ERROR) << "Set weight and bias for convolution op " << name_ << " failed when running npu";
return RET_ERROR;
}
conv_->set_input_filter(*weight_);
if (in_tensors.size() == CONV_INPUT_SIZE) {
ret = InitBiasConst(in_tensors);
if (ret != RET_OK) {
MS_LOG(ERROR) << "Set bias for convolution op " << name_ << " failed when running npu";
return RET_ERROR;
}
conv_->set_input_bias(*bias_);
}
conv_->set_input_x(*npu_inputs[0]);
return RET_OK;
}
int ConvolutionInt8NPUOp::SetNPUInputs(
const std::vector<mindspore::MSTensor> &in_tensors, const std::vector<mindspore::MSTensor> &out_tensors,
const std::vector<ge::Operator *> &npu_inputs,

View File

@ -37,10 +37,6 @@ class ConvolutionInt8NPUOp : public ConvolutionBaseNPUOp {
int Init(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
const std::vector<mindspore::MSTensor> &out_tensors) override;
int SetNPUInputs(const std::vector<mindspore::MSTensor> &in_tensors,
const std::vector<mindspore::MSTensor> &out_tensors,
const std::vector<ge::Operator *> &npu_inputs) override;
int SetNPUInputs(const std::vector<mindspore::MSTensor> &in_tensors,
const std::vector<mindspore::MSTensor> &out_tensors, const std::vector<ge::Operator *> &npu_inputs,
const std::unordered_map<int, std::pair<ge::Operator *, int>> &index2_multi_out_index) override;

View File

@ -97,27 +97,6 @@ int ConvolutionNPUOp::Init(const schema::Primitive *primitive, const std::vector
return RET_OK;
}
int ConvolutionNPUOp::SetNPUInputs(const std::vector<mindspore::MSTensor> &in_tensors,
const std::vector<mindspore::MSTensor> &out_tensors,
const std::vector<ge::Operator *> &npu_inputs) {
auto ret = InitWeightConst(in_tensors);
if (ret != RET_OK) {
MS_LOG(ERROR) << "Set weight and bias for convolution op " << name_ << " failed when running npu";
return RET_ERROR;
}
conv_->set_input_filter(*weight_);
if (in_tensors.size() == CONV_INPUT_SIZE) {
ret = InitBiasConst(in_tensors);
if (ret != RET_OK) {
MS_LOG(ERROR) << "Set bias for convolution op " << name_ << " failed when running npu";
return RET_ERROR;
}
conv_->set_input_bias(*bias_);
}
conv_->set_input_x(*npu_inputs[0]);
return RET_OK;
}
int ConvolutionNPUOp::SetNPUInputs(
const std::vector<mindspore::MSTensor> &in_tensors, const std::vector<mindspore::MSTensor> &out_tensors,
const std::vector<ge::Operator *> &npu_inputs,

View File

@ -37,10 +37,6 @@ class ConvolutionNPUOp : public ConvolutionBaseNPUOp {
int Init(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
const std::vector<mindspore::MSTensor> &out_tensors) override;
int SetNPUInputs(const std::vector<mindspore::MSTensor> &in_tensors,
const std::vector<mindspore::MSTensor> &out_tensors,
const std::vector<ge::Operator *> &npu_inputs) override;
int SetNPUInputs(const std::vector<mindspore::MSTensor> &in_tensors,
const std::vector<mindspore::MSTensor> &out_tensors, const std::vector<ge::Operator *> &npu_inputs,
const std::unordered_map<int, std::pair<ge::Operator *, int>> &index2_multi_out_index) override;

View File

@ -33,17 +33,25 @@ int EltwiseNPUOp::Init(const schema::Primitive *primitive, const std::vector<min
return RET_ERROR;
}
eltwise_->set_attr_mode(ConverterToNPUEltwiseMode(eltwise_prim->mode()));
int size = in_tensors.size();
eltwise_->create_dynamic_input_x(size);
eltwise_->set_attr_N(size);
auto input_num = in_tensors.size();
eltwise_->create_dynamic_input_x(input_num);
eltwise_->set_attr_N(input_num);
return RET_OK;
}
int EltwiseNPUOp::SetNPUInputs(const std::vector<mindspore::MSTensor> &in_tensors,
const std::vector<mindspore::MSTensor> &out_tensors,
const std::vector<ge::Operator *> &npu_inputs) {
const std::vector<ge::Operator *> &npu_inputs,
const std::unordered_map<int, std::pair<ge::Operator *, int>> &index2_multi_out_index) {
for (auto pair : index2_multi_out_index) {
auto in_op = pair.second.first;
MS_CHECK_TRUE_RET(in_op != nullptr, RET_ERROR);
eltwise_->SetInput(pair.first, *in_op, pair.second.second);
}
for (int i = 0; i < npu_inputs.size(); ++i) {
eltwise_->set_dynamic_input_x(i + 1, *npu_inputs[i]);
if (index2_multi_out_index.find(i) == index2_multi_out_index.end()) {
eltwise_->SetInput(i, *npu_inputs[i], 0);
}
}
return RET_OK;
}
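An illustrative wiring for the new mixed-input path: suppose input 1 of a three-input Eltwise comes from the second output of a multi-output Split.
// index2_multi_out_index = {1 -> (split_op, 1)}
// first loop:  eltwise_->SetInput(1, *split_op, 1);       // from the multi-output producer
// second loop: eltwise_->SetInput(0, *npu_inputs[0], 0);  eltwise_->SetInput(2, *npu_inputs[2], 0);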

View File

@ -18,6 +18,8 @@
#define MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_NPU_OP_ELTWISE_NPU_H_
#include <vector>
#include <string>
#include <utility>
#include <unordered_map>
#include "include/graph/op/all_ops.h"
#include "src/delegate/npu/op/npu_op.h"
@ -39,8 +41,8 @@ class EltwiseNPUOp : public NPUOp {
const std::vector<mindspore::MSTensor> &out_tensors) override;
int SetNPUInputs(const std::vector<mindspore::MSTensor> &in_tensors,
const std::vector<mindspore::MSTensor> &out_tensors,
const std::vector<ge::Operator *> &npu_inputs) override;
const std::vector<mindspore::MSTensor> &out_tensors, const std::vector<ge::Operator *> &npu_inputs,
const std::unordered_map<int, std::pair<ge::Operator *, int>> &index2_multi_out_index) override;
ge::Operator *GetNPUOp() override;

View File

@ -0,0 +1,56 @@
/**
* Copyright 2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "src/delegate/npu/op/flatten_npu.h"
#include "include/graph/op/all_ops.h"
#include "src/delegate/npu/npu_converter_utils.h"
namespace mindspore {
int FlattenNPUOp::IsSupport(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
const std::vector<mindspore::MSTensor> &out_tensors) {
if (out_tensors.at(0).Shape().size() != C2NUM) {
MS_LOG(WARNING) << "The output tensor can only be flatten to 2 dimension.";
return RET_NOT_SUPPORT;
}
return RET_OK;
}
int FlattenNPUOp::Init(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
const std::vector<mindspore::MSTensor> &out_tensors) {
flatten_ = new (std::nothrow) hiai::op::Flatten(name_);
if (flatten_ == nullptr) {
MS_LOG(ERROR) << name_ << " op is nullptr";
return RET_ERROR;
}
return RET_OK;
}
int FlattenNPUOp::SetNPUInputs(const std::vector<mindspore::MSTensor> &in_tensors,
const std::vector<mindspore::MSTensor> &out_tensors,
const std::vector<ge::Operator *> &npu_inputs) {
flatten_->set_input_x(*npu_inputs[0]);
return RET_OK;
}
ge::Operator *FlattenNPUOp::GetNPUOp() { return this->flatten_; }
FlattenNPUOp::~FlattenNPUOp() {
if (flatten_ != nullptr) {
delete flatten_;
flatten_ = nullptr;
}
}
} // namespace mindspore

View File

@ -0,0 +1,48 @@
/**
* Copyright 2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_NPU_OP_FLATTEN_NPU_H_
#define MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_NPU_OP_FLATTEN_NPU_H_
#include <vector>
#include <string>
#include "include/graph/op/all_ops.h"
#include "src/delegate/npu/op/npu_op.h"
namespace mindspore {
class FlattenNPUOp : public NPUOp {
public:
FlattenNPUOp(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
const std::vector<mindspore::MSTensor> &out_tensors, std::string name)
: NPUOp(primitive, in_tensors, out_tensors, name) {}
~FlattenNPUOp() override;
int IsSupport(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
const std::vector<mindspore::MSTensor> &out_tensors) override;
int Init(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
const std::vector<mindspore::MSTensor> &out_tensors) override;
int SetNPUInputs(const std::vector<mindspore::MSTensor> &in_tensors,
const std::vector<mindspore::MSTensor> &out_tensors,
const std::vector<ge::Operator *> &npu_inputs) override;
ge::Operator *GetNPUOp() override;
private:
hiai::op::Flatten *flatten_ = nullptr;
};
} // namespace mindspore
#endif // MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_NPU_OP_FLATTEN_NPU_H_

View File

@ -55,9 +55,6 @@ int MatMulNPUOp::Init(const schema::Primitive *primitive, const std::vector<mind
MS_LOG(ERROR) << "New matmul npu operator for op " << name_ << " failed.";
return RET_ERROR;
}
if (in_tensors.size() == MATMUL_INPUT_SIZE) {
has_bias_ = true;
}
auto matmul_prim = primitive->value_as_MatMulFusion();
if (matmul_prim == nullptr) {
MS_LOG(ERROR) << "Get null primitive value for op ." << name_;
@ -66,6 +63,15 @@ int MatMulNPUOp::Init(const schema::Primitive *primitive, const std::vector<mind
matmul_->set_attr_transpose_x1(matmul_prim->transpose_a());
matmul_->set_attr_transpose_x2(matmul_prim->transpose_b());
act_type_ = matmul_prim->activation_type();
if (in_tensors.size() == MATMUL_INPUT_SIZE) {
has_bias_ = true;
add_op_ = new (std::nothrow) hiai::op::Add(name_ + "_add");
if (add_op_ == nullptr) {
MS_LOG(ERROR) << "new add op failed.";
return RET_ERROR;
}
}
return RET_OK;
}
@ -75,11 +81,6 @@ int MatMulNPUOp::SetNPUInputs(const std::vector<mindspore::MSTensor> &in_tensors
matmul_->set_input_x1(*npu_inputs[0]);
matmul_->set_input_x2(*npu_inputs[1]);
if (has_bias_) {
add_op_ = new (std::nothrow) hiai::op::Add(name_ + "_add");
if (add_op_ == nullptr) {
MS_LOG(ERROR) << "new add op failed.";
return RET_ERROR;
}
add_op_->set_input_x1(*matmul_);
auto bias_shape = in_tensors[BIAS_INDEX].Shape();
auto bias_tensor = ConverterToNPUTensor(in_tensors[BIAS_INDEX]);
@ -104,7 +105,7 @@ int MatMulNPUOp::SetNPUInputs(const std::vector<mindspore::MSTensor> &in_tensors
}
if (act_type_ != schema::ActivationType_NO_ACTIVATION) {
int ret = RET_ERROR;
if (has_bias_ == true) {
if (has_bias_) {
ret = SetActivation(add_op_);
} else {
ret = SetActivation(matmul_);

View File

@ -133,7 +133,8 @@ NPUOp *GetNPUOp(const schema::Primitive *primitive, const std::vector<mindspore:
return nullptr;
}
std::set<schema::PrimitiveType> int32_lists = {schema::PrimitiveType_Cast, schema::PrimitiveType_StridedSlice};
std::set<schema::PrimitiveType> int32_lists = {schema::PrimitiveType_Cast, schema::PrimitiveType_StridedSlice,
schema::PrimitiveType_Reshape, schema::PrimitiveType_ReduceFusion};
auto support_int32 = in_tensors[0].DataType() == DataType::kNumberTypeInt32 &&
find(int32_lists.begin(), int32_lists.end(), primitive->value_type()) != int32_lists.end();
if (in_tensors[0].DataType() != DataType::kNumberTypeFloat32 &&

View File

@ -32,7 +32,7 @@ int ReduceNPUOp::IsSupport(const schema::Primitive *primitive, const std::vector
return RET_NOT_SUPPORT;
}
reduce_mode_ = reduce_prim->mode();
if (reduce_mode_ != schema::ReduceMode_ReduceMean) {
if (reduce_mode_ != schema::ReduceMode_ReduceMean && reduce_mode_ != schema::ReduceMode_ReduceSum) {
MS_LOG(WARNING) << "Npu does not support reduce mode " << reduce_prim->mode() << " for op " << name_;
return RET_NOT_SUPPORT;
}
@ -58,6 +58,14 @@ int ReduceNPUOp::Init(const schema::Primitive *primitive, const std::vector<mind
}
reduce_mean->set_attr_keep_dims(reduce_prim->keep_dims());
reduce_ = reduce_mean;
} else if (reduce_mode_ == schema::ReduceMode_ReduceSum) {
auto reduce_sum = new (std::nothrow) hiai::op::ReduceSum(name_);
if (reduce_sum == nullptr) {
MS_LOG(ERROR) << "New reduce operator for op " << name_ << " failed.";
return RET_ERROR;
}
reduce_sum->set_attr_keep_dims(reduce_prim->keep_dims());
reduce_ = reduce_sum;
} else {
MS_LOG(ERROR) << "Npu does not support reduce mode " << reduce_prim->mode() << " for op " << name_;
return RET_ERROR;
@ -71,6 +79,9 @@ int ReduceNPUOp::SetNPUInputs(const std::vector<mindspore::MSTensor> &in_tensors
if (reduce_mode_ == schema::ReduceMode_ReduceMean) {
auto reduce_mean = reinterpret_cast<hiai::op::ReduceMean *>(reduce_);
reduce_mean->set_input_x(*npu_inputs[0]).set_input_axes(*npu_inputs[1]);
} else if (reduce_mode_ == schema::ReduceMode_ReduceSum) {
auto reduce_sum = reinterpret_cast<hiai::op::ReduceSum *>(reduce_);
reduce_sum->set_input_x(*npu_inputs[0]).set_input_axes(*npu_inputs[1]);
}
return RET_OK;
}

View File

@ -20,13 +20,18 @@
namespace mindspore {
int ReshapeNPUOp::IsSupport(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
const std::vector<mindspore::MSTensor> &out_tensors) {
if (in_tensors.size() != 2) {
MS_LOG(WARNING) << "Npu op should have 2 input tensors.";
if (in_tensors.size() != kInputSize1) {
MS_LOG(WARNING) << "NPU op should have 2 input tensors.";
return RET_NOT_SUPPORT;
}
auto shape_tensor = in_tensors.at(1);
if (shape_tensor.Data() == nullptr) {
MS_LOG(WARNING) << "Npu reshape op only supports const shape.";
MS_LOG(WARNING) << "NPU Reshape op only supports const shape.";
return RET_NOT_SUPPORT;
}
if (shape_tensor.Shape().size() > 1 || shape_tensor.ElementNum() > NPU_SHAPE_SIZE) {
MS_LOG(WARNING) << "For NPU Reshape op, the shape tensor should be a one-dimension tensor and its element number "
"should be less than 4.";
return RET_NOT_SUPPORT;
}
return RET_OK;
@ -42,14 +47,6 @@ int ReshapeNPUOp::Init(const schema::Primitive *primitive, const std::vector<min
return RET_OK;
}
int ReshapeNPUOp::SetNPUInputs(const std::vector<mindspore::MSTensor> &in_tensors,
const std::vector<mindspore::MSTensor> &out_tensors,
const std::vector<ge::Operator *> &npu_inputs) {
reshape_->set_input_x(*npu_inputs[0]);
reshape_->set_input_shape(*npu_inputs[1]);
return RET_OK;
}
int ReshapeNPUOp::SetNPUInputs(const std::vector<mindspore::MSTensor> &in_tensors,
const std::vector<mindspore::MSTensor> &out_tensors,
const std::vector<ge::Operator *> &npu_inputs,

View File

@ -37,10 +37,6 @@ class ReshapeNPUOp : public NPUOp {
int IsSupport(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
const std::vector<mindspore::MSTensor> &out_tensors) override;
int SetNPUInputs(const std::vector<mindspore::MSTensor> &in_tensors,
const std::vector<mindspore::MSTensor> &out_tensors,
const std::vector<ge::Operator *> &npu_inputs) override;
int SetNPUInputs(const std::vector<mindspore::MSTensor> &in_tensors,
const std::vector<mindspore::MSTensor> &out_tensors, const std::vector<ge::Operator *> &npu_inputs,
const std::unordered_map<int, std::pair<ge::Operator *, int>> &index2_multi_out_index) override;

View File

@ -30,88 +30,122 @@ int ScaleNPUOp::IsSupport(const schema::Primitive *primitive, const std::vector<
MS_LOG(ERROR) << "Get null primitive value for op: " << name_;
return RET_ERROR;
}
auto input_dims = in_tensors.at(INPUT_INDEX).Shape().size();
axis_ = scale_prim->axis();
if (axis_ < 0) {
axis_ = axis_ + in_tensors[INPUT_INDEX].Shape().size();
axis_ = axis_ + input_dims;
}
if (axis_ != NHWC_C && axis_ != NCHW_C) {
if (in_tensors.size() <= BIAS_INDEX) {
MS_LOG(INFO) << "Npu Scale op does not support axis: " << axis_ << ", trying to convert to Mul op.";
use_mul_ = true;
return RET_OK;
} else {
MS_LOG(WARNING) << "Npu Scale axis attr only support 1 or channel, now is " << axis_;
return RET_NOT_SUPPORT;
}
}
if (input_dims < NPU_SHAPE_SIZE) {
need_expand_ = true;
}
return RET_OK;
}
int ScaleNPUOp::Init(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
const std::vector<mindspore::MSTensor> &out_tensors) {
if (!use_mul_) {
// note that Scale only supports the default axis (i.e., 1); setting axis is meaningless.
op_ = new (std::nothrow) hiai::op::Scale(name_);
} else {
op_ = new (std::nothrow) hiai::op::Mul(name_);
}
if (op_ == nullptr) {
MS_LOG(ERROR) << name_ << " op is nullptr";
return RET_ERROR;
}
auto scale_prim = primitive->value_as_ScaleFusion();
if (scale_prim == nullptr) {
MS_LOG(ERROR) << "Get null primitive value for op ." << name_;
return RET_ERROR;
}
if (use_mul_) {
mul_ = new (std::nothrow) hiai::op::Mul(name_ + "_mul");
if (mul_ == nullptr) {
MS_LOG(ERROR) << "New Mul npu operator for op " << name_ << "_mul failed.";
return RET_ERROR;
}
scale_ops_.emplace_back(mul_);
} else {
// note that Scale only supports the default axis (i.e., 1); setting axis is meaningless.
scale_ = new (std::nothrow) hiai::op::Scale(name_);
if (scale_ == nullptr) {
MS_LOG(ERROR) << "New Scale npu operator for op " << name_ << " failed.";
return RET_ERROR;
}
scale_ops_.emplace_back(scale_);
}
if (need_expand_) {
out_reshape_ = new (std::nothrow) hiai::op::Reshape(name_ + "_restore");
if (out_reshape_ == nullptr) {
MS_LOG(ERROR) << "New Reshape npu operator for op " << name_ << "_restore failed.";
return RET_ERROR;
}
scale_ops_.emplace_back(out_reshape_);
}
act_type_ = scale_prim->activation_type();
if (act_type_ != schema::ActivationType_NO_ACTIVATION) {
auto ret = SetActivation(op_);
if (ret != RET_OK) {
act_ = new (std::nothrow) hiai::op::Activation(name_ + "_act");
if (act_ == nullptr) {
MS_LOG(ERROR) << "New activation npu operator for op " << name_ << " failed.";
return ret;
return RET_ERROR;
}
}
return RET_OK;
}
int ScaleNPUOp::SetNPUInputs(const std::vector<mindspore::MSTensor> &in_tensors,
const std::vector<mindspore::MSTensor> &out_tensors,
const std::vector<ge::Operator *> &npu_inputs) {
MS_ASSERT(in_tensors.size() > SCALE_INDEX);
if (use_mul_) {
auto ret = ConvertScaleToMul(npu_inputs, op_, in_tensors);
if (ret != RET_OK) {
MS_LOG(ERROR) << "Convert Scale to Mul failed, op name: " << name_;
}
return ret;
}
auto scale_op = reinterpret_cast<hiai::op::Scale *>(op_);
scale_op->set_input_x(*npu_inputs.at(INPUT_INDEX));
scale_op->set_input_scale(*npu_inputs.at(SCALE_INDEX));
if (in_tensors.size() > BIAS_INDEX && in_tensors[BIAS_INDEX] != nullptr) {
scale_op->set_input_bias(*npu_inputs.at(BIAS_INDEX));
scale_ops_.emplace_back(act_);
}
return RET_OK;
}
ge::Operator *ScaleNPUOp::GetNPUOp() {
if (act_type_ == schema::ActivationType_NO_ACTIVATION) {
return op_;
} else {
if (act_type_ != schema::ActivationType_NO_ACTIVATION) {
return act_;
} else if (use_mul_) {
return mul_;
} else if (need_expand_) {
return out_reshape_;
} else {
return scale_;
}
}
int ScaleNPUOp::SetActivation(const ge::Operator *input) {
act_ = new (std::nothrow) hiai::op::Activation(name_ + "_act");
if (act_ == nullptr) {
MS_LOG(ERROR) << "New activation npu operator for op " << name_ << " failed.";
return RET_ERROR;
int ScaleNPUOp::SetNPUInputs(const std::vector<mindspore::MSTensor> &in_tensors,
const std::vector<mindspore::MSTensor> &out_tensors,
const std::vector<ge::Operator *> &npu_inputs) {
if (use_mul_) {
auto ret = ConvertScaleToMul(npu_inputs, in_tensors);
if (ret != RET_OK) {
MS_LOG(ERROR) << "Convert Scale to Mul failed, op name: " << name_;
return RET_ERROR;
}
} else {
auto ret = Adopt4DScale(npu_inputs, in_tensors);
if (ret != RET_OK) {
MS_LOG(ERROR) << "Adopt 4D Scale op failed, op name: " << name_;
return RET_ERROR;
}
}
act_->set_input_x(*input);
if (act_type_ != schema::ActivationType_NO_ACTIVATION) {
auto ret = SetActivation();
if (ret != RET_OK) {
MS_LOG(ERROR) << "Set Activation failed, op name: " << name_;
return RET_ERROR;
}
}
return RET_OK;
}
int ScaleNPUOp::SetActivation() {
ge::Operator *act_input = nullptr;
if (use_mul_) {
act_input = mul_;
} else if (need_expand_) {
act_input = out_reshape_;
} else {
act_input = scale_;
}
MS_CHECK_TRUE_MSG(act_input != nullptr, RET_ERROR, "Scale activation input is nullptr.");
act_->set_input_x(*act_input);
auto act_mode = ConverterToNPUActivationMode(act_type_);
if (act_mode == ACTIVATION_INVALID) {
MS_LOG(ERROR) << "Unsupported activation type for scale op " << name_;
@ -121,69 +155,138 @@ int ScaleNPUOp::SetActivation(const ge::Operator *input) {
return RET_OK;
}
int ScaleNPUOp::ConvertScaleToMul(const std::vector<ge::Operator *> &npu_inputs, ge::Operator *cur_op,
int ScaleNPUOp::ConvertScaleToMul(const std::vector<ge::Operator *> &npu_inputs,
const std::vector<mindspore::MSTensor> &in_tensors) {
auto input_shape = in_tensors[INPUT_INDEX].Shape();
auto scale_shape = in_tensors[SCALE_INDEX].Shape();
auto mul_op = reinterpret_cast<hiai::op::Mul *>(cur_op);
mul_op->set_input_x1(*npu_inputs.at(INPUT_INDEX));
auto input_shape = in_tensors.at(INPUT_INDEX).Shape();
auto scale_shape = in_tensors.at(SCALE_INDEX).Shape();
mul_->set_input_x1(*npu_inputs.at(INPUT_INDEX));
if (input_shape.size() == scale_shape.size()) {
mul_op->set_input_x2(*npu_inputs.at(SCALE_INDEX));
mul_->set_input_x2(*npu_inputs.at(SCALE_INDEX));
} else {
int valid_shape[NPU_SHAPE_SIZE] = {1, 1, 1, 1};
int64_t valid_dims = input_shape.size();
std::vector<int> valid_shape(valid_dims, 1);
for (size_t i = 0; i < scale_shape.size(); i++) {
valid_shape[axis_ + i] = static_cast<int>(scale_shape[i]);
}
reshape_ = new (std::nothrow) hiai::op::Reshape(name_ + "_reshape");
if (reshape_ == nullptr) {
MS_LOG(ERROR) << "New Reshape npu operator for op " << name_ << "_reshape failed.";
auto reshape = new (std::nothrow) hiai::op::Reshape(name_ + "_mul_reshape");
if (reshape == nullptr) {
MS_LOG(ERROR) << "New Reshape npu operator for op " << name_ << "_mul_reshape failed.";
return RET_ERROR;
}
std::shared_ptr<ge::Tensor> shape_tensor = std::make_shared<ge::Tensor>();
if (shape_tensor == nullptr) {
MS_LOG(ERROR) << "new shape_tensor failed.";
scale_ops_.emplace_back(reshape);
auto valid_data_ptr = reinterpret_cast<const uint8_t *>(valid_shape.data());
auto shape = GetNPUConst<int>(valid_data_ptr, {valid_dims}, ge::DT_INT32, name_ + "_mul_expand_shape");
if (shape == nullptr) {
MS_LOG(ERROR) << "Get shape const for op " << name_ << "_mul failed.";
return RET_ERROR;
}
ge::TensorDesc tensor_desc(ge::Shape({NPU_SHAPE_SIZE}), ge::FORMAT_ND, ge::DT_INT32);
shape_tensor->SetTensorDesc(tensor_desc);
shape_tensor->SetData(reinterpret_cast<const uint8_t *>(valid_shape), NPU_SHAPE_SIZE * sizeof(int));
shape_ = new (std::nothrow) hiai::op::Const(name_ + "_reshape_1");
if (shape_ == nullptr) {
MS_LOG(ERROR) << "New shape const for op " << name_ << " failed.";
return RET_ERROR;
}
shape_->set_attr_value(shape_tensor);
reshape_->set_input_x(*npu_inputs.at(SCALE_INDEX));
reshape_->set_input_shape(*shape_);
mul_op->set_input_x2(*reshape_);
scale_ops_.emplace_back(shape);
reshape->set_input_x(*npu_inputs.at(SCALE_INDEX));
reshape->set_input_shape(*shape);
mul_->set_input_x2(*reshape);
}
return RET_OK;
}
int ScaleNPUOp::Adopt4DScale(const std::vector<ge::Operator *> &npu_inputs,
const std::vector<mindspore::MSTensor> &in_tensors) {
MS_ASSERT(scale_ != nullptr);
// handle input
auto org_input_tensor = in_tensors.at(INPUT_INDEX);
ge::Operator *actual_input = npu_inputs.at(INPUT_INDEX);
std::vector<int64_t> org_input_shape = org_input_tensor.Shape();
if (need_expand_) {
actual_input = ChangeDims(npu_inputs.at(INPUT_INDEX), org_input_shape, name_ + "_expand_input", true);
if (actual_input == nullptr) {
MS_LOG(ERROR) << "Change Scale op input dims failed.";
return RET_ERROR;
}
}
scale_->set_input_x(*actual_input);
// handle scale, note that the scale axis can only be 1.
auto org_scale_tensor = in_tensors.at(SCALE_INDEX);
ge::Operator *actual_scale = npu_inputs.at(SCALE_INDEX);
if (org_scale_tensor.Shape().size() == DIMENSION_2D) {
std::vector<int64_t> expand_scale_shape = org_scale_tensor.Shape();
expand_scale_shape.emplace_back(1);
actual_scale = ChangeDims(npu_inputs.at(SCALE_INDEX), expand_scale_shape, name_ + "_expand_scale");
if (actual_scale == nullptr) {
MS_LOG(ERROR) << "Change Scale op scale dims failed.";
return RET_ERROR;
}
}
scale_->set_input_scale(*actual_scale);
// handle bias
if (in_tensors.size() > BIAS_INDEX) {
auto org_bias_tensor = in_tensors.at(BIAS_INDEX);
ge::Operator *actual_bias = npu_inputs.at(BIAS_INDEX);
if (org_bias_tensor.Shape().size() == DIMENSION_2D) {
std::vector<int64_t> expand_bias_shape = org_bias_tensor.Shape();
expand_bias_shape.emplace_back(1);
actual_bias = ChangeDims(npu_inputs.at(BIAS_INDEX), expand_bias_shape, name_ + "_expand_bias");
if (actual_bias == nullptr) {
MS_LOG(ERROR) << "Change Scale op bias dims failed.";
return RET_ERROR;
}
}
scale_->set_input_bias(*actual_bias);
}
// restore to origin input shape
if (need_expand_) {
int64_t dims = org_input_shape.size();
std::vector<int> valid_shape;
for (int i = 0; i < dims; i++) {
valid_shape.emplace_back(static_cast<int>(org_input_shape.at(i)));
}
auto valid_data_ptr = reinterpret_cast<const uint8_t *>(valid_shape.data());
auto shape = GetNPUConst<int>(valid_data_ptr, {dims}, ge::DT_INT32, name_ + "_restore_shape");
if (shape == nullptr) {
MS_LOG(ERROR) << "Get NPU Const for shape restoration failed.";
return RET_ERROR;
}
scale_ops_.emplace_back(shape);
out_reshape_->set_input_x(*scale_);
out_reshape_->set_input_shape(*shape);
}
return RET_OK;
}
ge::Operator *ScaleNPUOp::ChangeDims(const ge::Operator *input, std::vector<int64_t> dst_shape, std::string name,
bool need_expand_4d) {
MS_ASSERT(input != nullptr);
auto reshape = new (std::nothrow) hiai::op::Reshape(name);
if (reshape == nullptr) {
MS_LOG(ERROR) << "New Reshape NPU operator failed.";
return nullptr;
}
scale_ops_.emplace_back(reshape);
MS_CHECK_LE(dst_shape.size(), NPU_SHAPE_SIZE, nullptr);
int64_t actual_dim = need_expand_4d ? NPU_SHAPE_SIZE : dst_shape.size();
std::vector<int> valid_shape(actual_dim, 1);
for (int i = 0; i < dst_shape.size(); i++) {
valid_shape[i] = static_cast<int>(dst_shape.at(i));
}
auto valid_data_ptr = reinterpret_cast<const uint8_t *>(valid_shape.data());
auto shape = GetNPUConst<int>(valid_data_ptr, {actual_dim}, ge::DT_INT32, name_ + "_shape");
if (shape == nullptr) {
MS_LOG(ERROR) << "Get NPU Const for shape restoration failed.";
return nullptr;
}
scale_ops_.emplace_back(shape);
reshape->set_input_x(*input);
reshape->set_input_shape(*shape);
return reshape;
}
ScaleNPUOp::~ScaleNPUOp() {
if (op_ != nullptr) {
delete op_;
op_ = nullptr;
}
if (scale_ != nullptr) {
delete scale_;
scale_ = nullptr;
}
if (bias_ != nullptr) {
delete bias_;
bias_ = nullptr;
}
if (act_ != nullptr) {
delete act_;
act_ = nullptr;
}
if (reshape_ != nullptr) {
delete reshape_;
reshape_ = nullptr;
}
if (shape_ != nullptr) {
delete shape_;
shape_ = nullptr;
for (auto op : scale_ops_) {
if (op != nullptr) {
delete op;
op = nullptr;
}
}
}
} // namespace mindspore
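A hedged sketch of the subgraph the reworked Scale op emits for a 2-D input when need_expand_ is set (op names follow the code above; the Activation tail is optional):
// input (N, C) --Reshape(name_ + "_expand_input")--> (N, C, 1, 1)
//   --Scale(scale_)-->   (2-D scale/bias inputs are reshaped to 3-D via ChangeDims)
//   --Reshape(out_reshape_)--> (N, C) --[Activation(act_)]-->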

View File

@ -46,20 +46,25 @@ class ScaleNPUOp : public NPUOp {
int GetAxis() { return axis_; }
private:
int SetActivation(const ge::Operator *input);
int SetActivation();
int ConvertScaleToMul(const std::vector<ge::Operator *> &npu_inputs, ge::Operator *cur_op,
int ConvertScaleToMul(const std::vector<ge::Operator *> &npu_inputs,
const std::vector<mindspore::MSTensor> &in_tensors);
int Adopt4DScale(const std::vector<ge::Operator *> &npu_inputs, const std::vector<mindspore::MSTensor> &in_tensors);
ge::Operator *ChangeDims(const ge::Operator *input, std::vector<int64_t> dst_shape, std::string name,
bool need_expand_4d = false);
int axis_ = 0;
bool use_mul_ = false;
bool need_expand_ = false;
schema::ActivationType act_type_ = schema::ActivationType_NO_ACTIVATION;
ge::Operator *op_ = nullptr;
hiai::op::Reshape *reshape_ = nullptr;
hiai::op::Const *scale_ = nullptr;
hiai::op::Const *bias_ = nullptr;
hiai::op::Const *shape_ = nullptr;
hiai::op::Reshape *out_reshape_ = nullptr;
hiai::op::Scale *scale_ = nullptr;
hiai::op::Mul *mul_ = nullptr;
hiai::op::Activation *act_ = nullptr;
std::vector<ge::Operator *> scale_ops_ = {};
};
} // namespace mindspore
#endif // MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_NPU_OP_SCALE_NPU_H_

View File

@ -32,14 +32,21 @@ int SplitNPUOp::Init(const schema::Primitive *primitive, const std::vector<minds
return RET_ERROR;
}
axis_ = static_cast<int>(split_prim->axis());
auto split_dim = in_tensors.at(0).Shape().at(axis_);
auto sizes_split = split_prim->size_splits();
std::vector<int> sizes_split_vec;
if (sizes_split != nullptr) {
sizes_split_vec = std::vector<int>(sizes_split->begin(), sizes_split->end());
} else {
return RET_ERROR;
}
int size = split_prim->output_num();
std::vector<int> sizes_split_vec;
CHECK_NULL_RETURN(sizes_split);
for (int i = 0; i < size; ++i) {
auto cur_size = sizes_split->Get(i);
if (i == size - 1 && cur_size == -1) {
sizes_split_vec.emplace_back(split_dim);
break;
}
split_dim -= cur_size;
sizes_split_vec.emplace_back(cur_size);
}
ge::TensorDesc size_splits_tensor_desc(ge::Shape({size}), ge::FORMAT_NCHW, ge::DT_INT32);
ge::TensorPtr size_splits_tensor = std::make_shared<hiai::Tensor>(size_splits_tensor_desc);
size_splits_tensor->SetData(reinterpret_cast<uint8_t *>(sizes_split_vec.data()), size * sizeof(int));
@ -50,8 +57,6 @@ int SplitNPUOp::Init(const schema::Primitive *primitive, const std::vector<minds
}
size_splits_->set_attr_value(size_splits_tensor);
split_->set_input_size_splits(*size_splits_);
axis_ = static_cast<int>(split_prim->axis());
split_->set_attr_num_split(size);
split_->create_dynamic_output_y(size);
return RET_OK;
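A worked example of the new -1 handling (illustrative): with the input extent along axis_ equal to 10, the loop records each explicit size and resolves a trailing -1 to the remaining extent.
// split_dim = 10, size_splits = {3, -1}  ->  sizes_split_vec = {3, 7}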

View File

@ -22,10 +22,6 @@
using mindspore::lite::RET_ERROR;
using mindspore::lite::RET_OK;
namespace {
constexpr int kNumDims = 4;
} // namespace
namespace mindspore {
bool CheckFusion(NPUOp *cur_op, const std::vector<mindspore::MSTensor> &graph_outputs) {
if (cur_op->in_ops().empty() || cur_op->out_ops().empty()) {
@ -77,32 +73,32 @@ void NPUFusionPass::RemoveAndFreeOp(NPUOp *cur_op) {
}
int NPUFusionPass::UpdatePreOps(NPUOp *cur_op) {
auto cur_in_ops = cur_op->in_ops();
for (auto in_op : cur_op->in_ops()) {
// graph in op
if (in_op->in_ops().empty()) {
continue;
}
auto pre_op = in_op->in_ops()[0];
cur_in_ops.erase(find(cur_in_ops.begin(), cur_in_ops.end(), in_op));
} else {
auto pre_op = in_op->in_ops()[0];
auto pre_out_ops = pre_op->out_ops();
for (size_t i = 0; i < pre_out_ops.size(); i++) {
if (pre_out_ops[i] == in_op) {
pre_out_ops[i] = cur_op;
break;
}
}
pre_op->set_out_ops(pre_out_ops);
auto pre_out_ops = pre_op->out_ops();
for (size_t i = 0; i < pre_out_ops.size(); i++) {
if (pre_out_ops[i] == in_op) {
pre_out_ops[i] = cur_op;
break;
for (size_t i = 0; i < cur_in_ops.size(); i++) {
if (cur_in_ops[i] == in_op) {
cur_in_ops[i] = pre_op;
break;
}
}
}
pre_op->set_out_ops(pre_out_ops);
auto cur_in_ops = cur_op->in_ops();
for (size_t i = 0; i < cur_in_ops.size(); i++) {
if (cur_in_ops[i] == in_op) {
cur_in_ops[i] = pre_op;
break;
}
}
cur_op->set_in_ops(cur_in_ops);
RemoveAndFreeOp(in_op);
}
cur_op->set_in_ops(cur_in_ops);
return RET_OK;
}
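Two illustrative cases for the restructured loop above:
// case 1: graph_in -> in_op(trans) -> cur_op  =>  in_op is erased from cur_op's in_ops, then freed
// case 2: pre_op -> in_op(trans) -> cur_op    =>  pre_op replaces in_op on both sides, then in_op is freed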
@ -139,19 +135,26 @@ int NPUFusionPass::UpdatePostOps(NPUOp *cur_op) {
int UpdatePreTensors(NPUOp *cur_op) {
auto tensors_vec = NPUPassUtils::GetNonConstInputs(cur_op);
for (auto in_op : cur_op->in_ops()) {
if (in_op->inputs().empty() || in_op->outputs().empty() || in_op->in_ops().empty()) {
MS_LOG(ERROR) << "in_tensors/out_tensors/in_ops is empty.";
if (in_op->inputs().empty() || in_op->outputs().empty()) {
MS_LOG(ERROR) << "in_tensors or out_tensors of input op is empty.";
return RET_ERROR;
}
mindspore::MSTensor cur_tensor;
auto in_tensor = in_op->inputs()[0];
auto out_tensor = in_op->outputs()[0];
auto pre_op = in_op->in_ops()[0];
for (size_t i = 0; i < pre_op->outputs().size(); i++) {
if (pre_op->outputs()[i] == in_tensor) {
cur_tensor = pre_op->outputs()[i];
if (!in_op->in_ops().empty()) {
auto pre_op = in_op->in_ops()[0];
for (size_t i = 0; i < pre_op->outputs().size(); i++) {
if (pre_op->outputs()[i] == in_tensor) {
cur_tensor = pre_op->outputs()[i];
break;
}
}
} else {
// graph input
cur_tensor = in_tensor;
}
for (size_t i = 0; i < tensors_vec.size(); i++) {
if (tensors_vec[i] == out_tensor) {
tensors_vec[i] = cur_tensor;
@ -173,56 +176,47 @@ int UpdatePreTensors(NPUOp *cur_op) {
return RET_OK;
}
bool NodeWithNhwc2nchw2nhwcOutput(NPUOp *cur_op) {
auto out_ops = cur_op->out_ops();
if (out_ops.empty()) {
return false;
}
bool all_out_ops_transpose = std::all_of(out_ops.begin(), out_ops.end(), [](NPUOp *op) {
return op->type() == schema::PrimitiveType_Transpose && op->out_ops().size() == 1 &&
op->out_ops()[0]->type() == schema::PrimitiveType_Transpose && op->out_ops()[0]->out_ops().empty();
});
return all_out_ops_transpose;
}
int UpdatePostTensors(NPUOp *cur_op) {
auto tensor = cur_op->outputs()[0];
// in case: node->nh2nc->nc2nh(graph output) --->>> node->nc2nh, node out_tensor should be put to nc2nh out tensors
auto out_ops = cur_op->out_ops();
if (NodeWithNhwc2nchw2nhwcOutput(cur_op)) {
std::vector<MSTensor> outputs;
for (auto i = 0; i < out_ops.size(); ++i) {
auto ori_out_tensor = cur_op->outputs()[i];
auto nc_tensor = out_ops[i]->outputs()[0];
outputs.push_back(nc_tensor);
auto post_post_op = out_ops[i]->out_ops()[0];
post_post_op->set_inputs({nc_tensor});
post_post_op->set_outputs({ori_out_tensor});
}
cur_op->set_outputs(outputs);
return RET_OK;
}
auto nhwc_shape = tensor.Shape();
if (nhwc_shape.size() < kNumDims) {
MS_LOG(ERROR) << "nhwc_shape < " << kNumDims;
return RET_ERROR;
}
tensor.SetShape({nhwc_shape[NHWC_N], nhwc_shape[NHWC_C], nhwc_shape[NHWC_H], nhwc_shape[NHWC_W]});
mindspore::MSTensor new_post_input;
for (auto out_op : cur_op->out_ops()) {
auto in_tensor = out_op->inputs()[0];
auto out_tensor = out_op->outputs()[0];
if (out_op->out_ops().empty()) {
cur_op->set_outputs({out_op->outputs()[0]});
auto nhwc_shape = in_tensor.Shape();
if (in_tensor.format() == Format::NHWC) {
MS_CHECK_TRUE_MSG(nhwc_shape.size() == NPU_SHAPE_SIZE, RET_ERROR, "Invalid transpose dim size!");
in_tensor.SetShape({nhwc_shape[NHWC_N], nhwc_shape[NHWC_C], nhwc_shape[NHWC_H], nhwc_shape[NHWC_W]});
in_tensor.SetFormat(Format::NCHW);
}
for (auto post_op : out_op->out_ops()) {
auto tensors_vec = post_op->inputs();
for (int i = 0; i < tensors_vec.size(); i++) {
if (tensors_vec[i] == out_tensor) {
tensors_vec[i] = tensor;
// out_op is a graph output op
if (out_op->out_ops().empty()) {
auto out_tensors_vec = cur_op->outputs();
for (size_t i = 0; i < out_tensors_vec.size(); i++) {
if (out_tensors_vec[i] == in_tensor) {
out_tensors_vec[i] = out_op->outputs()[0];
}
}
post_op->set_inputs(tensors_vec);
cur_op->set_outputs(out_tensors_vec);
// There may be other out_ops consuming the same tensor as the current out_op. Note that such an out_op has likely
// been updated already, which means it may no longer be a Transpose op.
for (auto other_out_op : cur_op->out_ops()) {
auto other_in_tensors_vec = other_out_op->inputs();
for (size_t i = 0; i < other_in_tensors_vec.size(); i++) {
if (other_in_tensors_vec[i] == in_tensor) {
other_in_tensors_vec[i] = out_op->outputs()[0];
}
}
other_out_op->set_inputs(other_in_tensors_vec);
}
}
// out_op is not a graph out op
for (auto post_op : out_op->out_ops()) {
auto in_tensors_vec = post_op->inputs();
for (size_t i = 0; i < in_tensors_vec.size(); i++) {
if (in_tensors_vec[i] == out_tensor) {
in_tensors_vec[i] = in_tensor;
}
}
post_op->set_inputs(in_tensors_vec);
}
}
return RET_OK;

View File

@ -1,69 +0,0 @@
/**
* Copyright 2020-2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "src/delegate/npu/pass/npu_infer_format_pass.h"
#include <vector>
#include <queue>
#include <map>
#include "src/delegate/npu/pass/npu_pass_utils.h"
#include "src/delegate/npu/npu_converter_utils.h"
#include "src/tensor.h"
#include "src/cxx_api/tensor/tensor_impl.h"
using mindspore::lite::RET_ERROR;
using mindspore::lite::RET_OK;
namespace mindspore {
int NPUInferFormatPass::Run(NPUGraph *subgraph) {
CHECK_NULL_RETURN(subgraph);
all_ops_ = subgraph->GetOps();
all_tensors_ = subgraph->GetInsertTensors();
auto graph_inputs = subgraph->inputs();
std::queue<NPUOp *> infer_ops;
std::map<tensor::MSTensor *, bool> is_inferred;
// initialization
for (auto op : *all_ops_) {
infer_ops.push(op);
}
for (auto tensor : *all_tensors_) {
is_inferred[tensor->impl()->lite_tensor()] = false;
}
for (auto input_tensor : graph_inputs) {
is_inferred[input_tensor.impl()->lite_tensor()] = true;
}
while (!infer_ops.empty()) {
auto cur_op = infer_ops.front();
infer_ops.pop();
bool input_inferred = std::all_of(cur_op->inputs().begin(), cur_op->inputs().end(), [&](auto in_tensor) {
return is_inferred[in_tensor.impl()->lite_tensor()] == true || in_tensor.IsConst();
});
if (input_inferred) {
auto dst_format = cur_op->inputs().at(0).format();
if (NPUPassUtils::IsNhwc2Nchw(cur_op) && dst_format == Format::NHWC) {
dst_format = Format::NCHW;
} else if (NPUPassUtils::IsNchw2Nhwc(cur_op) && dst_format == Format::NCHW) {
dst_format = Format::NHWC;
}
for (auto &out_tensor : cur_op->outputs()) {
const_cast<mindspore::MSTensor &>(out_tensor).SetFormat(dst_format);
is_inferred[out_tensor.impl()->lite_tensor()] = true;
}
} else {
infer_ops.push(cur_op);
}
}
return RET_OK;
}
} // namespace mindspore

View File

@ -1,38 +0,0 @@
/**
* Copyright 2020-2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_NPU_PASS_NPU_INFER_FORMAT_PASS_H_
#define MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_NPU_PASS_NPU_INFER_FORMAT_PASS_H_
#include <set>
#include <vector>
#include "src/delegate/npu/op/npu_op.h"
#include "src/delegate/npu/pass/npu_base_pass.h"
#include "src/common/log_util.h"
namespace mindspore {
class NPUInferFormatPass : public NPUBasePass {
public:
NPUInferFormatPass() { name_ = "NPUInferFormatPass"; }
int Run(NPUGraph *subgraph) override;
private:
std::vector<NPUOp *> *all_ops_;
std::vector<mindspore::MSTensor *> *all_tensors_;
};
} // namespace mindspore
#endif // MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_NPU_PASS_NPU_INFER_FORMAT_PASS_H_

View File

@ -1,5 +1,5 @@
/**
* Copyright 2020-2021 Huawei Technologies Co., Ltd
* Copyright 2020-2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@ -18,6 +18,7 @@
#include <set>
#include <string>
#include "src/delegate/npu/pass/npu_pass_utils.h"
#include "src/delegate/npu/npu_converter_utils.h"
using mindspore::lite::RET_ERROR;
using mindspore::lite::RET_OK;
@ -52,9 +53,9 @@ int NPUInsertTransformPass::GetInsertState(NPUOp *op) {
if (insert_nodes.find(op->type()) == insert_nodes.end()) {
return InsertNone;
}
// current op is target op
// use out ops to count how many out lines from current op
// Use out ops to count the out lines from the current op, since a single tensor can be used by multiple out ops.
// Besides, a tensor can be used by out ops and as a graph output at the same time, in which case there is one more line.
std::vector<mindspore::MSTensor> inputs = NPUPassUtils::GetNonConstInputs(op);
size_t in_out_tensor_num =
inputs.size() + std::max(std::max(op->out_ops().size(), static_cast<size_t>(1)), op->outputs().size());
@ -76,13 +77,19 @@ int NPUInsertTransformPass::GetInsertState(NPUOp *op) {
graph_input_num++;
}
}
if (op->out_ops().empty()) {
need_post_insert = true;
}
if (op->outputs().size() > op->out_ops().size()) {
graph_output_num = op->outputs().size() - op->out_ops().size();
auto graph_output = subgraph_->outputs();
for (auto output : op->outputs()) {
if (std::find(graph_output.begin(), graph_output.end(), output) != graph_output.end()) {
graph_output_num++;
need_post_insert = true;
}
}
for (const auto out_op : op->out_ops()) {
for (auto out_op_input : out_op->inputs()) {
if (std::find(graph_output.begin(), graph_output.end(), out_op_input) != graph_output.end()) {
in_out_tensor_num++;
}
}
if (NPUPassUtils::IsNhwc2Nchw(out_op)) {
transpose_output_num++;
} else {
@ -99,105 +106,81 @@ int NPUInsertTransformPass::GetInsertState(NPUOp *op) {
transpose_tensor_num == in_out_tensor_num) {
return InsertNone;
}
InsertState ret =
(need_pre_insert && need_post_insert)
? BothInsert
: ((need_pre_insert && !need_post_insert) ? PreInsert
: ((!need_pre_insert && need_post_insert) ? PostInsert : InsertNone));
InsertState ret = (need_pre_insert && need_post_insert)
? BothInsert
: (need_pre_insert ? PreInsert : (need_post_insert ? PostInsert : InsertNone));
return ret;
}
int NPUInsertTransformPass::InsertNode(NPUOp *op, NPUOp *post_op, size_t post_input_index,
std::vector<NPUOp *> *trans_ops) {
// Op and post_op can't be nullptr at the same time.
int NPUInsertTransformPass::InsertTransNode(NPUOp *op, NPUOp *post_op, const mindspore::MSTensor &trans_in_tensor,
std::vector<NPUOp *> *trans_ops) {
MS_ASSERT(op != nullptr || post_op != nullptr);
std::string op_name;
std::vector<mindspore::MSTensor> in_tensors;
std::vector<NPUOp *> out_ops;
// If post_op equals nullptr, op is the output of whole graph.
if (post_op != nullptr) {
out_ops.push_back(post_op);
op_name = post_op->name() + "_pre";
in_tensors.push_back(post_op->inputs().at(post_input_index));
}
std::vector<NPUOp *> in_ops;
// If op equals nullptr, post_op is the input of whole graph.
if (op != nullptr && !op->outputs().empty()) {
in_ops.push_back(op);
std::vector<NPUOp *> out_ops;
if (op != nullptr) {
op_name = op->name() + "_post";
in_tensors.resize(op->outputs().size());
std::copy(op->outputs().begin(), op->outputs().end(), in_tensors.begin());
in_ops.emplace_back(op);
}
for (auto i = 0; i < in_tensors.size(); ++i) {
auto in_tensor = in_tensors[i];
auto nhwc_shape = in_tensor.Shape();
if (nhwc_shape.size() == 0) {
continue;
} else if (nhwc_shape.size() < 4) {
MS_LOG(ERROR) << "nhwc_shape size < " << 4;
return RET_ERROR;
}
std::vector<int64_t> nchw_shape = {nhwc_shape[0], nhwc_shape[3], nhwc_shape[1], nhwc_shape[2]};
if (post_op != nullptr) {
op_name = post_op->name() + "_pre";
out_ops.emplace_back(post_op);
}
auto nhwc_shape = trans_in_tensor.Shape();
std::vector<int64_t> nchw_shape = {nhwc_shape[NHWC_N], nhwc_shape[NHWC_C], nhwc_shape[NHWC_H], nhwc_shape[NHWC_W]};
auto nh2nc_name = op_name + "_nh2nc_" + std::to_string(total++);
auto nh2nc_tensor =
mindspore::MSTensor::CreateTensor(nh2nc_name + "/output0", in_tensor.DataType(), nchw_shape, nullptr, 0);
if (nh2nc_tensor == nullptr) {
MS_LOG(ERROR) << "New nchw tensor failed when inserting nchw2nhwc op.";
return RET_ERROR;
}
nh2nc_tensor->SetTensorName(nh2nc_name + "/output0");
std::vector<mindspore::MSTensor> nh2nc_tensors = {*nh2nc_tensor};
all_tensors_->push_back(nh2nc_tensor);
auto nh2nc_name = op_name + "_nh2nc_" + std::to_string(total++);
auto nh2nc_tensor =
mindspore::MSTensor::CreateTensor(nh2nc_name + "/output0", trans_in_tensor.DataType(), nchw_shape, nullptr, 0);
if (nh2nc_tensor == nullptr) {
MS_LOG(ERROR) << "New nchw tensor failed when inserting nchw2nhwc op.";
return RET_ERROR;
}
nh2nc_tensor->SetFormat(Format::NCHW);
std::vector<mindspore::MSTensor> nh2nc_tensors = {*nh2nc_tensor};
all_tensors_->push_back(nh2nc_tensor);
auto nc2nh_name = op_name + "_nc2nh_" + std::to_string(total++);
auto nc2nh_tensor =
mindspore::MSTensor::CreateTensor(nc2nh_name + "/output0", in_tensor.DataType(), nhwc_shape, nullptr, 0);
if (nc2nh_tensor == nullptr) {
MS_LOG(ERROR) << "New nhwc tensor failed when inserting nhwc2nchw op.";
return RET_ERROR;
}
std::vector<mindspore::MSTensor> nc2nh_tensors = {*nc2nh_tensor};
all_tensors_->push_back(nc2nh_tensor);
auto nc2nh_name = op_name + "_nc2nh_" + std::to_string(total++);
auto nc2nh_tensor =
mindspore::MSTensor::CreateTensor(nc2nh_name + "/output0", trans_in_tensor.DataType(), nhwc_shape, nullptr, 0);
if (nc2nh_tensor == nullptr) {
MS_LOG(ERROR) << "New nhwc tensor failed when inserting nhwc2nchw op.";
return RET_ERROR;
}
nc2nh_tensor->SetFormat(Format::NHWC);
std::vector<mindspore::MSTensor> nc2nh_tensors = {*nc2nh_tensor};
all_tensors_->push_back(nc2nh_tensor);
auto *nh2nc_op = NPUPassUtils::CreateNhwc2NchwOp({in_tensor}, nh2nc_tensors, nh2nc_name);
trans_ops->push_back(nh2nc_op);
auto *nh2nc_op = NPUPassUtils::CreateNhwc2NchwOp({trans_in_tensor}, nh2nc_tensors, nh2nc_name);
trans_ops->push_back(nh2nc_op);
auto *nc2nh_op = NPUPassUtils::CreateNchw2NhwcOp(nh2nc_tensors, nc2nh_tensors, nc2nh_name);
trans_ops->push_back(nc2nh_op);
NPUPassUtils::UpdateOp(nh2nc_op, in_ops, {nc2nh_op}, {in_tensor}, nh2nc_tensors);
NPUPassUtils::UpdateOp(nc2nh_op, {nh2nc_op}, out_ops, {nh2nc_tensors[0]}, nc2nh_tensors);
if (op != nullptr) {
NPUPassUtils::UpdateNH2NCTransNodePreOp(op, nh2nc_op, post_op);
}
if (post_op != nullptr) {
NPUPassUtils::UpdateNC2NHTransNodePostOp(op, nc2nh_op, post_op);
} else {
// A null post_op means this tensor is a graph output; keep the graph output tensor name unchanged.
auto graph_output_name = in_tensor.Name();
nc2nh_tensor->SetTensorName(graph_output_name + "_after_" + name_);
}
NPUPassUtils::UpdateOp(nh2nc_op, in_ops, {nc2nh_op}, {trans_in_tensor}, nh2nc_tensors);
NPUPassUtils::UpdateOp(nc2nh_op, {nh2nc_op}, out_ops, {nh2nc_tensors[0]}, nc2nh_tensors);
if (op != nullptr) {
NPUPassUtils::UpdateNH2NCTransNodePreOp(op, nh2nc_op, post_op);
}
if (post_op != nullptr) {
NPUPassUtils::UpdateNC2NHTransNodePostOp(op, nc2nh_op, post_op);
} else {
// A null post_op means this tensor is a graph output; keep the graph output tensor name unchanged.
auto graph_output_name = trans_in_tensor.Name();
nc2nh_tensor->SetTensorName(graph_output_name + "_after_" + name_);
}
return RET_OK;
}
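For reference, the shape conversion above is a pure index permutation. A minimal sketch, assuming the NHWC_*/NCHW_* enums from npu_converter_utils.h (the helper name is hypothetical, not part of the pass):
#include <vector>
#include "src/delegate/npu/npu_converter_utils.h"

// Sketch: permute a 4-D NHWC shape into NCHW order using the NHWC_* indices
// (N = 0, H = 1, W = 2, C = 3). For example, the NHWC shape {1, 224, 224, 3}
// becomes the NCHW shape {1, 3, 224, 224}.
std::vector<int64_t> NhwcShapeToNchw(const std::vector<int64_t> &nhwc_shape) {
  return {nhwc_shape[NHWC_N], nhwc_shape[NHWC_C], nhwc_shape[NHWC_H], nhwc_shape[NHWC_W]};
}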
int NPUInsertTransformPass::InsertForInputTensor(NPUOp *op, size_t in_tensor_index, NPUOp *pre_op,
std::vector<NPUOp *> *trans_ops) {
// insert transpose nodes before target ops
return InsertNode(pre_op, op, in_tensor_index, trans_ops);
}
int NPUInsertTransformPass::InsertForOutputTensor(NPUOp *op, NPUOp *post_op, size_t post_in_tensor_index,
std::vector<NPUOp *> *trans_ops) {
// insert transpose nodes after target ops
return InsertNode(op, post_op, post_in_tensor_index, trans_ops);
}
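With the wrappers removed, every insertion goes through InsertTransNode directly. The three call shapes used by this pass, collected for reference:
// InsertTransNode call shapes in this pass:
//   InsertTransNode(pre_op, op, op->inputs().at(index), trans_ops);   // splice before op
//   InsertTransNode(op, post_op, op->outputs().at(idx), trans_ops);   // splice between op and post_op
//   InsertTransNode(op, nullptr, op->outputs().at(idx), trans_ops);   // splice after op; output is a graph output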
int NPUInsertTransformPass::InsertPreNodes(NPUOp *op, std::vector<NPUOp *> *trans_ops) {
int ret = RET_OK;
auto inputs = NPUPassUtils::GetNonConstInputs(op);
for (auto tensor : inputs) {
if (tensor.Shape().size() < NPU_SHAPE_SIZE) {
continue;
}
// the input tensor can only come from a single op
auto pre_op = NPUPassUtils::OpInputFromOp(op, tensor);
if (NPUPassUtils::IsNchw2Nhwc(pre_op)) {
continue;
@ -209,7 +192,7 @@ int NPUInsertTransformPass::InsertPreNodes(NPUOp *op, std::vector<NPUOp *> *tran
return RET_ERROR;
}
size_t index = it - op->inputs().begin();
ret = InsertForInputTensor(op, index, pre_op, trans_ops);
ret = InsertTransNode(pre_op, op, op->inputs().at(index), trans_ops);
if (ret != RET_OK) {
MS_LOG(ERROR) << "Insert nhwc2nchw op and nchw2nhwc op before op " << op->name() << " failed.";
return ret;
@ -220,38 +203,58 @@ int NPUInsertTransformPass::InsertPreNodes(NPUOp *op, std::vector<NPUOp *> *tran
int NPUInsertTransformPass::InsertPostNodes(NPUOp *op, std::vector<NPUOp *> *trans_ops) {
int ret = RET_OK;
for (const auto post_op : op->out_ops()) {
if (NPUPassUtils::IsNhwc2Nchw(post_op)) {
for (size_t idx = 0; idx < op->outputs().size(); idx++) {
auto out_tensor = op->outputs().at(idx);
if (out_tensor.Shape().size() < NPU_SHAPE_SIZE) {
continue;
}
auto post_op_in_tensors = post_op->inputs();
// op's out tensor is one of post_op's input tensor
auto it = std::find(post_op_in_tensors.begin(), post_op_in_tensors.end(), op->outputs().at(0));
if (it == post_op_in_tensors.end()) {
return RET_ERROR;
if (std::find(subgraph_->outputs().begin(), subgraph_->outputs().end(), out_tensor) != subgraph_->outputs().end()) {
// the case that op's out tensor is a graph output
ret = InsertTransNode(op, nullptr, op->outputs().at(idx), trans_ops);
if (ret != RET_OK) {
MS_LOG(ERROR) << "Insert nhwc2nchw op and nchw2nhwc op after op " << op->name() << " failed.";
return RET_ERROR;
}
// Use the origin output as the last trans op's output to avoid losing the output tensor after transpose
// fusion. The inputs of the current op's out_ops will be updated in the loop below.
auto last_trans = trans_ops->back();
auto trans_output = last_trans->outputs();
auto cur_outputs = op->outputs();
cur_outputs[idx] = last_trans->outputs()[0];
trans_output[0] = op->outputs()[idx];
last_trans->set_outputs(trans_output);
op->set_outputs(cur_outputs);
}
size_t input_index = it - post_op_in_tensors.begin();
ret = InsertForOutputTensor(op, post_op, input_index, trans_ops);
if (ret != RET_OK) {
MS_LOG(ERROR) << "Insert nhwc2nchw op and nchw2nhwc op after op " << op->name() << " failed.";
return ret;
}
}
if (op->outputs().size() > op->out_ops().size()) {
// op out is graph output
ret = InsertForOutputTensor(op, nullptr, 0, trans_ops);
if (ret != RET_OK) {
MS_LOG(ERROR) << "Insert nhwc2nchw op and nchw2nhwc op after op " << op->name() << " failed.";
return ret;
// Besides being graph outputs, the output tensors can also be connected to multiple ops.
for (auto post_op : op->out_ops()) {
auto post_op_input = post_op->inputs();
auto it = std::find(post_op_input.begin(), post_op_input.end(), out_tensor);
if (it == post_op_input.end()) {
continue;
}
auto related_idx = it - post_op_input.begin();
post_op_input[related_idx] = op->outputs().at(idx);
post_op->set_inputs(post_op_input);
if (NPUPassUtils::IsNhwc2Nchw(post_op)) {
continue;
}
// the case that op's out tensor is one of post_op's input tensors
ret = InsertTransNode(op, post_op, op->outputs().at(idx), trans_ops);
if (ret != RET_OK) {
MS_LOG(ERROR) << "Insert nhwc2nchw op and nchw2nhwc op after op " << op->name() << " failed.";
return ret;
}
}
}
return ret;
}
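The tensor swap in the graph-output branch above is easy to misread. A minimal sketch of the swap in isolation, assuming the NPUOp accessors used above (the helper name is hypothetical):
#include <vector>
#include "src/delegate/npu/op/npu_op.h"

// Sketch: keep the original (named) graph-output tensor as the last transpose
// op's output and hand the freshly created intermediate tensor to the producer,
// so the graph output name survives later transpose fusion.
void SwapGraphOutputTensor(NPUOp *op, NPUOp *last_trans, size_t idx) {
  auto trans_outputs = last_trans->outputs();  // copies of both tensor lists
  auto cur_outputs = op->outputs();
  auto origin_out = cur_outputs[idx];          // the named graph output
  cur_outputs[idx] = trans_outputs[0];         // producer now yields the intermediate tensor
  trans_outputs[0] = origin_out;               // last transpose yields the origin tensor
  last_trans->set_outputs(trans_outputs);
  op->set_outputs(cur_outputs);
}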
int NPUInsertTransformPass::Run(NPUGraph *subgraph) {
all_ops_ = subgraph->GetOps();
all_tensors_ = subgraph->GetInsertTensors();
subgraph_ = subgraph;
all_ops_ = subgraph_->GetOps();
all_tensors_ = subgraph_->GetInsertTensors();
std::vector<NPUOp *> insert_ops;
for (int j = 0; j < 2; ++j) {
for (size_t i = 0; i < all_ops_->size(); i++) {

View File

@ -1,5 +1,5 @@
/**
* Copyright 2020-2021 Huawei Technologies Co., Ltd
* Copyright 2020-2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@ -30,14 +30,14 @@ class NPUInsertTransformPass : public NPUBasePass {
int GetInsertState(NPUOp *op);
int InsertPreNodes(NPUOp *op, std::vector<NPUOp *> *trans_ops);
int InsertPostNodes(NPUOp *op, std::vector<NPUOp *> *trans_ops);
int InsertNode(NPUOp *op, NPUOp *post_op, size_t post_input_index, std::vector<NPUOp *> *trans_ops);
int InsertForInputTensor(NPUOp *op, size_t in_tensor_index, NPUOp *pre_op, std::vector<NPUOp *> *trans_ops);
int InsertForOutputTensor(NPUOp *op, NPUOp *post_op, size_t post_in_tensor_index, std::vector<NPUOp *> *trans_ops);
int InsertTransNode(NPUOp *op, NPUOp *post_op, const mindspore::MSTensor &trans_in_tensor,
std::vector<NPUOp *> *trans_ops);
private:
int total = 0;
std::vector<NPUOp *> *all_ops_;
std::vector<mindspore::MSTensor *> *all_tensors_;
NPUGraph *subgraph_ = nullptr;
std::vector<NPUOp *> *all_ops_ = nullptr;
std::vector<mindspore::MSTensor *> *all_tensors_ = nullptr;
};
} // namespace mindspore
#endif // MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_NPU_PASS_NPU_INSERT_TRANSFORM_PASS_H_

View File

@ -60,16 +60,16 @@ void NPUPassUtils::UpdateOp(NPUOp *op, const std::vector<NPUOp *> &in_ops, const
void NPUPassUtils::UpdateNH2NCTransNodePreOp(NPUOp *pre_op, NPUOp *trans_op, NPUOp *op) {
// For op before trans, update the out_ops; the output tensor of op is the input tensor of trans.
std::vector<NPUOp *> out_ops = pre_op->out_ops();
size_t i = 0;
for (; i < out_ops.size(); i++) {
if (out_ops[i] == op) {
out_ops[i] = trans_op;
break;
if (op == nullptr) {
out_ops.emplace_back(trans_op);
} else {
for (size_t i = 0; i < out_ops.size(); i++) {
if (out_ops[i] == op) {
out_ops[i] = trans_op;
break;
}
}
}
if (i == out_ops.size()) {
out_ops.push_back(trans_op);
}
pre_op->set_out_ops(out_ops);
}
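The new nullptr branch distinguishes appending a brand-new consumer from replacing an existing one. An equivalent sketch using std::replace, assuming op occurs at most once in out_ops (the helper name is hypothetical):
#include <algorithm>
#include <vector>
#include "src/delegate/npu/op/npu_op.h"

// Sketch: append trans_op as a brand-new consumer of pre_op when there is no
// op to replace; otherwise substitute trans_op for op in the consumer list.
// std::replace swaps every occurrence, which matches the loop above whenever
// op appears at most once in out_ops.
void RewireTransConsumer(NPUOp *pre_op, NPUOp *trans_op, NPUOp *op) {
  std::vector<NPUOp *> out_ops = pre_op->out_ops();
  if (op == nullptr) {
    out_ops.emplace_back(trans_op);
  } else {
    std::replace(out_ops.begin(), out_ops.end(), op, trans_op);
  }
  pre_op->set_out_ops(out_ops);
}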
@ -177,8 +177,8 @@ NPUOp *NPUPassUtils::OpInputFromOp(NPUOp *op, mindspore::MSTensor in_tensor) {
return nullptr;
}
auto in_ops = op->in_ops();
auto output_contain = [in_tensor](NPUOp *op) {
auto outputs = op->outputs();
auto output_contain = [in_tensor](NPUOp *in_op) {
auto outputs = in_op->outputs();
return std::find(outputs.begin(), outputs.end(), in_tensor) != outputs.end();
};
auto it = std::find_if(in_ops.begin(), in_ops.end(), output_contain);

View File

@ -37,10 +37,6 @@ int NPUTransformPass::InsertPreNodes(NPUOp *op, std::vector<NPUOp *> *trans_ops)
MS_LOG(ERROR) << "NPU Transform pass does not find in op with 4d output";
return RET_ERROR;
}
if (op->inputs().front().format() == Format::NCHW) {
// input format is already NCHW, no need to insert transpose.
return RET_OK;
}
if (is_input_op || nchw_nodes.find((*it)->type()) == nchw_nodes.end()) {
NPUOp *pre_op = nullptr;
if (!is_input_op) {
@ -57,6 +53,7 @@ int NPUTransformPass::InsertPreNodes(NPUOp *op, std::vector<NPUOp *> *trans_ops)
MS_LOG(ERROR) << "New nchw tensor failed when inserting pre nhwc2nchw op.";
return RET_ERROR;
}
tensor->SetFormat(Format::NCHW);
std::vector<mindspore::MSTensor> pre_trans_outputs = {*tensor};
all_tensors_->push_back(tensor);
@ -83,11 +80,10 @@ int NPUTransformPass::InsertPreNodes(NPUOp *op, std::vector<NPUOp *> *trans_ops)
return RET_OK;
}
int NPUTransformPass::InsertPostNodes(NPUOp *op, std::vector<NPUOp *> *trans_ops,
std::vector<mindspore::MSTensor> graph_outputs) {
int NPUTransformPass::InsertPostNodes(NPUOp *op, std::vector<NPUOp *> *trans_ops) {
bool is_output_op = false;
if (op->out_ops().empty() ||
find(graph_outputs.begin(), graph_outputs.end(), op->outputs()[0]) != graph_outputs.end()) {
find(subgraph_->outputs().begin(), subgraph_->outputs().end(), op->outputs()[0]) != subgraph_->outputs().end()) {
is_output_op = true;
}
// Get the post op that need insert trans op.
@ -116,6 +112,7 @@ int NPUTransformPass::InsertPostNodes(NPUOp *op, std::vector<NPUOp *> *trans_ops
MS_LOG(ERROR) << "New nchw tensor failed when inserting post nchw2nhwc op.";
return RET_ERROR;
}
nc2nh_tensor->SetFormat(Format::NCHW);
all_tensors_->push_back(nc2nh_tensor);
if (is_output_op) {
@ -145,6 +142,7 @@ int NPUTransformPass::InsertPostNodes(NPUOp *op, std::vector<NPUOp *> *trans_ops
MS_LOG(ERROR) << "New nhwc tensor failed when inserting post nchw2nhwc op.";
return RET_ERROR;
}
out_tensor->SetFormat(Format::NHWC);
all_tensors_->push_back(out_tensor);
nc2nh_outputs.push_back(*out_tensor);
@ -173,9 +171,9 @@ int NPUTransformPass::InsertPostNodes(NPUOp *op, std::vector<NPUOp *> *trans_ops
}
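The SetFormat calls added in this pass tag each inserted tensor with the layout it will actually hold. A condensed sketch of the creation pattern, where name, data_type, and nchw_shape stand in for the locals at each call site:
// Sketch of the creation pattern after this commit: each intermediate tensor
// is tagged with the layout it will actually hold, so the recorded format
// stays consistent with the transposed data.
auto tensor = mindspore::MSTensor::CreateTensor(name + "/output0", data_type, nchw_shape, nullptr, 0);
if (tensor == nullptr) {
  MS_LOG(ERROR) << "New nchw tensor failed when inserting pre nhwc2nchw op.";
  return RET_ERROR;
}
tensor->SetFormat(Format::NCHW);  // Format::NHWC for an nchw2nhwc output tensor
all_tensors_->push_back(tensor);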
int NPUTransformPass::Run(NPUGraph *subgraph) {
all_ops_ = subgraph->GetOps();
all_tensors_ = subgraph->GetInsertTensors();
auto graph_outputs = subgraph->outputs();
subgraph_ = subgraph;
all_ops_ = subgraph_->GetOps();
all_tensors_ = subgraph_->GetInsertTensors();
for (size_t i = 0; i < all_ops_->size();) {
auto op = (*all_ops_)[i];
if (nchw_nodes.find(op->type()) == nchw_nodes.end()) {
@ -204,7 +202,7 @@ int NPUTransformPass::Run(NPUGraph *subgraph) {
// insert post_ops after op in vector
// modify loop index add post_ops.size() to the next op in the origin vector
std::vector<NPUOp *> post_ops;
ret = InsertPostNodes(op, &post_ops, graph_outputs);
ret = InsertPostNodes(op, &post_ops);
if (ret != RET_OK) {
MS_LOG(ERROR) << "Insert nchw2nhwc op after op " << op->name() << " failed.";
return RET_ERROR;

View File

@ -32,12 +32,13 @@ class NPUTransformPass : public NPUBasePass {
private:
int InsertPreNodes(NPUOp *op, std::vector<NPUOp *> *trans_ops);
int InsertPostNodes(NPUOp *op, std::vector<NPUOp *> *trans_ops, std::vector<mindspore::MSTensor> graph_outputs);
int InsertPostNodes(NPUOp *op, std::vector<NPUOp *> *trans_ops);
private:
int total = 0;
std::vector<NPUOp *> *all_ops_;
std::vector<mindspore::MSTensor *> *all_tensors_;
NPUGraph *subgraph_ = nullptr;
std::vector<NPUOp *> *all_ops_ = nullptr;
std::vector<mindspore::MSTensor *> *all_tensors_ = nullptr;
};
} // namespace mindspore
#endif // MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_NPU_PASS_NPU_TRANSFORM_PASS_H_

View File

@ -493,6 +493,7 @@ STATUS TfliteModelParser::ConvertGraphOutputs(const std::unique_ptr<tflite::SubG
auto make_tuple_cnode = func_graph->NewCNode(make_tuple_inputs);
MSLITE_CHECK_PTR(make_tuple_cnode);
make_tuple_cnode->set_fullname_with_scope("return_tuple");
auto return_prim_ptr = std::make_shared<ops::Return>();
if (return_prim_ptr == nullptr) {
MS_LOG(ERROR) << "new Return failed";