reconstruct npu scale op and insert transpose pass
This commit is contained in:
parent f6e83e0383
commit c564c628b5
@@ -15,7 +15,7 @@
 */
 
 #include "src/delegate/npu/npu_converter_utils.h"
-#include "src/common/log_adapter.h"
+#include "src/delegate/npu/op/npu_op.h"
 namespace mindspore {
 #define C4NUM 4
 #define C8NUM 8
@@ -55,7 +55,7 @@ void Float16ToFloat32(const float16_t *__restrict input, float *__restrict outpu
 #endif
 
 ge::Shape ConverterToNPUShape(const std::vector<int64_t> &src_shape, bool is_expand_4d) {
-  vector<int64_t> shapes;
+  std::vector<int64_t> shapes;
   shapes.reserve(src_shape.size());
   for (int i = 0; i < src_shape.size(); i++) {
     shapes.push_back(src_shape[i]);
@@ -64,8 +64,7 @@ ge::Shape ConverterToNPUShape(const std::vector<int64_t> &src_shape, bool is_exp
   if (shapes.size() == 1) {
    return ge::Shape({1, shapes[0], 1, 1});
   } else {
-    const int dimension4 = 4;
-    for (int i = src_shape.size(); i < dimension4; i++) {
+    for (int i = src_shape.size(); i < NPU_SHAPE_SIZE; i++) {
      shapes.push_back(1);
    }
   }
@@ -204,23 +203,23 @@ int TransFormAxis(int axis) {
 void AssistDataNHWC2NCHW(int *data, size_t unit_size) {
   MS_ASSERT(data != nullptr);
   for (size_t i = 0; i < unit_size; ++i) {
-    int c = data[3 * unit_size + i];
+    int org_c = data[NHWC_C * unit_size + i];
     // n h w c
     // n c h w
-    data[3 * unit_size + i] = data[2 * unit_size + i];
-    data[2 * unit_size + i] = data[unit_size + i];
-    data[unit_size + i] = c;
+    data[NCHW_W * unit_size + i] = data[NHWC_W * unit_size + i];
+    data[NCHW_H * unit_size + i] = data[NHWC_H * unit_size + i];
+    data[NCHW_C * unit_size + i] = org_c;
   }
 }
 
 int MaskDataNHWC2NCHW(int mask) {
-  int mask_vec[4];
-  for (int i = 0; i < 4; ++i) {
+  int mask_vec[NPU_SHAPE_SIZE];
+  for (int i = 0; i < NPU_SHAPE_SIZE; ++i) {
     mask_vec[i] = (uint32_t)(mask) & (1 << i);
   }
   AssistDataNHWC2NCHW(mask_vec, 1);
   int ret = 0;
-  for (int i = 0; i < 4; ++i) {
+  for (int i = 0; i < NPU_SHAPE_SIZE; ++i) {
     if (mask_vec[i]) {
       ret += 1 << i;
     }
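Note: MaskDataNHWC2NCHW above remaps a per-axis bitmask (e.g. a StridedSlice begin_mask) from NHWC bit order to NCHW bit order. A small worked example of the intended behavior (the values are illustrative, not part of the commit):

  int nhwc_mask = 0b1010;                        // bits set for H (axis 1) and C (axis 3) in NHWC
  int nchw_mask = MaskDataNHWC2NCHW(nhwc_mask);  // H moves to axis 2, C moves to axis 1 in NCHW
  // nchw_mask == 0b0110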
@@ -27,6 +27,9 @@
 #include "include/graph/op/array_defs.h"
 #include "include/api/types.h"
 #include "include/api/data_type.h"
+#include "include/graph/op/all_ops.h"
+#include "src/common/log_adapter.h"
+#include "nnacl/op_base.h"
 
 namespace mindspore {
 enum NCHW_SHAPE { NCHW_INVALID = -1, NCHW_N = 0, NCHW_C = 1, NCHW_H = 2, NCHW_W = 3 };
@@ -91,5 +94,29 @@ int TransFormAxis(int axis);
 void AssistDataNHWC2NCHW(int *data, size_t unit_size);
 
 int MaskDataNHWC2NCHW(int mask);
 
+template <typename T>
+ge::Operator *GetNPUConst(const uint8_t *const_data, const std::vector<int64_t> &shape, const ge::DataType data_type,
+                          std::string name = "const", bool is_expand_4d = false) {
+  MS_CHECK_TRUE_MSG(const_data != nullptr, nullptr, "Const data can not be nullptr.");
+  int element_num = 1;
+  if (!shape.empty()) {
+    for (size_t i = 0; i < shape.size(); i++) {
+      MS_CHECK_GT(shape.at(i), 0, nullptr);
+      MS_CHECK_INT_MUL_NOT_OVERFLOW(element_num, shape.at(i), nullptr);
+      element_num *= shape.at(i);
+    }
+  }
+  ge::TensorDesc const_tensor_desc(ConverterToNPUShape(shape, is_expand_4d), ge::FORMAT_NCHW, data_type);
+  ge::TensorPtr const_tensor = std::make_shared<hiai::Tensor>(const_tensor_desc);
+  const_tensor->SetData(const_data, element_num * sizeof(T));
+  auto const_op = new (std::nothrow) hiai::op::Const(name);
+  if (const_op == nullptr) {
+    MS_LOG(ERROR) << "New Const op failed.";
+    return const_op;
+  }
+  const_op->set_attr_value(const_tensor);
+  return const_op;
+}
 }  // namespace mindspore
 #endif  // MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_NPU_NPU_CONVERTER_UITLS_H_
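Note: a minimal usage sketch of the new GetNPUConst helper (variable names here are illustrative, not from the commit). It wraps raw bytes in a hiai::op::Const with an NCHW tensor descriptor; the caller owns the returned operator and must delete it, which is why ScaleNPUOp tracks every constant it creates in scale_ops_:

  std::vector<int> shape_data = {2, 3};  // hypothetical payload for a DT_INT32 const
  auto data_ptr = reinterpret_cast<const uint8_t *>(shape_data.data());
  int64_t num = static_cast<int64_t>(shape_data.size());
  ge::Operator *shape_const = GetNPUConst<int>(data_ptr, {num}, ge::DT_INT32, "demo_shape_const");
  // Returns nullptr on failure; on success it can feed e.g. reshape->set_input_shape(*shape_const).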
@@ -52,12 +52,12 @@
 #include "src/delegate/npu/op/transpose_npu.h"
 #include "src/delegate/npu/op/unsqueeze_npu.h"
 #include "src/delegate/npu/op/abs_npu.h"
+#include "src/delegate/npu/op/flatten_npu.h"
 #include "src/delegate/npu/npu_graph.h"
 #include "src/delegate/delegate_utils.h"
 #include "src/delegate/npu/pass/npu_transform_pass.h"
 #include "src/delegate/npu/pass/npu_insert_transform_pass.h"
 #include "src/delegate/npu/pass/npu_fusion_pass.h"
-#include "src/delegate/npu/pass/npu_infer_format_pass.h"
 
 using mindspore::lite::RET_ERROR;
 using mindspore::lite::RET_OK;
@@ -97,13 +97,6 @@ Status NPUDelegate::AddPasses() {
     return mindspore::kLiteNullptr;
   }
   pass_manager_->AddPass(fusion_pass);
 
-  auto infer_format_pass = new (std::nothrow) NPUInferFormatPass();
-  if (infer_format_pass == nullptr) {
-    MS_LOG(ERROR) << "New NPUInferFormatPass failed.";
-    return mindspore::kLiteNullptr;
-  }
-  pass_manager_->AddPass(infer_format_pass);
-
   return mindspore::kSuccess;
 }
@@ -16,6 +16,7 @@
 
 #include "src/delegate/npu/npu_executor.h"
 #include <unordered_map>
+#include <set>
 #include "include/errorcode.h"
 #include "src/delegate/npu/npu_manager.h"
 #include "src/common/log_adapter.h"
@@ -73,7 +74,8 @@ bool IsSameShapeTensor(mindspore::MSTensor tensor, const std::shared_ptr<hiai::A
 }
 
 int NPUExecutor::Run(const std::vector<mindspore::MSTensor> &in_tensors,
-                     const std::vector<mindspore::MSTensor> &out_tensors, const std::vector<NPUOp *> &in_ops) {
+                     const std::vector<mindspore::MSTensor> &valid_out_tensors,
+                     const std::vector<mindspore::MSTensor> &all_out_tensors, const std::vector<NPUOp *> &out_ops) {
   hiai::AiContext context;
   for (size_t i = 0; i < npu_input_tensors_.size(); ++i) {
     MS_CHECK_TRUE_RET(i < input_relationship_.size() && input_relationship_.at(i) < in_tensors.size(), RET_ERROR);
@@ -97,19 +99,32 @@ int NPUExecutor::Run(const std::vector<mindspore::MSTensor> &in_tensors,
     return RET_ERROR;
   }
 
-  if (npu_output_tensors_.size() != out_tensors.size()) {
-    MS_LOG(ERROR) << "The output count is not euqal to ms tensor.";
+  // if the multi-output op is the graph out op, all of its output tensor will be treat as graph output for om model.
+  std::set<schema::PrimitiveType> multi_output_list = {schema::PrimitiveType_Split};
+  bool has_multi_output_op = false;
+  for (auto out_op : out_ops) {
+    if (std::find(multi_output_list.begin(), multi_output_list.end(), out_op->type()) != multi_output_list.end()) {
+      has_multi_output_op = true;
+      break;
+    }
+  }
+
+  if (npu_output_tensors_.size() != all_out_tensors.size() ||
+      (!has_multi_output_op && npu_output_tensors_.size() != valid_out_tensors.size())) {
+    MS_LOG(ERROR) << "The output count (" << npu_output_tensors_.size() << ") is not equal to ms tensor ("
+                  << all_out_tensors.size() << ").";
     return RET_ERROR;
   }
   for (size_t i = 0; i < npu_output_tensors_.size(); ++i) {
-    mindspore::MSTensor out_tensor = out_tensors[i];
-    auto data = out_tensor.MutableData();
-    if (data == nullptr) {
-      MS_LOG(ERROR) << "For " << model_name_ << ", the output tensor " << out_tensors[i].Name() << " data is nullptr";
-      return RET_ERROR;
+    mindspore::MSTensor out_tensor = all_out_tensors[i];
+    if (std::find(valid_out_tensors.begin(), valid_out_tensors.end(), out_tensor) != valid_out_tensors.end()) {
+      auto data = out_tensor.MutableData();
+      if (data == nullptr) {
+        MS_LOG(ERROR) << "For " << model_name_ << ", the output tensor " << out_tensor.Name() << " data is nullptr";
+        return RET_ERROR;
+      }
+      memcpy(data, npu_output_tensors_[i]->GetBuffer(), npu_output_tensors_[i]->GetSize());
     }
-
-    memcpy(data, npu_output_tensors_[i]->GetBuffer(), npu_output_tensors_[i]->GetSize());
   }
   return RET_OK;
 }
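Note: the Run() signature change splits the old out_tensors into two lists. all_out_tensors must line up one-to-one with the om model outputs: when a multi-output op such as Split is a graph output op, the om model exposes every one of its output tensors, even those the caller never asked for. valid_out_tensors holds only the tensors actually returned to the caller, and only those are copied out of the NPU buffers. The matching call site (see NPUSubGraph::Execute() further down) becomes:

  executor_->Run(inputs(), outputs(), all_tensors_from_out_ops_, out_ops_);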
@@ -33,8 +33,8 @@ class NPUExecutor {
   ~NPUExecutor();
   int Prepare();
 
-  int Run(const std::vector<mindspore::MSTensor> &in_tensors, const std::vector<mindspore::MSTensor> &out_tensors,
-          const std::vector<NPUOp *> &in_ops);
+  int Run(const std::vector<mindspore::MSTensor> &in_tensors, const std::vector<mindspore::MSTensor> &valid_out_tensors,
+          const std::vector<mindspore::MSTensor> &all_out_tensors, const std::vector<NPUOp *> &out_ops);
 
   void InitInputMappingRelationShip(const std::vector<size_t> &input_index) { input_relationship_ = input_index; }
 
@@ -37,6 +37,7 @@ NPUSubGraph::~NPUSubGraph() {
   subgraph_input_ops_.clear();
   subgraph_output_ops_.clear();
   out_tensor_sorted_.clear();
+  all_tensors_from_out_ops_.clear();
   for (auto op : op_buffer_) {
     delete op;
   }
@@ -61,11 +62,11 @@ void NPUSubGraph::set_input(mindspore::MSTensor in_tensor, int index) {
 }
 
 void NPUSubGraph::set_output(mindspore::MSTensor out_tensor, int index) {
-  MS_ASSERT(index < out_tensor_sorted_.size());
+  MS_ASSERT(index < outputs_.size());
   auto origin_tensor = outputs_[index];
-  for (size_t i = 0; i < out_tensor_sorted_.size(); i++) {
-    if (out_tensor_sorted_[i] == origin_tensor) {
-      out_tensor_sorted_[i] = out_tensor;
+  for (size_t i = 0; i < all_tensors_from_out_ops_.size(); i++) {
+    if (all_tensors_from_out_ops_[i] == origin_tensor) {
+      all_tensors_from_out_ops_[i] = out_tensor;
     }
   }
   outputs_[index] = out_tensor;
@@ -146,7 +147,7 @@ std::shared_ptr<domi::ModelBufferData> NPUSubGraph::BuildIRModel() {
   return om_model_buff;
 }
 
-int NPUSubGraph::Execute() { return executor_->Run(inputs(), out_tensor_sorted_, in_ops_); }
+int NPUSubGraph::Execute() { return executor_->Run(inputs(), outputs(), all_tensors_from_out_ops_, out_ops_); }
 
 int NPUSubGraph::BuildNPUInputOp() {
   int count = 0;
@@ -242,12 +243,9 @@ int NPUSubGraph::BuildNPUOutputOp() {
     MS_LOG(ERROR) << "Get NPU operators failed.";
     return RET_ERROR;
   }
-  out_tensor_sorted_.resize(outputs().size());
-  int i = 0;
   for (auto node : out_ops_) {
     for (const auto &tensor : node->outputs()) {
-      if (std::find(outputs().begin(), outputs().end(), tensor) != outputs().end())
-        this->out_tensor_sorted_[i++] = tensor;
+      all_tensors_from_out_ops_.emplace_back(tensor);
     }
   }
   if (subgraph_output_ops_.empty()) {
@@ -73,6 +73,8 @@ class NPUSubGraph : public kernel::Kernel {
 
   std::vector<mindspore::MSTensor> out_tensor_sorted_;
 
+  std::vector<mindspore::MSTensor> all_tensors_from_out_ops_;
+
   std::vector<ge::Operator *> op_buffer_;
 
   std::vector<NPUOp *> npu_ops_{};
@@ -31,18 +31,9 @@ int ConcatNPUOp::Init(const schema::Primitive *primitive, const std::vector<mind
     return RET_ERROR;
   }
   axis_ = concat_prim->axis();
-  return RET_OK;
-}
-
-int ConcatNPUOp::SetNPUInputs(const std::vector<mindspore::MSTensor> &in_tensors,
-                              const std::vector<mindspore::MSTensor> &out_tensors,
-                              const std::vector<ge::Operator *> &npu_inputs) {
-  concat_->set_attr_concat_dim(axis_);
-  concat_->set_attr_N(npu_inputs.size());
-  concat_->create_dynamic_input_x(npu_inputs.size());
-  for (int i = 0; i < npu_inputs.size(); ++i) {
-    concat_->set_dynamic_input_x(i + 1, *npu_inputs[i]);
-  }
+  auto input_num = in_tensors.size();
+  concat_->set_attr_N(input_num);
+  concat_->create_dynamic_input_x(input_num);
   return RET_OK;
 }
 
@@ -51,8 +42,6 @@ int ConcatNPUOp::SetNPUInputs(const std::vector<mindspore::MSTensor> &in_tensors
                               const std::vector<ge::Operator *> &npu_inputs,
                               const std::unordered_map<int, std::pair<ge::Operator *, int>> &index2_multi_out_index) {
   concat_->set_attr_concat_dim(axis_);
-  concat_->set_attr_N(npu_inputs.size());
-  concat_->create_dynamic_input_x(npu_inputs.size());
   for (auto pair : index2_multi_out_index) {
     auto in_op = pair.second.first;
     MS_CHECK_TRUE_RET(in_op != nullptr, RET_ERROR);
@@ -39,10 +39,6 @@ class ConcatNPUOp : public NPUOp {
   int Init(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
            const std::vector<mindspore::MSTensor> &out_tensors) override;
 
-  int SetNPUInputs(const std::vector<mindspore::MSTensor> &in_tensors,
-                   const std::vector<mindspore::MSTensor> &out_tensors,
-                   const std::vector<ge::Operator *> &npu_inputs) override;
-
   int SetNPUInputs(const std::vector<mindspore::MSTensor> &in_tensors,
                    const std::vector<mindspore::MSTensor> &out_tensors, const std::vector<ge::Operator *> &npu_inputs,
                    const std::unordered_map<int, std::pair<ge::Operator *, int>> &index2_multi_out_index) override;
@@ -93,27 +93,6 @@ int ConvolutionInt8NPUOp::Init(const schema::Primitive *primitive, const std::ve
   return RET_OK;
 }
 
-int ConvolutionInt8NPUOp::SetNPUInputs(const std::vector<mindspore::MSTensor> &in_tensors,
-                                       const std::vector<mindspore::MSTensor> &out_tensors,
-                                       const std::vector<ge::Operator *> &npu_inputs) {
-  auto ret = InitWeightConst(in_tensors);
-  if (ret != RET_OK) {
-    MS_LOG(ERROR) << "Set weight and bias for convolution op " << name_ << " failed when running npu";
-    return RET_ERROR;
-  }
-  conv_->set_input_filter(*weight_);
-  if (in_tensors.size() == CONV_INPUT_SIZE) {
-    ret = InitBiasConst(in_tensors);
-    if (ret != RET_OK) {
-      MS_LOG(ERROR) << "Set bias for convolution op " << name_ << " failed when running npu";
-      return RET_ERROR;
-    }
-    conv_->set_input_bias(*bias_);
-  }
-  conv_->set_input_x(*npu_inputs[0]);
-  return RET_OK;
-}
-
 int ConvolutionInt8NPUOp::SetNPUInputs(
     const std::vector<mindspore::MSTensor> &in_tensors, const std::vector<mindspore::MSTensor> &out_tensors,
     const std::vector<ge::Operator *> &npu_inputs,
@@ -37,10 +37,6 @@ class ConvolutionInt8NPUOp : public ConvolutionBaseNPUOp {
   int Init(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
            const std::vector<mindspore::MSTensor> &out_tensors) override;
 
-  int SetNPUInputs(const std::vector<mindspore::MSTensor> &in_tensors,
-                   const std::vector<mindspore::MSTensor> &out_tensors,
-                   const std::vector<ge::Operator *> &npu_inputs) override;
-
   int SetNPUInputs(const std::vector<mindspore::MSTensor> &in_tensors,
                    const std::vector<mindspore::MSTensor> &out_tensors, const std::vector<ge::Operator *> &npu_inputs,
                    const std::unordered_map<int, std::pair<ge::Operator *, int>> &index2_multi_out_index) override;
@@ -97,27 +97,6 @@ int ConvolutionNPUOp::Init(const schema::Primitive *primitive, const std::vector
   return RET_OK;
 }
 
-int ConvolutionNPUOp::SetNPUInputs(const std::vector<mindspore::MSTensor> &in_tensors,
-                                   const std::vector<mindspore::MSTensor> &out_tensors,
-                                   const std::vector<ge::Operator *> &npu_inputs) {
-  auto ret = InitWeightConst(in_tensors);
-  if (ret != RET_OK) {
-    MS_LOG(ERROR) << "Set weight and bias for convolution op " << name_ << " failed when running npu";
-    return RET_ERROR;
-  }
-  conv_->set_input_filter(*weight_);
-  if (in_tensors.size() == CONV_INPUT_SIZE) {
-    ret = InitBiasConst(in_tensors);
-    if (ret != RET_OK) {
-      MS_LOG(ERROR) << "Set bias for convolution op " << name_ << " failed when running npu";
-      return RET_ERROR;
-    }
-    conv_->set_input_bias(*bias_);
-  }
-  conv_->set_input_x(*npu_inputs[0]);
-  return RET_OK;
-}
-
 int ConvolutionNPUOp::SetNPUInputs(
     const std::vector<mindspore::MSTensor> &in_tensors, const std::vector<mindspore::MSTensor> &out_tensors,
     const std::vector<ge::Operator *> &npu_inputs,
@@ -37,10 +37,6 @@ class ConvolutionNPUOp : public ConvolutionBaseNPUOp {
   int Init(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
            const std::vector<mindspore::MSTensor> &out_tensors) override;
 
-  int SetNPUInputs(const std::vector<mindspore::MSTensor> &in_tensors,
-                   const std::vector<mindspore::MSTensor> &out_tensors,
-                   const std::vector<ge::Operator *> &npu_inputs) override;
-
   int SetNPUInputs(const std::vector<mindspore::MSTensor> &in_tensors,
                    const std::vector<mindspore::MSTensor> &out_tensors, const std::vector<ge::Operator *> &npu_inputs,
                    const std::unordered_map<int, std::pair<ge::Operator *, int>> &index2_multi_out_index) override;
@@ -33,17 +33,25 @@ int EltwiseNPUOp::Init(const schema::Primitive *primitive, const std::vector<min
     return RET_ERROR;
   }
   eltwise_->set_attr_mode(ConverterToNPUEltwiseMode(eltwise_prim->mode()));
-  int size = in_tensors.size();
-  eltwise_->create_dynamic_input_x(size);
-  eltwise_->set_attr_N(size);
+  auto input_num = in_tensors.size();
+  eltwise_->create_dynamic_input_x(input_num);
+  eltwise_->set_attr_N(input_num);
   return RET_OK;
 }
 
 int EltwiseNPUOp::SetNPUInputs(const std::vector<mindspore::MSTensor> &in_tensors,
                                const std::vector<mindspore::MSTensor> &out_tensors,
-                               const std::vector<ge::Operator *> &npu_inputs) {
+                               const std::vector<ge::Operator *> &npu_inputs,
+                               const std::unordered_map<int, std::pair<ge::Operator *, int>> &index2_multi_out_index) {
+  for (auto pair : index2_multi_out_index) {
+    auto in_op = pair.second.first;
+    MS_CHECK_TRUE_RET(in_op != nullptr, RET_ERROR);
+    eltwise_->SetInput(pair.first, *in_op, pair.second.second);
+  }
   for (int i = 0; i < npu_inputs.size(); ++i) {
-    eltwise_->set_dynamic_input_x(i + 1, *npu_inputs[i]);
+    if (index2_multi_out_index.find(i) == index2_multi_out_index.end()) {
+      eltwise_->SetInput(i, *npu_inputs[i], 0);
+    }
   }
   return RET_OK;
 }
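Note: eltwise now wires inputs with ge::Operator::SetInput(dst_index, src_op, src_out_index) instead of set_dynamic_input_x, which lets it consume a specific output port of a multi-output producer. A sketch of how the index2_multi_out_index map is read (the Split example is illustrative):

  // An entry {1, {split_op, 2}} means: eltwise input port 1 is fed by output port 2 of split_op.
  // Input ports without an entry fall back to port 0 of the matching npu_inputs operator:
  //   eltwise_->SetInput(i, *npu_inputs[i], 0);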
@@ -18,6 +18,8 @@
 #define MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_NPU_OP_ELTWISE_NPU_H_
 #include <vector>
 #include <string>
+#include <utility>
+#include <unordered_map>
 #include "include/graph/op/all_ops.h"
 #include "src/delegate/npu/op/npu_op.h"
 
@@ -39,8 +41,8 @@ class EltwiseNPUOp : public NPUOp {
                 const std::vector<mindspore::MSTensor> &out_tensors) override;
 
   int SetNPUInputs(const std::vector<mindspore::MSTensor> &in_tensors,
-                   const std::vector<mindspore::MSTensor> &out_tensors,
-                   const std::vector<ge::Operator *> &npu_inputs) override;
+                   const std::vector<mindspore::MSTensor> &out_tensors, const std::vector<ge::Operator *> &npu_inputs,
+                   const std::unordered_map<int, std::pair<ge::Operator *, int>> &index2_multi_out_index) override;
 
   ge::Operator *GetNPUOp() override;
 
@@ -0,0 +1,56 @@
+/**
+ * Copyright 2022 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "src/delegate/npu/op/flatten_npu.h"
+#include "include/graph/op/all_ops.h"
+#include "src/delegate/npu/npu_converter_utils.h"
+
+namespace mindspore {
+int FlattenNPUOp::IsSupport(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
+                            const std::vector<mindspore::MSTensor> &out_tensors) {
+  if (out_tensors.at(0).Shape().size() != C2NUM) {
+    MS_LOG(WARNING) << "The output tensor can only be flatten to 2 dimension.";
+    return RET_NOT_SUPPORT;
+  }
+  return RET_OK;
+}
+
+int FlattenNPUOp::Init(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
+                       const std::vector<mindspore::MSTensor> &out_tensors) {
+  flatten_ = new (std::nothrow) hiai::op::Flatten(name_);
+  if (flatten_ == nullptr) {
+    MS_LOG(ERROR) << name_ << " op is nullptr";
+    return RET_ERROR;
+  }
+  return RET_OK;
+}
+
+int FlattenNPUOp::SetNPUInputs(const std::vector<mindspore::MSTensor> &in_tensors,
+                               const std::vector<mindspore::MSTensor> &out_tensors,
+                               const std::vector<ge::Operator *> &npu_inputs) {
+  flatten_->set_input_x(*npu_inputs[0]);
+  return RET_OK;
+}
+
+ge::Operator *FlattenNPUOp::GetNPUOp() { return this->flatten_; }
+
+FlattenNPUOp::~FlattenNPUOp() {
+  if (flatten_ != nullptr) {
+    delete flatten_;
+    flatten_ = nullptr;
+  }
+}
+}  // namespace mindspore
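Note: the 2-D restriction in IsSupport above matches the usual Flatten semantics (keep the first dimension, collapse the rest into one), so only outputs representable as [N, C*H*W] are delegated; other shapes fall back from the NPU. A worked shape example (illustrative values):

  // input [2, 3, 4, 5] -> output [2, 60]    rank 2: accepted
  // input [2, 3, 4, 5] -> output [2, 3, 20] rank 3: rejected by IsSupport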
@@ -0,0 +1,48 @@
+/**
+ * Copyright 2022 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_NPU_OP_FLATTEN_NPU_H_
+#define MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_NPU_OP_FLATTEN_NPU_H_
+#include <vector>
+#include <string>
+#include "include/graph/op/all_ops.h"
+#include "src/delegate/npu/op/npu_op.h"
+
+namespace mindspore {
+class FlattenNPUOp : public NPUOp {
+ public:
+  FlattenNPUOp(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
+               const std::vector<mindspore::MSTensor> &out_tensors, std::string name)
+      : NPUOp(primitive, in_tensors, out_tensors, name) {}
+  ~FlattenNPUOp() override;
+
+  int IsSupport(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
+                const std::vector<mindspore::MSTensor> &out_tensors) override;
+
+  int Init(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
+           const std::vector<mindspore::MSTensor> &out_tensors) override;
+
+  int SetNPUInputs(const std::vector<mindspore::MSTensor> &in_tensors,
+                   const std::vector<mindspore::MSTensor> &out_tensors,
+                   const std::vector<ge::Operator *> &npu_inputs) override;
+
+  ge::Operator *GetNPUOp() override;
+
+ private:
+  hiai::op::Flatten *flatten_ = nullptr;
+};
+}  // namespace mindspore
+#endif  // MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_NPU_OP_FLATTEN_NPU_H_
@@ -55,9 +55,6 @@ int MatMulNPUOp::Init(const schema::Primitive *primitive, const std::vector<mind
     MS_LOG(ERROR) << "New matmul npu operator for op " << name_ << " failed.";
     return RET_ERROR;
   }
-  if (in_tensors.size() == MATMUL_INPUT_SIZE) {
-    has_bias_ = true;
-  }
   auto matmul_prim = primitive->value_as_MatMulFusion();
   if (matmul_prim == nullptr) {
     MS_LOG(ERROR) << "Get null primitive value for op ." << name_;
@@ -66,6 +63,15 @@ int MatMulNPUOp::Init(const schema::Primitive *primitive, const std::vector<mind
   matmul_->set_attr_transpose_x1(matmul_prim->transpose_a());
   matmul_->set_attr_transpose_x2(matmul_prim->transpose_b());
   act_type_ = matmul_prim->activation_type();
+
+  if (in_tensors.size() == MATMUL_INPUT_SIZE) {
+    has_bias_ = true;
+    add_op_ = new (std::nothrow) hiai::op::Add(name_ + "_add");
+    if (add_op_ == nullptr) {
+      MS_LOG(ERROR) << "new add op failed.";
+      return RET_ERROR;
+    }
+  }
   return RET_OK;
 }
 
@@ -75,11 +81,6 @@ int MatMulNPUOp::SetNPUInputs(const std::vector<mindspore::MSTensor> &in_tensors
   matmul_->set_input_x1(*npu_inputs[0]);
   matmul_->set_input_x2(*npu_inputs[1]);
   if (has_bias_) {
-    add_op_ = new (std::nothrow) hiai::op::Add(name_ + "_add");
-    if (add_op_ == nullptr) {
-      MS_LOG(ERROR) << "new add op failed.";
-      return RET_ERROR;
-    }
     add_op_->set_input_x1(*matmul_);
     auto bias_shape = in_tensors[BIAS_INDEX].Shape();
     auto bias_tensor = ConverterToNPUTensor(in_tensors[BIAS_INDEX]);
@@ -104,7 +105,7 @@ int MatMulNPUOp::SetNPUInputs(const std::vector<mindspore::MSTensor> &in_tensors
   }
   if (act_type_ != schema::ActivationType_NO_ACTIVATION) {
     int ret = RET_ERROR;
-    if (has_bias_ == true) {
+    if (has_bias_) {
       ret = SetActivation(add_op_);
     } else {
       ret = SetActivation(matmul_);
@@ -133,7 +133,8 @@ NPUOp *GetNPUOp(const schema::Primitive *primitive, const std::vector<mindspore:
    return nullptr;
   }
 
-  std::set<schema::PrimitiveType> int32_lists = {schema::PrimitiveType_Cast, schema::PrimitiveType_StridedSlice};
+  std::set<schema::PrimitiveType> int32_lists = {schema::PrimitiveType_Cast, schema::PrimitiveType_StridedSlice,
+                                                 schema::PrimitiveType_Reshape, schema::PrimitiveType_ReduceFusion};
   auto support_int32 = in_tensors[0].DataType() == DataType::kNumberTypeInt32 &&
                        find(int32_lists.begin(), int32_lists.end(), primitive->value_type()) != int32_lists.end();
   if (in_tensors[0].DataType() != DataType::kNumberTypeFloat32 &&
@@ -32,7 +32,7 @@ int ReduceNPUOp::IsSupport(const schema::Primitive *primitive, const std::vector
     return RET_NOT_SUPPORT;
   }
   reduce_mode_ = reduce_prim->mode();
-  if (reduce_mode_ != schema::ReduceMode_ReduceMean) {
+  if (reduce_mode_ != schema::ReduceMode_ReduceMean && reduce_mode_ != schema::ReduceMode_ReduceSum) {
     MS_LOG(WARNING) << "Npu does not support reduce mode " << reduce_prim->mode() << " for op " << name_;
     return RET_NOT_SUPPORT;
   }
@@ -58,6 +58,14 @@ int ReduceNPUOp::Init(const schema::Primitive *primitive, const std::vector<mind
     }
     reduce_mean->set_attr_keep_dims(reduce_prim->keep_dims());
     reduce_ = reduce_mean;
+  } else if (reduce_mode_ == schema::ReduceMode_ReduceSum) {
+    auto reduce_sum = new (std::nothrow) hiai::op::ReduceSum(name_);
+    if (reduce_sum == nullptr) {
+      MS_LOG(ERROR) << "New reduce operator for op " << name_ << " failed.";
+      return RET_ERROR;
+    }
+    reduce_sum->set_attr_keep_dims(reduce_prim->keep_dims());
+    reduce_ = reduce_sum;
   } else {
     MS_LOG(ERROR) << "Npu does not support reduce mode " << reduce_prim->mode() << " for op " << name_;
     return RET_ERROR;
@@ -71,6 +79,9 @@ int ReduceNPUOp::SetNPUInputs(const std::vector<mindspore::MSTensor> &in_tensors
   if (reduce_mode_ == schema::ReduceMode_ReduceMean) {
     auto reduce_mean = reinterpret_cast<hiai::op::ReduceMean *>(reduce_);
     reduce_mean->set_input_x(*npu_inputs[0]).set_input_axes(*npu_inputs[1]);
+  } else if (reduce_mode_ == schema::ReduceMode_ReduceSum) {
+    auto reduce_sum = reinterpret_cast<hiai::op::ReduceSum *>(reduce_);
+    reduce_sum->set_input_x(*npu_inputs[0]).set_input_axes(*npu_inputs[1]);
   }
   return RET_OK;
 }
@@ -20,13 +20,18 @@
 namespace mindspore {
 int ReshapeNPUOp::IsSupport(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
                             const std::vector<mindspore::MSTensor> &out_tensors) {
-  if (in_tensors.size() != 2) {
-    MS_LOG(WARNING) << "Npu op should have 2 input tensors.";
+  if (in_tensors.size() != kInputSize1) {
+    MS_LOG(WARNING) << "NPU op should have 2 input tensors.";
     return RET_NOT_SUPPORT;
   }
   auto shape_tensor = in_tensors.at(1);
   if (shape_tensor.Data() == nullptr) {
-    MS_LOG(WARNING) << "Npu reshape op only supports const shape.";
+    MS_LOG(WARNING) << "NPU Reshape op only supports const shape.";
+    return RET_NOT_SUPPORT;
+  }
+  if (shape_tensor.Shape().size() > 1 || shape_tensor.ElementNum() > NPU_SHAPE_SIZE) {
+    MS_LOG(WARNING) << "For NPU Reshape op, the shape tensor should be a one-dimension tensor and its element number "
+                       "should be less than 4.";
     return RET_NOT_SUPPORT;
   }
   return RET_OK;
@@ -42,14 +47,6 @@ int ReshapeNPUOp::Init(const schema::Primitive *primitive, const std::vector<min
   return RET_OK;
 }
 
-int ReshapeNPUOp::SetNPUInputs(const std::vector<mindspore::MSTensor> &in_tensors,
-                               const std::vector<mindspore::MSTensor> &out_tensors,
-                               const std::vector<ge::Operator *> &npu_inputs) {
-  reshape_->set_input_x(*npu_inputs[0]);
-  reshape_->set_input_shape(*npu_inputs[1]);
-  return RET_OK;
-}
-
 int ReshapeNPUOp::SetNPUInputs(const std::vector<mindspore::MSTensor> &in_tensors,
                                const std::vector<mindspore::MSTensor> &out_tensors,
                                const std::vector<ge::Operator *> &npu_inputs,
@@ -37,10 +37,6 @@ class ReshapeNPUOp : public NPUOp {
   int IsSupport(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
                 const std::vector<mindspore::MSTensor> &out_tensors) override;
 
-  int SetNPUInputs(const std::vector<mindspore::MSTensor> &in_tensors,
-                   const std::vector<mindspore::MSTensor> &out_tensors,
-                   const std::vector<ge::Operator *> &npu_inputs) override;
-
   int SetNPUInputs(const std::vector<mindspore::MSTensor> &in_tensors,
                    const std::vector<mindspore::MSTensor> &out_tensors, const std::vector<ge::Operator *> &npu_inputs,
                    const std::unordered_map<int, std::pair<ge::Operator *, int>> &index2_multi_out_index) override;
@@ -30,88 +30,122 @@ int ScaleNPUOp::IsSupport(const schema::Primitive *primitive, const std::vector<
     MS_LOG(ERROR) << "Get null primitive value for op: " << name_;
     return RET_ERROR;
   }
+  auto input_dims = in_tensors.at(INPUT_INDEX).Shape().size();
   axis_ = scale_prim->axis();
   if (axis_ < 0) {
-    axis_ = axis_ + in_tensors[INPUT_INDEX].Shape().size();
+    axis_ = axis_ + input_dims;
   }
   if (axis_ != NHWC_C && axis_ != NCHW_C) {
     if (in_tensors.size() <= BIAS_INDEX) {
       MS_LOG(INFO) << "Npu Scale op does not support axis: " << axis_ << ", trying to convert to Mul op.";
       use_mul_ = true;
+      return RET_OK;
     } else {
       MS_LOG(WARNING) << "Npu Scale axis attr only support 1 or channel, now is " << axis_;
       return RET_NOT_SUPPORT;
     }
   }
+  if (input_dims < NPU_SHAPE_SIZE) {
+    need_expand_ = true;
+  }
   return RET_OK;
 }
 
 int ScaleNPUOp::Init(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
                      const std::vector<mindspore::MSTensor> &out_tensors) {
-  if (!use_mul_) {
-    // note that Scale only support the default axis(i.e., 1), setting axis is meaningless.
-    op_ = new (std::nothrow) hiai::op::Scale(name_);
-  } else {
-    op_ = new (std::nothrow) hiai::op::Mul(name_);
-  }
-  if (op_ == nullptr) {
-    MS_LOG(ERROR) << name_ << " op is nullptr";
-    return RET_ERROR;
-  }
-
   auto scale_prim = primitive->value_as_ScaleFusion();
   if (scale_prim == nullptr) {
     MS_LOG(ERROR) << "Get null primitive value for op ." << name_;
     return RET_ERROR;
   }
+
+  if (use_mul_) {
+    mul_ = new (std::nothrow) hiai::op::Mul(name_ + "_mul");
+    if (mul_ == nullptr) {
+      MS_LOG(ERROR) << "New Mul npu operator for op " << name_ << "_mul failed.";
+      return RET_ERROR;
+    }
+    scale_ops_.emplace_back(mul_);
+  } else {
+    // note that Scale only support the default axis(i.e., 1), setting axis is meaningless.
+    scale_ = new (std::nothrow) hiai::op::Scale(name_);
+    if (scale_ == nullptr) {
+      MS_LOG(ERROR) << "New Scale npu operator for op " << name_ << " failed.";
+      return RET_ERROR;
+    }
+    scale_ops_.emplace_back(scale_);
+  }
+
+  if (need_expand_) {
+    out_reshape_ = new (std::nothrow) hiai::op::Reshape(name_ + "_restore");
+    if (out_reshape_ == nullptr) {
+      MS_LOG(ERROR) << "New Reshape npu operator for op " << name_ << "_restore failed.";
+      return RET_ERROR;
+    }
+    scale_ops_.emplace_back(out_reshape_);
+  }
+
   act_type_ = scale_prim->activation_type();
   if (act_type_ != schema::ActivationType_NO_ACTIVATION) {
-    auto ret = SetActivation(op_);
-    if (ret != RET_OK) {
+    act_ = new (std::nothrow) hiai::op::Activation(name_ + "_act");
+    if (act_ == nullptr) {
       MS_LOG(ERROR) << "New activation npu operator for op " << name_ << " failed.";
-      return ret;
+      return RET_ERROR;
     }
+    scale_ops_.emplace_back(act_);
   }
   return RET_OK;
 }
 
-int ScaleNPUOp::SetNPUInputs(const std::vector<mindspore::MSTensor> &in_tensors,
-                             const std::vector<mindspore::MSTensor> &out_tensors,
-                             const std::vector<ge::Operator *> &npu_inputs) {
-  MS_ASSERT(in_tensors.size() > SCALE_INDEX);
-  if (use_mul_) {
-    auto ret = ConvertScaleToMul(npu_inputs, op_, in_tensors);
-    if (ret != RET_OK) {
-      MS_LOG(ERROR) << "Convert Scale to Mul failed, op name: " << name_;
-    }
-    return ret;
-  }
-
-  auto scale_op = reinterpret_cast<hiai::op::Scale *>(op_);
-  scale_op->set_input_x(*npu_inputs.at(INPUT_INDEX));
-  scale_op->set_input_scale(*npu_inputs.at(SCALE_INDEX));
-  if (in_tensors.size() > BIAS_INDEX && in_tensors[BIAS_INDEX] != nullptr) {
-    scale_op->set_input_bias(*npu_inputs.at(BIAS_INDEX));
-  }
-  return RET_OK;
-}
-
 ge::Operator *ScaleNPUOp::GetNPUOp() {
-  if (act_type_ == schema::ActivationType_NO_ACTIVATION) {
-    return op_;
-  } else {
+  if (act_type_ != schema::ActivationType_NO_ACTIVATION) {
     return act_;
+  } else if (use_mul_) {
+    return mul_;
+  } else if (need_expand_) {
+    return out_reshape_;
+  } else {
+    return scale_;
   }
 }
 
-int ScaleNPUOp::SetActivation(const ge::Operator *input) {
-  act_ = new (std::nothrow) hiai::op::Activation(name_ + "_act");
-  if (act_ == nullptr) {
-    MS_LOG(ERROR) << "New activation npu operator for op " << name_ << " failed.";
-    return RET_ERROR;
+int ScaleNPUOp::SetNPUInputs(const std::vector<mindspore::MSTensor> &in_tensors,
+                             const std::vector<mindspore::MSTensor> &out_tensors,
+                             const std::vector<ge::Operator *> &npu_inputs) {
+  if (use_mul_) {
+    auto ret = ConvertScaleToMul(npu_inputs, in_tensors);
+    if (ret != RET_OK) {
+      MS_LOG(ERROR) << "Convert Scale to Mul failed, op name: " << name_;
+      return RET_ERROR;
+    }
+  } else {
+    auto ret = Adopt4DScale(npu_inputs, in_tensors);
+    if (ret != RET_OK) {
+      MS_LOG(ERROR) << "Adopt 4D Scale op failed, op name: " << name_;
+      return RET_ERROR;
+    }
   }
-  act_->set_input_x(*input);
+  if (act_type_ != schema::ActivationType_NO_ACTIVATION) {
+    auto ret = SetActivation();
+    if (ret != RET_OK) {
+      MS_LOG(ERROR) << "Set Activation failed, op name: " << name_;
+      return RET_ERROR;
+    }
+  }
+  return RET_OK;
+}
+
+int ScaleNPUOp::SetActivation() {
+  ge::Operator *act_input = nullptr;
+  if (use_mul_) {
+    act_input = mul_;
+  } else if (need_expand_) {
+    act_input = out_reshape_;
+  } else {
+    act_input = scale_;
+  }
+  MS_CHECK_TRUE_MSG(act_input != nullptr, RET_ERROR, "Scale activation input is nullptr.");
+  act_->set_input_x(*act_input);
   auto act_mode = ConverterToNPUActivationMode(act_type_);
   if (act_mode == ACTIVATION_INVALID) {
     MS_LOG(ERROR) << "Unsupported activation type for scale op " << name_;
@ -121,69 +155,138 @@ int ScaleNPUOp::SetActivation(const ge::Operator *input) {
|
||||||
return RET_OK;
|
return RET_OK;
|
||||||
}
|
}
|
||||||
|
|
||||||
int ScaleNPUOp::ConvertScaleToMul(const std::vector<ge::Operator *> &npu_inputs, ge::Operator *cur_op,
|
int ScaleNPUOp::ConvertScaleToMul(const std::vector<ge::Operator *> &npu_inputs,
|
||||||
const std::vector<mindspore::MSTensor> &in_tensors) {
|
const std::vector<mindspore::MSTensor> &in_tensors) {
|
||||||
auto input_shape = in_tensors[INPUT_INDEX].Shape();
|
auto input_shape = in_tensors.at(INPUT_INDEX).Shape();
|
||||||
auto scale_shape = in_tensors[SCALE_INDEX].Shape();
|
auto scale_shape = in_tensors.at(SCALE_INDEX).Shape();
|
||||||
auto mul_op = reinterpret_cast<hiai::op::Mul *>(cur_op);
|
mul_->set_input_x1(*npu_inputs.at(INPUT_INDEX));
|
||||||
mul_op->set_input_x1(*npu_inputs.at(INPUT_INDEX));
|
|
||||||
if (input_shape.size() == scale_shape.size()) {
|
if (input_shape.size() == scale_shape.size()) {
|
||||||
mul_op->set_input_x2(*npu_inputs.at(SCALE_INDEX));
|
mul_->set_input_x2(*npu_inputs.at(SCALE_INDEX));
|
||||||
} else {
|
} else {
|
||||||
int valid_shape[NPU_SHAPE_SIZE] = {1, 1, 1, 1};
|
int64_t valid_dims = input_shape.size();
|
||||||
|
std::vector<int> valid_shape(valid_dims, 1);
|
||||||
for (size_t i = 0; i < scale_shape.size(); i++) {
|
for (size_t i = 0; i < scale_shape.size(); i++) {
|
||||||
valid_shape[axis_ + i] = static_cast<int>(scale_shape[i]);
|
valid_shape[axis_ + i] = static_cast<int>(scale_shape[i]);
|
||||||
}
|
}
|
||||||
reshape_ = new (std::nothrow) hiai::op::Reshape(name_ + "_reshape");
|
auto reshape = new (std::nothrow) hiai::op::Reshape(name_ + "_mul_reshape");
|
||||||
if (reshape_ == nullptr) {
|
if (reshape == nullptr) {
|
||||||
MS_LOG(ERROR) << "New Reshape npu operator for op " << name_ << "_reshape failed.";
|
MS_LOG(ERROR) << "New Reshape npu operator for op " << name_ << "_mul_reshape failed.";
|
||||||
return RET_ERROR;
|
return RET_ERROR;
|
||||||
}
|
}
|
||||||
std::shared_ptr<ge::Tensor> shape_tensor = std::make_shared<ge::Tensor>();
|
scale_ops_.emplace_back(reshape);
|
||||||
if (shape_tensor == nullptr) {
|
auto valid_data_ptr = reinterpret_cast<const uint8_t *>(valid_shape.data());
|
||||||
MS_LOG(ERROR) << "new shape_tensor failed.";
|
auto shape = GetNPUConst<int>(valid_data_ptr, {valid_dims}, ge::DT_INT32, name_ + "_mul_expand_shape");
|
||||||
|
if (shape == nullptr) {
|
||||||
|
MS_LOG(ERROR) << "Get shape const for op " << name_ << "_mul failed.";
|
||||||
return RET_ERROR;
|
return RET_ERROR;
|
||||||
}
|
}
|
||||||
ge::TensorDesc tensor_desc(ge::Shape({NPU_SHAPE_SIZE}), ge::FORMAT_ND, ge::DT_INT32);
|
scale_ops_.emplace_back(shape);
|
||||||
shape_tensor->SetTensorDesc(tensor_desc);
|
reshape->set_input_x(*npu_inputs.at(SCALE_INDEX));
|
||||||
shape_tensor->SetData(reinterpret_cast<const uint8_t *>(valid_shape), NPU_SHAPE_SIZE * sizeof(int));
|
reshape->set_input_shape(*shape);
|
||||||
shape_ = new (std::nothrow) hiai::op::Const(name_ + "_reshape_1");
|
mul_->set_input_x2(*reshape);
|
||||||
if (shape_ == nullptr) {
|
|
||||||
MS_LOG(ERROR) << "New shape const for op " << name_ << " failed.";
|
|
||||||
return RET_ERROR;
|
|
||||||
}
|
|
||||||
shape_->set_attr_value(shape_tensor);
|
|
||||||
reshape_->set_input_x(*npu_inputs.at(SCALE_INDEX));
|
|
||||||
reshape_->set_input_shape(*shape_);
|
|
||||||
mul_op->set_input_x2(*reshape_);
|
|
||||||
}
|
}
|
||||||
return RET_OK;
|
return RET_OK;
|
||||||
}
|
}
|
||||||
|
+int ScaleNPUOp::Adopt4DScale(const std::vector<ge::Operator *> &npu_inputs,
+                             const std::vector<mindspore::MSTensor> &in_tensors) {
+  MS_ASSERT(scale_ != nullptr);
+  // handle input
+  auto org_input_tensor = in_tensors.at(INPUT_INDEX);
+  ge::Operator *actual_input = npu_inputs.at(INPUT_INDEX);
+  std::vector<int64_t> org_input_shape = org_input_tensor.Shape();
+  if (need_expand_) {
+    actual_input = ChangeDims(npu_inputs.at(INPUT_INDEX), org_input_shape, name_ + "_expand_input", true);
+    if (actual_input == nullptr) {
+      MS_LOG(ERROR) << "Change Scale op input dims failed.";
+      return RET_ERROR;
+    }
+  }
+  scale_->set_input_x(*actual_input);
+
+  // handle scale, note that the scale axis can only be 1.
+  auto org_scale_tensor = in_tensors.at(SCALE_INDEX);
+  ge::Operator *actual_scale = npu_inputs.at(SCALE_INDEX);
+  if (org_scale_tensor.Shape().size() == DIMENSION_2D) {
+    std::vector<int64_t> expand_scale_shape = org_scale_tensor.Shape();
+    expand_scale_shape.emplace_back(1);
+    actual_scale = ChangeDims(npu_inputs.at(SCALE_INDEX), expand_scale_shape, name_ + "_expand_scale");
+    if (actual_scale == nullptr) {
+      MS_LOG(ERROR) << "Change Scale op scale dims failed.";
+      return RET_ERROR;
+    }
+  }
+  scale_->set_input_scale(*actual_scale);
+
+  // handle bias
+  if (in_tensors.size() > BIAS_INDEX) {
+    auto org_bias_tensor = in_tensors.at(BIAS_INDEX);
+    ge::Operator *actual_bias = npu_inputs.at(BIAS_INDEX);
+    if (org_bias_tensor.Shape().size() == DIMENSION_2D) {
+      std::vector<int64_t> expand_bias_shape = org_bias_tensor.Shape();
+      expand_bias_shape.emplace_back(1);
+      actual_bias = ChangeDims(npu_inputs.at(BIAS_INDEX), expand_bias_shape, name_ + "_expand_bias");
+      if (actual_bias == nullptr) {
+        MS_LOG(ERROR) << "Change Scale op bias dims failed.";
+        return RET_ERROR;
+      }
+    }
+    scale_->set_input_bias(*actual_bias);
+  }
+
+  // restore to origin input shape
+  if (need_expand_) {
+    int64_t dims = org_input_shape.size();
+    std::vector<int> valid_shape;
+    for (int i = 0; i < dims; i++) {
+      valid_shape.emplace_back(static_cast<int>(org_input_shape.at(i)));
+    }
+    auto valid_data_ptr = reinterpret_cast<const uint8_t *>(valid_shape.data());
+    auto shape = GetNPUConst<int>(valid_data_ptr, {dims}, ge::DT_INT32, name_ + "_restore_shape");
+    if (shape == nullptr) {
+      MS_LOG(ERROR) << "Get NPU Const for shape restoration failed.";
+      return RET_ERROR;
+    }
+    scale_ops_.emplace_back(shape);
+    out_reshape_->set_input_x(*scale_);
+    out_reshape_->set_input_shape(*shape);
+  }
+  return RET_OK;
+}
+
+ge::Operator *ScaleNPUOp::ChangeDims(const ge::Operator *input, std::vector<int64_t> dst_shape, std::string name,
+                                     bool need_expand_4d) {
+  MS_ASSERT(input != nullptr);
+  auto reshape = new (std::nothrow) hiai::op::Reshape(name);
+  if (reshape == nullptr) {
+    MS_LOG(ERROR) << "New Reshape NPU operator failed.";
+    return nullptr;
+  }
+  scale_ops_.emplace_back(reshape);
+  MS_CHECK_LE(dst_shape.size(), NPU_SHAPE_SIZE, nullptr);
+  int64_t actual_dim = need_expand_4d ? NPU_SHAPE_SIZE : dst_shape.size();
+  std::vector<int> valid_shape(actual_dim, 1);
+  for (int i = 0; i < dst_shape.size(); i++) {
+    valid_shape[i] = static_cast<int>(dst_shape.at(i));
+  }
+  auto valid_data_ptr = reinterpret_cast<const uint8_t *>(valid_shape.data());
+  auto shape = GetNPUConst<int>(valid_data_ptr, {actual_dim}, ge::DT_INT32, name_ + "_shape");
+  if (shape == nullptr) {
+    MS_LOG(ERROR) << "Get NPU Const for shape restoration failed.";
+    return nullptr;
+  }
+  scale_ops_.emplace_back(shape);
+  reshape->set_input_x(*input);
+  reshape->set_input_shape(*shape);
+  return reshape;
+}
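Note: Adopt4DScale and ChangeDims together implement one trick: when need_expand_ is set, the runtime shape is padded with trailing 1s up to the NPU's 4-D layout before the Scale operator is wired, and the result is reshaped back to the original rank afterwards. A minimal standalone sketch of that shape arithmetic (hypothetical helper name, not part of the patch):

#include <cstdint>
#include <vector>

// Pad a shape with trailing 1s up to rank 4, mirroring ChangeDims(..., need_expand_4d = true).
// Padded axes have extent 1, so the underlying data layout is unchanged, e.g. {8, 16} -> {8, 16, 1, 1}.
std::vector<int64_t> ExpandTo4D(const std::vector<int64_t> &shape) {
  std::vector<int64_t> expanded(shape);
  while (expanded.size() < 4) {
    expanded.push_back(1);
  }
  return expanded;
}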
 ScaleNPUOp::~ScaleNPUOp() {
-  if (op_ != nullptr) {
-    delete op_;
-    op_ = nullptr;
-  }
-  if (scale_ != nullptr) {
-    delete scale_;
-    scale_ = nullptr;
-  }
-  if (bias_ != nullptr) {
-    delete bias_;
-    bias_ = nullptr;
-  }
-  if (act_ != nullptr) {
-    delete act_;
-    act_ = nullptr;
-  }
-  if (reshape_ != nullptr) {
-    delete reshape_;
-    reshape_ = nullptr;
-  }
-  if (shape_ != nullptr) {
-    delete shape_;
-    shape_ = nullptr;
-  }
+  for (auto op : scale_ops_) {
+    if (op != nullptr) {
+      delete op;
+      op = nullptr;
+    }
+  }
 }
 }  // namespace mindspore
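Note: the destructor now frees every operator through scale_ops_, the single vector in which each allocation in this op is registered, instead of six hand-written delete blocks. The ownership pattern, reduced to a sketch with illustrative types (not the patch's API):

#include <new>
#include <vector>

struct Op {};  // stands in for ge::Operator

struct OwnedOps {
  std::vector<Op *> ops_;
  Op *Create() {
    auto *op = new (std::nothrow) Op();
    if (op != nullptr) {
      ops_.push_back(op);  // register at the allocation site
    }
    return op;
  }
  ~OwnedOps() {
    for (auto *op : ops_) {
      delete op;  // one cleanup path; a newly added member cannot be forgotten
    }
  }
};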
@@ -46,20 +46,25 @@ class ScaleNPUOp : public NPUOp {
   int GetAxis() { return axis_; }

  private:
-  int SetActivation(const ge::Operator *input);
+  int SetActivation();

-  int ConvertScaleToMul(const std::vector<ge::Operator *> &npu_inputs, ge::Operator *cur_op,
+  int ConvertScaleToMul(const std::vector<ge::Operator *> &npu_inputs,
                         const std::vector<mindspore::MSTensor> &in_tensors);

+  int Adopt4DScale(const std::vector<ge::Operator *> &npu_inputs, const std::vector<mindspore::MSTensor> &in_tensors);
+
+  ge::Operator *ChangeDims(const ge::Operator *input, std::vector<int64_t> dst_shape, std::string name,
+                           bool need_expand_4d = false);
+
   int axis_ = 0;
   bool use_mul_ = false;
+  bool need_expand_ = false;
   schema::ActivationType act_type_ = schema::ActivationType_NO_ACTIVATION;
-  ge::Operator *op_ = nullptr;
-  hiai::op::Reshape *reshape_ = nullptr;
-  hiai::op::Const *scale_ = nullptr;
-  hiai::op::Const *bias_ = nullptr;
-  hiai::op::Const *shape_ = nullptr;
+  hiai::op::Reshape *out_reshape_ = nullptr;
+  hiai::op::Scale *scale_ = nullptr;
+  hiai::op::Mul *mul_ = nullptr;
   hiai::op::Activation *act_ = nullptr;
+  std::vector<ge::Operator *> scale_ops_ = {};
 };
 }  // namespace mindspore
 #endif  // MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_NPU_OP_SCALE_NPU_H_
@@ -32,14 +32,21 @@ int SplitNPUOp::Init(const schema::Primitive *primitive, const std::vector<minds
     return RET_ERROR;
   }
+
+  axis_ = static_cast<int>(split_prim->axis());
+  auto split_dim = in_tensors.at(0).Shape().at(axis_);
   auto sizes_split = split_prim->size_splits();
-  std::vector<int> sizes_split_vec;
-  if (sizes_split != nullptr) {
-    sizes_split_vec = std::vector<int>(sizes_split->begin(), sizes_split->end());
-  } else {
-    return RET_ERROR;
-  }
   int size = split_prim->output_num();
+  std::vector<int> sizes_split_vec;
+  CHECK_NULL_RETURN(sizes_split);
+  for (int i = 0; i < size; ++i) {
+    auto cur_size = sizes_split->Get(i);
+    if (i == size - 1 && cur_size == -1) {
+      sizes_split_vec.emplace_back(split_dim);
+      break;
+    }
+    split_dim -= cur_size;
+    sizes_split_vec.emplace_back(cur_size);
+  }
   ge::TensorDesc size_splits_tensor_desc(ge::Shape({size}), ge::FORMAT_NCHW, ge::DT_INT32);
   ge::TensorPtr size_splits_tensor = std::make_shared<hiai::Tensor>(size_splits_tensor_desc);
   size_splits_tensor->SetData(reinterpret_cast<uint8_t *>(sizes_split_vec.data()), size * sizeof(int));
@@ -50,8 +57,6 @@ int SplitNPUOp::Init(const schema::Primitive *primitive, const std::vector<minds
   }
   size_splits_->set_attr_value(size_splits_tensor);
   split_->set_input_size_splits(*size_splits_);
-
-  axis_ = static_cast<int>(split_prim->axis());
   split_->set_attr_num_split(size);
   split_->create_dynamic_output_y(size);
   return RET_OK;
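Note: the new loop resolves TFLite's convention that a trailing -1 in size_splits means "whatever is left of the split axis". The same computation as a standalone function (hypothetical name, simplified from the flatbuffer accessors above):

#include <vector>

// Resolve a trailing -1 in size_splits against the extent of the split axis.
// e.g. split_dim = 10 with sizes {2, 3, -1} resolves to {2, 3, 5}.
std::vector<int> ResolveSizeSplits(const std::vector<int> &raw_sizes, int split_dim) {
  std::vector<int> resolved;
  for (size_t i = 0; i < raw_sizes.size(); ++i) {
    int cur_size = raw_sizes[i];
    if (i == raw_sizes.size() - 1 && cur_size == -1) {
      resolved.push_back(split_dim);  // the remainder
      break;
    }
    split_dim -= cur_size;
    resolved.push_back(cur_size);
  }
  return resolved;
}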
@@ -22,10 +22,6 @@
 using mindspore::lite::RET_ERROR;
 using mindspore::lite::RET_OK;
-
-namespace {
-constexpr int kNumDims = 4;
-}  // namespace
-
 namespace mindspore {
 bool CheckFusion(NPUOp *cur_op, const std::vector<mindspore::MSTensor> &graph_outputs) {
   if (cur_op->in_ops().empty() || cur_op->out_ops().empty()) {
@@ -77,32 +73,32 @@ void NPUFusionPass::RemoveAndFreeOp(NPUOp *cur_op) {
 }

 int NPUFusionPass::UpdatePreOps(NPUOp *cur_op) {
+  auto cur_in_ops = cur_op->in_ops();
   for (auto in_op : cur_op->in_ops()) {
     // graph in op
     if (in_op->in_ops().empty()) {
-      continue;
-    }
-    auto pre_op = in_op->in_ops()[0];
-
-    auto pre_out_ops = pre_op->out_ops();
-    for (size_t i = 0; i < pre_out_ops.size(); i++) {
-      if (pre_out_ops[i] == in_op) {
-        pre_out_ops[i] = cur_op;
-        break;
-      }
-    }
-    pre_op->set_out_ops(pre_out_ops);
-
-    auto cur_in_ops = cur_op->in_ops();
-    for (size_t i = 0; i < cur_in_ops.size(); i++) {
-      if (cur_in_ops[i] == in_op) {
-        cur_in_ops[i] = pre_op;
-        break;
-      }
-    }
-    cur_op->set_in_ops(cur_in_ops);
+      cur_in_ops.erase(find(cur_in_ops.begin(), cur_in_ops.end(), in_op));
+    } else {
+      auto pre_op = in_op->in_ops()[0];
+      auto pre_out_ops = pre_op->out_ops();
+      for (size_t i = 0; i < pre_out_ops.size(); i++) {
+        if (pre_out_ops[i] == in_op) {
+          pre_out_ops[i] = cur_op;
+          break;
+        }
+      }
+      pre_op->set_out_ops(pre_out_ops);
+
+      for (size_t i = 0; i < cur_in_ops.size(); i++) {
+        if (cur_in_ops[i] == in_op) {
+          cur_in_ops[i] = pre_op;
+          break;
+        }
+      }
+    }
     RemoveAndFreeOp(in_op);
   }
+  cur_op->set_in_ops(cur_in_ops);
   return RET_OK;
 }
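Note: UpdatePreOps now distinguishes two cases while it detaches each fused-away in_op: a transpose that is a graph input is simply erased from cur_op's in_ops, while an interior transpose has its pre_op -> in_op -> cur_op edges rewired to pre_op -> cur_op on both sides. A toy sketch of that rewiring on plain adjacency vectors (illustrative Node type; the pass itself replaces only the first match and breaks):

#include <algorithm>
#include <vector>

struct Node {
  std::vector<Node *> in_ops;
  std::vector<Node *> out_ops;
};

// Splice `mid` out of the chain pre -> mid -> cur so the graph reads pre -> cur.
void BypassNode(Node *pre, Node *mid, Node *cur) {
  std::replace(pre->out_ops.begin(), pre->out_ops.end(), mid, cur);
  std::replace(cur->in_ops.begin(), cur->in_ops.end(), mid, pre);
}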
@@ -139,19 +135,26 @@ int NPUFusionPass::UpdatePostOps(NPUOp *cur_op) {
 int UpdatePreTensors(NPUOp *cur_op) {
   auto tensors_vec = NPUPassUtils::GetNonConstInputs(cur_op);
   for (auto in_op : cur_op->in_ops()) {
-    if (in_op->inputs().empty() || in_op->outputs().empty() || in_op->in_ops().empty()) {
-      MS_LOG(ERROR) << "in_tensors/out_tensors/in_ops is empty.";
+    if (in_op->inputs().empty() || in_op->outputs().empty()) {
+      MS_LOG(ERROR) << "in_tensors or out_tensors of input op is empty.";
       return RET_ERROR;
     }
     mindspore::MSTensor cur_tensor;
     auto in_tensor = in_op->inputs()[0];
     auto out_tensor = in_op->outputs()[0];
-    auto pre_op = in_op->in_ops()[0];
-    for (size_t i = 0; i < pre_op->outputs().size(); i++) {
-      if (pre_op->outputs()[i] == in_tensor) {
-        cur_tensor = pre_op->outputs()[i];
-        break;
+    if (!in_op->in_ops().empty()) {
+      auto pre_op = in_op->in_ops()[0];
+      for (size_t i = 0; i < pre_op->outputs().size(); i++) {
+        if (pre_op->outputs()[i] == in_tensor) {
+          cur_tensor = pre_op->outputs()[i];
+          break;
+        }
       }
+    } else {
+      // graph input
+      cur_tensor = in_tensor;
     }

     for (size_t i = 0; i < tensors_vec.size(); i++) {
       if (tensors_vec[i] == out_tensor) {
         tensors_vec[i] = cur_tensor;
@@ -173,56 +176,47 @@ int UpdatePreTensors(NPUOp *cur_op) {
   return RET_OK;
 }

-bool NodeWithNhwc2nchw2nhwcOutput(NPUOp *cur_op) {
-  auto out_ops = cur_op->out_ops();
-  if (out_ops.empty()) {
-    return false;
-  }
-  bool all_out_ops_transpose = std::all_of(out_ops.begin(), out_ops.end(), [](NPUOp *op) {
-    return op->type() == schema::PrimitiveType_Transpose && op->out_ops().size() == 1 &&
-           op->out_ops()[0]->type() == schema::PrimitiveType_Transpose && op->out_ops()[0]->out_ops().empty();
-  });
-  return all_out_ops_transpose;
-}
-
 int UpdatePostTensors(NPUOp *cur_op) {
-  auto tensor = cur_op->outputs()[0];
-
-  // in case: node->nh2nc->nc2nh(graph output) --->>> node->nc2nh, node out_tensor should be put to nc2nh out tensors
-  auto out_ops = cur_op->out_ops();
-  if (NodeWithNhwc2nchw2nhwcOutput(cur_op)) {
-    std::vector<MSTensor> outputs;
-    for (auto i = 0; i < out_ops.size(); ++i) {
-      auto ori_out_tensor = cur_op->outputs()[i];
-      auto nc_tensor = out_ops[i]->outputs()[0];
-      outputs.push_back(nc_tensor);
-      auto post_post_op = out_ops[i]->out_ops()[0];
-      post_post_op->set_inputs({nc_tensor});
-      post_post_op->set_outputs({ori_out_tensor});
-    }
-    cur_op->set_outputs(outputs);
-    return RET_OK;
-  }
-
-  auto nhwc_shape = tensor.Shape();
-  if (nhwc_shape.size() < kNumDims) {
-    MS_LOG(ERROR) << "nhwc_shape < " << kNumDims;
-    return RET_ERROR;
-  }
-  tensor.SetShape({nhwc_shape[NHWC_N], nhwc_shape[NHWC_C], nhwc_shape[NHWC_H], nhwc_shape[NHWC_W]});
+  mindspore::MSTensor new_post_input;
   for (auto out_op : cur_op->out_ops()) {
+    auto in_tensor = out_op->inputs()[0];
     auto out_tensor = out_op->outputs()[0];
-    if (out_op->out_ops().empty()) {
-      cur_op->set_outputs({out_op->outputs()[0]});
-    }
-    for (auto post_op : out_op->out_ops()) {
-      auto tensors_vec = post_op->inputs();
-      for (int i = 0; i < tensors_vec.size(); i++) {
-        if (tensors_vec[i] == out_tensor) {
-          tensors_vec[i] = tensor;
+    auto nhwc_shape = in_tensor.Shape();
+    if (in_tensor.format() == Format::NHWC) {
+      MS_CHECK_TRUE_MSG(nhwc_shape.size() == NPU_SHAPE_SIZE, RET_ERROR, "Invalid transpose dim size!");
+      in_tensor.SetShape({nhwc_shape[NHWC_N], nhwc_shape[NHWC_C], nhwc_shape[NHWC_H], nhwc_shape[NHWC_W]});
+      in_tensor.SetFormat(Format::NCHW);
+    }
+    // out_op is a graph output op
+    if (out_op->out_ops().empty()) {
+      auto out_tensors_vec = cur_op->outputs();
+      for (size_t i = 0; i < out_tensors_vec.size(); i++) {
+        if (out_tensors_vec[i] == in_tensor) {
+          out_tensors_vec[i] = out_op->outputs()[0];
         }
       }
-      post_op->set_inputs(tensors_vec);
+      cur_op->set_outputs(out_tensors_vec);
+      // Other out_ops may use the same tensor as the current out_op; note that such an out_op has likely been
+      // updated already, which means it may no longer be a Transpose op.
+      for (auto other_out_op : cur_op->out_ops()) {
+        auto other_in_tensors_vec = other_out_op->inputs();
+        for (size_t i = 0; i < other_in_tensors_vec.size(); i++) {
+          if (other_in_tensors_vec[i] == in_tensor) {
+            other_in_tensors_vec[i] = out_op->outputs()[0];
+          }
+        }
+        other_out_op->set_inputs(other_in_tensors_vec);
+      }
+    }
+    // out_op is not a graph output op
+    for (auto post_op : out_op->out_ops()) {
+      auto in_tensors_vec = post_op->inputs();
+      for (size_t i = 0; i < in_tensors_vec.size(); i++) {
+        if (in_tensors_vec[i] == out_tensor) {
+          in_tensors_vec[i] = in_tensor;
+        }
+      }
+      post_op->set_inputs(in_tensors_vec);
     }
   }
   return RET_OK;
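Note: UpdatePostTensors now rewrites each transpose input tensor in place, permuting the NHWC shape into NCHW and flipping the tensor's format tag, instead of going through the removed kNumDims path. The index permutation on its own (the enum mirrors the NHWC_N/H/W/C indices referenced in the diff):

#include <cstdint>
#include <vector>

// NHWC index positions, matching the constants used above.
enum { NHWC_N = 0, NHWC_H = 1, NHWC_W = 2, NHWC_C = 3 };

// {1, 224, 224, 3} (NHWC) -> {1, 3, 224, 224} (NCHW)
std::vector<int64_t> Nhwc2NchwShape(const std::vector<int64_t> &nhwc) {
  return {nhwc[NHWC_N], nhwc[NHWC_C], nhwc[NHWC_H], nhwc[NHWC_W]};
}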
@@ -1,69 +0,0 @@
-/**
- * Copyright 2020-2021 Huawei Technologies Co., Ltd
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "src/delegate/npu/pass/npu_infer_format_pass.h"
-#include <vector>
-#include <queue>
-#include <map>
-#include "src/delegate/npu/pass/npu_pass_utils.h"
-#include "src/delegate/npu/npu_converter_utils.h"
-#include "src/tensor.h"
-#include "src/cxx_api/tensor/tensor_impl.h"
-
-using mindspore::lite::RET_ERROR;
-using mindspore::lite::RET_OK;
-
-namespace mindspore {
-int NPUInferFormatPass::Run(NPUGraph *subgraph) {
-  CHECK_NULL_RETURN(subgraph);
-  all_ops_ = subgraph->GetOps();
-  all_tensors_ = subgraph->GetInsertTensors();
-  auto graph_inputs = subgraph->inputs();
-  std::queue<NPUOp *> infer_ops;
-  std::map<tensor::MSTensor *, bool> is_inferred;
-  // initialization
-  for (auto op : *all_ops_) {
-    infer_ops.push(op);
-  }
-  for (auto tensor : *all_tensors_) {
-    is_inferred[tensor->impl()->lite_tensor()] = false;
-  }
-  for (auto input_tensor : graph_inputs) {
-    is_inferred[input_tensor.impl()->lite_tensor()] = true;
-  }
-  while (!infer_ops.empty()) {
-    auto cur_op = infer_ops.front();
-    infer_ops.pop();
-    bool input_inferred = std::all_of(cur_op->inputs().begin(), cur_op->inputs().end(), [&](auto in_tensor) {
-      return is_inferred[in_tensor.impl()->lite_tensor()] == true || in_tensor.IsConst();
-    });
-    if (input_inferred) {
-      auto dst_format = cur_op->inputs().at(0).format();
-      if (NPUPassUtils::IsNhwc2Nchw(cur_op) && dst_format == Format::NHWC) {
-        dst_format = Format::NCHW;
-      } else if (NPUPassUtils::IsNchw2Nhwc(cur_op) && dst_format == Format::NCHW) {
-        dst_format = Format::NHWC;
-      }
-      for (auto &out_tensor : cur_op->outputs()) {
-        const_cast<mindspore::MSTensor &>(out_tensor).SetFormat(dst_format);
-        is_inferred[out_tensor.impl()->lite_tensor()] = true;
-      }
-    } else {
-      infer_ops.push(cur_op);
-    }
-  }
-  return RET_OK;
-}
-}  // namespace mindspore
@@ -1,38 +0,0 @@
-/**
- * Copyright 2020-2021 Huawei Technologies Co., Ltd
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_NPU_PASS_NPU_INFER_FORMAT_PASS_H_
-#define MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_NPU_PASS_NPU_INFER_FORMAT_PASS_H_
-
-#include <set>
-#include <vector>
-#include "src/delegate/npu/op/npu_op.h"
-#include "src/delegate/npu/pass/npu_base_pass.h"
-#include "src/common/log_util.h"
-
-namespace mindspore {
-class NPUInferFormatPass : public NPUBasePass {
- public:
-  NPUInferFormatPass() { name_ = "NPUInferFormatPass"; }
-
-  int Run(NPUGraph *subgraph) override;
-
- private:
-  std::vector<NPUOp *> *all_ops_;
-  std::vector<mindspore::MSTensor *> *all_tensors_;
-};
-}  // namespace mindspore
-#endif  // MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_NPU_PASS_NPU_INFER_FORMAT_PASS_H_
@@ -1,5 +1,5 @@
 /**
- * Copyright 2020-2021 Huawei Technologies Co., Ltd
+ * Copyright 2020-2022 Huawei Technologies Co., Ltd
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -18,6 +18,7 @@
 #include <set>
 #include <string>
 #include "src/delegate/npu/pass/npu_pass_utils.h"
+#include "src/delegate/npu/npu_converter_utils.h"

 using mindspore::lite::RET_ERROR;
 using mindspore::lite::RET_OK;
@@ -52,9 +53,9 @@ int NPUInsertTransformPass::GetInsertState(NPUOp *op) {
   if (insert_nodes.find(op->type()) == insert_nodes.end()) {
     return InsertNone;
   }

   // current op is target op
-  // use out ops to count how many out lines from current op
+  // Use out ops to count the out lines from current op, since a single tensor can be used by multiple out ops.
+  // Besides, a tensor can be used by out ops and the graph output at the same time; that adds one more line.
   std::vector<mindspore::MSTensor> inputs = NPUPassUtils::GetNonConstInputs(op);
   size_t in_out_tensor_num =
     inputs.size() + std::max(std::max(op->out_ops().size(), static_cast<size_t>(1)), op->outputs().size());
@@ -76,13 +77,19 @@ int NPUInsertTransformPass::GetInsertState(NPUOp *op) {
       graph_input_num++;
     }
   }
-  if (op->out_ops().empty()) {
-    need_post_insert = true;
-  }
-  if (op->outputs().size() > op->out_ops().size()) {
-    graph_output_num = op->outputs().size() - op->out_ops().size();
+  auto graph_output = subgraph_->outputs();
+  for (auto output : op->outputs()) {
+    if (std::find(graph_output.begin(), graph_output.end(), output) != graph_output.end()) {
+      graph_output_num++;
+      need_post_insert = true;
+    }
   }
   for (const auto out_op : op->out_ops()) {
+    for (auto out_op_input : out_op->inputs()) {
+      if (std::find(graph_output.begin(), graph_output.end(), out_op_input) != graph_output.end()) {
+        in_out_tensor_num++;
+      }
+    }
     if (NPUPassUtils::IsNhwc2Nchw(out_op)) {
       transpose_output_num++;
     } else {
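Note: the counting change means an output tensor that is a graph output now contributes its own "line" and forces post insertion, rather than being inferred from op->outputs().size() - op->out_ops().size(). The rule in isolation (a sketch over plain ints standing in for tensors):

#include <algorithm>
#include <vector>

// Count outputs of an op that are also graph outputs; each one needs a post transpose line.
int CountGraphOutputLines(const std::vector<int> &op_outputs, const std::vector<int> &graph_outputs) {
  int graph_output_num = 0;
  for (int out : op_outputs) {
    if (std::find(graph_outputs.begin(), graph_outputs.end(), out) != graph_outputs.end()) {
      ++graph_output_num;
    }
  }
  return graph_output_num;
}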
@@ -99,105 +106,81 @@ int NPUInsertTransformPass::GetInsertState(NPUOp *op) {
       transpose_tensor_num == in_out_tensor_num) {
     return InsertNone;
   }
-  InsertState ret =
-    (need_pre_insert && need_post_insert)
-      ? BothInsert
-      : ((need_pre_insert && !need_post_insert) ? PreInsert
-                                                : ((!need_pre_insert && need_post_insert) ? PostInsert : InsertNone));
+  InsertState ret = (need_pre_insert && need_post_insert)
+                      ? BothInsert
+                      : (need_pre_insert ? PreInsert : (need_post_insert ? PostInsert : InsertNone));

   return ret;
 }

-int NPUInsertTransformPass::InsertNode(NPUOp *op, NPUOp *post_op, size_t post_input_index,
-                                       std::vector<NPUOp *> *trans_ops) {
-  // Op and post_op can't be nullptr at the same time.
-  std::string op_name;
-  std::vector<mindspore::MSTensor> in_tensors;
-  std::vector<NPUOp *> out_ops;
-  // If post_op equals nullptr, op is the output of whole graph.
-  if (post_op != nullptr) {
-    out_ops.push_back(post_op);
-    op_name = post_op->name() + "_pre";
-    in_tensors.push_back(post_op->inputs().at(post_input_index));
-  }
-  std::vector<NPUOp *> in_ops;
-  // If op equals nullptr, post_op is the input of whole graph.
-  if (op != nullptr && !op->outputs().empty()) {
-    in_ops.push_back(op);
-    op_name = op->name() + "_post";
-    in_tensors.resize(op->outputs().size());
-    std::copy(op->outputs().begin(), op->outputs().end(), in_tensors.begin());
-  }
-  for (auto i = 0; i < in_tensors.size(); ++i) {
-    auto in_tensor = in_tensors[i];
-    auto nhwc_shape = in_tensor.Shape();
-    if (nhwc_shape.size() == 0) {
-      continue;
-    } else if (nhwc_shape.size() < 4) {
-      MS_LOG(ERROR) << "nhwc_shape size < " << 4;
-      return RET_ERROR;
-    }
-    std::vector<int64_t> nchw_shape = {nhwc_shape[0], nhwc_shape[3], nhwc_shape[1], nhwc_shape[2]};
-
-    auto nh2nc_name = op_name + "_nh2nc_" + std::to_string(total++);
-    auto nh2nc_tensor =
-      mindspore::MSTensor::CreateTensor(nh2nc_name + "/output0", in_tensor.DataType(), nchw_shape, nullptr, 0);
-    if (nh2nc_tensor == nullptr) {
-      MS_LOG(ERROR) << "New nchw tensor failed when inserting nchw2nhwc op.";
-      return RET_ERROR;
-    }
-    nh2nc_tensor->SetTensorName(nh2nc_name + "/output0");
-    std::vector<mindspore::MSTensor> nh2nc_tensors = {*nh2nc_tensor};
-    all_tensors_->push_back(nh2nc_tensor);
-
-    auto nc2nh_name = op_name + "_nc2nh_" + std::to_string(total++);
-    auto nc2nh_tensor =
-      mindspore::MSTensor::CreateTensor(nc2nh_name + "/output0", in_tensor.DataType(), nhwc_shape, nullptr, 0);
-    if (nc2nh_tensor == nullptr) {
-      MS_LOG(ERROR) << "New nhwc tensor failed when inserting nhwc2nchw op.";
-      return RET_ERROR;
-    }
-    std::vector<mindspore::MSTensor> nc2nh_tensors = {*nc2nh_tensor};
-    all_tensors_->push_back(nc2nh_tensor);
-
-    auto *nh2nc_op = NPUPassUtils::CreateNhwc2NchwOp({in_tensor}, nh2nc_tensors, nh2nc_name);
-    trans_ops->push_back(nh2nc_op);
-
-    auto *nc2nh_op = NPUPassUtils::CreateNchw2NhwcOp(nh2nc_tensors, nc2nh_tensors, nc2nh_name);
-    trans_ops->push_back(nc2nh_op);
-
-    NPUPassUtils::UpdateOp(nh2nc_op, in_ops, {nc2nh_op}, {in_tensor}, nh2nc_tensors);
-    NPUPassUtils::UpdateOp(nc2nh_op, {nh2nc_op}, out_ops, {nh2nc_tensors[0]}, nc2nh_tensors);
-    if (op != nullptr) {
-      NPUPassUtils::UpdateNH2NCTransNodePreOp(op, nh2nc_op, post_op);
-    }
-    if (post_op != nullptr) {
-      NPUPassUtils::UpdateNC2NHTransNodePostOp(op, nc2nh_op, post_op);
-    } else {
-      // post_op nullptr means output; we keep the graph output tensor name unchanged
-      auto graph_output_name = in_tensor.Name();
-      nc2nh_tensor->SetTensorName(graph_output_name + "_after_" + name_);
-    }
-  }
+int NPUInsertTransformPass::InsertTransNode(NPUOp *op, NPUOp *post_op, const mindspore::MSTensor &trans_in_tensor,
+                                            std::vector<NPUOp *> *trans_ops) {
+  MS_ASSERT(op != nullptr || post_op != nullptr);
+  std::string op_name;
+  std::vector<NPUOp *> in_ops;
+  std::vector<NPUOp *> out_ops;
+  if (op != nullptr) {
+    op_name = op->name() + "_post";
+    in_ops.emplace_back(op);
+  }
+  if (post_op != nullptr) {
+    op_name = post_op->name() + "_pre";
+    out_ops.emplace_back(post_op);
+  }
+  auto nhwc_shape = trans_in_tensor.Shape();
+  std::vector<int64_t> nchw_shape = {nhwc_shape[NHWC_N], nhwc_shape[NHWC_C], nhwc_shape[NHWC_H], nhwc_shape[NHWC_W]};

+  auto nh2nc_name = op_name + "_nh2nc_" + std::to_string(total++);
+  auto nh2nc_tensor =
+    mindspore::MSTensor::CreateTensor(nh2nc_name + "/output0", trans_in_tensor.DataType(), nchw_shape, nullptr, 0);
+  if (nh2nc_tensor == nullptr) {
+    MS_LOG(ERROR) << "New nchw tensor failed when inserting nchw2nhwc op.";
+    return RET_ERROR;
+  }
+  nh2nc_tensor->SetFormat(Format::NCHW);
+  std::vector<mindspore::MSTensor> nh2nc_tensors = {*nh2nc_tensor};
+  all_tensors_->push_back(nh2nc_tensor);

+  auto nc2nh_name = op_name + "_nc2nh_" + std::to_string(total++);
+  auto nc2nh_tensor =
+    mindspore::MSTensor::CreateTensor(nc2nh_name + "/output0", trans_in_tensor.DataType(), nhwc_shape, nullptr, 0);
+  if (nc2nh_tensor == nullptr) {
+    MS_LOG(ERROR) << "New nhwc tensor failed when inserting nhwc2nchw op.";
+    return RET_ERROR;
+  }
+  nc2nh_tensor->SetFormat(Format::NHWC);
+  std::vector<mindspore::MSTensor> nc2nh_tensors = {*nc2nh_tensor};
+  all_tensors_->push_back(nc2nh_tensor);

+  auto *nh2nc_op = NPUPassUtils::CreateNhwc2NchwOp({trans_in_tensor}, nh2nc_tensors, nh2nc_name);
+  trans_ops->push_back(nh2nc_op);

+  auto *nc2nh_op = NPUPassUtils::CreateNchw2NhwcOp(nh2nc_tensors, nc2nh_tensors, nc2nh_name);
+  trans_ops->push_back(nc2nh_op);

+  NPUPassUtils::UpdateOp(nh2nc_op, in_ops, {nc2nh_op}, {trans_in_tensor}, nh2nc_tensors);
+  NPUPassUtils::UpdateOp(nc2nh_op, {nh2nc_op}, out_ops, {nh2nc_tensors[0]}, nc2nh_tensors);
+  if (op != nullptr) {
+    NPUPassUtils::UpdateNH2NCTransNodePreOp(op, nh2nc_op, post_op);
+  }
+  if (post_op != nullptr) {
+    NPUPassUtils::UpdateNC2NHTransNodePostOp(op, nc2nh_op, post_op);
+  } else {
+    // post_op == nullptr means graph output; keep the graph output tensor name unchanged
+    auto graph_output_name = trans_in_tensor.Name();
+    nc2nh_tensor->SetTensorName(graph_output_name + "_after_" + name_);
+  }
   return RET_OK;
 }

-int NPUInsertTransformPass::InsertForInputTensor(NPUOp *op, size_t in_tensor_index, NPUOp *pre_op,
-                                                 std::vector<NPUOp *> *trans_ops) {
-  // insert transpose nodes before target ops
-  return InsertNode(pre_op, op, in_tensor_index, trans_ops);
-}
-
-int NPUInsertTransformPass::InsertForOutputTensor(NPUOp *op, NPUOp *post_op, size_t post_in_tensor_index,
-                                                  std::vector<NPUOp *> *trans_ops) {
-  // insert transpose nodes after target ops
-  return InsertNode(op, post_op, post_in_tensor_index, trans_ops);
-}
-
 int NPUInsertTransformPass::InsertPreNodes(NPUOp *op, std::vector<NPUOp *> *trans_ops) {
   int ret = RET_OK;
   auto inputs = NPUPassUtils::GetNonConstInputs(op);
   for (auto tensor : inputs) {
+    if (tensor.Shape().size() < NPU_SHAPE_SIZE) {
+      continue;
+    }
+    // the input tensor can only come from a single op
     auto pre_op = NPUPassUtils::OpInputFromOp(op, tensor);
     if (NPUPassUtils::IsNchw2Nhwc(pre_op)) {
       continue;
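Note: the rewritten nested ternary near the top of this hunk relies on the fact that once (need_pre_insert && need_post_insert) is false, testing need_pre_insert alone is enough. Written out as a plain function it is equivalent to the following sketch (the real enum lives in the pass):

enum InsertState { InsertNone, PreInsert, PostInsert, BothInsert };

InsertState DecideInsertState(bool need_pre_insert, bool need_post_insert) {
  if (need_pre_insert && need_post_insert) return BothInsert;
  if (need_pre_insert) return PreInsert;
  if (need_post_insert) return PostInsert;
  return InsertNone;
}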
@@ -209,7 +192,7 @@ int NPUInsertTransformPass::InsertPreNodes(NPUOp *op, std::vector<NPUOp *> *tran
       return RET_ERROR;
     }
     size_t index = it - op->inputs().begin();
-    ret = InsertForInputTensor(op, index, pre_op, trans_ops);
+    ret = InsertTransNode(pre_op, op, op->inputs().at(index), trans_ops);
     if (ret != RET_OK) {
       MS_LOG(ERROR) << "Insert nhwc2nchw op and nchw2nhwc op before op " << op->name() << " failed.";
       return ret;
@@ -220,38 +203,58 @@ int NPUInsertTransformPass::InsertPreNodes(NPUOp *op, std::vector<NPUOp *> *tran

 int NPUInsertTransformPass::InsertPostNodes(NPUOp *op, std::vector<NPUOp *> *trans_ops) {
   int ret = RET_OK;
-  for (const auto post_op : op->out_ops()) {
-    if (NPUPassUtils::IsNhwc2Nchw(post_op)) {
-      continue;
-    }
-    auto post_op_in_tensors = post_op->inputs();
-    // op's out tensor is one of post_op's input tensors
-    auto it = std::find(post_op_in_tensors.begin(), post_op_in_tensors.end(), op->outputs().at(0));
-    if (it == post_op_in_tensors.end()) {
-      return RET_ERROR;
-    }
-    size_t input_index = it - post_op_in_tensors.begin();
-    ret = InsertForOutputTensor(op, post_op, input_index, trans_ops);
-    if (ret != RET_OK) {
-      MS_LOG(ERROR) << "Insert nhwc2nchw op and nchw2nhwc op after op " << op->name() << " failed.";
-      return ret;
-    }
-  }
-  if (op->outputs().size() > op->out_ops().size()) {
-    // op out is graph output
-    ret = InsertForOutputTensor(op, nullptr, 0, trans_ops);
-    if (ret != RET_OK) {
-      MS_LOG(ERROR) << "Insert nhwc2nchw op and nchw2nhwc op after op " << op->name() << " failed.";
-      return ret;
+  for (size_t idx = 0; idx < op->outputs().size(); idx++) {
+    auto out_tensor = op->outputs().at(idx);
+    if (out_tensor.Shape().size() < NPU_SHAPE_SIZE) {
+      continue;
+    }
+    if (std::find(subgraph_->outputs().begin(), subgraph_->outputs().end(), out_tensor) !=
+        subgraph_->outputs().end()) {
+      // the case that op's out tensor is a graph output
+      ret = InsertTransNode(op, nullptr, op->outputs().at(idx), trans_ops);
+      if (ret != RET_OK) {
+        MS_LOG(ERROR) << "Insert nhwc2nchw op and nchw2nhwc op after op " << op->name() << " failed.";
+        return RET_ERROR;
+      }
+      // Use the origin output as the last trans op's output, to avoid losing the output tensor after transpose
+      // fusion. The input of the cur_op's out_op will be updated in the loop below.
+      auto last_trans = trans_ops->back();
+      auto trans_output = last_trans->outputs();
+      auto cur_outputs = op->outputs();
+      cur_outputs[idx] = last_trans->outputs()[0];
+      trans_output[0] = op->outputs()[idx];
+      last_trans->set_outputs(trans_output);
+      op->set_outputs(cur_outputs);
+    }
+
+    // Besides being graph outputs, the output tensors can also be connected to multiple ops.
+    for (auto post_op : op->out_ops()) {
+      auto post_op_input = post_op->inputs();
+      auto it = std::find(post_op_input.begin(), post_op_input.end(), out_tensor);
+      if (it == post_op_input.end()) {
+        continue;
+      }
+      auto related_idx = it - post_op_input.begin();
+      post_op_input[related_idx] = op->outputs().at(idx);
+      post_op->set_inputs(post_op_input);
+
+      if (NPUPassUtils::IsNhwc2Nchw(post_op)) {
+        continue;
+      }
+      // the case that op's out tensor is one of post_op's input tensors
+      ret = InsertTransNode(op, post_op, op->outputs().at(idx), trans_ops);
+      if (ret != RET_OK) {
+        MS_LOG(ERROR) << "Insert nhwc2nchw op and nchw2nhwc op after op " << op->name() << " failed.";
+        return ret;
+      }
     }
   }
   return ret;
 }

 int NPUInsertTransformPass::Run(NPUGraph *subgraph) {
-  all_ops_ = subgraph->GetOps();
-  all_tensors_ = subgraph->GetInsertTensors();
+  subgraph_ = subgraph;
+  all_ops_ = subgraph_->GetOps();
+  all_tensors_ = subgraph_->GetInsertTensors();
   std::vector<NPUOp *> insert_ops;
   for (int j = 0; j < 2; ++j) {
     for (size_t i = 0; i < all_ops_->size(); i++) {
@@ -1,5 +1,5 @@
 /**
- * Copyright 2020-2021 Huawei Technologies Co., Ltd
+ * Copyright 2020-2022 Huawei Technologies Co., Ltd
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -30,14 +30,14 @@ class NPUInsertTransformPass : public NPUBasePass {
   int GetInsertState(NPUOp *op);
   int InsertPreNodes(NPUOp *op, std::vector<NPUOp *> *trans_ops);
   int InsertPostNodes(NPUOp *op, std::vector<NPUOp *> *trans_ops);
-  int InsertNode(NPUOp *op, NPUOp *post_op, size_t post_input_index, std::vector<NPUOp *> *trans_ops);
-  int InsertForInputTensor(NPUOp *op, size_t in_tensor_index, NPUOp *pre_op, std::vector<NPUOp *> *trans_ops);
-  int InsertForOutputTensor(NPUOp *op, NPUOp *post_op, size_t post_in_tensor_index, std::vector<NPUOp *> *trans_ops);
+  int InsertTransNode(NPUOp *op, NPUOp *post_op, const mindspore::MSTensor &trans_in_tensor,
+                      std::vector<NPUOp *> *trans_ops);

  private:
   int total = 0;
-  std::vector<NPUOp *> *all_ops_;
-  std::vector<mindspore::MSTensor *> *all_tensors_;
+  NPUGraph *subgraph_ = nullptr;
+  std::vector<NPUOp *> *all_ops_ = nullptr;
+  std::vector<mindspore::MSTensor *> *all_tensors_ = nullptr;
 };
 }  // namespace mindspore
 #endif  // MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_NPU_PASS_NPU_INSERT_TRANSFORM_PASS_H_
@@ -60,16 +60,16 @@ void NPUPassUtils::UpdateOp(NPUOp *op, const std::vector<NPUOp *> &in_ops, const
 void NPUPassUtils::UpdateNH2NCTransNodePreOp(NPUOp *pre_op, NPUOp *trans_op, NPUOp *op) {
   // For op before trans, update the out_ops; the output tensor of op is the input tensor of trans.
   std::vector<NPUOp *> out_ops = pre_op->out_ops();
-  size_t i = 0;
-  for (; i < out_ops.size(); i++) {
-    if (out_ops[i] == op) {
-      out_ops[i] = trans_op;
-      break;
+  if (op == nullptr) {
+    out_ops.emplace_back(trans_op);
+  } else {
+    for (size_t i = 0; i < out_ops.size(); i++) {
+      if (out_ops[i] == op) {
+        out_ops[i] = trans_op;
+        break;
+      }
     }
   }
-  if (i == out_ops.size()) {
-    out_ops.push_back(trans_op);
-  }
   pre_op->set_out_ops(out_ops);
 }
@@ -177,8 +177,8 @@ NPUOp *NPUPassUtils::OpInputFromOp(NPUOp *op, mindspore::MSTensor in_tensor) {
     return nullptr;
   }
   auto in_ops = op->in_ops();
-  auto output_contain = [in_tensor](NPUOp *op) {
-    auto outputs = op->outputs();
+  auto output_contain = [in_tensor](NPUOp *in_op) {
+    auto outputs = in_op->outputs();
     return std::find(outputs.begin(), outputs.end(), in_tensor) != outputs.end();
   };
   auto it = std::find_if(in_ops.begin(), in_ops.end(), output_contain);
@@ -37,10 +37,6 @@ int NPUTransformPass::InsertPreNodes(NPUOp *op, std::vector<NPUOp *> *trans_ops)
     MS_LOG(ERROR) << "NPU Transform pass does not find in op with 4d output";
     return RET_ERROR;
   }
-  if (op->inputs().front().format() == Format::NCHW) {
-    // input format is already NCHW, no need to insert transpose.
-    return RET_OK;
-  }
   if (is_input_op || nchw_nodes.find((*it)->type()) == nchw_nodes.end()) {
     NPUOp *pre_op = nullptr;
     if (!is_input_op) {
@@ -57,6 +53,7 @@ int NPUTransformPass::InsertPreNodes(NPUOp *op, std::vector<NPUOp *> *trans_ops)
       MS_LOG(ERROR) << "New nchw tensor failed when inserting pre nhwc2nchw op.";
       return RET_ERROR;
     }
+    tensor->SetFormat(Format::NCHW);
     std::vector<mindspore::MSTensor> pre_trans_outputs = {*tensor};
     all_tensors_->push_back(tensor);
@@ -83,11 +80,10 @@ int NPUTransformPass::InsertPreNodes(NPUOp *op, std::vector<NPUOp *> *trans_ops)
   return RET_OK;
 }

-int NPUTransformPass::InsertPostNodes(NPUOp *op, std::vector<NPUOp *> *trans_ops,
-                                      std::vector<mindspore::MSTensor> graph_outputs) {
+int NPUTransformPass::InsertPostNodes(NPUOp *op, std::vector<NPUOp *> *trans_ops) {
   bool is_output_op = false;
   if (op->out_ops().empty() ||
-      find(graph_outputs.begin(), graph_outputs.end(), op->outputs()[0]) != graph_outputs.end()) {
+      find(subgraph_->outputs().begin(), subgraph_->outputs().end(), op->outputs()[0]) != subgraph_->outputs().end()) {
     is_output_op = true;
   }
   // Get the post op that need insert trans op.
@@ -116,6 +112,7 @@ int NPUTransformPass::InsertPostNodes(NPUOp *op, std::vector<NPUOp *> *trans_ops
       MS_LOG(ERROR) << "New nchw tensor failed when inserting post nchw2nhwc op.";
       return RET_ERROR;
     }
+    nc2nh_tensor->SetFormat(Format::NCHW);
     all_tensors_->push_back(nc2nh_tensor);

     if (is_output_op) {
@@ -145,6 +142,7 @@ int NPUTransformPass::InsertPostNodes(NPUOp *op, std::vector<NPUOp *> *trans_ops
       MS_LOG(ERROR) << "New nhwc tensor failed when inserting post nchw2nhwc op.";
       return RET_ERROR;
     }
+    out_tensor->SetFormat(Format::NHWC);
     all_tensors_->push_back(out_tensor);
     nc2nh_outputs.push_back(*out_tensor);
@@ -173,9 +171,9 @@ int NPUTransformPass::InsertPostNodes(NPUOp *op, std::vector<NPUOp *> *trans_ops
 }

 int NPUTransformPass::Run(NPUGraph *subgraph) {
-  all_ops_ = subgraph->GetOps();
-  all_tensors_ = subgraph->GetInsertTensors();
-  auto graph_outputs = subgraph->outputs();
+  subgraph_ = subgraph;
+  all_ops_ = subgraph_->GetOps();
+  all_tensors_ = subgraph_->GetInsertTensors();
   for (size_t i = 0; i < all_ops_->size();) {
     auto op = (*all_ops_)[i];
     if (nchw_nodes.find(op->type()) == nchw_nodes.end()) {
@@ -204,7 +202,7 @@ int NPUTransformPass::Run(NPUGraph *subgraph) {
     // insert post_ops after op in vector
     // modify loop index add post_ops.size() to the next op in the origin vector
     std::vector<NPUOp *> post_ops;
-    ret = InsertPostNodes(op, &post_ops, graph_outputs);
+    ret = InsertPostNodes(op, &post_ops);
     if (ret != RET_OK) {
       MS_LOG(ERROR) << "Insert nchw2nhwc op after op " << op->name() << " failed.";
       return RET_ERROR;
@@ -32,12 +32,13 @@ class NPUTransformPass : public NPUBasePass {
  private:
   int InsertPreNodes(NPUOp *op, std::vector<NPUOp *> *trans_ops);

-  int InsertPostNodes(NPUOp *op, std::vector<NPUOp *> *trans_ops, std::vector<mindspore::MSTensor> graph_outputs);
+  int InsertPostNodes(NPUOp *op, std::vector<NPUOp *> *trans_ops);

  private:
   int total = 0;
-  std::vector<NPUOp *> *all_ops_;
-  std::vector<mindspore::MSTensor *> *all_tensors_;
+  NPUGraph *subgraph_ = nullptr;
+  std::vector<NPUOp *> *all_ops_ = nullptr;
+  std::vector<mindspore::MSTensor *> *all_tensors_ = nullptr;
 };
 }  // namespace mindspore
 #endif  // MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_NPU_PASS_NPU_TRANSFORM_PASS_H_
@@ -493,6 +493,7 @@ STATUS TfliteModelParser::ConvertGraphOutputs(const std::unique_ptr<tflite::SubG
   auto make_tuple_cnode = func_graph->NewCNode(make_tuple_inputs);
   MSLITE_CHECK_PTR(make_tuple_cnode);
   make_tuple_cnode->set_fullname_with_scope("return_tuple");

   auto return_prim_ptr = std::make_shared<ops::Return>();
   if (return_prim_ptr == nullptr) {
     MS_LOG(ERROR) << "new Return failed";