From 9355cd94900126f280052f9cabb2a05d8e5557e6 Mon Sep 17 00:00:00 2001 From: zhaozhenlong Date: Wed, 10 Mar 2021 15:40:36 +0800 Subject: [PATCH] fix scale npu weight format wrong and activation adjust npu pass inserting trans for each output kernel --- .../src/runtime/agent/npu/npu_executor.cc | 10 +- .../lite/src/runtime/agent/npu/npu_manager.h | 4 +- .../optimizer/npu_insert_transform_pass.cc | 4 +- .../agent/npu/optimizer/npu_pass_utils.cc | 19 ++- .../agent/npu/optimizer/npu_pass_utils.h | 6 +- .../agent/npu/optimizer/npu_transform_pass.cc | 112 ++++++++++++------ .../runtime/agent/npu/subgraph_npu_kernel.cc | 6 +- .../lite/src/runtime/kernel/npu/scale_npu.cc | 102 +++++++++++++++- .../lite/src/runtime/kernel/npu/scale_npu.h | 7 ++ mindspore/lite/test/models_npu.cfg | 1 + 10 files changed, 215 insertions(+), 56 deletions(-) diff --git a/mindspore/lite/src/runtime/agent/npu/npu_executor.cc b/mindspore/lite/src/runtime/agent/npu/npu_executor.cc index 830e2b8db5f..38ff6b44897 100644 --- a/mindspore/lite/src/runtime/agent/npu/npu_executor.cc +++ b/mindspore/lite/src/runtime/agent/npu/npu_executor.cc @@ -108,11 +108,11 @@ int NPUExecutor::Run(const std::vector &in_tensors, const std::vector< } break; } - if (index == in_tensors.size()) { - MS_LOG(ERROR) << "Can't find corresponding ms lite tensor of " << i << " input tensor for npu executor " - << model_name_; - return RET_ERROR; - } + } + if (index == in_tensors.size()) { + MS_LOG(ERROR) << "Can't find corresponding ms lite tensor of " << i << " input tensor for npu executor " + << model_name_; + return RET_ERROR; } } context.AddPara("model_name", model_name_); diff --git a/mindspore/lite/src/runtime/agent/npu/npu_manager.h b/mindspore/lite/src/runtime/agent/npu/npu_manager.h index b52c1bcc5d8..b3186c2fcb2 100644 --- a/mindspore/lite/src/runtime/agent/npu/npu_manager.h +++ b/mindspore/lite/src/runtime/agent/npu/npu_manager.h @@ -27,9 +27,7 @@ #include "include/HiAiModelManagerService.h" namespace mindspore::lite { -static std::set npu_trans_nodes = { - schema::PrimitiveType_Conv2DFusion, schema::PrimitiveType_Conv2dTransposeFusion, schema::PrimitiveType_Resize, - schema::PrimitiveType_MaxPoolFusion, schema::PrimitiveType_AvgPoolFusion}; + struct SubGraphModel { public: SubGraphModel(int index, std::string model_name, std::shared_ptr model_buffer_data) diff --git a/mindspore/lite/src/runtime/agent/npu/optimizer/npu_insert_transform_pass.cc b/mindspore/lite/src/runtime/agent/npu/optimizer/npu_insert_transform_pass.cc index 3277380fc1b..ea86fd59814 100644 --- a/mindspore/lite/src/runtime/agent/npu/optimizer/npu_insert_transform_pass.cc +++ b/mindspore/lite/src/runtime/agent/npu/optimizer/npu_insert_transform_pass.cc @@ -117,7 +117,7 @@ int NPUInsertTransformPass::InsertNode(kernel::LiteKernel *kernel, kernel::LiteK std::vector nchw_shape = {nhwc_shape[0], nhwc_shape[3], nhwc_shape[1], nhwc_shape[2]}; auto nh2nc_name = kernel_name + "_nh2nc_" + std::to_string(total++); - auto nh2nc_tensor = new (std::nothrow) Tensor(in_tensor->data_type(), nchw_shape, schema::Format_NHWC, Tensor::VAR); + auto nh2nc_tensor = new (std::nothrow) Tensor(in_tensor->data_type(), nchw_shape, schema::Format_NCHW, Tensor::VAR); if (nh2nc_tensor == nullptr) { MS_LOG(ERROR) << "New nchw tensor failed when inserting nchw2nhwc kernel."; return RET_ERROR; @@ -127,7 +127,7 @@ int NPUInsertTransformPass::InsertNode(kernel::LiteKernel *kernel, kernel::LiteK all_tensors_->push_back(nh2nc_tensors[0]); auto nc2nh_name = kernel_name + "_nc2nh_" + std::to_string(total++); - auto nc2nh_tensor = new (std::nothrow) Tensor(in_tensor->data_type(), nhwc_shape, schema::Format_NCHW, Tensor::VAR); + auto nc2nh_tensor = new (std::nothrow) Tensor(in_tensor->data_type(), nhwc_shape, schema::Format_NHWC, Tensor::VAR); if (nc2nh_tensor == nullptr) { MS_LOG(ERROR) << "New nhwc tensor failed when inserting nhwc2nchw kernel."; return RET_ERROR; diff --git a/mindspore/lite/src/runtime/agent/npu/optimizer/npu_pass_utils.cc b/mindspore/lite/src/runtime/agent/npu/optimizer/npu_pass_utils.cc index 000ff411bce..7941f5103f4 100644 --- a/mindspore/lite/src/runtime/agent/npu/optimizer/npu_pass_utils.cc +++ b/mindspore/lite/src/runtime/agent/npu/optimizer/npu_pass_utils.cc @@ -15,8 +15,10 @@ */ #include "src/runtime/agent/npu/optimizer/npu_pass_utils.h" +#include #include "src/runtime/agent/npu/npu_manager.h" #include "nnacl/transpose.h" +#include "nnacl/scale.h" #include "src/ops/populate/populate_register.h" #include "src/runtime/kernel/arm/fp32/transpose_fp32.h" @@ -47,6 +49,7 @@ kernel::LiteKernel *NPUPassUtils::CreateNchw2NhwcKernel(const std::vectorset_desc(key); } else { MS_LOG(ERROR) << "New Nchw2Nhwc Kernel failed."; + free(transpose_param); return nullptr; } @@ -106,8 +109,9 @@ void NPUPassUtils::UpdateNH2NCTransNodePreKernel(kernel::LiteKernel *pre_kernel, pre_kernel->set_out_kernels(out_kernels); } -void NPUPassUtils::UpdateNC2NHTransNodePreKernel(kernel::LiteKernel *pre_kernel, kernel::LiteKernel *trans_kernel, - std::vector kernels) { +void NPUPassUtils::UpdateNC2NHTransNodePreKernel(kernel::LiteKernel *pre_kernel, + const std::vector &trans_kernels, + const std::vector &kernels) { // For kernel before trans, there may be multiple outputs. auto cur_out_kernels = pre_kernel->out_kernels(); for (size_t i = 0; i < kernels.size(); i++) { @@ -116,11 +120,11 @@ void NPUPassUtils::UpdateNC2NHTransNodePreKernel(kernel::LiteKernel *pre_kernel, cur_out_kernels.erase(itr); } } - cur_out_kernels.push_back(trans_kernel); + std::copy(trans_kernels.begin(), trans_kernels.end(), std::back_inserter(cur_out_kernels)); pre_kernel->set_out_kernels(cur_out_kernels); // For kernel before trans, the output tensor is used for output tensor of trans, so replace the output tensor with // the input tensor of trans. - pre_kernel->set_out_tensors(trans_kernel->in_tensors()); + pre_kernel->set_out_tensors({trans_kernels.at(0)->in_tensors().at(0)}); } void NPUPassUtils::UpdateNH2NCTransNodePostKernel(kernel::LiteKernel *trans_kernel, kernel::LiteKernel *post_kernel) { @@ -230,4 +234,11 @@ kernel::LiteKernel *NPUPassUtils::KernelInputFromKernel(const kernel::LiteKernel } return *it; } + +bool NPUPassUtils::Scale4dCase(const kernel::LiteKernel *kernel) { + MS_ASSERT(kernel != nullptr && kernel->op_parameter() != nullptr); + auto scale_param = reinterpret_cast(kernel->op_parameter()); + auto in_tensor = kernel->in_tensors().at(1); + return in_tensor->shape().size() == 1 && (scale_param->axis_ == 3 || scale_param->axis_ == -1); +} } // namespace mindspore::lite diff --git a/mindspore/lite/src/runtime/agent/npu/optimizer/npu_pass_utils.h b/mindspore/lite/src/runtime/agent/npu/optimizer/npu_pass_utils.h index c1adf4d409e..7b776f903db 100644 --- a/mindspore/lite/src/runtime/agent/npu/optimizer/npu_pass_utils.h +++ b/mindspore/lite/src/runtime/agent/npu/optimizer/npu_pass_utils.h @@ -37,8 +37,9 @@ class NPUPassUtils { static void UpdateNH2NCTransNodePreKernel(kernel::LiteKernel *pre_kernel, kernel::LiteKernel *trans_kernel, kernel::LiteKernel *kernel); - static void UpdateNC2NHTransNodePreKernel(kernel::LiteKernel *pre_kernel, kernel::LiteKernel *trans_kernel, - std::vector kernels); + static void UpdateNC2NHTransNodePreKernel(kernel::LiteKernel *pre_kernel, + const std::vector &trans_kernels, + const std::vector &kernels); static void UpdateNH2NCTransNodePostKernel(kernel::LiteKernel *trans_kernel, kernel::LiteKernel *post_kernel); @@ -52,6 +53,7 @@ class NPUPassUtils { static bool IsNchw2Nhwc(const kernel::LiteKernel *kernel); static kernel::LiteKernel *KernelInputFromKernel(const kernel::LiteKernel *kernel, size_t in_tensor_index); + static bool Scale4dCase(const kernel::LiteKernel *kernel); }; } // namespace mindspore::lite #endif // MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_OPTIMIZER_NPU_PASS_UTILS_H_ diff --git a/mindspore/lite/src/runtime/agent/npu/optimizer/npu_transform_pass.cc b/mindspore/lite/src/runtime/agent/npu/optimizer/npu_transform_pass.cc index d5451041369..3e7e907e428 100644 --- a/mindspore/lite/src/runtime/agent/npu/optimizer/npu_transform_pass.cc +++ b/mindspore/lite/src/runtime/agent/npu/optimizer/npu_transform_pass.cc @@ -14,12 +14,18 @@ * limitations under the License. */ #include "src/runtime/agent/npu/optimizer/npu_transform_pass.h" +#include #include #include "src/lite_kernel.h" #include "src/runtime/agent/npu/npu_manager.h" #include "src/runtime/agent/npu/optimizer/npu_pass_utils.h" namespace mindspore::lite { using kernel::KERNEL_ARCH::kNPU; + +static std::set npu_trans_nodes = { + schema::PrimitiveType_Conv2DFusion, schema::PrimitiveType_Conv2dTransposeFusion, schema::PrimitiveType_Resize, + schema::PrimitiveType_MaxPoolFusion, schema::PrimitiveType_AvgPoolFusion, schema::PrimitiveType_ScaleFusion}; + int NPUTransformPass::InsertPreNodes(kernel::LiteKernel *kernel, std::vector *trans_kernels) { bool is_input_kernel = kernel->in_kernels().empty(); // single input @@ -80,57 +86,93 @@ int NPUTransformPass::InsertPostNodes(kernel::LiteKernel *kernel, std::vector post_insert_kernels; + std::vector post_non_insert_kernels; for (int i = 0; i < kernel->out_kernels().size(); i++) { auto post_kernel = kernel->out_kernels()[i]; if (post_kernel->desc().arch != kNPU || npu_trans_nodes.find(post_kernel->Type()) == npu_trans_nodes.end()) { post_insert_kernels.push_back(post_kernel); + } else { + post_non_insert_kernels.push_back(post_kernel); } } if (is_output_kernel || !post_insert_kernels.empty()) { // Create post transform kernel's in tensor. auto nhwc_shape = kernel->out_tensors()[0]->shape(); std::vector nchw_shape = {nhwc_shape[0], nhwc_shape[3], nhwc_shape[1], nhwc_shape[2]}; - auto tensor = + auto nc2nh_tensor = new (std::nothrow) Tensor(kernel->out_tensors()[0]->data_type(), nchw_shape, schema::Format_NCHW, Tensor::VAR); - if (tensor == nullptr) { + if (nc2nh_tensor == nullptr) { MS_LOG(ERROR) << "New nchw tensor failed when inserting post nchw2nhwc kernel."; return RET_ERROR; } - std::vector post_trans_in_tensors = {tensor}; - all_tensors_->push_back(tensor); + all_tensors_->push_back(nc2nh_tensor); auto name = kernel->name() + "_post_trans" + "_Nchw2Nhwc" + std::to_string(total++); - tensor->set_tensor_name(name + "/input0"); + nc2nh_tensor->set_tensor_name(name + "/input0"); - auto nc2nh_perm_tensor = new Tensor(kNumberTypeInt32, {4}, schema::Format_NHWC, Tensor::CONST_TENSOR); - auto nc2nh_data = nc2nh_perm_tensor->MutableData(); - if (nc2nh_data == nullptr) { - return RET_ERROR; - } - - std::vector nc2nh_perm_vector = {0, 2, 3, 1}; - memcpy(nc2nh_data, nc2nh_perm_vector.data(), 4 * sizeof(int)); - all_tensors_->push_back(nc2nh_perm_tensor); - - // Create post transform kernel: Nchw2Nhwc - auto *post_trans_kernel = NPUPassUtils::CreateNchw2NhwcKernel({post_trans_in_tensors[0], nc2nh_perm_tensor}, - kernel->out_tensors(), context_, name); - - // Set in_kernels, out_kernels, in_tensors, out_tensors for transform kernel - NPUPassUtils::UpdateKernel(post_trans_kernel, {kernel}, post_insert_kernels, post_trans_kernel->in_tensors(), - kernel->out_tensors()); - trans_kernels->push_back(post_trans_kernel); - - if (!is_output_kernel) { - for (int i = 0; i < kernel->out_kernels().size(); i++) { - auto post_kernel = kernel->out_kernels()[i]; - if (find(post_insert_kernels.begin(), post_insert_kernels.end(), post_kernel) != post_insert_kernels.end()) { - NPUPassUtils::UpdateNC2NHTransNodePostKernel(kernel, post_trans_kernel, post_kernel); - } else { - NPUPassUtils::UpdateNC2NHPostKernelInTensors(kernel, post_trans_kernel, post_kernel); - } + if (is_output_kernel) { + // perm tensor + auto nc2nh_perm_tensor = new Tensor(kNumberTypeInt32, {4}, schema::Format_NHWC, Tensor::CONST_TENSOR); + auto nc2nh_data = nc2nh_perm_tensor->MutableData(); + if (nc2nh_data == nullptr) { + return RET_ERROR; } + std::vector nc2nh_perm_vector = {0, 2, 3, 1}; + memcpy(nc2nh_data, nc2nh_perm_vector.data(), 4 * sizeof(int)); + all_tensors_->push_back(nc2nh_perm_tensor); + std::vector nc2nh_out_tensors{kernel->out_tensors().at(0)}; + // Create post transform kernel: Nchw2Nhwc + auto *post_trans_kernel = + NPUPassUtils::CreateNchw2NhwcKernel({nc2nh_tensor, nc2nh_perm_tensor}, nc2nh_out_tensors, context_, name); + // Set in_kernels, out_kernels, in_tensors, out_tensors for transform kernel + NPUPassUtils::UpdateKernel(post_trans_kernel, {kernel}, {}, post_trans_kernel->in_tensors(), + post_trans_kernel->out_tensors()); + trans_kernels->push_back(post_trans_kernel); } - NPUPassUtils::UpdateNC2NHTransNodePreKernel(kernel, post_trans_kernel, post_insert_kernels); + // for each to-be-insert out kernel, create one transpose kernel, one perm tensor, one out tensor + // but using same one in_tensor. + for (auto i = 0; i < post_insert_kernels.size(); ++i) { + auto post_insert_kernel = post_insert_kernels.at(i); + // perm tensor + auto nc2nh_perm_tensor = new Tensor(kNumberTypeInt32, {4}, schema::Format_NHWC, Tensor::CONST_TENSOR); + auto nc2nh_data = nc2nh_perm_tensor->MutableData(); + if (nc2nh_data == nullptr) { + return RET_ERROR; + } + std::vector nc2nh_perm_vector = {0, 2, 3, 1}; + memcpy(nc2nh_data, nc2nh_perm_vector.data(), 4 * sizeof(int)); + all_tensors_->push_back(nc2nh_perm_tensor); + // nc2nh kernel out tensor: 1st kernel uses original out_tensor, remaining kernels use newly created out tensor. + std::vector nc2nh_out_tensors{nullptr}; + + auto origin_out_tensor = kernel->out_tensors().at(0); + auto out_tensor = lite::Tensor::CopyTensor(*origin_out_tensor, false); + if (out_tensor == nullptr) { + MS_LOG(ERROR) << "New nhwc tensor failed when inserting post nchw2nhwc kernel."; + return RET_ERROR; + } + all_tensors_->push_back(out_tensor); + auto out_tensor_name = kernel->name() + "_post_trans" + "_Nchw2Nhwc_" + std::to_string(i) + "_out_tensor"; + out_tensor->set_tensor_name(out_tensor_name); + nc2nh_out_tensors[0] = out_tensor; + + // Create post transform kernel: Nchw2Nhwc + auto *post_trans_kernel = + NPUPassUtils::CreateNchw2NhwcKernel({nc2nh_tensor, nc2nh_perm_tensor}, nc2nh_out_tensors, context_, name); + // Set in_kernels, out_kernels, in_tensors, out_tensors for transform kernel + NPUPassUtils::UpdateKernel(post_trans_kernel, {kernel}, {post_insert_kernel}, post_trans_kernel->in_tensors(), + post_trans_kernel->out_tensors()); + trans_kernels->push_back(post_trans_kernel); + // update post kernel in_tensors in_kernels + NPUPassUtils::UpdateNC2NHTransNodePostKernel(kernel, post_trans_kernel, post_insert_kernel); + } + // for those non-insert post kernels, update their in_tensor + for (auto non_insert_kernel : post_non_insert_kernels) { + auto in_tensors = non_insert_kernel->in_tensors(); + std::replace(in_tensors.begin(), in_tensors.end(), kernel->out_tensors().at(0), nc2nh_tensor); + non_insert_kernel->set_in_tensors(in_tensors); + } + // update origin kernel's out tensor and out kernel + NPUPassUtils::UpdateNC2NHTransNodePreKernel(kernel, *trans_kernels, post_insert_kernels); } return RET_OK; } @@ -142,6 +184,10 @@ int NPUTransformPass::Run() { i++; continue; } + if (kernel->Type() == schema::PrimitiveType_ScaleFusion && !NPUPassUtils::Scale4dCase(kernel)) { + i++; + continue; + } if (kernel->Type() == schema::PrimitiveType_Resize && kernel->in_tensors()[0]->Height() > kernel->out_tensors()[0]->Height()) { i++; diff --git a/mindspore/lite/src/runtime/agent/npu/subgraph_npu_kernel.cc b/mindspore/lite/src/runtime/agent/npu/subgraph_npu_kernel.cc index d4bce542204..801126dc83c 100644 --- a/mindspore/lite/src/runtime/agent/npu/subgraph_npu_kernel.cc +++ b/mindspore/lite/src/runtime/agent/npu/subgraph_npu_kernel.cc @@ -33,6 +33,10 @@ namespace mindspore::kernel { using mindspore::lite::RET_ERROR; using mindspore::lite::RET_OK; +static std::set npu_specific_weight_nodes = { + schema::PrimitiveType_Conv2DFusion, schema::PrimitiveType_Conv2dTransposeFusion, schema::PrimitiveType_ScaleFusion, + schema::PrimitiveType_BatchNorm, schema::PrimitiveType_FullConnection, schema::PrimitiveType_InstanceNorm}; + SubGraphNpuKernel::~SubGraphNpuKernel() { subgraph_input_op_.clear(); subgraph_output_op_.clear(); @@ -125,7 +129,7 @@ int SubGraphNpuKernel::BuildNPUInputOp() { // weight tensor if (is_weight_tensor) { - if (lite::npu_trans_nodes.find(node->Type()) == lite::npu_trans_nodes.end()) { + if (npu_specific_weight_nodes.find(node->Type()) == npu_specific_weight_nodes.end()) { auto name = node->name() + "_" + std::to_string(count++); auto weight_const = new (std::nothrow) hiai::op::Const(node->name() + "_" + std::to_string(count++)); if (weight_const == nullptr) { diff --git a/mindspore/lite/src/runtime/kernel/npu/scale_npu.cc b/mindspore/lite/src/runtime/kernel/npu/scale_npu.cc index cd4b981ce91..7d1d152a39e 100644 --- a/mindspore/lite/src/runtime/kernel/npu/scale_npu.cc +++ b/mindspore/lite/src/runtime/kernel/npu/scale_npu.cc @@ -15,10 +15,13 @@ */ #include "src/runtime/kernel/npu/scale_npu.h" +#include #include "src/kernel_registry.h" +#include "src/runtime/agent/npu/npu_converter_utils.h" using mindspore::kernel::KERNEL_ARCH::kNPU; using mindspore::lite::KernelRegistrar; +using mindspore::schema::Format_NHWC; using mindspore::schema::PrimitiveType_ScaleFusion; namespace mindspore::kernel { @@ -27,6 +30,13 @@ int ScaleNPUKernel::IsSupport(const std::vector &inputs, const s if (scale_parameter_->axis_ < 0) { scale_parameter_->axis_ = scale_parameter_->axis_ + inputs[0]->shape().size(); } + if (inputs.size() > 1 && inputs[0]->shape().size() == 4 && inputs[0]->format() == schema::Format_NHWC) { + if (scale_parameter_->axis_ != 3) { + MS_LOG(ERROR) << "Npu scale axis attr only support on channel, now is " << scale_parameter_->axis_; + return RET_ERROR; + } + return RET_OK; + } if (scale_parameter_->axis_ != 1) { MS_LOG(ERROR) << "Npu scale axis attr only support 1, now is " << scale_parameter_->axis_; return RET_ERROR; @@ -41,22 +51,102 @@ int ScaleNPUKernel::SetNPUInputs(const std::vector &inputs, cons MS_LOG(ERROR) << name_ << " op is nullptr"; return RET_ERROR; } - op_->set_attr_axis(scale_parameter_->axis_); - op_->set_input_x(*npu_inputs[0]); - op_->set_input_scale(*npu_inputs[1]); - if (npu_inputs[2] != nullptr) { - op_->set_input_bias(*npu_inputs[2]); + op_->set_attr_axis(1); // only support axis 1 now + op_->set_input_x(*npu_inputs.at(0)); + + MS_ASSERT(inputs.size() > 1); + auto scale_shape = inputs.at(1)->shape(); + std::shared_ptr scale_tensor = std::shared_ptr(new (std::nothrow) ge::Tensor()); + if (scale_tensor == nullptr) { + MS_LOG(ERROR) << "new scale_tensor failed."; + return RET_ERROR; } + ge::TensorDesc scale_tensor_desc(lite::ConverterToNPUShape({1, scale_shape[0], 1, 1}), ge::FORMAT_NCHW, + lite::ConverterToNPUDataType(inputs[1]->data_type())); + scale_tensor->SetTensorDesc(scale_tensor_desc); + scale_tensor->SetData(reinterpret_cast(inputs[1]->data_c()), inputs[1]->Size()); + scale_ = new (std::nothrow) hiai::op::Const(name_ + "_scale"); + if (scale_ == nullptr) { + MS_LOG(ERROR) << "New scale_ const failed."; + return RET_ERROR; + } + scale_->set_attr_value(scale_tensor); + op_->set_input_scale(*scale_); + + if (inputs.size() > 2 && inputs[2] != nullptr) { + auto bias_shape = inputs[2]->shape(); + std::shared_ptr bias_tensor = std::shared_ptr(new (std::nothrow) ge::Tensor()); + if (bias_tensor == nullptr) { + MS_LOG(ERROR) << "new bias_tensor failed."; + return RET_ERROR; + } + ge::TensorDesc bias_tensor_desc(lite::ConverterToNPUShape({1, bias_shape[0], 1, 1}), ge::FORMAT_NCHW, + lite::ConverterToNPUDataType(inputs[2]->data_type())); + bias_tensor->SetTensorDesc(bias_tensor_desc); + bias_tensor->SetData(reinterpret_cast(inputs[2]->data_c()), inputs[2]->Size()); + bias_ = new (std::nothrow) hiai::op::Const(name_ + "_beta"); + if (bias_ == nullptr) { + MS_LOG(ERROR) << "New beta_ const failed."; + return RET_ERROR; + } + bias_->set_attr_value(bias_tensor); + op_->set_input_bias(*bias_); + } + + if (scale_parameter_->activation_type_ != schema::ActivationType_NO_ACTIVATION) { + auto ret = SetActivation(op_, scale_parameter_->activation_type_); + if (ret != RET_OK) { + MS_LOG(ERROR) << "New activation npu operator for op " << name_ << " failed."; + return ret; + } + } + return RET_OK; } -ge::Operator *mindspore::kernel::ScaleNPUKernel::GetNPUOp() { return this->op_; } +ge::Operator *mindspore::kernel::ScaleNPUKernel::GetNPUOp() { + if (scale_parameter_->activation_type_ == schema::ActivationType_NO_ACTIVATION) { + return op_; + } else { + return act_; + } +} + +int ScaleNPUKernel::SetActivation(const ge::Operator *input, int act_type) { + act_ = new (std::nothrow) hiai::op::Activation(name_ + "_act"); + if (act_ == nullptr) { + MS_LOG(ERROR) << "New activation npu operator for op " << name_ << " failed."; + return RET_ERROR; + } + act_->set_input_x(*input); + if (act_type == schema::ActivationType_RELU) { + act_->set_attr_mode(1); + } else if (act_type == schema::ActivationType_RELU6) { + act_->set_attr_mode(14); + } else { + MS_LOG(ERROR) << "Unsupported activation type for scale."; + return RET_ERROR; + } + return RET_OK; +} ScaleNPUKernel::~ScaleNPUKernel() { if (op_ != nullptr) { delete op_; op_ = nullptr; } + if (scale_ != nullptr) { + delete scale_; + scale_ = nullptr; + } + if (bias_ != nullptr) { + delete bias_; + bias_ = nullptr; + } + if (act_ != nullptr) { + delete act_; + act_ = nullptr; + } } REG_KERNEL(kNPU, kNumberTypeFloat32, PrimitiveType_ScaleFusion, NPUKernelCreator) diff --git a/mindspore/lite/src/runtime/kernel/npu/scale_npu.h b/mindspore/lite/src/runtime/kernel/npu/scale_npu.h index b592fc31d9d..6bbc0e82c38 100644 --- a/mindspore/lite/src/runtime/kernel/npu/scale_npu.h +++ b/mindspore/lite/src/runtime/kernel/npu/scale_npu.h @@ -19,6 +19,7 @@ #include #include "nnacl/scale.h" #include "src/runtime/kernel/npu/npu_kernel.h" +#include "include/graph/op/all_ops.h" #include "include/graph/op/nn_defs.h" namespace mindspore::kernel { class ScaleNPUKernel : public NPUKernel { @@ -36,8 +37,14 @@ class ScaleNPUKernel : public NPUKernel { const std::vector &npu_inputs) override; ge::Operator *GetNPUOp() override; + protected: + int SetActivation(const ge::Operator *input, int act_type); + private: hiai::op::Scale *op_ = nullptr; + hiai::op::Const *scale_ = nullptr; + hiai::op::Const *bias_ = nullptr; + hiai::op::Activation *act_ = nullptr; ScaleParameter *scale_parameter_; }; } // namespace mindspore::kernel diff --git a/mindspore/lite/test/models_npu.cfg b/mindspore/lite/test/models_npu.cfg index c19e5c68334..e8bf16360c5 100644 --- a/mindspore/lite/test/models_npu.cfg +++ b/mindspore/lite/test/models_npu.cfg @@ -69,3 +69,4 @@ ml_video_edit_v10_best_model_nomean_20200723 8 #ml_edu_kit_hand_detection.onnx 1 ml_edu_kit_hand_key_position.onnx 2 #ml_video_edit_oneclick_adaptis.pb #too many subgraphs +densenet.tflite 3