forked from mindspore-Ecosystem/mindspore
commit
0bd1e34a4d
|
@ -108,11 +108,11 @@ int NPUExecutor::Run(const std::vector<Tensor *> &in_tensors, const std::vector<
|
|||
}
|
||||
break;
|
||||
}
|
||||
if (index == in_tensors.size()) {
|
||||
MS_LOG(ERROR) << "Can't find corresponding ms lite tensor of " << i << " input tensor for npu executor "
|
||||
<< model_name_;
|
||||
return RET_ERROR;
|
||||
}
|
||||
}
|
||||
if (index == in_tensors.size()) {
|
||||
MS_LOG(ERROR) << "Can't find corresponding ms lite tensor of " << i << " input tensor for npu executor "
|
||||
<< model_name_;
|
||||
return RET_ERROR;
|
||||
}
|
||||
}
|
||||
context.AddPara("model_name", model_name_);
|
||||
|
|
|
@ -27,9 +27,7 @@
|
|||
#include "include/HiAiModelManagerService.h"
|
||||
|
||||
namespace mindspore::lite {
|
||||
static std::set<mindspore::schema::PrimitiveType> npu_trans_nodes = {
|
||||
schema::PrimitiveType_Conv2DFusion, schema::PrimitiveType_Conv2dTransposeFusion, schema::PrimitiveType_Resize,
|
||||
schema::PrimitiveType_MaxPoolFusion, schema::PrimitiveType_AvgPoolFusion};
|
||||
|
||||
struct SubGraphModel {
|
||||
public:
|
||||
SubGraphModel(int index, std::string model_name, std::shared_ptr<domi::ModelBufferData> model_buffer_data)
|
||||
|
|
|
@ -117,7 +117,7 @@ int NPUInsertTransformPass::InsertNode(kernel::LiteKernel *kernel, kernel::LiteK
|
|||
std::vector<int> nchw_shape = {nhwc_shape[0], nhwc_shape[3], nhwc_shape[1], nhwc_shape[2]};
|
||||
|
||||
auto nh2nc_name = kernel_name + "_nh2nc_" + std::to_string(total++);
|
||||
auto nh2nc_tensor = new (std::nothrow) Tensor(in_tensor->data_type(), nchw_shape, schema::Format_NHWC, Tensor::VAR);
|
||||
auto nh2nc_tensor = new (std::nothrow) Tensor(in_tensor->data_type(), nchw_shape, schema::Format_NCHW, Tensor::VAR);
|
||||
if (nh2nc_tensor == nullptr) {
|
||||
MS_LOG(ERROR) << "New nchw tensor failed when inserting nchw2nhwc kernel.";
|
||||
return RET_ERROR;
|
||||
|
@ -127,7 +127,7 @@ int NPUInsertTransformPass::InsertNode(kernel::LiteKernel *kernel, kernel::LiteK
|
|||
all_tensors_->push_back(nh2nc_tensors[0]);
|
||||
|
||||
auto nc2nh_name = kernel_name + "_nc2nh_" + std::to_string(total++);
|
||||
auto nc2nh_tensor = new (std::nothrow) Tensor(in_tensor->data_type(), nhwc_shape, schema::Format_NCHW, Tensor::VAR);
|
||||
auto nc2nh_tensor = new (std::nothrow) Tensor(in_tensor->data_type(), nhwc_shape, schema::Format_NHWC, Tensor::VAR);
|
||||
if (nc2nh_tensor == nullptr) {
|
||||
MS_LOG(ERROR) << "New nhwc tensor failed when inserting nhwc2nchw kernel.";
|
||||
return RET_ERROR;
|
||||
|
|
|
@ -15,8 +15,10 @@
|
|||
*/
|
||||
|
||||
#include "src/runtime/agent/npu/optimizer/npu_pass_utils.h"
|
||||
#include <algorithm>
|
||||
#include "src/runtime/agent/npu/npu_manager.h"
|
||||
#include "nnacl/transpose.h"
|
||||
#include "nnacl/scale.h"
|
||||
#include "src/ops/populate/populate_register.h"
|
||||
#include "src/runtime/kernel/arm/fp32/transpose_fp32.h"
|
||||
|
||||
|
@ -47,6 +49,7 @@ kernel::LiteKernel *NPUPassUtils::CreateNchw2NhwcKernel(const std::vector<Tensor
|
|||
kernel->set_desc(key);
|
||||
} else {
|
||||
MS_LOG(ERROR) << "New Nchw2Nhwc Kernel failed.";
|
||||
free(transpose_param);
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
|
@ -106,8 +109,9 @@ void NPUPassUtils::UpdateNH2NCTransNodePreKernel(kernel::LiteKernel *pre_kernel,
|
|||
pre_kernel->set_out_kernels(out_kernels);
|
||||
}
|
||||
|
||||
void NPUPassUtils::UpdateNC2NHTransNodePreKernel(kernel::LiteKernel *pre_kernel, kernel::LiteKernel *trans_kernel,
|
||||
std::vector<kernel::LiteKernel *> kernels) {
|
||||
void NPUPassUtils::UpdateNC2NHTransNodePreKernel(kernel::LiteKernel *pre_kernel,
|
||||
const std::vector<kernel::LiteKernel *> &trans_kernels,
|
||||
const std::vector<kernel::LiteKernel *> &kernels) {
|
||||
// For kernel before trans, there may be multiple outputs.
|
||||
auto cur_out_kernels = pre_kernel->out_kernels();
|
||||
for (size_t i = 0; i < kernels.size(); i++) {
|
||||
|
@ -116,11 +120,11 @@ void NPUPassUtils::UpdateNC2NHTransNodePreKernel(kernel::LiteKernel *pre_kernel,
|
|||
cur_out_kernels.erase(itr);
|
||||
}
|
||||
}
|
||||
cur_out_kernels.push_back(trans_kernel);
|
||||
std::copy(trans_kernels.begin(), trans_kernels.end(), std::back_inserter(cur_out_kernels));
|
||||
pre_kernel->set_out_kernels(cur_out_kernels);
|
||||
// For kernel before trans, the output tensor is used for output tensor of trans, so replace the output tensor with
|
||||
// the input tensor of trans.
|
||||
pre_kernel->set_out_tensors(trans_kernel->in_tensors());
|
||||
pre_kernel->set_out_tensors({trans_kernels.at(0)->in_tensors().at(0)});
|
||||
}
|
||||
|
||||
void NPUPassUtils::UpdateNH2NCTransNodePostKernel(kernel::LiteKernel *trans_kernel, kernel::LiteKernel *post_kernel) {
|
||||
|
@ -230,4 +234,11 @@ kernel::LiteKernel *NPUPassUtils::KernelInputFromKernel(const kernel::LiteKernel
|
|||
}
|
||||
return *it;
|
||||
}
|
||||
|
||||
// Reports whether `kernel` (expected to be a scale kernel) has a one-dimensional second input
// and an axis attribute equal to 3 or -1.
//
// @param kernel  Kernel to inspect; its op parameter is reinterpreted as a ScaleParameter.
// @return true only when both conditions hold. Returns false when the kernel does not provide a
//         usable second input tensor, instead of failing inside std::vector::at().
bool NPUPassUtils::Scale4dCase(const kernel::LiteKernel *kernel) {
  MS_ASSERT(kernel != nullptr && kernel->op_parameter() != nullptr);
  const auto &in_tensors = kernel->in_tensors();
  // The second input is the one whose rank is checked below; without it the case cannot match.
  if (in_tensors.size() < 2 || in_tensors[1] == nullptr) {
    return false;
  }
  const auto *scale_param = reinterpret_cast<const ScaleParameter *>(kernel->op_parameter());
  const bool axis_matches = scale_param->axis_ == 3 || scale_param->axis_ == -1;
  return in_tensors[1]->shape().size() == 1 && axis_matches;
}
|
||||
} // namespace mindspore::lite
|
||||
|
|
|
@ -37,8 +37,9 @@ class NPUPassUtils {
|
|||
static void UpdateNH2NCTransNodePreKernel(kernel::LiteKernel *pre_kernel, kernel::LiteKernel *trans_kernel,
|
||||
kernel::LiteKernel *kernel);
|
||||
|
||||
static void UpdateNC2NHTransNodePreKernel(kernel::LiteKernel *pre_kernel, kernel::LiteKernel *trans_kernel,
|
||||
std::vector<kernel::LiteKernel *> kernels);
|
||||
static void UpdateNC2NHTransNodePreKernel(kernel::LiteKernel *pre_kernel,
|
||||
const std::vector<kernel::LiteKernel *> &trans_kernels,
|
||||
const std::vector<kernel::LiteKernel *> &kernels);
|
||||
|
||||
static void UpdateNH2NCTransNodePostKernel(kernel::LiteKernel *trans_kernel, kernel::LiteKernel *post_kernel);
|
||||
|
||||
|
@ -52,6 +53,7 @@ class NPUPassUtils {
|
|||
|
||||
static bool IsNchw2Nhwc(const kernel::LiteKernel *kernel);
|
||||
static kernel::LiteKernel *KernelInputFromKernel(const kernel::LiteKernel *kernel, size_t in_tensor_index);
|
||||
static bool Scale4dCase(const kernel::LiteKernel *kernel);
|
||||
};
|
||||
} // namespace mindspore::lite
|
||||
#endif // MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_OPTIMIZER_NPU_PASS_UTILS_H_
|
||||
|
|
|
@ -14,12 +14,18 @@
|
|||
* limitations under the License.
|
||||
*/
|
||||
#include "src/runtime/agent/npu/optimizer/npu_transform_pass.h"
|
||||
#include <set>
|
||||
#include <vector>
|
||||
#include "src/lite_kernel.h"
|
||||
#include "src/runtime/agent/npu/npu_manager.h"
|
||||
#include "src/runtime/agent/npu/optimizer/npu_pass_utils.h"
|
||||
namespace mindspore::lite {
|
||||
using kernel::KERNEL_ARCH::kNPU;
|
||||
|
||||
// Primitive types consulted by the transform pass: InsertPostNodes treats a following kernel as
// needing an inserted transpose kernel unless it runs on NPU and its type is in this set.
// NOTE(review): presumably these are the ops that run in NCHW layout on the NPU — confirm.
static std::set<mindspore::schema::PrimitiveType> npu_trans_nodes = {
|
||||
schema::PrimitiveType_Conv2DFusion, schema::PrimitiveType_Conv2dTransposeFusion, schema::PrimitiveType_Resize,
|
||||
schema::PrimitiveType_MaxPoolFusion, schema::PrimitiveType_AvgPoolFusion, schema::PrimitiveType_ScaleFusion};
|
||||
|
||||
int NPUTransformPass::InsertPreNodes(kernel::LiteKernel *kernel, std::vector<kernel::LiteKernel *> *trans_kernels) {
|
||||
bool is_input_kernel = kernel->in_kernels().empty();
|
||||
// single input
|
||||
|
@ -80,57 +86,93 @@ int NPUTransformPass::InsertPostNodes(kernel::LiteKernel *kernel, std::vector<ke
|
|||
// Get the post kernel that need insert trans kernel.
|
||||
// If no need for inserting trans kernel, the post kernel must be npu and in trans_nodes.
|
||||
std::vector<kernel::LiteKernel *> post_insert_kernels;
|
||||
std::vector<kernel::LiteKernel *> post_non_insert_kernels;
|
||||
for (int i = 0; i < kernel->out_kernels().size(); i++) {
|
||||
auto post_kernel = kernel->out_kernels()[i];
|
||||
if (post_kernel->desc().arch != kNPU || npu_trans_nodes.find(post_kernel->Type()) == npu_trans_nodes.end()) {
|
||||
post_insert_kernels.push_back(post_kernel);
|
||||
} else {
|
||||
post_non_insert_kernels.push_back(post_kernel);
|
||||
}
|
||||
}
|
||||
if (is_output_kernel || !post_insert_kernels.empty()) {
|
||||
// Create post transform kernel's in tensor.
|
||||
auto nhwc_shape = kernel->out_tensors()[0]->shape();
|
||||
std::vector<int> nchw_shape = {nhwc_shape[0], nhwc_shape[3], nhwc_shape[1], nhwc_shape[2]};
|
||||
auto tensor =
|
||||
auto nc2nh_tensor =
|
||||
new (std::nothrow) Tensor(kernel->out_tensors()[0]->data_type(), nchw_shape, schema::Format_NCHW, Tensor::VAR);
|
||||
if (tensor == nullptr) {
|
||||
if (nc2nh_tensor == nullptr) {
|
||||
MS_LOG(ERROR) << "New nchw tensor failed when inserting post nchw2nhwc kernel.";
|
||||
return RET_ERROR;
|
||||
}
|
||||
std::vector<Tensor *> post_trans_in_tensors = {tensor};
|
||||
all_tensors_->push_back(tensor);
|
||||
all_tensors_->push_back(nc2nh_tensor);
|
||||
auto name = kernel->name() + "_post_trans" + "_Nchw2Nhwc" + std::to_string(total++);
|
||||
tensor->set_tensor_name(name + "/input0");
|
||||
nc2nh_tensor->set_tensor_name(name + "/input0");
|
||||
|
||||
auto nc2nh_perm_tensor = new Tensor(kNumberTypeInt32, {4}, schema::Format_NHWC, Tensor::CONST_TENSOR);
|
||||
auto nc2nh_data = nc2nh_perm_tensor->MutableData();
|
||||
if (nc2nh_data == nullptr) {
|
||||
return RET_ERROR;
|
||||
}
|
||||
|
||||
std::vector<int> nc2nh_perm_vector = {0, 2, 3, 1};
|
||||
memcpy(nc2nh_data, nc2nh_perm_vector.data(), 4 * sizeof(int));
|
||||
all_tensors_->push_back(nc2nh_perm_tensor);
|
||||
|
||||
// Create post transform kernel: Nchw2Nhwc
|
||||
auto *post_trans_kernel = NPUPassUtils::CreateNchw2NhwcKernel({post_trans_in_tensors[0], nc2nh_perm_tensor},
|
||||
kernel->out_tensors(), context_, name);
|
||||
|
||||
// Set in_kernels, out_kernels, in_tensors, out_tensors for transform kernel
|
||||
NPUPassUtils::UpdateKernel(post_trans_kernel, {kernel}, post_insert_kernels, post_trans_kernel->in_tensors(),
|
||||
kernel->out_tensors());
|
||||
trans_kernels->push_back(post_trans_kernel);
|
||||
|
||||
if (!is_output_kernel) {
|
||||
for (int i = 0; i < kernel->out_kernels().size(); i++) {
|
||||
auto post_kernel = kernel->out_kernels()[i];
|
||||
if (find(post_insert_kernels.begin(), post_insert_kernels.end(), post_kernel) != post_insert_kernels.end()) {
|
||||
NPUPassUtils::UpdateNC2NHTransNodePostKernel(kernel, post_trans_kernel, post_kernel);
|
||||
} else {
|
||||
NPUPassUtils::UpdateNC2NHPostKernelInTensors(kernel, post_trans_kernel, post_kernel);
|
||||
}
|
||||
if (is_output_kernel) {
|
||||
// perm tensor
|
||||
auto nc2nh_perm_tensor = new Tensor(kNumberTypeInt32, {4}, schema::Format_NHWC, Tensor::CONST_TENSOR);
|
||||
auto nc2nh_data = nc2nh_perm_tensor->MutableData();
|
||||
if (nc2nh_data == nullptr) {
|
||||
return RET_ERROR;
|
||||
}
|
||||
std::vector<int> nc2nh_perm_vector = {0, 2, 3, 1};
|
||||
memcpy(nc2nh_data, nc2nh_perm_vector.data(), 4 * sizeof(int));
|
||||
all_tensors_->push_back(nc2nh_perm_tensor);
|
||||
std::vector<lite::Tensor *> nc2nh_out_tensors{kernel->out_tensors().at(0)};
|
||||
// Create post transform kernel: Nchw2Nhwc
|
||||
auto *post_trans_kernel =
|
||||
NPUPassUtils::CreateNchw2NhwcKernel({nc2nh_tensor, nc2nh_perm_tensor}, nc2nh_out_tensors, context_, name);
|
||||
// Set in_kernels, out_kernels, in_tensors, out_tensors for transform kernel
|
||||
NPUPassUtils::UpdateKernel(post_trans_kernel, {kernel}, {}, post_trans_kernel->in_tensors(),
|
||||
post_trans_kernel->out_tensors());
|
||||
trans_kernels->push_back(post_trans_kernel);
|
||||
}
|
||||
NPUPassUtils::UpdateNC2NHTransNodePreKernel(kernel, post_trans_kernel, post_insert_kernels);
|
||||
// for each to-be-insert out kernel, create one transpose kernel, one perm tensor, one out tensor
|
||||
// but using same one in_tensor.
|
||||
for (auto i = 0; i < post_insert_kernels.size(); ++i) {
|
||||
auto post_insert_kernel = post_insert_kernels.at(i);
|
||||
// perm tensor
|
||||
auto nc2nh_perm_tensor = new Tensor(kNumberTypeInt32, {4}, schema::Format_NHWC, Tensor::CONST_TENSOR);
|
||||
auto nc2nh_data = nc2nh_perm_tensor->MutableData();
|
||||
if (nc2nh_data == nullptr) {
|
||||
return RET_ERROR;
|
||||
}
|
||||
std::vector<int> nc2nh_perm_vector = {0, 2, 3, 1};
|
||||
memcpy(nc2nh_data, nc2nh_perm_vector.data(), 4 * sizeof(int));
|
||||
all_tensors_->push_back(nc2nh_perm_tensor);
|
||||
// nc2nh kernel out tensor: 1st kernel uses original out_tensor, remaining kernels use newly created out tensor.
|
||||
std::vector<lite::Tensor *> nc2nh_out_tensors{nullptr};
|
||||
|
||||
auto origin_out_tensor = kernel->out_tensors().at(0);
|
||||
auto out_tensor = lite::Tensor::CopyTensor(*origin_out_tensor, false);
|
||||
if (out_tensor == nullptr) {
|
||||
MS_LOG(ERROR) << "New nhwc tensor failed when inserting post nchw2nhwc kernel.";
|
||||
return RET_ERROR;
|
||||
}
|
||||
all_tensors_->push_back(out_tensor);
|
||||
auto out_tensor_name = kernel->name() + "_post_trans" + "_Nchw2Nhwc_" + std::to_string(i) + "_out_tensor";
|
||||
out_tensor->set_tensor_name(out_tensor_name);
|
||||
nc2nh_out_tensors[0] = out_tensor;
|
||||
|
||||
// Create post transform kernel: Nchw2Nhwc
|
||||
auto *post_trans_kernel =
|
||||
NPUPassUtils::CreateNchw2NhwcKernel({nc2nh_tensor, nc2nh_perm_tensor}, nc2nh_out_tensors, context_, name);
|
||||
// Set in_kernels, out_kernels, in_tensors, out_tensors for transform kernel
|
||||
NPUPassUtils::UpdateKernel(post_trans_kernel, {kernel}, {post_insert_kernel}, post_trans_kernel->in_tensors(),
|
||||
post_trans_kernel->out_tensors());
|
||||
trans_kernels->push_back(post_trans_kernel);
|
||||
// update post kernel in_tensors in_kernels
|
||||
NPUPassUtils::UpdateNC2NHTransNodePostKernel(kernel, post_trans_kernel, post_insert_kernel);
|
||||
}
|
||||
// for those non-insert post kernels, update their in_tensor
|
||||
for (auto non_insert_kernel : post_non_insert_kernels) {
|
||||
auto in_tensors = non_insert_kernel->in_tensors();
|
||||
std::replace(in_tensors.begin(), in_tensors.end(), kernel->out_tensors().at(0), nc2nh_tensor);
|
||||
non_insert_kernel->set_in_tensors(in_tensors);
|
||||
}
|
||||
// update origin kernel's out tensor and out kernel
|
||||
NPUPassUtils::UpdateNC2NHTransNodePreKernel(kernel, *trans_kernels, post_insert_kernels);
|
||||
}
|
||||
return RET_OK;
|
||||
}
|
||||
|
@ -142,6 +184,10 @@ int NPUTransformPass::Run() {
|
|||
i++;
|
||||
continue;
|
||||
}
|
||||
if (kernel->Type() == schema::PrimitiveType_ScaleFusion && !NPUPassUtils::Scale4dCase(kernel)) {
|
||||
i++;
|
||||
continue;
|
||||
}
|
||||
if (kernel->Type() == schema::PrimitiveType_Resize &&
|
||||
kernel->in_tensors()[0]->Height() > kernel->out_tensors()[0]->Height()) {
|
||||
i++;
|
||||
|
|
|
@ -33,6 +33,10 @@ namespace mindspore::kernel {
|
|||
using mindspore::lite::RET_ERROR;
|
||||
using mindspore::lite::RET_OK;
|
||||
|
||||
// Node types excluded from the generic weight handling in BuildNPUInputOp: a weight tensor gets a
// hiai::op::Const created there only when its node's type is NOT in this set.
// NOTE(review): presumably the kernels for these types build their own weight constants — confirm
// against each kernel implementation.
static std::set<mindspore::schema::PrimitiveType> npu_specific_weight_nodes = {
|
||||
schema::PrimitiveType_Conv2DFusion, schema::PrimitiveType_Conv2dTransposeFusion, schema::PrimitiveType_ScaleFusion,
|
||||
schema::PrimitiveType_BatchNorm, schema::PrimitiveType_FullConnection, schema::PrimitiveType_InstanceNorm};
|
||||
|
||||
SubGraphNpuKernel::~SubGraphNpuKernel() {
|
||||
subgraph_input_op_.clear();
|
||||
subgraph_output_op_.clear();
|
||||
|
@ -125,7 +129,7 @@ int SubGraphNpuKernel::BuildNPUInputOp() {
|
|||
|
||||
// weight tensor
|
||||
if (is_weight_tensor) {
|
||||
if (lite::npu_trans_nodes.find(node->Type()) == lite::npu_trans_nodes.end()) {
|
||||
if (npu_specific_weight_nodes.find(node->Type()) == npu_specific_weight_nodes.end()) {
|
||||
auto name = node->name() + "_" + std::to_string(count++);
|
||||
auto weight_const = new (std::nothrow) hiai::op::Const(node->name() + "_" + std::to_string(count++));
|
||||
if (weight_const == nullptr) {
|
||||
|
|
|
@ -15,10 +15,13 @@
|
|||
*/
|
||||
|
||||
#include "src/runtime/kernel/npu/scale_npu.h"
|
||||
#include <memory>
|
||||
#include "src/kernel_registry.h"
|
||||
#include "src/runtime/agent/npu/npu_converter_utils.h"
|
||||
|
||||
using mindspore::kernel::KERNEL_ARCH::kNPU;
|
||||
using mindspore::lite::KernelRegistrar;
|
||||
using mindspore::schema::Format_NHWC;
|
||||
using mindspore::schema::PrimitiveType_ScaleFusion;
|
||||
|
||||
namespace mindspore::kernel {
|
||||
|
@ -27,6 +30,13 @@ int ScaleNPUKernel::IsSupport(const std::vector<lite::Tensor *> &inputs, const s
|
|||
if (scale_parameter_->axis_ < 0) {
|
||||
scale_parameter_->axis_ = scale_parameter_->axis_ + inputs[0]->shape().size();
|
||||
}
|
||||
if (inputs.size() > 1 && inputs[0]->shape().size() == 4 && inputs[0]->format() == schema::Format_NHWC) {
|
||||
if (scale_parameter_->axis_ != 3) {
|
||||
MS_LOG(ERROR) << "Npu scale axis attr only support on channel, now is " << scale_parameter_->axis_;
|
||||
return RET_ERROR;
|
||||
}
|
||||
return RET_OK;
|
||||
}
|
||||
if (scale_parameter_->axis_ != 1) {
|
||||
MS_LOG(ERROR) << "Npu scale axis attr only support 1, now is " << scale_parameter_->axis_;
|
||||
return RET_ERROR;
|
||||
|
@ -41,22 +51,102 @@ int ScaleNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs, cons
|
|||
MS_LOG(ERROR) << name_ << " op is nullptr";
|
||||
return RET_ERROR;
|
||||
}
|
||||
op_->set_attr_axis(scale_parameter_->axis_);
|
||||
op_->set_input_x(*npu_inputs[0]);
|
||||
op_->set_input_scale(*npu_inputs[1]);
|
||||
if (npu_inputs[2] != nullptr) {
|
||||
op_->set_input_bias(*npu_inputs[2]);
|
||||
op_->set_attr_axis(1); // only support axis 1 now
|
||||
op_->set_input_x(*npu_inputs.at(0));
|
||||
|
||||
MS_ASSERT(inputs.size() > 1);
|
||||
auto scale_shape = inputs.at(1)->shape();
|
||||
std::shared_ptr<ge::Tensor> scale_tensor = std::shared_ptr<ge::Tensor>(new (std::nothrow) ge::Tensor());
|
||||
if (scale_tensor == nullptr) {
|
||||
MS_LOG(ERROR) << "new scale_tensor failed.";
|
||||
return RET_ERROR;
|
||||
}
|
||||
ge::TensorDesc scale_tensor_desc(lite::ConverterToNPUShape({1, scale_shape[0], 1, 1}), ge::FORMAT_NCHW,
|
||||
lite::ConverterToNPUDataType(inputs[1]->data_type()));
|
||||
scale_tensor->SetTensorDesc(scale_tensor_desc);
|
||||
scale_tensor->SetData(reinterpret_cast<const uint8_t *>(inputs[1]->data_c()), inputs[1]->Size());
|
||||
scale_ = new (std::nothrow) hiai::op::Const(name_ + "_scale");
|
||||
if (scale_ == nullptr) {
|
||||
MS_LOG(ERROR) << "New scale_ const failed.";
|
||||
return RET_ERROR;
|
||||
}
|
||||
scale_->set_attr_value(scale_tensor);
|
||||
op_->set_input_scale(*scale_);
|
||||
|
||||
if (inputs.size() > 2 && inputs[2] != nullptr) {
|
||||
auto bias_shape = inputs[2]->shape();
|
||||
std::shared_ptr<ge::Tensor> bias_tensor = std::shared_ptr<ge::Tensor>(new (std::nothrow) ge::Tensor());
|
||||
if (bias_tensor == nullptr) {
|
||||
MS_LOG(ERROR) << "new bias_tensor failed.";
|
||||
return RET_ERROR;
|
||||
}
|
||||
ge::TensorDesc bias_tensor_desc(lite::ConverterToNPUShape({1, bias_shape[0], 1, 1}), ge::FORMAT_NCHW,
|
||||
lite::ConverterToNPUDataType(inputs[2]->data_type()));
|
||||
bias_tensor->SetTensorDesc(bias_tensor_desc);
|
||||
bias_tensor->SetData(reinterpret_cast<const uint8_t *>(inputs[2]->data_c()), inputs[2]->Size());
|
||||
bias_ = new (std::nothrow) hiai::op::Const(name_ + "_beta");
|
||||
if (bias_ == nullptr) {
|
||||
MS_LOG(ERROR) << "New beta_ const failed.";
|
||||
return RET_ERROR;
|
||||
}
|
||||
bias_->set_attr_value(bias_tensor);
|
||||
op_->set_input_bias(*bias_);
|
||||
}
|
||||
|
||||
if (scale_parameter_->activation_type_ != schema::ActivationType_NO_ACTIVATION) {
|
||||
auto ret = SetActivation(op_, scale_parameter_->activation_type_);
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "New activation npu operator for op " << name_ << " failed.";
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
ge::Operator *mindspore::kernel::ScaleNPUKernel::GetNPUOp() { return this->op_; }
|
||||
// Returns the operator that represents this kernel's result: the activation operator when an
// activation type is configured on the scale parameter, otherwise the scale operator itself.
ge::Operator *mindspore::kernel::ScaleNPUKernel::GetNPUOp() {
  if (scale_parameter_->activation_type_ != schema::ActivationType_NO_ACTIVATION) {
    return act_;
  }
  return op_;
}
|
||||
|
||||
// Creates the activation operator `act_`, feeds it from `input`, and selects its mode.
//
// @param input     Operator whose output becomes the activation's x input.
// @param act_type  Activation type; only RELU and RELU6 are accepted.
// @return RET_OK on success; RET_ERROR when allocation fails or the type is not supported.
//         On an unsupported type `act_` stays allocated and is released by the destructor.
int ScaleNPUKernel::SetActivation(const ge::Operator *input, int act_type) {
  act_ = new (std::nothrow) hiai::op::Activation(name_ + "_act");
  if (act_ == nullptr) {
    MS_LOG(ERROR) << "New activation npu operator for op " << name_ << " failed.";
    return RET_ERROR;
  }
  act_->set_input_x(*input);

  switch (act_type) {
    case schema::ActivationType_RELU:
      act_->set_attr_mode(1);  // mode value used for RELU
      return RET_OK;
    case schema::ActivationType_RELU6:
      act_->set_attr_mode(14);  // mode value used for RELU6
      return RET_OK;
    default:
      MS_LOG(ERROR) << "Unsupported activation type for scale.";
      return RET_ERROR;
  }
}
|
||||
|
||||
// Releases the NPU operators owned by this kernel (scale op, scale/bias constants, activation)
// and resets each pointer. `delete` on a null pointer is a no-op, so no explicit checks are needed.
ScaleNPUKernel::~ScaleNPUKernel() {
  delete op_;
  op_ = nullptr;

  delete scale_;
  scale_ = nullptr;

  delete bias_;
  bias_ = nullptr;

  delete act_;
  act_ = nullptr;
}
|
||||
|
||||
REG_KERNEL(kNPU, kNumberTypeFloat32, PrimitiveType_ScaleFusion, NPUKernelCreator<ScaleNPUKernel>)
|
||||
|
|
|
@ -19,6 +19,7 @@
|
|||
#include <vector>
|
||||
#include "nnacl/scale.h"
|
||||
#include "src/runtime/kernel/npu/npu_kernel.h"
|
||||
#include "include/graph/op/all_ops.h"
|
||||
#include "include/graph/op/nn_defs.h"
|
||||
namespace mindspore::kernel {
|
||||
class ScaleNPUKernel : public NPUKernel {
|
||||
|
@ -36,8 +37,14 @@ class ScaleNPUKernel : public NPUKernel {
|
|||
const std::vector<ge::Operator *> &npu_inputs) override;
|
||||
ge::Operator *GetNPUOp() override;
|
||||
|
||||
protected:
|
||||
int SetActivation(const ge::Operator *input, int act_type);
|
||||
|
||||
private:
|
||||
hiai::op::Scale *op_ = nullptr;
|
||||
hiai::op::Const *scale_ = nullptr;
|
||||
hiai::op::Const *bias_ = nullptr;
|
||||
hiai::op::Activation *act_ = nullptr;
|
||||
ScaleParameter *scale_parameter_;
|
||||
};
|
||||
} // namespace mindspore::kernel
|
||||
|
|
|
@ -69,3 +69,4 @@ ml_video_edit_v10_best_model_nomean_20200723 8
|
|||
#ml_edu_kit_hand_detection.onnx 1
|
||||
ml_edu_kit_hand_key_position.onnx 2
|
||||
#ml_video_edit_oneclick_adaptis.pb #too many subgraphs
|
||||
densenet.tflite 3
|
||||
|
|
Loading…
Reference in New Issue