!30256 [MS][LITE] fix npu op bugs

Merge pull request !30256 from XianglongZeng/myms_new
i-robot 2022-02-23 02:15:05 +00:00 committed by Gitee
commit 31d4c91ef1
10 changed files with 81 additions and 15 deletions

View File

@@ -96,8 +96,8 @@ void AssistDataNHWC2NCHW(int *data, size_t unit_size);
 int MaskDataNHWC2NCHW(int mask);
 template <typename T>
-ge::Operator *GetNPUConst(const uint8_t *const_data, const std::vector<int64_t> &shape, const ge::DataType data_type,
-                          std::string name = "const", bool is_expand_4d = false) {
+hiai::op::Const *GetNPUConst(const uint8_t *const_data, const std::vector<int64_t> &shape, const ge::DataType data_type,
+                             std::string name = "const", bool is_expand_4d = false) {
   MS_CHECK_TRUE_MSG(const_data != nullptr, nullptr, "Const data can not be nullptr.");
   int element_num = 1;
   if (!shape.empty()) {
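Note: the return type of GetNPUConst is narrowed from ge::Operator * to hiai::op::Const *, so call sites can feed the constant into typed operator inputs directly and keep a correctly typed pointer for later cleanup. A minimal call-site sketch, assuming the HiAI graph API as used elsewhere in this diff (names such as example_shape and BuildExampleReshape are illustrative only):

#include <cstdint>
#include <vector>
#include "include/graph/op/all_ops.h"

// Build a 2-element int32 shape Const {1, 60} and wire it into a Reshape.
bool BuildExampleReshape() {
  std::vector<int> target_shape = {1, 60};
  auto data_ptr = reinterpret_cast<const uint8_t *>(target_shape.data());
  hiai::op::Const *shape_const = GetNPUConst<int>(
      data_ptr, {static_cast<int64_t>(target_shape.size())}, ge::DT_INT32, "example_shape");
  if (shape_const == nullptr) {
    return false;
  }
  hiai::op::Data input_op("example_input");
  hiai::op::Reshape reshape("example_reshape");
  reshape.set_input_x(input_op);
  reshape.set_input_shape(*shape_const);  // typed Const input, no downcast needed
  return true;
}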

View File

@@ -186,6 +186,7 @@ Status NPUDelegate::Init() {
     {schema::PrimitiveType_Transpose, GetNPUOp<TransposeNPUOp>},
     {schema::PrimitiveType_Unsqueeze, GetNPUOp<UnsqueezeNPUOp>},
     {schema::PrimitiveType_Abs, GetNPUOp<AbsNPUOp>},
+    {schema::PrimitiveType_Flatten, GetNPUOp<FlattenNPUOp>},
   };
   return mindspore::kSuccess;
 }
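For context, this map dispatches each schema primitive type to a templated creator, so registering PrimitiveType_Flatten here is what routes Flatten nodes to the new FlattenNPUOp. A rough sketch of what such a creator could look like; the actual GetNPUOp in npu_delegate.cc may differ in signature and checks:

template <typename T>
NPUOp *GetNPUOp(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
                const std::vector<mindspore::MSTensor> &out_tensors, std::string name) {
  auto *op = new (std::nothrow) T(primitive, in_tensors, out_tensors, name);
  if (op == nullptr) {
    return nullptr;
  }
  // Reject the node early if the NPU cannot run it; the delegate then falls back to CPU.
  if (op->IsSupport(primitive, in_tensors, out_tensors) != RET_OK ||
      op->Init(primitive, in_tensors, out_tensors) != RET_OK) {
    delete op;
    return nullptr;
  }
  return op;
}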

View File

@@ -16,7 +16,11 @@
 #include "src/delegate/npu/op/avg_pooling_npu.h"
 #include "src/delegate/npu/npu_converter_utils.h"
+#include "src/delegate/npu/npu_manager.h"
 
 namespace mindspore {
+constexpr int MAX_HW_SIZE = 65534;
 int AvgPoolingNPUOp::IsSupport(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
                                const std::vector<mindspore::MSTensor> &out_tensors) {
   auto pooling_prim = primitive->value_as_AvgPoolFusion();
@@ -32,6 +36,13 @@ int AvgPoolingNPUOp::IsSupport(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
     MS_LOG(WARNING) << "Npu pooling does not support pad > stride.";
     return RET_NOT_SUPPORT;
   }
+  auto input_shape = in_tensors.front().Shape();
+  auto height = input_shape.at(NHWC_H);
+  auto width = input_shape.at(NHWC_W);
+  if (!NPUManager::CheckDDKVerGreatEqual("100.330.011.032") && height * width > MAX_HW_SIZE) {
+    MS_LOG(WARNING) << "The pooling size of " << name_ << " exceeds the max size that NPU support.";
+    return RET_NOT_SUPPORT;
+  }
   return RET_OK;
 }
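The new guard only bites on older DDK versions: 100.330.011.032 and later handle large spatial sizes, while older drivers reject average pooling whose input plane exceeds 65534 elements. A quick self-contained illustration of the arithmetic (the helper name is hypothetical):

#include <cstdint>
#include <vector>

constexpr int MAX_HW_SIZE = 65534;

// Returns true if the NHWC input plane fits the old-DDK limit, or the DDK is new enough.
bool SpatialSizeSupported(const std::vector<int64_t> &nhwc_shape, bool ddk_is_new_enough) {
  const int64_t height = nhwc_shape.at(1);  // NHWC_H
  const int64_t width = nhwc_shape.at(2);   // NHWC_W
  return ddk_is_new_enough || height * width <= MAX_HW_SIZE;
}

// SpatialSizeSupported({1, 256, 256, 32}, false) == false  (65536 > 65534)
// SpatialSizeSupported({1, 255, 255, 32}, false) == true   (65025 <= 65534)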

View File

@@ -58,6 +58,11 @@ int ConvolutionBaseNPUOp::InitWeightConst(const std::vector<mindspore::MSTensor>
   if (inputs[1].DataType() == DataType::kNumberTypeFloat16) {
 #ifdef ENABLE_ARM64
     nchw_weight_ = reinterpret_cast<float *>(malloc(inputs[1].ElementNum() * sizeof(float)));
+    if (nchw_weight_ == nullptr) {
+      MS_LOG(ERROR) << "Malloc buffer failed.";
+      return RET_ERROR;
+    }
     fp32_weight_ = reinterpret_cast<float *>(malloc(inputs[1].ElementNum() * sizeof(float)));
     if (fp32_weight_ == nullptr) {
       MS_LOG(ERROR) << "Malloc buffer failed.";
View File

@@ -17,6 +17,7 @@
 #include "src/delegate/npu/op/flatten_npu.h"
 #include "include/graph/op/all_ops.h"
 #include "src/delegate/npu/npu_converter_utils.h"
+#include "src/delegate/npu/npu_manager.h"
 
 namespace mindspore {
 int FlattenNPUOp::IsSupport(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
@@ -25,15 +26,24 @@ int FlattenNPUOp::IsSupport(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
     MS_LOG(WARNING) << "The output tensor can only be flatten to 2 dimension.";
     return RET_NOT_SUPPORT;
   }
+  use_reshape_ = !NPUManager::CheckDDKVerGreatEqual("100.330.011.032");
   return RET_OK;
 }
 
 int FlattenNPUOp::Init(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
                        const std::vector<mindspore::MSTensor> &out_tensors) {
-  flatten_ = new (std::nothrow) hiai::op::Flatten(name_);
-  if (flatten_ == nullptr) {
-    MS_LOG(ERROR) << name_ << " op is nullptr";
-    return RET_ERROR;
+  if (use_reshape_) {
+    reshape_ = new (std::nothrow) hiai::op::Reshape(name_ + "_reshape");
+    if (reshape_ == nullptr) {
+      MS_LOG(ERROR) << "New Reshape operator for op " << name_ << " failed.";
+      return RET_ERROR;
+    }
+  } else {
+    flatten_ = new (std::nothrow) hiai::op::Flatten(name_);
+    if (flatten_ == nullptr) {
+      MS_LOG(ERROR) << "New Flatten operator for op " << name_ << " failed.";
+      return RET_ERROR;
+    }
   }
   return RET_OK;
 }
@@ -41,16 +51,47 @@ int FlattenNPUOp::Init(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
 int FlattenNPUOp::SetNPUInputs(const std::vector<mindspore::MSTensor> &in_tensors,
                                const std::vector<mindspore::MSTensor> &out_tensors,
                                const std::vector<ge::Operator *> &npu_inputs) {
-  flatten_->set_input_x(*npu_inputs[0]);
+  if (use_reshape_) {
+    auto output_shape = out_tensors.front().Shape();
+    int64_t dims = output_shape.size();
+    std::vector<int> valid_shape;
+    for (int i = 0; i < dims; i++) {
+      valid_shape.emplace_back(static_cast<int>(output_shape.at(i)));
+    }
+    auto valid_data_ptr = reinterpret_cast<const uint8_t *>(valid_shape.data());
+    shape_ = GetNPUConst<int>(valid_data_ptr, {dims}, ge::DT_INT32, name_ + "_shape");
+    if (shape_ == nullptr) {
+      MS_LOG(ERROR) << "Get NPU Const for Reshape failed.";
+      return RET_ERROR;
+    }
+    reshape_->set_input_x(*npu_inputs[0]);
+    reshape_->set_input_shape(*shape_);
+  } else {
+    flatten_->set_input_x(*npu_inputs[0]);
+  }
   return RET_OK;
 }
 
-ge::Operator *FlattenNPUOp::GetNPUOp() { return this->flatten_; }
+ge::Operator *FlattenNPUOp::GetNPUOp() {
+  if (use_reshape_) {
+    return this->reshape_;
+  } else {
+    return this->flatten_;
+  }
+}
 
 FlattenNPUOp::~FlattenNPUOp() {
   if (flatten_ != nullptr) {
     delete flatten_;
     flatten_ = nullptr;
   }
+  if (reshape_ != nullptr) {
+    delete reshape_;
+    reshape_ = nullptr;
+  }
+  if (shape_ != nullptr) {
+    delete shape_;
+    shape_ = nullptr;
+  }
 }
 }  // namespace mindspore
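The fallback works because flattening to 2-D is just a reshape to {batch, product of the remaining axes}: on DDK versions older than 100.330.011.032 the op therefore emits hiai::op::Reshape fed by an int32 shape Const instead of hiai::op::Flatten, with the target shape taken from the (already 2-D) output tensor. A small sketch of the underlying shape computation (the helper name is illustrative):

#include <cstdint>
#include <vector>

// Collapse every axis after the batch axis into one dimension.
std::vector<int32_t> FlattenTo2D(const std::vector<int64_t> &shape) {
  int64_t inner = 1;
  for (size_t i = 1; i < shape.size(); ++i) {
    inner *= shape[i];
  }
  return {static_cast<int32_t>(shape[0]), static_cast<int32_t>(inner)};
}

// FlattenTo2D({2, 3, 4, 5}) == {2, 60}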

View File

@@ -43,6 +43,9 @@ class FlattenNPUOp : public NPUOp {
  private:
   hiai::op::Flatten *flatten_ = nullptr;
+  hiai::op::Reshape *reshape_ = nullptr;
+  hiai::op::Const *shape_ = nullptr;
+  bool use_reshape_ = false;
 };
 }  // namespace mindspore
 #endif  // MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_NPU_OP_FLATTEN_NPU_H_

View File

@@ -32,8 +32,11 @@ int SplitNPUOp::Init(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
     return RET_ERROR;
   }
-  axis_ = static_cast<int>(split_prim->axis());
-  auto split_dim = in_tensors.at(0).Shape().at(axis_);
+  auto in_tensor = in_tensors.at(0);
+  auto axis = static_cast<int>(split_prim->axis());
+  axis_ = axis >= 0 ? axis : axis + static_cast<int>(in_tensor.Shape().size());
+  MS_CHECK_TRUE_MSG(axis_ >= 0, RET_ERROR, "The split axis is illegal!");
+  auto split_dim = in_tensor.Shape().at(axis_);
   auto sizes_split = split_prim->size_splits();
   int size = split_prim->output_num();
   std::vector<int> sizes_split_vec;
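This rewrite normalizes a negative split axis before it is used to index the input shape; a negative axis counts from the last dimension, so indexing with it directly would have been out of range. A tiny standalone illustration (the helper name is hypothetical):

#include <cstdint>
#include <vector>

int NormalizeAxis(int axis, const std::vector<int64_t> &shape) {
  return axis >= 0 ? axis : axis + static_cast<int>(shape.size());
}

// NormalizeAxis(-1, {1, 32, 32, 3}) == 3
// NormalizeAxis(2, {1, 32, 32, 3})  == 2
// NormalizeAxis(-5, {1, 32, 32, 3}) == -1, which the new MS_CHECK_TRUE_MSG rejects.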

View File

@@ -27,7 +27,8 @@ namespace mindspore {
 std::set<mindspore::schema::PrimitiveType> insert_nodes = {
   schema::PrimitiveType_Concat,       schema::PrimitiveType_AddFusion, schema::PrimitiveType_Eltwise,
   schema::PrimitiveType_Activation,   schema::PrimitiveType_Split,     schema::PrimitiveType_PadFusion,
-  schema::PrimitiveType_StridedSlice, schema::PrimitiveType_MulFusion, schema::PrimitiveType_DivFusion};
+  schema::PrimitiveType_StridedSlice, schema::PrimitiveType_MulFusion, schema::PrimitiveType_DivFusion,
+  schema::PrimitiveType_Cast};
 
 // this pass goal is to minimize subgraphs generated
 // by inserting nchw2nhwc or nhwc2nchw before or after the operator (e.g. concat, add, etc..) together with

View File

@@ -14,8 +14,8 @@
  * limitations under the License.
  */
-#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_SCATTER_ND_H_
-#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_SCATTER_ND_H_
+#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_BASE_SCATTER_ND_BASE_H_
+#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_BASE_SCATTER_ND_BASE_H_
 
 #include <vector>
 #include "src/inner_kernel.h"
@@ -43,4 +43,4 @@ class ScatterNDCPUKernel : public InnerKernel {
 };
 }  // namespace mindspore::kernel
 
-#endif  // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_SCATTER_ND_H_
+#endif  // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_BASE_SCATTER_ND_BASE_H_

View File

@@ -45,7 +45,8 @@ squeezenet1.0-9.onnx;1:data_0
 residual_distill_cifar10_bs_1.onnx;1:actual_input
 residual_distill_cifar10_bs_32.onnx;1:actual_input
 residual_distill_bs_1.onnx;1:actual_input
-residual_distill_bs_32.onnx;1:actual_input
+#residual_distill_bs_32.onnx has random precision error in p50
+residual_distill_bs_32.onnx;1:actual_input 200
 crnn_lite_lstm_v2.onnx;1:input;32,32,32,1
 psenet_lite_mbv2.onnx;1:input;1,32,32,3
 residual_distill_res34_cifar10_bs_1_update.onnx;1:actual_input