From 3930624ef32bd144f4b74264e0ef561753e8117f Mon Sep 17 00:00:00 2001 From: yangruoqi713 Date: Thu, 6 Jan 2022 17:26:46 +0800 Subject: [PATCH] [MSLITE][DEVELOP] judge tensor type when loading model, fix bug of gelu fp16 --- .../cpu/nnacl/fp16/activation_fp16.c | 11 +++---- .../cpu/nnacl/infer/conv2d_infer.c | 31 ++++++++++--------- .../intrinsics/ms_simd_neon_instructions.h | 12 ++++++- .../cpu/nnacl/tensor_array_parameter.h | 2 +- mindspore/lite/src/common/prim_util.cc | 31 +++++++++++++++++++ mindspore/lite/src/common/prim_util.h | 2 +- mindspore/lite/src/lite_model.cc | 14 +++++++++ .../populate/control/tensor_array_populate.cc | 7 +---- .../kernel/arm/control/tensor_array.cc | 4 --- .../lite/test/config/models_onnx_fp16.cfg | 2 +- 10 files changed, 82 insertions(+), 34 deletions(-) diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/activation_fp16.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/activation_fp16.c index 35ac6060e70..3e7658adba8 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/activation_fp16.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/activation_fp16.c @@ -223,12 +223,11 @@ int GeluFp16(const float16_t *src, int length, float16_t *dst, bool approximate) if (approximate) { // dst = 0.5 * x * (1 + tanh((2 / pi) ^ 0.5 * (x + 0.044715x^3))) #ifdef ENABLE_NEON - int C8 = DOWN_ROUND(length, C8NUM); - for (; i < C8; i += C8NUM) { - float16x8_t in = vld1q_f16(src + i); - float16x8_t res = - 0.5f * in * (1.0f + MS_TANHX8_F16(((float16_t)0.79788456080287f + (float16_t)0.035677408136f * in * in) * in)); - vst1q_f16(dst + i, res); + int C4 = DOWN_ROUND(length, C4NUM); + for (; i < C4; i += C4NUM) { + float32x4_t in = MS_CVT_F32_F16(vld1_f16(src + i)); + float32x4_t res = 0.5f * in * (1.0f + MS_TANHX4_F32((0.79788456080287f + 0.035677408136f * in * in) * in)); + vst1_f16(dst + i, MS_CVT_F16_F32(res)); } #endif for (; i < length; i++) { diff --git 
a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/conv2d_infer.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/conv2d_infer.c index ceae47e3276..9b189537efd 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/conv2d_infer.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/conv2d_infer.c @@ -103,6 +103,9 @@ int Conv2dInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC * return NNACL_FORMAT_ERROR; } const TensorC *weight_tensor = inputs[1]; + if (weight_tensor->format_ != Format_NHWC && weight_tensor->format_ != Format_KHWC) { + return NNACL_FORMAT_ERROR; + } TensorC *out_tensor = outputs[0]; if (out_tensor->format_ != Format_NC4HW4) { out_tensor->format_ = input_tensor->format_; @@ -123,9 +126,9 @@ int Conv2dInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC * if (input_tensor->shape_size_ == 0) { return NNACL_INFER_INVALID; } - int input_h = in_shape[1]; - int input_w = in_shape[2]; - int input_c = in_shape[3]; + int input_h = in_shape[DIMENSION_1D]; + int input_w = in_shape[DIMENSION_2D]; + int input_c = in_shape[DIMENSION_3D]; int output_w = 0, output_h = 0; int ret = CheckConvAttr(input_c, weight_tensor, param); @@ -141,19 +144,19 @@ int Conv2dInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC * int out_shape[MAX_SHAPE_SIZE]; size_t out_shape_size = 0; ShapeSet(out_shape, &out_shape_size, input_tensor->shape_, input_tensor->shape_size_); - out_shape[1] = output_h >= 0 ? output_h : 1; - out_shape[2] = output_w >= 0 ? output_w : 1; - out_shape[3] = GetBatch(weight_tensor); + out_shape[DIMENSION_1D] = output_h >= 0 ? output_h : 1; + out_shape[DIMENSION_2D] = output_w >= 0 ? 
output_w : 1; + out_shape[DIMENSION_3D] = GetBatch(weight_tensor); SetShapeArray(out_tensor, out_shape, out_shape_size); - param->input_batch_ = in_shape[0]; - param->input_h_ = in_shape[1]; - param->input_w_ = in_shape[2]; - param->input_channel_ = in_shape[3]; - param->output_batch_ = out_shape[0]; - param->output_h_ = out_shape[1]; - param->output_w_ = out_shape[2]; - param->output_channel_ = out_shape[3]; + param->input_batch_ = in_shape[DIMENSION_0D]; + param->input_h_ = in_shape[DIMENSION_1D]; + param->input_w_ = in_shape[DIMENSION_2D]; + param->input_channel_ = in_shape[DIMENSION_3D]; + param->output_batch_ = out_shape[DIMENSION_0D]; + param->output_h_ = out_shape[DIMENSION_1D]; + param->output_w_ = out_shape[DIMENSION_2D]; + param->output_channel_ = out_shape[DIMENSION_3D]; return NNACL_OK; } diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/intrinsics/ms_simd_neon_instructions.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/intrinsics/ms_simd_neon_instructions.h index f24ad892b83..47029ab9c53 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/intrinsics/ms_simd_neon_instructions.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/intrinsics/ms_simd_neon_instructions.h @@ -134,13 +134,23 @@ static inline MS_FLOAT32X4 MS_TANHX4_F32(MS_FLOAT32X4 src) { static const MS_FLOAT32X4 data5 = {62370.0f, 62370.0f, 62370.0f, 62370.0f}; static const MS_FLOAT32X4 neg = {-1.0f, -1.0f, -1.0f, -1.0f}; static const MS_FLOAT32X4 pos = {1.0f, 1.0f, 1.0f, 1.0f}; + static const MS_FLOAT32X4 up_limit = {5.0f, 5.0f, 5.0f, 5.0f}; + static const MS_FLOAT32X4 down_limit = {-5.0f, -5.0f, -5.0f, -5.0f}; + + MS_UINT32X4 up_mask = MS_CMPGTQ_F32(src, up_limit); + MS_UINT32X4 down_mask = MS_CMPGTQ_F32(down_limit, src); + MS_FLOAT32X4 square = MS_MULQ_F32(src, src); MS_FLOAT32X4 a = MS_MULQ_F32( MS_ADDQ_F32(MS_MULQ_F32(MS_ADDQ_F32(MS_MULQ_F32(MS_ADDQ_F32(square, data0), square), data1), square), data2), src); MS_FLOAT32X4 b = MS_ADDQ_F32( 
MS_MULQ_F32(MS_ADDQ_F32(MS_MULQ_F32(MS_ADDQ_F32(MS_MULQ_F32(data3, square), data4), square), data5), square), data2); - return MS_MINQ_F32(MS_MAXQ_F32(MS_DIVQ_F32(a, b), neg), pos); + + MS_FLOAT32X4 tanh_value = MS_DIVQ_F32(a, b); + MS_FLOAT32X4 res = MS_BLENDQ_F32(tanh_value, pos, up_mask); + res = MS_BLENDQ_F32(res, neg, down_mask); + return res; } static inline MS_FLOAT32X4 MS_ERFX4_F32(MS_FLOAT32X4 src) { diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/tensor_array_parameter.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/tensor_array_parameter.h index cfe5b670934..0b71e28e582 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/tensor_array_parameter.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/tensor_array_parameter.h @@ -21,7 +21,7 @@ typedef struct TensorArrayParameter { OpParameter op_parameter_; bool dynamic_size_; bool identical_element_shapes_; - int *element_shape_; + int element_shape_[MAX_SHAPE_SIZE]; int element_shape_size_; int data_type_; } TensorArrayParameter; diff --git a/mindspore/lite/src/common/prim_util.cc b/mindspore/lite/src/common/prim_util.cc index 2a7d8e1fd02..bf10bc91ebd 100644 --- a/mindspore/lite/src/common/prim_util.cc +++ b/mindspore/lite/src/common/prim_util.cc @@ -15,6 +15,7 @@ */ #include "src/common/prim_util.h" +#include <set> #include "nnacl/op_base.h" #include "schema/model_generated.h" #include "src/common/log_adapter.h" @@ -25,6 +26,17 @@ namespace mindspore { namespace lite { +static std::set<schema::PrimitiveType> tensor_list_ops = { + schema::PrimitiveType_TensorListFromTensor, schema::PrimitiveType_TensorListGetItem, + schema::PrimitiveType_TensorListReserve, schema::PrimitiveType_TensorListSetItem, + schema::PrimitiveType_TensorListStack}; +#ifdef ENABLE_V0 +static std::set<schema::v0::PrimitiveType> v0_tensor_list_ops = { + schema::v0::PrimitiveType_TensorListFromTensor, schema::v0::PrimitiveType_TensorListGetItem, + schema::v0::PrimitiveType_TensorListReserve, schema::v0::PrimitiveType_TensorListSetItem, + 
schema::v0::PrimitiveType_TensorListStack}; +#endif + int GetPrimitiveType(const void *primitive, int schema_version) { if (primitive == nullptr) { return -1; @@ -101,6 +113,25 @@ bool IsCustomNode(const void *primitive, int schema_version) { return false; } +bool IsTensorListNode(const void *primitive, int schema_version) { + MS_CHECK_TRUE_MSG(primitive != nullptr, false, "primtive cannot be nullptr"); + if (schema_version == SCHEMA_CUR) { + if (tensor_list_ops.find(reinterpret_cast<const schema::Primitive *>(primitive)->value_type()) != + tensor_list_ops.end()) { + return true; + } + } +#ifdef ENABLE_V0 + if (schema_version == SCHEMA_V0) { + if (v0_tensor_list_ops.find(reinterpret_cast<const schema::v0::Primitive *>(primitive)->value_type()) != + v0_tensor_list_ops.end()) { + return true; + } + } +#endif + return false; +} + int GetPartialGraphIndex(const void *primitive, int schema_version) { MS_CHECK_TRUE_MSG(primitive != nullptr, -1, "primtive cannot be nullptr"); int index = -1; diff --git a/mindspore/lite/src/common/prim_util.h b/mindspore/lite/src/common/prim_util.h index f67edf55776..c97ec5641a1 100644 --- a/mindspore/lite/src/common/prim_util.h +++ b/mindspore/lite/src/common/prim_util.h @@ -28,7 +28,7 @@ bool IsCallNode(const void *node, int schema_version); bool IsSwitchNode(const void *node, int schema_version); bool IsSwitchLayerNode(const void *node, int schema_version); bool IsCustomNode(const void *primitive, int schema_version); -bool IsCastNode(const void *primitive, int schema_version); +bool IsTensorListNode(const void *primitive, int schema_version); int GetPartialGraphIndex(const void *primitive, int schema_version); } // namespace lite } // namespace mindspore diff --git a/mindspore/lite/src/lite_model.cc b/mindspore/lite/src/lite_model.cc index eccca908275..83fa0114d38 100644 --- a/mindspore/lite/src/lite_model.cc +++ b/mindspore/lite/src/lite_model.cc @@ -241,6 +241,20 @@ int LiteModel::NodeVerify() const { return RET_ERROR; } } + if ((!IsTensorListNode(node->primitive_, schema_version_)) && 
(!IsPartialNode(node->primitive_, schema_version_))) { + if (std::any_of(node->input_indices_.begin(), node->input_indices_.end(), [this](const uint32_t &idx) { + return TypeId(this->all_tensors_[idx]->dataType()) == kObjectTypeTensorType; + })) { + MS_LOG(ERROR) << "node input tensor type can't be object type, node name: " << node->name_; + return RET_ERROR; + } + if (std::any_of(node->output_indices_.begin(), node->output_indices_.end(), [this](const uint32_t &idx) { + return TypeId(this->all_tensors_[idx]->dataType()) == kObjectTypeTensorType; + })) { + MS_LOG(ERROR) << "node output tensor type can't be object type, node name: " << node->name_; + return RET_ERROR; + } + } } return RET_OK; } diff --git a/mindspore/lite/src/ops/populate/control/tensor_array_populate.cc b/mindspore/lite/src/ops/populate/control/tensor_array_populate.cc index 4cee72fa93f..034dfde5d00 100644 --- a/mindspore/lite/src/ops/populate/control/tensor_array_populate.cc +++ b/mindspore/lite/src/ops/populate/control/tensor_array_populate.cc @@ -45,12 +45,7 @@ OpParameter *PopulateTensorArrayParameter(const void *prim) { std::vector<int> primitive_element_shape(value->element_shape()->begin(), value->element_shape()->end()); param->element_shape_size_ = static_cast<int>(primitive_element_shape.size()); auto size = sizeof(int) * param->element_shape_size_; - param->element_shape_ = static_cast<int *>(malloc(size)); - if (param->element_shape_ == nullptr) { - MS_LOG(ERROR) << "malloc element_shape failed!"; - free(param); - return nullptr; - } + MS_CHECK_LE(size, MAX_SHAPE_SIZE, nullptr); memset(param->element_shape_, 0, size); memcpy(param->element_shape_, primitive_element_shape.data(), size); param->data_type_ = value->data_type(); diff --git a/mindspore/lite/src/runtime/kernel/arm/control/tensor_array.cc b/mindspore/lite/src/runtime/kernel/arm/control/tensor_array.cc index 52245792779..188325dac1c 100644 --- a/mindspore/lite/src/runtime/kernel/arm/control/tensor_array.cc +++ 
b/mindspore/lite/src/runtime/kernel/arm/control/tensor_array.cc @@ -142,8 +142,4 @@ int TensorArrayWriteCPUKernel::Run() { lite::Tensor::CopyTensorData(*value, TensorArrayBaseCPUKernel::handle_); return RET_OK; } - -REG_KERNEL(kCPU, kNumberTypeInt32, PrimitiveType_TensorArray, LiteKernelCreator<TensorArrayCPUKernel>) -REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_TensorArrayRead, LiteKernelCreator<TensorArrayReadCPUKernel>) -REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_TensorArrayWrite, LiteKernelCreator<TensorArrayWriteCPUKernel>) } // namespace mindspore::kernel diff --git a/mindspore/lite/test/config/models_onnx_fp16.cfg b/mindspore/lite/test/config/models_onnx_fp16.cfg index ca480ac60e2..3fb078fe514 100644 --- a/mindspore/lite/test/config/models_onnx_fp16.cfg +++ b/mindspore/lite/test/config/models_onnx_fp16.cfg @@ -122,6 +122,6 @@ ml_video_edit_shot_selection_face_emotion.onnx 0.7 ml_video_edit_face_edit_face3d.onnx 2 ml_video_edit_face_edit_retinaface.onnx;1;1,120,128,3 2.5 rvm_mobilenetv3_192.onnx;6 5 -bert_span.onnx;3 2 +bert_span.onnx;3 3.5 ml_video_edit_dimming_tech_model_studio_20.onnx;2 6.5 ml_audio_edit_rhythm_check_model.onnx;1:input;1,1024,81,1 1