diff --git a/mindspore/lite/nnacl/fp16/activation_fp16.c b/mindspore/lite/nnacl/fp16/activation_fp16.c index cb09c4c742b..3ff5bf05c5e 100644 --- a/mindspore/lite/nnacl/fp16/activation_fp16.c +++ b/mindspore/lite/nnacl/fp16/activation_fp16.c @@ -99,6 +99,8 @@ int TanhFp16(const float16_t *src, float16_t *dst, int ele_num) { {28.0f, 28.0f, 28.0f, 28.0f}, {3150.0f, 3150.0f, 3150.0f, 3150.0f}, {62370.0f, 62370.0f, 62370.0f, 62370.0f}}; + float32x4_t neg_one = {-1.0f, -1.0f, -1.0f, -1.0f}; + float32x4_t pos_one = {1.0f, 1.0f, 1.0f, 1.0f}; int count = (ele_num / C4NUM) * C4NUM; for (; i < count; i += C4NUM) { float32x4_t input = vcvt_f32_f16(vld1_f16(src + i)); @@ -109,7 +111,7 @@ int TanhFp16(const float16_t *src, float16_t *dst, int ele_num) { float32x4_t b = vaddq_f32( vmulq_f32(vaddq_f32(vmulq_f32(vaddq_f32(vmulq_f32(paramv[3], square), paramv[4]), square), paramv[5]), square), paramv[2]); - vst1_f16(dst + i, vcvt_f16_f32(vdivq_f32(a, b))); + vst1_f16(dst + i, vcvt_f16_f32(vminq_f32(vmaxq_f32(vdivq_f32(a, b), neg_one), pos_one))); } #endif for (; i < ele_num; ++i) { @@ -118,6 +120,8 @@ int TanhFp16(const float16_t *src, float16_t *dst, int ele_num) { float a = (((square + 378.0f) * square + 17325.0f) * square + 135135.0f) * input; float b = ((28.0f * square + 3150.0f) * square + 62370.0f) * square + 135135.0f; dst[i] = a / b; + dst[i] = MSMAX(dst[i], -1); + dst[i] = MSMIN(dst[i], 1); } return NNACL_OK; } diff --git a/mindspore/lite/nnacl/fp32/activation_fp32.c b/mindspore/lite/nnacl/fp32/activation_fp32.c index a20d55cf4bc..b3bb0d75399 100644 --- a/mindspore/lite/nnacl/fp32/activation_fp32.c +++ b/mindspore/lite/nnacl/fp32/activation_fp32.c @@ -109,6 +109,8 @@ int Tanh(const float *src, int length, float *dst) { {28.0f, 28.0f, 28.0f, 28.0f}, {3150.0f, 3150.0f, 3150.0f, 3150.0f}, {62370.0f, 62370.0f, 62370.0f, 62370.0f}}; + float32x4_t neg_one = {-1.0f, -1.0f, -1.0f, -1.0f}; + float32x4_t pos_one = {1.0f, 1.0f, 1.0f, 1.0f}; int count = (length / C4NUM) * C4NUM; for (; i < count; i += C4NUM) { float32x4_t input = vld1q_f32(src + i); @@ -119,7 +121,7 @@ int Tanh(const float *src, int length, float *dst) { float32x4_t b = vaddq_f32( vmulq_f32(vaddq_f32(vmulq_f32(vaddq_f32(vmulq_f32(paramv[3], square), paramv[4]), square), paramv[5]), square), paramv[2]); - vst1q_f32(dst + i, vdivq_f32(a, b)); + vst1q_f32(dst + i, vminq_f32(vmaxq_f32(vdivq_f32(a, b), neg_one), pos_one)); } #endif for (; i < length; ++i) { @@ -128,6 +130,8 @@ int Tanh(const float *src, int length, float *dst) { float a = (((square + 378.0f) * square + 17325.0f) * square + 135135.0f) * input; float b = ((28.0f * square + 3150.0f) * square + 62370.0f) * square + 135135.0f; dst[i] = a / b; + dst[i] = MSMAX(dst[i], -1); + dst[i] = MSMIN(dst[i], 1); } return NNACL_OK; } diff --git a/mindspore/lite/nnacl/fp32/reduce_fp32.c b/mindspore/lite/nnacl/fp32/reduce_fp32.c index a1493865f6a..7363f4cdfa0 100644 --- a/mindspore/lite/nnacl/fp32/reduce_fp32.c +++ b/mindspore/lite/nnacl/fp32/reduce_fp32.c @@ -81,6 +81,43 @@ int ReduceSum(int outer_size, int inner_size, int axis_size, const float *src_da } return NNACL_OK; } +int IntReduceSum(int outer_size, int inner_size, int axis_size, const int *src_data, int *dst_data, int tid, + int thread_num) { + if (src_data == NULL || dst_data == NULL) { + return NNACL_NULL_PTR; + } + int i, j; +#ifdef ENABLE_NEON + int block_mod = inner_size % C4NUM; + int block_c4 = inner_size - block_mod; +#endif + for (j = tid; j < outer_size; j += thread_num) { + const int *outer_src = src_data + j * axis_size * inner_size; + int *outer_dst = dst_data + j * inner_size; + int k = 0; +#ifdef ENABLE_NEON + for (; k < block_c4; k += C4NUM) { + const int *inner_src = outer_src + k; + int *inner_dst = outer_dst + k; + int32x4_t tmp = {0, 0, 0, 0}; + for (i = 0; i < axis_size; i++) { + tmp = vaddq_s32(tmp, vld1q_s32(inner_src + i * inner_size)); + } + vst1q_s32(inner_dst, tmp); + } +#endif + for (; k < inner_size; k++) { + const int *inner_src = outer_src + k; + int *inner_dst = outer_dst + k; + int tmp = 0; + for (i = 0; i < axis_size; i++) { + tmp += inner_src[i * inner_size]; + } + *inner_dst = tmp; + } + } + return NNACL_OK; +} int ReduceMax(int outer_size, int inner_size, int axis_size, const float *src_data, float *dst_data, int tid, int thread_num) { if (src_data == NULL || dst_data == NULL) { diff --git a/mindspore/lite/nnacl/fp32/reduce_fp32.h b/mindspore/lite/nnacl/fp32/reduce_fp32.h index c8b484d7893..9a6a8789364 100644 --- a/mindspore/lite/nnacl/fp32/reduce_fp32.h +++ b/mindspore/lite/nnacl/fp32/reduce_fp32.h @@ -26,6 +26,8 @@ int ReduceMean(int outer_size, int inner_size, int axis_size, const float *src_d int thread_num); int ReduceSum(int outer_size, int inner_size, int axis_size, const float *src_data, float *dst_data, int tid, int thread_num); +int IntReduceSum(int outer_size, int inner_size, int axis_size, const int *src_data, int *dst_data, int tid, + int thread_num); int ReduceMax(int outer_size, int inner_size, int axis_size, const float *src_data, float *dst_data, int tid, int thread_num); int IntReduceMax(int outer_size, int inner_size, int axis_size, const int *src_data, int *dst_data, int tid, diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/reduce_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/reduce_fp32.cc index 2f56a6dfd30..4c3982d9464 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/reduce_fp32.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/reduce_fp32.cc @@ -50,6 +50,7 @@ int ReduceCPUKernel::Init() { switch (mode_) { case static_cast(ReduceMode_ReduceSum): { reducer_ = ReduceSum; + int_reducer_ = IntReduceSum; break; } case static_cast(ReduceMode_ReduceMean): { diff --git a/mindspore/lite/src/runtime/kernel/npu/convolution_npu.cc b/mindspore/lite/src/runtime/kernel/npu/convolution_npu.cc index ca7383c88e1..bbbaee98887 100644 --- a/mindspore/lite/src/runtime/kernel/npu/convolution_npu.cc +++ b/mindspore/lite/src/runtime/kernel/npu/convolution_npu.cc @@ -23,10 +23,6 @@ using mindspore::schema::PrimitiveType_Conv2D; namespace mindspore::kernel { int ConvolutionNPUKernel::IsSupport(const std::vector &inputs, const std::vector &outputs, OpParameter *opParameter) { - if (conv_param_->group_ != 1) { - MS_LOG(WARNING) << "Only support group equals 1 for npu convolution op"; - return RET_ERROR; - } return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/npu/pooling_npu.cc b/mindspore/lite/src/runtime/kernel/npu/pooling_npu.cc index 12587c15ca7..7cc37bf49a9 100644 --- a/mindspore/lite/src/runtime/kernel/npu/pooling_npu.cc +++ b/mindspore/lite/src/runtime/kernel/npu/pooling_npu.cc @@ -56,10 +56,11 @@ int PoolingNPUKernel::SetPoolingParam() { if (pooling_param_->round_mode_ == RoundMode_Floor) { // no use in cpu pooling_->set_attr_ceil_mode(0); + pooling_->set_attr_data_mode(1); } else { pooling_->set_attr_ceil_mode(1); + pooling_->set_attr_data_mode(0); } - // todo data mode return RET_OK; } diff --git a/mindspore/lite/test/models_caffe.cfg b/mindspore/lite/test/models_caffe.cfg index 7dcb5f461b6..51178bb4d1d 100644 --- a/mindspore/lite/test/models_caffe.cfg +++ b/mindspore/lite/test/models_caffe.cfg @@ -73,3 +73,4 @@ ml_video_edit_video_segment_gauss_adaptis_part1 ml_video_edit_Mnet ml_video_edit_hairSeg_have_imageProcessLayer_interpTo145 ml_video_edit_person_divison_video +hdc_Face_Aesthetic_MTI_Aesthetic diff --git a/mindspore/lite/test/models_npu.cfg b/mindspore/lite/test/models_npu.cfg index 6ded1823b4d..1aca6948b0d 100644 --- a/mindspore/lite/test/models_npu.cfg +++ b/mindspore/lite/test/models_npu.cfg @@ -41,3 +41,12 @@ ml_video_edit_img_segment_adaptise.pb 0.5 2 ml_video_edit_video_segment_gauss_adaptis_part2.pb 3 2 ml_video_edit_person_divison_pic 8 2 ml_video_edit_person_divison_video 0.5 +ml_video_edit_imitate_filter.onnx 230 +ml_video_edit_judge.onnx 5 +ml_video_edit_vignet.onnx 0.5 +hdc_Face_Aesthetic_MTI_Aesthetic 0.5 +hdc_Face_Emotion_MTI_Aesthetic.onnx 30 +hdc_Face_Landmark5_MTI_Aesthetic.onnx 0.5 +hdc_Image_Aesthetic_MTI_Aesthetic.onnx 0.5 +hdc_mobilenet_1w_class.onnx 10 +hdc_resnet_1w_class.onnx 5 diff --git a/mindspore/lite/test/models_onnx.cfg b/mindspore/lite/test/models_onnx.cfg index 9439d0a4230..b612ac93abf 100644 --- a/mindspore/lite/test/models_onnx.cfg +++ b/mindspore/lite/test/models_onnx.cfg @@ -45,3 +45,9 @@ ml_video_edit_style_transfer_starry.onnx ml_video_edit_judge.onnx ml_video_edit_vignet.onnx ssd_mobilenet_v1_10.onnx;1,383,640,3 +hdc_Face_Emotion_MTI_Aesthetic.onnx +hdc_Face_Landmark5_MTI_Aesthetic.onnx +hdc_Image_Aesthetic_MTI_Aesthetic.onnx +hdc_mobilenet_1w_class.onnx +hdc_resnet_1w_class.onnx +ml_video_edit_imitate_filter.onnx diff --git a/mindspore/lite/test/models_onnx_fp16.cfg b/mindspore/lite/test/models_onnx_fp16.cfg index 5f568f46ed8..03213a338cf 100644 --- a/mindspore/lite/test/models_onnx_fp16.cfg +++ b/mindspore/lite/test/models_onnx_fp16.cfg @@ -26,7 +26,7 @@ crnn_lite_lstm_v2.onnx;32,32,32,1 0.3 psenet_lite_mbv2.onnx;1,32,32,3 0.6 super-resolution-10.onnx;1,224,224,1 4.5 tinyyolov2-8.onnx;1,416,416,3 5.5 -ml_2012_ocr_cn.onnx 200 +ml_2012_ocr_cn.onnx -1 #ml_2012_ocr_cn_noLSTM.onnx 1 candy-9.onnx 5 mosaic-9.onnx 4 diff --git a/mindspore/lite/test/models_with_multiple_inputs.cfg b/mindspore/lite/test/models_with_multiple_inputs.cfg index 89ff1ae84e6..88fd55f55a9 100644 --- a/mindspore/lite/test/models_with_multiple_inputs.cfg +++ b/mindspore/lite/test/models_with_multiple_inputs.cfg @@ -10,3 +10,4 @@ ml_video_edit_video_segment_gauss_adaptis_part2_pb2tflite.tflite;2 decoder.onnx;2;1,7,512:1,7 fasterrcnn_crop.pb;1;420,630,3 ml_video_edit_person_divison_pic;2 +hdc_tb_cn_neg.tflite;3 diff --git a/mindspore/lite/test/run_benchmark_nets.sh b/mindspore/lite/test/run_benchmark_nets.sh index c4dfb59f12b..512d27abed1 100755 --- a/mindspore/lite/test/run_benchmark_nets.sh +++ b/mindspore/lite/test/run_benchmark_nets.sh @@ -1547,8 +1547,11 @@ function Run_arm64() { echo 'cd /data/local/tmp/benchmark_test' > adb_run_cmd.txt echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/data/local/tmp/benchmark_test' >> adb_run_cmd.txt - echo './benchmark --modelFile='${model_name}'.fp16.ms --inDataFile=/data/local/tmp/input_output/input/'${model_name}'.ms.bin --benchmarkDataFile=/data/local/tmp/input_output/output/'${model_name}'.ms.out --enableFp16=true --accuracyThreshold='${accuracy_limit} ' --inputShapes='${input_shapes} >> adb_run_cmd.txt - + if [[ $accuracy_limit == "-1" ]]; then + echo './benchmark --modelFile='${model_name}'.fp16.ms --inDataFile=/data/local/tmp/input_output/input/'${model_name}'.ms.bin --benchmarkDataFile=/data/local/tmp/input_output/output/'${model_name}'.ms.out --enableFp16=true --inputShapes='${input_shapes} >> adb_run_cmd.txt + else + echo './benchmark --modelFile='${model_name}'.fp16.ms --inDataFile=/data/local/tmp/input_output/input/'${model_name}'.ms.bin --benchmarkDataFile=/data/local/tmp/input_output/output/'${model_name}'.ms.out --enableFp16=true --accuracyThreshold='${accuracy_limit} ' --inputShapes='${input_shapes} >> adb_run_cmd.txt + fi cat adb_run_cmd.txt >> "${run_arm64_log_file}" adb -s ${device_id} shell < adb_run_cmd.txt >> "${run_arm64_log_file}" if [ $? = 0 ]; then diff --git a/mindspore/lite/tools/converter/parser/caffe/caffe_convolution_parser.cc b/mindspore/lite/tools/converter/parser/caffe/caffe_convolution_parser.cc index d4fde02cf52..0edbeb208f6 100644 --- a/mindspore/lite/tools/converter/parser/caffe/caffe_convolution_parser.cc +++ b/mindspore/lite/tools/converter/parser/caffe/caffe_convolution_parser.cc @@ -19,8 +19,8 @@ namespace mindspore { namespace lite { -STATUS CaffeConvolutionParser::ParseGroupConvolution(schema::PrimitiveT *primitiveT, schema::Conv2DT *attr) { - if (attr->group == 1) { +STATUS CaffeConvolutionParser::ParseDepthwiseConvolution(schema::PrimitiveT *primitiveT, schema::Conv2DT *attr) { + if (attr->group == 1 || attr->group != attr->channelOut) { return RET_OK; } std::unique_ptr depthwiseConv2DParam = std::make_unique(); @@ -125,9 +125,9 @@ PrimitiveC *CaffeConvolutionParser::ParseLitePrimitive(const caffe::LayerParamet primitive->value.type = schema::PrimitiveType_Conv2D; primitive->value.value = attr.release(); - status = ParseGroupConvolution(primitive.get(), static_cast(primitive->value.value)); + status = ParseDepthwiseConvolution(primitive.get(), static_cast(primitive->value.value)); if (status != RET_OK) { - MS_LOG(ERROR) << "Parse group convolution failed"; + MS_LOG(ERROR) << "Parse depthwise convolution failed"; return nullptr; } diff --git a/mindspore/lite/tools/converter/parser/caffe/caffe_convolution_parser.h b/mindspore/lite/tools/converter/parser/caffe/caffe_convolution_parser.h index 19cb6eab28e..dd104f99b6d 100644 --- a/mindspore/lite/tools/converter/parser/caffe/caffe_convolution_parser.h +++ b/mindspore/lite/tools/converter/parser/caffe/caffe_convolution_parser.h @@ -32,7 +32,7 @@ class CaffeConvolutionParser : public CaffeNodeParser { PrimitiveC *ParseLitePrimitive(const caffe::LayerParameter &proto, const caffe::LayerParameter &weight) override; private: - static STATUS ParseGroupConvolution(schema::PrimitiveT *primitiveT, schema::Conv2DT *attr); + static STATUS ParseDepthwiseConvolution(schema::PrimitiveT *primitiveT, schema::Conv2DT *attr); }; } // namespace lite } // namespace mindspore