!7684 [MSLITE] Run fp16 tests with a configurable per-model accuracy threshold

Merge pull request !7684 from ling/bug
2020-10-27 15:22:38 +08:00 · 2020-10-27 15:22:38 +08:00 · 52717bba31
parent 897ce09346 72d261c488
commit 52717bba31
5 changed files with 66 additions and 44 deletions
--- a/mindspore/lite/nnacl/assembly/arm32/PostFuncBiasReluC4.S
+++ b/mindspore/lite/nnacl/assembly/arm32/PostFuncBiasReluC4.S
@ -1,24 +1,11 @@

-  .text
-  .align 5
-  //.p2align 5,,15
-  .global PostFuncBiasReluC4
-  #ifndef __APPLE__
-  .type PostFuncBiasReluC4, %function
-  #endif
-
-//void PostFuncBiasReluC4(float *dst, const float *src, const float *bias, size_t oc4div, size_t oc4mod,
-//                        size_t plane_size, size_t plane_stride, size_t relu_type);
-// r0 dst           r1 srx           r2 bias
-// r3 oc4div        r4 oc4mod        r5 plane_size
-// r6 plane_stride  r7 relu_type
-
-// v0 ~ v15 value
-// v16  v17 bias data
-// r10  r11  weite loop tmp buf
-// r16  relu6  #6;    r17 relu #0
-// lr  oc8 loop control
-// r8  hw  loop control
+.text
+.align 5
+//.p2align 5,,15
+.global PostFuncBiasReluC4
+#ifndef __APPLE__
+.type PostFuncBiasReluC4, %function
+#endif

 PostFuncBiasReluC4:
  push {r4-r8, r10, r11, lr}
@ -220,7 +207,7 @@ Loop_C1_3_Relu6:
  vadd.f32 q0, q0, q12
  vmin.f32 q0, q0, q14
  vmax.f32 q0, q0, q15
-  vst1.32 {d0}, [r0], r6
+  vst1.32 {d0}, [r0], r12
  vst1.32 {d1[0]}, [r11], r12
  b Loop_C1_3_Relu6
 Loop_C1_3_Relu:
@ -230,7 +217,7 @@ Loop_C1_3_Relu:
  vld1.32 {q0}, [r1]!
  vadd.f32 q0, q0, q12
  vmax.f32 q0, q0, q15
-  vst1.32 {d0}, [r0], r6
+  vst1.32 {d0}, [r0], r12
  vst1.32 {d1[0]}, [r11], r12
  b Loop_C1_3_Relu
 Loop_C1_3_Write:
@ -239,7 +226,7 @@ Loop_C1_3_Write:
  sub r8, r8, #1
  vld1.32 {q0}, [r1]!
  vadd.f32 q0, q0, q12
-  vst1.32 {d0}, [r0], r6
+  vst1.32 {d0}, [r0], r12
  vst1.32 {d1[0]}, [r11], r12
  b Loop_C1_3_Write

--- a/mindspore/lite/test/models_arm32.cfg
+++ b/mindspore/lite/test/models_arm32.cfg
@ -12,6 +12,7 @@ mnet
 ml_face_landmark
 ml_liveness_detect_landmark
 deconv_test_model
+deconvs_model
 # aware_training
 video_infer.tflite
 mobilenet_v1_1.0_224_quant.tflite
--- a/mindspore/lite/test/models_caffe.cfg
+++ b/mindspore/lite/test/models_caffe.cfg
@ -57,6 +57,8 @@ detect-mbv1-shortcut-400-400_nopostprocess_simplified
 detect_mbv1_640_480_nopostprocess_simplified
 retinaface
 deconv_test_model
+deconvs_model
+HWSR-s_256_256
 age_new
 detection_retinaface_fix
 landmark
--- a/mindspore/lite/test/models_fp16.cfg
+++ b/mindspore/lite/test/models_fp16.cfg
@ -1,10 +1,34 @@
-detect-deeper-halfdeeper-mbv1-shortcut-400-400_nopostprocess_simplified.fp16
-model_emotions_0727_nosoftmax.tflite.fp16
-mtk_isface.fp16
-mtk_landmark.fp16
-mtk_pose_tuku.fp16
-mtk_age_gender.tflite.fp16
-mtk_model_face_dress.tflite.fp16
-ml_face_contour.fp16
-ml_face_landmark.fp16
-retinaface.fp16
+hiai_cpu_face_detect 4
+#hiai_cpu_face_attr 5000
+hiai_cpu_face_hat 0.3
+hiai_face_detect_rfb 4
+hiai_face_isface 0.1
+hiai_face_landmark 0.2
+hiai_face_pose_tuku 1.3
+ml_face_contour 0.5
+ml_face_landmark 1
+mtk_isface 0.2
+mtk_landmark 0.3
+mtk_pose_tuku 1
+mtk_age_gender.tflite 0.1
+mtk_model_face_dress.tflite 1
+mtk_model_ckpt.tflite 20
+mtk_face_features_v1.tflite 20
+mtk_new_detect.tflite 3
+mtk_pose.tflite 2
+mtk_model_emotions_0727_nosoftmax.tflite 2
+#mtk_model_normalize_object_scene_ps_20200519_f32.tflite 20
+mtk_model_normalize_object_scene_ps_20200826_f32_no_softmax.tflite 22
+mtk_276landmark_0913.tflite 16
+mtk_face_recognition.tflite 8
+mtk_convert_model.tflite 5
+mobilenet_v1_0.5_128.tflite 2.5
+mobilenet_v1_1.0_192.tflite 26
+mobilenet_v2_1.0_224.tflite 3
+retinaface 6
+deconvs_model 1
+efficientnet_lite4_fp32_2.tflite 6
+mnasnet_0.50_224_1_metadata_1.tflite 5
+detect-deeper-halfdeeper-mbv1-shortcut-400-400_nopostprocess_simplified 1
+HWSR-s_256_256 10
+# Line format: <model_name> <accuracy_limit>
--- a/mindspore/lite/test/run_benchmark_nets.sh
+++ b/mindspore/lite/test/run_benchmark_nets.sh
@ -132,10 +132,11 @@ function Run_Converter() {

    # Copy fp16 ms models:
    while read line; do
-        model_name=${line%.*}
-        if [[ $model_name == \#* ]]; then
-            continue
+        fp16_line_info=${line}
+        if [[ $fp16_line_info == \#* ]]; then
+          continue
        fi
+        model_name=`echo ${fp16_line_info}|awk -F ' ' '{print $1}'`
        echo 'cp '${ms_models_path}'/'${model_name}'.ms' ${ms_models_path}'/'${model_name}'.fp16.ms'
        cp ${ms_models_path}/${model_name}.ms ${ms_models_path}/${model_name}.fp16.ms
        if [ $? = 0 ]; then
@ -511,25 +512,32 @@ function Run_arm64() {

    # Run fp16 converted models:
    while read line; do
-        model_name=${line}
-        if [[ $model_name == \#* ]]; then
+        fp16_line_info=${line}
+        if [[ $fp16_line_info == \#* ]]; then
          continue
        fi
-        echo ${model_name} >> "${run_arm64_log_file}"
+        model_name=`echo ${fp16_line_info}|awk -F ' ' '{print $1}'`
+        accuracy_limit=`echo ${fp16_line_info}|awk -F ' ' '{print $2}'`
+        echo "---------------------------------------------------------" >> "${run_arm64_log_file}"
+        echo "fp16 run: ${model_name}, accuracy limit:${accuracy_limit}" >> "${run_arm64_log_file}"
+
        echo 'cd  /data/local/tmp/benchmark_test' > adb_run_cmd.txt
-        echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/data/local/tmp/benchmark_test;./benchmark --modelFile='${model_name}'.ms --inDataFile=/data/local/tmp/input_output/input/'${model_name}'.ms.bin --benchmarkDataFile=/data/local/tmp/input_output/output/'${model_name}'.ms.out --enableFp16=true --accuracyThreshold=6' >> "${run_arm64_log_file}"
-        echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/data/local/tmp/benchmark_test;./benchmark --modelFile='${model_name}'.ms --inDataFile=/data/local/tmp/input_output/input/'${model_name}'.ms.bin --benchmarkDataFile=/data/local/tmp/input_output/output/'${model_name}'.ms.out --enableFp16=true --accuracyThreshold=6' >> adb_run_cmd.txt
+        echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/data/local/tmp/benchmark_test' >> adb_run_cmd.txt
+        echo './benchmark --modelFile='${model_name}'.fp16.ms --inDataFile=/data/local/tmp/input_output/input/'${model_name}'.ms.bin --benchmarkDataFile=/data/local/tmp/input_output/output/'${model_name}'.ms.out --enableFp16=true --accuracyThreshold='${accuracy_limit} >> adb_run_cmd.txt
+
+        cat adb_run_cmd.txt >> "${run_arm64_log_file}"
        adb -s ${device_id} shell < adb_run_cmd.txt >> "${run_arm64_log_file}"
        if [ $? = 0 ]; then
            run_result='arm64_fp16: '${model_name}' pass'; echo ${run_result} >> ${run_benchmark_result_file}
        else
            run_result='arm64_fp16: '${model_name}' failed'; echo ${run_result} >> ${run_benchmark_result_file}; return 1
        fi
+
        # run benchmark test without clib data
-        echo ${model_name} >> "${run_arm64_log_file}"
        echo 'cd  /data/local/tmp/benchmark_test' > adb_run_cmd.txt
-        echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/data/local/tmp/benchmark_test;./benchmark --modelFile='${model_name}'.ms --warmUpLoopCount=1 --loopCount=2 --enableFp16=true' >> "${run_arm64_log_file}"
-        echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/data/local/tmp/benchmark_test;./benchmark --modelFile='${model_name}'.ms --warmUpLoopCount=1 --loopCount=2 --enableFp16=true' >> adb_run_cmd.txt
+        echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/data/local/tmp/benchmark_test' >> adb_run_cmd.txt
+        echo './benchmark --modelFile='${model_name}'.fp16.ms --warmUpLoopCount=1 --loopCount=2 --enableFp16=true' >> adb_run_cmd.txt
+        cat adb_run_cmd.txt >> "${run_arm64_log_file}"
        adb -s ${device_id} shell < adb_run_cmd.txt >> "${run_arm64_log_file}"
        if [ $? = 0 ]; then
            run_result='arm64_fp16: '${model_name}' pass'; echo ${run_result} >> ${run_benchmark_result_file}