forked from mindspore-Ecosystem/mindspore
!7684 [MSLITE] fp16 run test with settable accuracy
Merge pull request !7684 from ling/bug
This commit is contained in:
commit
52717bba31
|
@ -1,24 +1,11 @@
|
|||
|
||||
.text
|
||||
.align 5
|
||||
//.p2align 5,,15
|
||||
.global PostFuncBiasReluC4
|
||||
#ifndef __APPLE__
|
||||
.type PostFuncBiasReluC4, %function
|
||||
#endif
|
||||
|
||||
//void PostFuncBiasReluC4(float *dst, const float *src, const float *bias, size_t oc4div, size_t oc4mod,
|
||||
// size_t plane_size, size_t plane_stride, size_t relu_type);
|
||||
// r0 dst r1 src r2 bias
|
||||
// r3 oc4div r4 oc4mod r5 plane_size
|
||||
// r6 plane_stride r7 relu_type
|
||||
|
||||
// v0 ~ v15 value
|
||||
// v16 v17 bias data
|
||||
// r10 r11 write loop tmp buf
|
||||
// r16 relu6 #6; r17 relu #0
|
||||
// lr oc8 loop control
|
||||
// r8 hw loop control
|
||||
.text
|
||||
.align 5
|
||||
//.p2align 5,,15
|
||||
.global PostFuncBiasReluC4
|
||||
#ifndef __APPLE__
|
||||
.type PostFuncBiasReluC4, %function
|
||||
#endif
|
||||
|
||||
PostFuncBiasReluC4:
|
||||
push {r4-r8, r10, r11, lr}
|
||||
|
@ -220,7 +207,7 @@ Loop_C1_3_Relu6:
|
|||
vadd.f32 q0, q0, q12
|
||||
vmin.f32 q0, q0, q14
|
||||
vmax.f32 q0, q0, q15
|
||||
vst1.32 {d0}, [r0], r6
|
||||
vst1.32 {d0}, [r0], r12
|
||||
vst1.32 {d1[0]}, [r11], r12
|
||||
b Loop_C1_3_Relu6
|
||||
Loop_C1_3_Relu:
|
||||
|
@ -230,7 +217,7 @@ Loop_C1_3_Relu:
|
|||
vld1.32 {q0}, [r1]!
|
||||
vadd.f32 q0, q0, q12
|
||||
vmax.f32 q0, q0, q15
|
||||
vst1.32 {d0}, [r0], r6
|
||||
vst1.32 {d0}, [r0], r12
|
||||
vst1.32 {d1[0]}, [r11], r12
|
||||
b Loop_C1_3_Relu
|
||||
Loop_C1_3_Write:
|
||||
|
@ -239,7 +226,7 @@ Loop_C1_3_Write:
|
|||
sub r8, r8, #1
|
||||
vld1.32 {q0}, [r1]!
|
||||
vadd.f32 q0, q0, q12
|
||||
vst1.32 {d0}, [r0], r6
|
||||
vst1.32 {d0}, [r0], r12
|
||||
vst1.32 {d1[0]}, [r11], r12
|
||||
b Loop_C1_3_Write
|
||||
|
||||
|
|
|
@ -12,6 +12,7 @@ mnet
|
|||
ml_face_landmark
|
||||
ml_liveness_detect_landmark
|
||||
deconv_test_model
|
||||
deconvs_model
|
||||
# aware_training
|
||||
video_infer.tflite
|
||||
mobilenet_v1_1.0_224_quant.tflite
|
||||
|
|
|
@ -57,6 +57,8 @@ detect-mbv1-shortcut-400-400_nopostprocess_simplified
|
|||
detect_mbv1_640_480_nopostprocess_simplified
|
||||
retinaface
|
||||
deconv_test_model
|
||||
deconvs_model
|
||||
HWSR-s_256_256
|
||||
age_new
|
||||
detection_retinaface_fix
|
||||
landmark
|
||||
|
|
|
@ -1,10 +1,34 @@
|
|||
detect-deeper-halfdeeper-mbv1-shortcut-400-400_nopostprocess_simplified.fp16
|
||||
model_emotions_0727_nosoftmax.tflite.fp16
|
||||
mtk_isface.fp16
|
||||
mtk_landmark.fp16
|
||||
mtk_pose_tuku.fp16
|
||||
mtk_age_gender.tflite.fp16
|
||||
mtk_model_face_dress.tflite.fp16
|
||||
ml_face_contour.fp16
|
||||
ml_face_landmark.fp16
|
||||
retinaface.fp16
|
||||
hiai_cpu_face_detect 4
|
||||
#hiai_cpu_face_attr 5000
|
||||
hiai_cpu_face_hat 0.3
|
||||
hiai_face_detect_rfb 4
|
||||
hiai_face_isface 0.1
|
||||
hiai_face_landmark 0.2
|
||||
hiai_face_pose_tuku 1.3
|
||||
ml_face_contour 0.5
|
||||
ml_face_landmark 1
|
||||
mtk_isface 0.2
|
||||
mtk_landmark 0.3
|
||||
mtk_pose_tuku 1
|
||||
mtk_age_gender.tflite 0.1
|
||||
mtk_model_face_dress.tflite 1
|
||||
mtk_model_ckpt.tflite 20
|
||||
mtk_face_features_v1.tflite 20
|
||||
mtk_new_detect.tflite 3
|
||||
mtk_pose.tflite 2
|
||||
mtk_model_emotions_0727_nosoftmax.tflite 2
|
||||
#mtk_model_normalize_object_scene_ps_20200519_f32.tflite 20
|
||||
mtk_model_normalize_object_scene_ps_20200826_f32_no_softmax.tflite 22
|
||||
mtk_276landmark_0913.tflite 16
|
||||
mtk_face_recognition.tflite 8
|
||||
mtk_convert_model.tflite 5
|
||||
mobilenet_v1_0.5_128.tflite 2.5
|
||||
mobilenet_v1_1.0_192.tflite 26
|
||||
mobilenet_v2_1.0_224.tflite 3
|
||||
retinaface 6
|
||||
deconvs_model 1
|
||||
efficientnet_lite4_fp32_2.tflite 6
|
||||
mnasnet_0.50_224_1_metadata_1.tflite 5
|
||||
detect-deeper-halfdeeper-mbv1-shortcut-400-400_nopostprocess_simplified 1
|
||||
HWSR-s_256_256 10
|
||||
#model_name accuracy_limit
|
||||
|
|
|
@ -132,10 +132,11 @@ function Run_Converter() {
|
|||
|
||||
# Copy fp16 ms models:
|
||||
while read line; do
|
||||
model_name=${line%.*}
|
||||
if [[ $model_name == \#* ]]; then
|
||||
continue
|
||||
fp16_line_info=${line}
|
||||
if [[ $fp16_line_info == \#* ]]; then
|
||||
continue
|
||||
fi
|
||||
model_name=`echo ${fp16_line_info}|awk -F ' ' '{print $1}'`
|
||||
echo 'cp '${ms_models_path}'/'${model_name}'.ms' ${ms_models_path}'/'${model_name}'.fp16.ms'
|
||||
cp ${ms_models_path}/${model_name}.ms ${ms_models_path}/${model_name}.fp16.ms
|
||||
if [ $? = 0 ]; then
|
||||
|
@ -511,25 +512,32 @@ function Run_arm64() {
|
|||
|
||||
# Run fp16 converted models:
|
||||
while read line; do
|
||||
model_name=${line}
|
||||
if [[ $model_name == \#* ]]; then
|
||||
fp16_line_info=${line}
|
||||
if [[ $fp16_line_info == \#* ]]; then
|
||||
continue
|
||||
fi
|
||||
echo ${model_name} >> "${run_arm64_log_file}"
|
||||
model_name=`echo ${fp16_line_info}|awk -F ' ' '{print $1}'`
|
||||
accuracy_limit=`echo ${fp16_line_info}|awk -F ' ' '{print $2}'`
|
||||
echo "---------------------------------------------------------" >> "${run_arm64_log_file}"
|
||||
echo "fp16 run: ${model_name}, accuracy limit:${accuracy_limit}" >> "${run_arm64_log_file}"
|
||||
|
||||
echo 'cd /data/local/tmp/benchmark_test' > adb_run_cmd.txt
|
||||
echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/data/local/tmp/benchmark_test;./benchmark --modelFile='${model_name}'.ms --inDataFile=/data/local/tmp/input_output/input/'${model_name}'.ms.bin --benchmarkDataFile=/data/local/tmp/input_output/output/'${model_name}'.ms.out --enableFp16=true --accuracyThreshold=6' >> "${run_arm64_log_file}"
|
||||
echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/data/local/tmp/benchmark_test;./benchmark --modelFile='${model_name}'.ms --inDataFile=/data/local/tmp/input_output/input/'${model_name}'.ms.bin --benchmarkDataFile=/data/local/tmp/input_output/output/'${model_name}'.ms.out --enableFp16=true --accuracyThreshold=6' >> adb_run_cmd.txt
|
||||
echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/data/local/tmp/benchmark_test' >> adb_run_cmd.txt
|
||||
echo './benchmark --modelFile='${model_name}'.fp16.ms --inDataFile=/data/local/tmp/input_output/input/'${model_name}'.ms.bin --benchmarkDataFile=/data/local/tmp/input_output/output/'${model_name}'.ms.out --enableFp16=true --accuracyThreshold='${accuracy_limit} >> adb_run_cmd.txt
|
||||
|
||||
cat adb_run_cmd.txt >> "${run_arm64_log_file}"
|
||||
adb -s ${device_id} shell < adb_run_cmd.txt >> "${run_arm64_log_file}"
|
||||
if [ $? = 0 ]; then
|
||||
run_result='arm64_fp16: '${model_name}' pass'; echo ${run_result} >> ${run_benchmark_result_file}
|
||||
else
|
||||
run_result='arm64_fp16: '${model_name}' failed'; echo ${run_result} >> ${run_benchmark_result_file}; return 1
|
||||
fi
|
||||
|
||||
# run benchmark test without clib data
|
||||
echo ${model_name} >> "${run_arm64_log_file}"
|
||||
echo 'cd /data/local/tmp/benchmark_test' > adb_run_cmd.txt
|
||||
echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/data/local/tmp/benchmark_test;./benchmark --modelFile='${model_name}'.ms --warmUpLoopCount=1 --loopCount=2 --enableFp16=true' >> "${run_arm64_log_file}"
|
||||
echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/data/local/tmp/benchmark_test;./benchmark --modelFile='${model_name}'.ms --warmUpLoopCount=1 --loopCount=2 --enableFp16=true' >> adb_run_cmd.txt
|
||||
echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/data/local/tmp/benchmark_test' >> adb_run_cmd.txt
|
||||
echo './benchmark --modelFile='${model_name}'.fp16.ms --warmUpLoopCount=1 --loopCount=2 --enableFp16=true' >> adb_run_cmd.txt
|
||||
cat adb_run_cmd.txt >> "${run_arm64_log_file}"
|
||||
adb -s ${device_id} shell < adb_run_cmd.txt >> "${run_arm64_log_file}"
|
||||
if [ $? = 0 ]; then
|
||||
run_result='arm64_fp16: '${model_name}' pass'; echo ${run_result} >> ${run_benchmark_result_file}
|
||||
|
|
Loading…
Reference in New Issue