add_gpu_runner_benchmark_ST

This commit is contained in:
yefeng 2023-01-04 11:53:11 +08:00
parent 378b787bf1
commit 2e06a4c9b7
3 changed files with 29 additions and 6 deletions

View File

@@ -4,3 +4,11 @@ resnet18_batch_size1.onnx;1:input_node;1,6,224,224
screenshot_direction.pb;1:input_1;1,64,512,3
screenshot_angle.pb;1:input_images;1,1024,1024,3 3.0
screenshot_ocr_220613_batch32_textlen24.pb;1:img_data;1,32,256,3
#For ModelParallelRunner API
yolox.onnx;1:input;1,3,640,640;;parallel_predict
screenshot_text_location_220613_100.onnx;1:input_images;1,3,1024,1024;;parallel_predict
resnet18_batch_size1.onnx;1:input_node;1,6,224,224;;parallel_predict
screenshot_direction.pb;1:input_1;1,64,512,3;;parallel_predict
screenshot_angle.pb;1:input_images;1,1024,1024,3;;parallel_predict 3.0
screenshot_ocr_220613_batch32_textlen24.pb;1:img_data;1,32,256,3;;parallel_predict
#end ModelParallelRunner API

View File

@@ -67,4 +67,8 @@ hiai_dress_detect.pb;1:data;1,960,960,3;;parallel_predict
hiai_cn_recognize_modify_padv2.pb;1:input_0;1,32,512,1;;parallel_predict
inception_v4.pb;1:input;1,299,299,3;;parallel_predict
mobilenet_v1_0.25_128_frozen.pb;1:input;1,128,128,3;;parallel_predict
inception_v4.pb;1:input;1,299,299,3;;parallel_predict
inception_v3.pb;1:input;1,299,299,3;;parallel_predict
hiai_label_and_video.pb;1:input_0;1,224,224,3;;parallel_predict
hiai_model_0909_kd_rot_ps_softmax.pb;1:input_0;1,224,224,3;;parallel_predict
screenshot_angle.pb;1:input_images;1,1024,1024,3;;parallel_predict 3.0
ml_object_detect.pb;1:input/input_data;1,288,288,3;;parallel_predict

View File

@@ -71,7 +71,10 @@ function Run_TensorRT() {
echo "Skip ${model_name} ......"
continue
fi
use_parallel_predict="false"
if [[ ${mode} == "parallel_predict" ]]; then
use_parallel_predict="true"
fi
echo "Benchmarking ${model_name} ......"
model_file=${ms_models_path}'/'${model_name}'.mindir'
input_files=""
@@ -99,13 +102,21 @@ function Run_TensorRT() {
if [[ ${mode} == "fp16" ]]; then
enableFp16="true"
fi
echo 'CUDA_VISIBLE_DEVICES='${cuda_device_id}' ./benchmark --modelFile='${model_file}' --inputShapes='${input_shapes}' --inDataFile='${input_files}' --benchmarkDataFile='${output_file}' --enableFp16='${enableFp16}' --accuracyThreshold='${acc_limit}' --device=GPU'
CUDA_VISIBLE_DEVICES=${cuda_device_id} ./benchmark --modelFile=${model_file} --inputShapes=${input_shapes} --inDataFile=${input_files} --benchmarkDataFile=${output_file} --enableFp16=${enableFp16} --accuracyThreshold=${acc_limit} --device=GPU
echo 'CUDA_VISIBLE_DEVICES='${cuda_device_id}' ./benchmark --enableParallelPredict='${use_parallel_predict}' --modelFile='${model_file}' --inputShapes='${input_shapes}' --inDataFile='${input_files}' --benchmarkDataFile='${output_file}' --enableFp16='${enableFp16}' --accuracyThreshold='${acc_limit}' --device=GPU'
CUDA_VISIBLE_DEVICES=${cuda_device_id} ./benchmark --enableParallelPredict=${use_parallel_predict} --modelFile=${model_file} --inputShapes=${input_shapes} --inDataFile=${input_files} --benchmarkDataFile=${output_file} --enableFp16=${enableFp16} --accuracyThreshold=${acc_limit} --device=GPU
if [ $? = 0 ]; then
run_result='TensorRT: '${model_name}' pass'; echo ${run_result} >> ${run_benchmark_result_file}
if [[ ${mode} == "parallel_predict" ]]; then
run_result='TensorRT: '${model_name}' parallel_pass'; echo ${run_result} >> ${run_benchmark_result_file}
else
run_result='TensorRT: '${model_name}' pass'; echo ${run_result} >> ${run_benchmark_result_file}
fi
else
run_result='TensorRT: '${model_name}' failed'; echo ${run_result} >> ${run_benchmark_result_file}; return 1
if [[ ${mode} == "parallel_predict" ]]; then
run_result='TensorRT: '${model_name}' parallel_failed'; echo ${run_result} >> ${run_benchmark_result_file}; return 1
else
run_result='TensorRT: '${model_name}' failed'; echo ${run_result} >> ${run_benchmark_result_file}; return 1
fi
fi
done < ${cfg_file}