!33556 [MSLITE] fix tensorrt server inference ci
Merge pull request !33556 from Liu_Xuu/trt_0425_ci
commit f2ba3ca2e8
@ -33,6 +33,11 @@ int LSTMTensorRT::IsSupport(const schema::Primitive *primitive, const std::vecto
    MS_LOG(ERROR) << "Unsupported output tensor size, size is " << out_tensors.size();
    return RET_ERROR;
  }
  mindspore::MSTensor &hidden_in_init = in_tensors_[HIDDEN_IN_TENSOR_INIT];
  hidden_init_name_ = hidden_in_init.Name() + "_hidden_init";
  mindspore::MSTensor &cell_in_init = in_tensors_[CELL_IN_TENSOR_INIT];
  cell_init_name_ = cell_in_init.Name() + "_cell_init";

  dynamic_shape_params_.support_dynamic_ = false;
  dynamic_shape_params_.support_hw_dynamic_ = false;
  return RET_OK;
@ -108,11 +113,10 @@ int LSTMTensorRT::PreProcess() {
}

int LSTMTensorRT::AddLSTMLayers() {
  nvinfer1::ITensor *data_out{nullptr};
  mindspore::MSTensor &hidden_in_init = in_tensors_[HIDDEN_IN_TENSOR_INIT];
  hidden_init_name_ = hidden_in_init.Name() + "_hidden_init";
  mindspore::MSTensor &cell_in_init = in_tensors_[CELL_IN_TENSOR_INIT];
  cell_init_name_ = cell_in_init.Name() + "_cell_init";

  nvinfer1::ITensor *data_out{nullptr};
  nvinfer1::ITensor *hidden_init = network_->addInput(
    hidden_init_name_.c_str(), nvinfer1::DataType::kFLOAT,
    nvinfer1::Dims3(params_.layer_count_ * params_.directional_cnt_, params_.batch_size_, params_.hidden_size_));
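
Together, these two C++ hunks move the naming of the LSTM initial hidden/cell state tensors into IsSupport and disable dynamic-shape support for the op, so the TensorRT engine is always built with static LSTM state dimensions. A minimal sketch of the underlying TensorRT pattern, registering an initial-state tensor as an explicit network input with fixed dimensions; the free function and its parameter names are illustrative, not the actual LSTMTensorRT members:

#include <string>
#include <NvInfer.h>

// Sketch: declare an LSTM state tensor (hidden or cell) as a static-shape
// network input; fixed dims mean no dynamic-shape optimization profile is
// needed, matching dynamic_shape_params_.support_dynamic_ = false above.
nvinfer1::ITensor *AddStateInput(nvinfer1::INetworkDefinition *network, const std::string &name,
                                 int layer_count, int directional_cnt, int batch_size, int hidden_size) {
  return network->addInput(name.c_str(), nvinfer1::DataType::kFLOAT,
                           nvinfer1::Dims3(layer_count * directional_cnt, batch_size, hidden_size));
}
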
@ -1,5 +1,4 @@
gender_resnet34_lzl.onnx;1:input.1;1,300,64,1;
gender_lstm_vad.onnx;1:input.1;1,198,64;;offline_resize
ml_video_edit_person_divison_video;2:graph_input-0,graph_input-1;1,512,512,3:1,512,512,1;
ml_audio_kit_vocals_unet_spectrum.onnx;1:waveform;1,512,1024,4;
ml_video_edit_vignet.onnx;1:input;1,256,256,6;

@ -1,6 +1,8 @@
# model_info accuracy_limit
# model_name;input_num:input_name;input_shapes;spec_threads;mode accuracy_limit/CONVERTER

gender_lstm_vad.onnx;1:input.1;1,198,64;;offline_resize

# Run in distribution server
# wide_and_deep_.mindir CONVERTER
# wide_and_deep_1.mindir CONVERTER
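
These two config hunks relocate gender_lstm_vad.onnx from the plain TensorRT model list into models_server_inference_tensorrt.cfg, so the LSTM model is exercised by the server-inference CI job. As the header comment documents, each entry is a semicolon-separated record: model name, input count and names, input shapes, spare threads, and mode. A hedged bash sketch of splitting such a line, with illustrative variable names:

# Sketch: split one models_*.cfg entry into its semicolon-separated fields.
line='gender_lstm_vad.onnx;1:input.1;1,198,64;;offline_resize'
IFS=';' read -r model_name input_info input_shapes spec_threads mode <<< "${line}"
echo "model=${model_name} inputs=${input_info} shapes=${input_shapes} mode=${mode}"
# spec_threads is empty for this entry, i.e. the default is used.
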
@ -95,10 +95,8 @@ function Run_TensorRT() {
        if [[ ${mode} == "fp16" ]]; then
            enableFp16="true"
        fi

        # different tensorrt run mode use different cuda command
        echo 'CUDA_VISILE_DEVICE='${cuda_device_id}' ./benchmark --modelFile='${model_file}' --inputShapes='${input_shapes}' --inDataFile='${input_files}' --benchmarkDataFile='${output_file}' --enableFp16='${enableFp16}' --accuracyThreshold='${acc_limit}' --device=GPU --enableParallelPredict=true' >> "${run_benchmark_result_file}"
        CUDA_VISILE_DEVICE=${cuda_device_id} ./benchmark --modelFile=${model_file} --inputShapes=${input_shapes} --inDataFile=${input_files} --benchmarkDataFile=${output_file} --enableFp16=${enableFp16} --accuracyThreshold=${acc_limit} --device=GPU --enableParallelPredict=true >> ${run_benchmark_result_file}
        echo 'CUDA_VISILE_DEVICE='${cuda_device_id}' ./benchmark --modelFile='${model_file}' --inputShapes='${input_shapes}' --inDataFile='${input_files}' --benchmarkDataFile='${output_file}' --enableFp16='${enableFp16}' --accuracyThreshold='${acc_limit}' --enableParallelPredict=true --device=GPU'
        CUDA_VISILE_DEVICE=${cuda_device_id} ./benchmark --modelFile=${model_file} --inputShapes=${input_shapes} --inDataFile=${input_files} --benchmarkDataFile=${output_file} --enableFp16=${enableFp16} --accuracyThreshold=${acc_limit} --enableParallelPredict=true --device=GPU

        if [ $? = 0 ]; then
            run_result='TensorRT: '${model_name}' pass'; echo ${run_result} >> ${run_benchmark_result_file}
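
This hunk moves --device=GPU after --enableParallelPredict=true and stops redirecting the echoed command and the benchmark output into run_benchmark_result_file, leaving that file for the pass/fail summary lines only. Note in passing that the scripts spell the environment variable CUDA_VISILE_DEVICE; the variable the CUDA runtime actually reads is CUDA_VISIBLE_DEVICES. A hedged sketch of the invocation pattern, where the helper name and the corrected env-var spelling are illustrative additions, not part of the committed script:

# Sketch: run one model in parallel-predict mode, logging the exact command
# line before executing it; cuda_device_id comes from the surrounding script.
run_parallel_benchmark() {
    local model_file=$1 input_shapes=$2 log_file=$3
    local cmd="./benchmark --modelFile=${model_file} --inputShapes=${input_shapes} --enableParallelPredict=true --device=GPU"
    echo "CUDA_VISIBLE_DEVICES=${cuda_device_id} ${cmd}" >> "${log_file}"
    CUDA_VISIBLE_DEVICES=${cuda_device_id} ${cmd} >> "${log_file}" 2>&1
}
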
@ -110,6 +108,28 @@ function Run_TensorRT() {
    done
}

# Print start msg before run testcase
function MS_PRINT_TESTCASE_START_MSG() {
    echo ""
    echo -e "-------------------------------------------------------------------------------------------------------------------------"
    echo -e "env Testcase Result "
    echo -e "--- -------- ------ "
}

# Print start msg after run testcase
function MS_PRINT_TESTCASE_END_MSG() {
    echo -e "-------------------------------------------------------------------------------------------------------------------------"
}

function Print_Benchmark_Result() {
    MS_PRINT_TESTCASE_START_MSG
    while read line; do
        arr=("${line}")
        printf "%-20s %-90s %-7s\n" ${arr[0]} ${arr[1]} ${arr[2]}
    done < $1
    MS_PRINT_TESTCASE_END_MSG
}

# Example:sh run_benchmark_gpu.sh -r /home/temp_test -m /home/temp_test/models -d "8KE5T19620002408" -e arm_cpu
while getopts "r:m:d:e:l:" opt; do
    case ${opt} in
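
The added helpers give this script the same tabular summary the other backend scripts print: Print_Benchmark_Result reads one whitespace-separated record per line (environment, testcase, result) and pads the fields into three columns. A small usage sketch with an illustrative result line matching the run_result format above:

# Sketch: feed Print_Benchmark_Result a file of pass/fail records.
result_file=$(mktemp)
echo 'TensorRT: gender_lstm_vad.onnx pass' >> "${result_file}"
Print_Benchmark_Result "${result_file}"
# Prints the header block, then the record column-aligned via printf.
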
@ -142,6 +162,7 @@ done

run_fail_not_return="OFF"
basepath=$(pwd)
echo "NVIDIA TensorRT, bashpath is ${basepath}"
x86_path=${release_path}/centos_x86 # ../release_pkg/lite
tensorrt_path=${x86_path}/server/tensorrt/cuda-11.1

@ -154,13 +175,15 @@ fi
IFS="-" read -r -a file_name_array <<< "$file_name"
version=${file_name_array[2]}
cd ${basepath}
rm -rf ./*

# Set models config filepath
config_folder="config_level0"
if [[ ${level} == "level1" ]]; then
    config_folder="config_level1"
fi
models_server_inference_config=${basepath}/../${config_folder}/models_server_inference_tensorrt.cfg
cp ${basepath}/../${config_folder}/models_server_inference_tensorrt.cfg ./
models_server_inference_config=${basepath}/models_server_inference_tensorrt.cfg

ms_models_path=${basepath}/ms_models
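
Instead of pointing into the repository's config_level0/config_level1 tree, the script now copies the model list into basepath and reads the local copy, so the staged benchmark directory is self-contained. A hedged guard sketch, not part of the committed script, that would fail fast if the staged copy were missing:

# Sketch (illustrative): verify the staged config exists and is non-empty.
if [ ! -s "${models_server_inference_config}" ]; then
    echo "missing models_server_inference_tensorrt.cfg" >&2
    exit 1
fi
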
@ -183,11 +206,9 @@ Print_Converter_Result $run_converter_result_file

if [[ ${Run_converter_status} = 0 ]];then
    echo "Run converter success"
    Print_Converter_Result $run_converter_result_file
else
    echo "Run converter failed"
    cat ${run_converter_log_file}
    Print_Converter_Result $run_converter_result_file
    exit 1
fi
# Empty config file is allowed, but warning message will be shown
@ -197,11 +218,10 @@ if [[ $(Exist_File_In_Path ${ms_models_path} ".ms") != "true" ]]; then
fi

# Write benchmark result to temp file
export GLOG_logtostderr=0
run_benchmark_result_file=${basepath}/run_benchmark_result.txt
echo ' ' > ${run_benchmark_result_file}
echo 'run server inference x86 logs: ' > ${run_benchmark_result_file}

# Copy the MindSpore models:
echo "Push files and run benchmark"
benchmark_test_path=${basepath}/benchmark_test
@ -220,7 +240,6 @@ fi
if [[ $backend == "all" || $backend == "server_inference_x86_gpu" ]]; then
    if [[ ${Run_x86_status} != 0 ]];then
        echo "run x86 server inference failed"
        cat ${run_benchmark_result_file}
        isFailed=1
    fi
fi

@ -145,6 +145,7 @@ function Run_TensorRT() {
            done
        fi
        output_file=${data_path}'output/'${model_name}'.ms.out'
        config_file_path=${data_path}'input/'${model_name}'.config'

        # set accuracy limitation
        acc_limit="0.5"
@ -168,8 +169,9 @@ function Run_TensorRT() {
        fi

        # different tensorrt run mode use different cuda command
        echo 'CUDA_VISILE_DEVICE='${cuda_device_id}' ./benchmark --modelFile='${model_file}' --inputShapes='${input_shapes}' --inDataFile='${input_files}' --benchmarkDataFile='${output_file}' --enableFp16='${enableFp16}' --accuracyThreshold='${acc_limit}' --cosineDistanceThreshold=${cosine_limit} --device=GPU' >> "${run_tensorrt_log_file}"
        CUDA_VISILE_DEVICE=${cuda_device_id} ./benchmark --modelFile=${model_file} --inputShapes=${input_shapes} --inDataFile=${input_files} --benchmarkDataFile=${output_file} --enableFp16=${enableFp16} --accuracyThreshold=${acc_limit} --cosineDistanceThreshold=${cosine_limit} --device=GPU >> ${run_tensorrt_log_file}
        echo 'CUDA_VISILE_DEVICE='${cuda_device_id}' ./benchmark --modelFile='${model_file}' --inputShapes='${input_shapes}' --inDataFile='${input_files}' --benchmarkDataFile='${output_file}' --enableFp16='${enableFp16}' --accuracyThreshold='${acc_limit}' --configFile='${config_file_path}' --cosineDistanceThreshold=${cosine_limit} --device=GPU' >> "${run_tensorrt_log_file}"
        CUDA_VISILE_DEVICE=${cuda_device_id} ./benchmark --modelFile=${model_file} --inputShapes=${input_shapes} --inDataFile=${input_files} --benchmarkDataFile=${output_file} --enableFp16=${enableFp16} --accuracyThreshold=${acc_limit} --configFile=${config_file_path} --cosineDistanceThreshold=${cosine_limit} --device=GPU >> ${run_tensorrt_log_file}

        if [ $? = 0 ]; then
            run_result='TensorRT: '${model_name}' pass'; echo ${run_result} >> ${run_benchmark_result_file}
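
These last two hunks are the functional core of the CI fix: each model may now ship a per-model .config file next to its input data, and the benchmark call passes it through --configFile alongside the accuracy and cosine-distance thresholds. A hedged end-to-end sketch of the resulting invocation; the model name, file layout, and cosine_limit value are placeholders, with only the flags and the 0.5 accuracy limit taken from the script:

# Sketch: the benchmark call shape after this patch (placeholder paths/values).
model_name='gender_lstm_vad.onnx'
data_path='./benchmark_test/'
config_file_path=${data_path}'input/'${model_name}'.config'
cosine_limit='-1.1'
./benchmark --modelFile=./ms_models/${model_name}.ms --inputShapes=1,198,64 \
    --inDataFile=${data_path}input/${model_name}.ms.bin \
    --benchmarkDataFile=${data_path}output/${model_name}.ms.out \
    --enableFp16=false --accuracyThreshold=0.5 \
    --configFile=${config_file_path} --cosineDistanceThreshold=${cosine_limit} \
    --device=GPU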