!33556 [MSLITE] fix TensorRT server inference CI

Merge pull request !33556 from Liu_Xuu/trt_0425_ci
i-robot 2022-05-11 03:16:17 +00:00 committed by Gitee
commit f2ba3ca2e8
5 changed files with 42 additions and 16 deletions

View File

@@ -33,6 +33,11 @@ int LSTMTensorRT::IsSupport(const schema::Primitive *primitive, const std::vecto
     MS_LOG(ERROR) << "Unsupported output tensor size, size is " << out_tensors.size();
     return RET_ERROR;
   }
+  mindspore::MSTensor &hidden_in_init = in_tensors_[HIDDEN_IN_TENSOR_INIT];
+  hidden_init_name_ = hidden_in_init.Name() + "_hidden_init";
+  mindspore::MSTensor &cell_in_init = in_tensors_[CELL_IN_TENSOR_INIT];
+  cell_init_name_ = cell_in_init.Name() + "_cell_init";
   dynamic_shape_params_.support_dynamic_ = false;
   dynamic_shape_params_.support_hw_dynamic_ = false;
   return RET_OK;
@@ -108,11 +113,10 @@ int LSTMTensorRT::PreProcess() {
 }
 
 int LSTMTensorRT::AddLSTMLayers() {
-  mindspore::MSTensor &hidden_in_init = in_tensors_[HIDDEN_IN_TENSOR_INIT];
-  hidden_init_name_ = hidden_in_init.Name() + "_hidden_init";
-  mindspore::MSTensor &cell_in_init = in_tensors_[CELL_IN_TENSOR_INIT];
-  cell_init_name_ = cell_in_init.Name() + "_cell_init";
-  nvinfer1::ITensor *data_out{nullptr};
+  nvinfer1::ITensor *data_out{nullptr};
   nvinfer1::ITensor *hidden_init = network_->addInput(
     hidden_init_name_.c_str(), nvinfer1::DataType::kFLOAT,
     nvinfer1::Dims3(params_.layer_count_ * params_.directional_cnt_, params_.batch_size_, params_.hidden_size_));

View File

@@ -1,5 +1,4 @@
 gender_resnet34_lzl.onnx;1:input.1;1,300,64,1;
-gender_lstm_vad.onnx;1:input.1;1,198,64;;offline_resize
 ml_video_edit_person_divison_video;2:graph_input-0,graph_input-1;1,512,512,3:1,512,512,1;
 ml_audio_kit_vocals_unet_spectrum.onnx;1:waveform;1,512,1024,4;
 ml_video_edit_vignet.onnx;1:input;1,256,256,6;

View File

@@ -1,6 +1,8 @@
 # model_info accuracy_limit
+# model_name;input_num:input_name;input_shapes;spec_threads;mode accuracy_limit/CONVERTER
+gender_lstm_vad.onnx;1:input.1;1,198,64;;offline_resize
 # Run in distribution server
 # wide_and_deep_.mindir CONVERTER
 # wide_and_deep_1.mindir CONVERTER
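Each entry in these .cfg files follows the field order given in the new header comment: model name, input count and names, input shapes, spec_threads, and mode, separated by semicolons. A minimal bash sketch (illustrative, not part of the commit) of how the moved gender_lstm_vad.onnx line splits:

line='gender_lstm_vad.onnx;1:input.1;1,198,64;;offline_resize'
IFS=';' read -r model_name input_info input_shapes spec_threads mode <<< "${line}"
echo "model:   ${model_name}"       # gender_lstm_vad.onnx
echo "inputs:  ${input_info}"       # one input, named input.1
echo "shapes:  ${input_shapes}"     # 1,198,64
echo "threads: '${spec_threads}'"   # empty field, defaults apply
echo "mode:    ${mode}"             # offline_resize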

View File

@@ -95,10 +95,8 @@ function Run_TensorRT() {
         if [[ ${mode} == "fp16" ]]; then
             enableFp16="true"
         fi
-        # different tensorrt run modes use different cuda commands
-        echo 'CUDA_VISIBLE_DEVICES='${cuda_device_id}' ./benchmark --modelFile='${model_file}' --inputShapes='${input_shapes}' --inDataFile='${input_files}' --benchmarkDataFile='${output_file}' --enableFp16='${enableFp16}' --accuracyThreshold='${acc_limit}' --device=GPU --enableParallelPredict=true' >> "${run_benchmark_result_file}"
-        CUDA_VISIBLE_DEVICES=${cuda_device_id} ./benchmark --modelFile=${model_file} --inputShapes=${input_shapes} --inDataFile=${input_files} --benchmarkDataFile=${output_file} --enableFp16=${enableFp16} --accuracyThreshold=${acc_limit} --device=GPU --enableParallelPredict=true >> ${run_benchmark_result_file}
+        echo 'CUDA_VISIBLE_DEVICES='${cuda_device_id}' ./benchmark --modelFile='${model_file}' --inputShapes='${input_shapes}' --inDataFile='${input_files}' --benchmarkDataFile='${output_file}' --enableFp16='${enableFp16}' --accuracyThreshold='${acc_limit}' --enableParallelPredict=true --device=GPU'
+        CUDA_VISIBLE_DEVICES=${cuda_device_id} ./benchmark --modelFile=${model_file} --inputShapes=${input_shapes} --inDataFile=${input_files} --benchmarkDataFile=${output_file} --enableFp16=${enableFp16} --accuracyThreshold=${acc_limit} --enableParallelPredict=true --device=GPU
         if [ $? = 0 ]; then
             run_result='TensorRT: '${model_name}' pass'; echo ${run_result} >> ${run_benchmark_result_file}
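The echo-then-run pair above first logs the exact benchmark command and then executes it, so a failing model can be re-run by copy-pasting a single line. A stripped-down sketch of the pattern (device id and model path are illustrative, not from the commit):

cmd="CUDA_VISIBLE_DEVICES=0 ./benchmark --modelFile=./ms_models/sample.ms --device=GPU --enableParallelPredict=true"
echo "${cmd}"   # record the exact invocation first
eval "${cmd}"   # then run it; $? feeds the pass/fail bookkeeping below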
@@ -110,6 +108,28 @@ function Run_TensorRT() {
     done
 }
 
+# Print start msg before run testcase
+function MS_PRINT_TESTCASE_START_MSG() {
+    echo ""
+    echo -e "-------------------------------------------------------------------------------------------------------------------------"
+    echo -e "env                  Testcase                                                                                   Result "
+    echo -e "---                  --------                                                                                   ------ "
+}
+
+# Print end msg after run testcase
+function MS_PRINT_TESTCASE_END_MSG() {
+    echo -e "-------------------------------------------------------------------------------------------------------------------------"
+}
+
+function Print_Benchmark_Result() {
+    MS_PRINT_TESTCASE_START_MSG
+    while read -r line; do
+        arr=(${line})
+        printf "%-20s %-90s %-7s\n" ${arr[0]} ${arr[1]} ${arr[2]}
+    done < "$1"
+    MS_PRINT_TESTCASE_END_MSG
+}
+
 # Example: sh run_benchmark_gpu.sh -r /home/temp_test -m /home/temp_test/models -d "8KE5T19620002408" -e arm_cpu
 while getopts "r:m:d:e:l:" opt; do
     case ${opt} in
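Illustrative usage of the new helpers (not part of the commit): each result line that Run_TensorRT appends is three whitespace-separated tokens, which Print_Benchmark_Result lays out under the env/Testcase/Result columns.

results=/tmp/run_benchmark_result.txt             # hypothetical path
echo 'TensorRT: gender_lstm_vad.onnx pass' > ${results}
Print_Benchmark_Result ${results}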
@@ -142,6 +162,7 @@ done
 run_fail_not_return="OFF"
 basepath=$(pwd)
+echo "NVIDIA TensorRT, basepath is ${basepath}"
 
 x86_path=${release_path}/centos_x86 # ../release_pkg/lite
 tensorrt_path=${x86_path}/server/tensorrt/cuda-11.1
@@ -154,13 +175,15 @@ fi
 IFS="-" read -r -a file_name_array <<< "$file_name"
 version=${file_name_array[2]}
 
 cd ${basepath}
+rm -rf ./*
 
 # Set models config filepath
 config_folder="config_level0"
 if [[ ${level} == "level1" ]]; then
     config_folder="config_level1"
 fi
-models_server_inference_config=${basepath}/../${config_folder}/models_server_inference_tensorrt.cfg
+cp ${basepath}/../${config_folder}/models_server_inference_tensorrt.cfg ./
+models_server_inference_config=${basepath}/models_server_inference_tensorrt.cfg
 
 ms_models_path=${basepath}/ms_models
@@ -183,11 +206,9 @@ Print_Converter_Result $run_converter_result_file
 
 if [[ ${Run_converter_status} = 0 ]];then
     echo "Run converter success"
-    Print_Converter_Result $run_converter_result_file
 else
     echo "Run converter failed"
     cat ${run_converter_log_file}
-    Print_Converter_Result $run_converter_result_file
     exit 1
 fi
 
 # Empty config file is allowed, but warning message will be shown
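Net effect of the hunk above: the converter summary now prints exactly once, before the status check (the unconditional call visible in the hunk context), instead of once per branch. Sketched control flow:

Print_Converter_Result $run_converter_result_file    # single, unconditional summary
if [[ ${Run_converter_status} = 0 ]];then
    echo "Run converter success"
else
    echo "Run converter failed"
    cat ${run_converter_log_file}
    exit 1
fi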
@@ -197,11 +218,10 @@ if [[ $(Exist_File_In_Path ${ms_models_path} ".ms") != "true" ]]; then
 fi
 
 # Write benchmark result to temp file
 export GLOG_logtostderr=0
 run_benchmark_result_file=${basepath}/run_benchmark_result.txt
-echo ' ' > ${run_benchmark_result_file}
+echo 'run server inference x86 logs: ' > ${run_benchmark_result_file}
 
 # Copy the MindSpore models:
 echo "Push files and run benchmark"
 benchmark_test_path=${basepath}/benchmark_test
@@ -220,7 +240,6 @@ fi
 
 if [[ $backend == "all" || $backend == "server_inference_x86_gpu" ]]; then
     if [[ ${Run_x86_status} != 0 ]];then
         echo "run x86 server inference failed"
-        cat ${run_benchmark_result_file}
         isFailed=1
     fi
 fi

View File

@@ -145,6 +145,7 @@ function Run_TensorRT() {
             done
         fi
         output_file=${data_path}'output/'${model_name}'.ms.out'
+        config_file_path=${data_path}'input/'${model_name}'.config'
 
         # set accuracy limitation
         acc_limit="0.5"
@@ -168,8 +169,9 @@ function Run_TensorRT() {
         fi
 
         # different tensorrt run modes use different cuda commands
-        echo 'CUDA_VISIBLE_DEVICES='${cuda_device_id}' ./benchmark --modelFile='${model_file}' --inputShapes='${input_shapes}' --inDataFile='${input_files}' --benchmarkDataFile='${output_file}' --enableFp16='${enableFp16}' --accuracyThreshold='${acc_limit}' --cosineDistanceThreshold='${cosine_limit}' --device=GPU' >> "${run_tensorrt_log_file}"
-        CUDA_VISIBLE_DEVICES=${cuda_device_id} ./benchmark --modelFile=${model_file} --inputShapes=${input_shapes} --inDataFile=${input_files} --benchmarkDataFile=${output_file} --enableFp16=${enableFp16} --accuracyThreshold=${acc_limit} --cosineDistanceThreshold=${cosine_limit} --device=GPU >> ${run_tensorrt_log_file}
+        echo 'CUDA_VISIBLE_DEVICES='${cuda_device_id}' ./benchmark --modelFile='${model_file}' --inputShapes='${input_shapes}' --inDataFile='${input_files}' --benchmarkDataFile='${output_file}' --enableFp16='${enableFp16}' --accuracyThreshold='${acc_limit}' --configFile='${config_file_path}' --cosineDistanceThreshold='${cosine_limit}' --device=GPU' >> "${run_tensorrt_log_file}"
+        CUDA_VISIBLE_DEVICES=${cuda_device_id} ./benchmark --modelFile=${model_file} --inputShapes=${input_shapes} --inDataFile=${input_files} --benchmarkDataFile=${output_file} --enableFp16=${enableFp16} --accuracyThreshold=${acc_limit} --configFile=${config_file_path} --cosineDistanceThreshold=${cosine_limit} --device=GPU >> ${run_tensorrt_log_file}
         if [ $? = 0 ]; then
             run_result='TensorRT: '${model_name}' pass'; echo ${run_result} >> ${run_benchmark_result_file}
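--configFile now points at a per-model .config file pushed alongside the input data. As a purely illustrative variation (this guard is not in the commit), a model without such a file could keep the old command line by passing the flag conditionally:

config_flag=""
if [ -f "${config_file_path}" ]; then
    config_flag="--configFile=${config_file_path}"
fi
CUDA_VISIBLE_DEVICES=${cuda_device_id} ./benchmark --modelFile=${model_file} --device=GPU ${config_flag}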