!33556 [MSLITE] fix tensorrt server inference ci
Merge pull request !33556 from Liu_Xuu/trt_0425_ci
commit f2ba3ca2e8
@ -33,6 +33,11 @@ int LSTMTensorRT::IsSupport(const schema::Primitive *primitive, const std::vecto
    MS_LOG(ERROR) << "Unsupported output tensor size, size is " << out_tensors.size();
    return RET_ERROR;
  }
  mindspore::MSTensor &hidden_in_init = in_tensors_[HIDDEN_IN_TENSOR_INIT];
  hidden_init_name_ = hidden_in_init.Name() + "_hidden_init";
  mindspore::MSTensor &cell_in_init = in_tensors_[CELL_IN_TENSOR_INIT];
  cell_init_name_ = cell_in_init.Name() + "_cell_init";

  dynamic_shape_params_.support_dynamic_ = false;
  dynamic_shape_params_.support_hw_dynamic_ = false;
  return RET_OK;
@ -108,11 +113,10 @@ int LSTMTensorRT::PreProcess() {
}

int LSTMTensorRT::AddLSTMLayers() {
  nvinfer1::ITensor *data_out{nullptr};
  mindspore::MSTensor &hidden_in_init = in_tensors_[HIDDEN_IN_TENSOR_INIT];
  hidden_init_name_ = hidden_in_init.Name() + "_hidden_init";
  mindspore::MSTensor &cell_in_init = in_tensors_[CELL_IN_TENSOR_INIT];
  cell_init_name_ = cell_in_init.Name() + "_cell_init";

  nvinfer1::ITensor *data_out{nullptr};
  nvinfer1::ITensor *hidden_init = network_->addInput(
    hidden_init_name_.c_str(), nvinfer1::DataType::kFLOAT,
    nvinfer1::Dims3(params_.layer_count_ * params_.directional_cnt_, params_.batch_size_, params_.hidden_size_));
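
Together, these two C++ hunks move the naming of the LSTM initial hidden/cell state tensors into IsSupport and disable dynamic-shape support for the op, so the TensorRT engine is always built with static LSTM state dimensions. A minimal sketch of the underlying TensorRT pattern, registering an initial-state tensor as an explicit network input with fixed dimensions; the free function and its parameter names are illustrative, not the actual LSTMTensorRT members:

#include <string>
#include <NvInfer.h>

// Sketch: declare an LSTM state tensor (hidden or cell) as a static-shape
// network input; fixed dims mean no dynamic-shape optimization profile is
// needed, matching dynamic_shape_params_.support_dynamic_ = false above.
nvinfer1::ITensor *AddStateInput(nvinfer1::INetworkDefinition *network, const std::string &name,
                                 int layer_count, int directional_cnt, int batch_size, int hidden_size) {
  return network->addInput(name.c_str(), nvinfer1::DataType::kFLOAT,
                           nvinfer1::Dims3(layer_count * directional_cnt, batch_size, hidden_size));
}
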
@ -1,5 +1,4 @@
gender_resnet34_lzl.onnx;1:input.1;1,300,64,1;
gender_lstm_vad.onnx;1:input.1;1,198,64;;offline_resize
ml_video_edit_person_divison_video;2:graph_input-0,graph_input-1;1,512,512,3:1,512,512,1;
ml_audio_kit_vocals_unet_spectrum.onnx;1:waveform;1,512,1024,4;
ml_video_edit_vignet.onnx;1:input;1,256,256,6;

@ -1,6 +1,8 @@
# model_info accuracy_limit
# model_name;input_num:input_name;input_shapes;spec_threads;mode accuracy_limit/CONVERTER

gender_lstm_vad.onnx;1:input.1;1,198,64;;offline_resize

# Run in distribution server
# wide_and_deep_.mindir CONVERTER
# wide_and_deep_1.mindir CONVERTER
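
These two config hunks relocate gender_lstm_vad.onnx from the plain TensorRT model list into models_server_inference_tensorrt.cfg, so the LSTM model is exercised by the server-inference CI job. As the header comment documents, each entry is a semicolon-separated record: model name, input count and names, input shapes, spare threads, and mode. A hedged bash sketch of splitting such a line, with illustrative variable names:

# Sketch: split one models_*.cfg entry into its semicolon-separated fields.
line='gender_lstm_vad.onnx;1:input.1;1,198,64;;offline_resize'
IFS=';' read -r model_name input_info input_shapes spec_threads mode <<< "${line}"
echo "model=${model_name} inputs=${input_info} shapes=${input_shapes} mode=${mode}"
# spec_threads is empty for this entry, i.e. the default is used.
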
@ -95,10 +95,8 @@ function Run_TensorRT() {
        if [[ ${mode} == "fp16" ]]; then
            enableFp16="true"
        fi

        # different tensorrt run mode use different cuda command
        echo 'CUDA_VISILE_DEVICE='${cuda_device_id}' ./benchmark --modelFile='${model_file}' --inputShapes='${input_shapes}' --inDataFile='${input_files}' --benchmarkDataFile='${output_file}' --enableFp16='${enableFp16}' --accuracyThreshold='${acc_limit}' --device=GPU --enableParallelPredict=true' >> "${run_benchmark_result_file}"
        CUDA_VISILE_DEVICE=${cuda_device_id} ./benchmark --modelFile=${model_file} --inputShapes=${input_shapes} --inDataFile=${input_files} --benchmarkDataFile=${output_file} --enableFp16=${enableFp16} --accuracyThreshold=${acc_limit} --device=GPU --enableParallelPredict=true >> ${run_benchmark_result_file}
        echo 'CUDA_VISILE_DEVICE='${cuda_device_id}' ./benchmark --modelFile='${model_file}' --inputShapes='${input_shapes}' --inDataFile='${input_files}' --benchmarkDataFile='${output_file}' --enableFp16='${enableFp16}' --accuracyThreshold='${acc_limit}' --enableParallelPredict=true --device=GPU'
        CUDA_VISILE_DEVICE=${cuda_device_id} ./benchmark --modelFile=${model_file} --inputShapes=${input_shapes} --inDataFile=${input_files} --benchmarkDataFile=${output_file} --enableFp16=${enableFp16} --accuracyThreshold=${acc_limit} --enableParallelPredict=true --device=GPU

        if [ $? = 0 ]; then
            run_result='TensorRT: '${model_name}' pass'; echo ${run_result} >> ${run_benchmark_result_file}
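
This hunk moves --device=GPU after --enableParallelPredict=true and stops redirecting the echoed command and the benchmark output into run_benchmark_result_file, leaving that file for the pass/fail summary lines only. Note in passing that the scripts spell the environment variable CUDA_VISILE_DEVICE; the variable the CUDA runtime actually reads is CUDA_VISIBLE_DEVICES. A hedged sketch of the invocation pattern, where the helper name and the corrected env-var spelling are illustrative additions, not part of the committed script:

# Sketch: run one model in parallel-predict mode, logging the exact command
# line before executing it; cuda_device_id comes from the surrounding script.
run_parallel_benchmark() {
    local model_file=$1 input_shapes=$2 log_file=$3
    local cmd="./benchmark --modelFile=${model_file} --inputShapes=${input_shapes} --enableParallelPredict=true --device=GPU"
    echo "CUDA_VISIBLE_DEVICES=${cuda_device_id} ${cmd}" >> "${log_file}"
    CUDA_VISIBLE_DEVICES=${cuda_device_id} ${cmd} >> "${log_file}" 2>&1
}
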
@ -110,6 +108,28 @@ function Run_TensorRT() {
    done
}

# Print start msg before run testcase
function MS_PRINT_TESTCASE_START_MSG() {
    echo ""
    echo -e "-------------------------------------------------------------------------------------------------------------------------"
    echo -e "env Testcase Result "
    echo -e "--- -------- ------ "
}

# Print start msg after run testcase
function MS_PRINT_TESTCASE_END_MSG() {
    echo -e "-------------------------------------------------------------------------------------------------------------------------"
}

function Print_Benchmark_Result() {
    MS_PRINT_TESTCASE_START_MSG
    while read line; do
        arr=("${line}")
        printf "%-20s %-90s %-7s\n" ${arr[0]} ${arr[1]} ${arr[2]}
    done < $1
    MS_PRINT_TESTCASE_END_MSG
}

# Example:sh run_benchmark_gpu.sh -r /home/temp_test -m /home/temp_test/models -d "8KE5T19620002408" -e arm_cpu
while getopts "r:m:d:e:l:" opt; do
    case ${opt} in
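
The added helpers give this script the same tabular summary the other backend scripts print: Print_Benchmark_Result reads one whitespace-separated record per line (environment, testcase, result) and pads the fields into three columns. A small usage sketch with an illustrative result line matching the run_result format above:

# Sketch: feed Print_Benchmark_Result a file of pass/fail records.
result_file=$(mktemp)
echo 'TensorRT: gender_lstm_vad.onnx pass' >> "${result_file}"
Print_Benchmark_Result "${result_file}"
# Prints the header block, then the record column-aligned via printf.
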
@ -142,6 +162,7 @@ done

run_fail_not_return="OFF"
basepath=$(pwd)
echo "NVIDIA TensorRT, bashpath is ${basepath}"
x86_path=${release_path}/centos_x86 # ../release_pkg/lite
tensorrt_path=${x86_path}/server/tensorrt/cuda-11.1

@ -154,13 +175,15 @@ fi
IFS="-" read -r -a file_name_array <<< "$file_name"
version=${file_name_array[2]}
cd ${basepath}
rm -rf ./*

# Set models config filepath
config_folder="config_level0"
if [[ ${level} == "level1" ]]; then
    config_folder="config_level1"
fi
models_server_inference_config=${basepath}/../${config_folder}/models_server_inference_tensorrt.cfg
cp ${basepath}/../${config_folder}/models_server_inference_tensorrt.cfg ./
models_server_inference_config=${basepath}/models_server_inference_tensorrt.cfg

ms_models_path=${basepath}/ms_models
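
Instead of pointing into the repository's config_level0/config_level1 tree, the script now copies the model list into basepath and reads the local copy, so the staged benchmark directory is self-contained. A hedged guard sketch, not part of the committed script, that would fail fast if the staged copy were missing:

# Sketch (illustrative): verify the staged config exists and is non-empty.
if [ ! -s "${models_server_inference_config}" ]; then
    echo "missing models_server_inference_tensorrt.cfg" >&2
    exit 1
fi
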
@ -183,11 +206,9 @@ Print_Converter_Result $run_converter_result_file

if [[ ${Run_converter_status} = 0 ]];then
    echo "Run converter success"
    Print_Converter_Result $run_converter_result_file
else
    echo "Run converter failed"
    cat ${run_converter_log_file}
    Print_Converter_Result $run_converter_result_file
    exit 1
fi
# Empty config file is allowed, but warning message will be shown
@ -197,11 +218,10 @@ if [[ $(Exist_File_In_Path ${ms_models_path} ".ms") != "true" ]]; then
fi

# Write benchmark result to temp file
export GLOG_logtostderr=0
run_benchmark_result_file=${basepath}/run_benchmark_result.txt
echo ' ' > ${run_benchmark_result_file}
echo 'run server inference x86 logs: ' > ${run_benchmark_result_file}

# Copy the MindSpore models:
echo "Push files and run benchmark"
benchmark_test_path=${basepath}/benchmark_test
@ -220,7 +240,6 @@ fi
if [[ $backend == "all" || $backend == "server_inference_x86_gpu" ]]; then
    if [[ ${Run_x86_status} != 0 ]];then
        echo "run x86 server inference failed"
        cat ${run_benchmark_result_file}
        isFailed=1
    fi
fi

@ -145,6 +145,7 @@ function Run_TensorRT() {
            done
        fi
        output_file=${data_path}'output/'${model_name}'.ms.out'
        config_file_path=${data_path}'input/'${model_name}'.config'

        # set accuracy limitation
        acc_limit="0.5"
@ -168,8 +169,9 @@ function Run_TensorRT() {
        fi

        # different tensorrt run mode use different cuda command
        echo 'CUDA_VISILE_DEVICE='${cuda_device_id}' ./benchmark --modelFile='${model_file}' --inputShapes='${input_shapes}' --inDataFile='${input_files}' --benchmarkDataFile='${output_file}' --enableFp16='${enableFp16}' --accuracyThreshold='${acc_limit}' --cosineDistanceThreshold=${cosine_limit} --device=GPU' >> "${run_tensorrt_log_file}"
        CUDA_VISILE_DEVICE=${cuda_device_id} ./benchmark --modelFile=${model_file} --inputShapes=${input_shapes} --inDataFile=${input_files} --benchmarkDataFile=${output_file} --enableFp16=${enableFp16} --accuracyThreshold=${acc_limit} --cosineDistanceThreshold=${cosine_limit} --device=GPU >> ${run_tensorrt_log_file}
        echo 'CUDA_VISILE_DEVICE='${cuda_device_id}' ./benchmark --modelFile='${model_file}' --inputShapes='${input_shapes}' --inDataFile='${input_files}' --benchmarkDataFile='${output_file}' --enableFp16='${enableFp16}' --accuracyThreshold='${acc_limit}' --configFile='${config_file_path}' --cosineDistanceThreshold=${cosine_limit} --device=GPU' >> "${run_tensorrt_log_file}"
        CUDA_VISILE_DEVICE=${cuda_device_id} ./benchmark --modelFile=${model_file} --inputShapes=${input_shapes} --inDataFile=${input_files} --benchmarkDataFile=${output_file} --enableFp16=${enableFp16} --accuracyThreshold=${acc_limit} --configFile=${config_file_path} --cosineDistanceThreshold=${cosine_limit} --device=GPU >> ${run_tensorrt_log_file}

        if [ $? = 0 ]; then
            run_result='TensorRT: '${model_name}' pass'; echo ${run_result} >> ${run_benchmark_result_file}
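
These last two hunks are the functional core of the CI fix: each model may now ship a per-model .config file next to its input data, and the benchmark call passes it through --configFile alongside the accuracy and cosine-distance thresholds. A hedged end-to-end sketch of the resulting invocation; the model name, file layout, and cosine_limit value are placeholders, with only the flags and the 0.5 accuracy limit taken from the script:

# Sketch: the benchmark call shape after this patch (placeholder paths/values).
model_name='gender_lstm_vad.onnx'
data_path='./benchmark_test/'
config_file_path=${data_path}'input/'${model_name}'.config'
cosine_limit='-1.1'
./benchmark --modelFile=./ms_models/${model_name}.ms --inputShapes=1,198,64 \
    --inDataFile=${data_path}input/${model_name}.ms.bin \
    --benchmarkDataFile=${data_path}output/${model_name}.ms.out \
    --enableFp16=false --accuracyThreshold=0.5 \
    --configFile=${config_file_path} --cosineDistanceThreshold=${cosine_limit} \
    --device=GPU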