diff --git a/mindspore/lite/src/delegate/tensorrt/op/allgather_tensorrt.cc b/mindspore/lite/src/delegate/tensorrt/op/allgather_tensorrt.cc
index cb14bb9f43a..0b2ed7b0380 100644
--- a/mindspore/lite/src/delegate/tensorrt/op/allgather_tensorrt.cc
+++ b/mindspore/lite/src/delegate/tensorrt/op/allgather_tensorrt.cc
@@ -57,8 +57,9 @@ int AllGatherTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) {
     return RET_ERROR;
   }
   int rank = GetGPUGroupSize();
   auto plugin = std::make_shared<AllGatherPlugin>(op_name_, rank);
+  MS_LOG(INFO) << op_name_ << " group size: " << rank << ", rank id: " << GetRankID();
   nvinfer1::IPluginV2Layer *allgather_layer = network->addPluginV2(inputTensors, 1, *plugin);
   if (allgather_layer == nullptr) {
     MS_LOG(ERROR) << "create AllGather layer failed for: " << op_name_;
diff --git a/mindspore/lite/src/delegate/tensorrt/op/reducescatter_tensorrt.cc b/mindspore/lite/src/delegate/tensorrt/op/reducescatter_tensorrt.cc
index 9148ce8c0f6..4a9f5b51b7f 100644
--- a/mindspore/lite/src/delegate/tensorrt/op/reducescatter_tensorrt.cc
+++ b/mindspore/lite/src/delegate/tensorrt/op/reducescatter_tensorrt.cc
@@ -59,10 +59,11 @@ int ReduceScatterTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) {
     return RET_ERROR;
   }
   auto reduce_mode = reduce_op->mode();
   auto rank = GetGPUGroupSize();
   auto plugin = std::make_shared<ReduceScatterPlugin>(op_name_, reduce_mode, rank);
+  MS_LOG(INFO) << op_name_ << " group size: " << rank << ", rank id: " << GetRankID();
   nvinfer1::IPluginV2Layer *reduce_scatter_layer = network->addPluginV2(inputTensors, 1, *plugin);
   if (reduce_scatter_layer == nullptr) {
     MS_LOG(ERROR) << "create ReduceScatter layer failed for: " << op_name_;
diff --git a/mindspore/lite/test/config/models_tensorrt.cfg b/mindspore/lite/test/config/models_tensorrt.cfg
index 417d9dbc169..a4c655bc69f 100644
--- a/mindspore/lite/test/config/models_tensorrt.cfg
+++ b/mindspore/lite/test/config/models_tensorrt.cfg
@@ -1,10 +1,30 @@
 # model_info accuracy_limit
-# model_name;input_info;input_shapes;mode accuracy_limit/CONVERTER
+# model_name;input_num:input_name;input_shapes;spec_threads;mode accuracy_limit/CONVERTER
 # Run in one cuda server
 gender_resnet34_lzl.onnx
+gender_lstm_vad.onnx;1:input.1;1,198,64;;offline_resize
+gender_lstm_scd.onnx;1:input.1;1,198,64;;offline_resize
 ml_video_edit_person_divison_pic
 ml_video_edit_person_divison_video;2
+ml_audio_kit_vocals_unet_spectrum.onnx
+ml_video_edit_vignet.onnx
+ml_video_edit_reid
+ml_video_edit_judge.onnx
+ml_video_edit_imitate_filter.onnx
+ml_video_edit_hair_dyeing_segmodel_20211119
+ml_video_edit_detect_20211111
+detect_curve2.pb
+detect_straight.pb;1:input;1,19200,960,3;;offline_resize
+direction.pb;1:input;1,2048,2048,1;;offline_resize
+languageClassify.pb;1:input_0;2,32,512,1;;offline_resize
+languageClassify_latin.pb;1:data;2,48,1,50;;offline_resize
+recognize_chineseEnglish.pb;1:input_0;1,2048,2048,1;;offline_resize
+recognize_chineseEnglish_vertical.pb;1:input_0;1,2048,2048,1;;offline_resize
+recognize_JapaneseKorean.pb;1:input_0;1,2048,2048,1;;offline_resize
+recognize_latin.pb;1:input_0;1,2048,2048,1;;offline_resize
+textremoval_v5_nofill.pb;3:input_images,input_masks,ones_image;1,1024,1024,3:1,1024,1024,1:1,1024,1024,1;;offline_resize
+

 # Run in distribution server
 wide_and_deep_.mindir CONVERTER
 wide_and_deep_1.mindir CONVERTER
diff --git a/mindspore/lite/test/st/scripts/base_functions.sh b/mindspore/lite/test/st/scripts/base_functions.sh
index b8ccd95d803..14babfc5ee0 100644
--- a/mindspore/lite/test/st/scripts/base_functions.sh
+++ b/mindspore/lite/test/st/scripts/base_functions.sh
@@ -74,8 +74,8 @@ function Convert() {
     if [[ ${input_num} == "" ]]; then
       input_num=1
     fi
-    LFS="," read -r -a name_array <<< ${input_names}
-    LFS=":" read -r -a shape_array <<< ${input_shapes}
+    IFS="," read -r -a name_array <<< ${input_names}
+    IFS=":" read -r -a shape_array <<< ${input_shapes}
    for i in $(seq 0 $((${input_num}-1)))
    do
      spec_shapes=${spec_shapes}${name_array[$i]}':'${shape_array[$i]}';'
diff --git a/mindspore/lite/test/st/scripts/tensorrt/run_benchmark_tensorrt.sh b/mindspore/lite/test/st/scripts/tensorrt/run_benchmark_tensorrt.sh
index 3a59b456c0b..ea57a1394a6 100644
--- a/mindspore/lite/test/st/scripts/tensorrt/run_benchmark_tensorrt.sh
+++ b/mindspore/lite/test/st/scripts/tensorrt/run_benchmark_tensorrt.sh
@@ -64,7 +64,7 @@ function Run_TensorRT() {
    source /etc/profile

    local line_info model_info spec_acc_limit model_name input_num input_shapes \
          mode model_file input_files output_file data_path acc_limit enableFp16 \
-          run_result
+          run_result config_file_path
    while read line; do
        line_info=${line}
@@ -80,7 +80,7 @@
        model_name=`echo ${model_info} | awk -F ';' '{print $1}'`
        input_info=`echo ${model_info} | awk -F ';' '{print $2}'`
        input_shapes=`echo ${model_info} | awk -F ';' '{print $3}'`
-        mode=`echo ${model_info} | awk -F ';' '{print $3}'`
+        mode=`echo ${model_info} | awk -F ';' '{print $5}'`
        input_num=`echo ${input_info} | sed 's/:/;/' | awk -F ';' '{print $1}'`
        if [[ ${model_name##*.} == "caffemodel" ]]; then
            model_name=${model_name%.*}
@@ -106,6 +106,7 @@
            done
        fi
        output_file=${data_path}'output/'${model_name}'.ms.out'
+        config_file_path=${data_path}'input/'${model_name}'.config'

        # set accuracy limitation
        acc_limit="0.5"
@@ -119,10 +120,14 @@
        if [[ ${mode} == "fp16" ]]; then
            enableFp16="true"
        fi
+        if [[ ${mode} == "offline_resize" ]]; then
+            input_shapes=""
+        fi

        # different tensorrt run mode use different cuda command
-        echo 'CUDA_VISILE_DEVICE='${cuda_device_id}' ./benchmark --modelFile='${model_file}' --inputShapes='${input_shapes}' --inDataFile='${input_files}' --benchmarkDataFile='${output_file}' --enableFp16='${enableFp16}' --accuracyThreshold='${acc_limit}' --device=GPU' >> "${run_tensorrt_log_file}"
-        CUDA_VISILE_DEVICE=${cuda_device_id} ./benchmark --modelFile=${model_file} --inputShapes=${input_shapes} --inDataFile=${input_files} --benchmarkDataFile=${output_file} --enableFp16=${enableFp16} --accuracyThreshold=${acc_limit} --device=GPU >> ${run_tensorrt_log_file}
+        echo 'CUDA_VISILE_DEVICE='${cuda_device_id}' ./benchmark --modelFile='${model_file}' --inputShapes='${input_shapes}' --inDataFile='${input_files}' --benchmarkDataFile='${output_file}' --enableFp16='${enableFp16}' --accuracyThreshold='${acc_limit}' --configFile='${config_file_path}' --device=GPU' >> "${run_tensorrt_log_file}"
+        CUDA_VISILE_DEVICE=${cuda_device_id} ./benchmark --modelFile=${model_file} --inputShapes=${input_shapes} --inDataFile=${input_files} --benchmarkDataFile=${output_file} --enableFp16=${enableFp16} --accuracyThreshold=${acc_limit} --configFile=${config_file_path} --device=GPU >> ${run_tensorrt_log_file}
+        CUDA_VISILE_DEVICE=${cuda_device_id} ./benchmark --modelFile=${model_file} --inputShapes=${input_shapes} --inDataFile=${input_files} --benchmarkDataFile=${output_file} --enableFp16=${enableFp16} --accuracyThreshold=${acc_limit} --configFile=${config_file_path} --device=GPU >> ${run_tensorrt_log_file}
        if [ $? = 0 ]; then
            run_result='TensorRT: '${model_name}' pass'; echo ${run_result} >> ${run_benchmark_result_file}
@@ -207,6 +212,7 @@
 run_tensorrt_mpirun_log_file=${basepath}/run_tensorrt_mpirun_log.txt
 echo 'run tensorrt mpirun logs: ' > ${run_tensorrt_mpirun_log_file}
 echo "Running in tensorrt with mpirun"
+export GLOG_v=1
 Run_TensorRT_Mpirun &
 Run_TensorRT_Mpirun_PID=$!
 sleep 1
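
Note on the base_functions.sh hunk: `read -a` only splits its input on the
characters in IFS, and assigning the misspelled `LFS` variable had no effect,
so the whole string previously landed in array element 0. A minimal sketch of
the fixed behavior (the input values here are hypothetical, not taken from the
test configs):

    input_names="in0,in1"
    input_shapes="1,224,224,3:1,10"
    IFS="," read -r -a name_array <<< "${input_names}"    # name_array=(in0 in1)
    IFS=":" read -r -a shape_array <<< "${input_shapes}"  # shape_array=(1,224,224,3 1,10)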
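
Note on the `mode` fix in run_benchmark_tensorrt.sh: under the new config
layout `model_name;input_num:input_name;input_shapes;spec_threads;mode`, mode
is the fifth `;`-separated field (field 3 is input_shapes, and field 4, often
empty as in `;;`, is spec_threads), so `awk -F ';' '{print $3}'` was reading
the shapes instead of the mode. Checking with a config line from this patch:

    echo "detect_straight.pb;1:input;1,19200,960,3;;offline_resize" | awk -F ';' '{print $5}'
    # prints: offline_resize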
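
Note on `export GLOG_v=1`: MindSpore maps GLOG_v levels as 0=DEBUG, 1=INFO,
2=WARNING (the default), 3=ERROR, so raising verbosity to INFO is what makes
the MS_LOG(INFO) group-size/rank-id lines added above show up in the mpirun
logs.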