forked from mindspore-Ecosystem/mindspore
!28419 [MSLITE] add more ci in tensorrt
Merge pull request !28419 from Liu_Xuu/trt_1230_ci
commit f26d6f3dda
@@ -57,8 +57,8 @@ int AllGatherTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) {
     return RET_ERROR;
   }
   int rank = GetGPUGroupSize();

   auto plugin = std::make_shared<AllGatherPlugin>(op_name_, rank);
   MS_LOG(INFO) << op_name_ << " group size: " << rank << ", rank id: " << GetRankID();
   nvinfer1::IPluginV2Layer *allgather_layer = network->addPluginV2(inputTensors, 1, *plugin);
   if (allgather_layer == nullptr) {
     MS_LOG(ERROR) << "create AllGather layer failed for: " << op_name_;
@@ -59,10 +59,9 @@ int ReduceScatterTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) {
     return RET_ERROR;
   }
   auto reduce_mode = reduce_op->mode();

   auto rank = GetGPUGroupSize();

   auto plugin = std::make_shared<ReduceScatterPlugin>(op_name_, reduce_mode, rank);
   MS_LOG(INFO) << op_name_ << " group size: " << rank << ", rank id: " << GetRankID();
   nvinfer1::IPluginV2Layer *reduce_scatter_layer = network->addPluginV2(inputTensors, 1, *plugin);
   if (reduce_scatter_layer == nullptr) {
     MS_LOG(ERROR) << "create ReduceScatter layer failed for: " << op_name_;
@@ -1,10 +1,30 @@
-# model_info accuracy_limit
-# model_name;input_info;input_shapes;mode accuracy_limit/CONVERTER
+# model_name;input_num:input_name;input_shapes;spec_threads;mode accuracy_limit/CONVERTER
+
+# Run in one cuda server
 gender_resnet34_lzl.onnx
+gender_lstm_vad.onnx;1:input.1;1,198,64;;offline_resize
+gender_lstm_scd.onnx;1:input.1;1,198,64;;offline_resize
 ml_video_edit_person_divison_pic
 ml_video_edit_person_divison_video;2
 ml_audio_kit_vocals_unet_spectrum.onnx
 ml_video_edit_vignet.onnx
 ml_video_edit_reid
 ml_video_edit_judge.onnx
 ml_video_edit_imitate_filter.onnx
+ml_video_edit_hair_dyeing_segmodel_20211119
+ml_video_edit_detect_20211111
+detect_curve2.pb
+detect_straight.pb;1:input;1,19200,960,3;;offline_resize
+direction.pb;1:input;1,2048,2048,1;;offline_resize
+languageClassify.pb;1:input_0;2,32,512,1;;offline_resize
+languageClassify_latin.pb;1:data;2,48,1,50;;offline_resize
+recognize_chineseEnglish.pb;1:input_0;1,2048,2048,1;;offline_resize
+recognize_chineseEnglish_vertical.pb;1:input_0;1,2048,2048,1;;offline_resize
+recognize_JapaneseKorean.pb;1:input_0;1,2048,2048,1;;offline_resize
+recognize_latin.pb;1:input_0;1,2048,2048,1;;offline_resize
+textremoval_v5_nofill.pb;3:input_images,input_masks,ones_image;1,1024,1024,3:1,1024,1024,1:1,1024,1024,1;;offline_resize
+
+# Run in distribution server
+wide_and_deep_.mindir CONVERTER
+wide_and_deep_1.mindir CONVERTER
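
Each non-comment entry above follows the model_name;input_num:input_name;input_shapes;spec_threads;mode layout declared in the new header comment. A minimal sketch of how one entry decomposes, using the same awk -F ';' splitting the runner script applies below (the entry is taken from this file; the standalone snippet itself is illustrative):

entry="detect_straight.pb;1:input;1,19200,960,3;;offline_resize"
model_name=`echo ${entry} | awk -F ';' '{print $1}'`    # detect_straight.pb
input_info=`echo ${entry} | awk -F ';' '{print $2}'`    # 1:input  (input_num:input_name)
input_shapes=`echo ${entry} | awk -F ';' '{print $3}'`  # 1,19200,960,3
spec_threads=`echo ${entry} | awk -F ';' '{print $4}'`  # empty for this entry
mode=`echo ${entry} | awk -F ';' '{print $5}'`          # offline_resize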
@@ -74,8 +74,8 @@ function Convert() {
         if [[ ${input_num} == "" ]]; then
             input_num=1
         fi
-        LFS="," read -r -a name_array <<< ${input_names}
-        LFS=":" read -r -a shape_array <<< ${input_shapes}
+        IFS="," read -r -a name_array <<< ${input_names}
+        IFS=":" read -r -a shape_array <<< ${input_shapes}
         for i in $(seq 0 $((${input_num}-1)))
         do
             spec_shapes=${spec_shapes}${name_array[$i]}':'${shape_array[$i]}';'
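
The fix above matters because LFS is just an ordinary, unused variable, so the old read calls never split on the intended delimiter; IFS is the shell's actual field separator, and prefixing the assignment to read scopes it to that one command. A minimal before/after sketch with illustrative values:

names="in0,in1,in2"
LFS="," read -r -a broken <<< ${names}   # LFS means nothing to read: broken=("in0,in1,in2")
IFS="," read -r -a fixed <<< ${names}    # IFS applies to this read only: fixed=("in0" "in1" "in2")
echo "${#broken[@]} vs ${#fixed[@]}"     # prints: 1 vs 3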
@@ -64,7 +64,7 @@ function Run_TensorRT() {
     source /etc/profile
     local line_info model_info spec_acc_limit model_name input_num input_shapes \
           mode model_file input_files output_file data_path acc_limit enableFp16 \
-          run_result
+          run_result config_file_path

     while read line; do
         line_info=${line}
@@ -80,7 +80,7 @@ function Run_TensorRT() {
         model_name=`echo ${model_info} | awk -F ';' '{print $1}'`
         input_info=`echo ${model_info} | awk -F ';' '{print $2}'`
         input_shapes=`echo ${model_info} | awk -F ';' '{print $3}'`
-        mode=`echo ${model_info} | awk -F ';' '{print $3}'`
+        mode=`echo ${model_info} | awk -F ';' '{print $5}'`
         input_num=`echo ${input_info} | sed 's/:/;/' | awk -F ';' '{print $1}'`
         if [[ ${model_name##*.} == "caffemodel" ]]; then
             model_name=${model_name%.*}
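
The sed 's/:/;/' above rewrites only the first ':' of the input_num:input_name field into the same ';' separator, so the leading count can be pulled out with the usual awk pattern. For example, with the textremoval entry's input_info (the input_names line is an illustrative companion, not part of this diff):

input_info="3:input_images,input_masks,ones_image"
input_num=`echo ${input_info} | sed 's/:/;/' | awk -F ';' '{print $1}'`    # 3
input_names=`echo ${input_info} | sed 's/:/;/' | awk -F ';' '{print $2}'`  # input_images,input_masks,ones_image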
@@ -106,6 +106,7 @@ function Run_TensorRT() {
             done
         fi
         output_file=${data_path}'output/'${model_name}'.ms.out'
+        config_file_path=${data_path}'input/'${model_name}'.config'

         # set accuracy limitation
         acc_limit="0.5"
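
The output_file and config_file_path assignments above are plain string concatenation (the mixed quoted/unquoted pieces just join). For a model named detect_curve2.pb and an illustrative data_path of /data/, they resolve to:

data_path="/data/"
model_name="detect_curve2.pb"
output_file=${data_path}'output/'${model_name}'.ms.out'        # /data/output/detect_curve2.pb.ms.out
config_file_path=${data_path}'input/'${model_name}'.config'    # /data/input/detect_curve2.pb.config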
@@ -119,10 +120,14 @@ function Run_TensorRT() {
         if [[ ${mode} == "fp16" ]]; then
             enableFp16="true"
         fi
+        if [[ ${mode} == "offline_resize" ]]; then
+            input_shapes=""
+        fi

-        echo 'CUDA_VISILE_DEVICE='${cuda_device_id}' ./benchmark --modelFile='${model_file}' --inputShapes='${input_shapes}' --inDataFile='${input_files}' --benchmarkDataFile='${output_file}' --enableFp16='${enableFp16}' --accuracyThreshold='${acc_limit}' --device=GPU' >> "${run_tensorrt_log_file}"
-        CUDA_VISILE_DEVICE=${cuda_device_id} ./benchmark --modelFile=${model_file} --inputShapes=${input_shapes} --inDataFile=${input_files} --benchmarkDataFile=${output_file} --enableFp16=${enableFp16} --accuracyThreshold=${acc_limit} --device=GPU >> ${run_tensorrt_log_file}
+        # different tensorrt run mode use different cuda command
+        echo 'CUDA_VISILE_DEVICE='${cuda_device_id}' ./benchmark --modelFile='${model_file}' --inputShapes='${input_shapes}' --inDataFile='${input_files}' --benchmarkDataFile='${output_file}' --enableFp16='${enableFp16}' --accuracyThreshold='${acc_limit}' --configFile='${config_file_path}' --device=GPU' >> "${run_tensorrt_log_file}"
+        CUDA_VISILE_DEVICE=${cuda_device_id} ./benchmark --modelFile=${model_file} --inputShapes=${input_shapes} --inDataFile=${input_files} --benchmarkDataFile=${output_file} --enableFp16=${enableFp16} --accuracyThreshold=${acc_limit} --configFile=${config_file_path} --device=GPU >> ${run_tensorrt_log_file}

         if [ $? = 0 ]; then
             run_result='TensorRT: '${model_name}' pass'; echo ${run_result} >> ${run_benchmark_result_file}
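
To make the new invocation concrete, this is roughly what one loop iteration now executes for an entry such as detect_straight.pb; the data_path value, device id, model/input file layout, and enableFp16 value are illustrative assumptions, and CUDA_VISIBLE_DEVICES is the standard CUDA spelling of the variable the script sets:

# inputShapes is passed empty because this entry's mode is offline_resize;
# the output and config paths follow the patterns set earlier in the script.
CUDA_VISIBLE_DEVICES=0 ./benchmark \
    --modelFile=/data/detect_straight.pb.ms \
    --inputShapes= \
    --inDataFile=/data/input/detect_straight.pb.ms.bin \
    --benchmarkDataFile=/data/output/detect_straight.pb.ms.out \
    --enableFp16=false \
    --accuracyThreshold=0.5 \
    --configFile=/data/input/detect_straight.pb.config \
    --device=GPU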
@@ -207,6 +212,7 @@ run_tensorrt_mpirun_log_file=${basepath}/run_tensorrt_mpirun_log.txt
 echo 'run tensorrt mpirun logs: ' > ${run_tensorrt_mpirun_log_file}

 echo "Running in tensorrt with mpirun"
+export GLOG_v=1
 Run_TensorRT_Mpirun &
 Run_TensorRT_Mpirun_PID=$!
 sleep 1
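
For context on the last three lines: & backgrounds the mpirun pass, $! captures its PID, and the sleep gives it a moment to start before the script continues. A minimal standalone sketch of the pattern (long_running_job and the wait line are illustrative, not part of this diff):

long_running_job &   # start the job in the background
job_pid=$!           # $! is the PID of the most recently backgrounded job
sleep 1              # let it start up before anything that depends on it
wait ${job_pid}      # later: block until it exits; wait returns its exit status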