!22534 solve deeptext centerface issues

Merge pull request !22534 from chenweitao_295/deeptext_centerface_gpu_issues
2021-08-30 01:15:53 +00:00 · 2021-08-30 01:15:53 +00:00 · b57d26fd9f
parent 12dd435584 3546987288
commit b57d26fd9f
6 changed files with 39 additions and 12 deletions
--- a/model_zoo/official/cv/centerface/scripts/train_standalone_gpu.sh
+++ b/model_zoo/official/cv/centerface/scripts/train_standalone_gpu.sh
@@ -14,7 +14,7 @@
 # limitations under the License.
 # ============================================================================

-if [ $# != 1 ]
+if [ $# != 4 ]
 then
    echo "Usage: bash train_standalone_gpu.sh [USE_DEVICE_ID] [PRETRAINED_BACKBONE] [ANNOTATIONS] [DATASET]"
    exit 1
--- a/model_zoo/official/cv/deeptext/eval.py
+++ b/model_zoo/official/cv/deeptext/eval.py
@@ -31,10 +31,9 @@ from model_utils.config import config
 from model_utils.moxing_adapter import moxing_wrapper
 from model_utils.device_adapter import get_device_id, get_device_num

-
 set_seed(1)

-context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", device_id=get_device_id())
+context.set_context(mode=context.GRAPH_MODE, device_target=config.device_target, device_id=get_device_id())


 def deeptext_eval_test(dataset_path='', ckpt_path=''):
@@ -113,7 +112,7 @@ def deeptext_eval_test(dataset_path='', ckpt_path=''):
    print("\n========================================\n", flush=True)
    for i in range(config.num_classes - 1):
        j = i + 1
-        f1 = (2 *  precisions[j] * recalls[j]) / (precisions[j] + recalls[j] + 1e-6)
+        f1 = (2 * precisions[j] * recalls[j]) / (precisions[j] + recalls[j] + 1e-6)
        print("class {} precision is {:.2f}%, recall is {:.2f}%,"
              "F1 is {:.2f}%".format(j, precisions[j] * 100, recalls[j] * 100, f1 * 100), flush=True)
        if config.use_ambigous_sample:
@@ -122,6 +121,7 @@ def deeptext_eval_test(dataset_path='', ckpt_path=''):

 def modelarts_pre_process():
    '''modelarts pre process function.'''
+
    def unzip(zip_file, save_dir):
        import zipfile
        s_time = time.time()
--- a/model_zoo/official/cv/deeptext/scripts/run_distribute_train_gpu.sh
+++ b/model_zoo/official/cv/deeptext/scripts/run_distribute_train_gpu.sh
@@ -63,5 +63,12 @@ cp $PATH4 ../src/

 echo "======start training======"

-mpirun -n $RANK_SIZE python train.py --imgs_path=$PATH1 --annos_path=$PATH2 --run_distribute=True --device_target="GPU" --pre_trained=$PATH3 > log &
+mpirun --allow-run-as-root -n $RANK_SIZE --output-filename log_output --merge-stderr-to-stdout \
+nohup python train.py \
+  --imgs_path=$PATH1 \
+  --annos_path=$PATH2 \
+  --run_distribute=True \
+  --device_target="GPU" \
+  --export_device_target="GPU" \
+  --pre_trained=$PATH3 > log.txt 2>&1 &
 cd ..
--- a/model_zoo/official/cv/deeptext/scripts/run_eval_gpu.sh
+++ b/model_zoo/official/cv/deeptext/scripts/run_eval_gpu.sh
@@ -31,6 +31,10 @@ PATH1=$(get_real_path $1)
 PATH2=$(get_real_path $2)
 PATH3=$(get_real_path $3)
 PATH4=$(get_real_path $4)
+DEVICE_ID=0
+if [ $# == 5 ]; then
+    DEVICE_ID=$5
+fi
 echo $PATH1
 echo $PATH2
 echo $PATH3
@@ -66,5 +70,11 @@ cp -r ../model_utils ./eval
 cd ./eval || exit
 env > env.log
 echo "start eval for device $DEVICE_ID"
-CUDA_VISIBLE_DEVICE=$DEVICE_ID python eval.py --imgs_path=$PATH1 --annos_path=$PATH2 --checkpoint_path=$PATH3 &> log &
+export CUDA_VISIBLE_DEVICES=$DEVICE_ID
+python eval.py \
+  --device_target="GPU" \
+  --export_device_target="GPU" \
+  --imgs_path=$PATH1 \
+  --annos_path=$PATH2 \
+  --checkpoint_path=$PATH3 &> log &
 cd ..
--- a/model_zoo/official/cv/deeptext/scripts/run_standalone_train_gpu.sh
+++ b/model_zoo/official/cv/deeptext/scripts/run_standalone_train_gpu.sh
@@ -36,6 +36,10 @@ PATH3=$(get_real_path $3)
 echo $PATH3
 PATH4=$(get_real_path $4)
 echo $PATH4
+DEVICE_ID=0
+if [ $# == 5 ]; then
+    DEVICE_ID=$5
+fi

 if [ ! -f $PATH3 ]
 then 
@@ -56,11 +60,18 @@ cp ../*.yaml ./train
 cp *.sh ./train
 cp -r ../src ./train
 cp -r ../model_utils ./train
-cd ./train_parallel || exit
+cd ./train || exit

 export RANK_SIZE=1
 cp $PATH4 ../src/

 echo "======start training======"

-CUDA_VISIBLE_DEVICE=$DEVICE_ID python train.py --imgs_path=$PATH1 --annos_path=$PATH2 --run_distribute=False --device_target="GPU" --pre_trained=$PATH3 > log &
+export CUDA_VISIBLE_DEVICES=$DEVICE_ID
+python train.py \
+  --imgs_path=$PATH1 \
+  --annos_path=$PATH2 \
+  --run_distribute=False \
+  --device_target="GPU" \
+  --export_device_target="GPU" \
+  --pre_trained=$PATH3 > log.txt 2>&1 &
--- a/model_zoo/official/cv/deeptext/train.py
+++ b/model_zoo/official/cv/deeptext/train.py
@@ -47,6 +47,7 @@ context.set_context(mode=context.GRAPH_MODE, device_target=config.device_target,

 def modelarts_pre_process():
    '''modelarts pre process function.'''
+
    def unzip(zip_file, save_dir):
        import zipfile
        s_time = time.time()
@@ -98,23 +99,21 @@ def modelarts_pre_process():

    config.save_checkpoint_path = os.path.join(config.output_path, config.save_checkpoint_path)

+
 @moxing_wrapper(pre_process=modelarts_pre_process)
 def run_train():
    device_type = "Ascend" if context.get_context("device_target") == "Ascend" else "GPU"
    if config.run_distribute:
+        init()
        if device_type == "Ascend":
            rank = get_rank_id()
            device_num = get_device_num()
-
        else:
            context.reset_auto_parallel_context()
            rank = get_rank()
            device_num = get_group_size()
-
        context.set_auto_parallel_context(device_num=device_num, parallel_mode=ParallelMode.DATA_PARALLEL,
                                          gradients_mean=True)
-        init()
-
    else:
        rank = get_rank_id()
        device_num = 1