forked from mindspore-Ecosystem/mindspore
!22534 solve deeptext centerface issues
Merge pull request !22534 from chenweitao_295/deeptext_centerface_gpu_issues
This commit is contained in:
commit
b57d26fd9f
|
@ -14,7 +14,7 @@
|
|||
# limitations under the License.
|
||||
# ============================================================================
|
||||
|
||||
if [ $# != 1 ]
|
||||
if [ $# != 4 ]
|
||||
then
|
||||
echo "Usage: bash train_standalone_gpu.sh [USE_DEVICE_ID] [PRETRAINED_BACKBONE] [ANNOTATIONS] [DATASET]"
|
||||
exit 1
|
||||
|
|
|
@ -31,10 +31,9 @@ from model_utils.config import config
|
|||
from model_utils.moxing_adapter import moxing_wrapper
|
||||
from model_utils.device_adapter import get_device_id, get_device_num
|
||||
|
||||
|
||||
set_seed(1)
|
||||
|
||||
context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", device_id=get_device_id())
|
||||
context.set_context(mode=context.GRAPH_MODE, device_target=config.device_target, device_id=get_device_id())
|
||||
|
||||
|
||||
def deeptext_eval_test(dataset_path='', ckpt_path=''):
|
||||
|
@ -113,7 +112,7 @@ def deeptext_eval_test(dataset_path='', ckpt_path=''):
|
|||
print("\n========================================\n", flush=True)
|
||||
for i in range(config.num_classes - 1):
|
||||
j = i + 1
|
||||
f1 = (2 * precisions[j] * recalls[j]) / (precisions[j] + recalls[j] + 1e-6)
|
||||
f1 = (2 * precisions[j] * recalls[j]) / (precisions[j] + recalls[j] + 1e-6)
|
||||
print("class {} precision is {:.2f}%, recall is {:.2f}%,"
|
||||
"F1 is {:.2f}%".format(j, precisions[j] * 100, recalls[j] * 100, f1 * 100), flush=True)
|
||||
if config.use_ambigous_sample:
|
||||
|
@ -122,6 +121,7 @@ def deeptext_eval_test(dataset_path='', ckpt_path=''):
|
|||
|
||||
def modelarts_pre_process():
|
||||
'''modelarts pre process function.'''
|
||||
|
||||
def unzip(zip_file, save_dir):
|
||||
import zipfile
|
||||
s_time = time.time()
|
||||
|
|
|
@ -63,5 +63,12 @@ cp $PATH4 ../src/
|
|||
|
||||
echo "======start training======"
|
||||
|
||||
mpirun -n $RANK_SIZE python train.py --imgs_path=$PATH1 --annos_path=$PATH2 --run_distribute=True --device_target="GPU" --pre_trained=$PATH3 > log &
|
||||
mpirun --allow-run-as-root -n $RANK_SIZE --output-filename log_output --merge-stderr-to-stdout \
|
||||
nohup python train.py \
|
||||
--imgs_path=$PATH1 \
|
||||
--annos_path=$PATH2 \
|
||||
--run_distribute=True \
|
||||
--device_target="GPU" \
|
||||
--export_device_target="GPU" \
|
||||
--pre_trained=$PATH3 > log.txt 2>&1 &
|
||||
cd ..
|
|
@ -31,6 +31,10 @@ PATH1=$(get_real_path $1)
|
|||
PATH2=$(get_real_path $2)
|
||||
PATH3=$(get_real_path $3)
|
||||
PATH4=$(get_real_path $4)
|
||||
DEVICE_ID=0
|
||||
if [ $# == 5 ]; then
|
||||
DEVICE_ID=$5
|
||||
fi
|
||||
echo $PATH1
|
||||
echo $PATH2
|
||||
echo $PATH3
|
||||
|
@ -66,5 +70,11 @@ cp -r ../model_utils ./eval
|
|||
cd ./eval || exit
|
||||
env > env.log
|
||||
echo "start eval for device $DEVICE_ID"
|
||||
CUDA_VISIBLE_DEVICE=$DEVICE_ID python eval.py --imgs_path=$PATH1 --annos_path=$PATH2 --checkpoint_path=$PATH3 &> log &
|
||||
export CUDA_VISIBLE_DEVICES=$DEVICE_ID
|
||||
python eval.py \
|
||||
--device_target="GPU" \
|
||||
--export_device_target="GPU" \
|
||||
--imgs_path=$PATH1 \
|
||||
--annos_path=$PATH2 \
|
||||
--checkpoint_path=$PATH3 &> log &
|
||||
cd ..
|
||||
|
|
|
@ -36,6 +36,10 @@ PATH3=$(get_real_path $3)
|
|||
echo $PATH3
|
||||
PATH4=$(get_real_path $4)
|
||||
echo $PATH4
|
||||
DEVICE_ID=0
|
||||
if [ $# == 5 ]; then
|
||||
DEVICE_ID=$5
|
||||
fi
|
||||
|
||||
if [ ! -f $PATH3 ]
|
||||
then
|
||||
|
@ -56,11 +60,18 @@ cp ../*.yaml ./train
|
|||
cp *.sh ./train
|
||||
cp -r ../src ./train
|
||||
cp -r ../model_utils ./train
|
||||
cd ./train_parallel || exit
|
||||
cd ./train || exit
|
||||
|
||||
export RANK_SIZE=1
|
||||
cp $PATH4 ../src/
|
||||
|
||||
echo "======start training======"
|
||||
|
||||
CUDA_VISIBLE_DEVICE=$DEVICE_ID python train.py --imgs_path=$PATH1 --annos_path=$PATH2 --run_distribute=False --device_target="GPU" --pre_trained=$PATH3 > log &
|
||||
export CUDA_VISIBLE_DEVICES=$DEVICE_ID
|
||||
python train.py \
|
||||
--imgs_path=$PATH1 \
|
||||
--annos_path=$PATH2 \
|
||||
--run_distribute=False \
|
||||
--device_target="GPU" \
|
||||
--export_device_target="GPU" \
|
||||
--pre_trained=$PATH3 > log.txt 2>&1 &
|
||||
|
|
|
@ -47,6 +47,7 @@ context.set_context(mode=context.GRAPH_MODE, device_target=config.device_target,
|
|||
|
||||
def modelarts_pre_process():
|
||||
'''modelarts pre process function.'''
|
||||
|
||||
def unzip(zip_file, save_dir):
|
||||
import zipfile
|
||||
s_time = time.time()
|
||||
|
@ -98,23 +99,21 @@ def modelarts_pre_process():
|
|||
|
||||
config.save_checkpoint_path = os.path.join(config.output_path, config.save_checkpoint_path)
|
||||
|
||||
|
||||
@moxing_wrapper(pre_process=modelarts_pre_process)
|
||||
def run_train():
|
||||
device_type = "Ascend" if context.get_context("device_target") == "Ascend" else "GPU"
|
||||
if config.run_distribute:
|
||||
init()
|
||||
if device_type == "Ascend":
|
||||
rank = get_rank_id()
|
||||
device_num = get_device_num()
|
||||
|
||||
else:
|
||||
context.reset_auto_parallel_context()
|
||||
rank = get_rank()
|
||||
device_num = get_group_size()
|
||||
|
||||
context.set_auto_parallel_context(device_num=device_num, parallel_mode=ParallelMode.DATA_PARALLEL,
|
||||
gradients_mean=True)
|
||||
init()
|
||||
|
||||
else:
|
||||
rank = get_rank_id()
|
||||
device_num = 1
|
||||
|
|
Loading…
Reference in New Issue