solve maskrcnn_mobilenetv1 scripts problems

This commit is contained in:
root 2020-12-08 17:11:04 +08:00
parent 0f18cdca84
commit 3df3b1c585
4 changed files with 35 additions and 21 deletions

View File

@ -178,7 +178,6 @@ Usage: sh run_standalone_train.sh [PRETRAINED_MODEL]
# random threshold in data augmentation # random threshold in data augmentation
"keep_ratio": True, "keep_ratio": True,
"flip_ratio": 0.5, "flip_ratio": 0.5,
"photo_ratio": 0.5,
"expand_ratio": 1.0, "expand_ratio": 1.0,
"max_instance_count": 128, # max number of bbox for each image "max_instance_count": 128, # max number of bbox for each image
@ -265,7 +264,6 @@ Usage: sh run_standalone_train.sh [PRETRAINED_MODEL]
"test_max_per_img": 100, # max number of instances "test_max_per_img": 100, # max number of instances
"test_batch_size": 2, # batch size "test_batch_size": 2, # batch size
"rpn_head_loss_type": "CrossEntropyLoss", # loss type in rpn
"rpn_head_use_sigmoid": True, # whether to use sigmoid or not in rpn "rpn_head_use_sigmoid": True, # whether to use sigmoid or not in rpn
"rpn_head_weight": 1.0, # rpn head weight in loss "rpn_head_weight": 1.0, # rpn head weight in loss
"mask_thr_binary": 0.5, # mask threshold in rcnn "mask_thr_binary": 0.5, # mask threshold in rcnn
@ -275,7 +273,6 @@ Usage: sh run_standalone_train.sh [PRETRAINED_MODEL]
"base_step": 58633, # base step in lr generator "base_step": 58633, # base step in lr generator
"total_epoch": 13, # total epoch in lr generator "total_epoch": 13, # total epoch in lr generator
"warmup_step": 500, # warm-up step in lr generator "warmup_step": 500, # warm-up step in lr generator
"warmup_mode": "linear", # warm-up mode
"warmup_ratio": 1/3.0, # warm-up ratio "warmup_ratio": 1/3.0, # warm-up ratio
"sgd_momentum": 0.9, # momentum in optimizer "sgd_momentum": 0.9, # momentum in optimizer

View File

@ -14,9 +14,9 @@
# limitations under the License. # limitations under the License.
# ============================================================================ # ============================================================================
if [ $# != 2 ] if [ $# != 2 ] && [ $# != 1 ]
then then
echo "Usage: sh run_train.sh [RANK_TABLE_FILE] [PRETRAINED_PATH]" echo "Usage: sh run_distribute_train.sh [RANK_TABLE_FILE] [PRETRAINED_PATH](optional)"
exit 1 exit 1
fi fi
@ -31,7 +31,11 @@ PATH1=$(get_real_path $1)
PATH2=$2 PATH2=$2
echo $PATH1 echo $PATH1
echo $PATH2
if [ $# == 2 ]
then
echo $PATH2
fi
if [ ! -f $PATH1 ] if [ ! -f $PATH1 ]
then then
@ -67,7 +71,16 @@ do
cd ./train_parallel$i || exit cd ./train_parallel$i || exit
echo "start training for rank $RANK_ID, device $DEVICE_ID" echo "start training for rank $RANK_ID, device $DEVICE_ID"
env > env.log env > env.log
taskset -c $cmdopt python train.py --do_train=True --device_id=$i --rank_id=$i --run_distribute=True --device_num=$DEVICE_NUM \ if [ $# == 2 ]
--pre_trained=$PATH2 &> log & then
taskset -c $cmdopt python train.py --do_train=True --device_id=$i --rank_id=$i --run_distribute=True --device_num=$DEVICE_NUM \
--pre_trained=$PATH2 &> log &
fi
if [ $# == 1 ]
then
taskset -c $cmdopt python train.py --do_train=True --device_id=$i --rank_id=$i --run_distribute=True --device_num=$DEVICE_NUM &> log &
fi
cd .. cd ..
done done

View File

@ -14,9 +14,9 @@
# limitations under the License. # limitations under the License.
# ============================================================================ # ============================================================================
if [ $# != 1 ] if [ $# != 1 ] && [ $# != 0 ]
then then
echo "Usage: sh run_standalone_train.sh [PRETRAINED_PATH]" echo "Usage: sh run_standalone_train.sh [PRETRAINED_PATH](optional)"
exit 1 exit 1
fi fi
@ -27,13 +27,11 @@ get_real_path(){
echo "$(realpath -m $PWD/$1)" echo "$(realpath -m $PWD/$1)"
fi fi
} }
PATH1=$(get_real_path $1)
echo $PATH1
if [ ! -f $PATH1 ] if [ $# == 1 ]
then then
echo "error: PRETRAINED_PATH=$PATH1 is not a file" PATH1=$(get_real_path $1)
exit 1 echo $PATH1
fi fi
ulimit -u unlimited ulimit -u unlimited
@ -53,5 +51,14 @@ cp -r ../src ./train
cd ./train || exit cd ./train || exit
echo "start training for device $DEVICE_ID" echo "start training for device $DEVICE_ID"
env > env.log env > env.log
python train.py --do_train=True --device_id=$DEVICE_ID --pre_trained=$PATH1 &> log & if [ $# == 1 ]
then
python train.py --do_train=True --device_id=$DEVICE_ID --pre_trained=$PATH1 &> log &
fi
if [ $# == 0 ]
then
python train.py --do_train=True --device_id=$DEVICE_ID &> log &
fi
cd .. cd ..

View File

@ -22,7 +22,6 @@ config = ed({
"img_height": 768, "img_height": 768,
"keep_ratio": True, "keep_ratio": True,
"flip_ratio": 0.5, "flip_ratio": 0.5,
"photo_ratio": 0.5,
"expand_ratio": 1.0, "expand_ratio": 1.0,
"max_instance_count": 128, "max_instance_count": 128,
@ -109,7 +108,6 @@ config = ed({
"test_max_per_img": 100, "test_max_per_img": 100,
"test_batch_size": 2, "test_batch_size": 2,
"rpn_head_loss_type": "CrossEntropyLoss",
"rpn_head_use_sigmoid": True, "rpn_head_use_sigmoid": True,
"rpn_head_weight": 1.0, "rpn_head_weight": 1.0,
"mask_thr_binary": 0.5, "mask_thr_binary": 0.5,
@ -119,7 +117,6 @@ config = ed({
"base_step": 58633, "base_step": 58633,
"total_epoch": 13, "total_epoch": 13,
"warmup_step": 500, "warmup_step": 500,
"warmup_mode": "linear",
"warmup_ratio": 1/3.0, "warmup_ratio": 1/3.0,
"sgd_momentum": 0.9, "sgd_momentum": 0.9,
@ -131,7 +128,7 @@ config = ed({
"pretrain_epoch_size": 0, "pretrain_epoch_size": 0,
"epoch_size": 12, "epoch_size": 12,
"save_checkpoint": True, "save_checkpoint": True,
"save_checkpoint_epochs": 1, "save_checkpoint_epochs": 12,
"keep_checkpoint_max": 12, "keep_checkpoint_max": 12,
"save_checkpoint_path": "./", "save_checkpoint_path": "./",