!6934 mobilenetv2 debug

Merge pull request !6934 from yepei6/master_debug
This commit is contained in:
mindspore-ci-bot 2020-09-27 22:02:33 +08:00 committed by Gitee
commit 825961aa43
5 changed files with 9 additions and 6 deletions

View File

@@ -111,7 +111,9 @@ if [ $# -gt 7 ] || [ $# -lt 4 ]
then
echo "Usage:
Ascend: sh run_train.sh Ascend [DEVICE_NUM] [VISIABLE_DEVICES(0,1,2,3,4,5,6,7)] [RANK_TABLE_FILE] [DATASET_PATH] [CKPT_PATH] [FREEZE_LAYER]
Ascend: sh run_train.sh Ascend [DEVICE_NUM] [VISIABLE_DEVICES(0,1,2,3,4,5,6,7)] [RANK_TABLE_FILE] [DATASET_PATH]
GPU: sh run_train.sh GPU [DEVICE_NUM] [VISIABLE_DEVICES(0,1,2,3,4,5,6,7)] [DATASET_PATH] [CKPT_PATH] [FREEZE_LAYER]
GPU: sh run_train.sh GPU [DEVICE_NUM] [VISIABLE_DEVICES(0,1,2,3,4,5,6,7)] [DATASET_PATH]
CPU: sh run_train.sh CPU [DATASET_PATH] [CKPT_PATH] [FREEZE_LAYER]"
exit 1
fi

View File

@@ -41,9 +41,9 @@ def train_parse_args():
train_parser.add_argument('--platform', type=str, default="Ascend", choices=("CPU", "GPU", "Ascend"), \
help='run platform, only support CPU, GPU and Ascend')
train_parser.add_argument('--dataset_path', type=str, required=True, help='Dataset path')
train_parser.add_argument('--pretrain_ckpt', type=str, default=None, help='Pretrained checkpoint path \
train_parser.add_argument('--pretrain_ckpt', type=str, default="", help='Pretrained checkpoint path \
for fine tune or incremental learning')
train_parser.add_argument('--freeze_layer', type=str, default=None, choices=["none", "backbone"], \
train_parser.add_argument('--freeze_layer', type=str, default="", choices=["", "none", "backbone"], \
help="freeze the weights of network from start to which layers")
train_parser.add_argument('--run_distribute', type=ast.literal_eval, default=True, help='Run distribute')
train_args = train_parser.parse_args()

View File

@@ -99,7 +99,7 @@ def create_dataset(dataset_path, do_train, config, repeat_num=1):
def extract_features(net, dataset_path, config):
features_folder = os.path.abspath(dataset_path) + '_features'
features_folder = dataset_path + '_features'
if not os.path.exists(features_folder):
os.makedirs(features_folder)
dataset = create_dataset(dataset_path=dataset_path,

View File

@@ -49,7 +49,7 @@ def context_device_init(config):
if config.run_distribute:
context.set_auto_parallel_context(device_num=config.rank_size,
parallel_mode=ParallelMode.DATA_PARALLEL,
gradients_mean=True)
gradients_mean=True, set_all_reduce_fusion_split_indices=[140])
init()
else:
raise ValueError("Only support CPU, GPU and Ascend.")

View File

@@ -42,6 +42,7 @@ set_seed(1)
if __name__ == '__main__':
args_opt = train_parse_args()
args_opt.dataset_path = os.path.abspath(args_opt.dataset_path)
config = set_config(args_opt)
start = time.time()
@@ -53,7 +54,7 @@ if __name__ == '__main__':
# define network
backbone_net, head_net, net = define_net(config, args_opt.is_training)
if args_opt.pretrain_ckpt and args_opt.freeze_layer == "backbone":
if args_opt.pretrain_ckpt != "" and args_opt.freeze_layer == "backbone":
load_ckpt(backbone_net, args_opt.pretrain_ckpt, trainable=False)
step_size = extract_features(backbone_net, args_opt.dataset_path, config)
@@ -92,7 +93,7 @@ if __name__ == '__main__':
total_epochs=epoch_size,
steps_per_epoch=step_size))
if args_opt.pretrain_ckpt is None or args_opt.freeze_layer == "none":
if args_opt.pretrain_ckpt == "" or args_opt.freeze_layer == "none":
loss_scale = FixedLossScaleManager(config.loss_scale, drop_overflow_update=False)
opt = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), lr, config.momentum, \
config.weight_decay, config.loss_scale)