From 26327fa6387f584ca569be4f06b459386fc949d9 Mon Sep 17 00:00:00 2001 From: zhouyuanshen Date: Tue, 4 Aug 2020 11:27:57 +0800 Subject: [PATCH] remove redundant code in eval.py --- model_zoo/official/cv/faster_rcnn/eval.py | 12 +-- .../scripts/run_distribute_train.sh | 2 +- .../scripts/run_standalone_train.sh | 2 +- model_zoo/official/cv/faster_rcnn/train.py | 97 +++++++++---------- 4 files changed, 56 insertions(+), 57 deletions(-) diff --git a/model_zoo/official/cv/faster_rcnn/eval.py b/model_zoo/official/cv/faster_rcnn/eval.py index 2c8be8d3542..20497350461 100644 --- a/model_zoo/official/cv/faster_rcnn/eval.py +++ b/model_zoo/official/cv/faster_rcnn/eval.py @@ -108,26 +108,26 @@ if __name__ == '__main__': prefix = "FasterRcnn_eval.mindrecord" mindrecord_dir = config.mindrecord_dir mindrecord_file = os.path.join(mindrecord_dir, prefix) - if args_opt.rank_id == 0 and not os.path.exists(mindrecord_file): + print("CHECKING MINDRECORD FILES ...") + + if not os.path.exists(mindrecord_file): if not os.path.isdir(mindrecord_dir): os.makedirs(mindrecord_dir) if args_opt.dataset == "coco": if os.path.isdir(config.coco_root): - print("Create Mindrecord.") + print("Create Mindrecord. It may take some time.") data_to_mindrecord_byte_image("coco", False, prefix, file_num=1) print("Create Mindrecord Done, at {}".format(mindrecord_dir)) else: print("coco_root not exits.") else: if os.path.isdir(config.IMAGE_DIR) and os.path.exists(config.ANNO_PATH): - print("Create Mindrecord.") + print("Create Mindrecord. 
It may take some time.") data_to_mindrecord_byte_image("other", False, prefix, file_num=1) print("Create Mindrecord Done, at {}".format(mindrecord_dir)) else: print("IMAGE_DIR or ANNO_PATH not exits.") - while not os.path.exists(mindrecord_file + ".db"): - time.sleep(5) - + print("CHECKING MINDRECORD FILES DONE!") print("Start Eval!") FasterRcnn_eval(mindrecord_file, args_opt.checkpoint_path, args_opt.ann_file) diff --git a/model_zoo/official/cv/faster_rcnn/scripts/run_distribute_train.sh b/model_zoo/official/cv/faster_rcnn/scripts/run_distribute_train.sh index 015730269a4..6dbf66cbe97 100755 --- a/model_zoo/official/cv/faster_rcnn/scripts/run_distribute_train.sh +++ b/model_zoo/official/cv/faster_rcnn/scripts/run_distribute_train.sh @@ -62,6 +62,6 @@ do cd ./train_parallel$i || exit echo "start training for rank $RANK_ID, device $DEVICE_ID" env > env.log - python train.py --do_train=True --device_id=$i --rank_id=$i --run_distribute=True --device_num=$DEVICE_NUM --pre_trained=$PATH2 &> log & + python train.py --device_id=$i --rank_id=$i --run_distribute=True --device_num=$DEVICE_NUM --pre_trained=$PATH2 &> log & cd .. done diff --git a/model_zoo/official/cv/faster_rcnn/scripts/run_standalone_train.sh b/model_zoo/official/cv/faster_rcnn/scripts/run_standalone_train.sh index 3239cabeb1f..92ea15c2cf4 100755 --- a/model_zoo/official/cv/faster_rcnn/scripts/run_standalone_train.sh +++ b/model_zoo/official/cv/faster_rcnn/scripts/run_standalone_train.sh @@ -54,5 +54,5 @@ cp -r ../src ./train cd ./train || exit echo "start training for device $DEVICE_ID" env > env.log -python train.py --do_train=True --device_id=$DEVICE_ID --pre_trained=$PATH1 &> log & +python train.py --device_id=$DEVICE_ID --pre_trained=$PATH1 &> log & cd .. 
diff --git a/model_zoo/official/cv/faster_rcnn/train.py b/model_zoo/official/cv/faster_rcnn/train.py index 8e3ccdfbbc7..d48466f6216 100644 --- a/model_zoo/official/cv/faster_rcnn/train.py +++ b/model_zoo/official/cv/faster_rcnn/train.py @@ -41,22 +41,18 @@ np.random.seed(1) de.config.set_seed(1) parser = argparse.ArgumentParser(description="FasterRcnn training") -parser.add_argument("--only_create_dataset", type=bool, default=False, help="If set it true, only create " - "Mindrecord, default is false.") -parser.add_argument("--run_distribute", type=bool, default=False, help="Run distribute, default is false.") -parser.add_argument("--do_train", type=bool, default=True, help="Do train or not, default is true.") -parser.add_argument("--do_eval", type=bool, default=False, help="Do eval or not, default is false.") -parser.add_argument("--dataset", type=str, default="coco", help="Dataset, default is coco.") -parser.add_argument("--pre_trained", type=str, default="", help="Pretrain file path.") -parser.add_argument("--device_id", type=int, default=0, help="Device id, default is 0.") -parser.add_argument("--device_num", type=int, default=1, help="Use device nums, default is 1.") -parser.add_argument("--rank_id", type=int, default=0, help="Rank id, default is 0.") +parser.add_argument("--run_distribute", type=bool, default=False, help="Run distribute, default: false.") +parser.add_argument("--dataset", type=str, default="coco", help="Dataset name, default: coco.") +parser.add_argument("--pre_trained", type=str, default="", help="Pretrained file path.") +parser.add_argument("--device_id", type=int, default=0, help="Device id, default: 0.") +parser.add_argument("--device_num", type=int, default=1, help="Use device nums, default: 1.") +parser.add_argument("--rank_id", type=int, default=0, help="Rank id, default: 0.") args_opt = parser.parse_args() context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", device_id=args_opt.device_id) if __name__ == '__main__': - if 
not args_opt.do_eval and args_opt.run_distribute: + if args_opt.run_distribute: rank = args_opt.rank_id device_num = args_opt.device_num context.set_auto_parallel_context(device_num=device_num, parallel_mode=ParallelMode.DATA_PARALLEL, @@ -73,19 +69,21 @@ if __name__ == '__main__': prefix = "FasterRcnn.mindrecord" mindrecord_dir = config.mindrecord_dir mindrecord_file = os.path.join(mindrecord_dir, prefix + "0") + print("CHECKING MINDRECORD FILES ...") + if rank == 0 and not os.path.exists(mindrecord_file): if not os.path.isdir(mindrecord_dir): os.makedirs(mindrecord_dir) if args_opt.dataset == "coco": if os.path.isdir(config.coco_root): - print("Create Mindrecord.") + print("Create Mindrecord. It may take some time.") data_to_mindrecord_byte_image("coco", True, prefix) print("Create Mindrecord Done, at {}".format(mindrecord_dir)) else: print("coco_root not exits.") else: if os.path.isdir(config.IMAGE_DIR) and os.path.exists(config.ANNO_PATH): - print("Create Mindrecord.") + print("Create Mindrecord. It may take some time.") data_to_mindrecord_byte_image("other", True, prefix) print("Create Mindrecord Done, at {}".format(mindrecord_dir)) else: @@ -94,47 +92,48 @@ if __name__ == '__main__': while not os.path.exists(mindrecord_file + ".db"): time.sleep(5) - if not args_opt.only_create_dataset: - loss_scale = float(config.loss_scale) + print("CHECKING MINDRECORD FILES DONE!") - # When create MindDataset, using the fitst mindrecord file, such as FasterRcnn.mindrecord0. - dataset = create_fasterrcnn_dataset(mindrecord_file, repeat_num=1, - batch_size=config.batch_size, device_num=device_num, rank_id=rank) + loss_scale = float(config.loss_scale) - dataset_size = dataset.get_dataset_size() - print("Create dataset done!") + # When creating MindDataset, use the first mindrecord file, such as FasterRcnn.mindrecord0. 
+ dataset = create_fasterrcnn_dataset(mindrecord_file, repeat_num=1, + batch_size=config.batch_size, device_num=device_num, rank_id=rank) - net = Faster_Rcnn_Resnet50(config=config) - net = net.set_train() + dataset_size = dataset.get_dataset_size() + print("Create dataset done!") - load_path = args_opt.pre_trained - if load_path != "": - param_dict = load_checkpoint(load_path) - for item in list(param_dict.keys()): - if not item.startswith('backbone'): - param_dict.pop(item) - load_param_into_net(net, param_dict) + net = Faster_Rcnn_Resnet50(config=config) + net = net.set_train() - loss = LossNet() - lr = Tensor(dynamic_lr(config, rank_size=device_num), mstype.float32) + load_path = args_opt.pre_trained + if load_path != "": + param_dict = load_checkpoint(load_path) + for item in list(param_dict.keys()): + if not item.startswith('backbone'): + param_dict.pop(item) + load_param_into_net(net, param_dict) - opt = SGD(params=net.trainable_params(), learning_rate=lr, momentum=config.momentum, - weight_decay=config.weight_decay, loss_scale=config.loss_scale) - net_with_loss = WithLossCell(net, loss) - if args_opt.run_distribute: - net = TrainOneStepCell(net_with_loss, net, opt, sens=config.loss_scale, reduce_flag=True, - mean=True, degree=device_num) - else: - net = TrainOneStepCell(net_with_loss, net, opt, sens=config.loss_scale) + loss = LossNet() + lr = Tensor(dynamic_lr(config, rank_size=device_num), mstype.float32) - time_cb = TimeMonitor(data_size=dataset_size) - loss_cb = LossCallBack() - cb = [time_cb, loss_cb] - if config.save_checkpoint: - ckptconfig = CheckpointConfig(save_checkpoint_steps=config.save_checkpoint_epochs * dataset_size, - keep_checkpoint_max=config.keep_checkpoint_max) - ckpoint_cb = ModelCheckpoint(prefix='faster_rcnn', directory=config.save_checkpoint_path, config=ckptconfig) - cb += [ckpoint_cb] + opt = SGD(params=net.trainable_params(), learning_rate=lr, momentum=config.momentum, + weight_decay=config.weight_decay, 
loss_scale=config.loss_scale) + net_with_loss = WithLossCell(net, loss) + if args_opt.run_distribute: + net = TrainOneStepCell(net_with_loss, net, opt, sens=config.loss_scale, reduce_flag=True, + mean=True, degree=device_num) + else: + net = TrainOneStepCell(net_with_loss, net, opt, sens=config.loss_scale) - model = Model(net) - model.train(config.epoch_size, dataset, callbacks=cb) + time_cb = TimeMonitor(data_size=dataset_size) + loss_cb = LossCallBack() + cb = [time_cb, loss_cb] + if config.save_checkpoint: + ckptconfig = CheckpointConfig(save_checkpoint_steps=config.save_checkpoint_epochs * dataset_size, + keep_checkpoint_max=config.keep_checkpoint_max) + ckpoint_cb = ModelCheckpoint(prefix='faster_rcnn', directory=config.save_checkpoint_path, config=ckptconfig) + cb += [ckpoint_cb] + + model = Model(net) + model.train(config.epoch_size, dataset, callbacks=cb)