diff --git a/example/yolov3_coco2017/dataset.py b/example/yolov3_coco2017/dataset.py index 826fe16c538..9c6a0f362d2 100644 --- a/example/yolov3_coco2017/dataset.py +++ b/example/yolov3_coco2017/dataset.py @@ -22,7 +22,6 @@ from PIL import Image from matplotlib.colors import rgb_to_hsv, hsv_to_rgb import mindspore.dataset as de from mindspore.mindrecord import FileWriter -import mindspore.dataset.transforms.vision.py_transforms as P import mindspore.dataset.transforms.vision.c_transforms as C from config import ConfigYOLOV3ResNet18 @@ -301,13 +300,12 @@ def create_yolo_dataset(mindrecord_dir, batch_size=32, repeat_num=10, device_num compose_map_func = (lambda image, annotation: preprocess_fn(image, annotation, is_training)) if is_training: - hwc_to_chw = P.HWC2CHW() + hwc_to_chw = C.HWC2CHW() ds = ds.map(input_columns=["image", "annotation"], output_columns=["image", "bbox_1", "bbox_2", "bbox_3", "gt_box1", "gt_box2", "gt_box3"], columns_order=["image", "bbox_1", "bbox_2", "bbox_3", "gt_box1", "gt_box2", "gt_box3"], operations=compose_map_func, num_parallel_workers=num_parallel_workers) ds = ds.map(input_columns=["image"], operations=hwc_to_chw, num_parallel_workers=num_parallel_workers) - ds = ds.shuffle(buffer_size=256) ds = ds.batch(batch_size, drop_remainder=True) ds = ds.repeat(repeat_num) else: diff --git a/example/yolov3_coco2017/run_distribute_train.sh b/example/yolov3_coco2017/run_distribute_train.sh index 0c43e776b95..201f19ca16f 100644 --- a/example/yolov3_coco2017/run_distribute_train.sh +++ b/example/yolov3_coco2017/run_distribute_train.sh @@ -19,6 +19,7 @@ echo "Please run the scipt as: " echo "sh run_distribute_train.sh DEVICE_NUM EPOCH_SIZE MINDRECORD_DIR IMAGE_DIR ANNO_PATH MINDSPORE_HCCL_CONFIG_PATH" echo "for example: sh run_distribute_train.sh 8 100 /data/Mindrecord_train /data /data/train.txt /data/hccl.json" echo "It is better to use absolute path." 
+echo "The learning rate is 0.005 as default, if you want other lr, please change the value in this script." echo "==============================================================================================================" EPOCH_SIZE=$2 @@ -38,6 +39,11 @@ export RANK_SIZE=$1 for((i=0;i env.log - python ../train.py \ + taskset -c $cmdopt python ../train.py \ --distribute=1 \ + --lr=0.005 \ --device_num=$RANK_SIZE \ --device_id=$DEVICE_ID \ --mindrecord_dir=$MINDRECORD_DIR \ diff --git a/example/yolov3_coco2017/train.py b/example/yolov3_coco2017/train.py index 121e2aa810f..c7d28a8350f 100644 --- a/example/yolov3_coco2017/train.py +++ b/example/yolov3_coco2017/train.py @@ -67,6 +67,7 @@ if __name__ == '__main__': parser.add_argument("--distribute", type=bool, default=False, help="Run distribute, default is false.") parser.add_argument("--device_id", type=int, default=0, help="Device id, default is 0.") parser.add_argument("--device_num", type=int, default=1, help="Use device nums, default is 1.") + parser.add_argument("--lr", type=float, default=0.001, help="Learning rate, default is 0.001.") parser.add_argument("--mode", type=str, default="sink", help="Run sink mode or not, default is sink") parser.add_argument("--epoch_size", type=int, default=10, help="Epoch size, default is 10") parser.add_argument("--batch_size", type=int, default=32, help="Batch size, default is 32.") @@ -137,8 +138,8 @@ if __name__ == '__main__': ckpt_config = CheckpointConfig(save_checkpoint_steps=dataset_size * args_opt.save_checkpoint_epochs) ckpoint_cb = ModelCheckpoint(prefix="yolov3", directory=None, config=ckpt_config) - lr = Tensor(get_lr(learning_rate=0.001, start_step=0, global_step=args_opt.epoch_size * dataset_size, - decay_step=1000, decay_rate=0.95)) + lr = Tensor(get_lr(learning_rate=args_opt.lr, start_step=0, global_step=args_opt.epoch_size * dataset_size, + decay_step=1000, decay_rate=0.95, steps=True)) opt = nn.Adam(filter(lambda x: x.requires_grad, net.get_parameters()), 
lr, loss_scale=loss_scale) net = TrainingWrapper(net, opt, loss_scale) diff --git a/mindspore/ops/operations/nn_ops.py b/mindspore/ops/operations/nn_ops.py index 377ef19417e..c5ff312c2a7 100644 --- a/mindspore/ops/operations/nn_ops.py +++ b/mindspore/ops/operations/nn_ops.py @@ -2331,7 +2331,11 @@ class Adam(PrimitiveWithInfer): - **gradient** (Tensor) - Gradients. Outputs: - Tensor, has the same shape and data type as `var`. + Tuple of 3 Tensors, the updated parameters. + + - **var** (Tensor) - The same shape and data type as `var`. + - **m** (Tensor) - The same shape and data type as `m`. + - **v** (Tensor) - The same shape and data type as `v`. """ @prim_attr_register