From 79fdd144aa765f6280f8a4ed83b5f544e829b7ba Mon Sep 17 00:00:00 2001
From: linqingke
Date: Tue, 29 Sep 2020 11:25:57 +0800
Subject: [PATCH] psenet update

---
 .../cv/psenet/scripts/run_distribute_train.sh |  6 +--
 model_zoo/official/cv/psenet/src/config.py    |  9 ++++-
 .../cv/psenet/src/generate_hccn_file.py       |  2 +-
 .../official/cv/psenet/src/lr_schedule.py     | 37 +++++++++++++++++++
 model_zoo/official/cv/psenet/train.py         |  8 +---
 5 files changed, 50 insertions(+), 12 deletions(-)
 create mode 100644 model_zoo/official/cv/psenet/src/lr_schedule.py

diff --git a/model_zoo/official/cv/psenet/scripts/run_distribute_train.sh b/model_zoo/official/cv/psenet/scripts/run_distribute_train.sh
index 147a36610cb..9c6eea4d2ee 100644
--- a/model_zoo/official/cv/psenet/scripts/run_distribute_train.sh
+++ b/model_zoo/official/cv/psenet/scripts/run_distribute_train.sh
@@ -41,9 +41,9 @@ fi
 
 python ${current_exec_path}/src/generate_hccn_file.py
 
-export DEVICE_NUM=4
-export RANK_SIZE=4
-export RANK_TABLE_FILE=${current_exec_path}/rank_table_4p.json
+export DEVICE_NUM=8
+export RANK_SIZE=8
+export RANK_TABLE_FILE=${current_exec_path}/rank_table_8p.json
 
 for((i=0; i<${DEVICE_NUM}; i++))
 do
diff --git a/model_zoo/official/cv/psenet/src/config.py b/model_zoo/official/cv/psenet/src/config.py
index 8e969a8ac81..7a88ef4db27 100644
--- a/model_zoo/official/cv/psenet/src/config.py
+++ b/model_zoo/official/cv/psenet/src/config.py
@@ -29,6 +29,12 @@ config = ed({
     # neck
     'NECK_OUT_CHANNEL': 256,
 
+    # lr
+    "BASE_LR": 2e-3,
+    "TRAIN_TOTAL_ITER": 58000,
+    "WARMUP_STEP": 620,
+    "WARMUP_RATIO": 1/3,
+
     # dataset for train
     "TRAIN_ROOT_DIR": 'psenet/ic15/',
     "TRAIN_IS_TRANSFORM": True,
@@ -37,9 +43,8 @@
     "TRAIN_MIN_SCALE": 0.4,
     "TRAIN_BUFFER_SIZE": 8,
     "TRAIN_BATCH_SIZE": 4,
-    "TRAIN_REPEAT_NUM": 608*4,
+    "TRAIN_REPEAT_NUM": 1800,
     "TRAIN_DROP_REMAINDER": True,
-    "TRAIN_TOTAL_ITER": 152000,
     "TRAIN_MODEL_SAVE_PATH": './checkpoints/',
 
     # dataset for test
diff --git a/model_zoo/official/cv/psenet/src/generate_hccn_file.py b/model_zoo/official/cv/psenet/src/generate_hccn_file.py
index 07a4a23a7ee..514ca5e74bc 100644
--- a/model_zoo/official/cv/psenet/src/generate_hccn_file.py
+++ b/model_zoo/official/cv/psenet/src/generate_hccn_file.py
@@ -17,7 +17,7 @@
 import os
 import socket
 
-RANK_TABLE_SAVE_PATH = './rank_table_4p.json'
+RANK_TABLE_SAVE_PATH = './rank_table_8p.json'
 
 
 def main():
diff --git a/model_zoo/official/cv/psenet/src/lr_schedule.py b/model_zoo/official/cv/psenet/src/lr_schedule.py
new file mode 100644
index 00000000000..a9c2b4afac0
--- /dev/null
+++ b/model_zoo/official/cv/psenet/src/lr_schedule.py
@@ -0,0 +1,37 @@
+# Copyright 2020 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+"""lr generator for psenet"""
+import math
+
+def linear_warmup_learning_rate(current_step, warmup_steps, base_lr, init_lr):
+    lr_inc = (float(base_lr) - float(init_lr)) / float(warmup_steps)
+    learning_rate = float(init_lr) + lr_inc * current_step
+    return learning_rate
+
+def a_cosine_learning_rate(current_step, base_lr, warmup_steps, decay_steps):
+    base = float(current_step - warmup_steps) / float(decay_steps)
+    learning_rate = (1 + math.cos(base * math.pi)) / 2 * base_lr
+    return learning_rate
+
+def dynamic_lr(base_lr, total_steps, warmup_steps, warmup_ratio=1/3):
+    """dynamic learning rate generator"""
+    lr = []
+    for i in range(total_steps):
+        if i < warmup_steps:
+            lr.append(linear_warmup_learning_rate(i, warmup_steps, base_lr, base_lr * warmup_ratio))
+        else:
+            lr.append(a_cosine_learning_rate(i, base_lr, warmup_steps, total_steps))
+
+    return lr
diff --git a/model_zoo/official/cv/psenet/train.py b/model_zoo/official/cv/psenet/train.py
index 513e284950e..b508574ce8b 100644
--- a/model_zoo/official/cv/psenet/train.py
+++ b/model_zoo/official/cv/psenet/train.py
@@ -14,7 +14,6 @@
 # ============================================================================
 
-import math
 import argparse
 import mindspore.nn as nn
 from mindspore import context
@@ -29,6 +28,7 @@ from src.config import config
 from src.ETSNET.etsnet import ETSNet
 from src.ETSNET.dice_loss import DiceLoss
 from src.network_define import WithLossCell, TrainOneStepCell, LossCallBack
+from src.lr_schedule import dynamic_lr
 
 parser = argparse.ArgumentParser(description='Hyperparams')
 parser.add_argument('--run_distribute', default=False, action='store_true',
@@ -41,10 +41,6 @@ args = parser.parse_args()
 set_seed(1)
 context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", device_id=args.device_id)
 
-def lr_generator(start_lr, lr_scale, total_iters):
-    lrs = [start_lr * (lr_scale ** math.floor(cur_iter * 1.0 / (total_iters / 3))) for cur_iter in range(total_iters)]
-    return lrs
-
 def train():
     rank_id = 0
     if args.run_distribute:
@@ -67,7 +63,7 @@ def train():
 
     criterion = DiceLoss(batch_size=config.TRAIN_BATCH_SIZE)
 
-    lrs = lr_generator(start_lr=1e-3, lr_scale=0.1, total_iters=config.TRAIN_TOTAL_ITER)
+    lrs = dynamic_lr(config.BASE_LR, config.TRAIN_TOTAL_ITER, config.WARMUP_STEP, config.WARMUP_RATIO)
     opt = nn.SGD(params=net.trainable_params(), learning_rate=lrs, momentum=0.99, weight_decay=5e-4)
 
     # warp model
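
The new schedule can be checked in isolation before launching a full run. A minimal sketch, assuming the patch is applied and the snippet is run from model_zoo/official/cv/psenet so that the src package resolves; the constants mirror the new config.py entries:

    # Quick sanity check of src/lr_schedule.py with the values added to config.py:
    # BASE_LR=2e-3, TRAIN_TOTAL_ITER=58000, WARMUP_STEP=620, WARMUP_RATIO=1/3.
    from src.lr_schedule import dynamic_lr

    lrs = dynamic_lr(base_lr=2e-3, total_steps=58000, warmup_steps=620, warmup_ratio=1 / 3)

    assert len(lrs) == 58000                                   # one value per training step
    assert abs(lrs[0] - 2e-3 / 3) < 1e-9                       # warmup starts at BASE_LR * WARMUP_RATIO
    assert abs(lrs[620] - 2e-3) < 1e-9                         # peak equals BASE_LR once warmup ends
    assert all(b > a for a, b in zip(lrs[:620], lrs[1:621]))   # warmup is strictly increasing
    assert lrs[-1] < 1e-5                                      # cosine tail decays close to zero
    print(lrs[0], lrs[620], lrs[-1])

Note that dynamic_lr passes total_steps (58000) as the cosine's decay_steps while only total_steps - warmup_steps values are generated after warmup, so the final value tapers to roughly 6e-7 rather than exactly zero.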
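Together with the move from a 4-device to an 8-device launch, the optimization recipe changes: the step schedule that train.py used to build inline (1e-3 decayed by 10x at each third of 152000 iterations) is replaced by a warmup-plus-cosine curve over 58000 iterations with a higher 2e-3 peak. A standalone sketch of the two curves, using only the formulas and constants that appear in this diff:

    import math

    def old_step_lr(start_lr=1e-3, lr_scale=0.1, total_iters=152000):
        # Removed lr_generator: three equal plateaus, 1e-3 -> 1e-4 -> 1e-5.
        return [start_lr * (lr_scale ** math.floor(i * 1.0 / (total_iters / 3)))
                for i in range(total_iters)]

    def new_warmup_cosine_lr(base_lr=2e-3, total_steps=58000, warmup_steps=620, warmup_ratio=1 / 3):
        # New dynamic_lr: linear warmup to base_lr, then half-cosine decay.
        lrs = []
        init_lr = base_lr * warmup_ratio
        for i in range(total_steps):
            if i < warmup_steps:
                lrs.append(init_lr + (base_lr - init_lr) * i / warmup_steps)
            else:
                lrs.append((1 + math.cos((i - warmup_steps) / total_steps * math.pi)) / 2 * base_lr)
        return lrs

    old, new = old_step_lr(), new_warmup_cosine_lr()
    for frac in (0.0, 0.25, 0.5, 0.75):
        print(f"{frac:>4.0%}  old={old[int(frac * len(old))]:.2e}  new={new[int(frac * len(new))]:.2e}")
    print(f"last  old={old[-1]:.2e}  new={new[-1]:.2e}")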