optimize cnnctc learning rate strategy
commit 13b7cb2e68
parent 305f08811d
@@ -33,8 +33,10 @@ TRAIN_EPOCHS: 3
 run_distribute: False
 PRED_TRAINED: ""
 SAVE_PATH: "./"
-LR: 1e-4
-LR_PARA: 5e-4
+#LR
+base_lr: 0.0005
+warmup_step: 2000
+warmup_ratio: 0.0625
 MOMENTUM: 0.8
 LOSS_SCALE: 8096
 SAVE_CKPT_PER_N_STEP: 2000
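Note: the fixed LR / LR_PARA values give way to a warmup-plus-cosine schedule driven by the three new keys. Reading them together (warmup_ratio scales base_lr, as in the dynamic_lr generator added below), the warmup segment works out to:

base_lr = 0.0005         # peak LR, reached when warmup ends
warmup_step = 2000       # length of the linear warmup, in steps
warmup_ratio = 0.0625    # starting LR as a fraction of base_lr

init_lr = base_lr * warmup_ratio
print(init_lr)           # 3.125e-05: the LR at step 0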
@@ -35,12 +35,14 @@ class NormalizePAD():
     def __call__(self, img):
         # toTensor
         img = np.array(img, dtype=np.float32)
+        # normalize
+        means = [121.58949, 123.93914, 123.418655]
+        stds = [65.70353, 65.142426, 68.61079]
+        img = np.subtract(img, means)
+        img = np.true_divide(img, stds)
+
         img = img.transpose([2, 0, 1])
         img = img.astype(np.float)
-        img = np.true_divide(img, 255)
-        # normalize
-        img = np.subtract(img, 0.5)
-        img = np.true_divide(img, 0.5)

         _, _, w = img.shape
         Pad_img = np.zeros(shape=self.max_size, dtype=np.float32)
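Note: NormalizePAD now standardizes with per-channel means and stds on the raw 0-255 scale, replacing the old divide-by-255-then-shift-by-0.5 scheme. A minimal standalone sketch of the new path (random input; the 32x100 image size is an assumption for illustration):

import numpy as np

means = [121.58949, 123.93914, 123.418655]   # per-channel means from the diff
stds = [65.70353, 65.142426, 68.61079]       # per-channel stds from the diff

img = np.random.randint(0, 256, size=(32, 100, 3)).astype(np.float32)
img = np.subtract(img, means)       # center each channel on the 0-255 scale
img = np.true_divide(img, stds)     # scale each channel to roughly unit variance
img = img.transpose([2, 0, 1])      # HWC -> CHW for the network
print(img.shape, round(img.mean(), 3), round(img.std(), 3))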
@@ -0,0 +1,41 @@
+# Copyright 2020-2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+"""lr generator for cnnctc"""
+import math
+
+def linear_warmup_learning_rate(current_step, warmup_steps, base_lr, init_lr):
+    lr_inc = (float(base_lr) - float(init_lr)) / float(warmup_steps)
+    learning_rate = float(init_lr) + lr_inc * current_step
+    return learning_rate
+
+def a_cosine_learning_rate(current_step, base_lr, warmup_steps, decay_steps):
+    base = float(current_step - warmup_steps) / float(decay_steps)
+    learning_rate = (1 + math.cos(base * math.pi)) / 2 * base_lr
+    return learning_rate
+
+def dynamic_lr(config, steps_per_epoch):
+    """dynamic learning rate generator"""
+    base_lr = config.base_lr
+    total_steps = steps_per_epoch * config.TRAIN_EPOCHS
+    warmup_steps = int(config.warmup_step)
+    decay_steps = total_steps - warmup_steps
+    lr = []
+    for i in range(total_steps):
+        if i < warmup_steps:
+            lr.append(linear_warmup_learning_rate(i, warmup_steps, base_lr, base_lr * config.warmup_ratio))
+        else:
+            lr.append(a_cosine_learning_rate(i, base_lr, warmup_steps, decay_steps))
+
+    return lr
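Note: the new file (src/lr_schedule.py, judging by the import added below) builds the whole schedule up front: linear warmup from base_lr * warmup_ratio to base_lr, then a half-cosine decay toward zero. A quick sanity check with a stand-in config, assuming a hypothetical 5000 steps per epoch:

from types import SimpleNamespace
from src.lr_schedule import dynamic_lr

# Only the fields dynamic_lr actually reads; values from the config change above.
cfg = SimpleNamespace(base_lr=0.0005, TRAIN_EPOCHS=3,
                      warmup_step=2000, warmup_ratio=0.0625)
lr = dynamic_lr(cfg, steps_per_epoch=5000)

print(len(lr))     # 15000: one value per step (5000 steps/epoch * 3 epochs)
print(lr[0])       # 3.125e-05: warmup starts at base_lr * warmup_ratio
print(lr[2000])    # 0.0005: the cosine phase begins at the peak, base_lr
print(lr[-1])      # ~0: essentially fully decayed by the last step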
@@ -15,7 +15,6 @@
 """cnnctc train"""


-import ast
 import numpy as np
 import mindspore
 import mindspore.common.dtype as mstype
@@ -30,6 +29,7 @@ from mindspore.train.serialization import load_checkpoint, load_param_into_net
 from src.callback import LossCallBack
 from src.cnn_ctc import CNNCTC_Model, ctc_loss, WithLossCell, CNNCTCTrainOneStepWithLossScaleCell
 from src.dataset import ST_MJ_Generator_batch_fixed_length, ST_MJ_Generator_batch_fixed_length_para
+from src.lr_schedule import dynamic_lr
 from src.model_utils.config import config
 from src.model_utils.device_adapter import get_device_id
 from src.model_utils.moxing_adapter import moxing_wrapper
@@ -88,9 +88,6 @@ def train():
     else:
         ckpt_save_dir = config.SAVE_PATH + "ckpt_standalone/"

-    config.LR = ast.literal_eval(config.LR)
-    config.LR_PARA = ast.literal_eval(config.LR_PARA)
-
    ds = dataset_creator(config.run_distribute)

    net = CNNCTC_Model(config.NUM_CLASS, config.HIDDEN_SIZE, config.FINAL_FEATURE_WIDTH)
@@ -104,9 +101,11 @@ def train():
         print('train from scratch...')

     criterion = ctc_loss()
+    dataset_size = ds.get_dataset_size()
+    lr = Tensor(dynamic_lr(config, dataset_size), mstype.float32)
     opt = mindspore.nn.RMSProp(params=net.trainable_params(),
                                centered=True,
-                               learning_rate=config.LR_PARA,
+                               learning_rate=lr,
                                momentum=config.MOMENTUM,
                                loss_scale=config.LOSS_SCALE)
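Note: passing lr as a 1-D Tensor works because MindSpore optimizers support dynamic learning rates: given a per-step sequence, element i is used at global step i. A condensed, self-contained sketch of the pattern (the linear toy schedule and the Dense stand-in network are placeholders, not the repo's code):

import mindspore
import mindspore.common.dtype as mstype
from mindspore import Tensor

# Toy per-step schedule; train.py builds the real one with dynamic_lr(config, dataset_size).
total_steps = 15000
schedule = [0.0005 * (1 - i / total_steps) for i in range(total_steps)]
lr = Tensor(schedule, mstype.float32)

net = mindspore.nn.Dense(10, 10)   # stand-in for CNNCTC_Model
opt = mindspore.nn.RMSProp(params=net.trainable_params(),
                           centered=True,
                           learning_rate=lr,   # element i applies at global step i
                           momentum=0.8,
                           loss_scale=8096)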