forked from mindspore-Ecosystem/mindspore
!8288 update transformer scripts
From: @pandoublefeng
Commit: cc7993355c
```diff
@@ -41,8 +41,8 @@ Note that you can run the scripts based on the dataset mentioned in original paper
 # [Environment Requirements](#contents)
 
-- Hardware(Ascend)
-    - Prepare hardware environment with Ascend processor. If you want to try Ascend, please send the [application form](https://obs-9be7.obs.cn-east-2.myhuaweicloud.com/file/other/Ascend%20Model%20Zoo%E4%BD%93%E9%AA%8C%E8%B5%84%E6%BA%90%E7%94%B3%E8%AF%B7%E8%A1%A8.docx) to ascend@huawei.com. Once approved, you can get the resources.
+- Hardware(Ascend/GPU)
+    - Prepare hardware environment with Ascend or GPU processor. If you want to try Ascend, please send the [application form](https://obs-9be7.obs.cn-east-2.myhuaweicloud.com/file/other/Ascend%20Model%20Zoo%E4%BD%93%E9%AA%8C%E8%B5%84%E6%BA%90%E7%94%B3%E8%AF%B7%E8%A1%A8.docx) to ascend@huawei.com. Once approved, you can get the resources.
 - Framework
     - [MindSpore](https://gitee.com/mindspore/mindspore)
 - For more information, please check the resources below:
```
```diff
@@ -61,8 +61,8 @@ if cfg.transformer_network == 'large':
         num_attention_heads=16,
         intermediate_size=4096,
         hidden_act="relu",
-        hidden_dropout_prob=0.1,
-        attention_probs_dropout_prob=0.1,
+        hidden_dropout_prob=0.2,
+        attention_probs_dropout_prob=0.2,
         max_position_embeddings=128,
         initializer_range=0.02,
         label_smoothing=0.1,
```
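This hunk doubles both dropout probabilities in the 'large' configuration from 0.1 to 0.2. As a reminder of where the two values act, here is a minimal NumPy sketch of the conventional BERT/Transformer placement (the toy shapes and names are illustrative, not taken from the model zoo): `attention_probs_dropout_prob` is applied to the softmaxed attention weights, and `hidden_dropout_prob` to a sublayer's output before the residual add.

```python
import numpy as np

rng = np.random.default_rng(0)

def dropout(x, prob, training=True):
    """Inverted dropout: zero elements with probability `prob`, rescale survivors."""
    if not training or prob == 0.0:
        return x
    mask = rng.binomial(1, 1.0 - prob, size=x.shape)
    return x * mask / (1.0 - prob)

# Toy tensors standing in for one attention sublayer.
scores = rng.standard_normal((2, 4, 4))   # (batch, seq, seq) attention scores
hidden = rng.standard_normal((2, 4, 8))   # (batch, seq, hidden) sublayer input

# attention_probs_dropout_prob (now 0.2) drops softmaxed attention weights.
probs = np.exp(scores) / np.exp(scores).sum(axis=-1, keepdims=True)
probs = dropout(probs, prob=0.2)

# hidden_dropout_prob (now 0.2) drops the sublayer output before the residual add.
attn_out = probs @ hidden
out = hidden + dropout(attn_out, prob=0.2)
```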
```diff
@@ -159,9 +159,11 @@ def run_transformer_train():
 
+    hidden_size = transformer_net_cfg.hidden_size if args.device_target == "Ascend" \
+        else transformer_net_cfg_gpu.hidden_size
+    learning_rate = cfg.lr_schedule.learning_rate if args.device_target == "Ascend" \
+        else 1.0
     lr = Tensor(create_dynamic_lr(schedule="constant*rsqrt_hidden*linear_warmup*rsqrt_decay",
                                   training_steps=dataset.get_dataset_size()*args.epoch_size,
-                                  learning_rate=cfg.lr_schedule.learning_rate,
+                                  learning_rate=learning_rate,
                                   warmup_steps=cfg.lr_schedule.warmup_steps,
                                   hidden_size=hidden_size,
                                   start_decay_step=cfg.lr_schedule.start_decay_step,
```
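On the GPU path the change feeds `learning_rate=1.0` into the composed schedule, so the effective magnitude comes entirely from the multiplicative factors named in the schedule string. Below is a minimal sketch of how a `constant*rsqrt_hidden*linear_warmup*rsqrt_decay` schedule is typically computed (after the tensor2tensor "noam" scheme; the model zoo's actual `create_dynamic_lr` may treat `start_decay_step` and `min_lr` differently, and the example values, including `hidden_size=1024`, are assumptions):

```python
def dynamic_lr_sketch(training_steps, learning_rate, warmup_steps, hidden_size,
                      min_lr=0.0):
    """Return one learning-rate value per training step for the composed schedule."""
    lrs = []
    for step in range(1, training_steps + 1):
        lr = learning_rate                       # 'constant': base scale (1.0 on GPU here)
        lr *= hidden_size ** -0.5                # 'rsqrt_hidden': scale by 1/sqrt(d_model)
        lr *= min(1.0, step / warmup_steps)      # 'linear_warmup': ramp up over warmup_steps
        lr *= max(step, warmup_steps) ** -0.5    # 'rsqrt_decay': 1/sqrt(step) after warmup
        lrs.append(max(lr, min_lr))
    return lrs

# Illustrative call: hidden_size=1024 assumed for the 'large' config,
# learning_rate=1.0 as on the GPU path above.
lr_list = dynamic_lr_sketch(training_steps=100000, learning_rate=1.0,
                            warmup_steps=8000, hidden_size=1024)
```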