From 15b366307e9ad016976431cd0851940130fbcbb1 Mon Sep 17 00:00:00 2001 From: yuchaojie Date: Thu, 27 Aug 2020 09:31:44 +0800 Subject: [PATCH] change enable_data_sink value to true for transformer --- .../nlp/transformer/scripts/run_distribute_train_ascend.sh | 2 +- .../nlp/transformer/scripts/run_standalone_train_ascend.sh | 2 +- model_zoo/official/nlp/transformer/train.py | 3 ++- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/model_zoo/official/nlp/transformer/scripts/run_distribute_train_ascend.sh b/model_zoo/official/nlp/transformer/scripts/run_distribute_train_ascend.sh index ea6ea614dcc..c7315f1c3ea 100644 --- a/model_zoo/official/nlp/transformer/scripts/run_distribute_train_ascend.sh +++ b/model_zoo/official/nlp/transformer/scripts/run_distribute_train_ascend.sh @@ -52,7 +52,7 @@ do --enable_save_ckpt="true" \ --enable_lossscale="true" \ --do_shuffle="true" \ - --enable_data_sink="false" \ + --enable_data_sink="true" \ --checkpoint_path="" \ --save_checkpoint_steps=2500 \ --save_checkpoint_num=30 \ diff --git a/model_zoo/official/nlp/transformer/scripts/run_standalone_train_ascend.sh b/model_zoo/official/nlp/transformer/scripts/run_standalone_train_ascend.sh index 8e677191a8a..436742ad4a0 100644 --- a/model_zoo/official/nlp/transformer/scripts/run_standalone_train_ascend.sh +++ b/model_zoo/official/nlp/transformer/scripts/run_standalone_train_ascend.sh @@ -37,7 +37,7 @@ python train.py \ --enable_save_ckpt="true" \ --enable_lossscale="true" \ --do_shuffle="true" \ - --enable_data_sink="false" \ + --enable_data_sink="true" \ --checkpoint_path="" \ --save_checkpoint_steps=2500 \ --save_checkpoint_num=30 \ diff --git a/model_zoo/official/nlp/transformer/train.py b/model_zoo/official/nlp/transformer/train.py index 8b7dc434562..eccd3f48d9d 100644 --- a/model_zoo/official/nlp/transformer/train.py +++ b/model_zoo/official/nlp/transformer/train.py @@ -166,7 +166,8 @@ def run_transformer_train(): netwithgrads.set_train(True) model = Model(netwithgrads) - model.train(args.epoch_size, dataset, callbacks=callbacks, dataset_sink_mode=(args.enable_data_sink == "true")) + model.train(args.epoch_size, dataset, callbacks=callbacks, dataset_sink_mode=(args.enable_data_sink == "true"), + sink_size=args.save_checkpoint_steps) if __name__ == '__main__': run_transformer_train()