diff --git a/model_zoo/official/nlp/gnmt_v2/README.md b/model_zoo/official/nlp/gnmt_v2/README.md index e2622c786cf..25e2a229b07 100644 --- a/model_zoo/official/nlp/gnmt_v2/README.md +++ b/model_zoo/official/nlp/gnmt_v2/README.md @@ -39,7 +39,7 @@ Attention mechanism: uses the standardized Bahdanau attention mechanism. First, Note that you can run the scripts based on the dataset mentioned in original paper or widely used in relevant domain/network architecture. In the following sections, we will introduce how to run the scripts using the related dataset below. -- WMT Englis-German for training. +- WMT English-German for training. - WMT newstest2014 for evaluation. # [Environment Requirements](#contents) @@ -178,7 +178,7 @@ Almost all required options and parameters can be easily assigned, including the 'hidden_size': 1024 # the output's last dimension of dynamicRNN 'initializer_range': 0.1 # initializer range 'max_decode_length': 50 # max length of decoder - 'lr': 2e-1 # initial learning rate + 'lr': 2e-3 # initial learning rate 'lr_scheduler': 'WarmupMultiStepLR' # learning rate scheduler 'existed_ckpt': "" # the absolute full path to save the checkpoint file ``` @@ -242,7 +242,7 @@ The `VOCAB_ADDR` is the vocabulary address, `BPE_CODE_ADDR` is the bpe code addr | Resource | Ascend 910 | | uploaded Date | 11/06/2020 (month/day/year) | | MindSpore Version | 1.0.0 | -| Dataset | WMT Englis-German for training | +| Dataset | WMT English-German for training | | Training Parameters | epoch=6, batch_size=128 | | Optimizer | Adam | | Loss Function | Softmax Cross Entropy | diff --git a/model_zoo/official/nlp/gnmt_v2/config/config.json b/model_zoo/official/nlp/gnmt_v2/config/config.json index 0688df2a607..e1c6c3fab26 100644 --- a/model_zoo/official/nlp/gnmt_v2/config/config.json +++ b/model_zoo/official/nlp/gnmt_v2/config/config.json @@ -4,7 +4,7 @@ "epochs": 6, "batch_size": 128, "dataset_schema": "/home/workspace/dataset_menu/train.tok.clean.bpe.32000.en.json", - 
"pre_train_dataset": "/home/workspace/dataset_menu/train.tok.clean.bpe.32000.en.tfrecord-001-of-001", + "pre_train_dataset": "/home/workspace/dataset_menu/train.tok.clean.bpe.32000.en.mindrecord", "fine_tune_dataset": null, "valid_dataset": null, "dataset_sink_mode": true diff --git a/model_zoo/official/nlp/gnmt_v2/config/config.py b/model_zoo/official/nlp/gnmt_v2/config/config.py index 3ca9d2d32a5..21f0b5f6fbb 100644 --- a/model_zoo/official/nlp/gnmt_v2/config/config.py +++ b/model_zoo/official/nlp/gnmt_v2/config/config.py @@ -93,7 +93,7 @@ class GNMTConfig: init_loss_scale (int): Initialized loss scale. loss_scale_factor (int): Loss scale factor. scale_window (int): Window size of loss scale. - lr_scheduler (str): Whether use lr_scheduler, only support "ISR" now. + lr_scheduler (str): Learning rate scheduler. Please see the Note as follows. optimizer (str): Optimizer for training, e.g. Adam, Lamb, momentum. Default: Adam. lr (float): Initial learning rate. min_lr (float): Minimum learning rate. 
diff --git a/model_zoo/official/nlp/gnmt_v2/config/config_test.json b/model_zoo/official/nlp/gnmt_v2/config/config_test.json index 90e6dc93cfe..bfc44438c05 100644 --- a/model_zoo/official/nlp/gnmt_v2/config/config_test.json +++ b/model_zoo/official/nlp/gnmt_v2/config/config_test.json @@ -6,7 +6,7 @@ "dataset_schema": "/home/workspace/dataset_menu/newstest2014.en.json", "pre_train_dataset": null, "fine_tune_dataset": null, - "test_dataset": "/home/workspace/dataset_menu/newstest2014.en.tfrecord-001-of-001", + "test_dataset": "/home/workspace/dataset_menu/newstest2014.en.mindrecord", "valid_dataset": null, "dataset_sink_mode": true }, diff --git a/model_zoo/official/nlp/gnmt_v2/scripts/run_distributed_train_ascend.sh b/model_zoo/official/nlp/gnmt_v2/scripts/run_distributed_train_ascend.sh index 7b149c928f3..34028a19bba 100644 --- a/model_zoo/official/nlp/gnmt_v2/scripts/run_distributed_train_ascend.sh +++ b/model_zoo/official/nlp/gnmt_v2/scripts/run_distributed_train_ascend.sh @@ -15,13 +15,13 @@ # ============================================================================ echo "==============================================================================================================" -echo "Please run the scipt as: " +echo "Please run the script as: " echo "sh run_distributed_train_ascend.sh RANK_TABLE_ADDR DATASET_SCHEMA_TRAIN PRE_TRAIN_DATASET" echo "for example:" echo "sh run_distributed_train_ascend.sh \ /home/workspace/rank_table_8p.json \ /home/workspace/dataset_menu/train.tok.clean.bpe.32000.en.json \ - /home/workspace/dataset_menu/train.tok.clean.bpe.32000.en.tfrecord-001-of-001" + /home/workspace/dataset_menu/train.tok.clean.bpe.32000.en.mindrecord" echo "It is better to use absolute path." 
echo "==============================================================================================================" diff --git a/model_zoo/official/nlp/gnmt_v2/scripts/run_standalone_eval_ascend.sh b/model_zoo/official/nlp/gnmt_v2/scripts/run_standalone_eval_ascend.sh index 7d2003d3cd6..5b7f56e5bcc 100644 --- a/model_zoo/official/nlp/gnmt_v2/scripts/run_standalone_eval_ascend.sh +++ b/model_zoo/official/nlp/gnmt_v2/scripts/run_standalone_eval_ascend.sh @@ -15,13 +15,13 @@ # ============================================================================ echo "==============================================================================================================" -echo "Please run the scipt as: " +echo "Please run the script as: " echo "sh run_standalone_eval_ascend.sh DATASET_SCHEMA_TEST TEST_DATASET EXISTED_CKPT_PATH \ VOCAB_ADDR BPE_CODE_ADDR TEST_TARGET" echo "for example:" echo "sh run_standalone_eval_ascend.sh \ /home/workspace/dataset_menu/newstest2014.en.json \ - /home/workspace/dataset_menu/newstest2014.en.tfrecord-001-of-001 \ + /home/workspace/dataset_menu/newstest2014.en.mindrecord \ /home/workspace/gnmt_v2/gnmt-6_3452.ckpt \ /home/workspace/wmt16_de_en/vocab.bpe.32000 \ /home/workspace/wmt16_de_en/bpe.32000 \ @@ -53,7 +53,7 @@ cp ../*.py ./eval cp -r ../src ./eval cp -r ../config ./eval cd ./eval || exit -echo "start eval for device $DEVICE_ID" +echo "start for evaluation" env > env.log python eval.py \ --config=${current_exec_path}/eval/config/config_test.json \ diff --git a/model_zoo/official/nlp/gnmt_v2/scripts/run_standalone_train_ascend.sh b/model_zoo/official/nlp/gnmt_v2/scripts/run_standalone_train_ascend.sh index 1037ce23c9b..6ed8a6dac8c 100644 --- a/model_zoo/official/nlp/gnmt_v2/scripts/run_standalone_train_ascend.sh +++ b/model_zoo/official/nlp/gnmt_v2/scripts/run_standalone_train_ascend.sh @@ -15,12 +15,12 @@ # ============================================================================ echo 
"==============================================================================================================" -echo "Please run the scipt as: " +echo "Please run the script as: " echo "sh run_standalone_train_ascend.sh DATASET_SCHEMA_TRAIN PRE_TRAIN_DATASET" echo "for example:" echo "sh run_standalone_train_ascend.sh \ /home/workspace/dataset_menu/train.tok.clean.bpe.32000.en.json \ - /home/workspace/dataset_menu/train.tok.clean.bpe.32000.en.tfrecord-001-of-001" + /home/workspace/dataset_menu/train.tok.clean.bpe.32000.en.mindrecord" echo "It is better to use absolute path." echo "==============================================================================================================" @@ -42,10 +42,10 @@ cp ../*.py ./train cp -r ../src ./train cp -r ../config ./train cd ./train || exit -echo "start training for device $DEVICE_ID" +echo "start for training" env > env.log python train.py \ --config=${current_exec_path}/train/config/config.json \ --dataset_schema_train=$DATASET_SCHEMA_TRAIN \ - --pre_train_dataset=$PRE_TRAIN_DATASET > log_gnmt_network${i}.log 2>&1 & + --pre_train_dataset=$PRE_TRAIN_DATASET > log_gnmt_network.log 2>&1 & cd .. 
diff --git a/model_zoo/official/nlp/gnmt_v2/src/dataset/bi_data_loader.py b/model_zoo/official/nlp/gnmt_v2/src/dataset/bi_data_loader.py index 4e8d23cb034..cd2016cb0ab 100644 --- a/model_zoo/official/nlp/gnmt_v2/src/dataset/bi_data_loader.py +++ b/model_zoo/official/nlp/gnmt_v2/src/dataset/bi_data_loader.py @@ -136,7 +136,7 @@ class BiLingualDataLoader(DataLoader): columns = ["src", "src_padding", "prev_opt", "target", "tgt_padding"] with open(self.schema_address, "w", encoding="utf-8") as f: f.write("{\n") - f.write(' "datasetType":"TF",\n') + f.write(' "datasetType":"MS",\n') f.write(' "numRows":%s,\n' % provlist[0]) f.write(' "columns":{\n') t = 1 @@ -211,7 +211,7 @@ class TextDataLoader(DataLoader): columns = ["src", "src_padding"] with open(self.schema_address, "w", encoding="utf-8") as f: f.write("{\n") - f.write(' "datasetType":"TF",\n') + f.write(' "datasetType":"MS",\n') f.write(' "numRows":%s,\n' % provlist[0]) f.write(' "columns":{\n') t = 1 diff --git a/model_zoo/official/nlp/gnmt_v2/src/utils/initializer.py b/model_zoo/official/nlp/gnmt_v2/src/utils/initializer.py index f2042ef1df9..85471c0649a 100644 --- a/model_zoo/official/nlp/gnmt_v2/src/utils/initializer.py +++ b/model_zoo/official/nlp/gnmt_v2/src/utils/initializer.py @@ -23,7 +23,7 @@ def _compute_fans(shape): Computes the number of input and output units for a weight shape. Args: - shape (tuple): Integer shape tuple or TF tensor shape. + shape (tuple): Integer shape tuple or MS tensor shape. Returns: tuple, integer scalars (fan_in, fan_out).