forked from mindspore-Ecosystem/mindspore
!9601 fix gnmtv2 doc issues
From: @zhaojichen Reviewed-by: Signed-off-by:
This commit is contained in:
commit
30ea5c3b28
|
@ -39,7 +39,7 @@ Attention mechanism: uses the standardized Bahdanau attention mechanism. First,
|
||||||
|
|
||||||
Note that you can run the scripts based on the dataset mentioned in original paper or widely used in relevant domain/network architecture. In the following sections, we will introduce how to run the scripts using the related dataset below.
|
Note that you can run the scripts based on the dataset mentioned in original paper or widely used in relevant domain/network architecture. In the following sections, we will introduce how to run the scripts using the related dataset below.
|
||||||
|
|
||||||
- WMT Englis-German for training.
|
- WMT English-German for training.
|
||||||
- WMT newstest2014 for evaluation.
|
- WMT newstest2014 for evaluation.
|
||||||
|
|
||||||
# [Environment Requirements](#contents)
|
# [Environment Requirements](#contents)
|
||||||
|
@ -178,7 +178,7 @@ Almost all required options and parameters can be easily assigned, including the
|
||||||
'hidden_size': 1024 # the output's last dimension of dynamicRNN
|
'hidden_size': 1024 # the output's last dimension of dynamicRNN
|
||||||
'initializer_range': 0.1 # initializer range
|
'initializer_range': 0.1 # initializer range
|
||||||
'max_decode_length': 50 # max length of decoder
|
'max_decode_length': 50 # max length of decoder
|
||||||
'lr': 2e-1 # initial learning rate
|
'lr': 2e-3 # initial learning rate
|
||||||
'lr_scheduler': 'WarmupMultiStepLR' # learning rate scheduler
|
'lr_scheduler': 'WarmupMultiStepLR' # learning rate scheduler
|
||||||
'existed_ckpt': "" # the absolute full path to save the checkpoint file
|
'existed_ckpt': "" # the absolute full path to save the checkpoint file
|
||||||
```
|
```
|
||||||
|
@ -242,7 +242,7 @@ The `VOCAB_ADDR` is the vocabulary address, `BPE_CODE_ADDR` is the bpe code addr
|
||||||
| Resource | Ascend 910 |
|
| Resource | Ascend 910 |
|
||||||
| uploaded Date | 11/06/2020 (month/day/year) |
|
| uploaded Date | 11/06/2020 (month/day/year) |
|
||||||
| MindSpore Version | 1.0.0 |
|
| MindSpore Version | 1.0.0 |
|
||||||
| Dataset | WMT Englis-German for training |
|
| Dataset | WMT English-German for training |
|
||||||
| Training Parameters | epoch=6, batch_size=128 |
|
| Training Parameters | epoch=6, batch_size=128 |
|
||||||
| Optimizer | Adam |
|
| Optimizer | Adam |
|
||||||
| Loss Function | Softmax Cross Entropy |
|
| Loss Function | Softmax Cross Entropy |
|
||||||
|
|
|
@ -4,7 +4,7 @@
|
||||||
"epochs": 6,
|
"epochs": 6,
|
||||||
"batch_size": 128,
|
"batch_size": 128,
|
||||||
"dataset_schema": "/home/workspace/dataset_menu/train.tok.clean.bpe.32000.en.json",
|
"dataset_schema": "/home/workspace/dataset_menu/train.tok.clean.bpe.32000.en.json",
|
||||||
"pre_train_dataset": "/home/workspace/dataset_menu/train.tok.clean.bpe.32000.en.tfrecord-001-of-001",
|
"pre_train_dataset": "/home/workspace/dataset_menu/train.tok.clean.bpe.32000.en.mindrecord",
|
||||||
"fine_tune_dataset": null,
|
"fine_tune_dataset": null,
|
||||||
"valid_dataset": null,
|
"valid_dataset": null,
|
||||||
"dataset_sink_mode": true
|
"dataset_sink_mode": true
|
||||||
|
|
|
@ -93,7 +93,7 @@ class GNMTConfig:
|
||||||
init_loss_scale (int): Initialized loss scale.
|
init_loss_scale (int): Initialized loss scale.
|
||||||
loss_scale_factor (int): Loss scale factor.
|
loss_scale_factor (int): Loss scale factor.
|
||||||
scale_window (int): Window size of loss scale.
|
scale_window (int): Window size of loss scale.
|
||||||
lr_scheduler (str): Whether use lr_scheduler, only support "ISR" now.
|
lr_scheduler (str): Learning rate scheduler. Please see the Note as follows.
|
||||||
optimizer (str): Optimizer for training, e.g. Adam, Lamb, momentum. Default: Adam.
|
optimizer (str): Optimizer for training, e.g. Adam, Lamb, momentum. Default: Adam.
|
||||||
lr (float): Initial learning rate.
|
lr (float): Initial learning rate.
|
||||||
min_lr (float): Minimum learning rate.
|
min_lr (float): Minimum learning rate.
|
||||||
|
|
|
@ -6,7 +6,7 @@
|
||||||
"dataset_schema": "/home/workspace/dataset_menu/newstest2014.en.json",
|
"dataset_schema": "/home/workspace/dataset_menu/newstest2014.en.json",
|
||||||
"pre_train_dataset": null,
|
"pre_train_dataset": null,
|
||||||
"fine_tune_dataset": null,
|
"fine_tune_dataset": null,
|
||||||
"test_dataset": "/home/workspace/dataset_menu/newstest2014.en.tfrecord-001-of-001",
|
"test_dataset": "/home/workspace/dataset_menu/newstest2014.en.mindrecord",
|
||||||
"valid_dataset": null,
|
"valid_dataset": null,
|
||||||
"dataset_sink_mode": true
|
"dataset_sink_mode": true
|
||||||
},
|
},
|
||||||
|
|
|
@ -15,13 +15,13 @@
|
||||||
# ============================================================================
|
# ============================================================================
|
||||||
|
|
||||||
echo "=============================================================================================================="
|
echo "=============================================================================================================="
|
||||||
echo "Please run the scipt as: "
|
echo "Please run the script as: "
|
||||||
echo "sh run_distributed_train_ascend.sh RANK_TABLE_ADDR DATASET_SCHEMA_TRAIN PRE_TRAIN_DATASET"
|
echo "sh run_distributed_train_ascend.sh RANK_TABLE_ADDR DATASET_SCHEMA_TRAIN PRE_TRAIN_DATASET"
|
||||||
echo "for example:"
|
echo "for example:"
|
||||||
echo "sh run_distributed_train_ascend.sh \
|
echo "sh run_distributed_train_ascend.sh \
|
||||||
/home/workspace/rank_table_8p.json \
|
/home/workspace/rank_table_8p.json \
|
||||||
/home/workspace/dataset_menu/train.tok.clean.bpe.32000.en.json \
|
/home/workspace/dataset_menu/train.tok.clean.bpe.32000.en.json \
|
||||||
/home/workspace/dataset_menu/train.tok.clean.bpe.32000.en.tfrecord-001-of-001"
|
/home/workspace/dataset_menu/train.tok.clean.bpe.32000.en.mindrecord"
|
||||||
echo "It is better to use absolute path."
|
echo "It is better to use absolute path."
|
||||||
echo "=============================================================================================================="
|
echo "=============================================================================================================="
|
||||||
|
|
||||||
|
|
|
@ -15,13 +15,13 @@
|
||||||
# ============================================================================
|
# ============================================================================
|
||||||
|
|
||||||
echo "=============================================================================================================="
|
echo "=============================================================================================================="
|
||||||
echo "Please run the scipt as: "
|
echo "Please run the script as: "
|
||||||
echo "sh run_standalone_eval_ascend.sh DATASET_SCHEMA_TEST TEST_DATASET EXISTED_CKPT_PATH \
|
echo "sh run_standalone_eval_ascend.sh DATASET_SCHEMA_TEST TEST_DATASET EXISTED_CKPT_PATH \
|
||||||
VOCAB_ADDR BPE_CODE_ADDR TEST_TARGET"
|
VOCAB_ADDR BPE_CODE_ADDR TEST_TARGET"
|
||||||
echo "for example:"
|
echo "for example:"
|
||||||
echo "sh run_standalone_eval_ascend.sh \
|
echo "sh run_standalone_eval_ascend.sh \
|
||||||
/home/workspace/dataset_menu/newstest2014.en.json \
|
/home/workspace/dataset_menu/newstest2014.en.json \
|
||||||
/home/workspace/dataset_menu/newstest2014.en.tfrecord-001-of-001 \
|
/home/workspace/dataset_menu/newstest2014.en.mindrecord \
|
||||||
/home/workspace/gnmt_v2/gnmt-6_3452.ckpt \
|
/home/workspace/gnmt_v2/gnmt-6_3452.ckpt \
|
||||||
/home/workspace/wmt16_de_en/vocab.bpe.32000 \
|
/home/workspace/wmt16_de_en/vocab.bpe.32000 \
|
||||||
/home/workspace/wmt16_de_en/bpe.32000 \
|
/home/workspace/wmt16_de_en/bpe.32000 \
|
||||||
|
@ -53,7 +53,7 @@ cp ../*.py ./eval
|
||||||
cp -r ../src ./eval
|
cp -r ../src ./eval
|
||||||
cp -r ../config ./eval
|
cp -r ../config ./eval
|
||||||
cd ./eval || exit
|
cd ./eval || exit
|
||||||
echo "start eval for device $DEVICE_ID"
|
echo "start for evaluation"
|
||||||
env > env.log
|
env > env.log
|
||||||
python eval.py \
|
python eval.py \
|
||||||
--config=${current_exec_path}/eval/config/config_test.json \
|
--config=${current_exec_path}/eval/config/config_test.json \
|
||||||
|
|
|
@ -15,12 +15,12 @@
|
||||||
# ============================================================================
|
# ============================================================================
|
||||||
|
|
||||||
echo "=============================================================================================================="
|
echo "=============================================================================================================="
|
||||||
echo "Please run the scipt as: "
|
echo "Please run the script as: "
|
||||||
echo "sh run_standalone_train_ascend.sh DATASET_SCHEMA_TRAIN PRE_TRAIN_DATASET"
|
echo "sh run_standalone_train_ascend.sh DATASET_SCHEMA_TRAIN PRE_TRAIN_DATASET"
|
||||||
echo "for example:"
|
echo "for example:"
|
||||||
echo "sh run_standalone_train_ascend.sh \
|
echo "sh run_standalone_train_ascend.sh \
|
||||||
/home/workspace/dataset_menu/train.tok.clean.bpe.32000.en.json \
|
/home/workspace/dataset_menu/train.tok.clean.bpe.32000.en.json \
|
||||||
/home/workspace/dataset_menu/train.tok.clean.bpe.32000.en.tfrecord-001-of-001"
|
/home/workspace/dataset_menu/train.tok.clean.bpe.32000.en.mindrecord"
|
||||||
echo "It is better to use absolute path."
|
echo "It is better to use absolute path."
|
||||||
echo "=============================================================================================================="
|
echo "=============================================================================================================="
|
||||||
|
|
||||||
|
@ -42,10 +42,10 @@ cp ../*.py ./train
|
||||||
cp -r ../src ./train
|
cp -r ../src ./train
|
||||||
cp -r ../config ./train
|
cp -r ../config ./train
|
||||||
cd ./train || exit
|
cd ./train || exit
|
||||||
echo "start training for device $DEVICE_ID"
|
echo "start for training"
|
||||||
env > env.log
|
env > env.log
|
||||||
python train.py \
|
python train.py \
|
||||||
--config=${current_exec_path}/train/config/config.json \
|
--config=${current_exec_path}/train/config/config.json \
|
||||||
--dataset_schema_train=$DATASET_SCHEMA_TRAIN \
|
--dataset_schema_train=$DATASET_SCHEMA_TRAIN \
|
||||||
--pre_train_dataset=$PRE_TRAIN_DATASET > log_gnmt_network${i}.log 2>&1 &
|
--pre_train_dataset=$PRE_TRAIN_DATASET > log_gnmt_network.log 2>&1 &
|
||||||
cd ..
|
cd ..
|
||||||
|
|
|
@ -136,7 +136,7 @@ class BiLingualDataLoader(DataLoader):
|
||||||
columns = ["src", "src_padding", "prev_opt", "target", "tgt_padding"]
|
columns = ["src", "src_padding", "prev_opt", "target", "tgt_padding"]
|
||||||
with open(self.schema_address, "w", encoding="utf-8") as f:
|
with open(self.schema_address, "w", encoding="utf-8") as f:
|
||||||
f.write("{\n")
|
f.write("{\n")
|
||||||
f.write(' "datasetType":"TF",\n')
|
f.write(' "datasetType":"MS",\n')
|
||||||
f.write(' "numRows":%s,\n' % provlist[0])
|
f.write(' "numRows":%s,\n' % provlist[0])
|
||||||
f.write(' "columns":{\n')
|
f.write(' "columns":{\n')
|
||||||
t = 1
|
t = 1
|
||||||
|
@ -211,7 +211,7 @@ class TextDataLoader(DataLoader):
|
||||||
columns = ["src", "src_padding"]
|
columns = ["src", "src_padding"]
|
||||||
with open(self.schema_address, "w", encoding="utf-8") as f:
|
with open(self.schema_address, "w", encoding="utf-8") as f:
|
||||||
f.write("{\n")
|
f.write("{\n")
|
||||||
f.write(' "datasetType":"TF",\n')
|
f.write(' "datasetType":"MS",\n')
|
||||||
f.write(' "numRows":%s,\n' % provlist[0])
|
f.write(' "numRows":%s,\n' % provlist[0])
|
||||||
f.write(' "columns":{\n')
|
f.write(' "columns":{\n')
|
||||||
t = 1
|
t = 1
|
||||||
|
|
|
@ -23,7 +23,7 @@ def _compute_fans(shape):
|
||||||
Computes the number of input and output units for a weight shape.
|
Computes the number of input and output units for a weight shape.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
shape (tuple): Integer shape tuple or TF tensor shape.
|
shape (tuple): Integer shape tuple or MS tensor shape.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
tuple, integer scalars (fan_in, fan_out).
|
tuple, integer scalars (fan_in, fan_out).
|
||||||
|
|
Loading…
Reference in New Issue