forked from mindspore-Ecosystem/mindspore
!9601 fix gnmtv2 doc issues
From: @zhaojichen Reviewed-by: Signed-off-by:
This commit is contained in:
commit
30ea5c3b28
|
@ -39,7 +39,7 @@ Attention mechanism: uses the standardized Bahdanau attention mechanism. First,
|
|||
|
||||
Note that you can run the scripts based on the dataset mentioned in the original paper or on one widely used in the relevant domain/network architecture. In the following sections, we will introduce how to run the scripts using the related dataset below.
|
||||
|
||||
- WMT Englis-German for training.
|
||||
- WMT English-German for training.
|
||||
- WMT newstest2014 for evaluation.
|
||||
|
||||
# [Environment Requirements](#contents)
|
||||
|
@ -178,7 +178,7 @@ Almost all required options and parameters can be easily assigned, including the
|
|||
'hidden_size': 1024 # the output's last dimension of dynamicRNN
|
||||
'initializer_range': 0.1 # initializer range
|
||||
'max_decode_length': 50 # max length of decoder
|
||||
'lr': 2e-1 # initial learning rate
|
||||
'lr': 2e-3 # initial learning rate
|
||||
'lr_scheduler': 'WarmupMultiStepLR' # learning rate scheduler
|
||||
'existed_ckpt': "" # the absolute full path to save the checkpoint file
|
||||
```
|
||||
|
@ -242,7 +242,7 @@ The `VOCAB_ADDR` is the vocabulary address, `BPE_CODE_ADDR` is the bpe code addr
|
|||
| Resource | Ascend 910 |
|
||||
| uploaded Date | 11/06/2020 (month/day/year) |
|
||||
| MindSpore Version | 1.0.0 |
|
||||
| Dataset | WMT Englis-German for training |
|
||||
| Dataset | WMT English-German for training |
|
||||
| Training Parameters | epoch=6, batch_size=128 |
|
||||
| Optimizer | Adam |
|
||||
| Loss Function | Softmax Cross Entropy |
|
||||
|
|
|
@ -4,7 +4,7 @@
|
|||
"epochs": 6,
|
||||
"batch_size": 128,
|
||||
"dataset_schema": "/home/workspace/dataset_menu/train.tok.clean.bpe.32000.en.json",
|
||||
"pre_train_dataset": "/home/workspace/dataset_menu/train.tok.clean.bpe.32000.en.tfrecord-001-of-001",
|
||||
"pre_train_dataset": "/home/workspace/dataset_menu/train.tok.clean.bpe.32000.en.mindrecord",
|
||||
"fine_tune_dataset": null,
|
||||
"valid_dataset": null,
|
||||
"dataset_sink_mode": true
|
||||
|
|
|
@ -93,7 +93,7 @@ class GNMTConfig:
|
|||
init_loss_scale (int): Initialized loss scale.
|
||||
loss_scale_factor (int): Loss scale factor.
|
||||
scale_window (int): Window size of loss scale.
|
||||
lr_scheduler (str): Whether use lr_scheduler, only support "ISR" now.
|
||||
lr_scheduler (str): Learning rate scheduler. Please see the Note as follows.
|
||||
optimizer (str): Optimizer for training, e.g. Adam, Lamb, momentum. Default: Adam.
|
||||
lr (float): Initial learning rate.
|
||||
min_lr (float): Minimum learning rate.
|
||||
|
|
|
@ -6,7 +6,7 @@
|
|||
"dataset_schema": "/home/workspace/dataset_menu/newstest2014.en.json",
|
||||
"pre_train_dataset": null,
|
||||
"fine_tune_dataset": null,
|
||||
"test_dataset": "/home/workspace/dataset_menu/newstest2014.en.tfrecord-001-of-001",
|
||||
"test_dataset": "/home/workspace/dataset_menu/newstest2014.en.mindrecord",
|
||||
"valid_dataset": null,
|
||||
"dataset_sink_mode": true
|
||||
},
|
||||
|
|
|
@ -15,13 +15,13 @@
|
|||
# ============================================================================
|
||||
|
||||
echo "=============================================================================================================="
|
||||
echo "Please run the scipt as: "
|
||||
echo "Please run the script as: "
|
||||
echo "sh run_distributed_train_ascend.sh RANK_TABLE_ADDR DATASET_SCHEMA_TRAIN PRE_TRAIN_DATASET"
|
||||
echo "for example:"
|
||||
echo "sh run_distributed_train_ascend.sh \
|
||||
/home/workspace/rank_table_8p.json \
|
||||
/home/workspace/dataset_menu/train.tok.clean.bpe.32000.en.json \
|
||||
/home/workspace/dataset_menu/train.tok.clean.bpe.32000.en.tfrecord-001-of-001"
|
||||
/home/workspace/dataset_menu/train.tok.clean.bpe.32000.en.mindrecord"
|
||||
echo "It is better to use absolute path."
|
||||
echo "=============================================================================================================="
|
||||
|
||||
|
|
|
@ -15,13 +15,13 @@
|
|||
# ============================================================================
|
||||
|
||||
echo "=============================================================================================================="
|
||||
echo "Please run the scipt as: "
|
||||
echo "Please run the script as: "
|
||||
echo "sh run_standalone_eval_ascend.sh DATASET_SCHEMA_TEST TEST_DATASET EXISTED_CKPT_PATH \
|
||||
VOCAB_ADDR BPE_CODE_ADDR TEST_TARGET"
|
||||
echo "for example:"
|
||||
echo "sh run_standalone_eval_ascend.sh \
|
||||
/home/workspace/dataset_menu/newstest2014.en.json \
|
||||
/home/workspace/dataset_menu/newstest2014.en.tfrecord-001-of-001 \
|
||||
/home/workspace/dataset_menu/newstest2014.en.mindrecord \
|
||||
/home/workspace/gnmt_v2/gnmt-6_3452.ckpt \
|
||||
/home/workspace/wmt16_de_en/vocab.bpe.32000 \
|
||||
/home/workspace/wmt16_de_en/bpe.32000 \
|
||||
|
@ -53,7 +53,7 @@ cp ../*.py ./eval
|
|||
cp -r ../src ./eval
|
||||
cp -r ../config ./eval
|
||||
cd ./eval || exit
|
||||
echo "start eval for device $DEVICE_ID"
|
||||
echo "start for evaluation"
|
||||
env > env.log
|
||||
python eval.py \
|
||||
--config=${current_exec_path}/eval/config/config_test.json \
|
||||
|
|
|
@ -15,12 +15,12 @@
|
|||
# ============================================================================
|
||||
|
||||
echo "=============================================================================================================="
|
||||
echo "Please run the scipt as: "
|
||||
echo "Please run the script as: "
|
||||
echo "sh run_standalone_train_ascend.sh DATASET_SCHEMA_TRAIN PRE_TRAIN_DATASET"
|
||||
echo "for example:"
|
||||
echo "sh run_standalone_train_ascend.sh \
|
||||
/home/workspace/dataset_menu/train.tok.clean.bpe.32000.en.json \
|
||||
/home/workspace/dataset_menu/train.tok.clean.bpe.32000.en.tfrecord-001-of-001"
|
||||
/home/workspace/dataset_menu/train.tok.clean.bpe.32000.en.mindrecord"
|
||||
echo "It is better to use absolute path."
|
||||
echo "=============================================================================================================="
|
||||
|
||||
|
@ -42,10 +42,10 @@ cp ../*.py ./train
|
|||
cp -r ../src ./train
|
||||
cp -r ../config ./train
|
||||
cd ./train || exit
|
||||
echo "start training for device $DEVICE_ID"
|
||||
echo "start for training"
|
||||
env > env.log
|
||||
python train.py \
|
||||
--config=${current_exec_path}/train/config/config.json \
|
||||
--dataset_schema_train=$DATASET_SCHEMA_TRAIN \
|
||||
--pre_train_dataset=$PRE_TRAIN_DATASET > log_gnmt_network${i}.log 2>&1 &
|
||||
--pre_train_dataset=$PRE_TRAIN_DATASET > log_gnmt_network.log 2>&1 &
|
||||
cd ..
|
||||
|
|
|
@ -136,7 +136,7 @@ class BiLingualDataLoader(DataLoader):
|
|||
columns = ["src", "src_padding", "prev_opt", "target", "tgt_padding"]
|
||||
with open(self.schema_address, "w", encoding="utf-8") as f:
|
||||
f.write("{\n")
|
||||
f.write(' "datasetType":"TF",\n')
|
||||
f.write(' "datasetType":"MS",\n')
|
||||
f.write(' "numRows":%s,\n' % provlist[0])
|
||||
f.write(' "columns":{\n')
|
||||
t = 1
|
||||
|
@ -211,7 +211,7 @@ class TextDataLoader(DataLoader):
|
|||
columns = ["src", "src_padding"]
|
||||
with open(self.schema_address, "w", encoding="utf-8") as f:
|
||||
f.write("{\n")
|
||||
f.write(' "datasetType":"TF",\n')
|
||||
f.write(' "datasetType":"MS",\n')
|
||||
f.write(' "numRows":%s,\n' % provlist[0])
|
||||
f.write(' "columns":{\n')
|
||||
t = 1
|
||||
|
|
|
@ -23,7 +23,7 @@ def _compute_fans(shape):
|
|||
Computes the number of input and output units for a weight shape.
|
||||
|
||||
Args:
|
||||
shape (tuple): Integer shape tuple or TF tensor shape.
|
||||
shape (tuple): Integer shape tuple or MS tensor shape.
|
||||
|
||||
Returns:
|
||||
tuple, integer scalars (fan_in, fan_out).
|
||||
|
|
Loading…
Reference in New Issue