forked from mindspore-Ecosystem/mindspore

!11086 fix fasttext doc issue

From: @zhaojichen
Reviewed-by: @wuxuejian, @liangchenghui
Signed-off-by: @liangchenghui

This commit is contained in commit ee1c80abc6.
@@ -64,13 +64,13 @@ After dataset preparation, you can start training and evaluation as follows:
 ```bash
 # run training example
 cd ./scripts
-sh run_standalone_train.sh [TRAIN_DATASET]
+sh run_standalone_train.sh [TRAIN_DATASET] [DEVICEID]
 
 # run distributed training example
 sh run_distribute_train.sh [TRAIN_DATASET] [RANK_TABLE_PATH]
 
 # run evaluation example
-sh run_eval.sh [EVAL_DATASET_PATH] [DATASET_NAME] [MODEL_CKPT]
+sh run_eval.sh [EVAL_DATASET_PATH] [DATASET_NAME] [MODEL_CKPT] [DEVICEID]
 ```
 
 # [Script Description](#content)
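For readers skimming the diff, a concrete form of the updated commands might look like the sketch below; every path, dataset name, and checkpoint file here is a hypothetical placeholder, not something taken from the repository.

```bash
# Illustrative invocations of the updated usage; all values are placeholders.
cd ./scripts
sh run_standalone_train.sh /data/ag_news/train.mindrecord 0
sh run_eval.sh "/data/ag_news/test*.mindrecord" ag_news /path/to/fasttext.ckpt 0
```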
@@ -116,6 +116,7 @@ Parameters for both training and evaluation can be set in config.py. All the dat
 ```text
 vocab_size               # vocabulary size.
 buckets                  # bucket sequence length.
+test_buckets             # test dataset bucket sequence length
 batch_size               # batch size of input dataset.
 embedding_dims           # The size of each embedding vector.
 num_class                # number of labels.
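The new `test_buckets` knob lands next to `buckets` in config.py; to eyeball the full parameter block in your checkout (the `src/config.py` path is an assumption about the repo layout):

```bash
# Print the first part of the config file; the path is assumed, adjust as needed.
sed -n '1,80p' src/config.py
```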
@@ -134,7 +135,7 @@ Parameters for both training and evaluation can be set in config.py. All the dat
 
 ```bash
 cd ./scripts
-sh run_standalone_train.sh [DATASET_PATH]
+sh run_standalone_train.sh [DATASET_PATH] [DEVICEID]
 ```
 
 - Running scripts for distributed training of FastText. Task training on multiple device and run the following command in bash to be executed in `scripts/`:
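As a companion sketch for the distributed bullet above: the rank table JSON is machine-specific (typically generated with MindSpore's hccl tools), so both arguments below are placeholders.

```bash
# Hypothetical distributed launch; dataset path and rank table are placeholders.
cd ./scripts
sh run_distribute_train.sh /data/ag_news/train.mindrecord /path/to/rank_table_8pcs.json
```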
@@ -150,7 +151,7 @@ Parameters for both training and evaluation can be set in config.py. All the dat
 
 ``` bash
 cd ./scripts
-sh run_eval.sh [DATASET_PATH] [DATASET_NAME] [MODEL_CKPT]
+sh run_eval.sh [DATASET_PATH] [DATASET_NAME] [MODEL_CKPT] [DEVICEID]
 ```
 
 Note: The `DATASET_PATH` is path to mindrecord. eg. /dataset_path/*.mindrecord
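One practical caveat on that note, offered as a suggestion rather than repository guidance: an unquoted `/dataset_path/*.mindrecord` is expanded by the calling shell and may reach the script as several arguments, so quoting keeps the pattern intact as a single `$1`.

```bash
# Quote the glob so run_eval.sh sees one argument (values are placeholders).
sh run_eval.sh "/dataset_path/*.mindrecord" ag_news /path/to/fasttext.ckpt 0
```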
@@ -167,13 +168,13 @@ Parameters for both training and evaluation can be set in config.py. All the dat
 | uploaded Date | 12/21/2020 (month/day/year) |
 | MindSpore Version | 1.1.0 |
 | Dataset | AG's News Topic Classification Dataset |
-| Training Parameters | epoch=5, batch_size=128 |
+| Training Parameters | epoch=5, batch_size=512 |
 | Optimizer | Adam |
 | Loss Function | Softmax Cross Entropy |
 | outputs | probability |
-| Speed | 112ms/step (8pcs) |
-| Total Time | 66s (8pcs) |
-| Loss | 0.00082 |
+| Speed | 10ms/step (1pcs) |
+| Epoch Time | 2.36s (1pcs) |
+| Loss | 0.0067 |
 | Params (M) | 22 |
 | Checkpoint for inference | 254M (.ckpt file) |
 | Scripts | [fasttext](https://gitee.com/mindspore/mindspore/tree/master/model_zoo/official/nlp/fasttext) |
@@ -184,13 +185,13 @@ Parameters for both training and evaluation can be set in config.py. All the dat
 | uploaded Date | 11/21/2020 (month/day/year) |
 | MindSpore Version | 1.1.0 |
 | Dataset | DBPedia Ontology Classification Dataset |
-| Training Parameters | epoch=5, batch_size=128 |
+| Training Parameters | epoch=5, batch_size=4096 |
 | Optimizer | Adam |
 | Loss Function | Softmax Cross Entropy |
 | outputs | probability |
-| Speed | 60ms/step (8pcs) |
-| Total Time | 164s (8pcs) |
-| Loss | 2.6e-5 |
+| Speed | 58ms/step (1pcs) |
+| Epoch Time | 8.15s (1pcs) |
+| Loss | 2.6e-4 |
 | Params (M) | 106 |
 | Checkpoint for inference | 1.2G (.ckpt file) |
 | Scripts | [fasttext](https://gitee.com/mindspore/mindspore/tree/master/model_zoo/official/nlp/fasttext) |
@@ -201,13 +202,13 @@ Parameters for both training and evaluation can be set in config.py. All the dat
 | uploaded Date | 11/21/2020 (month/day/year) |
 | MindSpore Version | 1.1.0 |
 | Dataset | Yelp Review Polarity Dataset |
-| Training Parameters | epoch=5, batch_size=128 |
+| Training Parameters | epoch=5, batch_size=2048 |
 | Optimizer | Adam |
 | Loss Function | Softmax Cross Entropy |
 | outputs | probability |
-| Speed | 74ms/step (8pcs) |
-| Total Time | 195s (8pcs) |
-| Loss | 7.7e-4 |
+| Speed | 101ms/step (1pcs) |
+| Epoch Time | 28s (1pcs) |
+| Loss | 0.062 |
 | Params (M) | 103 |
 | Checkpoint for inference | 1.2G (.ckpt file) |
 | Scripts | [fasttext](https://gitee.com/mindspore/mindspore/tree/master/model_zoo/official/nlp/fasttext) |
@@ -220,8 +221,8 @@ Parameters for both training and evaluation can be set in config.py. All the dat
 | Uploaded Date | 12/21/2020 (month/day/year) |
 | MindSpore Version | 1.1.0 |
 | Dataset | AG's News Topic Classification Dataset |
-| batch_size | 128 |
-| Total Time | 66s |
+| batch_size | 512 |
+| Epoch Time | 2.36s |
 | outputs | label index |
 | Accuracy | 92.53 |
 | Model for inference | 254M (.ckpt file) |
@@ -232,8 +233,8 @@ Parameters for both training and evaluation can be set in config.py. All the dat
 | Uploaded Date | 12/21/2020 (month/day/year) |
 | MindSpore Version | 1.1.0 |
 | Dataset | DBPedia Ontology Classification Dataset |
-| batch_size | 128 |
-| Total Time | 164s |
+| batch_size | 4096 |
+| Epoch Time | 8.15s |
 | outputs | label index |
 | Accuracy | 98.6 |
 | Model for inference | 1.2G (.ckpt file) |
@@ -244,8 +245,8 @@ Parameters for both training and evaluation can be set in config.py. All the dat
 | Uploaded Date | 12/21/2020 (month/day/year) |
 | MindSpore Version | 1.1.0 |
 | Dataset | Yelp Review Polarity Dataset |
-| batch_size | 128 |
-| Total Time | 195s |
+| batch_size | 2048 |
+| Epoch Time | 28s |
 | outputs | label index |
 | Accuracy | 95.7 |
 | Model for inference | 1.2G (.ckpt file) |
@@ -32,7 +32,6 @@ SOURCE_DATASET_PATH=$(get_real_path $1)
 DATASET_NAME=$2
 
 export DEVICE_NUM=1
-export DEVICE_ID=5
 export RANK_ID=0
 export RANK_SIZE=1
 
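This hunk stops hard-pinning the script to NPU 5. If a specific device is still wanted after the change, setting the variable in the calling environment is enough; the script name and arguments below are placeholders, since the diff does not show the filename.

```bash
# Pick the target NPU in the calling shell instead (illustrative only).
export DEVICE_ID=2
bash ./scripts/some_script.sh /path/to/source_dataset dataset_name
```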
@@ -32,8 +32,9 @@ DATASET=$(get_real_path $1)
 echo $DATASET
 DATANAME=$2
 MODEL_CKPT=$(get_real_path $3)
+DEVICEID=$4
 export DEVICE_NUM=1
-export DEVICE_ID=5
+export DEVICE_ID=$DEVICEID
 export RANK_ID=0
 export RANK_SIZE=1
 
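A small hardening worth considering (a suggestion, not part of this commit): give the new fourth positional a default so existing invocations that omit `[DEVICEID]` keep working.

```bash
# Fall back to device 0 when the caller omits the argument (sketch only).
DEVICEID=${4:-0}
export DEVICE_ID=$DEVICEID
```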
@@ -32,9 +32,10 @@ DATASET=$(get_real_path $1)
 echo $DATASET
 DATANAME=$(basename $DATASET)
 echo $DATANAME
+DEVICEID=$2
 
 export DEVICE_NUM=1
-export DEVICE_ID=0
+export DEVICE_ID=$DEVICEID
 export RANK_ID=0
 export RANK_SIZE=1
 
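Likewise, assuming this hunk is `run_standalone_train.sh` (it matches the README usage updated above), an argument-count guard would make a missing device ID fail fast; again a sketch, not part of this change.

```bash
# Fail fast with a usage hint when arguments are missing (sketch only).
if [ $# -lt 2 ]; then
    echo "Usage: sh run_standalone_train.sh [DATASET_PATH] [DEVICEID]"
    exit 1
fi
```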
@@ -141,12 +141,12 @@ def _build_training_pipeline(pre_dataset):
     callbacks = [time_monitor, loss_monitor]
     if rank_size is None or int(rank_size) == 1:
         ckpt_callback = ModelCheckpoint(prefix='fasttext',
-                                        directory=os.path.join('./', 'ckpe_{}'.format(os.getenv("DEVICE_ID"))),
+                                        directory=os.path.join('./', 'ckpt_{}'.format(os.getenv("DEVICE_ID"))),
                                         config=ckpt_config)
         callbacks.append(ckpt_callback)
     if rank_size is not None and int(rank_size) > 1 and MultiAscend.get_rank() % 8 == 0:
         ckpt_callback = ModelCheckpoint(prefix='fasttext',
-                                        directory=os.path.join('./', 'ckpe_{}'.format(os.getenv("DEVICE_ID"))),
+                                        directory=os.path.join('./', 'ckpt_{}'.format(os.getenv("DEVICE_ID"))),
                                         config=ckpt_config)
         callbacks.append(ckpt_callback)
     print("Prepare to Training....")
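With the `ckpe_` to `ckpt_` typo fixed, single-card checkpoints land in a sensibly named directory; for example, after training on device 0 (the checkpoint file name below is illustrative):

```bash
# Checkpoints now appear under ckpt_<DEVICE_ID>; file name is illustrative.
ls ./ckpt_0
# fasttext-5_1181.ckpt
```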