forked from mindspore-Ecosystem/mindspore
!11086 fix fasttext doc issue
From: @zhaojichen
Reviewed-by: @wuxuejian, @liangchenghui
Signed-off-by: @liangchenghui
commit ee1c80abc6

This commit adds an explicit [DEVICEID] argument to the standalone training and evaluation scripts, documents it in the README (along with the `test_buckets` config entry and refreshed performance numbers), and fixes a misspelled checkpoint directory name in train.py.
@@ -64,13 +64,13 @@ After dataset preparation, you can start training and evaluation as follows:
 ```bash
 # run training example
 cd ./scripts
-sh run_standalone_train.sh [TRAIN_DATASET]
+sh run_standalone_train.sh [TRAIN_DATASET] [DEVICEID]
 
 # run distributed training example
 sh run_distribute_train.sh [TRAIN_DATASET] [RANK_TABLE_PATH]
 
 # run evaluation example
-sh run_eval.sh [EVAL_DATASET_PATH] [DATASET_NAME] [MODEL_CKPT]
+sh run_eval.sh [EVAL_DATASET_PATH] [DATASET_NAME] [MODEL_CKPT] [DEVICEID]
 ```
 
 # [Script Description](#content)
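For a sense of how the new trailing argument is used, here is a minimal sketch of a standalone invocation (the dataset path and device id are placeholders, not from this commit):

```bash
cd ./scripts
# train on Ascend device 0; the mindrecord path is hypothetical
sh run_standalone_train.sh /path/to/train.mindrecord 0
```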
@@ -116,6 +116,7 @@ Parameters for both training and evaluation can be set in config.py. All the dat
 ```text
 vocab_size      # vocabulary size.
 buckets         # bucket sequence length.
+test_buckets    # test dataset bucket sequence length.
 batch_size      # batch size of input dataset.
 embedding_dims  # the size of each embedding vector.
 num_class       # number of labels.
@@ -134,7 +135,7 @@ Parameters for both training and evaluation can be set in config.py. All the dat
 
 ```bash
 cd ./scripts
-sh run_standalone_train.sh [DATASET_PATH]
+sh run_standalone_train.sh [DATASET_PATH] [DEVICEID]
 ```
 
 - Running scripts for distributed training of FastText. Task training runs on multiple devices; execute the following command in bash in `scripts/`:
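Distributed training itself is untouched by this commit; for contrast with the standalone form, a hypothetical invocation (both paths are placeholders) could look like:

```bash
cd ./scripts
# 8-device run driven by a rank table json; both paths are placeholders
sh run_distribute_train.sh /path/to/train.mindrecord /path/to/rank_table_8pcs.json
```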
@@ -150,7 +151,7 @@ Parameters for both training and evaluation can be set in config.py. All the dat
 
 ```bash
 cd ./scripts
-sh run_eval.sh [DATASET_PATH] [DATASET_NAME] [MODEL_CKPT]
+sh run_eval.sh [DATASET_PATH] [DATASET_NAME] [MODEL_CKPT] [DEVICEID]
 ```
 
 Note: `DATASET_PATH` is the path to the mindrecord files, e.g. /dataset_path/*.mindrecord
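Putting the note and the new argument together, an evaluation call could look like this sketch (the dataset name `ag`, the checkpoint filename, and the paths are assumptions for illustration):

```bash
cd ./scripts
# evaluate on device 0; dataset path, name, and checkpoint are placeholders
sh run_eval.sh /dataset_path/test.mindrecord ag ./ckpt_0/fasttext.ckpt 0
```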
@@ -167,13 +168,13 @@ Parameters for both training and evaluation can be set in config.py. All the dat
 | uploaded Date | 12/21/2020 (month/day/year) |
 | MindSpore Version | 1.1.0 |
 | Dataset | AG's News Topic Classification Dataset |
-| Training Parameters | epoch=5, batch_size=128 |
+| Training Parameters | epoch=5, batch_size=512 |
 | Optimizer | Adam |
 | Loss Function | Softmax Cross Entropy |
 | outputs | probability |
-| Speed | 112ms/step (8pcs) |
-| Total Time | 66s (8pcs) |
-| Loss | 0.00082 |
+| Speed | 10ms/step (1pcs) |
+| Epoch Time | 2.36s (1pcs) |
+| Loss | 0.0067 |
 | Params (M) | 22 |
 | Checkpoint for inference | 254M (.ckpt file) |
 | Scripts | [fasttext](https://gitee.com/mindspore/mindspore/tree/master/model_zoo/official/nlp/fasttext) |
@@ -184,13 +185,13 @@ Parameters for both training and evaluation can be set in config.py. All the dat
 | uploaded Date | 11/21/2020 (month/day/year) |
 | MindSpore Version | 1.1.0 |
 | Dataset | DBPedia Ontology Classification Dataset |
-| Training Parameters | epoch=5, batch_size=128 |
+| Training Parameters | epoch=5, batch_size=4096 |
 | Optimizer | Adam |
 | Loss Function | Softmax Cross Entropy |
 | outputs | probability |
-| Speed | 60ms/step (8pcs) |
-| Total Time | 164s (8pcs) |
-| Loss | 2.6e-5 |
+| Speed | 58ms/step (1pcs) |
+| Epoch Time | 8.15s (1pcs) |
+| Loss | 2.6e-4 |
 | Params (M) | 106 |
 | Checkpoint for inference | 1.2G (.ckpt file) |
 | Scripts | [fasttext](https://gitee.com/mindspore/mindspore/tree/master/model_zoo/official/nlp/fasttext) |
@@ -201,13 +202,13 @@ Parameters for both training and evaluation can be set in config.py. All the dat
 | uploaded Date | 11/21/2020 (month/day/year) |
 | MindSpore Version | 1.1.0 |
 | Dataset | Yelp Review Polarity Dataset |
-| Training Parameters | epoch=5, batch_size=128 |
+| Training Parameters | epoch=5, batch_size=2048 |
 | Optimizer | Adam |
 | Loss Function | Softmax Cross Entropy |
 | outputs | probability |
-| Speed | 74ms/step (8pcs) |
-| Total Time | 195s (8pcs) |
-| Loss | 7.7e-4 |
+| Speed | 101ms/step (1pcs) |
+| Epoch Time | 28s (1pcs) |
+| Loss | 0.062 |
 | Params (M) | 103 |
 | Checkpoint for inference | 1.2G (.ckpt file) |
 | Scripts | [fasttext](https://gitee.com/mindspore/mindspore/tree/master/model_zoo/official/nlp/fasttext) |
@@ -220,8 +221,8 @@ Parameters for both training and evaluation can be set in config.py. All the dat
 | Uploaded Date | 12/21/2020 (month/day/year) |
 | MindSpore Version | 1.1.0 |
 | Dataset | AG's News Topic Classification Dataset |
-| batch_size | 128 |
-| Total Time | 66s |
+| batch_size | 512 |
+| Epoch Time | 2.36s |
 | outputs | label index |
 | Accuracy | 92.53 |
 | Model for inference | 254M (.ckpt file) |
@@ -232,8 +233,8 @@ Parameters for both training and evaluation can be set in config.py. All the dat
 | Uploaded Date | 12/21/2020 (month/day/year) |
 | MindSpore Version | 1.1.0 |
 | Dataset | DBPedia Ontology Classification Dataset |
-| batch_size | 128 |
-| Total Time | 164s |
+| batch_size | 4096 |
+| Epoch Time | 8.15s |
 | outputs | label index |
 | Accuracy | 98.6 |
 | Model for inference | 1.2G (.ckpt file) |
@@ -244,8 +245,8 @@ Parameters for both training and evaluation can be set in config.py. All the dat
 | Uploaded Date | 12/21/2020 (month/day/year) |
 | MindSpore Version | 1.1.0 |
 | Dataset | Yelp Review Polarity Dataset |
-| batch_size | 128 |
-| Total Time | 195s |
+| batch_size | 2048 |
+| Epoch Time | 28s |
 | outputs | label index |
 | Accuracy | 95.7 |
 | Model for inference | 1.2G (.ckpt file) |
@@ -32,7 +32,6 @@ SOURCE_DATASET_PATH=$(get_real_path $1)
 DATASET_NAME=$2
 
 export DEVICE_NUM=1
-export DEVICE_ID=5
 export RANK_ID=0
 export RANK_SIZE=1
 
@@ -32,8 +32,9 @@ DATASET=$(get_real_path $1)
 echo $DATASET
 DATANAME=$2
 MODEL_CKPT=$(get_real_path $3)
+DEVICEID=$4
 export DEVICE_NUM=1
-export DEVICE_ID=5
+export DEVICE_ID=$DEVICEID
 export RANK_ID=0
 export RANK_SIZE=1
 
@@ -32,9 +32,10 @@ DATASET=$(get_real_path $1)
 echo $DATASET
 DATANAME=$(basename $DATASET)
 echo $DATANAME
+DEVICEID=$2
 
 export DEVICE_NUM=1
-export DEVICE_ID=0
+export DEVICE_ID=$DEVICEID
 export RANK_ID=0
 export RANK_SIZE=1
 
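With `DEVICE_ID` now wired to a positional argument in both run_eval.sh and run_standalone_train.sh, omitting that argument exports an empty `DEVICE_ID` instead of the old hard-coded value. A defensive variant (a sketch, not part of this commit) would fall back to device 0:

```bash
# not part of this commit: default to device 0 when no id is passed
DEVICEID=${2:-0}
export DEVICE_ID=$DEVICEID
```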
@@ -141,12 +141,12 @@ def _build_training_pipeline(pre_dataset):
     callbacks = [time_monitor, loss_monitor]
     if rank_size is None or int(rank_size) == 1:
         ckpt_callback = ModelCheckpoint(prefix='fasttext',
-                                        directory=os.path.join('./', 'ckpe_{}'.format(os.getenv("DEVICE_ID"))),
+                                        directory=os.path.join('./', 'ckpt_{}'.format(os.getenv("DEVICE_ID"))),
                                         config=ckpt_config)
         callbacks.append(ckpt_callback)
     if rank_size is not None and int(rank_size) > 1 and MultiAscend.get_rank() % 8 == 0:
         ckpt_callback = ModelCheckpoint(prefix='fasttext',
-                                        directory=os.path.join('./', 'ckpe_{}'.format(os.getenv("DEVICE_ID"))),
+                                        directory=os.path.join('./', 'ckpt_{}'.format(os.getenv("DEVICE_ID"))),
                                         config=ckpt_config)
         callbacks.append(ckpt_callback)
     print("Prepare to Training....")
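The train.py change is a pure typo fix: the checkpoint directory name is derived from the `DEVICE_ID` environment variable, so the corrected code writes to `ckpt_<id>` rather than `ckpe_<id>`. A quick shell illustration of the naming scheme (illustrative only):

```bash
# mirrors the Python 'ckpt_{}'.format(os.getenv("DEVICE_ID")) naming
export DEVICE_ID=0
echo "./ckpt_${DEVICE_ID}"   # prints ./ckpt_0
```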