diff --git a/model_zoo/official/nlp/fasttext/README.md b/model_zoo/official/nlp/fasttext/README.md index 77a4095ed8c..682dedc3497 100644 --- a/model_zoo/official/nlp/fasttext/README.md +++ b/model_zoo/official/nlp/fasttext/README.md @@ -64,13 +64,13 @@ After dataset preparation, you can start training and evaluation as follows: ```bash # run training example cd ./scripts -sh run_standalone_train.sh [TRAIN_DATASET] +sh run_standalone_train.sh [TRAIN_DATASET] [DEVICEID] # run distributed training example sh run_distribute_train.sh [TRAIN_DATASET] [RANK_TABLE_PATH] # run evaluation example -sh run_eval.sh [EVAL_DATASET_PATH] [DATASET_NAME] [MODEL_CKPT] +sh run_eval.sh [EVAL_DATASET_PATH] [DATASET_NAME] [MODEL_CKPT] [DEVICEID] ``` # [Script Description](#content) @@ -116,6 +116,7 @@ Parameters for both training and evaluation can be set in config.py. All the dat ```text vocab_size # vocabulary size. buckets # bucket sequence length. + test_buckets # test dataset bucket sequence length. batch_size # batch size of input dataset. embedding_dims # The size of each embedding vector. num_class # number of labels. @@ -134,7 +135,7 @@ Parameters for both training and evaluation can be set in config.py. All the dat ```bash cd ./scripts - sh run_standalone_train.sh [DATASET_PATH] + sh run_standalone_train.sh [DATASET_PATH] [DEVICEID] ``` - Running scripts for distributed training of FastText. Task training on multiple device and run the following command in bash to be executed in `scripts/`: @@ -150,7 +151,7 @@ Parameters for both training and evaluation can be set in config.py. All the dat ``` bash cd ./scripts - sh run_eval.sh [DATASET_PATH] [DATASET_NAME] [MODEL_CKPT] + sh run_eval.sh [DATASET_PATH] [DATASET_NAME] [MODEL_CKPT] [DEVICEID] ``` Note: The `DATASET_PATH` is path to mindrecord. eg. /dataset_path/*.mindrecord @@ -167,13 +168,13 @@ Parameters for both training and evaluation can be set in config.py. 
All the dat | uploaded Date | 12/21/2020 (month/day/year) | | MindSpore Version | 1.1.0 | | Dataset | AG's News Topic Classification Dataset | -| Training Parameters | epoch=5, batch_size=128 | +| Training Parameters | epoch=5, batch_size=512 | | Optimizer | Adam | | Loss Function | Softmax Cross Entropy | | outputs | probability | -| Speed | 112ms/step (8pcs) | -| Total Time | 66s (8pcs) | -| Loss | 0.00082 | +| Speed | 10ms/step (1pcs) | +| Epoch Time | 2.36s (1pcs) | +| Loss | 0.0067 | | Params (M) | 22 | | Checkpoint for inference | 254M (.ckpt file) | | Scripts | [fasttext](https://gitee.com/mindspore/mindspore/tree/master/model_zoo/official/nlp/fasttext) | @@ -184,13 +185,13 @@ Parameters for both training and evaluation can be set in config.py. All the dat | uploaded Date | 11/21/2020 (month/day/year) | | MindSpore Version | 1.1.0 | | Dataset | DBPedia Ontology Classification Dataset | -| Training Parameters | epoch=5, batch_size=128 | +| Training Parameters | epoch=5, batch_size=4096 | | Optimizer | Adam | | Loss Function | Softmax Cross Entropy | | outputs | probability | -| Speed | 60ms/step (8pcs) | -| Total Time | 164s (8pcs) | -| Loss | 2.6e-5 | +| Speed | 58ms/step (1pcs) | +| Epoch Time | 8.15s (1pcs) | +| Loss | 2.6e-4 | | Params (M) | 106 | | Checkpoint for inference | 1.2G (.ckpt file) | | Scripts | [fasttext](https://gitee.com/mindspore/mindspore/tree/master/model_zoo/official/nlp/fasttext) | @@ -201,13 +202,13 @@ Parameters for both training and evaluation can be set in config.py. 
All the dat | uploaded Date | 11/21/2020 (month/day/year) | | MindSpore Version | 1.1.0 | | Dataset | Yelp Review Polarity Dataset | -| Training Parameters | epoch=5, batch_size=128 | +| Training Parameters | epoch=5, batch_size=2048 | | Optimizer | Adam | | Loss Function | Softmax Cross Entropy | | outputs | probability | -| Speed | 74ms/step (8pcs) | -| Total Time | 195s (8pcs) | -| Loss | 7.7e-4 | +| Speed | 101ms/step (1pcs) | +| Epoch Time | 28s (1pcs) | +| Loss | 0.062 | | Params (M) | 103 | | Checkpoint for inference | 1.2G (.ckpt file) | | Scripts | [fasttext](https://gitee.com/mindspore/mindspore/tree/master/model_zoo/official/nlp/fasttext) | @@ -220,8 +221,8 @@ Parameters for both training and evaluation can be set in config.py. All the dat | Uploaded Date | 12/21/2020 (month/day/year) | | MindSpore Version | 1.1.0 | | Dataset | AG's News Topic Classification Dataset | -| batch_size | 128 | -| Total Time | 66s | +| batch_size | 512 | +| Epoch Time | 2.36s | | outputs | label index | | Accuracy | 92.53 | | Model for inference | 254M (.ckpt file) | @@ -232,8 +233,8 @@ Parameters for both training and evaluation can be set in config.py. All the dat | Uploaded Date | 12/21/2020 (month/day/year) | | MindSpore Version | 1.1.0 | | Dataset | DBPedia Ontology Classification Dataset | -| batch_size | 128 | -| Total Time | 164s | +| batch_size | 4096 | +| Epoch Time | 8.15s | | outputs | label index | | Accuracy | 98.6 | | Model for inference | 1.2G (.ckpt file) | @@ -244,8 +245,8 @@ Parameters for both training and evaluation can be set in config.py. 
All the dat | Uploaded Date | 12/21/2020 (month/day/year) | | MindSpore Version | 1.1.0 | | Dataset | Yelp Review Polarity Dataset | -| batch_size | 128 | -| Total Time | 195s | +| batch_size | 2048 | +| Epoch Time | 28s | | outputs | label index | | Accuracy | 95.7 | | Model for inference | 1.2G (.ckpt file) | diff --git a/model_zoo/official/nlp/fasttext/scripts/create_dataset.sh b/model_zoo/official/nlp/fasttext/scripts/create_dataset.sh index 555ba613699..4573b438b9f 100644 --- a/model_zoo/official/nlp/fasttext/scripts/create_dataset.sh +++ b/model_zoo/official/nlp/fasttext/scripts/create_dataset.sh @@ -32,7 +32,6 @@ SOURCE_DATASET_PATH=$(get_real_path $1) DATASET_NAME=$2 export DEVICE_NUM=1 -export DEVICE_ID=5 export RANK_ID=0 export RANK_SIZE=1 diff --git a/model_zoo/official/nlp/fasttext/scripts/run_eval.sh b/model_zoo/official/nlp/fasttext/scripts/run_eval.sh index c44d1cc5f4b..85b90f4fb20 100644 --- a/model_zoo/official/nlp/fasttext/scripts/run_eval.sh +++ b/model_zoo/official/nlp/fasttext/scripts/run_eval.sh @@ -32,8 +32,9 @@ DATASET=$(get_real_path $1) echo $DATASET DATANAME=$2 MODEL_CKPT=$(get_real_path $3) +DEVICEID=$4 export DEVICE_NUM=1 -export DEVICE_ID=5 +export DEVICE_ID=$DEVICEID export RANK_ID=0 export RANK_SIZE=1 diff --git a/model_zoo/official/nlp/fasttext/scripts/run_standalone_train.sh b/model_zoo/official/nlp/fasttext/scripts/run_standalone_train.sh index eeace93d012..fd62e6c410e 100644 --- a/model_zoo/official/nlp/fasttext/scripts/run_standalone_train.sh +++ b/model_zoo/official/nlp/fasttext/scripts/run_standalone_train.sh @@ -32,9 +32,10 @@ DATASET=$(get_real_path $1) echo $DATASET DATANAME=$(basename $DATASET) echo $DATANAME +DEVICEID=$2 export DEVICE_NUM=1 -export DEVICE_ID=0 +export DEVICE_ID=$DEVICEID export RANK_ID=0 export RANK_SIZE=1 diff --git a/model_zoo/official/nlp/fasttext/train.py b/model_zoo/official/nlp/fasttext/train.py index 2f93bb9cd3b..a399502af2e 100644 --- a/model_zoo/official/nlp/fasttext/train.py +++ 
b/model_zoo/official/nlp/fasttext/train.py @@ -141,12 +141,12 @@ def _build_training_pipeline(pre_dataset): callbacks = [time_monitor, loss_monitor] if rank_size is None or int(rank_size) == 1: ckpt_callback = ModelCheckpoint(prefix='fasttext', - directory=os.path.join('./', 'ckpe_{}'.format(os.getenv("DEVICE_ID"))), + directory=os.path.join('./', 'ckpt_{}'.format(os.getenv("DEVICE_ID"))), config=ckpt_config) callbacks.append(ckpt_callback) if rank_size is not None and int(rank_size) > 1 and MultiAscend.get_rank() % 8 == 0: ckpt_callback = ModelCheckpoint(prefix='fasttext', - directory=os.path.join('./', 'ckpe_{}'.format(os.getenv("DEVICE_ID"))), + directory=os.path.join('./', 'ckpt_{}'.format(os.getenv("DEVICE_ID"))), config=ckpt_config) callbacks.append(ckpt_callback) print("Prepare to Training....")