diff --git a/model_zoo/official/cv/crnn/README.md b/model_zoo/official/cv/crnn/README.md index 9285f90ff1e..216bf4f2879 100644 --- a/model_zoo/official/cv/crnn/README.md +++ b/model_zoo/official/cv/crnn/README.md @@ -43,7 +43,7 @@ We use five datasets mentioned in the paper.For training, we use the synthetic d ### [Dataset Prepare](#content) -For datset `IC03`, `IIIT5k` and `SVT`, the original dataset from the official website can not be used directly in CRNN. +For the datasets `IC03`, `IIIT5k` and `SVT`, the original datasets from the official websites cannot be used directly in CRNN. - `IC03`, the text need to be cropped from the original image according to the words.xml. - `IIIT5k`, the annotation need to be extracted from the matlib data file. diff --git a/model_zoo/official/nlp/bert/README.md b/model_zoo/official/nlp/bert/README.md index 41ce5e40c05..e91dbbda2d4 100644 --- a/model_zoo/official/nlp/bert/README.md +++ b/model_zoo/official/nlp/bert/README.md @@ -1,5 +1,7 @@ # Contents +[查看中文](./README_CN.md) + - [Contents](#contents) - [BERT Description](#bert-description) - [Model Architecture](#model-architecture) @@ -197,7 +199,7 @@ For example, the schema file of cn-wiki-128 dataset for pretraining shows as fol ├─scripts ├─ascend_distributed_launcher ├─__init__.py - ├─hyper_parameter_config.ini # hyper paramter for distributed pretraining + ├─hyper_parameter_config.ini # hyper parameter for distributed pretraining ├─get_distribute_pretrain_cmd.py # script for distributed pretraining ├─README.md ├─run_classifier.sh # shell script for standalone classifier task on ascend or gpu @@ -247,7 +249,7 @@ usage: run_pretrain.py [--distribute DISTRIBUTE] [--epoch_size N] [----device_n options: --device_target device where the code will be implemented: "Ascend" | "GPU", default is "Ascend" - --distribute pre_training by serveral devices: "true"(training by more than 1 device) | "false", default is "false" + --distribute pre_training by several devices: "true"(training by more than 1 device) | "false", default is "false" --epoch_size epoch size: N, default is 1 --device_num number of used devices: N, default is 1 --device_id device id: N, default is 0 @@ -380,7 +382,7 @@ config for lossscale and etc. ```text Parameters for dataset and network (Pre-Training/Fine-Tuning/Evaluation): seq_length length of input sequence: N, default is 128 - vocab_size size of each embedding vector: N, must be consistant with the dataset you use. Default is 21128. + vocab_size size of each embedding vector: N, must be consistent with the dataset you use. Default is 21128. Usually, we use 21128 for CN vocabs and 30522 for EN vocabs according to the origin paper. 
hidden_size size of bert encoder layers: N, default is 768 num_hidden_layers number of hidden layers: N, default is 12 @@ -433,8 +435,8 @@ The command above will run in the background, you can view training logs in pret ```text # grep "epoch" pretraining_log.txt -epoch: 0.0, current epoch percent: 0.000, step: 1, outpus are (Tensor(shape=[1], dtype=Float32, [ 1.0856101e+01]), Tensor(shape=[], dtype=Bool, False), Tensor(shape=[], dtype=Float32, 65536)) -epoch: 0.0, current epoch percent: 0.000, step: 2, outpus are (Tensor(shape=[1], dtype=Float32, [ 1.0821701e+01]), Tensor(shape=[], dtype=Bool, False), Tensor(shape=[], dtype=Float32, 65536)) +epoch: 0.0, current epoch percent: 0.000, step: 1, outputs are (Tensor(shape=[1], dtype=Float32, [ 1.0856101e+01]), Tensor(shape=[], dtype=Bool, False), Tensor(shape=[], dtype=Float32, 65536)) +epoch: 0.0, current epoch percent: 0.000, step: 2, outputs are (Tensor(shape=[1], dtype=Float32, [ 1.0821701e+01]), Tensor(shape=[], dtype=Bool, False), Tensor(shape=[], dtype=Float32, 65536)) ... ``` @@ -448,8 +450,8 @@ The command above will run in the background, you can view the results the file ```bash # grep "epoch" pretraining_log.txt -epoch: 0.0, current epoch percent: 0.000, step: 1, outpus are (Tensor(shape=[1], dtype=Float32, [ 1.0856101e+01]), Tensor(shape=[], dtype=Bool, False), Tensor(shape=[], dtype=Float32, 65536)) -epoch: 0.0, current epoch percent: 0.000, step: 2, outpus are (Tensor(shape=[1], dtype=Float32, [ 1.0821701e+01]), Tensor(shape=[], dtype=Bool, False), Tensor(shape=[], dtype=Float32, 65536)) +epoch: 0.0, current epoch percent: 0.000, step: 1, outputs are (Tensor(shape=[1], dtype=Float32, [ 1.0856101e+01]), Tensor(shape=[], dtype=Bool, False), Tensor(shape=[], dtype=Float32, 65536)) +epoch: 0.0, current epoch percent: 0.000, step: 2, outputs are (Tensor(shape=[1], dtype=Float32, [ 1.0821701e+01]), Tensor(shape=[], dtype=Bool, False), Tensor(shape=[], dtype=Float32, 65536)) ... ``` @@ -478,11 +480,11 @@ The command above will run in the background, you can view training logs in pret ```bash # grep "epoch" LOG*/pretraining_log.txt -epoch: 0.0, current epoch percent: 0.001, step: 100, outpus are (Tensor(shape=[1], dtype=Float32, [ 1.08209e+01]), Tensor(shape=[], dtype=Bool, False), Tensor(shape=[], dtype=Float32, 65536)) -epoch: 0.0, current epoch percent: 0.002, step: 200, outpus are (Tensor(shape=[1], dtype=Float32, [ 1.07566e+01]), Tensor(shape=[], dtype=Bool, False), Tensor(shape=[], dtype=Float32, 65536)) +epoch: 0.0, current epoch percent: 0.001, step: 100, outputs are (Tensor(shape=[1], dtype=Float32, [ 1.08209e+01]), Tensor(shape=[], dtype=Bool, False), Tensor(shape=[], dtype=Float32, 65536)) +epoch: 0.0, current epoch percent: 0.002, step: 200, outputs are (Tensor(shape=[1], dtype=Float32, [ 1.07566e+01]), Tensor(shape=[], dtype=Bool, False), Tensor(shape=[], dtype=Float32, 65536)) ... 
-epoch: 0.0, current epoch percent: 0.001, step: 100, outpus are (Tensor(shape=[1], dtype=Float32, [ 1.08218e+01]), Tensor(shape=[], dtype=Bool, False), Tensor(shape=[], dtype=Float32, 65536)) -epoch: 0.0, current epoch percent: 0.002, step: 200, outpus are (Tensor(shape=[1], dtype=Float32, [ 1.07770e+01]), Tensor(shape=[], dtype=Bool, False), Tensor(shape=[], dtype=Float32, 65536)) +epoch: 0.0, current epoch percent: 0.001, step: 100, outputs are (Tensor(shape=[1], dtype=Float32, [ 1.08218e+01]), Tensor(shape=[], dtype=Bool, False), Tensor(shape=[], dtype=Float32, 65536)) +epoch: 0.0, current epoch percent: 0.002, step: 200, outputs are (Tensor(shape=[1], dtype=Float32, [ 1.07770e+01]), Tensor(shape=[], dtype=Bool, False), Tensor(shape=[], dtype=Float32, 65536)) ... ``` @@ -496,11 +498,11 @@ The command above will run in the background, you can view the results the file ```bash # grep "epoch" LOG*/pretraining_log.txt -epoch: 0.0, current epoch percent: 0.001, step: 100, outpus are (Tensor(shape=[1], dtype=Float32, [ 1.08209e+01]), Tensor(shape=[], dtype=Bool, False), Tensor(shape=[], dtype=Float32, 65536)) -epoch: 0.0, current epoch percent: 0.002, step: 200, outpus are (Tensor(shape=[1], dtype=Float32, [ 1.07566e+01]), Tensor(shape=[], dtype=Bool, False), Tensor(shape=[], dtype=Float32, 65536)) +epoch: 0.0, current epoch percent: 0.001, step: 100, outputs are (Tensor(shape=[1], dtype=Float32, [ 1.08209e+01]), Tensor(shape=[], dtype=Bool, False), Tensor(shape=[], dtype=Float32, 65536)) +epoch: 0.0, current epoch percent: 0.002, step: 200, outputs are (Tensor(shape=[1], dtype=Float32, [ 1.07566e+01]), Tensor(shape=[], dtype=Bool, False), Tensor(shape=[], dtype=Float32, 65536)) ... -epoch: 0.0, current epoch percent: 0.001, step: 100, outpus are (Tensor(shape=[1], dtype=Float32, [ 1.08218e+01]), Tensor(shape=[], dtype=Bool, False), Tensor(shape=[], dtype=Float32, 65536)) -epoch: 0.0, current epoch percent: 0.002, step: 200, outpus are (Tensor(shape=[1], dtype=Float32, [ 1.07770e+01]), Tensor(shape=[], dtype=Bool, False), Tensor(shape=[], dtype=Float32, 65536)) +epoch: 0.0, current epoch percent: 0.001, step: 100, outputs are (Tensor(shape=[1], dtype=Float32, [ 1.08218e+01]), Tensor(shape=[], dtype=Bool, False), Tensor(shape=[], dtype=Float32, 65536)) +epoch: 0.0, current epoch percent: 0.002, step: 200, outputs are (Tensor(shape=[1], dtype=Float32, [ 1.07770e+01]), Tensor(shape=[], dtype=Bool, False), Tensor(shape=[], dtype=Float32, 65536)) ... 
``` diff --git a/model_zoo/official/nlp/bert/README_CN.md b/model_zoo/official/nlp/bert/README_CN.md index 02c142fd972..8ae3d576bb7 100644 --- a/model_zoo/official/nlp/bert/README_CN.md +++ b/model_zoo/official/nlp/bert/README_CN.md @@ -1,6 +1,8 @@ # 目录 +[View English](./README.md) + - [目录](#目录) @@ -430,8 +432,8 @@ bash scripts/run_standalone_pretrain_ascend.sh 0 1 /path/cn-wiki-128 ```text # grep "epoch" pretraining_log.txt -epoch: 0.0, current epoch percent: 0.000, step: 1, outpus are (Tensor(shape=[1], dtype=Float32, [ 1.0856101e+01]), Tensor(shape=[], dtype=Bool, False), Tensor(shape=[], dtype=Float32, 65536)) -epoch: 0.0, current epoch percent: 0.000, step: 2, outpus are (Tensor(shape=[1], dtype=Float32, [ 1.0821701e+01]), Tensor(shape=[], dtype=Bool, False), Tensor(shape=[], dtype=Float32, 65536)) +epoch: 0.0, current epoch percent: 0.000, step: 1, outputs are (Tensor(shape=[1], dtype=Float32, [ 1.0856101e+01]), Tensor(shape=[], dtype=Bool, False), Tensor(shape=[], dtype=Float32, 65536)) +epoch: 0.0, current epoch percent: 0.000, step: 2, outputs are (Tensor(shape=[1], dtype=Float32, [ 1.0821701e+01]), Tensor(shape=[], dtype=Bool, False), Tensor(shape=[], dtype=Float32, 65536)) ... ``` @@ -460,11 +462,11 @@ bash scripts/run_distributed_pretrain_ascend.sh /path/cn-wiki-128 /path/hccl.jso ```text # grep "epoch" LOG*/pretraining_log.txt -epoch: 0.0, current epoch percent: 0.001, step: 100, outpus are (Tensor(shape=[1], dtype=Float32, [ 1.08209e+01]), Tensor(shape=[], dtype=Bool, False), Tensor(shape=[], dtype=Float32, 65536)) -epoch: 0.0, current epoch percent: 0.002, step: 200, outpus are (Tensor(shape=[1], dtype=Float32, [ 1.07566e+01]), Tensor(shape=[], dtype=Bool, False), Tensor(shape=[], dtype=Float32, 65536)) +epoch: 0.0, current epoch percent: 0.001, step: 100, outputs are (Tensor(shape=[1], dtype=Float32, [ 1.08209e+01]), Tensor(shape=[], dtype=Bool, False), Tensor(shape=[], dtype=Float32, 65536)) +epoch: 0.0, current epoch percent: 0.002, step: 200, outputs are (Tensor(shape=[1], dtype=Float32, [ 1.07566e+01]), Tensor(shape=[], dtype=Bool, False), Tensor(shape=[], dtype=Float32, 65536)) ... -epoch: 0.0, current epoch percent: 0.001, step: 100, outpus are (Tensor(shape=[1], dtype=Float32, [ 1.08218e+01]), Tensor(shape=[], dtype=Bool, False), Tensor(shape=[], dtype=Float32, 65536)) -epoch: 0.0, current epoch percent: 0.002, step: 200, outpus are (Tensor(shape=[1], dtype=Float32, [ 1.07770e+01]), Tensor(shape=[], dtype=Bool, False), Tensor(shape=[], dtype=Float32, 65536)) +epoch: 0.0, current epoch percent: 0.001, step: 100, outputs are (Tensor(shape=[1], dtype=Float32, [ 1.08218e+01]), Tensor(shape=[], dtype=Bool, False), Tensor(shape=[], dtype=Float32, 65536)) +epoch: 0.0, current epoch percent: 0.002, step: 200, outputs are (Tensor(shape=[1], dtype=Float32, [ 1.07770e+01]), Tensor(shape=[], dtype=Bool, False), Tensor(shape=[], dtype=Float32, 65536)) ... 
``` diff --git a/model_zoo/official/nlp/bert/scripts/ascend_distributed_launcher/get_distribute_pretrain_cmd.py b/model_zoo/official/nlp/bert/scripts/ascend_distributed_launcher/get_distribute_pretrain_cmd.py index b4338eeaa58..c20b98787c8 100644 --- a/model_zoo/official/nlp/bert/scripts/ascend_distributed_launcher/get_distribute_pretrain_cmd.py +++ b/model_zoo/official/nlp/bert/scripts/ascend_distributed_launcher/get_distribute_pretrain_cmd.py @@ -57,9 +57,51 @@ def append_cmd(cmd, s): cmd += "\n" return cmd + def append_cmd_env(cmd, key, value): return append_cmd(cmd, "export " + str(key) + "=" + str(value)) + +def set_envs(cmd, logic_id, rank_id): + """ + Set environment variables. + """ + cmd = append_cmd_env(cmd, "DEVICE_ID", str(logic_id)) + cmd = append_cmd_env(cmd, "RANK_ID", str(rank_id)) + cmd = append_cmd_env(cmd, "DEPLOY_MODE", '0') + cmd = append_cmd_env(cmd, "GE_USE_STATIC_MEMORY", '1') + return cmd + + +def make_dirs(cmd, logic_id): + """ + Make directories and change path. + """ + cmd = append_cmd(cmd, "rm -rf LOG" + str(logic_id)) + cmd = append_cmd(cmd, "mkdir ./LOG" + str(logic_id)) + cmd = append_cmd(cmd, "cp *.py ./LOG" + str(logic_id)) + cmd = append_cmd(cmd, "mkdir -p ./LOG" + str(logic_id) + "/ms_log") + cmd = append_cmd(cmd, "env > ./LOG" + str(logic_id) + "/env.log") + cur_dir = os.getcwd() + cmd = append_cmd_env(cmd, "GLOG_log_dir", cur_dir + "/LOG" + str(logic_id) + "/ms_log") + cmd = append_cmd_env(cmd, "GLOG_logtostderr", "0") + cmd = append_cmd(cmd, "cd " + cur_dir + "/LOG" + str(logic_id)) + return cmd + + +def print_info(rank_id, device_id, logic_id, cmdopt, epoch_size, data_dir, cur_dir): + """ + Print some information about scripts. + """ + print("\nstart training for rank " + str(rank_id) + ", device " + str(device_id) + ":") + print("rank_id:", rank_id) + print("device_id:", device_id) + print("logic_id:", logic_id) + print("core_nums:", cmdopt) + print("epoch_size:", epoch_size) + print("data_dir:", data_dir) + print("log_file_dir: " + cur_dir + "/LOG" + str(logic_id) + "/pretraining_log.txt") + def distribute_pretrain(): """ distribute pretrain scripts. The number of Ascend accelerators can be automatically allocated @@ -116,42 +158,22 @@ def distribute_pretrain(): count = 0 for instance in this_server["device"]: - # device_id is the physical id, we use logic id to sepcific the selected device. + # device_id is the physical id, we use logic id to specify the selected device. # While running on a server with 8 pcs, the logic ids are equal to the device ids. 
device_id = instance["device_id"] rank_id = instance["rank_id"] logic_id = physic_logic_ids[device_id] - print("\nstart training for rank " + str(rank_id) + ", device " + str(device_id) + ":") - print("rank_id:", rank_id) - print("device_id:", device_id) - print("logic_id", logic_id) - start = count * int(avg_core_per_rank) count += 1 end = start + core_gap cmdopt = str(start) + "-" + str(end) - - cmd = append_cmd_env(cmd, "DEVICE_ID", str(logic_id)) - cmd = append_cmd_env(cmd, "RANK_ID", str(rank_id)) - cmd = append_cmd_env(cmd, "DEPLOY_MODE", '0') - cmd = append_cmd_env(cmd, "GE_USE_STATIC_MEMORY", '1') - - cmd = append_cmd(cmd, "rm -rf LOG" + str(logic_id)) - cmd = append_cmd(cmd, "mkdir ./LOG" + str(logic_id)) - cmd = append_cmd(cmd, "cp *.py ./LOG" + str(logic_id)) - cmd = append_cmd(cmd, "mkdir -p ./LOG" + str(logic_id) + "/ms_log") - cmd = append_cmd(cmd, "env > ./LOG" + str(logic_id) + "/env.log") - cur_dir = os.getcwd() - cmd = append_cmd_env(cmd, "GLOG_log_dir", cur_dir + "/LOG" + str(logic_id) + "/ms_log") - cmd = append_cmd_env(cmd, "GLOG_logtostderr", "0") - print("core_nums:", cmdopt) - print("epoch_size:", str(cfg['epoch_size'])) - print("data_dir:", data_dir) - print("log_file_dir: " + cur_dir + "/LOG" + str(logic_id) + "/pretraining_log.txt") + cmd = set_envs(cmd, logic_id, rank_id) + cmd = make_dirs(cmd, logic_id) - cmd = append_cmd(cmd, "cd " + cur_dir + "/LOG" + str(logic_id)) + print_info(rank_id=rank_id, device_id=device_id, logic_id=logic_id, cmdopt=cmdopt, cur_dir=cur_dir, + epoch_size=str(cfg['epoch_size']), data_dir=data_dir) run_cmd = 'taskset -c ' + cmdopt + ' nohup python ' + run_script + " " opt = " ".join(["--" + key + "=" + str(cfg[key]) for key in cfg.keys()]) diff --git a/model_zoo/official/nlp/bert/scripts/run_classifier.sh b/model_zoo/official/nlp/bert/scripts/run_classifier.sh index 70b3f7d80b8..f967397c90c 100644 --- a/model_zoo/official/nlp/bert/scripts/run_classifier.sh +++ b/model_zoo/official/nlp/bert/scripts/run_classifier.sh @@ -15,7 +15,7 @@ # ============================================================================ echo "==============================================================================================================" -echo "Please run the scipt as: " +echo "Please run the script as: " echo "bash scripts/run_classifier.sh" echo "for example: bash scripts/run_classifier.sh" echo "assessment_method include: [MCC, Spearman_correlation ,Accuracy]" diff --git a/model_zoo/official/nlp/bert/scripts/run_distributed_pretrain_ascend.sh b/model_zoo/official/nlp/bert/scripts/run_distributed_pretrain_ascend.sh index d69190af81f..ef5a75f289f 100644 --- a/model_zoo/official/nlp/bert/scripts/run_distributed_pretrain_ascend.sh +++ b/model_zoo/official/nlp/bert/scripts/run_distributed_pretrain_ascend.sh @@ -15,7 +15,7 @@ # ============================================================================ echo "==============================================================================================================" -echo "Please run the scipt as: " +echo "Please run the script as: " echo "bash run_distributed_pretrain_ascend.sh DATA_DIR RANK_TABLE_FILE" echo "for example: bash run_distributed_pretrain_ascend.sh /path/dataset /path/hccl.json" echo "It is better to use absolute path." 
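The launcher refactor above only moves code: `set_envs`, `make_dirs` and `print_info` reproduce the env exports, LOG-directory setup and diagnostics that previously lived inline in the per-device loop. Below is a minimal sketch of how the extracted helpers compose (illustrative only; the two-entry `devices` list is a hypothetical stand-in for the rank-table hccl.json the real script parses):

```python
import os

def append_cmd(cmd, s):
    # Accumulate shell commands into one newline-separated script string.
    cmd += s
    cmd += "\n"
    return cmd

def append_cmd_env(cmd, key, value):
    # Emit an `export KEY=value` line.
    return append_cmd(cmd, "export " + str(key) + "=" + str(value))

def set_envs(cmd, logic_id, rank_id):
    # Per-device environment setup, as in the patch above (trimmed).
    cmd = append_cmd_env(cmd, "DEVICE_ID", str(logic_id))
    cmd = append_cmd_env(cmd, "RANK_ID", str(rank_id))
    return cmd

# Hypothetical two-device layout; the real launcher derives device_id,
# rank_id and logic_id from the rank-table (hccl.json) configuration.
devices = [{"device_id": 0, "rank_id": 0}, {"device_id": 1, "rank_id": 1}]

cmd = ""
for instance in devices:
    logic_id = instance["device_id"]  # equal on a full 8-device server
    cmd = set_envs(cmd, logic_id, instance["rank_id"])
    cmd = append_cmd(cmd, "cd " + os.getcwd() + "/LOG" + str(logic_id))

print(cmd)  # the generated shell fragment, one block per device
```

Building the whole script as a single string and running it once keeps each device's exports adjacent to the commands that depend on them.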
diff --git a/model_zoo/official/nlp/bert/scripts/run_distributed_pretrain_for_gpu.sh b/model_zoo/official/nlp/bert/scripts/run_distributed_pretrain_for_gpu.sh index ff54a331dbe..8195ff6ae9c 100644 --- a/model_zoo/official/nlp/bert/scripts/run_distributed_pretrain_for_gpu.sh +++ b/model_zoo/official/nlp/bert/scripts/run_distributed_pretrain_for_gpu.sh @@ -15,7 +15,7 @@ # ============================================================================ echo "==============================================================================================================" -echo "Please run the scipt as: " +echo "Please run the script as: " echo "bash run_distributed_pretrain.sh DEVICE_NUM EPOCH_SIZE DATA_DIR SCHEMA_DIR" echo "for example: bash run_distributed_pretrain.sh 8 40 /path/zh-wiki/ /path/Schema.json" echo "It is better to use absolute path." diff --git a/model_zoo/official/nlp/bert/scripts/run_ner.sh b/model_zoo/official/nlp/bert/scripts/run_ner.sh index 3a4c3fe47c3..9bb8b78370d 100644 --- a/model_zoo/official/nlp/bert/scripts/run_ner.sh +++ b/model_zoo/official/nlp/bert/scripts/run_ner.sh @@ -15,7 +15,7 @@ # ============================================================================ echo "==============================================================================================================" -echo "Please run the scipt as: " +echo "Please run the script as: " echo "bash scripts/run_ner.sh" echo "for example: bash scripts/run_ner.sh" echo "assessment_method include: [F1, clue_benchmark]" diff --git a/model_zoo/official/nlp/bert/scripts/run_squad.sh b/model_zoo/official/nlp/bert/scripts/run_squad.sh index a4d4606af87..241baa152cc 100644 --- a/model_zoo/official/nlp/bert/scripts/run_squad.sh +++ b/model_zoo/official/nlp/bert/scripts/run_squad.sh @@ -15,7 +15,7 @@ # ============================================================================ echo "==============================================================================================================" -echo "Please run the scipt as: " +echo "Please run the script as: " echo "bash scripts/run_squad.sh" echo "for example: bash scripts/run_squad.sh" echo "assessment_method include: [Accuracy]" diff --git a/model_zoo/official/nlp/bert/scripts/run_standalone_pretrain_ascend.sh b/model_zoo/official/nlp/bert/scripts/run_standalone_pretrain_ascend.sh index 1cbfababfde..a4830b2d843 100644 --- a/model_zoo/official/nlp/bert/scripts/run_standalone_pretrain_ascend.sh +++ b/model_zoo/official/nlp/bert/scripts/run_standalone_pretrain_ascend.sh @@ -15,7 +15,7 @@ # ============================================================================ echo "==============================================================================================================" -echo "Please run the scipt as: " +echo "Please run the script as: " echo "bash run_standalone_pretrain_ascend.sh DEVICE_ID EPOCH_SIZE DATA_DIR SCHEMA_DIR" echo "for example: bash run_standalone_pretrain_ascend.sh 0 40 /path/zh-wiki/ /path/Schema.json" echo "==============================================================================================================" diff --git a/model_zoo/official/nlp/bert/scripts/run_standalone_pretrain_for_gpu.sh b/model_zoo/official/nlp/bert/scripts/run_standalone_pretrain_for_gpu.sh index bd42ebf744b..c2d01889f29 100644 --- a/model_zoo/official/nlp/bert/scripts/run_standalone_pretrain_for_gpu.sh +++ b/model_zoo/official/nlp/bert/scripts/run_standalone_pretrain_for_gpu.sh @@ -15,7 +15,7 @@ # 
============================================================================ echo "==============================================================================================================" -echo "Please run the scipt as: " +echo "Please run the script as: " echo "bash run_standalone_pretrain.sh DEVICE_ID EPOCH_SIZE DATA_DIR SCHEMA_DIR" echo "for example: bash run_standalone_pretrain.sh 0 40 /path/zh-wiki/ /path/Schema.json" echo "==============================================================================================================" diff --git a/model_zoo/official/nlp/bert/src/CRF.py b/model_zoo/official/nlp/bert/src/CRF.py index ef6cb412973..26685b1bcaf 100644 --- a/model_zoo/official/nlp/bert/src/CRF.py +++ b/model_zoo/official/nlp/bert/src/CRF.py @@ -30,7 +30,7 @@ class CRF(nn.Cell): Args: tag_to_index: The dict for tag to index mapping with extra "" and ""sign. batch_size: Batch size, i.e., the length of the first dimension. - seq_length: Sequence length, i.e., the length of the second dimention. + seq_length: Sequence length, i.e., the length of the second dimension. is_training: Specifies whether to use training mode. Returns: Training mode: Tensor, total loss. diff --git a/model_zoo/official/nlp/bert/src/bert_for_finetune.py b/model_zoo/official/nlp/bert/src/bert_for_finetune.py index d8b37da5796..03bd3a1535b 100644 --- a/model_zoo/official/nlp/bert/src/bert_for_finetune.py +++ b/model_zoo/official/nlp/bert/src/bert_for_finetune.py @@ -49,7 +49,7 @@ def _tensor_grad_overflow(grad): class BertFinetuneCell(nn.Cell): """ - Especifically defined for finetuning where only four inputs tensor are needed. + Specifically defined for finetuning, where only four input tensors are needed. Append an optimizer to the training network after that the construct function can be called to create the backward graph. diff --git a/model_zoo/official/nlp/bert/src/finetune_eval_model.py b/model_zoo/official/nlp/bert/src/finetune_eval_model.py index 7ee101438c8..b3f4c201415 100644 --- a/model_zoo/official/nlp/bert/src/finetune_eval_model.py +++ b/model_zoo/official/nlp/bert/src/finetune_eval_model.py @@ -26,7 +26,7 @@ class BertCLSModel(nn.Cell): """ This class is responsible for classification task evaluation, i.e. XNLI(num_labels=3), LCQMC(num_labels=2), Chnsenti(num_labels=2). The returned output represents the final - logits as the results of log_softmax is propotional to that of softmax. + logits, as the result of log_softmax is proportional to that of softmax. """ def __init__(self, config, is_training, num_labels=2, dropout_prob=0.0, use_one_hot_embeddings=False, assessment_method=""): @@ -87,7 +87,7 @@ class BertSquadModel(nn.Cell): class BertNERModel(nn.Cell): """ This class is responsible for sequence labeling task evaluation, i.e. NER(num_labels=11). - The returned output represents the final logits as the results of log_softmax is propotional to that of softmax. + The returned output represents the final logits, as the result of log_softmax is proportional to that of softmax. 
""" def __init__(self, config, is_training, num_labels=11, use_crf=False, dropout_prob=0.0, use_one_hot_embeddings=False): diff --git a/model_zoo/official/nlp/bert/src/tokenization.py b/model_zoo/official/nlp/bert/src/tokenization.py index d437843f0b5..49b11279a8a 100644 --- a/model_zoo/official/nlp/bert/src/tokenization.py +++ b/model_zoo/official/nlp/bert/src/tokenization.py @@ -289,7 +289,7 @@ class WordpieceTokenizer(): def _is_whitespace(char): """Checks whether `chars` is a whitespace character.""" - # \t, \n, and \r are technically contorl characters but we treat them + # \t, \n, and \r are technically control characters but we treat them # as whitespace since they are generally considered as such. whitespace_char = [" ", "\t", "\n", "\r"] if char in whitespace_char: diff --git a/model_zoo/official/nlp/bert/src/utils.py b/model_zoo/official/nlp/bert/src/utils.py index 94ce01b17b0..e03d221cc2f 100644 --- a/model_zoo/official/nlp/bert/src/utils.py +++ b/model_zoo/official/nlp/bert/src/utils.py @@ -63,7 +63,7 @@ def make_directory(path: str): """Make directory.""" if path is None or not isinstance(path, str) or path.strip() == "": logger.error("The path(%r) is invalid type.", path) - raise TypeError("Input path is invaild type") + raise TypeError("Input path is invalid type") # convert the relative paths path = os.path.realpath(path)