improve convergence of loss in bert

chenhaozhe 2021-03-18 10:37:29 +08:00
parent 4e1e16c335
commit 15d37e5db9
4 changed files with 8 additions and 6 deletions

View File

@@ -317,8 +317,9 @@ You can train your own model based on either pretrained classification model or
 1. Convert your own dataset to COCO or VOC style. Otherwise you have to add your own data preprocessing code.
 2. Change config.py according to your own dataset, especially the `num_classes`.
-3. Set the argument `filter_weight` to `True` when calling `train.py`; this filters the final detection box weights out of the pretrained model.
-4. Build your own bash scripts using the new config and arguments for further convenience.
+3. Prepare a pretrained checkpoint. You can load the pretrained checkpoint via the `pre_trained` argument. Transfer training means a new training job, so keep `pre_trained_epoch_size` at its default value `0`.
+4. Set the argument `filter_weight` to `True` when calling `train.py`; this filters the final detection box weights out of the pretrained model.
+5. Build your own bash scripts using the new config and arguments for further convenience.
 ### [Evaluation Process](#contents)
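
For reference, the `filter_weight` step above amounts to dropping the class-dependent detection-head parameters from the checkpoint before loading, so a model with a different `num_classes` can still reuse the backbone weights. A minimal sketch, assuming MindSpore's `load_checkpoint`/`load_param_into_net` API; the head-layer key names are illustrative, not the repo's actual parameter names:

```python
# Sketch only: emulate `filter_weight=True` by dropping detection-head weights
# before loading a pretrained checkpoint. The key names below are hypothetical.
from mindspore.train.serialization import load_checkpoint, load_param_into_net

def load_backbone_only(net, ckpt_path, head_keywords=('multi_loc_layers', 'multi_cls_layers')):
    param_dict = load_checkpoint(ckpt_path)
    filtered = {k: v for k, v in param_dict.items()
                if not any(word in k for word in head_keywords)}
    load_param_into_net(net, filtered)  # head layers keep their fresh initialization
    return net
```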

View File

@@ -599,7 +599,7 @@ class BertTrainAccumulationAllReducePostWithLossScaleCell(nn.Cell):
             scaling_sens = sens
         # alloc status and clear should be right before gradoperation
         init = self.alloc_status()
-        init = F.depend(loss, init)
+        init = F.depend(init, loss)
         clear_status = self.clear_status(init)
         scaling_sens = F.depend(scaling_sens, clear_status)
         # update accumulation parameters
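
For context, `F.depend(value, expr)` returns `value` unchanged but forces `expr` to be computed first. The old line returned `loss` into `init`, so `clear_status` no longer received the freshly allocated status buffer; the new order keeps `init` and simply pins the status clearing after the loss computation. A minimal sketch of the ordering pattern, not the repo's training cell:

```python
# Sketch only: ordering two values with F.depend, as in the hunk above.
import mindspore as ms
from mindspore import Tensor
from mindspore.ops import functional as F

status = Tensor([0.0], ms.float32)   # stands in for the overflow-status buffer
loss = Tensor(1.5, ms.float32)       # stands in for the computed loss

status = F.depend(status, loss)      # returns `status`, merely ordered after `loss`
# F.depend(loss, status) would instead return the loss value, silently replacing
# the status buffer that the subsequent clear_status() expects.
print(status)
```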

View File

@@ -804,7 +804,8 @@ class BertModel(nn.Cell):
         self.bert_embedding_lookup = nn.Embedding(
             vocab_size=config.vocab_size,
             embedding_size=self.embedding_size,
-            use_one_hot=use_one_hot_embeddings)
+            use_one_hot=use_one_hot_embeddings,
+            embedding_table=TruncatedNormal(config.initializer_range))
         self.bert_embedding_postprocessor = EmbeddingPostprocessor(
             embedding_size=self.embedding_size,
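
The added `embedding_table` argument initializes the token-embedding weights from a truncated normal distribution with sigma equal to `config.initializer_range`, as in the original BERT, instead of relying on `nn.Embedding`'s default initializer. A minimal standalone sketch; the vocab size, hidden size, and `initializer_range` value are assumed for illustration:

```python
# Sketch only: an embedding table initialized with TruncatedNormal, as in the hunk above.
from mindspore import nn
from mindspore.common.initializer import TruncatedNormal

initializer_range = 0.02   # assumed value of config.initializer_range
embedding = nn.Embedding(
    vocab_size=30522,      # illustrative vocab size
    embedding_size=768,    # illustrative hidden size
    use_one_hot=True,
    embedding_table=TruncatedNormal(initializer_range))
print(embedding.embedding_table.asnumpy().std())  # roughly on the order of 0.02
```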

View File

@@ -36,9 +36,9 @@ cfg = edict({
         'warmup_steps': 10000,
     }),
     'Lamb': edict({
-        'learning_rate': 3e-5,
+        'learning_rate': 3e-4,
         'end_learning_rate': 0.0,
-        'power': 5.0,
+        'power': 2.0,
         'warmup_steps': 10000,
         'weight_decay': 0.01,
         'decay_filter': lambda x: 'layernorm' not in x.name.lower() and 'bias' not in x.name.lower(),
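
These two Lamb changes raise the peak learning rate from 3e-5 to 3e-4 and soften the polynomial decay from power 5.0 to power 2.0, so the learning rate stays higher for longer after the 10000 warmup steps. A minimal sketch of a linear-warmup plus polynomial-decay schedule using the new values; this is the generic formula, not necessarily the repo's exact learning-rate code, and the total step count is illustrative:

```python
# Sketch only: linear warmup followed by polynomial decay with the new config values.
def lamb_lr(step, total_steps, lr=3e-4, end_lr=0.0, power=2.0, warmup_steps=10000):
    if step < warmup_steps:
        return lr * step / warmup_steps               # linear warmup to the peak LR
    decay_ratio = (step - warmup_steps) / max(1, total_steps - warmup_steps)
    return (lr - end_lr) * (1.0 - decay_ratio) ** power + end_lr

total = 400000                                        # illustrative total step count
for s in (0, 10000, 100000, 400000):
    print(s, f"{lamb_lr(s, total):.2e}")              # lower power => slower decay
```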