forked from mindspore-Ecosystem/mindspore
hide doc comment of the construct
parent 6248117b1c
commit 75d16a4a97
@@ -145,10 +145,10 @@ def _check_config(config):
 
     # dp * pp * pipeline_stage <= device_num
     if config.data_parallel * config.model_parallel * pipeline_stage > device_num:
-        raise ValueError("The product of the data parallel {config.data_parallel},"
-                         "model parallel {config.model_parallel}"
-                         "pipeline stages {pipeline_stage}"
-                         "should be less than device_num {device_num}")
+        raise ValueError(f"The product of the data parallel {config.data_parallel}, "
+                         f"model parallel {config.model_parallel} "
+                         f"pipeline stages {pipeline_stage} "
+                         f"should be less than device_num {device_num}.")
 
     # the config optimizer_shard is same with context.optimizer_shard
     if hasattr(config, "optimizer_shard") and optimizer_shard and optimizer_shard != config.optimizer_shard:
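The first hunk turns plain string literals into f-strings and adds separating spaces; without the f prefix the placeholders were emitted verbatim instead of being interpolated. A minimal sketch of the difference, using hypothetical values outside MindSpore:

# Hypothetical values standing in for the config attributes; not MindSpore code.
data_parallel, model_parallel, pipeline_stage, device_num = 4, 2, 2, 8

# Old form: plain strings, so the braces are printed literally and the
# implicitly concatenated pieces run together.
old_msg = ("The product of the data parallel {config.data_parallel},"
           "model parallel {config.model_parallel}"
           "pipeline stages {pipeline_stage}"
           "should be less than device_num {device_num}")

# New form: f-strings interpolate the values, and the trailing spaces keep
# the concatenated pieces readable.
new_msg = (f"The product of the data parallel {data_parallel}, "
           f"model parallel {model_parallel} "
           f"pipeline stages {pipeline_stage} "
           f"should be less than device_num {device_num}.")

print(old_msg)  # ...data parallel {config.data_parallel},model parallel {config.model_parallel}...
print(new_msg)  # The product of the data parallel 4, model parallel 2 pipeline stages 2 should be less than device_num 8.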
@@ -160,5 +160,5 @@ def _check_config(config):
     if hasattr(config, 'pipeline_stage') and hasattr(config, 'micro_batch_num')\
             and config.pipeline_stage < config.micro_batch_num:
         raise ValueError(
-            f"The pipeline stage {config.pipeline_stage} should be greater than the micro_batch_num"
+            f"The pipeline stage {config.pipeline_stage} should be greater than the micro_batch_num "
             f"{config.micro_batch_num}.")
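The second hunk only appends a space inside the first literal; adjacent f-string literals are concatenated implicitly, so without it the value is glued to the preceding word. A small illustration with a made-up value:

micro_batch_num = 16
without_space = (f"should be greater than the micro_batch_num"
                 f"{micro_batch_num}.")
with_space = (f"should be greater than the micro_batch_num "
              f"{micro_batch_num}.")
print(without_space)  # should be greater than the micro_batch_num16.
print(with_space)     # should be greater than the micro_batch_num 16.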
@@ -680,21 +680,6 @@ class MultiHeadAttention(Cell):
 
     def construct(self, query_tensor, key_tensor, value_tensor, attention_mask, key_past=None,
                   value_past=None, batch_valid_length=None):
-        """
-        multi head attention
-
-        Inputs:
-            from_tensor: output of previous layer
-            attention_mask: the attention mask matrix with shape (batch_size,
-            seq_length, seq_length)
-            key_past: previous saved key state
-            value_past: previous saved value state
-            batch_valid_length: the valid input seq_length without padding
-
-        Returns:
-            output: Tensor, the output logits of this layer
-            layer_present: Tensor, the feature map of current layer
-        """
         self._check_inputs(query_tensor, key_tensor, value_tensor, attention_mask, key_past,
                            value_past, batch_valid_length)
         query_tensor_original_shape = F.shape(query_tensor)
@@ -16,8 +16,8 @@
 
 echo "=============================================================================================================="
 echo "Please run the script as: "
-echo "bash run_distributed_train_gpu.sh RANK_SIZE HOSTFILE DATASET MODE"
-echo "for example: bash run_distributed_train_gpu.sh 16 hostfile_16p /mass_dataset/train_data/ 2.6B"
+echo "bash run_distributed_train_gpu.sh RANK_SIZE HOSTFILE DATASET PER_BATCH_SIZE MODE"
+echo "for example: bash run_distributed_train_gpu.sh 16 hostfile_16p /mass_dataset/train_data/ 16 2.6B"
 echo "It is better to use absolute path."
 echo "=============================================================================================================="
 
@@ -26,7 +26,9 @@ self_path=$(dirname "${script_self}")
 RANK_SIZE=$1
 HOSTFILE=$2
 DATASET=$3
-MODE=$4
+PER_BATCH_SIZE=$4
+MODE=$5
 
 mpirun --allow-run-as-root -x PATH -x LD_LIBRARY_PATH -x PYTHONPATH -x NCCL_DEBUG -x GLOG_v -n $RANK_SIZE --hostfile $HOSTFILE --output-filename log_output --merge-stderr-to-stdout \
       python -s ${self_path}/../train.py \
@@ -35,4 +37,5 @@ mpirun --allow-run-as-root -x PATH -x LD_LIBRARY_PATH -x PYTHONPATH -x NCCL_DEBU
       --device_target="GPU" \
       --data_url=$DATASET \
       --mode=$MODE \
+      --per_batch_size=$PER_BATCH_SIZE \
       --run_type=train > train_log.txt 2>&1 &
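With PER_BATCH_SIZE threaded through to --per_batch_size, the invocation matches the updated usage banner, e.g. (example values taken straight from the script's own echo lines):

bash run_distributed_train_gpu.sh 16 hostfile_16p /mass_dataset/train_data/ 16 2.6B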