Hide the doc comment of the construct method

huangxinjing 2021-08-31 17:20:47 +08:00
parent 6248117b1c
commit 75d16a4a97
3 changed files with 11 additions and 23 deletions

View File

@@ -145,10 +145,10 @@ def _check_config(config):
     # dp * pp * pipeline_stage <= device_num
     if config.data_parallel * config.model_parallel * pipeline_stage > device_num:
-        raise ValueError("The product of the data parallel {config.data_parallel},"
-                         "model parallel {config.model_parallel}"
-                         "pipeline stages {pipeline_stage}"
-                         "should be less than device_num {device_num}")
+        raise ValueError(f"The product of the data parallel {config.data_parallel}, "
+                         f"model parallel {config.model_parallel} "
+                         f"pipeline stages {pipeline_stage} "
+                         f"should be less than device_num {device_num}.")
     # the config optimizer_shard is same with context.optimizer_shard
     if hasattr(config, "optimizer_shard") and optimizer_shard and optimizer_shard != config.optimizer_shard:
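The replaced lines only gain the f prefix and trailing spaces, but that is the whole bug: without the prefix, Python emits the braces and their contents literally instead of interpolating them. A minimal standalone sketch of the difference:

data_parallel = 4
print("data parallel {data_parallel}")   # data parallel {data_parallel}  (braces stay literal)
print(f"data parallel {data_parallel}")  # data parallel 4                (expression interpolated)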
@@ -160,5 +160,5 @@ def _check_config(config):
     if hasattr(config, 'pipeline_stage') and hasattr(config, 'micro_batch_num')\
             and config.pipeline_stage < config.micro_batch_num:
         raise ValueError(
-            f"The pipeline stage {config.pipeline_stage} should be greater than the micro_batch_num"
+            f"The pipeline stage {config.pipeline_stage} should be greater than the micro_batch_num "
             f"{config.micro_batch_num}.")

View File

@@ -680,21 +680,6 @@ class MultiHeadAttention(Cell):
     def construct(self, query_tensor, key_tensor, value_tensor, attention_mask, key_past=None,
                   value_past=None, batch_valid_length=None):
-        """
-        multi head attention
-
-        Inputs:
-            from_tensor: output of previous layer
-            attention_mask: the attention mask matrix with shape (batch_size,
-                seq_length, seq_length)
-            key_past: previous saved key state
-            value_past: previous saved value state
-            batch_valid_length: the valid input seq_length without padding
-
-        Returns:
-            output: Tensor, the output logits of this layer
-            layer_present: Tensor, the feature map of current layer
-        """
         self._check_inputs(query_tensor, key_tensor, value_tensor, attention_mask, key_past,
                            value_past, batch_valid_length)
         query_tensor_original_shape = F.shape(query_tensor)
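Deleting the docstring is what actually hides the documentation: help() and API doc generators publish whatever string literal sits first in a function body (its __doc__), while ordinary comments are invisible to them. A self-contained sketch of the mechanism:

class Example:
    def documented(self):
        """This string becomes documented.__doc__ and shows up in generated docs."""

    def hidden(self):
        # A plain comment leaves __doc__ as None, so nothing is published.
        pass

print(Example.documented.__doc__)  # the string above
print(Example.hidden.__doc__)      # None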

View File

@@ -16,8 +16,8 @@
 echo "=============================================================================================================="
 echo "Please run the script as: "
-echo "bash run_distributed_train_gpu.sh RANK_SIZE HOSTFILE DATASET MODE"
-echo "for example: bash run_distributed_train_gpu.sh 16 hostfile_16p /mass_dataset/train_data/ 2.6B"
+echo "bash run_distributed_train_gpu.sh RANK_SIZE HOSTFILE DATASET PER_BATCH_SIZE MODE"
+echo "for example: bash run_distributed_train_gpu.sh 16 hostfile_16p /mass_dataset/train_data/ 16 2.6B"
 echo "It is better to use absolute path."
 echo "=============================================================================================================="
@@ -26,7 +26,9 @@ self_path=$(dirname "${script_self}")
 RANK_SIZE=$1
 HOSTFILE=$2
 DATASET=$3
-MODE=$4
+PER_BATCH_SIZE=$4
+MODE=$5

 mpirun --allow-run-as-root -x PATH -x LD_LIBRARY_PATH -x PYTHONPATH -x NCCL_DEBUG -x GLOG_v -n $RANK_SIZE --hostfile $HOSTFILE --output-filename log_output --merge-stderr-to-stdout \
     python -s ${self_path}/../train.py \
@@ -35,4 +37,5 @@ mpirun --allow-run-as-root -x PATH -x LD_LIBRARY_PATH -x PYTHONPATH -x NCCL_DEBU
         --device_target="GPU" \
         --data_url=$DATASET \
         --mode=$MODE \
+        --per_batch_size=$PER_BATCH_SIZE \
         --run_type=train > train_log.txt 2>&1 &
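The new positional argument is threaded through to train.py as --per_batch_size; how the training script consumes it is not shown in this diff. A hypothetical argparse sketch of the receiving end (flag names assumed from the launch command, not the repository's actual parser):

import argparse

# Hypothetical sketch: the real train.py parser is outside this diff.
parser = argparse.ArgumentParser(description="distributed training entry")
parser.add_argument("--data_url", type=str, help="dataset path")
parser.add_argument("--mode", type=str, default="2.6B", help="model size")
parser.add_argument("--per_batch_size", type=int, default=16,
                    help="batch size per device, forwarded from the launch script")
parser.add_argument("--run_type", type=str, default="train")
parser.add_argument("--device_target", type=str, default="GPU")
args = parser.parse_args()
print(args.per_batch_size)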