forked from mindspore-Ecosystem/mindspore
hide doc comment of the construct
parent 6248117b1c
commit 75d16a4a97
@@ -145,10 +145,10 @@ def _check_config(config):
 
     # dp * pp * pipeline_stage <= device_num
     if config.data_parallel * config.model_parallel * pipeline_stage > device_num:
-        raise ValueError("The product of the data parallel {config.data_parallel},"
-                         "model parallel {config.model_parallel}"
-                         "pipeline stages {pipeline_stage}"
-                         "should be less than device_num {device_num}")
+        raise ValueError(f"The product of the data parallel {config.data_parallel}, "
+                         f"model parallel {config.model_parallel} "
+                         f"pipeline stages {pipeline_stage} "
+                         f"should be less than device_num {device_num}.")
 
     # the config optimizer_shard is same with context.optimizer_shard
     if hasattr(config, "optimizer_shard") and optimizer_shard and optimizer_shard != config.optimizer_shard:
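The first hunk turns plain string literals into f-strings and adds separating spaces; without the f prefix the placeholders were emitted verbatim instead of being interpolated. A minimal sketch of the difference, using hypothetical values outside MindSpore:

# Hypothetical values standing in for the config attributes; not MindSpore code.
data_parallel, model_parallel, pipeline_stage, device_num = 4, 2, 2, 8

# Old form: plain strings, so the braces are printed literally and the
# implicitly concatenated pieces run together.
old_msg = ("The product of the data parallel {config.data_parallel},"
           "model parallel {config.model_parallel}"
           "pipeline stages {pipeline_stage}"
           "should be less than device_num {device_num}")

# New form: f-strings interpolate the values, and the trailing spaces keep
# the concatenated pieces readable.
new_msg = (f"The product of the data parallel {data_parallel}, "
           f"model parallel {model_parallel} "
           f"pipeline stages {pipeline_stage} "
           f"should be less than device_num {device_num}.")

print(old_msg)  # ...data parallel {config.data_parallel},model parallel {config.model_parallel}...
print(new_msg)  # The product of the data parallel 4, model parallel 2 pipeline stages 2 should be less than device_num 8.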
@@ -160,5 +160,5 @@ def _check_config(config):
     if hasattr(config, 'pipeline_stage') and hasattr(config, 'micro_batch_num')\
             and config.pipeline_stage < config.micro_batch_num:
         raise ValueError(
-            f"The pipeline stage {config.pipeline_stage} should be greater than the micro_batch_num"
+            f"The pipeline stage {config.pipeline_stage} should be greater than the micro_batch_num "
             f"{config.micro_batch_num}.")
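The second hunk only appends a space inside the first literal; adjacent f-string literals are concatenated implicitly, so without it the value is glued to the preceding word. A small illustration with a made-up value:

micro_batch_num = 16
without_space = (f"should be greater than the micro_batch_num"
                 f"{micro_batch_num}.")
with_space = (f"should be greater than the micro_batch_num "
              f"{micro_batch_num}.")
print(without_space)  # should be greater than the micro_batch_num16.
print(with_space)     # should be greater than the micro_batch_num 16.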
@@ -680,21 +680,6 @@ class MultiHeadAttention(Cell):
 
     def construct(self, query_tensor, key_tensor, value_tensor, attention_mask, key_past=None,
                   value_past=None, batch_valid_length=None):
-        """
-        multi head attention
-
-        Inputs:
-            from_tensor: output of previous layer
-            attention_mask: the attention mask matrix with shape (batch_size,
-            seq_length, seq_length)
-            key_past: previous saved key state
-            value_past: previous saved value state
-            batch_valid_length: the valid input seq_length without padding
-
-        Returns:
-            output: Tensor, the output logits of this layer
-            layer_present: Tensor, the feature map of current layer
-        """
         self._check_inputs(query_tensor, key_tensor, value_tensor, attention_mask, key_past,
                            value_past, batch_valid_length)
         query_tensor_original_shape = F.shape(query_tensor)
@@ -16,8 +16,8 @@
 
 echo "=============================================================================================================="
 echo "Please run the script as: "
-echo "bash run_distributed_train_gpu.sh RANK_SIZE HOSTFILE DATASET MODE"
-echo "for example: bash run_distributed_train_gpu.sh 16 hostfile_16p /mass_dataset/train_data/ 2.6B"
+echo "bash run_distributed_train_gpu.sh RANK_SIZE HOSTFILE DATASET PER_BATCH_SIZE MODE"
+echo "for example: bash run_distributed_train_gpu.sh 16 hostfile_16p /mass_dataset/train_data/ 16 2.6B"
 echo "It is better to use absolute path."
 echo "=============================================================================================================="
 
@@ -26,7 +26,9 @@ self_path=$(dirname "${script_self}")
 RANK_SIZE=$1
 HOSTFILE=$2
 DATASET=$3
-MODE=$4
+PER_BATCH_SIZE=$4
+MODE=$5
 
 mpirun --allow-run-as-root -x PATH -x LD_LIBRARY_PATH -x PYTHONPATH -x NCCL_DEBUG -x GLOG_v -n $RANK_SIZE --hostfile $HOSTFILE --output-filename log_output --merge-stderr-to-stdout \
       python -s ${self_path}/../train.py \
@@ -35,4 +37,5 @@ mpirun --allow-run-as-root -x PATH -x LD_LIBRARY_PATH -x PYTHONPATH -x NCCL_DEBU
       --device_target="GPU" \
       --data_url=$DATASET \
       --mode=$MODE \
+      --per_batch_size=$PER_BATCH_SIZE \
       --run_type=train > train_log.txt 2>&1 &
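With PER_BATCH_SIZE threaded through to --per_batch_size, the invocation matches the updated usage banner, e.g. (example values taken straight from the script's own echo lines):

bash run_distributed_train_gpu.sh 16 hostfile_16p /mass_dataset/train_data/ 16 2.6B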