forked from mindspore-Ecosystem/mindspore
!10052 Set HCCL_CONNECT_TIMEOUT=600 for transformer distributed training
From: @yuchaojie Reviewed-by: @linqingke,@liangchenghui Signed-off-by: @linqingke
This commit is contained in:
commit
b2e98083c6
|
@ -28,6 +28,7 @@ cd run_distribute_train || exit
|
||||||
EPOCH_SIZE=$2
|
EPOCH_SIZE=$2
|
||||||
DATA_PATH=$3
|
DATA_PATH=$3
|
||||||
|
|
||||||
|
export HCCL_CONNECT_TIMEOUT=600
|
||||||
export RANK_TABLE_FILE=$4
|
export RANK_TABLE_FILE=$4
|
||||||
export RANK_SIZE=$1
|
export RANK_SIZE=$1
|
||||||
export HCCL_FLAG=1
|
export HCCL_FLAG=1
|
||||||
|
|
Loading…
Reference in New Issue