diff --git a/model_zoo/official/cv/maskrcnn/README.md b/model_zoo/official/cv/maskrcnn/README.md index 016895f52eb..89a123b6ca7 100644 --- a/model_zoo/official/cv/maskrcnn/README.md +++ b/model_zoo/official/cv/maskrcnn/README.md @@ -94,7 +94,7 @@ sh run_distribute_train.sh [RANK_TABLE_FILE] [PRETRAINED_MODEL] sh run_standalone_train.sh [PRETRAINED_MODEL] ``` -> hccl.json which is specified by MINDSPORE_HCCL_CONFIG_PATH is needed when you are running a distribute task. You can generate it by using the [hccl_tools](https://gitee.com/mindspore/mindspore/tree/master/model_zoo/utils/hccl_tools). +> hccl.json which is specified by RANK_TABLE_FILE is needed when you are running a distribute task. You can generate it by using the [hccl_tools](https://gitee.com/mindspore/mindspore/tree/master/model_zoo/utils/hccl_tools). > As for PRETRAINED_MODEL,if not set, the model will be trained from the very beginning.Ready-made pretrained_models are not available now. Stay tuned. #### Result diff --git a/model_zoo/official/cv/mobilenetv3/src/launch.py b/model_zoo/official/cv/mobilenetv3/src/launch.py index aba74379f72..df5f4e65f04 100644 --- a/model_zoo/official/cv/mobilenetv3/src/launch.py +++ b/model_zoo/official/cv/mobilenetv3/src/launch.py @@ -139,7 +139,6 @@ def main(): env['DEVICE_ID'] = str(device_id) if args.nproc_per_node > 1: env['RANK_TABLE_FILE'] = table_fn - env['RANK_TABLE_FILE'] = table_fn if os.path.exists(device_dir): shutil.rmtree(device_dir) os.mkdir(device_dir) diff --git a/model_zoo/official/cv/resnet_thor/scripts/run_distribute_train.sh b/model_zoo/official/cv/resnet_thor/scripts/run_distribute_train.sh index 1fa72768ae4..63d192bfa13 100644 --- a/model_zoo/official/cv/resnet_thor/scripts/run_distribute_train.sh +++ b/model_zoo/official/cv/resnet_thor/scripts/run_distribute_train.sh @@ -22,7 +22,7 @@ fi if [ ! -f $1 ] then - echo "error: DRANK_TABLE_FILE=$1 is not a file" + echo "error: RANK_TABLE_FILE=$1 is not a file" exit 1 fi diff --git a/model_zoo/official/nlp/bert/README.md b/model_zoo/official/nlp/bert/README.md index a54c1faf991..6803fc5fced 100644 --- a/model_zoo/official/nlp/bert/README.md +++ b/model_zoo/official/nlp/bert/README.md @@ -21,7 +21,7 @@ This example implements pre-training, fine-tuning and evaluation of [BERT-base]( - Run `run_distribute_pretrain.sh` for distributed pre-training of BERT-base and BERT-NEZHA model. ``` bash - sh scripts/run_distribute_pretrain.sh DATA_DIR MINDSPORE_HCCL_CONFIG_PATH + sh scripts/run_distribute_pretrain.sh DATA_DIR RANK_TABLE_FILE ``` ### Fine-Tuning and Evaluation diff --git a/model_zoo/official/nlp/bert/scripts/ascend_distributed_launcher/run_distribute_pretrain.py b/model_zoo/official/nlp/bert/scripts/ascend_distributed_launcher/run_distribute_pretrain.py index b230f71fad2..efc97e0fbee 100644 --- a/model_zoo/official/nlp/bert/scripts/ascend_distributed_launcher/run_distribute_pretrain.py +++ b/model_zoo/official/nlp/bert/scripts/ascend_distributed_launcher/run_distribute_pretrain.py @@ -62,7 +62,6 @@ def distribute_pretrain(): cfg = dict(cf.items("config")) print("hccl_config_dir:", args.hccl_config_dir) - os.environ['MINDSPORE_HCCL_CONFIG_PATH'] = args.hccl_config_dir os.environ['RANK_TABLE_FILE'] = args.hccl_config_dir cores = multiprocessing.cpu_count() diff --git a/model_zoo/official/nlp/bert/scripts/run_distribute_pretrain.sh b/model_zoo/official/nlp/bert/scripts/run_distribute_pretrain.sh index 422309fea45..be910fb8441 100644 --- a/model_zoo/official/nlp/bert/scripts/run_distribute_pretrain.sh +++ b/model_zoo/official/nlp/bert/scripts/run_distribute_pretrain.sh @@ -16,7 +16,7 @@ echo "==============================================================================================================" echo "Please run the scipt as: " -echo "bash run_distribute_pretrain.sh DATA_DIR MINDSPORE_HCCL_CONFIG_PATH" +echo "bash run_distribute_pretrain.sh DATA_DIR RANK_TABLE_FILE" echo "for example: bash run_distribute_pretrain.sh /path/dataset /path/hccl.json" echo "It is better to use absolute path." echo "For hyper parameter, please note that you should customize the scripts: diff --git a/model_zoo/official/nlp/tinybert/README.md b/model_zoo/official/nlp/tinybert/README.md index aa96d246e50..c00f88f0596 100644 --- a/model_zoo/official/nlp/tinybert/README.md +++ b/model_zoo/official/nlp/tinybert/README.md @@ -21,7 +21,7 @@ - Run `run_distribute_gd.sh` for distributed general distill of BERT-base model. ``` bash - bash scripts/run_distribute_gd.sh DEVICE_NUM EPOCH_SIZE MINDSPORE_HCCL_CONFIG_PATH + bash scripts/run_distribute_gd.sh DEVICE_NUM EPOCH_SIZE RANK_TABLE_FILE ``` ### Task Distill diff --git a/model_zoo/official/nlp/tinybert/scripts/run_distribute_gd.sh b/model_zoo/official/nlp/tinybert/scripts/run_distribute_gd.sh index d45c280723a..667d7da6faf 100644 --- a/model_zoo/official/nlp/tinybert/scripts/run_distribute_gd.sh +++ b/model_zoo/official/nlp/tinybert/scripts/run_distribute_gd.sh @@ -16,7 +16,7 @@ echo "==============================================================================================================" echo "Please run the scipt as: " -echo "bash scripts/run_distribute_gd.sh DEVICE_NUM EPOCH_SIZE MINDSPORE_HCCL_CONFIG_PATH" +echo "bash scripts/run_distribute_gd.sh DEVICE_NUM EPOCH_SIZE RANK_TABLE_FILE" echo "for example: bash scripts/run_distribute_gd.sh 8 40 /path/hccl.json" echo "It is better to use absolute path." echo "running....... please see details by LOG{}/log.txt" @@ -25,7 +25,6 @@ echo "========================================================================== EPOCH_SIZE=$2 PROJECT_DIR=$(cd "$(dirname "$0")" || exit; pwd) -export MINDSPORE_HCCL_CONFIG_PATH=$3 export RANK_TABLE_FILE=$3 export RANK_SIZE=$1 cores=`cat /proc/cpuinfo|grep "processor" |wc -l` diff --git a/model_zoo/utils/ascend_distributed_launcher/run_distribute_pretrain.py b/model_zoo/utils/ascend_distributed_launcher/run_distribute_pretrain.py index b230f71fad2..efc97e0fbee 100644 --- a/model_zoo/utils/ascend_distributed_launcher/run_distribute_pretrain.py +++ b/model_zoo/utils/ascend_distributed_launcher/run_distribute_pretrain.py @@ -62,7 +62,6 @@ def distribute_pretrain(): cfg = dict(cf.items("config")) print("hccl_config_dir:", args.hccl_config_dir) - os.environ['MINDSPORE_HCCL_CONFIG_PATH'] = args.hccl_config_dir os.environ['RANK_TABLE_FILE'] = args.hccl_config_dir cores = multiprocessing.cpu_count()