!3795 remove old MINDSPORE_HCCL_CONFIG_PATH in model zoo 2

Merge pull request !3795 from panbingao/master
mindspore-ci-bot 2020-08-03 09:31:49 +08:00 committed by Gitee
commit 59530248a3
9 changed files with 6 additions and 10 deletions

View File

@@ -94,7 +94,7 @@ sh run_distribute_train.sh [RANK_TABLE_FILE] [PRETRAINED_MODEL]
 sh run_standalone_train.sh [PRETRAINED_MODEL]
 ```
-> hccl.json which is specified by MINDSPORE_HCCL_CONFIG_PATH is needed when you are running a distribute task. You can generate it by using the [hccl_tools](https://gitee.com/mindspore/mindspore/tree/master/model_zoo/utils/hccl_tools).
+> The hccl.json file specified by RANK_TABLE_FILE is needed when you are running a distributed task. You can generate it with the [hccl_tools](https://gitee.com/mindspore/mindspore/tree/master/model_zoo/utils/hccl_tools).
 > As for PRETRAINED_MODEL, if not set, the model will be trained from the very beginning. Ready-made pretrained models are not available now. Stay tuned.
 #### Result
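
> Note: after this change, distributed entry points read the rank table from RANK_TABLE_FILE only. A minimal sketch of the consuming side, assuming the MindSpore 0.x Ascend init API (`mindspore.communication.management.init`), which is not part of this diff:

``` python
# Sketch: how a training entry point might pick up the hccl.json generated
# by hccl_tools. The env var name matches this PR; the init API is assumed.
import os
from mindspore import context
from mindspore.communication.management import init

rank_table = os.getenv('RANK_TABLE_FILE')
if not rank_table or not os.path.isfile(rank_table):
    raise EnvironmentError('export RANK_TABLE_FILE=/path/hccl.json before a distributed run')

context.set_context(mode=context.GRAPH_MODE, device_target='Ascend')
init()  # HCCL reads the rank table from the environment
```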

View File

@@ -139,7 +139,6 @@ def main():
     env['DEVICE_ID'] = str(device_id)
     if args.nproc_per_node > 1:
         env['RANK_TABLE_FILE'] = table_fn
-        env['RANK_TABLE_FILE'] = table_fn
     if os.path.exists(device_dir):
         shutil.rmtree(device_dir)
     os.mkdir(device_dir)
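
> The removed line above was a leftover duplicate: after this change the launcher exports the rank table exactly once per worker. A hedged sketch of that per-device setup (the helper name and subprocess wiring are illustrative, not from the diff):

``` python
# Sketch of the per-worker environment the launcher builds; variable names
# (table_fn, device_id, nproc_per_node) follow the diff, the rest is assumed.
import os
import shutil
import subprocess

def launch_worker(cmd, device_id, table_fn, nproc_per_node):
    env = os.environ.copy()
    env['DEVICE_ID'] = str(device_id)
    if nproc_per_node > 1:
        env['RANK_TABLE_FILE'] = table_fn  # set once; no duplicate export
    device_dir = 'device{}'.format(device_id)
    if os.path.exists(device_dir):
        shutil.rmtree(device_dir)  # start each worker from a clean directory
    os.mkdir(device_dir)
    return subprocess.Popen(cmd, env=env, cwd=device_dir)
```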

View File

@ -22,7 +22,7 @@ fi
if [ ! -f $1 ] if [ ! -f $1 ]
then then
echo "error: DRANK_TABLE_FILE=$1 is not a file" echo "error: RANK_TABLE_FILE=$1 is not a file"
exit 1 exit 1
fi fi
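
> The same argument check, expressed in Python for launchers that validate the rank-table path before spawning workers (the function name is hypothetical):

``` python
# Sketch: Python equivalent of the shell check above (name is hypothetical).
import os
import sys

def require_rank_table(path):
    if not os.path.isfile(path):
        sys.exit('error: RANK_TABLE_FILE={} is not a file'.format(path))
    return os.path.abspath(path)  # absolute paths survive per-device cwd changes
```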

View File

@@ -21,7 +21,7 @@ This example implements pre-training, fine-tuning and evaluation of [BERT-base](
 - Run `run_distribute_pretrain.sh` for distributed pre-training of BERT-base and BERT-NEZHA model.
 ``` bash
-sh scripts/run_distribute_pretrain.sh DATA_DIR MINDSPORE_HCCL_CONFIG_PATH
+sh scripts/run_distribute_pretrain.sh DATA_DIR RANK_TABLE_FILE
 ```
 ### Fine-Tuning and Evaluation

View File

@ -62,7 +62,6 @@ def distribute_pretrain():
cfg = dict(cf.items("config")) cfg = dict(cf.items("config"))
print("hccl_config_dir:", args.hccl_config_dir) print("hccl_config_dir:", args.hccl_config_dir)
os.environ['MINDSPORE_HCCL_CONFIG_PATH'] = args.hccl_config_dir
os.environ['RANK_TABLE_FILE'] = args.hccl_config_dir os.environ['RANK_TABLE_FILE'] = args.hccl_config_dir
cores = multiprocessing.cpu_count() cores = multiprocessing.cpu_count()
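
> After this change the script exports the rank table under RANK_TABLE_FILE only; the old MINDSPORE_HCCL_CONFIG_PATH duplicate is gone. A hedged sketch of that handoff plus the core count the next line computes (the per-device split is illustrative):

``` python
# Sketch of the single-variable handoff shown above; the cores-per-device
# arithmetic is an assumption for illustration, not part of the diff.
import multiprocessing
import os

def export_rank_table(hccl_config_dir, device_num=8):
    os.environ['RANK_TABLE_FILE'] = hccl_config_dir  # sole rank-table variable now
    cores = multiprocessing.cpu_count()
    return cores // device_num  # cores available to bind to each worker
```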

View File

@@ -16,7 +16,7 @@
 echo "=============================================================================================================="
 echo "Please run the script as: "
-echo "bash run_distribute_pretrain.sh DATA_DIR MINDSPORE_HCCL_CONFIG_PATH"
+echo "bash run_distribute_pretrain.sh DATA_DIR RANK_TABLE_FILE"
 echo "for example: bash run_distribute_pretrain.sh /path/dataset /path/hccl.json"
 echo "It is better to use absolute path."
 echo "For hyper parameter, please note that you should customize the scripts:

View File

@@ -21,7 +21,7 @@
 - Run `run_distribute_gd.sh` for distributed general distill of BERT-base model.
 ``` bash
-bash scripts/run_distribute_gd.sh DEVICE_NUM EPOCH_SIZE MINDSPORE_HCCL_CONFIG_PATH
+bash scripts/run_distribute_gd.sh DEVICE_NUM EPOCH_SIZE RANK_TABLE_FILE
 ```
 ### Task Distill

View File

@@ -16,7 +16,7 @@
 echo "=============================================================================================================="
 echo "Please run the script as: "
-echo "bash scripts/run_distribute_gd.sh DEVICE_NUM EPOCH_SIZE MINDSPORE_HCCL_CONFIG_PATH"
+echo "bash scripts/run_distribute_gd.sh DEVICE_NUM EPOCH_SIZE RANK_TABLE_FILE"
 echo "for example: bash scripts/run_distribute_gd.sh 8 40 /path/hccl.json"
 echo "It is better to use absolute path."
 echo "running....... please see details by LOG{}/log.txt"
@@ -25,7 +25,6 @@ echo "=============================================================================================================="
 EPOCH_SIZE=$2
 PROJECT_DIR=$(cd "$(dirname "$0")" || exit; pwd)
-export MINDSPORE_HCCL_CONFIG_PATH=$3
 export RANK_TABLE_FILE=$3
 export RANK_SIZE=$1
 cores=`cat /proc/cpuinfo|grep "processor" |wc -l`

View File

@@ -62,7 +62,6 @@ def distribute_pretrain():
     cfg = dict(cf.items("config"))
     print("hccl_config_dir:", args.hccl_config_dir)
-    os.environ['MINDSPORE_HCCL_CONFIG_PATH'] = args.hccl_config_dir
     os.environ['RANK_TABLE_FILE'] = args.hccl_config_dir
     cores = multiprocessing.cpu_count()