forked from mindspore-Ecosystem/mindspore
remove old MINDSPORE_HCCL_CONFIG_PATH in model zoo 2
parent 6e23d76b1e
commit 98b76b9020
@@ -94,7 +94,7 @@ sh run_distribute_train.sh [RANK_TABLE_FILE] [PRETRAINED_MODEL]
 sh run_standalone_train.sh [PRETRAINED_MODEL]
 ```
 
-> hccl.json which is specified by MINDSPORE_HCCL_CONFIG_PATH is needed when you are running a distribute task. You can generate it by using the [hccl_tools](https://gitee.com/mindspore/mindspore/tree/master/model_zoo/utils/hccl_tools).
+> hccl.json which is specified by RANK_TABLE_FILE is needed when you are running a distribute task. You can generate it by using the [hccl_tools](https://gitee.com/mindspore/mindspore/tree/master/model_zoo/utils/hccl_tools).
 > As for PRETRAINED_MODEL,if not set, the model will be trained from the very beginning.Ready-made pretrained_models are not available now. Stay tuned.
 
 #### Result
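For context on the hunk above: after this commit the rank table (hccl.json) is referenced only through RANK_TABLE_FILE. The snippet below is a minimal, hypothetical pre-flight check that the path exists and parses as JSON before a distributed job is launched; it is not part of this commit and does not reproduce the exact hccl_tools output schema.

```python
import json
import os
import sys

def check_rank_table(path):
    """Hypothetical pre-flight check: RANK_TABLE_FILE must point to a readable JSON file."""
    if not os.path.isfile(path):
        sys.exit("error: RANK_TABLE_FILE={} is not a file".format(path))
    with open(path) as f:
        table = json.load(f)  # raises an error if hccl.json is malformed
    # Export under the new variable name used by this commit, for processes launched afterwards.
    os.environ['RANK_TABLE_FILE'] = os.path.abspath(path)
    return table

if __name__ == "__main__":
    check_rank_table(sys.argv[1] if len(sys.argv) > 1 else "hccl.json")
```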
@@ -139,7 +139,6 @@ def main():
 env['DEVICE_ID'] = str(device_id)
 if args.nproc_per_node > 1:
 env['RANK_TABLE_FILE'] = table_fn
-env['MINDSPORE_HCCL_CONFIG_PATH'] = table_fn
 if os.path.exists(device_dir):
 shutil.rmtree(device_dir)
 os.mkdir(device_dir)
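The hunk above keeps only env['RANK_TABLE_FILE'] for the spawned workers. As a rough illustration of that per-process setup (names and structure are assumptions, not the repository's launch.py), a launcher along these lines copies the environment, assigns RANK_ID and DEVICE_ID per process, and points every worker at the same rank table:

```python
import os
import shutil
import subprocess

def launch(nproc_per_node, table_fn, training_cmd):
    """Illustrative launcher: one working directory and one environment per device."""
    procs = []
    for rank_id in range(nproc_per_node):
        env = os.environ.copy()
        env['RANK_ID'] = str(rank_id)
        env['DEVICE_ID'] = str(rank_id)  # assumption: device i serves rank i on a single host
        if nproc_per_node > 1:
            env['RANK_TABLE_FILE'] = table_fn  # single source of truth after this commit
        device_dir = os.path.join(os.getcwd(), 'device{}'.format(rank_id))
        if os.path.exists(device_dir):
            shutil.rmtree(device_dir)
        os.mkdir(device_dir)
        procs.append(subprocess.Popen(training_cmd, env=env, cwd=device_dir))
    return [p.wait() for p in procs]
```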
@@ -22,7 +22,7 @@ fi
 
 if [ ! -f $1 ]
 then
-echo "error: DRANK_TABLE_FILE=$1 is not a file"
+echo "error: RANK_TABLE_FILE=$1 is not a file"
 exit 1
 fi
 
@@ -21,7 +21,7 @@ This example implements pre-training, fine-tuning and evaluation of [BERT-base](
 - Run `run_distribute_pretrain.sh` for distributed pre-training of BERT-base and BERT-NEZHA model.
 
 ``` bash
-sh scripts/run_distribute_pretrain.sh DATA_DIR MINDSPORE_HCCL_CONFIG_PATH
+sh scripts/run_distribute_pretrain.sh DATA_DIR RANK_TABLE_FILE
 ```
 
 ### Fine-Tuning and Evaluation
@@ -62,7 +62,6 @@ def distribute_pretrain():
 cfg = dict(cf.items("config"))
 
 print("hccl_config_dir:", args.hccl_config_dir)
-os.environ['MINDSPORE_HCCL_CONFIG_PATH'] = args.hccl_config_dir
 os.environ['RANK_TABLE_FILE'] = args.hccl_config_dir
 
 cores = multiprocessing.cpu_count()
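After this change, the pre-training helper exports only RANK_TABLE_FILE from its --hccl_config_dir argument. A hedged sketch of that argument-to-environment step (the argument name and the printed line are copied from the hunk, the rest is assumed):

```python
import argparse
import multiprocessing
import os

def parse_and_export():
    """Sketch: map the --hccl_config_dir argument onto the single RANK_TABLE_FILE variable."""
    parser = argparse.ArgumentParser(description="distributed pre-training helper (illustrative)")
    parser.add_argument("--hccl_config_dir", required=True,
                        help="path to hccl.json generated by hccl_tools")
    args = parser.parse_args()

    print("hccl_config_dir:", args.hccl_config_dir)
    # Only the new variable is exported; MINDSPORE_HCCL_CONFIG_PATH is gone after this commit.
    os.environ['RANK_TABLE_FILE'] = args.hccl_config_dir

    cores = multiprocessing.cpu_count()
    return args, cores
```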
@@ -16,7 +16,7 @@
 
 echo "=============================================================================================================="
 echo "Please run the scipt as: "
-echo "bash run_distribute_pretrain.sh DATA_DIR MINDSPORE_HCCL_CONFIG_PATH"
+echo "bash run_distribute_pretrain.sh DATA_DIR RANK_TABLE_FILE"
 echo "for example: bash run_distribute_pretrain.sh /path/dataset /path/hccl.json"
 echo "It is better to use absolute path."
 echo "For hyper parameter, please note that you should customize the scripts:
@@ -21,7 +21,7 @@
 - Run `run_distribute_gd.sh` for distributed general distill of BERT-base model.
 
 ``` bash
-bash scripts/run_distribute_gd.sh DEVICE_NUM EPOCH_SIZE MINDSPORE_HCCL_CONFIG_PATH
+bash scripts/run_distribute_gd.sh DEVICE_NUM EPOCH_SIZE RANK_TABLE_FILE
 ```
 
 ### Task Distill
@@ -16,7 +16,7 @@
 
 echo "=============================================================================================================="
 echo "Please run the scipt as: "
-echo "bash scripts/run_distribute_gd.sh DEVICE_NUM EPOCH_SIZE MINDSPORE_HCCL_CONFIG_PATH"
+echo "bash scripts/run_distribute_gd.sh DEVICE_NUM EPOCH_SIZE RANK_TABLE_FILE"
 echo "for example: bash scripts/run_distribute_gd.sh 8 40 /path/hccl.json"
 echo "It is better to use absolute path."
 echo "running....... please see details by LOG{}/log.txt"
@@ -25,7 +25,6 @@ echo "=============================================================================================================="
 EPOCH_SIZE=$2
 
 PROJECT_DIR=$(cd "$(dirname "$0")" || exit; pwd)
-export MINDSPORE_HCCL_CONFIG_PATH=$3
 export RANK_TABLE_FILE=$3
 export RANK_SIZE=$1
 cores=`cat /proc/cpuinfo|grep "processor" |wc -l`
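The shell hunk above counts host CPU cores right after exporting RANK_TABLE_FILE and RANK_SIZE; in launchers of this kind the count is typically used to give each rank its own slice of cores. A hypothetical Python version of that split (the repository's script does the arithmetic in shell, and any taskset binding is assumed here, not taken from this diff):

```python
import multiprocessing
import os

def core_range(rank_id, rank_size=None, cores=None):
    """Illustrative split of host cores across ranks, mirroring the shell arithmetic loosely."""
    rank_size = rank_size or int(os.environ.get('RANK_SIZE', '1'))
    cores = cores or multiprocessing.cpu_count()
    per_rank = max(1, cores // rank_size)
    start = rank_id * per_rank
    end = start + per_rank - 1
    return start, end  # e.g. usable as `taskset -c start-end` when launching the rank

# Example: 8 ranks on a 96-core host -> rank 3 gets cores 36-47.
print(core_range(3, rank_size=8, cores=96))
```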
@@ -62,7 +62,6 @@ def distribute_pretrain():
 cfg = dict(cf.items("config"))
 
 print("hccl_config_dir:", args.hccl_config_dir)
-os.environ['MINDSPORE_HCCL_CONFIG_PATH'] = args.hccl_config_dir
 os.environ['RANK_TABLE_FILE'] = args.hccl_config_dir
 
 cores = multiprocessing.cpu_count()