forked from mindspore-Ecosystem/mindspore
remove old MINDSPORE_HCCL_CONFIG_PATH in model zoo 2
This commit is contained in:
parent
6e23d76b1e
commit
98b76b9020
|
@ -94,7 +94,7 @@ sh run_distribute_train.sh [RANK_TABLE_FILE] [PRETRAINED_MODEL]
|
|||
sh run_standalone_train.sh [PRETRAINED_MODEL]
|
||||
```
|
||||
|
||||
> hccl.json which is specified by MINDSPORE_HCCL_CONFIG_PATH is needed when you are running a distribute task. You can generate it by using the [hccl_tools](https://gitee.com/mindspore/mindspore/tree/master/model_zoo/utils/hccl_tools).
|
||||
> hccl.json, which is specified by RANK_TABLE_FILE, is needed when you are running a distributed task. You can generate it by using the [hccl_tools](https://gitee.com/mindspore/mindspore/tree/master/model_zoo/utils/hccl_tools).
|
||||
> As for PRETRAINED_MODEL, if not set, the model will be trained from the very beginning. Ready-made pretrained_models are not available now. Stay tuned.
|
||||
|
||||
#### Result
|
||||
|
|
|
@ -139,7 +139,6 @@ def main():
|
|||
env['DEVICE_ID'] = str(device_id)
|
||||
if args.nproc_per_node > 1:
|
||||
env['RANK_TABLE_FILE'] = table_fn
|
||||
env['RANK_TABLE_FILE'] = table_fn
|
||||
if os.path.exists(device_dir):
|
||||
shutil.rmtree(device_dir)
|
||||
os.mkdir(device_dir)
|
||||
|
|
|
@ -22,7 +22,7 @@ fi
|
|||
|
||||
if [ ! -f $1 ]
|
||||
then
|
||||
echo "error: DRANK_TABLE_FILE=$1 is not a file"
|
||||
echo "error: RANK_TABLE_FILE=$1 is not a file"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
|
|
|
@ -21,7 +21,7 @@ This example implements pre-training, fine-tuning and evaluation of [BERT-base](
|
|||
- Run `run_distribute_pretrain.sh` for distributed pre-training of BERT-base and BERT-NEZHA models.
|
||||
|
||||
``` bash
|
||||
sh scripts/run_distribute_pretrain.sh DATA_DIR MINDSPORE_HCCL_CONFIG_PATH
|
||||
sh scripts/run_distribute_pretrain.sh DATA_DIR RANK_TABLE_FILE
|
||||
```
|
||||
|
||||
### Fine-Tuning and Evaluation
|
||||
|
|
|
@ -62,7 +62,6 @@ def distribute_pretrain():
|
|||
cfg = dict(cf.items("config"))
|
||||
|
||||
print("hccl_config_dir:", args.hccl_config_dir)
|
||||
os.environ['MINDSPORE_HCCL_CONFIG_PATH'] = args.hccl_config_dir
|
||||
os.environ['RANK_TABLE_FILE'] = args.hccl_config_dir
|
||||
|
||||
cores = multiprocessing.cpu_count()
|
||||
|
|
|
@ -16,7 +16,7 @@
|
|||
|
||||
echo "=============================================================================================================="
|
||||
echo "Please run the scipt as: "
|
||||
echo "bash run_distribute_pretrain.sh DATA_DIR MINDSPORE_HCCL_CONFIG_PATH"
|
||||
echo "bash run_distribute_pretrain.sh DATA_DIR RANK_TABLE_FILE"
|
||||
echo "for example: bash run_distribute_pretrain.sh /path/dataset /path/hccl.json"
|
||||
echo "It is better to use absolute path."
|
||||
echo "For hyper parameter, please note that you should customize the scripts:
|
||||
|
|
|
@ -21,7 +21,7 @@
|
|||
- Run `run_distribute_gd.sh` for distributed general distill of BERT-base model.
|
||||
|
||||
``` bash
|
||||
bash scripts/run_distribute_gd.sh DEVICE_NUM EPOCH_SIZE MINDSPORE_HCCL_CONFIG_PATH
|
||||
bash scripts/run_distribute_gd.sh DEVICE_NUM EPOCH_SIZE RANK_TABLE_FILE
|
||||
```
|
||||
|
||||
### Task Distill
|
||||
|
|
|
@ -16,7 +16,7 @@
|
|||
|
||||
echo "=============================================================================================================="
|
||||
echo "Please run the scipt as: "
|
||||
echo "bash scripts/run_distribute_gd.sh DEVICE_NUM EPOCH_SIZE MINDSPORE_HCCL_CONFIG_PATH"
|
||||
echo "bash scripts/run_distribute_gd.sh DEVICE_NUM EPOCH_SIZE RANK_TABLE_FILE"
|
||||
echo "for example: bash scripts/run_distribute_gd.sh 8 40 /path/hccl.json"
|
||||
echo "It is better to use absolute path."
|
||||
echo "running....... please see details by LOG{}/log.txt"
|
||||
|
@ -25,7 +25,6 @@ echo "==========================================================================
|
|||
EPOCH_SIZE=$2
|
||||
|
||||
PROJECT_DIR=$(cd "$(dirname "$0")" || exit; pwd)
|
||||
export MINDSPORE_HCCL_CONFIG_PATH=$3
|
||||
export RANK_TABLE_FILE=$3
|
||||
export RANK_SIZE=$1
|
||||
cores=`cat /proc/cpuinfo|grep "processor" |wc -l`
|
||||
|
|
|
@ -62,7 +62,6 @@ def distribute_pretrain():
|
|||
cfg = dict(cf.items("config"))
|
||||
|
||||
print("hccl_config_dir:", args.hccl_config_dir)
|
||||
os.environ['MINDSPORE_HCCL_CONFIG_PATH'] = args.hccl_config_dir
|
||||
os.environ['RANK_TABLE_FILE'] = args.hccl_config_dir
|
||||
|
||||
cores = multiprocessing.cpu_count()
|
||||
|
|
Loading…
Reference in New Issue