From 8aae0a18c7ef1df1ddfa81bebc82fbdd1f807019 Mon Sep 17 00:00:00 2001 From: unknown Date: Fri, 29 May 2020 02:08:42 +0800 Subject: [PATCH] add scripts --- .../scripts/run_distribute_pretrain.sh | 66 +++++++++++++++++++ model_zoo/deeplabv3/scripts/run_eval.sh | 31 +++++++++ .../deeplabv3/scripts/run_standalone_train.sh | 37 +++++++++++ 3 files changed, 134 insertions(+) create mode 100644 model_zoo/deeplabv3/scripts/run_distribute_pretrain.sh create mode 100644 model_zoo/deeplabv3/scripts/run_eval.sh create mode 100644 model_zoo/deeplabv3/scripts/run_standalone_train.sh diff --git a/model_zoo/deeplabv3/scripts/run_distribute_pretrain.sh b/model_zoo/deeplabv3/scripts/run_distribute_pretrain.sh new file mode 100644 index 00000000000..de6f508051f --- /dev/null +++ b/model_zoo/deeplabv3/scripts/run_distribute_pretrain.sh @@ -0,0 +1,66 @@ +#!/bin/bash +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +echo "==============================================================================================================" +echo "Please run the scipt as: " +echo "bash run_distribute_pretrain.sh DEVICE_NUM EPOCH_SIZE DATA_DIR MINDSPORE_HCCL_CONFIG_PATH" +echo "for example: bash run_distribute_train.sh 8 40 /path/zh-wiki/ /path/hccl.json" +echo "It is better to use absolute path." 
+echo "==============================================================================================================" + +EPOCH_SIZE=$2 +DATA_DIR=$3 + +export MINDSPORE_HCCL_CONFIG_PATH=$4 +export RANK_TABLE_FILE=$4 +export RANK_SIZE=$1 +cores=`cat /proc/cpuinfo|grep "processor" |wc -l` +echo "the number of logical core" $cores +avg_core_per_rank=`expr $cores \/ $RANK_SIZE` +core_gap=`expr $avg_core_per_rank \- 1` +echo "avg_core_per_rank" $avg_core_per_rank +echo "core_gap" $core_gap +for((i=0;i env.log + taskset -c $cmdopt python ../train.py \ + --distribute="true" \ + --epoch_size=$EPOCH_SIZE \ + --device_id=$DEVICE_ID \ + --enable_save_ckpt="true" \ + --checkpoint_url="/store1/deeplabv3/deeplabv3_split_url/train/checkpoint/CKP-12_732.ckpt" \ + --save_checkpoint_steps=10000 \ + --save_checkpoint_num=1 \ + --data_url=$DATA_DIR > log.txt 2>&1 & + cd ../ +done \ No newline at end of file diff --git a/model_zoo/deeplabv3/scripts/run_eval.sh b/model_zoo/deeplabv3/scripts/run_eval.sh new file mode 100644 index 00000000000..a3596e0be34 --- /dev/null +++ b/model_zoo/deeplabv3/scripts/run_eval.sh @@ -0,0 +1,31 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the License); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# httpwww.apache.orglicensesLICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an AS IS BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================
+echo "=============================================================================================================="
+echo "Please run the script as: "
+echo "bash run_eval.sh DEVICE_ID DATA_DIR"
+echo "for example: bash run_eval.sh 0 /path/zh-wiki/ "
+echo "=============================================================================================================="
+# Positional arguments: $1 is forwarded as --device_id, $2 as --data_url.
+DEVICE_ID=$1
+DATA_DIR=$2
+# Create ./ms_log, export it as GLOG_log_dir, then start evaluation.py in the background (output in log.txt).
+mkdir -p ms_log
+CUR_DIR=$(pwd)
+export GLOG_log_dir="${CUR_DIR}/ms_log"
+export GLOG_logtostderr=0
+python evaluation.py \
+    --device_id="$DEVICE_ID" \
+    --checkpoint_url="/store1/deeplabv3/deeplabv3_split_url/train/checkpoint/CKP-12_732.ckpt" \
+    --data_url="$DATA_DIR" > log.txt 2>&1 &
\ No newline at end of file
diff --git a/model_zoo/deeplabv3/scripts/run_standalone_train.sh b/model_zoo/deeplabv3/scripts/run_standalone_train.sh
new file mode 100644
index 00000000000..91fa03194e9
--- /dev/null
+++ b/model_zoo/deeplabv3/scripts/run_standalone_train.sh
@@ -0,0 +1,37 @@
+# Copyright 2020 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+echo "=============================================================================================================="
+echo "Please run the script as: "
+echo "bash run_standalone_train.sh DEVICE_ID EPOCH_SIZE DATA_DIR"
+echo "for example: bash run_standalone_train.sh 0 40 /path/zh-wiki/ "
+echo "=============================================================================================================="
+# Positional arguments: $1 is forwarded as --device_id, $2 as --epoch_size, $3 as --data_url.
+DEVICE_ID=$1
+EPOCH_SIZE=$2
+DATA_DIR=$3
+# Create ./ms_log, export it as GLOG_log_dir, then start train.py in the background (output in log.txt).
+mkdir -p ms_log
+CUR_DIR=$(pwd)
+export GLOG_log_dir="${CUR_DIR}/ms_log"
+export GLOG_logtostderr=0
+python train.py \
+    --distribute="false" \
+    --epoch_size="$EPOCH_SIZE" \
+    --device_id="$DEVICE_ID" \
+    --enable_save_ckpt="true" \
+    --checkpoint_url="/store1/deeplabv3/deeplabv3_split_url/train/checkpoint/CKP-12_732.ckpt" \
+    --save_checkpoint_steps=10000 \
+    --save_checkpoint_num=1 \
+    --data_url="$DATA_DIR" > log.txt 2>&1 &
\ No newline at end of file