forked from mindspore-Ecosystem/mindspore
!22460 fix bugs in modelzoo
Merge pull request !22460 from gengdongjie/code_docs_fix_issues
commit 67f6d6220e
@@ -77,8 +77,8 @@ This project is inherited from [huoyijie/AdvancedEAST](https://github.com/huoyijie/AdvancedEAST)
 ├── export.py            # export model for inference
 ├── prepare_data.py      # exec data preprocessing
 ├── eval.py              # eval net
-├── train.py             # train net
-└── train_mindrecord.py  # train net on user specified mindrecord
+├── train.py             # train net on multi-size input
+└── train_single_size.py # train net on fixed-size input
 ```

 # [Dataset](#contents)
@@ -140,14 +140,14 @@ python train.py --device_target="GPU" --is_distributed=0 --device_id=0 > output.train.log 2>&1 &
 - single device with specific size

 ```bash
-python train_mindrecord.py --device_target="Ascend" --is_distributed=0 --device_id=2 --size=256 > output.train.log 2>&1 &
+python train_single_size.py --device_target="Ascend" --is_distributed=0 --device_id=2 --size=256 > output.train.log 2>&1 &
 ```

 - multiple Ascend devices

 ```bash
 # running in a distributed environment (8P)
-bash scripts/run_distribute_train.sh
+bash run_distribute_train.sh [DATASET_PATH] [RANK_TABLE_FILE]
 ```
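A concrete 8P launch might look like the sketch below; both paths are placeholders for your own dataset and HCCL rank table, not values taken from this repo:

```bash
bash run_distribute_train.sh /path/to/dataset /path/to/rank_table_8pcs.json
```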

 The detailed training parameters are in /src/config.py.
@@ -268,4 +268,4 @@ On the default

 # [ModelZoo Homepage](#contents)

-Please check the official [homepage](https://gitee.com/mindspore/mindspore/tree/master/model_zoo).
+Please check the official [homepage](https://gitee.com/mindspore/mindspore/tree/master/model_zoo).
@@ -1,3 +0,0 @@
-tqdm
-shapely
-opencv
@@ -1,4 +1,5 @@
 numpy
 opencv
 pillow
-tqdm
+shapely
+tqdm
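As a usage note, this dependency list is a standard pip requirements file; assuming it is saved as requirements.txt, it can be installed with:

```bash
pip install -r requirements.txt
```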
@@ -13,13 +13,14 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ============================================================================

 PATH1=$1
+PATH2=$2

 ulimit -u unlimited
-export DEVICE_NUM=2
-export RANK_SIZE=2
-export RANK_TABLE_FILE=$PATH1
+export DEVICE_NUM=8
+export RANK_SIZE=8
+export RANK_TABLE_FILE=$PATH2

 for ((i = 0; i < ${DEVICE_NUM}; i++)); do
     export DEVICE_ID=$i
@@ -32,6 +33,6 @@ for ((i = 0; i < ${DEVICE_NUM}; i++)); do
     cd ./train_parallel$i || exit
     echo "start training for rank $RANK_ID, device $DEVICE_ID"
     env >env.log
-    python train_mindrecord.py --device_target Ascend --is_distributed 1 --device_id $i --data_path /disk1/adenew/icpr/advanced-east_448.mindrecord > log.txt 2>&1 &
+    python train_single_size.py --device_target Ascend --is_distributed 1 --device_id $i --data_path $PATH1 > log.txt 2>&1 &
     cd ..
-done
+done
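Since each rank changes into its own train_parallel$i working directory and redirects output to log.txt there, the progress of, say, rank 0 can be followed with:

```bash
tail -f train_parallel0/log.txt
```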
@@ -20,5 +20,5 @@ echo "bash run_standalone_train_ascend.sh"
 echo "for example: bash run_standalone_train_ascend.sh"
 echo "=============================================================================================================="

-python train_mindrecord.py \
+python train_single_size.py \
     --device_target="Ascend" > output.train.log 2>&1 &
@@ -22,16 +22,16 @@ import time
 import ast

 from mindspore import context, Model
-from mindspore.communication.management import init, get_group_size, get_rank
-from mindspore.train.callback import ModelCheckpoint, CheckpointConfig, LossMonitor, TimeMonitor
-from mindspore.context import ParallelMode
-from mindspore.train.serialization import load_param_into_net, load_checkpoint
 from mindspore.common import set_seed
+from mindspore.communication.management import init, get_rank, get_group_size
+from mindspore.context import ParallelMode
+from mindspore.nn.optim import AdamWeightDecay
+from mindspore.train.callback import ModelCheckpoint, CheckpointConfig, LossMonitor, TimeMonitor
+from mindspore.train.serialization import load_param_into_net, load_checkpoint
 from src.logger import get_logger
-from src.config import config as cfg
 from src.dataset import load_adEAST_dataset
 from src.model import get_AdvancedEast_net
+from src.config import config as cfg

 set_seed(1)
@@ -28,7 +28,6 @@ from mindspore.nn.optim import AdamWeightDecay
 from mindspore.train.callback import ModelCheckpoint, CheckpointConfig, LossMonitor, TimeMonitor
 from mindspore.train.serialization import load_param_into_net, load_checkpoint
 from src.logger import get_logger
-
 from src.config import config as cfg
 from src.dataset import load_adEAST_dataset
 from src.model import get_AdvancedEast_net
@@ -19,7 +19,6 @@ DATA_DIR=$2
 PATH_CHECKPOINT=$3
-
 python ./eval.py \
     --device_target=Ascend \
     --device_id=$DEVICE_ID \
     --checkpoint_path=$PATH_CHECKPOINT \
     --dataset_path=$DATA_DIR > eval.log 2>&1 &
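A hypothetical invocation of the surrounding wrapper script is sketched below; the script name and the leading DEVICE_ID argument are assumptions, since the hunk only confirms DATA_DIR=$2 and PATH_CHECKPOINT=$3:

```bash
bash scripts/run_eval_ascend.sh 0 /path/to/dataset /path/to/AdvancedEast.ckpt
```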
@@ -309,4 +309,10 @@ bash run_infer_310.sh [MINDIR_PATH] [DATA_FILE_PATH] [NEED_PREPROCESS] [DEVICE_ID]

 # ModelZoo Homepage

-Please visit the official [homepage](https://gitee.com/mindspore/mindspore/tree/master/model_zoo).
+Please visit the official [homepage](https://gitee.com/mindspore/mindspore/tree/master/model_zoo).
+
+# FAQ
+
+Refer first to the [ModelZoo FAQ](https://gitee.com/mindspore/mindspore/tree/master/model_zoo#FAQ) for answers to common questions.
+
+- **Q: What should I do about out-of-memory errors in PYNATIVE_MODE?** **A**: Out-of-memory errors usually occur because PYNATIVE_MODE requires more memory. Passing the training argument --batch_size 18 sets the batch size to 18, reducing memory consumption so training can run.
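For example, a single-device launch with the reduced batch size might look like the following; the --batch_size flag comes from the answer above, while the remaining flags mirror the earlier training examples (the exact flag spelling accepted by train.py is an assumption):

```bash
python train.py --device_target="Ascend" --is_distributed=0 --device_id=0 --batch_size=18 > output.train.log 2>&1 &
```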