!20749 DeepText GPU version
Merge pull request !20749 from Mina/deeptextgpu
This commit is contained in:
commit
f7ebff8aea
|
@ -73,9 +73,13 @@ Here we used 4 datasets for training, and 1 datasets for Evaluation.
|
|||
└─moxing_adapter.py # Moxing adapter for ModelArts
|
||||
├─scripts
|
||||
├─run_standalone_train_ascend.sh # launch standalone training with ascend platform(1p)
|
||||
├─run_standalone_train_gpu.sh # launch standalone training with GPU platform(1p)
|
||||
├─run_distribute_train_ascend.sh # launch distributed training with ascend platform(8p)
|
||||
├─run_distribute_train_gpu.sh # launch distributed training with GPU platform(8p)
|
||||
├─run_infer_310.sh # shell script for 310 inference
|
||||
├─run_eval_gpu.sh # launch evaluation with GPU platform
|
||||
└─run_eval_ascend.sh # launch evaluating with ascend platform
|
||||
|
||||
├─src
|
||||
├─DeepText
|
||||
├─__init__.py # package init file
|
||||
|
@ -115,6 +119,17 @@ bash run_standalone_train_ascend.sh [IMGS_PATH] [ANNOS_PATH] [PRETRAINED_PATH] [
|
|||
bash run_eval_ascend.sh [IMGS_PATH] [ANNOS_PATH] [CHECKPOINT_PATH] [COCO_TEXT_PARSER_PATH] [DEVICE_ID]
|
||||
```
|
||||
|
||||
- GPU:
|
||||
|
||||
```bash
|
||||
# distribute training example(8p)
|
||||
sh run_distribute_train_gpu.sh [IMGS_PATH] [ANNOS_PATH] [PRETRAINED_PATH] [COCO_TEXT_PARSER_PATH]
|
||||
# standalone training
|
||||
sh run_standalone_train_gpu.sh [IMGS_PATH] [ANNOS_PATH] [PRETRAINED_PATH] [COCO_TEXT_PARSER_PATH] [DEVICE_ID]
|
||||
# evaluation:
|
||||
sh run_eval_gpu.sh [IMGS_PATH] [ANNOS_PATH] [CHECKPOINT_PATH] [COCO_TEXT_PARSER_PATH] [DEVICE_ID]
|
||||
```
|
||||
|
||||
> Notes:
|
||||
> For RANK_TABLE_FILE, refer to [Link](https://www.mindspore.cn/docs/programming_guide/en/master/distributed_training_ascend.html), and the device_ip can be obtained as described in [Link](https://gitee.com/mindspore/mindspore/tree/master/model_zoo/utils/hccl_tools). For large models like InceptionV4, it is better to set the environment variable `export HCCL_CONNECT_TIMEOUT=600` to extend the HCCL connection checking time from the default 120 seconds to 600 seconds. Otherwise, the connection could time out, since compilation time increases with model size.
|
||||
>
|
||||
|
@ -287,6 +302,14 @@ Evaluation result will be stored in the example path, you can find result like t
|
|||
class 1 precision is 88.01%, recall is 82.77%
|
||||
```
|
||||
|
||||
Evaluation result on GPU will be as follows:
|
||||
|
||||
```python
|
||||
========================================
|
||||
|
||||
class 1 precision is 84.49%, recall is 88.28%
|
||||
```
|
||||
|
||||
## Model Export
|
||||
|
||||
```shell
|
||||
|
@ -322,34 +345,34 @@ class 1 precision is 84.24%, recall is 87.40%, F1 is 85.79%
|
|||
|
||||
### Training Performance
|
||||
|
||||
| Parameters | Ascend |
|
||||
| -------------------------- | ------------------------------------------------------------ |
|
||||
| Model Version | Deeptext |
|
||||
| Resource | Ascend 910; cpu 2.60GHz, 192cores; memory 755G; OS Euler2.8 |
|
||||
| uploaded Date | 12/26/2020 |
|
||||
| MindSpore Version | 1.1.0 |
|
||||
| Dataset | 66040 images |
|
||||
| Batch_size | 2 |
|
||||
| Training Parameters | src/config.py |
|
||||
| Optimizer | Momentum |
|
||||
| Loss Function | SoftmaxCrossEntropyWithLogits for classification, SmoothL2Loss for bbox regression|
|
||||
| Loss | ~0.008 |
|
||||
| Total time (8p) | 4h |
|
||||
| Scripts | [deeptext script](https://gitee.com/mindspore/mindspore/tree/master/model_zoo/official/cv/deeptext) |
|
||||
| Parameters | Ascend | GPU |
|
||||
| -------------------------- | --------------------------------------------------------------------------------------------------- |--------------------------------------- |
|
||||
| Model Version | Deeptext | Deeptext |
|
||||
| Resource | Ascend 910; cpu 2.60GHz, 192cores; memory 755G; OS Euler2.8 | Tesla V100 PCIe 32GB; CPU 2.70GHz; 52cores; Memory 1510G; OS Ubuntu 18.04.5 |
|
||||
| uploaded Date | 12/26/2020 | 7/29/2021 (month/day/year) |
|
||||
| MindSpore Version | 1.1.0 | 1.3.0 |
|
||||
| Dataset | 66040 images | 66040 images |
|
||||
| Batch_size | 2 | 2 |
|
||||
| Training Parameters | src/config.py | src/config.py |
|
||||
| Optimizer | Momentum | Momentum |
|
||||
| Loss Function | SoftmaxCrossEntropyWithLogits for classification, SmoothL2Loss for bbox regression | SoftmaxCrossEntropyWithLogits for classification, SmoothL2Loss for bbox regression |
|
||||
| Loss | ~0.008 | ~0.116 |
|
||||
| Total time (8p) | 4h | 9h |
|
||||
| Scripts | [deeptext script](https://gitee.com/mindspore/mindspore/tree/master/model_zoo/official/cv/deeptext) | [deeptext script](https://gitee.com/mindspore/mindspore/tree/master/model_zoo/official/cv/deeptext) |
|
||||
|
||||
#### Inference Performance
|
||||
|
||||
| Parameters | Ascend |
|
||||
| ------------------- | --------------------------- |
|
||||
| Model Version | Deeptext |
|
||||
| Resource | Ascend 910; cpu 2.60GHz, 192cores; memory 755G; OS Euler2.8 |
|
||||
| Uploaded Date | 12/26/2020 |
|
||||
| MindSpore Version | 1.1.0 |
|
||||
| Dataset | 229 images |
|
||||
| Batch_size | 2 |
|
||||
| Accuracy | F1 score is 84.50% |
|
||||
| Total time | 1 min |
|
||||
| Model for inference | 3492M (.ckpt file) |
|
||||
| Parameters | Ascend | GPU |
|
||||
| ------------------- | -------------------------------------------------------------| --------------------------- |
|
||||
| Model Version | Deeptext | Deeptext |
|
||||
| Resource | Ascend 910; cpu 2.60GHz, 192cores; memory 755G; OS Euler2.8 | Tesla V100 PCIe 32GB; CPU 2.70GHz; 52cores; Memory 1510G; OS Ubuntu 18.04.5 |
|
||||
| Uploaded Date | 12/26/2020 | 7/29/2021 (month/day/year) |
|
||||
| MindSpore Version | 1.1.0 | 1.3.0 |
|
||||
| Dataset | 229 images | 229 images |
|
||||
| Batch_size | 2 | 2 |
|
||||
| Accuracy | F1 score is 84.50% | F1 score is 86.34% |
|
||||
| Total time | 1 min | 1 min |
|
||||
| Model for inference | 3492M (.ckpt file) | 3492M (.ckpt) |
|
||||
|
||||
#### Training performance results
|
||||
|
||||
|
@ -359,7 +382,15 @@ class 1 precision is 84.24%, recall is 87.40%, F1 is 85.79%
|
|||
|
||||
| **Ascend** | train performance |
|
||||
| :--------: | :---------------: |
|
||||
| 8p | 50 img/s |
|
||||
| 8p | 50 img/s |
|
||||
|
||||
| **GPU** | train performance |
|
||||
| :---------: | :---------------: |
|
||||
| 1p | 5 img/s |
|
||||
|
||||
| **GPU** | train performance |
|
||||
| :---------: | :-----------------: |
|
||||
| 8p | 25 img/s |
|
||||
|
||||
# [Description of Random Situation](#contents)
|
||||
|
||||
|
|
|
@ -0,0 +1,67 @@
|
|||
#!/bin/bash
|
||||
# Copyright 2021 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ============================================================================
|
||||
|
||||
if [ $# -ne 4 ]
|
||||
then
|
||||
echo "Usage: sh run_distribute_train_gpu.sh [IMGS_PATH] [ANNOS_PATH] [PRETRAINED_PATH] [COCO_TEXT_PARSER_PATH]"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
get_real_path(){
|
||||
if [ "${1:0:1}" == "/" ]; then
|
||||
echo "$1"
|
||||
else
|
||||
echo "$(realpath -m $PWD/$1)"
|
||||
fi
|
||||
}
|
||||
|
||||
PATH1=$(get_real_path $1)
|
||||
echo $PATH1
|
||||
PATH2=$(get_real_path $2)
|
||||
echo $PATH2
|
||||
PATH3=$(get_real_path $3)
|
||||
echo $PATH3
|
||||
PATH4=$(get_real_path $4)
|
||||
echo $PATH4
|
||||
|
||||
if [ ! -f $PATH3 ]
|
||||
then
|
||||
echo "error: PRETRAINED_PATH=$PATH3 is not a file"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if [ ! -f $PATH4 ]
|
||||
then
|
||||
echo "error: COCO_TEXT_PARSER_PATH=$PATH4 is not a file"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
rm -rf ./train_parallel
|
||||
mkdir ./train_parallel
|
||||
cp ../*.py ./train_parallel
|
||||
cp ../*.yaml ./train_parallel
|
||||
cp *.sh ./train_parallel
|
||||
cp -r ../src ./train_parallel
|
||||
cp -r ../model_utils ./train_parallel
|
||||
cd ./train_parallel || exit
|
||||
|
||||
export RANK_SIZE=8
|
||||
# The working directory is now ./train_parallel, so the parser must go into the
# src copy that train.py will import from, not into scripts/src.
cp $PATH4 ./src/
|
||||
|
||||
echo "======start training======"
|
||||
|
||||
mpirun -n $RANK_SIZE python train.py --imgs_path=$PATH1 --annos_path=$PATH2 --run_distribute=True --device_target="GPU" --pre_trained=$PATH3 > log &
|
||||
cd ..
|
|
@ -0,0 +1,70 @@
|
|||
#!/bin/bash
|
||||
# Copyright 2021 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ============================================================================
|
||||
|
||||
if [ $# != 5 ]
|
||||
then
|
||||
echo "Usage: sh run_eval_gpu.sh [IMGS_PATH] [ANNOS_PATH] [CHECKPOINT_PATH] [COCO_TEXT_PARSER_PATH] [DEVICE_ID]"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
get_real_path(){
|
||||
if [ "${1:0:1}" == "/" ]; then
|
||||
echo "$1"
|
||||
else
|
||||
echo "$(realpath -m $PWD/$1)"
|
||||
fi
|
||||
}
|
||||
PATH1=$(get_real_path $1)
|
||||
PATH2=$(get_real_path $2)
|
||||
PATH3=$(get_real_path $3)
|
||||
PATH4=$(get_real_path $4)
|
||||
echo $PATH1
|
||||
echo $PATH2
|
||||
echo $PATH3
|
||||
echo $PATH4
|
||||
|
||||
|
||||
if [ ! -f $PATH3 ]
|
||||
then
|
||||
echo "error: CHECKPOINT_PATH=$PATH3 is not a file"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if [ ! -f $PATH4 ]
|
||||
then
|
||||
echo "error: COCO_TEXT_PARSER_PATH=$PATH4 is not a file"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
export DEVICE_NUM=1
|
||||
export RANK_SIZE=$DEVICE_NUM
|
||||
export RANK_ID=0
|
||||
cp $PATH4 ../src/
|
||||
if [ -d "eval" ];
|
||||
then
|
||||
rm -rf ./eval
|
||||
fi
|
||||
mkdir ./eval
|
||||
cp ../*.py ./eval
|
||||
cp ../*.yaml ./eval
|
||||
cp *.sh ./eval
|
||||
cp -r ../src ./eval
|
||||
cp -r ../model_utils ./eval
|
||||
cd ./eval || exit
|
||||
env > env.log
|
||||
# DEVICE_ID is the 5th positional argument; no DEVICE_ID variable is set by this script.
echo "start eval for device $5"
|
||||
# CUDA honours CUDA_VISIBLE_DEVICES (plural); the device id comes from argument 5.
CUDA_VISIBLE_DEVICES=$5 python eval.py --imgs_path=$PATH1 --annos_path=$PATH2 --checkpoint_path=$PATH3 &> log &
|
||||
cd ..
|
|
@ -0,0 +1,66 @@
|
|||
#!/bin/bash
|
||||
# Copyright 2021 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ============================================================================
|
||||
|
||||
if [ $# -ne 5 ]
|
||||
then
|
||||
echo "Usage: sh run_standalone_train_gpu.sh [IMGS_PATH] [ANNOS_PATH] [PRETRAINED_PATH] [COCO_TEXT_PARSER_PATH] [DEVICE_ID]"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
get_real_path(){
|
||||
if [ "${1:0:1}" == "/" ]; then
|
||||
echo "$1"
|
||||
else
|
||||
echo "$(realpath -m $PWD/$1)"
|
||||
fi
|
||||
}
|
||||
|
||||
PATH1=$(get_real_path $1)
|
||||
echo $PATH1
|
||||
PATH2=$(get_real_path $2)
|
||||
echo $PATH2
|
||||
PATH3=$(get_real_path $3)
|
||||
echo $PATH3
|
||||
PATH4=$(get_real_path $4)
|
||||
echo $PATH4
|
||||
|
||||
if [ ! -f $PATH3 ]
|
||||
then
|
||||
echo "error: PRETRAINED_PATH=$PATH3 is not a file"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if [ ! -f $PATH4 ]
|
||||
then
|
||||
echo "error: COCO_TEXT_PARSER_PATH=$PATH4 is not a file"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
rm -rf ./train
|
||||
mkdir ./train
|
||||
cp ../*.py ./train
|
||||
cp ../*.yaml ./train
|
||||
cp *.sh ./train
|
||||
cp -r ../src ./train
|
||||
cp -r ../model_utils ./train
|
||||
# Enter the directory this script just created and populated (./train).
cd ./train || exit
|
||||
|
||||
export RANK_SIZE=1
|
||||
# Copy the parser into the src copy inside the working directory, which is
# where train.py will import it from.
cp $PATH4 ./src/
|
||||
|
||||
echo "======start training======"
|
||||
|
||||
# CUDA honours CUDA_VISIBLE_DEVICES (plural); the device id comes from argument 5.
CUDA_VISIBLE_DEVICES=$5 python train.py --imgs_path=$PATH1 --annos_path=$PATH2 --run_distribute=False --device_target="GPU" --pre_trained=$PATH3 > log &
|
|
@ -19,9 +19,6 @@ import mindspore.nn as nn
|
|||
import mindspore.common.dtype as mstype
|
||||
from mindspore.ops import operations as P
|
||||
from mindspore import Tensor
|
||||
from mindspore import context
|
||||
|
||||
context.set_context(mode=context.GRAPH_MODE, device_target="Ascend")
|
||||
|
||||
|
||||
class Proposal(nn.Cell):
|
||||
|
|
|
@ -21,7 +21,12 @@ from mindspore.ops import operations as P
|
|||
from mindspore.common.tensor import Tensor
|
||||
from mindspore.common.initializer import initializer
|
||||
from mindspore.common.parameter import Parameter
|
||||
from model_utils.config import config as default_config
|
||||
|
||||
if default_config.export_device_target == "Ascend":
|
||||
mtype = mstype.float16
|
||||
else:
|
||||
mtype = mstype.float32
|
||||
|
||||
class DenseNoTranpose(nn.Cell):
|
||||
"""Dense method"""
|
||||
|
@ -38,8 +43,8 @@ class DenseNoTranpose(nn.Cell):
|
|||
self.cast = P.Cast()
|
||||
|
||||
def construct(self, x):
|
||||
x = self.cast(x, mstype.float16)
|
||||
weight = self.cast(self.weight, mstype.float16)
|
||||
x = self.cast(x, mtype)
|
||||
weight = self.cast(self.weight, mtype)
|
||||
output = self.bias_add(self.matmul(x, weight), self.bias)
|
||||
return output
|
||||
|
||||
|
|
|
@ -29,9 +29,9 @@ from model_utils.moxing_adapter import moxing_wrapper
|
|||
from model_utils.device_adapter import get_device_id, get_device_num, get_rank_id
|
||||
|
||||
import mindspore.common.dtype as mstype
|
||||
from mindspore import context, Tensor
|
||||
from mindspore import context, Tensor, Parameter
|
||||
from mindspore.common import set_seed
|
||||
from mindspore.communication.management import init
|
||||
from mindspore.communication.management import init, get_group_size, get_rank
|
||||
from mindspore.context import ParallelMode
|
||||
from mindspore.nn import Momentum
|
||||
from mindspore.train import Model
|
||||
|
@ -42,7 +42,8 @@ np.set_printoptions(threshold=np.inf)
|
|||
|
||||
set_seed(1)
|
||||
|
||||
context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", device_id=get_device_id())
|
||||
context.set_context(mode=context.GRAPH_MODE, device_target=config.device_target, device_id=get_device_id())
|
||||
|
||||
|
||||
def modelarts_pre_process():
|
||||
'''modelarts pre process function.'''
|
||||
|
@ -54,8 +55,7 @@ def modelarts_pre_process():
|
|||
if zip_isexist:
|
||||
fz = zipfile.ZipFile(zip_file, 'r')
|
||||
data_num = len(fz.namelist())
|
||||
print("Extract Start...", flush=True)
|
||||
print("unzip file num: {}".format(data_num), flush=True)
|
||||
print("Extract Start. unzip file num: {}".format(data_num), flush=True)
|
||||
data_print = int(data_num / 100) if data_num > 100 else 1
|
||||
i = 0
|
||||
for file in fz.namelist():
|
||||
|
@ -100,12 +100,21 @@ def modelarts_pre_process():
|
|||
|
||||
@moxing_wrapper(pre_process=modelarts_pre_process)
|
||||
def run_train():
|
||||
device_type = "Ascend" if context.get_context("device_target") == "Ascend" else "GPU"
|
||||
if config.run_distribute:
|
||||
rank = get_rank_id()
|
||||
device_num = get_device_num()
|
||||
if device_type == "Ascend":
|
||||
rank = get_rank_id()
|
||||
device_num = get_device_num()
|
||||
|
||||
else:
|
||||
context.reset_auto_parallel_context()
|
||||
rank = get_rank()
|
||||
device_num = get_group_size()
|
||||
|
||||
context.set_auto_parallel_context(device_num=device_num, parallel_mode=ParallelMode.DATA_PARALLEL,
|
||||
gradients_mean=True)
|
||||
init()
|
||||
|
||||
else:
|
||||
rank = get_rank_id()
|
||||
device_num = 1
|
||||
|
@ -151,9 +160,13 @@ def run_train():
|
|||
load_path = config.pre_trained
|
||||
if load_path != "":
|
||||
param_dict = load_checkpoint(load_path)
|
||||
if device_type == "GPU":
|
||||
print("Converting pretrained checkpoint from fp16 to fp32", flush=True)
|
||||
for key, value in param_dict.items():
|
||||
tensor = value.asnumpy().astype(np.float32)
|
||||
param_dict[key] = Parameter(tensor, key)
|
||||
load_param_into_net(net, param_dict)
|
||||
|
||||
device_type = "Ascend" if context.get_context("device_target") == "Ascend" else "Others"
|
||||
if device_type == "Ascend":
|
||||
net.to_float(mstype.float16)
|
||||
|
||||
|
|
Loading…
Reference in New Issue