fcn-4 gpu compatibility

maijianqiang 2021-08-10 09:23:27 +08:00
parent 1b91c025fe
commit 2e00d43f45
7 changed files with 101 additions and 22 deletions

View File

@@ -188,6 +188,8 @@ SLOG_PRINT_TO_STDOUT=1 python eval.py --device_id 0
│ ├──run_train.sh // shell script for distributed training on Ascend
│ ├──run_eval.sh // shell script for evaluation on Ascend
│ ├──run_process_data.sh // shell script for converting audio clips to MindRecord
│ ├──run_train_gpu.sh // shell script for training on GPU
│ ├──run_eval_gpu.sh // shell script for evaluation on GPU
├── src
│ ├──dataset.py // creating dataset
│ ├──pre_process_data.py // pre-process dataset
@@ -253,7 +255,13 @@ Parameters for both training and evaluation can be set in default_config.yaml
- running on Ascend
```shell
python train.py > train.log 2>&1 &
python train.py --device_target Ascend > train.log 2>&1 &
```
- running on GPU
```shell
python train.py --device_target GPU --data_dir [dataset dir path] --checkpoint_path [checkpoint save dir] > train.log 2>&1 &
```
The python command above runs in the background; you can view the results through the file `train.log`.
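For example, a minimal way to check on the background run (assuming the `train.log` path used in the commands above):
```shell
# follow the training log as it is written
tail -f train.log
# confirm the background training process is still running
ps -ef | grep train.py
```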
@@ -310,21 +318,21 @@ AUC: 0.90995
#### Evaluation Performance
| Parameters | Ascend |
| -------------------------- | ----------------------------------------------------------- |
| Model Version | FCN-4 |
| Resource | Ascend 910; CPU 2.60GHz, 56 cores; Memory 314G; OS Euler2.8 |
| uploaded Date | 07/05/2021 (month/day/year) |
| MindSpore Version | 1.3.0 |
| Training Parameters | epoch=10, steps=534, batch_size = 32, lr=0.005 |
| Optimizer | Adam |
| Loss Function | Binary cross entropy |
| outputs | probability |
| Loss | AUC 0.909 |
| Speed | 1pc: 160 samples/sec; |
| Total time | 1pc: 20 mins; |
| Checkpoint for Fine tuning | 198.73M(.ckpt file) |
| Scripts | [music_auto_tagging script](https://gitee.com/mindspore/mindspore/tree/master/model_zoo/research/audio/fcn-4) |
| Parameters | Ascend | GPU |
| -------------------------- | ----------------------------------------------------------- | ----------------------------------------------------------- |
| Model Version | FCN-4 | FCN-4 |
| Resource | Ascend 910; CPU 2.60GHz, 56 cores; Memory 314G; OS Euler2.8 | Tesla V100-PCIE 32G |
| uploaded Date | 07/05/2021 (month/day/year) | 07/26/2021 (month/day/year) |
| MindSpore Version | 1.3.0 | 1.3.0 |
| Training Parameters | epoch=10, steps=534, batch_size = 32, lr=0.005 | epoch=10, steps=534, batch_size = 32, lr=0.005 |
| Optimizer | Adam | Adam |
| Loss Function | Binary cross entropy | Binary cross entropy |
| outputs | probability | probability |
| Loss | AUC 0.909 | AUC 0.909 |
| Speed | 1pc: 160 samples/sec; | 1pc: 160 samples/sec; |
| Total time | 1pc: 20 mins; | 1pc: 20 mins; |
| Checkpoint for Fine tuning | 198.73M(.ckpt file) | 198.73M(.ckpt file) |
| Scripts | [music_auto_tagging script](https://gitee.com/mindspore/mindspore/tree/master/model_zoo/research/audio/fcn-4) | [music_auto_tagging script](https://gitee.com/mindspore/mindspore/tree/master/model_zoo/research/audio/fcn-4) |
## [ModelZoo Homepage](#contents)

View File

@@ -6,7 +6,7 @@ checkpoint_url: ""
data_path: "/cache/data"
output_path: "/cache/train"
load_path: "/cache/checkpoint_path"
device_target: Ascend
device_target: "Ascend"
enable_profiling: False
# ==============================================================================

View File

@@ -18,13 +18,11 @@ python eval.py
'''
import numpy as np
from src.model_utils.config import config
from src.model_utils.moxing_adapter import moxing_wrapper
from src.model_utils.device_adapter import get_device_id
from src.musictagger import MusicTaggerCNN
from src.dataset import create_dataset
import mindspore.common.dtype as mstype
from mindspore import context
from mindspore import Tensor
@@ -113,12 +111,15 @@ def validation(net, model_path, data_dir, filename, num_consumer, batch):
def modelarts_process():
pass
@moxing_wrapper(pre_process=modelarts_process)
def fcn4_eval():
"""
eval network
"""
context.set_context(device_target=config.device_target, mode=context.GRAPH_MODE, device_id=get_device_id())
context.set_context(device_target=config.device_target, mode=context.GRAPH_MODE)
if config.device_target == 'Ascend':
context.set_context(device_id=get_device_id())
network = MusicTaggerCNN(in_classes=[1, 128, 384, 768, 2048],
kernel_size=[3, 3, 3, 3, 3],
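With this change eval.py is target-aware: `device_target` comes from the config, and `set_context(device_id=...)` is only applied on Ascend because on GPU the visible device is chosen through `CUDA_VISIBLE_DEVICES` (which is what the new GPU shell scripts in this commit do). A minimal sketch of invoking the GPU evaluation path directly, with placeholder paths:
```shell
# the GPU is selected by index here; no device_id is passed to set_context on GPU
export CUDA_VISIBLE_DEVICES=0
# placeholder paths; this mirrors the run_eval_gpu.sh script added in this commit
python eval.py --device_target=GPU --data_dir=/path/to/Music-Tagging --checkpoint_path=/path/to/checkpoints > eval.log 2>&1 &
```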

View File

@@ -0,0 +1,27 @@
#!/bin/bash
# Copyright 2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
echo "run as sh run_train_gpu.sh [CUDA_VISIBLE_DEVICES] [DATA_PATH] [CKPT_PATH]"
echo "for example sh run_train_gpu.sh 0 /home/dataset/Music-Tagging /home/fcn-4/"
export CUDA_VISIBLE_DEVICES=$1
DATA_PATH=$2
CKPT_PATH=$3
export SLOG_PRINT_TO_STDOUT=1
rm -rf eval_gpu
mkdir eval_gpu
python ../eval.py --data_dir=$DATA_PATH --checkpoint_path=$CKPT_PATH --device_target=GPU > eval_gpu/eval.log 2>&1 &

View File

@@ -0,0 +1,37 @@
#!/bin/bash
# Copyright 2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
echo "run as sh run_train_gpu.sh [CUDA_VISIBLE_DEVICES] [DATA_PATH] [CKPT_PATH(options)]"
echo "for example sh run_train_gpu.sh 0 /home/dataset/Music-Tagging /home/fcn-4/(options)"
export CUDA_VISIBLE_DEVICES=$1
DATA_PATH=$2
CKPT_PATH="./"
PRE_TRAINED=False
export SLOG_PRINT_TO_STDOUT=1
if [ $# == 3 ]
then
CKPT_PATH=$3
PRE_TRAINED=True
fi
rm -rf train_gpu
mkdir train_gpu
echo "start training"
python ../train.py --data_dir=$DATA_PATH --checkpoint_path=$CKPT_PATH \
--pre_trained=$PRE_TRAINED \
--device_target=GPU > train_gpu/train.log 2>&1 &
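Illustrative invocations of the script above, with placeholder paths: two arguments train from scratch (`PRE_TRAINED=False`), while an optional third argument supplies a checkpoint directory and switches `--pre_trained=True`:
```shell
# train from scratch on GPU 0
bash run_train_gpu.sh 0 /path/to/Music-Tagging
# continue training from an existing checkpoint directory (sets --pre_trained=True)
bash run_train_gpu.sh 0 /path/to/Music-Tagging /path/to/checkpoints
```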

View File

@@ -124,4 +124,5 @@ def get_config():
final_config = merge(args, default)
return Config(final_config)
config = get_config()

View File

@@ -16,7 +16,7 @@
##############train models#################
python train.py
'''
import os
from mindspore import context, nn
from mindspore.train import Model
from mindspore.common import set_seed
@@ -35,6 +35,7 @@ from src.loss import BCELoss
def modelarts_pre_process():
pass
@moxing_wrapper(pre_process=modelarts_pre_process)
def train(model, dataset_direct, filename, columns_list, num_consumer=4,
batch=16, epoch=50, save_checkpoint_steps=2172, keep_checkpoint_max=50,
@@ -58,8 +59,12 @@ def train(model, dataset_direct, filename, columns_list, num_consumer=4,
if __name__ == "__main__":
set_seed(1)
context.set_context(device_target='Ascend', mode=context.GRAPH_MODE, device_id=get_device_id())
config.checkpoint_path = os.path.abspath(config.checkpoint_path)
context.set_context(device_target=config.device_target, mode=context.GRAPH_MODE)
context.set_context(enable_auto_mixed_precision=config.mixed_precision)
if config.device_target == 'Ascend':
context.set_context(device_id=get_device_id())
network = MusicTaggerCNN(in_classes=[1, 128, 384, 768, 2048],
kernel_size=[3, 3, 3, 3, 3],
padding=[0] * 5,