fcn-4 gpu compatibility

maijianqiang 2021-08-10 09:23:27 +08:00
parent 1b91c025fe
commit 2e00d43f45
7 changed files with 101 additions and 22 deletions

View File

@@ -188,6 +188,8 @@ SLOG_PRINT_TO_STDOUT=1 python eval.py --device_id 0
│ ├──run_train.sh // shell script for distributed training on Ascend
│ ├──run_eval.sh // shell script for evaluation on Ascend
│ ├──run_process_data.sh // shell script for converting audio clips to mindrecord
│ ├──run_train_gpu.sh // shell script for distributed training on GPU
│ ├──run_eval_gpu.sh // shell script for evaluation on GPU
├── src
│ ├──dataset.py // creating dataset
│ ├──pre_process_data.py // pre-processing dataset
@@ -253,7 +255,13 @@ Parameters for both training and evaluation can be set in default_config.yaml
- running on Ascend

  ```shell
  python train.py --device_target Ascend > train.log 2>&1 &
  ```

- running on GPU

  ```shell
  python train.py --device_target GPU --data_dir [dataset dir path] --checkpoint_path [checkpoint save dir] > train.log 2>&1 &
  ```

The python command above will run in the background; you can view the results in the file `train.log`.
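Because the command is detached with `> train.log 2>&1 &`, progress has to be checked from the log file. A quick way to do that with standard shell commands (nothing specific to this repo):

```shell
# confirm the backgrounded training job is still alive, then follow its log
jobs
tail -f train.log
```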
@@ -310,20 +318,20 @@ AUC: 0.90995
#### Evaluation Performance

| Parameters                 | Ascend                                                       | GPU                                                          |
| -------------------------- | ------------------------------------------------------------ | ------------------------------------------------------------ |
| Model Version              | FCN-4                                                        | FCN-4                                                        |
| Resource                   | Ascend 910; CPU 2.60GHz, 56 cores; Memory 314G; OS Euler2.8  | Tesla V100-PCIE-32G                                          |
| uploaded Date              | 07/05/2021 (month/day/year)                                  | 07/26/2021 (month/day/year)                                  |
| MindSpore Version          | 1.3.0                                                        | 1.3.0                                                        |
| Training Parameters        | epoch=10, steps=534, batch_size = 32, lr=0.005               | epoch=10, steps=534, batch_size = 32, lr=0.005               |
| Optimizer                  | Adam                                                         | Adam                                                         |
| Loss Function              | Binary cross entropy                                         | Binary cross entropy                                         |
| outputs                    | probability                                                  | probability                                                  |
| Loss                       | AUC 0.909                                                    | AUC 0.909                                                    |
| Speed                      | 1pc: 160 samples/sec                                         | 1pc: 160 samples/sec                                         |
| Total time                 | 1pc: 20 mins                                                 | 1pc: 20 mins                                                 |
| Checkpoint for Fine tuning | 198.73M (.ckpt file)                                         | 198.73M (.ckpt file)                                         |
| Scripts                    | [music_auto_tagging script](https://gitee.com/mindspore/mindspore/tree/master/model_zoo/research/audio/fcn-4) | [music_auto_tagging script](https://gitee.com/mindspore/mindspore/tree/master/model_zoo/research/audio/fcn-4) |

## [ModelZoo Homepage](#contents)

View File

@@ -6,7 +6,7 @@ checkpoint_url: ""
data_path: "/cache/data" data_path: "/cache/data"
output_path: "/cache/train" output_path: "/cache/train"
load_path: "/cache/checkpoint_path" load_path: "/cache/checkpoint_path"
device_target: Ascend device_target: "Ascend"
enable_profiling: False enable_profiling: False
# ============================================================================== # ==============================================================================
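`device_target` still defaults to `"Ascend"` in default_config.yaml. For GPU runs the value can be overridden per invocation, as the README example earlier in this diff does, or changed in the YAML itself; a minimal sketch (the `sed` one-liner is only an illustration, not part of this commit):

```shell
# per-run override (flag shown in the README example)
python train.py --device_target GPU --data_dir [dataset dir path] --checkpoint_path [checkpoint save dir] > train.log 2>&1 &

# or make GPU the default for every run
sed -i 's/device_target: "Ascend"/device_target: "GPU"/' default_config.yaml
```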

View File

@@ -18,13 +18,11 @@ python eval.py
'''
import numpy as np
from src.model_utils.config import config
from src.model_utils.moxing_adapter import moxing_wrapper
from src.model_utils.device_adapter import get_device_id
from src.musictagger import MusicTaggerCNN
from src.dataset import create_dataset
import mindspore.common.dtype as mstype
from mindspore import context
from mindspore import Tensor
@@ -113,12 +111,15 @@ def validation(net, model_path, data_dir, filename, num_consumer, batch):
def modelarts_process():
    pass

@moxing_wrapper(pre_process=modelarts_process)
def fcn4_eval():
    """
    eval network
    """
    context.set_context(device_target=config.device_target, mode=context.GRAPH_MODE)
    if config.device_target == 'Ascend':
        context.set_context(device_id=get_device_id())
    network = MusicTaggerCNN(in_classes=[1, 128, 384, 768, 2048],
                             kernel_size=[3, 3, 3, 3, 3],
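With this change eval.py only binds a `device_id` on Ascend; on GPU the visible card is selected through CUDA_VISIBLE_DEVICES instead. A direct invocation might look like the sketch below (the paths are placeholders taken from the examples in the new GPU scripts, and the flags mirror the new run_eval_gpu.sh wrapper):

```shell
# select the GPU, then run evaluation directly in the background
export CUDA_VISIBLE_DEVICES=0
python eval.py --device_target GPU --data_dir /home/dataset/Music-Tagging --checkpoint_path /home/fcn-4/ > eval.log 2>&1 &
```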

View File

@@ -0,0 +1,27 @@
#!/bin/bash
# Copyright 2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
echo "run as sh run_train_gpu.sh [CUDA_VISIBLE_DEVICES] [DATA_PATH] [CKPT_PATH]"
echo "for example sh run_train_gpu.sh 0 /home/dataset/Music-Tagging /home/fcn-4/"
export CUDA_VISIBLE_DEVICES=$1
DATA_PATH=$2
CKPT_PATH=$3
export SLOG_PRINT_TO_STDOUT=1
rm -rf eval_gpu
mkdir eval_gpu
python ../eval.py --data_dir=$DATA_PATH --checkpoint_path=$CKPT_PATH --device_target=GPU > eval_gpu/eval.log 2>&1 &
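A usage sketch for this script; it calls `../eval.py` and writes `eval_gpu/eval.log` into the current directory, so it is presumably meant to be launched from the scripts directory:

```shell
sh run_eval_gpu.sh 0 /home/dataset/Music-Tagging /home/fcn-4/
# the evaluation runs in the background; inspect the result once it finishes
cat eval_gpu/eval.log
```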

View File

@@ -0,0 +1,37 @@
#!/bin/bash
# Copyright 2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
echo "run as sh run_train_gpu.sh [CUDA_VISIBLE_DEVICES] [DATA_PATH] [CKPT_PATH(options)]"
echo "for example sh run_train_gpu.sh 0 /home/dataset/Music-Tagging /home/fcn-4/(options)"
export CUDA_VISIBLE_DEVICES=$1
DATA_PATH=$2
CKPT_PATH="./"
PRE_TRAINED=False
export SLOG_PRINT_TO_STDOUT=1
if [ $# == 3 ]
then
CKPT_PATH=$3
PRE_TRAINED=True
fi
rm -rf train_gpu
mkdir train_gpu
echo "start training"
python ../train.py --data_dir=$DATA_PATH --checkpoint_path=$CKPT_PATH \
--pre_trained=$PRE_TRAINED \
--device_target=GPU > train_gpu/train.log 2>&1 &
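Both invocation forms the script supports, as a sketch (paths are the examples from the echo above): with two arguments it trains from scratch, with a third it also passes that checkpoint directory and flips `--pre_trained` to True:

```shell
# fresh GPU training
sh run_train_gpu.sh 0 /home/dataset/Music-Tagging
tail -f train_gpu/train.log

# resume from an existing checkpoint directory (sets PRE_TRAINED=True)
sh run_train_gpu.sh 0 /home/dataset/Music-Tagging /home/fcn-4/
```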

View File

@@ -124,4 +124,5 @@ def get_config():
    final_config = merge(args, default)
    return Config(final_config)

config = get_config()

View File

@@ -16,7 +16,7 @@
##############train models#################
python train.py
'''
import os
from mindspore import context, nn
from mindspore.train import Model
from mindspore.common import set_seed
@@ -35,6 +35,7 @@ from src.loss import BCELoss
def modelarts_pre_process():
    pass

@moxing_wrapper(pre_process=modelarts_pre_process)
def train(model, dataset_direct, filename, columns_list, num_consumer=4,
          batch=16, epoch=50, save_checkpoint_steps=2172, keep_checkpoint_max=50,
@@ -58,8 +59,12 @@ def train(model, dataset_direct, filename, columns_list, num_consumer=4,
if __name__ == "__main__":
    set_seed(1)
    config.checkpoint_path = os.path.abspath(config.checkpoint_path)
    context.set_context(device_target=config.device_target, mode=context.GRAPH_MODE)
    context.set_context(enable_auto_mixed_precision=config.mixed_precision)
    if config.device_target == 'Ascend':
        context.set_context(device_id=get_device_id())
    network = MusicTaggerCNN(in_classes=[1, 128, 384, 768, 2048],
                             kernel_size=[3, 3, 3, 3, 3],
                             padding=[0] * 5,
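train.py now takes the target from the config instead of hard-coding Ascend, and only sets a `device_id` in the Ascend branch. A sketch of the two launch modes (the DEVICE_ID environment variable is an assumption about how get_device_id resolves the id; it is not shown in this diff):

```shell
# Ascend: device id comes from get_device_id(), assumed to read DEVICE_ID
export DEVICE_ID=0
python train.py --device_target Ascend > train.log 2>&1 &

# GPU: no device_id is set in the context; the card is chosen via CUDA_VISIBLE_DEVICES
export CUDA_VISIBLE_DEVICES=0
python train.py --device_target GPU --data_dir [dataset dir path] --checkpoint_path [checkpoint save dir] > train.log 2>&1 &
```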