forked from mindspore-Ecosystem/mindspore
fcn-4 gpu compatibility
This commit is contained in:
parent 1b91c025fe
commit 2e00d43f45
@@ -188,6 +188,8 @@ SLOG_PRINT_TO_STDOUT=1 python eval.py --device_id 0
 │ ├──run_train.sh            // shell script for distributed training on Ascend
 │ ├──run_eval.sh             // shell script for evaluation on Ascend
 │ ├──run_process_data.sh     // shell script for converting audio clips to mindrecord
+│ ├──run_train_gpu.sh        // shell script for distributed training on GPU
+│ ├──run_eval_gpu.sh         // shell script for evaluation on GPU
 ├── src
 │ ├──dataset.py              // creating dataset
 │ ├──pre_process_data.py     // pre-processing dataset
@@ -253,7 +255,13 @@ Parameters for both training and evaluation can be set in default_config.yaml
 - running on Ascend

 ```shell
-python train.py > train.log 2>&1 &
+python train.py --device_target Ascend > train.log 2>&1 &
 ```

+- running on GPU
+
+```shell
+python train.py --device_target GPU --data_dir [dataset dir path] --checkpoint_path [checkpoint save dir] > train.log 2>&1 &
+```
+
 The python command above will run in the background; you can view the results through the file `train.log`.
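As a reference point, here is a minimal sketch of the new GPU command with the bracketed placeholders filled in. The dataset path is borrowed from the usage examples echoed by the new GPU scripts added below; the checkpoint directory is purely illustrative.

```shell
# Illustrative values only -- substitute your own dataset and checkpoint directories.
python train.py --device_target GPU \
    --data_dir /home/dataset/Music-Tagging \
    --checkpoint_path ./checkpoints > train.log 2>&1 &
# The job runs in the background; follow its progress from the log file.
tail -f train.log
```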
@@ -310,21 +318,21 @@ AUC: 0.90995

 #### Evaluation Performance

-| Parameters                 | Ascend                                                       |
-| -------------------------- | ------------------------------------------------------------ |
-| Model Version              | FCN-4                                                        |
-| Resource                   | Ascend 910; CPU 2.60GHz, 56cores; Memory 314G; OS Euler2.8   |
-| uploaded Date              | 07/05/2021 (month/day/year)                                  |
-| MindSpore Version          | 1.3.0                                                        |
-| Training Parameters        | epoch=10, steps=534, batch_size = 32, lr=0.005               |
-| Optimizer                  | Adam                                                         |
-| Loss Function              | Binary cross entropy                                         |
-| outputs                    | probability                                                  |
-| Loss                       | AUC 0.909                                                    |
-| Speed                      | 1pc: 160 samples/sec;                                        |
-| Total time                 | 1pc: 20 mins;                                                |
-| Checkpoint for Fine tuning | 198.73M(.ckpt file)                                          |
-| Scripts                    | [music_auto_tagging script](https://gitee.com/mindspore/mindspore/tree/master/model_zoo/research/audio/fcn-4) |
+| Parameters                 | Ascend                                                      | GPU                                            |
+| -------------------------- | ----------------------------------------------------------- | ---------------------------------------------- |
+| Model Version              | FCN-4                                                       | FCN-4                                          |
+| Resource                   | Ascend 910; CPU 2.60GHz, 56cores; Memory 314G; OS Euler2.8  | Tesla V100-PICE-32G                            |
+| uploaded Date              | 07/05/2021 (month/day/year)                                 | 07/26/2021 (month/day/year)                    |
+| MindSpore Version          | 1.3.0                                                       | 1.3.0                                          |
+| Training Parameters        | epoch=10, steps=534, batch_size = 32, lr=0.005              | epoch=10, steps=534, batch_size = 32, lr=0.005 |
+| Optimizer                  | Adam                                                        | Adam                                           |
+| Loss Function              | Binary cross entropy                                        | Binary cross entropy                           |
+| outputs                    | probability                                                 | probability                                    |
+| Loss                       | AUC 0.909                                                   | AUC 0.909                                      |
+| Speed                      | 1pc: 160 samples/sec;                                       | 1pc: 160 samples/sec;                          |
+| Total time                 | 1pc: 20 mins;                                               | 1pc: 20 mins;                                  |
+| Checkpoint for Fine tuning | 198.73M(.ckpt file)                                         | 198.73M(.ckpt file)                            |
+| Scripts                    | [music_auto_tagging script](https://gitee.com/mindspore/mindspore/tree/master/model_zoo/research/audio/fcn-4) | |

 ## [ModelZoo Homepage](#contents)
@@ -6,7 +6,7 @@ checkpoint_url: ""
 data_path: "/cache/data"
 output_path: "/cache/train"
 load_path: "/cache/checkpoint_path"
-device_target: Ascend
+device_target: "Ascend"
 enable_profiling: False

 # ==============================================================================
@@ -18,13 +18,11 @@ python eval.py
'''

import numpy as np

from src.model_utils.config import config
from src.model_utils.moxing_adapter import moxing_wrapper
from src.model_utils.device_adapter import get_device_id
from src.musictagger import MusicTaggerCNN
from src.dataset import create_dataset

import mindspore.common.dtype as mstype
from mindspore import context
from mindspore import Tensor
@@ -113,12 +111,15 @@ def validation(net, model_path, data_dir, filename, num_consumer, batch):
 def modelarts_process():
     pass


 @moxing_wrapper(pre_process=modelarts_process)
 def fcn4_eval():
     """
     eval network
     """
-    context.set_context(device_target=config.device_target, mode=context.GRAPH_MODE, device_id=get_device_id())
+    context.set_context(device_target=config.device_target, mode=context.GRAPH_MODE)
+    if config.device_target == 'Ascend':
+        context.set_context(device_id=get_device_id())
+
     network = MusicTaggerCNN(in_classes=[1, 128, 384, 768, 2048],
                              kernel_size=[3, 3, 3, 3, 3],
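Given the change above, single-card GPU evaluation can presumably also be launched by calling eval.py directly rather than through the helper script; a hedged sketch with illustrative paths:

```shell
# Illustrative paths; eval.py now selects the backend from --device_target.
python eval.py --device_target GPU \
    --data_dir /home/dataset/Music-Tagging \
    --checkpoint_path /home/fcn-4/ > eval.log 2>&1 &
```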
@@ -0,0 +1,27 @@
+#!/bin/bash
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+echo "run as sh run_eval_gpu.sh [CUDA_VISIBLE_DEVICES] [DATA_PATH] [CKPT_PATH]"
+echo "for example sh run_eval_gpu.sh 0 /home/dataset/Music-Tagging /home/fcn-4/"
+
+export CUDA_VISIBLE_DEVICES=$1
+DATA_PATH=$2
+CKPT_PATH=$3
+export SLOG_PRINT_TO_STDOUT=1
+
+rm -rf eval_gpu
+mkdir eval_gpu
+
+python ../eval.py --data_dir=$DATA_PATH --checkpoint_path=$CKPT_PATH --device_target=GPU > eval_gpu/eval.log 2>&1 &
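A hedged usage sketch for the new evaluation script: it references ../eval.py, so it is presumably meant to be launched from inside the scripts directory, and the arguments below mirror the example echoed by the script itself.

```shell
# Run from the directory containing run_eval_gpu.sh (it calls ../eval.py).
cd scripts
bash run_eval_gpu.sh 0 /home/dataset/Music-Tagging /home/fcn-4/
tail -f eval_gpu/eval.log   # evaluation output is redirected here
```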
@@ -0,0 +1,37 @@
+#!/bin/bash
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+echo "run as sh run_train_gpu.sh [CUDA_VISIBLE_DEVICES] [DATA_PATH] [CKPT_PATH(optional)]"
+echo "for example sh run_train_gpu.sh 0 /home/dataset/Music-Tagging /home/fcn-4/"
+
+export CUDA_VISIBLE_DEVICES=$1
+DATA_PATH=$2
+CKPT_PATH="./"
+PRE_TRAINED=False
+export SLOG_PRINT_TO_STDOUT=1
+
+if [ $# == 3 ]
+then
+    CKPT_PATH=$3
+    PRE_TRAINED=True
+fi
+
+rm -rf train_gpu
+mkdir train_gpu
+
+echo "start training"
+python ../train.py --data_dir=$DATA_PATH --checkpoint_path=$CKPT_PATH \
+    --pre_trained=$PRE_TRAINED \
+    --device_target=GPU > train_gpu/train.log 2>&1 &
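Likewise, a hedged sketch of the two ways this training script can be called: with two arguments it trains from scratch (PRE_TRAINED stays False), while the optional third argument supplies a checkpoint directory and switches PRE_TRAINED to True.

```shell
# Train from scratch: CKPT_PATH defaults to "./" and PRE_TRAINED stays False.
bash run_train_gpu.sh 0 /home/dataset/Music-Tagging

# Fine-tune from an existing checkpoint: the third argument flips PRE_TRAINED to True.
bash run_train_gpu.sh 0 /home/dataset/Music-Tagging /home/fcn-4/
tail -f train_gpu/train.log   # training log written by the script
```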
@@ -124,4 +124,5 @@ def get_config():
    final_config = merge(args, default)
    return Config(final_config)


config = get_config()
@@ -16,7 +16,7 @@
##############train models#################
python train.py
'''

import os
from mindspore import context, nn
from mindspore.train import Model
from mindspore.common import set_seed
@@ -35,6 +35,7 @@ from src.loss import BCELoss
def modelarts_pre_process():
    pass


@moxing_wrapper(pre_process=modelarts_pre_process)
def train(model, dataset_direct, filename, columns_list, num_consumer=4,
          batch=16, epoch=50, save_checkpoint_steps=2172, keep_checkpoint_max=50,
@@ -58,8 +59,12 @@ def train(model, dataset_direct, filename, columns_list, num_consumer=4,
 if __name__ == "__main__":
     set_seed(1)

-    context.set_context(device_target='Ascend', mode=context.GRAPH_MODE, device_id=get_device_id())
+    config.checkpoint_path = os.path.abspath(config.checkpoint_path)
+    context.set_context(device_target=config.device_target, mode=context.GRAPH_MODE)
+    context.set_context(enable_auto_mixed_precision=config.mixed_precision)
+    if config.device_target == 'Ascend':
+        context.set_context(device_id=get_device_id())

     network = MusicTaggerCNN(in_classes=[1, 128, 384, 768, 2048],
                              kernel_size=[3, 3, 3, 3, 3],
                              padding=[0] * 5,