Merge pull request !18219 from huchunmei/lstm_
This commit is contained in:
i-robot 2021-06-16 15:04:22 +08:00 committed by Gitee
commit 3f778ed90c
27 changed files with 766 additions and 285 deletions

View File

@ -94,13 +94,21 @@ Note that you can run the scripts based on the dataset mentioned in original pap
   │   ├── run_eval_cpu.sh # shell script for evaluation on CPU
   │   ├── run_train_gpu.sh # shell script for training on GPU
   │   ├── run_train_ascend.sh # shell script for training on Ascend
   │   └── run_train_cpu.sh # shell script for training on CPU
   │   ├── run_train_cpu.sh # shell script for training on CPU
   │   └── run_infer_310.sh # shell script for inference on Ascend 310
   ├── src
   │   ├── config.py # parameter configuration
   │   ├── lstm.py # Sentiment model
   │   ├── dataset.py # dataset preprocess
   │   ├── imdb.py # imdb dataset read script
   │   ├── lr_schedule.py # dynamic_lr script
   │   └── lstm.py # Sentiment model
│ └─model_utils
│ ├── config.py # Processing configuration parameters
│ ├── device_adapter.py # Get cloud ID
│ ├── local_adapter.py # Get local ID
│ └── moxing_adapter.py # Parameter processing
├── default_config.yaml # Training parameter profile(cpu/gpu)
├── config_ascend.yaml # Training parameter profile(ascend)
├── config_ascend_8p.yaml # Training parameter profile(ascend_8p)
   ├── eval.py # evaluation script on GPU, CPU and Ascend
   └── train.py # training script on GPU, CPU and Ascend
```

View File

@ -101,13 +101,21 @@ LSTM模型包含嵌入层、编码器和解码器这几个模块编码器模
   │   ├── run_eval_cpu.sh # CPU评估shell脚本
   │   ├── run_train_ascend.sh # Ascend训练的shell脚本
   │   ├── run_train_gpu.sh # GPU训练的shell脚本
   │   └── run_train_cpu.sh # CPU训练的shell脚本
  │   ├── run_train_cpu.sh # CPU训练的shell脚本
   │   └── run_infer_310.sh # Ascend 310推理的shell脚本
   ├── src
   │   ├── config.py # 参数配置
   │   ├── lstm.py # 情感模型
   │   ├── dataset.py # 数据集预处理
   │   ├── imdb.py # IMDB数据集读脚本
   │   ├── lr_schedule.py # 动态学习率脚本
   │   └── lstm.py # 情感模型
│ └── model_utils
│ ├── config.py # 获取.yaml配置参数
│ ├── device_adapter.py # 获取云上id
│ ├── local_adapter.py # 获取本地id
│ └── moxing_adapter.py # 云上数据准备
├── default_config.yaml # 训练配置参数(cpu/gpu)
├── config_ascend.yaml # 训练配置参数(ascend)
├── config_ascend_8p.yaml # 训练配置参数(ascend_8p)
   ├── eval.py # GPU、CPU和Ascend的评估脚本
   └── train.py # GPU、CPU和Ascend的训练脚本
```

View File

@ -0,0 +1,82 @@
# Builtin Configurations(DO NOT CHANGE THESE CONFIGURATIONS unless you know exactly what you are doing)
enable_modelarts: False
data_url: ""
train_url: ""
checkpoint_url: ""
data_path: "/cache/data"
output_path: "/cache/train"
load_path: "/cache/checkpoint_path"
checkpoint_path: './checkpoint/'
checkpoint_file: './checkpoint/lstm-20_390.ckpt'
device_target: Ascend
enable_profiling: False
# ==============================================================================
# LSTM CONFIG IN ASCEND for 1p training
num_classes: 2
momentum: 0.9
num_epochs: 20
batch_size: 64
embed_size: 300
num_hiddens: 128
num_layers: 2
bidirectional: True
save_checkpoint_steps: 7800
keep_checkpoint_max: 10
dynamic_lr: True
lr_init: 0.05
lr_end: 0.01
lr_max: 0.1
lr_adjust_epoch: 6
warmup_epochs: 1
global_step: 0
# MindSpore LSTM Example - train.py
preprocess: 'false'
aclimdb_path: "/cache/data/aclImdb"
glove_path: "/cache/data"
preprocess_path: "./preprocess"
ckpt_path: './ckpt_lstm/'
pre_trained: '' # None
device_num: 1
distribute: "false"
enable_graph_kernel: "true"
# export.py
ckpt_file: './ckpt_lstm/lstm-20_390.ckpt'
device_id: 0
file_name: "lstm"
file_format: "AIR"
# LSTM Postprocess
label_dir: ''
result_dir: "./result_Files"
# preprocess
result_path: './preprocess_Result/'
---
# Config description for each option
enable_modelarts: 'Whether training on modelarts, default: False'
data_url: 'Dataset url for obs'
train_url: 'Training output url for obs'
data_path: 'Dataset path for local'
output_path: 'Training output path for local'
preprocess: 'whether to preprocess data.'
aclimdb_path: 'path where the dataset is stored.'
glove_path: 'path where the GloVe is stored.'
preprocess_path: 'path where the pre-process data is stored.'
ckpt_path: 'the path to save the checkpoint file.'
pre_trained: 'the pretrained checkpoint file path.'
device_target: 'the target device to run, support "Ascend", "GPU", "CPU". Default: "Ascend".'
device_num: 'Use device nums, default is 1.'
distribute: 'Run distribute, default is false.'
enable_graph_kernel: 'Accelerate by graph kernel, default is true.'
---
device_target: ['Ascend', 'GPU', 'CPU']
distribute: ['true', 'false']
enable_graph_kernel: ['true', 'false']
file_format: ['AIR', 'ONNX', 'MINDIR']

View File

@ -0,0 +1,84 @@
# Builtin Configurations(DO NOT CHANGE THESE CONFIGURATIONS unless you know exactly what you are doing)
enable_modelarts: False
data_url: ""
train_url: ""
checkpoint_url: ""
data_path: "/cache/data"
output_path: "/cache/train"
load_path: "/cache/checkpoint_path"
checkpoint_path: './checkpoint/'
checkpoint_file: './checkpoint/lstm-20_390.ckpt'
device_target: Ascend
enable_profiling: False
# ==============================================================================
# LSTM CONFIG IN ASCEND for 8p training
num_classes: 2
momentum: 0.9
num_epochs: 20
batch_size: 64
embed_size: 300
num_hiddens: 128
num_layers: 2
bidirectional: True
save_checkpoint_steps: 7800
keep_checkpoint_max: 10
dynamic_lr: True
lr_init: 0.05
lr_end: 0.01
lr_max: 0.3
lr_adjust_epoch: 20
warmup_epochs: 2
global_step: 0
# MindSpore LSTM Example - train.py
preprocess: 'false'
aclimdb_path: "/cache/data/aclImdb"
glove_path: "/cache/data"
preprocess_path: "./preprocess"
ckpt_path: './ckpt_lstm/'
pre_trained: '' # None
device_num: 8
distribute: "true"
enable_graph_kernel: "true"
# export.py
ckpt_file: './ckpt_lstm/lstm-20_390.ckpt'
device_id: 0
rank_id: 0
file_name: "lstm"
file_format: "AIR"
# LSTM Postprocess
label_dir: ''
result_dir: "./result_Files"
# preprocess
result_path: './preprocess_Result/'
---
# Config description for each option
enable_modelarts: 'Whether training on modelarts, default: False'
data_url: 'Dataset url for obs'
train_url: 'Training output url for obs'
data_path: 'Dataset path for local'
output_path: 'Training output path for local'
preprocess: 'whether to preprocess data.'
aclimdb_path: 'path where the dataset is stored.'
glove_path: 'path where the GloVe is stored.'
preprocess_path: 'path where the pre-process data is stored.'
ckpt_path: 'the path to save the checkpoint file.'
pre_trained: 'the pretrained checkpoint file path.'
device_target: 'the target device to run, support "Ascend", "GPU", "CPU". Default: "Ascend".'
device_num: 'Use device nums, default is 1.'
distribute: 'Run distribute, default is false.'
enable_graph_kernel: 'Accelerate by graph kernel, default is true.'
---
device_target: ['Ascend', 'GPU', 'CPU']
distribute: ['true', 'false']
enable_graph_kernel: ['true', 'false']
file_format: ['AIR', 'ONNX', 'MINDIR']

View File

@ -0,0 +1,77 @@
# Builtin Configurations(DO NOT CHANGE THESE CONFIGURATIONS unless you know exactly what you are doing)
enable_modelarts: False
data_url: ""
train_url: ""
checkpoint_url: ""
data_path: "/cache/data"
output_path: "/cache/train"
load_path: "/cache/checkpoint_path"
checkpoint_path: './checkpoint/'
checkpoint_file: './checkpoint/lstm-20_390.ckpt'
device_target: CPU
enable_profiling: False
# ==============================================================================
# LSTM CONFIG
num_classes: 2
dynamic_lr: False
learning_rate: 0.1
momentum: 0.9
num_epochs: 20
batch_size: 64
embed_size: 300
num_hiddens: 100
num_layers: 2
bidirectional: True
save_checkpoint_steps: 390
keep_checkpoint_max: 10
# MindSpore LSTM Example - train.py
preprocess: 'false'
aclimdb_path: "/cache/data/aclImdb"
glove_path: "/cache/data"
preprocess_path: "./preprocess"
ckpt_path: './ckpt_lstm/'
pre_trained: '' # None
device_num: 1
distribute: "false"
enable_graph_kernel: "true"
# export.py
ckpt_file: './ckpt_lstm/lstm-20_390.ckpt'
device_id: 0
file_name: "lstm"
file_format: "AIR"
# LSTM Postprocess
label_dir: ''
result_dir: "./result_Files"
# preprocess
result_path: './preprocess_Result/'
---
# Config description for each option
enable_modelarts: 'Whether training on modelarts, default: False'
data_url: 'Dataset url for obs'
train_url: 'Training output url for obs'
data_path: 'Dataset path for local'
output_path: 'Training output path for local'
preprocess: 'whether to preprocess data.'
aclimdb_path: 'path where the dataset is stored.'
glove_path: 'path where the GloVe is stored.'
preprocess_path: 'path where the pre-process data is stored.'
ckpt_path: 'the path to save the checkpoint file.'
pre_trained: 'the pretrained checkpoint file path.'
device_target: 'the target device to run, support "Ascend", "GPU", "CPU". Default: "CPU".'
device_num: 'Use device nums, default is 1.'
distribute: 'Run distribute, default is false.'
enable_graph_kernel: 'Accelerate by graph kernel, default is true.'
---
device_target: ['Ascend', 'GPU', 'CPU']
distribute: ['true', 'false']
enable_graph_kernel: ['true', 'false']
file_format: ['AIR', 'MINDIR']

View File

@ -1,4 +1,4 @@
# Copyright 2020 Huawei Technologies Co., Ltd
# Copyright 2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@ -15,76 +15,66 @@
"""
#################train lstm example on aclImdb########################
"""
import argparse
import os
import numpy as np
from src.config import lstm_cfg, lstm_cfg_ascend
from src.model_utils.config import config
from src.model_utils.moxing_adapter import moxing_wrapper
from src.dataset import lstm_create_dataset, convert_to_mindrecord
from src.lstm import SentimentNet
from mindspore import Tensor, nn, Model, context
from mindspore.nn import Accuracy, Recall, F1
from mindspore.train.serialization import load_checkpoint, load_param_into_net
if __name__ == '__main__':
parser = argparse.ArgumentParser(description='MindSpore LSTM Example')
parser.add_argument('--preprocess', type=str, default='false', choices=['true', 'false'],
help='whether to preprocess data.')
parser.add_argument('--aclimdb_path', type=str, default="./aclImdb",
help='path where the dataset is stored.')
parser.add_argument('--glove_path', type=str, default="./glove",
help='path where the GloVe is stored.')
parser.add_argument('--preprocess_path', type=str, default="./preprocess",
help='path where the pre-process data is stored.')
parser.add_argument('--ckpt_path', type=str, default=None,
help='the checkpoint file path used to evaluate model.')
parser.add_argument('--device_target', type=str, default="Ascend", choices=['GPU', 'CPU', 'Ascend'],
help='the target device to run, support "GPU", "CPU". Default: "Ascend".')
args = parser.parse_args()
def modelarts_process():
config.ckpt_file = os.path.join(config.output_path, config.ckpt_file)
@moxing_wrapper(pre_process=modelarts_process)
def eval_lstm():
""" eval lstm """
print('\neval.py config: \n', config)
config.preprocess_path = os.path.join(config.glove_path, config.preprocess_path)
context.set_context(
mode=context.GRAPH_MODE,
save_graphs=False,
device_target=args.device_target)
device_target=config.device_target)
if args.device_target == 'Ascend':
cfg = lstm_cfg_ascend
else:
cfg = lstm_cfg
if args.preprocess == "true":
if config.preprocess == "true":
print("============== Starting Data Pre-processing ==============")
convert_to_mindrecord(cfg.embed_size, args.aclimdb_path, args.preprocess_path, args.glove_path)
convert_to_mindrecord(config.embed_size, config.aclimdb_path, config.preprocess_path, config.glove_path)
embedding_table = np.loadtxt(os.path.join(args.preprocess_path, "weight.txt")).astype(np.float32)
embedding_table = np.loadtxt(os.path.join(config.preprocess_path, "weight.txt")).astype(np.float32)
# DynamicRNN in this network on Ascend platform only support the condition that the shape of input_size
# and hiddle_size is multiples of 16, this problem will be solved later.
if args.device_target == 'Ascend':
pad_num = int(np.ceil(cfg.embed_size / 16) * 16 - cfg.embed_size)
if config.device_target == 'Ascend':
pad_num = int(np.ceil(config.embed_size / 16) * 16 - config.embed_size)
if pad_num > 0:
embedding_table = np.pad(embedding_table, [(0, 0), (0, pad_num)], 'constant')
cfg.embed_size = int(np.ceil(cfg.embed_size / 16) * 16)
config.embed_size = int(np.ceil(config.embed_size / 16) * 16)
network = SentimentNet(vocab_size=embedding_table.shape[0],
embed_size=cfg.embed_size,
num_hiddens=cfg.num_hiddens,
num_layers=cfg.num_layers,
bidirectional=cfg.bidirectional,
num_classes=cfg.num_classes,
embed_size=config.embed_size,
num_hiddens=config.num_hiddens,
num_layers=config.num_layers,
bidirectional=config.bidirectional,
num_classes=config.num_classes,
weight=Tensor(embedding_table),
batch_size=cfg.batch_size)
batch_size=config.batch_size)
loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
ds_eval = lstm_create_dataset(args.preprocess_path, cfg.batch_size, training=False)
ds_eval = lstm_create_dataset(config.preprocess_path, config.batch_size, training=False)
model = Model(network, loss, metrics={'acc': Accuracy(), 'recall': Recall(), 'f1': F1()})
print("============== Starting Testing ==============")
param_dict = load_checkpoint(args.ckpt_path)
param_dict = load_checkpoint(config.ckpt_file)
load_param_into_net(network, param_dict)
if args.device_target == "CPU":
if config.device_target == "CPU":
acc = model.eval(ds_eval, dataset_sink_mode=False)
else:
acc = model.eval(ds_eval)
print("============== {} ==============".format(acc))
if __name__ == '__main__':
eval_lstm()

View File

@ -16,58 +16,52 @@
##############export checkpoint file into mindir model#################
python export.py
"""
import argparse
import os
import numpy as np
from mindspore import Tensor, context
from mindspore import export, load_checkpoint, load_param_into_net
from src.config import lstm_cfg, lstm_cfg_ascend
from src.lstm import SentimentNet
from src.model_utils.config import config
from src.model_utils.moxing_adapter import moxing_wrapper
from src.model_utils.device_adapter import get_device_id
if __name__ == '__main__':
parser = argparse.ArgumentParser(description='MindSpore LSTM Exporter')
parser.add_argument('--preprocess_path', type=str, default='./preprocess',
help='path where the pre-process data is stored.')
parser.add_argument('--ckpt_file', type=str, required=True, help='lstm ckpt file.')
parser.add_argument("--device_id", type=int, default=0, help="Device id")
parser.add_argument("--file_name", type=str, default="lstm", help="output file name.")
parser.add_argument('--file_format', type=str, choices=["AIR", "MINDIR"], default='AIR', help='file format')
parser.add_argument('--device_target', type=str, default="Ascend", choices=['GPU', 'CPU', 'Ascend'],
help='the target device to run, support "GPU", "CPU". Default: "Ascend".')
args = parser.parse_args()
def modelarts_process():
config.ckpt_file = os.path.join(config.output_path, config.ckpt_file)
@moxing_wrapper(pre_process=modelarts_process)
def export_lstm():
""" export lstm """
config.preprocess_path = os.path.join(config.glove_path, config.preprocess_path)
context.set_context(
mode=context.GRAPH_MODE,
save_graphs=False,
device_target=args.device_target,
device_id=args.device_id)
device_target=config.device_target,
device_id=get_device_id())
if args.device_target == 'Ascend':
cfg = lstm_cfg_ascend
else:
cfg = lstm_cfg
embedding_table = np.loadtxt(os.path.join(config.preprocess_path, "weight.txt")).astype(np.float32)
embedding_table = np.loadtxt(os.path.join(args.preprocess_path, "weight.txt")).astype(np.float32)
if args.device_target == 'Ascend':
pad_num = int(np.ceil(cfg.embed_size / 16) * 16 - cfg.embed_size)
if config.device_target == 'Ascend':
pad_num = int(np.ceil(config.embed_size / 16) * 16 - config.embed_size)
if pad_num > 0:
embedding_table = np.pad(embedding_table, [(0, 0), (0, pad_num)], 'constant')
cfg.embed_size = int(np.ceil(cfg.embed_size / 16) * 16)
config.embed_size = int(np.ceil(config.embed_size / 16) * 16)
network = SentimentNet(vocab_size=embedding_table.shape[0],
embed_size=cfg.embed_size,
num_hiddens=cfg.num_hiddens,
num_layers=cfg.num_layers,
bidirectional=cfg.bidirectional,
num_classes=cfg.num_classes,
embed_size=config.embed_size,
num_hiddens=config.num_hiddens,
num_layers=config.num_layers,
bidirectional=config.bidirectional,
num_classes=config.num_classes,
weight=Tensor(embedding_table),
batch_size=cfg.batch_size)
batch_size=config.batch_size)
param_dict = load_checkpoint(args.ckpt_file)
param_dict = load_checkpoint(config.ckpt_file)
load_param_into_net(network, param_dict)
input_arr = Tensor(np.random.uniform(0.0, 1e5, size=[cfg.batch_size, 500]).astype(np.int32))
export(network, input_arr, file_name=args.file_name, file_format=args.file_format)
input_arr = Tensor(np.random.uniform(0.0, 1e5, size=[config.batch_size, 500]).astype(np.int32))
export(network, input_arr, file_name=config.file_name, file_format=config.file_format)
if __name__ == '__main__':
export_lstm()

View File

@ -15,33 +15,20 @@
"""
#################lstm postprocess########################
"""
import argparse
import os
import numpy as np
from mindspore.nn import Accuracy
from src.config import lstm_cfg, lstm_cfg_ascend
from src.model_utils.config import config
parser = argparse.ArgumentParser(description='LSTM Postprocess')
parser.add_argument('--label_dir', type=str, default='', help='label data directory.')
parser.add_argument('--result_dir', type=str, default="./result_Files",
help='infer result dir.')
parser.add_argument('--device_target', type=str, default="Ascend", choices=['GPU', 'CPU', 'Ascend'],
help='the target device to run, support "GPU", "CPU". Default: "Ascend".')
args, _ = parser.parse_known_args()
if __name__ == '__main__':
metrics = Accuracy()
rst_path = args.result_dir
labels = np.load(args.label_dir)
if args.device_target == 'Ascend':
cfg = lstm_cfg_ascend
else:
cfg = lstm_cfg
rst_path = config.result_dir
labels = np.load(config.label_dir)
for i in range(len(os.listdir(rst_path))):
file_name = os.path.join(rst_path, "LSTM_data_bs" + str(cfg.batch_size) + '_' + str(i) + '_0.bin')
output = np.fromfile(file_name, np.float32).reshape(cfg.batch_size, cfg.num_classes)
file_name = os.path.join(rst_path, "LSTM_data_bs" + str(config.batch_size) + '_' + str(i) + '_0.bin')
output = np.fromfile(file_name, np.float32).reshape(config.batch_size, config.num_classes)
metrics.update(output, labels[i])
print("result of Accuracy is: ", metrics.eval())

View File

@ -15,35 +15,24 @@
"""
##############preprocess#################
"""
import argparse
import os
import numpy as np
from src.config import lstm_cfg, lstm_cfg_ascend
from src.dataset import lstm_create_dataset
parser = argparse.ArgumentParser(description='preprocess')
parser.add_argument('--preprocess_path', type=str, default="./preprocess",
help='path where the pre-process data is stored.')
parser.add_argument('--result_path', type=str, default='./preprocess_Result/', help='result path')
parser.add_argument('--device_target', type=str, default="Ascend", choices=['GPU', 'CPU', 'Ascend'],
help='the target device to run, support "GPU", "CPU". Default: "Ascend".')
args = parser.parse_args()
from src.dataset import lstm_create_dataset
from src.model_utils.config import config
if __name__ == '__main__':
if args.device_target == 'Ascend':
cfg = lstm_cfg_ascend
else:
cfg = lstm_cfg
dataset = lstm_create_dataset(args.preprocess_path, cfg.batch_size, training=False)
img_path = os.path.join(args.result_path, "00_data")
config.preprocess_path = os.path.join(config.glove_path, config.preprocess_path)
dataset = lstm_create_dataset(config.preprocess_path, config.batch_size, training=False)
img_path = os.path.join(config.result_path, "00_data")
os.makedirs(img_path)
label_list = []
for i, data in enumerate(dataset.create_dict_iterator(output_numpy=True)):
file_name = "LSTM_data_bs" + str(cfg.batch_size) + "_" + str(i) + ".bin"
file_name = "LSTM_data_bs" + str(config.batch_size) + "_" + str(i) + ".bin"
file_path = img_path + "/" + file_name
data['feature'].tofile(file_path)
label_list.append(data['label'])
np.save(args.result_path + "label_ids.npy", label_list)
np.save(config.result_path + "label_ids.npy", label_list)
print("="*20, "export bin files finished", "="*20)

View File

@ -27,6 +27,8 @@ RANK_SIZE=$2
ACLIMDB_DIR=$3
GLOVE_DIR=$4
BASE_PATH=$(cd ./"`dirname $0`" || exit; pwd)
CONFIG_FILE="${BASE_PATH}/../config_ascend_8p.yaml"
for((i=0;i<${RANK_SIZE};i++));
do
@ -34,15 +36,18 @@ do
mkdir ${ROOT_PATH}/device$i
cd ${ROOT_PATH}/device$i || exit
cp ../../*.py ./
cp ../../*.yaml ./
cp -r ../../src ./
export RANK_ID=$i
export DEVICE_ID=$i
python train.py \
--config_path=$CONFIG_FILE \
--device_target="Ascend" \
--aclimdb_path=$ACLIMDB_DIR \
--glove_path=$GLOVE_DIR \
--distribute=true \
--device_num=$RANK_SIZE \
--preprocess=true \
--device_id=$i --rank_id=$i \
--preprocess=false \
--preprocess_path=./preprocess > log.txt 2>&1 &
done

View File

@ -1,5 +1,5 @@
#!/bin/bash
# Copyright 2020 Huawei Technologies Co., Ltd
# Copyright 2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@ -32,8 +32,13 @@ CUR_DIR=`pwd`
export GLOG_log_dir=${CUR_DIR}/ms_log
export GLOG_logtostderr=0
export DEVICE_ID=$DEVICE_ID
BASE_PATH=$(cd ./"`dirname $0`" || exit; pwd)
CONFIG_FILE="${BASE_PATH}/../../config_ascend.yaml"
python ../../eval.py \
--config_path=$CONFIG_FILE \
--device_target="Ascend" \
--preprocess=false \
--preprocess_path=$PREPROCESS_DIR \
--ckpt_path=$CKPT_FILE > log.txt 2>&1 &
--glove_path=$PREPROCESS_DIR \
--ckpt_file=$CKPT_FILE > log.txt 2>&1 &

View File

@ -1,5 +1,5 @@
#!/bin/bash
# Copyright 2020 Huawei Technologies Co., Ltd
# Copyright 2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@ -28,10 +28,15 @@ mkdir -p ms_log
CUR_DIR=`pwd`
export GLOG_log_dir=${CUR_DIR}/ms_log
export GLOG_logtostderr=0
python eval.py \
BASE_PATH=$(cd ./"`dirname $0`" || exit; pwd)
CONFIG_FILE="${BASE_PATH}/../default_config.yaml"
python ../eval.py \
--config_path=$CONFIG_FILE \
--device_target="CPU" \
--aclimdb_path=$ACLIMDB_DIR \
--glove_path=$GLOVE_DIR \
--preprocess=false \
--preprocess_path=./preprocess \
--ckpt_path=$CKPT_FILE > log.txt 2>&1 &
--ckpt_file=$CKPT_FILE > log.txt 2>&1 &

View File

@ -1,5 +1,5 @@
#!/bin/bash
# Copyright 2020 Huawei Technologies Co., Ltd
# Copyright 2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@ -31,10 +31,15 @@ mkdir -p ms_log
CUR_DIR=`pwd`
export GLOG_log_dir=${CUR_DIR}/ms_log
export GLOG_logtostderr=0
python eval.py \
BASE_PATH=$(cd ./"`dirname $0`" || exit; pwd)
CONFIG_FILE="${BASE_PATH}/../default_config.yaml"
python ../eval.py \
--config_path=$CONFIG_FILE \
--device_target="GPU" \
--aclimdb_path=$ACLIMDB_DIR \
--glove_path=$GLOVE_DIR \
--preprocess=false \
--preprocess_path=./preprocess \
--ckpt_path=$CKPT_FILE > log.txt 2>&1 &
--ckpt_file=$CKPT_FILE > log.txt 2>&1 &

View File

@ -56,6 +56,9 @@ echo "need preprocess: "$need_preprocess
echo "device_target: "$device_target
echo "device id: "$device_id
BASE_PATH=$(cd ./"`dirname $0`" || exit; pwd)
CONFIG_FILE="${BASE_PATH}/../default_config.yaml"
export ASCEND_HOME=/usr/local/Ascend/
if [ -d ${ASCEND_HOME}/ascend-toolkit ]; then
export PATH=$ASCEND_HOME/fwkacllib/bin:$ASCEND_HOME/fwkacllib/ccec_compiler/bin:$ASCEND_HOME/ascend-toolkit/latest/fwkacllib/ccec_compiler/bin:$ASCEND_HOME/ascend-toolkit/latest/atc/bin:$PATH
@ -76,7 +79,7 @@ function preprocess_data()
rm -rf ./preprocess_Result
fi
mkdir preprocess_Result
python3.7 ../preprocess.py --preprocess_path=$dataset_path --result_path=./preprocess_Result/ --device_target=$device_target
python3.7 ../preprocess.py --config_path=$CONFIG_FILE --preprocess_path=$dataset_path --result_path=./preprocess_Result/ --device_target=$device_target
}
function compile_app()
@ -103,7 +106,7 @@ function infer()
function cal_acc()
{
python3.7 ../postprocess.py --result_dir=./result_Files --label_dir=./preprocess_Result/label_ids.npy --device_target=$device_target &> acc.log
python3.7 ../postprocess.py --config_path=$CONFIG_FILE --result_dir=./result_Files --label_dir=./preprocess_Result/label_ids.npy --device_target=$device_target &> acc.log
}
if [ $need_preprocess == "y" ]; then

View File

@ -1,5 +1,5 @@
#!/bin/bash
# Copyright 2020 Huawei Technologies Co., Ltd
# Copyright 2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@ -31,9 +31,14 @@ CUR_DIR=`pwd`
export GLOG_log_dir=${CUR_DIR}/ms_log
export GLOG_logtostderr=0
export DEVICE_ID=$DEVICE_ID
BASE_PATH=$(cd ./"`dirname $0`" || exit; pwd)
CONFIG_FILE="${BASE_PATH}/../../config_ascend.yaml"
python ../../train.py \
--config_path=$CONFIG_FILE \
--device_target="Ascend" \
--aclimdb_path=$ACLIMDB_DIR \
--glove_path=$GLOVE_DIR \
--preprocess=true \
--preprocess=false \
--preprocess_path=./preprocess > log.txt 2>&1 &

View File

@ -1,5 +1,5 @@
#!/bin/bash
# Copyright 2020 Huawei Technologies Co., Ltd
# Copyright 2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@ -27,9 +27,14 @@ mkdir -p ms_log
CUR_DIR=`pwd`
export GLOG_log_dir=${CUR_DIR}/ms_log
export GLOG_logtostderr=0
python train.py \
BASE_PATH=$(cd ./"`dirname $0`" || exit; pwd)
CONFIG_FILE="${BASE_PATH}/../default_config.yaml"
python ../train.py \
--config_path=$CONFIG_FILE \
--device_target="CPU" \
--aclimdb_path=$ACLIMDB_DIR \
--glove_path=$GLOVE_DIR \
--preprocess=true \
--preprocess=false \
--preprocess_path=./preprocess > log.txt 2>&1 &

View File

@ -1,5 +1,5 @@
#!/bin/bash
# Copyright 2020 Huawei Technologies Co., Ltd
# Copyright 2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@ -30,9 +30,14 @@ mkdir -p ms_log
CUR_DIR=`pwd`
export GLOG_log_dir=${CUR_DIR}/ms_log
export GLOG_logtostderr=0
python train.py \
BASE_PATH=$(cd ./"`dirname $0`" || exit; pwd)
CONFIG_FILE="${BASE_PATH}/../default_config.yaml"
python ../train.py \
--config_path=$CONFIG_FILE \
--device_target="GPU" \
--aclimdb_path=$ACLIMDB_DIR \
--glove_path=$GLOVE_DIR \
--preprocess=true \
--preprocess=false \
--preprocess_path=./preprocess > log.txt 2>&1 &

View File

@ -12,3 +12,11 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""
__init__.py
"""
from . import imdb
from . import dataset
from . import lr_schedule
from . import lstm

View File

@ -1,76 +0,0 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""
network config setting
"""
from easydict import EasyDict as edict
# LSTM CONFIG
lstm_cfg = edict({
'num_classes': 2,
'dynamic_lr': False,
'learning_rate': 0.1,
'momentum': 0.9,
'num_epochs': 20,
'batch_size': 64,
'embed_size': 300,
'num_hiddens': 100,
'num_layers': 2,
'bidirectional': True,
'save_checkpoint_steps': 390,
'keep_checkpoint_max': 10
})
# LSTM CONFIG IN ASCEND for 1p training
lstm_cfg_ascend = edict({
'num_classes': 2,
'momentum': 0.9,
'num_epochs': 20,
'batch_size': 64,
'embed_size': 300,
'num_hiddens': 128,
'num_layers': 2,
'bidirectional': True,
'save_checkpoint_steps': 7800,
'keep_checkpoint_max': 10,
'dynamic_lr': True,
'lr_init': 0.05,
'lr_end': 0.01,
'lr_max': 0.1,
'lr_adjust_epoch': 6,
'warmup_epochs': 1,
'global_step': 0
})
# LSTM CONFIG IN ASCEND for 8p training
lstm_cfg_ascend_8p = edict({
'num_classes': 2,
'momentum': 0.9,
'num_epochs': 20,
'batch_size': 64,
'embed_size': 300,
'num_hiddens': 128,
'num_layers': 2,
'bidirectional': True,
'save_checkpoint_steps': 7800,
'keep_checkpoint_max': 10,
'dynamic_lr': True,
'lr_init': 0.05,
'lr_end': 0.01,
'lr_max': 0.3,
'lr_adjust_epoch': 20,
'warmup_epochs': 2,
'global_step': 0
})

View File

@ -16,7 +16,6 @@
Data operations, will be used in train.py and eval.py
"""
import os
import numpy as np
import mindspore.dataset as ds

View File

@ -17,7 +17,6 @@ imdb dataset parser.
"""
import os
from itertools import chain
import numpy as np
import gensim

View File

@ -0,0 +1,127 @@
# Copyright 2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Parse arguments"""
import os
import ast
import argparse
from pprint import pprint, pformat
import yaml
class Config:
    """
    Attribute-style view of a configuration dictionary.

    Each key of the input dictionary becomes an instance attribute. Nested
    dictionaries are wrapped in ``Config`` as well; lists and tuples are stored
    as lists whose dictionary elements are wrapped in ``Config``.
    """

    def __init__(self, cfg_dict):
        for key, value in cfg_dict.items():
            if isinstance(value, (list, tuple)):
                # Sequences are always stored as a list, wrapping dict elements.
                converted = [Config(elem) if isinstance(elem, dict) else elem for elem in value]
            elif isinstance(value, dict):
                converted = Config(value)
            else:
                converted = value
            setattr(self, key, converted)

    def __str__(self):
        # Pretty-printed form of the attribute dictionary.
        return pformat(vars(self))

    def __repr__(self):
        return self.__str__()
def parse_cli_to_yaml(parser, cfg, helper=None, choices=None, cfg_path="default_config.yaml"):
    """
    Parse command line arguments to the configuration according to the default yaml.

    Every scalar entry of ``cfg`` is exposed as a ``--<key>`` option whose default
    is the yaml value. Entries whose value is a list or a dict are skipped and
    therefore can only be set in the yaml file.

    Args:
        parser: Parent parser.
        cfg: Base configuration.
        helper: Helper description.
        choices: Mapping from option name to its allowed values.
        cfg_path: Path to the default yaml config.

    Returns:
        The ``argparse.Namespace`` produced by parsing ``sys.argv``.
    """
    parser = argparse.ArgumentParser(description="[REPLACE THIS at config.py]",
                                     parents=[parser])
    helper = {} if helper is None else helper
    choices = {} if choices is None else choices
    for item, default in cfg.items():
        if isinstance(default, (list, dict)):
            continue
        help_description = helper.get(item, "Please reference to {}".format(cfg_path))
        choice = choices.get(item)
        if isinstance(default, bool):
            # bool("False") is True, so booleans are parsed as Python literals.
            arg_type = ast.literal_eval
        elif default is None:
            # A yaml null has type NoneType, which cannot convert a CLI string;
            # accept the override as a plain string instead.
            arg_type = str
        else:
            arg_type = type(default)
        parser.add_argument("--" + item, type=arg_type, default=default, choices=choice,
                            help=help_description)
    args = parser.parse_args()
    return args
def parse_yaml(yaml_path):
    """
    Parse the yaml config file.

    The file may contain one to three yaml documents, in this order: the
    configuration itself, the help description of each option, and the allowed
    choices of each option. Missing trailing documents default to empty dicts.

    Args:
        yaml_path: Path to the yaml config.

    Returns:
        A tuple ``(cfg, cfg_helper, cfg_choices)``.

    Raises:
        ValueError: If the file is not valid yaml, contains no document, or
            contains more than three documents.
    """
    with open(yaml_path, 'r') as fin:
        try:
            # load_all is lazy; materialise the documents while handling errors.
            cfgs = list(yaml.load_all(fin.read(), Loader=yaml.FullLoader))
        except yaml.YAMLError as err:
            raise ValueError("Failed to parse yaml") from err

    # Validate the document count outside the try block so that the specific
    # message below is not replaced by the generic parse failure.
    if not cfgs:
        raise ValueError("No yaml document found in {}".format(yaml_path))
    if len(cfgs) == 1:
        cfg = cfgs[0]
        cfg_helper = {}
        cfg_choices = {}
    elif len(cfgs) == 2:
        cfg, cfg_helper = cfgs
        cfg_choices = {}
    elif len(cfgs) == 3:
        cfg, cfg_helper, cfg_choices = cfgs
    else:
        raise ValueError("At most 3 docs (config, description for help, choices) are supported in config yaml")
    print(cfg_helper)
    return cfg, cfg_helper, cfg_choices
def merge(args, cfg):
    """
    Overlay the parsed command line values onto the yaml configuration.

    Args:
        args: Command line arguments.
        cfg: Base configuration; it is modified in place.

    Returns:
        The same ``cfg`` object after every attribute of ``args`` was written into it.
    """
    for name, value in vars(args).items():
        cfg[name] = value
    return cfg
def get_config():
    """
    Build the configuration from the yaml file and the command line.

    A first parser only extracts ``--config_path``. The yaml file it points to
    is loaded, its scalar entries are exposed as command line options, and the
    parsed command line values are merged back over the yaml values.

    Returns:
        A ``Config`` object built from the merged dictionary.
    """
    parser = argparse.ArgumentParser(description="default name", add_help=False)
    module_dir = os.path.dirname(os.path.abspath(__file__))
    default_yaml = os.path.join(module_dir, "../../default_config.yaml")
    parser.add_argument("--config_path", type=str, default=default_yaml,
                        help="Config file path")
    # parse_known_args: the remaining options are not registered yet.
    known_args, _ = parser.parse_known_args()
    yaml_path = known_args.config_path
    default, helper, choices = parse_yaml(yaml_path)
    pprint(default)
    cli_args = parse_cli_to_yaml(parser=parser, cfg=default, helper=helper, choices=choices, cfg_path=yaml_path)
    return Config(merge(cli_args, default))
# Module-level configuration object: get_config() runs when this module is
# first imported, so the yaml file and the command line are parsed at import time.
config = get_config()

View File

@ -0,0 +1,27 @@
# Copyright 2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Device adapter for ModelArts"""
from .config import config
# Select the adapter implementation from the configuration: the moxing adapter
# when config.enable_modelarts is truthy, the local adapter otherwise.
# Both branches import the same four helper names.
if config.enable_modelarts:
    from .moxing_adapter import get_device_id, get_device_num, get_rank_id, get_job_id
else:
    from .local_adapter import get_device_id, get_device_num, get_rank_id, get_job_id

# Names re-exported by this module.
__all__ = [
    "get_device_id", "get_device_num", "get_rank_id", "get_job_id"
]

View File

@ -0,0 +1,36 @@
# Copyright 2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Local adapter"""
import os
def get_device_id():
    """Return the ``DEVICE_ID`` environment variable as an int (0 when unset)."""
    return int(os.getenv('DEVICE_ID', '0'))
def get_device_num():
    """Return the ``RANK_SIZE`` environment variable as an int (1 when unset)."""
    return int(os.getenv('RANK_SIZE', '1'))
def get_rank_id():
    """Return the ``RANK_ID`` environment variable as an int (0 when unset)."""
    return int(os.getenv('RANK_ID', '0'))
def get_job_id():
    """Return the fixed job identifier reported by the local adapter."""
    job_id = "Local Job"
    return job_id

View File

@ -0,0 +1,122 @@
# Copyright 2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Moxing adapter for ModelArts"""
import os
import functools
from mindspore import context
from mindspore.profiler import Profiler
from .config import config
_global_sync_count = 0
def get_device_id():
    """Return the ``DEVICE_ID`` environment variable as an int (0 when unset)."""
    return int(os.getenv('DEVICE_ID', '0'))
def get_device_num():
    """Return the ``RANK_SIZE`` environment variable as an int (1 when unset)."""
    return int(os.getenv('RANK_SIZE', '1'))
def get_rank_id():
    """Return the ``RANK_ID`` environment variable as an int (0 when unset)."""
    return int(os.getenv('RANK_ID', '0'))
def get_job_id():
    """
    Return the job identifier from the ``JOB_ID`` environment variable.

    Returns:
        The value of ``JOB_ID``, or ``"default"`` when the variable is unset or empty.
    """
    # os.getenv returns None for an unset variable; comparing only against ""
    # let that None leak out instead of the "default" fallback.
    return os.getenv('JOB_ID') or "default"
def sync_data(from_path, to_path):
    """
    Copy data between ``from_path`` and ``to_path`` with moxing, once per lock file.

    Download data from remote obs to local directory if the first url is remote url
    and the second one is local path; upload data from local directory to remote obs
    in contrast.

    Each call uses its own lock file, numbered by a module-level call counter.
    A process whose device id passes the modulo check below performs the copy and
    then creates the lock file; every process polls until the lock file exists.

    Args:
        from_path: Source path or url.
        to_path: Destination path or url.
    """
    import moxing as mox
    import time
    global _global_sync_count
    lock_file = "/tmp/copy_sync.lock" + str(_global_sync_count)
    _global_sync_count += 1

    # Each server contains 8 devices as most.
    is_copier = get_device_id() % min(get_device_num(), 8) == 0
    if is_copier and not os.path.exists(lock_file):
        print("from path: ", from_path)
        print("to path: ", to_path)
        mox.file.copy_parallel(from_path, to_path)
        print("===finish data synchronization===")
        try:
            os.mknod(lock_file)
        except IOError:
            pass
        print("===save flag===")

    # Poll once per second until the lock file shows up.
    while not os.path.exists(lock_file):
        time.sleep(1)

    print("Finish sync data from {} to {}.".format(from_path, to_path))
def moxing_wrapper(pre_process=None, post_process=None):
    """
    Moxing wrapper to download dataset and upload outputs.

    Args:
        pre_process: Optional callable without arguments, invoked before the
            wrapped function once the ModelArts inputs have been synchronized.
        post_process: Optional callable without arguments, invoked after the
            wrapped function and before the outputs are uploaded.

    Returns:
        A decorator. The decorated function returns whatever the wrapped
        function returns.
    """
    def wrapper(run_func):
        @functools.wraps(run_func)
        def wrapped_func(*args, **kwargs):
            # Download data from data_url
            if config.enable_modelarts:
                if config.data_url:
                    sync_data(config.data_url, config.data_path)
                    print("Dataset downloaded: ", os.listdir(config.data_path))
                if config.checkpoint_url:
                    sync_data(config.checkpoint_url, config.load_path)
                    print("Preload downloaded: ", os.listdir(config.load_path))
                if config.train_url:
                    sync_data(config.train_url, config.output_path)
                    print("Workspace downloaded: ", os.listdir(config.output_path))

                context.set_context(save_graphs_path=os.path.join(config.output_path, str(get_rank_id())))
                config.device_num = get_device_num()
                config.device_id = get_device_id()
                # exist_ok avoids a FileExistsError when several device
                # processes try to create the directory at the same time.
                os.makedirs(config.output_path, exist_ok=True)

                if pre_process:
                    pre_process()

            if config.enable_profiling:
                profiler = Profiler()

            # Keep the result so the decorator does not swallow the return value.
            result = run_func(*args, **kwargs)

            if config.enable_profiling:
                profiler.analyse()

            # Upload data to train_url
            if config.enable_modelarts:
                if post_process:
                    post_process()

                if config.train_url:
                    print("Start to copy output directory")
                    sync_data(config.output_path, config.train_url)

            return result
        return wrapped_func
    return wrapper

View File

@ -1,4 +1,4 @@
# Copyright 2020 Huawei Technologies Co., Ltd
# Copyright 2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@ -15,16 +15,16 @@
"""
#################train lstm example on aclImdb########################
"""
import argparse
import os
import numpy as np
from src.config import lstm_cfg, lstm_cfg_ascend, lstm_cfg_ascend_8p
from src.model_utils.config import config
from src.model_utils.moxing_adapter import moxing_wrapper
from src.dataset import convert_to_mindrecord
from src.dataset import lstm_create_dataset
from src.lr_schedule import get_lr
from src.lstm import SentimentNet
from mindspore import Tensor, nn, Model, context
from mindspore.nn import Accuracy
from mindspore.train.callback import LossMonitor, CheckpointConfig, ModelCheckpoint, TimeMonitor
@ -32,103 +32,85 @@ from mindspore.train.serialization import load_param_into_net, load_checkpoint
from mindspore.communication.management import init, get_rank
from mindspore.context import ParallelMode
if __name__ == '__main__':
parser = argparse.ArgumentParser(description='MindSpore LSTM Example')
parser.add_argument('--preprocess', type=str, default='false', choices=['true', 'false'],
help='whether to preprocess data.')
parser.add_argument('--aclimdb_path', type=str, default="./aclImdb",
help='path where the dataset is stored.')
parser.add_argument('--glove_path', type=str, default="./glove",
help='path where the GloVe is stored.')
parser.add_argument('--preprocess_path', type=str, default="./preprocess",
help='path where the pre-process data is stored.')
parser.add_argument('--ckpt_path', type=str, default="./",
help='the path to save the checkpoint file.')
parser.add_argument('--pre_trained', type=str, default=None,
help='the pretrained checkpoint file path.')
parser.add_argument('--device_target', type=str, default="Ascend", choices=['GPU', 'CPU', 'Ascend'],
help='the target device to run, support "GPU", "CPU". Default: "Ascend".')
parser.add_argument("--device_num", type=int, default=1, help="Use device nums, default is 1.")
parser.add_argument("--distribute", type=str, default="false", choices=["true", "false"],
help="Run distribute, default is false.")
parser.add_argument("--enable_graph_kernel", type=str, default="true", choices=["true", "false"],
help="Accelerate by graph kernel, default is true.")
def modelarts_pre_process():
    """Rebase ``config.ckpt_path`` onto ``config.output_path``."""
    output_root = config.output_path
    config.ckpt_path = os.path.join(output_root, config.ckpt_path)
args = parser.parse_args()
@moxing_wrapper(pre_process=modelarts_pre_process)
def train_lstm():
""" train lstm """
print('\ntrain.py config: \n', config)
config.preprocess_path = os.path.join(config.glove_path, config.preprocess_path)
_enable_graph_kernel = args.enable_graph_kernel == "true" and args.device_target == "GPU"
_enable_graph_kernel = config.enable_graph_kernel == "true" and config.device_target == "GPU"
context.set_context(
mode=context.GRAPH_MODE,
save_graphs=False,
enable_graph_kernel=_enable_graph_kernel,
device_target=args.device_target)
device_target=config.device_target)
rank = 0
device_num = 1
if args.device_target == 'Ascend':
cfg = lstm_cfg_ascend
if args.distribute == "true":
cfg = lstm_cfg_ascend_8p
init()
device_num = args.device_num
rank = get_rank()
if config.device_target == 'Ascend' and config.distribute == "true":
init()
device_num = config.device_num # get_device_num()
rank = get_rank()
context.reset_auto_parallel_context()
context.set_auto_parallel_context(parallel_mode=ParallelMode.DATA_PARALLEL, gradients_mean=True, \
device_num=device_num)
context.reset_auto_parallel_context()
context.set_auto_parallel_context(parallel_mode=ParallelMode.DATA_PARALLEL, gradients_mean=True,
device_num=device_num)
else:
cfg = lstm_cfg
if args.preprocess == "true":
if config.preprocess == "true":
print("============== Starting Data Pre-processing ==============")
convert_to_mindrecord(cfg.embed_size, args.aclimdb_path, args.preprocess_path, args.glove_path)
convert_to_mindrecord(config.embed_size, config.aclimdb_path, config.preprocess_path, config.glove_path)
embedding_table = np.loadtxt(os.path.join(args.preprocess_path, "weight.txt")).astype(np.float32)
embedding_table = np.loadtxt(os.path.join(config.preprocess_path, "weight.txt")).astype(np.float32)
# DynamicRNN in this network on the Ascend platform only supports an input_size
# and hidden_size that are multiples of 16; this limitation will be solved later.
if args.device_target == 'Ascend':
pad_num = int(np.ceil(cfg.embed_size / 16) * 16 - cfg.embed_size)
if config.device_target == 'Ascend':
pad_num = int(np.ceil(config.embed_size / 16) * 16 - config.embed_size)
if pad_num > 0:
embedding_table = np.pad(embedding_table, [(0, 0), (0, pad_num)], 'constant')
cfg.embed_size = int(np.ceil(cfg.embed_size / 16) * 16)
config.embed_size = int(np.ceil(config.embed_size / 16) * 16)
network = SentimentNet(vocab_size=embedding_table.shape[0],
embed_size=cfg.embed_size,
num_hiddens=cfg.num_hiddens,
num_layers=cfg.num_layers,
bidirectional=cfg.bidirectional,
num_classes=cfg.num_classes,
embed_size=config.embed_size,
num_hiddens=config.num_hiddens,
num_layers=config.num_layers,
bidirectional=config.bidirectional,
num_classes=config.num_classes,
weight=Tensor(embedding_table),
batch_size=cfg.batch_size)
batch_size=config.batch_size)
# pre_trained
if args.pre_trained:
load_param_into_net(network, load_checkpoint(args.pre_trained))
if config.pre_trained:
load_param_into_net(network, load_checkpoint(config.pre_trained))
ds_train = lstm_create_dataset(args.preprocess_path, cfg.batch_size, 1, device_num=device_num, rank=rank)
ds_train = lstm_create_dataset(config.preprocess_path, config.batch_size, 1, device_num=device_num, rank=rank)
loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
if cfg.dynamic_lr:
lr = Tensor(get_lr(global_step=cfg.global_step,
lr_init=cfg.lr_init, lr_end=cfg.lr_end, lr_max=cfg.lr_max,
warmup_epochs=cfg.warmup_epochs,
total_epochs=cfg.num_epochs,
if config.dynamic_lr:
lr = Tensor(get_lr(global_step=config.global_step,
lr_init=config.lr_init, lr_end=config.lr_end, lr_max=config.lr_max,
warmup_epochs=config.warmup_epochs,
total_epochs=config.num_epochs,
steps_per_epoch=ds_train.get_dataset_size(),
lr_adjust_epoch=cfg.lr_adjust_epoch))
lr_adjust_epoch=config.lr_adjust_epoch))
else:
lr = cfg.learning_rate
lr = config.learning_rate
opt = nn.Momentum(network.trainable_params(), lr, cfg.momentum)
opt = nn.Momentum(network.trainable_params(), lr, config.momentum)
loss_cb = LossMonitor()
model = Model(network, loss, opt, {'acc': Accuracy()})
print("============== Starting Training ==============")
config_ck = CheckpointConfig(save_checkpoint_steps=cfg.save_checkpoint_steps,
keep_checkpoint_max=cfg.keep_checkpoint_max)
ckpoint_cb = ModelCheckpoint(prefix="lstm", directory=args.ckpt_path, config=config_ck)
config_ck = CheckpointConfig(save_checkpoint_steps=config.save_checkpoint_steps,
keep_checkpoint_max=config.keep_checkpoint_max)
ckpoint_cb = ModelCheckpoint(prefix="lstm", directory=config.ckpt_path, config=config_ck)
time_cb = TimeMonitor(data_size=ds_train.get_dataset_size())
if args.device_target == "CPU":
model.train(cfg.num_epochs, ds_train, callbacks=[time_cb, ckpoint_cb, loss_cb], dataset_sink_mode=False)
if config.device_target == "CPU":
model.train(config.num_epochs, ds_train, callbacks=[time_cb, ckpoint_cb, loss_cb], dataset_sink_mode=False)
else:
model.train(cfg.num_epochs, ds_train, callbacks=[time_cb, ckpoint_cb, loss_cb])
model.train(config.num_epochs, ds_train, callbacks=[time_cb, ckpoint_cb, loss_cb])
print("============== Training Success ==============")
# Script entry point: start training only when this file is executed directly.
if __name__ == '__main__':
    train_lstm()