enable export for textrcnn, remove unused code in eval.py

陈劢 2020-12-25 16:15:49 +08:00
parent be56c201ef
commit 786e4c4cbe
8 changed files with 77 additions and 22 deletions

data_helpers.py

@@ -23,7 +23,6 @@ parser.add_argument('--data_dir', type=str, help='the source dataset directory.'
 parser.add_argument('--out_dir', type=str, help='the target dataset directory.', default='./data')
 args = parser.parse_args()
-np.random.seed(2)


 def dataset_split(label):
@@ -34,6 +33,7 @@ def dataset_split(label):
     pfhand = open(pos_file, encoding='utf-8')
     pos_samples += pfhand.readlines()
     pfhand.close()
+    np.random.seed(0)
     perm = np.random.permutation(len(pos_samples))
     perm_train = perm[0:int(len(pos_samples) * 0.9)]
     perm_test = perm[int(len(pos_samples) * 0.9):]
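For context (not part of the commit): a minimal, self-contained sketch of why seeding immediately before the permutation keeps the 90/10 split reproducible across runs, using a stand-in list in place of pos_samples.

import numpy as np

samples = ["review_%d" % i for i in range(100)]    # stand-in for pos_samples
np.random.seed(0)                                  # pin the permutation right before it is drawn
perm = np.random.permutation(len(samples))
perm_train = perm[0:int(len(samples) * 0.9)]       # first 90% of the shuffled indices
perm_test = perm[int(len(samples) * 0.9):]         # remaining 10%
print(len(perm_train), len(perm_test))             # 90 10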

eval.py

@@ -48,13 +48,12 @@ if __name__ == '__main__':
     network = textrcnn(weight=Tensor(embedding_table), vocab_size=embedding_table.shape[0],
                        cell=cfg.cell, batch_size=cfg.batch_size)
     loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True)
-    opt = nn.Momentum(network.trainable_params(), cfg.lr, cfg.momentum)
     loss_cb = LossMonitor()
     print("============== Starting Testing ==============")
-    ds_eval = create_dataset(cfg.preprocess_path, cfg.batch_size, 1, False)
+    ds_eval = create_dataset(cfg.preprocess_path, cfg.batch_size, False)
     param_dict = load_checkpoint(args.ckpt_path)
     load_param_into_net(network, param_dict)
     network.set_train(False)
-    model = Model(network, loss, opt, metrics={'acc': Accuracy()}, amp_level='O3')
+    model = Model(network, loss, metrics={'acc': Accuracy()}, amp_level='O3')
     acc = model.eval(ds_eval, dataset_sink_mode=False)
     print("============== Accuracy:{} ==============".format(acc))

export.py (new file)

@@ -0,0 +1,49 @@
+# Copyright 2020 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+"""textrcnn export ckpt file to mindir/air"""
+import os
+import argparse
+
+import numpy as np
+
+from mindspore import Tensor, context, load_checkpoint, load_param_into_net, export
+
+from src.textrcnn import textrcnn
+from src.config import textrcnn_cfg as config
+
+parser = argparse.ArgumentParser(description="textrcnn")
+parser.add_argument("--device_id", type=int, default=0, help="Device id")
+parser.add_argument("--ckpt_file", type=str, required=True, help="textrcnn ckpt file.")
+parser.add_argument("--file_name", type=str, default="textrcnn", help="textrcnn output file name.")
+parser.add_argument("--file_format", type=str, choices=["AIR", "MINDIR"],
+                    default="MINDIR", help="file format")
+parser.add_argument("--device_target", type=str, choices=["Ascend"], default="Ascend",
+                    help="device target")
+args = parser.parse_args()
+
+context.set_context(mode=context.GRAPH_MODE, device_target=args.device_target, device_id=args.device_id)
+
+if __name__ == "__main__":
+    # define net
+    embedding_table = np.loadtxt(os.path.join(config.preprocess_path, "weight.txt")).astype(np.float32)
+    net = textrcnn(weight=Tensor(embedding_table), vocab_size=embedding_table.shape[0],
+                   cell=config.cell, batch_size=config.batch_size)
+
+    # load checkpoint
+    param_dict = load_checkpoint(args.ckpt_file)
+    load_param_into_net(net, param_dict)
+    net.set_train(False)
+    image = Tensor(np.ones([config.batch_size, 50], np.int32))
+    export(net, image, file_name=args.file_name, file_format=args.file_format)
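Usage note (not part of the commit): once export.py has produced a MINDIR file, it can be sanity-checked by loading it back and running one forward pass with the same dummy input shape the script traces with. This assumes a MindSpore build that provides mindspore.load and nn.GraphCell, and that the file was exported with the default --file_name textrcnn.

import numpy as np
import mindspore as ms
import mindspore.nn as nn
from src.config import textrcnn_cfg as config

graph = ms.load("textrcnn.mindir")                            # load the exported MindIR graph
net = nn.GraphCell(graph)
ids = ms.Tensor(np.ones([config.batch_size, 50], np.int32))   # batch_size x 50 token ids, as in export.py
print(net(ids).shape)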

README.md

@@ -100,6 +100,7 @@ bash scripts/run_eval.sh
 │   ├──textrcnn.py          // textrcnn architecture
 │   ├──config.py            // parameter configuration
 ├── train.py                // training script
+├── export.py               // export script
 ├── eval.py                 // evaluation script
 ├── data_helpers.py         // dataset split script
 ├── sample.txt              // the shell to train and eval the model without scripts
@@ -129,8 +130,7 @@ Parameters for both training and evaluation can be set in config.py
 'emb_path': './word2vec',            # the directory to save the embedding file
 'embed_size': 300,                   # the dimension of the word embedding
 'save_checkpoint_steps': 149,        # per step to save the checkpoint
-'keep_checkpoint_max': 10,           # max checkpoints to save
-'momentum': 0.9                      # the momentum rate
+'keep_checkpoint_max': 10            # max checkpoints to save
 ```

 ### Performance

src/config.py

@@ -39,5 +39,4 @@ textrcnn_cfg = edict({
     'embed_size': 300,
     'save_checkpoint_steps': 149,
     'keep_checkpoint_max': 10,
-    'momentum': 0.9
 })

src/dataset.py

@@ -76,9 +76,7 @@ def tokenizer(text):
 def collect_weight(glove_path, vocab, word_to_idx, embed_size):
     """ collect weight """
     vocab_size = len(vocab)
-    # wvmodel = gensim.models.KeyedVectors.load_word2vec_format(os.path.join(glove_path, 'glove.6B.300d.txt'),
-    #                                                            binary=False, encoding='utf-8')
-    wvmodel = gensim.models.KeyedVectors.load_word2vec_format(os.path.join(glove_path, \
+    wvmodel = gensim.models.KeyedVectors.load_word2vec_format(os.path.join(glove_path,
                                                                'GoogleNews-vectors-negative300.bin'),
                                                                binary=True)
     weight_np = np.zeros((vocab_size + 1, embed_size)).astype(np.float32)
@@ -164,7 +162,7 @@ def convert_to_mindrecord(embed_size, data_path, proprocess_path, glove_path):
     writer.commit()


-def create_dataset(base_path, batch_size, num_epochs, is_train):
+def create_dataset(base_path, batch_size, is_train):
     """Create dataset for training."""
     columns_list = ["feature", "label"]
     num_consumer = 4
@@ -175,7 +173,7 @@ def create_dataset(base_path, batch_size, num_epochs, is_train):
         path = os.path.join(base_path, 'aclImdb_test.mindrecord0')
     data_set = ds.MindDataset(path, columns_list, num_consumer)
-    ds.config.set_seed(1)
+    ds.config.set_seed(0)
     data_set = data_set.shuffle(buffer_size=data_set.get_dataset_size())
     data_set = data_set.batch(batch_size, drop_remainder=True)
     return data_set
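For reference (not part of the commit): callers now pass only the path, batch size, and split flag; train.py and eval.py in this commit are updated accordingly. A minimal sketch of the new call sites, assuming the preprocessed MindRecord files already exist under cfg.preprocess_path:

from src.config import textrcnn_cfg as cfg
from src.dataset import create_dataset

ds_train = create_dataset(cfg.preprocess_path, cfg.batch_size, True)    # aclImdb train split
ds_eval = create_dataset(cfg.preprocess_path, cfg.batch_size, False)    # aclImdb test split
print(ds_train.get_dataset_size(), ds_eval.get_dataset_size())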

src/textrcnn.py

@@ -47,16 +47,16 @@ class textrcnn(nn.Cell):
             self.lstm = P.DynamicRNN(forget_bias=0.0)
             self.w1_fw = Parameter(
                 np.random.uniform(-k, k, (self.embed_size + self.num_hiddens, 4 * self.num_hiddens)).astype(
-                    np.float32), name="w1_fw")
-            self.b1_fw = Parameter(np.random.uniform(-k, k, (4 * self.num_hiddens)).astype(np.float32),
+                    np.float16), name="w1_fw")
+            self.b1_fw = Parameter(np.random.uniform(-k, k, (4 * self.num_hiddens)).astype(np.float16),
                                    name="b1_fw")
             self.w1_bw = Parameter(
                 np.random.uniform(-k, k, (self.embed_size + self.num_hiddens, 4 * self.num_hiddens)).astype(
-                    np.float32), name="w1_bw")
-            self.b1_bw = Parameter(np.random.uniform(-k, k, (4 * self.num_hiddens)).astype(np.float32),
+                    np.float16), name="w1_bw")
+            self.b1_bw = Parameter(np.random.uniform(-k, k, (4 * self.num_hiddens)).astype(np.float16),
                                    name="b1_bw")
-            self.h1 = Tensor(np.zeros(shape=(1, self.batch_size, self.num_hiddens)).astype(np.float32))
-            self.c1 = Tensor(np.zeros(shape=(1, self.batch_size, self.num_hiddens)).astype(np.float32))
+            self.h1 = Tensor(np.zeros(shape=(1, self.batch_size, self.num_hiddens)).astype(np.float16))
+            self.c1 = Tensor(np.zeros(shape=(1, self.batch_size, self.num_hiddens)).astype(np.float16))

         if cell == "vanilla":
             self.rnnW_fw = nn.Dense(self.num_hiddens, self.num_hiddens)
@@ -72,6 +72,12 @@ class textrcnn(nn.Cell):
             self.rnnWz_bw = nn.Dense(self.num_hiddens + self.embed_size, self.num_hiddens)
             self.rnnWh_bw = nn.Dense(self.num_hiddens + self.embed_size, self.num_hiddens)
             self.ones = Tensor(np.ones(shape=(self.batch_size, self.num_hiddens)).astype(np.float16))
+            self.rnnWr_fw.to_float(mstype.float16)
+            self.rnnWz_fw.to_float(mstype.float16)
+            self.rnnWh_fw.to_float(mstype.float16)
+            self.rnnWr_bw.to_float(mstype.float16)
+            self.rnnWz_bw.to_float(mstype.float16)
+            self.rnnWh_bw.to_float(mstype.float16)

         self.transpose = P.Transpose()
         self.reduce_max = P.ReduceMax()
@@ -91,6 +97,9 @@ class textrcnn(nn.Cell):
         self.tanh = P.Tanh()
         self.sigmoid = P.Sigmoid()
         self.slice = P.Slice()
+        self.text_rep_dense.to_float(mstype.float16)
+        self.mydense.to_float(mstype.float16)
+        self.output_dense.to_float(mstype.float16)

     def construct(self, x):
         """class construction"""

train.py

@@ -22,7 +22,7 @@ import mindspore.context as context
 from mindspore import Tensor
 from mindspore.train import Model
 from mindspore.nn.metrics import Accuracy
-from mindspore.train.callback import ModelCheckpoint, CheckpointConfig, LossMonitor
+from mindspore.train.callback import ModelCheckpoint, CheckpointConfig, LossMonitor, TimeMonitor
 from mindspore.common import set_seed

 from src.config import textrcnn_cfg as cfg
@@ -31,7 +31,7 @@ from src.dataset import convert_to_mindrecord
 from src.textrcnn import textrcnn
 from src.utils import get_lr

-set_seed(2)
+set_seed(0)

 if __name__ == '__main__':
@@ -58,7 +58,7 @@ if __name__ == '__main__':
     network = textrcnn(weight=Tensor(embedding_table), vocab_size=embedding_table.shape[0],
                        cell=cfg.cell, batch_size=cfg.batch_size)
-    ds_train = create_dataset(cfg.preprocess_path, cfg.batch_size, cfg.num_epochs, True)
+    ds_train = create_dataset(cfg.preprocess_path, cfg.batch_size, True)
     step_size = ds_train.get_dataset_size()

     loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True)
@@ -70,11 +70,12 @@ if __name__ == '__main__':
     opt = nn.Adam(params=network.trainable_params(), learning_rate=lr)
     loss_cb = LossMonitor()
+    time_cb = TimeMonitor()
     model = Model(network, loss, opt, {'acc': Accuracy()}, amp_level="O3")
     print("============== Starting Training ==============")
     config_ck = CheckpointConfig(save_checkpoint_steps=cfg.save_checkpoint_steps,
                                  keep_checkpoint_max=cfg.keep_checkpoint_max)
     ckpoint_cb = ModelCheckpoint(prefix=cfg.cell, directory=cfg.ckpt_folder_path, config=config_ck)
-    model.train(num_epochs, ds_train, callbacks=[ckpoint_cb, loss_cb])
+    model.train(num_epochs, ds_train, callbacks=[ckpoint_cb, loss_cb, time_cb])
     print("train success")