forked from mindspore-Ecosystem/mindspore

enable export textrcnn, remove useless code in eval.py

parent be56c201ef
commit 786e4c4cbe
@@ -23,7 +23,6 @@ parser.add_argument('--data_dir', type=str, help='the source dataset directory.'
 parser.add_argument('--out_dir', type=str, help='the target dataset directory.', default='./data')

 args = parser.parse_args()
-np.random.seed(2)


 def dataset_split(label):
@@ -34,6 +33,7 @@ def dataset_split(label):
     pfhand = open(pos_file, encoding='utf-8')
     pos_samples += pfhand.readlines()
     pfhand.close()
+    np.random.seed(0)
     perm = np.random.permutation(len(pos_samples))
     perm_train = perm[0:int(len(pos_samples) * 0.9)]
     perm_test = perm[int(len(pos_samples) * 0.9):]
@@ -48,13 +48,12 @@ if __name__ == '__main__':
     network = textrcnn(weight=Tensor(embedding_table), vocab_size=embedding_table.shape[0],
                        cell=cfg.cell, batch_size=cfg.batch_size)
     loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True)
-    opt = nn.Momentum(network.trainable_params(), cfg.lr, cfg.momentum)
     loss_cb = LossMonitor()
     print("============== Starting Testing ==============")
-    ds_eval = create_dataset(cfg.preprocess_path, cfg.batch_size, 1, False)
+    ds_eval = create_dataset(cfg.preprocess_path, cfg.batch_size, False)
     param_dict = load_checkpoint(args.ckpt_path)
     load_param_into_net(network, param_dict)
     network.set_train(False)
-    model = Model(network, loss, opt, metrics={'acc': Accuracy()}, amp_level='O3')
+    model = Model(network, loss, metrics={'acc': Accuracy()}, amp_level='O3')
     acc = model.eval(ds_eval, dataset_sink_mode=False)
     print("============== Accuracy:{} ==============".format(acc))
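The evaluation-side cleanup removes the unused `nn.Momentum` optimizer and the stray `1` (epoch count) argument to `create_dataset`: `Model.eval` only needs the network, the loss, and the metrics. A minimal, self-contained sketch of an optimizer-free evaluation, using a toy dense network and generated data in place of textrcnn and the IMDB mindrecords (all names below are illustrative, not from this repository):

```python
# Sketch: evaluation needs no optimizer (toy network and data, illustrative only).
import numpy as np
import mindspore.nn as nn
import mindspore.dataset as ds
from mindspore.train import Model
from mindspore.nn.metrics import Accuracy

net = nn.Dense(8, 2)                                   # stand-in for textrcnn
loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True)

def gen():
    for _ in range(32):
        yield (np.random.rand(8).astype(np.float32),
               np.array(np.random.randint(2), np.int32))

eval_ds = ds.GeneratorDataset(gen, column_names=["feature", "label"]).batch(8)

net.set_train(False)
model = Model(net, loss, metrics={'acc': Accuracy()})  # no optimizer argument
acc = model.eval(eval_ds, dataset_sink_mode=False)
print(acc)
```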
@@ -0,0 +1,49 @@
+# Copyright 2020 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+"""textrcnn export ckpt file to mindir/air"""
+import os
+import argparse
+import numpy as np
+from mindspore import Tensor, context, load_checkpoint, load_param_into_net, export
+
+from src.textrcnn import textrcnn
+from src.config import textrcnn_cfg as config
+
+parser = argparse.ArgumentParser(description="textrcnn")
+parser.add_argument("--device_id", type=int, default=0, help="Device id")
+parser.add_argument("--ckpt_file", type=str, required=True, help="textrcnn ckpt file.")
+parser.add_argument("--file_name", type=str, default="textrcnn", help="textrcnn output file name.")
+parser.add_argument("--file_format", type=str, choices=["AIR", "MINDIR"],
+                    default="MINDIR", help="file format")
+parser.add_argument("--device_target", type=str, choices=["Ascend"], default="Ascend",
+                    help="device target")
+args = parser.parse_args()
+
+context.set_context(mode=context.GRAPH_MODE, device_target=args.device_target, device_id=args.device_id)
+
+if __name__ == "__main__":
+    # define net
+    embedding_table = np.loadtxt(os.path.join(config.preprocess_path, "weight.txt")).astype(np.float32)
+
+    net = textrcnn(weight=Tensor(embedding_table), vocab_size=embedding_table.shape[0],
+                   cell=config.cell, batch_size=config.batch_size)
+
+    # load checkpoint
+    param_dict = load_checkpoint(args.ckpt_file)
+    load_param_into_net(net, param_dict)
+    net.set_train(False)
+
+    image = Tensor(np.ones([config.batch_size, 50], np.int32))
+    export(net, image, file_name=args.file_name, file_format=args.file_format)
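For reference, the new script builds on `mindspore.export`, which traces a `Cell` with a dummy input and serializes it to MINDIR or AIR. A minimal sketch of that flow with a toy network (the shapes, names, and simplified context setup here are illustrative assumptions, not the textrcnn configuration):

```python
# Sketch: export a Cell to MINDIR by tracing it with a dummy input (toy net).
import numpy as np
import mindspore.nn as nn
from mindspore import Tensor, context, export

context.set_context(mode=context.GRAPH_MODE)

net = nn.Dense(50, 2)            # stand-in for the textrcnn network
net.set_train(False)

dummy_input = Tensor(np.ones([1, 50], np.float32))
export(net, dummy_input, file_name="toy_net", file_format="MINDIR")
```

In export.py above, the dummy input is instead an int32 `[batch_size, 50]` tensor of token ids, matching textrcnn's expected input.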
@@ -100,6 +100,7 @@ bash scripts/run_eval.sh
 │   ├──textrcnn.py          // textrcnn architecture
 │   ├──config.py            // parameter configuration
 ├── train.py                // training script
+├── export.py               // export script
 ├── eval.py                 // evaluation script
 ├── data_helpers.py         // dataset split script
 ├── sample.txt              // the shell to train and eval the model without scripts
@@ -129,8 +130,7 @@ Parameters for both training and evaluation can be set in config.py
 'emb_path': './word2vec',        # the directory to save the embedding file
 'embed_size': 300,               # the dimension of the word embedding
 'save_checkpoint_steps': 149,    # per step to save the checkpoint
-'keep_checkpoint_max': 10,       # max checkpoints to save
-'momentum': 0.9                  # the momentum rate
+'keep_checkpoint_max': 10        # max checkpoints to save
 ```

 ### Performance
@@ -39,5 +39,4 @@ textrcnn_cfg = edict({
     'embed_size': 300,
     'save_checkpoint_steps': 149,
     'keep_checkpoint_max': 10,
-    'momentum': 0.9
 })
@@ -76,9 +76,7 @@ def tokenizer(text):
 def collect_weight(glove_path, vocab, word_to_idx, embed_size):
     """ collect weight """
     vocab_size = len(vocab)
-    # wvmodel = gensim.models.KeyedVectors.load_word2vec_format(os.path.join(glove_path, 'glove.6B.300d.txt'),
-    #                                                           binary=False, encoding='utf-8')
-    wvmodel = gensim.models.KeyedVectors.load_word2vec_format(os.path.join(glove_path, \
+    wvmodel = gensim.models.KeyedVectors.load_word2vec_format(os.path.join(glove_path,
                                                               'GoogleNews-vectors-negative300.bin'),
                                                               binary=True)
     weight_np = np.zeros((vocab_size + 1, embed_size)).astype(np.float32)
@@ -164,7 +162,7 @@ def convert_to_mindrecord(embed_size, data_path, proprocess_path, glove_path):
     writer.commit()


-def create_dataset(base_path, batch_size, num_epochs, is_train):
+def create_dataset(base_path, batch_size, is_train):
     """Create dataset for training."""
     columns_list = ["feature", "label"]
     num_consumer = 4
@@ -175,7 +173,7 @@ def create_dataset(base_path, batch_size, num_epochs, is_train):
         path = os.path.join(base_path, 'aclImdb_test.mindrecord0')

     data_set = ds.MindDataset(path, columns_list, num_consumer)
-    ds.config.set_seed(1)
+    ds.config.set_seed(0)
     data_set = data_set.shuffle(buffer_size=data_set.get_dataset_size())
     data_set = data_set.batch(batch_size, drop_remainder=True)
     return data_set
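The seed change (`ds.config.set_seed(0)` here, alongside `np.random.seed(0)` and `set_seed(0)` elsewhere in this commit) pins the shuffle order so runs are reproducible; the particular value is arbitrary. A small sketch of the effect on a toy in-memory dataset (illustrative only):

```python
# Sketch: ds.config.set_seed makes dataset shuffling deterministic (toy data).
import numpy as np
import mindspore.dataset as ds

ds.config.set_seed(0)
data = ds.NumpySlicesDataset(np.arange(10), column_names=["x"], shuffle=False)
data = data.shuffle(buffer_size=10)
order = [int(row["x"]) for row in data.create_dict_iterator(output_numpy=True)]
print(order)   # the same order on every run with the same seed
```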
@@ -47,16 +47,16 @@ class textrcnn(nn.Cell):
             self.lstm = P.DynamicRNN(forget_bias=0.0)
             self.w1_fw = Parameter(
                 np.random.uniform(-k, k, (self.embed_size + self.num_hiddens, 4 * self.num_hiddens)).astype(
-                    np.float32), name="w1_fw")
-            self.b1_fw = Parameter(np.random.uniform(-k, k, (4 * self.num_hiddens)).astype(np.float32),
+                    np.float16), name="w1_fw")
+            self.b1_fw = Parameter(np.random.uniform(-k, k, (4 * self.num_hiddens)).astype(np.float16),
                                    name="b1_fw")
             self.w1_bw = Parameter(
                 np.random.uniform(-k, k, (self.embed_size + self.num_hiddens, 4 * self.num_hiddens)).astype(
-                    np.float32), name="w1_bw")
-            self.b1_bw = Parameter(np.random.uniform(-k, k, (4 * self.num_hiddens)).astype(np.float32),
+                    np.float16), name="w1_bw")
+            self.b1_bw = Parameter(np.random.uniform(-k, k, (4 * self.num_hiddens)).astype(np.float16),
                                    name="b1_bw")
-            self.h1 = Tensor(np.zeros(shape=(1, self.batch_size, self.num_hiddens)).astype(np.float32))
-            self.c1 = Tensor(np.zeros(shape=(1, self.batch_size, self.num_hiddens)).astype(np.float32))
+            self.h1 = Tensor(np.zeros(shape=(1, self.batch_size, self.num_hiddens)).astype(np.float16))
+            self.c1 = Tensor(np.zeros(shape=(1, self.batch_size, self.num_hiddens)).astype(np.float16))

         if cell == "vanilla":
             self.rnnW_fw = nn.Dense(self.num_hiddens, self.num_hiddens)
@@ -72,6 +72,12 @@ class textrcnn(nn.Cell):
             self.rnnWz_bw = nn.Dense(self.num_hiddens + self.embed_size, self.num_hiddens)
             self.rnnWh_bw = nn.Dense(self.num_hiddens + self.embed_size, self.num_hiddens)
             self.ones = Tensor(np.ones(shape=(self.batch_size, self.num_hiddens)).astype(np.float16))
+            self.rnnWr_fw.to_float(mstype.float16)
+            self.rnnWz_fw.to_float(mstype.float16)
+            self.rnnWh_fw.to_float(mstype.float16)
+            self.rnnWr_bw.to_float(mstype.float16)
+            self.rnnWz_bw.to_float(mstype.float16)
+            self.rnnWh_bw.to_float(mstype.float16)

         self.transpose = P.Transpose()
         self.reduce_max = P.ReduceMax()
@@ -91,6 +97,9 @@ class textrcnn(nn.Cell):
         self.tanh = P.Tanh()
         self.sigmoid = P.Sigmoid()
         self.slice = P.Slice()
+        self.text_rep_dense.to_float(mstype.float16)
+        self.mydense.to_float(mstype.float16)
+        self.output_dense.to_float(mstype.float16)

     def construct(self, x):
         """class construction"""
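The float16 edits above belong together: with `amp_level='O3'` the model runs in half precision on Ascend, so the hand-written DynamicRNN weights and initial states are created as float16 and the `nn.Dense` sub-cells get `to_float(mstype.float16)`, which casts their inputs to float16 before the underlying ops run. A minimal illustration of `Cell.to_float` on a single dense layer (standalone sketch; it assumes a backend with float16 kernels, e.g. Ascend):

```python
# Sketch: Cell.to_float(float16) makes a cell compute in half precision.
import numpy as np
import mindspore.nn as nn
import mindspore.common.dtype as mstype
from mindspore import Tensor

dense = nn.Dense(16, 4)
dense.to_float(mstype.float16)            # inputs are cast to float16 inside this cell

x = Tensor(np.ones((2, 16), np.float32))  # float32 in ...
y = dense(x)
print(y.dtype)                            # ... float16 out
```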
@@ -22,7 +22,7 @@ import mindspore.context as context
 from mindspore import Tensor
 from mindspore.train import Model
 from mindspore.nn.metrics import Accuracy
-from mindspore.train.callback import ModelCheckpoint, CheckpointConfig, LossMonitor
+from mindspore.train.callback import ModelCheckpoint, CheckpointConfig, LossMonitor, TimeMonitor
 from mindspore.common import set_seed

 from src.config import textrcnn_cfg as cfg
@@ -31,7 +31,7 @@ from src.dataset import convert_to_mindrecord
 from src.textrcnn import textrcnn
 from src.utils import get_lr

-set_seed(2)
+set_seed(0)

 if __name__ == '__main__':

@@ -58,7 +58,7 @@ if __name__ == '__main__':
     network = textrcnn(weight=Tensor(embedding_table), vocab_size=embedding_table.shape[0],
                        cell=cfg.cell, batch_size=cfg.batch_size)

-    ds_train = create_dataset(cfg.preprocess_path, cfg.batch_size, cfg.num_epochs, True)
+    ds_train = create_dataset(cfg.preprocess_path, cfg.batch_size, True)
     step_size = ds_train.get_dataset_size()

     loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True)
@@ -70,11 +70,12 @@ if __name__ == '__main__':
     opt = nn.Adam(params=network.trainable_params(), learning_rate=lr)

     loss_cb = LossMonitor()
+    time_cb = TimeMonitor()
     model = Model(network, loss, opt, {'acc': Accuracy()}, amp_level="O3")

     print("============== Starting Training ==============")
     config_ck = CheckpointConfig(save_checkpoint_steps=cfg.save_checkpoint_steps,
                                  keep_checkpoint_max=cfg.keep_checkpoint_max)
     ckpoint_cb = ModelCheckpoint(prefix=cfg.cell, directory=cfg.ckpt_folder_path, config=config_ck)
-    model.train(num_epochs, ds_train, callbacks=[ckpoint_cb, loss_cb])
+    model.train(num_epochs, ds_train, callbacks=[ckpoint_cb, loss_cb, time_cb])
     print("train success")
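On the training side, the only functional change is the extra `TimeMonitor` callback alongside `LossMonitor`, so epoch/step timing is printed during training. A hedged sketch of the callback wiring on a toy model (the network, optimizer settings, and generated data are placeholders, not the textrcnn setup):

```python
# Sketch: attach LossMonitor and TimeMonitor callbacks to Model.train (toy setup).
import numpy as np
import mindspore.nn as nn
import mindspore.dataset as ds
from mindspore.train import Model
from mindspore.train.callback import LossMonitor, TimeMonitor

net = nn.Dense(8, 2)
loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True)
opt = nn.Adam(params=net.trainable_params(), learning_rate=1e-3)

def gen():
    for _ in range(64):
        yield (np.random.rand(8).astype(np.float32),
               np.array(np.random.randint(2), np.int32))

train_ds = ds.GeneratorDataset(gen, column_names=["feature", "label"]).batch(8)

model = Model(net, loss, opt)
# TimeMonitor reports how long each epoch (and step, on average) takes.
model.train(1, train_ds, callbacks=[LossMonitor(), TimeMonitor()], dataset_sink_mode=False)
```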