From 207e515fce245ab01f53496abe5b592d2edb3d30 Mon Sep 17 00:00:00 2001 From: cj Date: Wed, 16 Sep 2020 20:52:05 +0800 Subject: [PATCH] LSTM API optimization --- mindspore/nn/layer/lstm.py | 139 ++++++++---------------- model_zoo/official/nlp/lstm/src/lstm.py | 139 +++++++++++++++++++----- tests/st/ops/cpu/test_lstm_op.py | 82 +++++++++++++- 3 files changed, 229 insertions(+), 131 deletions(-) diff --git a/mindspore/nn/layer/lstm.py b/mindspore/nn/layer/lstm.py index 7987e42a518..6f68e3dd6cc 100755 --- a/mindspore/nn/layer/lstm.py +++ b/mindspore/nn/layer/lstm.py @@ -14,12 +14,12 @@ # ============================================================================ """lstm""" import math + import numpy as np -import mindspore.nn as nn -from mindspore import context + from mindspore._checkparam import Validator as validator from mindspore.common.initializer import initializer -from mindspore.common.parameter import Parameter, ParameterTuple +from mindspore.common.parameter import Parameter from mindspore.common.tensor import Tensor from mindspore.nn.cell import Cell from mindspore.ops import operations as P @@ -118,83 +118,41 @@ class LSTM(Cell): dropout=0, bidirectional=False): super(LSTM, self).__init__() - self.input_size = input_size - self.hidden_size = hidden_size - self.num_layers = num_layers - self.has_bias = has_bias - self.batch_first = validator.check_value_type("batch_first", batch_first, [bool], self.cls_name) - self.hidden_size = validator.check_integer("hidden_size", hidden_size, 0, Rel.GT, self.cls_name) - self.num_layers = validator.check_integer("num_layers", num_layers, 0, Rel.GT, self.cls_name) - self.dropout = float(dropout) - self.bidirectional = bidirectional - if self.batch_first: - self.transpose1 = P.Transpose() - self.transpose2 = P.Transpose() - num_directions = 2 if self.bidirectional else 1 - self.cpu_target = False - enable_debug = context.get_context("enable_debug_runtime") - if context.get_context("device_target") == "CPU" and not enable_debug: - self.cpu_target = True - if not self.cpu_target: - self.lstm = P.LSTM(input_size=self.input_size, - hidden_size=self.hidden_size, - num_layers=self.num_layers, - has_bias=self.has_bias, - bidirectional=self.bidirectional, - dropout=self.dropout) - weight_size = 0 - gate_size = 4 * self.hidden_size - for layer in range(self.num_layers): - input_layer_size = self.input_size if layer == 0 else self.hidden_size * num_directions - increment_size = gate_size * input_layer_size - increment_size += gate_size * self.hidden_size - if self.has_bias: - increment_size += 2 * gate_size - weight_size += increment_size * num_directions - stdv = 1 / math.sqrt(hidden_size) - w_np = np.random.uniform(-stdv, stdv, (weight_size, 1, 1)).astype(np.float32) - self.weight = Parameter(initializer(Tensor(w_np), [weight_size, 1, 1]), name='weight') - else: - input_size_list = [] - input_size_list.append(self.input_size) - for i in range(self.num_layers - 1): - input_size_list.append(self.hidden_size * num_directions) - weights = [] - layers = [] - bias_size = 0 if not self.has_bias else num_directions * self.hidden_size * 4 - stdv = 1 / math.sqrt(hidden_size) - for i in range(num_layers): - weight_size = (input_size_list[i] + self.hidden_size) * num_directions * self.hidden_size * 4 - if has_bias: - weight_size = weight_size + bias_size - w_np = np.random.uniform(-stdv, stdv, (weight_size, 1, 1)).astype(np.float32) - weights.append(Parameter(initializer(Tensor(w_np), w_np.shape), name='weight' + str(i))) - 
layers.append(nn.LSTMCell(input_size=input_size_list[i], - hidden_size=self.hidden_size, - has_bias=self.has_bias, - bidirectional=self.bidirectional, - dropout=self.dropout)) - self.lstms = layers - self.weight = ParameterTuple(tuple(weights)) - self.fill = P.Fill() - self.shape = P.Shape() + validator.check_value_type("batch_first", batch_first, [bool], self.cls_name) + validator.check_integer("hidden_size", hidden_size, 0, Rel.GT, self.cls_name) + validator.check_integer("num_layers", num_layers, 0, Rel.GT, self.cls_name) + + self.batch_first = batch_first + self.transpose = P.Transpose() + self.lstm = P.LSTM(input_size=input_size, + hidden_size=hidden_size, + num_layers=num_layers, + has_bias=has_bias, + bidirectional=bidirectional, + dropout=float(dropout)) + + weight_size = 0 + gate_size = 4 * hidden_size + num_directions = 2 if bidirectional else 1 + for layer in range(num_layers): + input_layer_size = input_size if layer == 0 else hidden_size * num_directions + increment_size = gate_size * input_layer_size + increment_size += gate_size * hidden_size + if has_bias: + increment_size += 2 * gate_size + weight_size += increment_size * num_directions + stdv = 1 / math.sqrt(hidden_size) + w_np = np.random.uniform(-stdv, stdv, (weight_size, 1, 1)).astype(np.float32) + self.weight = Parameter(initializer(Tensor(w_np), [weight_size, 1, 1]), name='weight') def construct(self, x, hx): if self.batch_first: - x = self.transpose1(x, (1, 0, 2)) - if not self.cpu_target: - h, c = hx - output, h, c, _, _ = self.lstm(x, h, c, self.weight) - if self.batch_first: - output = self.transpose2(output, (1, 0, 2)) - return (output, (h, c)) + x = self.transpose(x, (1, 0, 2)) h, c = hx - output, hn, cn, _, _ = self.lstms[0](x, h[0], c[0], self.weight[0]) - for i in range(1, self.num_layers): - output, hn, cn, _, _ = self.lstms[i](output, h[i], c[i], self.weight[i]) + x, h, c, _, _ = self.lstm(x, h, c, self.weight) if self.batch_first: - output = self.transpose2(output, (1, 0, 2)) - return (output, (hn, cn)) + x = self.transpose(x, (1, 0, 2)) + return x, (h, c) class LSTMCell(Cell): @@ -291,30 +249,19 @@ class LSTMCell(Cell): dropout=0, bidirectional=False): super(LSTMCell, self).__init__() - self.input_size = input_size - self.hidden_size = hidden_size - self.has_bias = has_bias self.batch_first = validator.check_value_type("batch_first", batch_first, [bool], self.cls_name) - self.dropout = float(dropout) - self.bidirectional = bidirectional - self.num_directions = 1 - if self.bidirectional: - self.num_directions = 2 - if self.batch_first: - self.transpose1 = P.Transpose() - self.transpose2 = P.Transpose() - - self.lstm = P.LSTM(input_size=self.input_size, - hidden_size=self.hidden_size, + self.transpose = P.Transpose() + self.lstm = P.LSTM(input_size=input_size, + hidden_size=hidden_size, num_layers=1, - has_bias=self.has_bias, - bidirectional=self.bidirectional, - dropout=self.dropout) + has_bias=has_bias, + bidirectional=bidirectional, + dropout=float(dropout)) def construct(self, x, h, c, w): if self.batch_first: - x = self.transpose1(x, (1, 0, 2)) - output, hn, cn, _, _ = self.lstm(x, h, c, w) + x = self.transpose(x, (1, 0, 2)) + x, h, c, _, _ = self.lstm(x, h, c, w) if self.batch_first: - output = self.transpose2(output, (1, 0, 2)) - return output, hn, cn, _, _ + x = self.transpose(x, (1, 0, 2)) + return x, h, c, _, _ diff --git a/model_zoo/official/nlp/lstm/src/lstm.py b/model_zoo/official/nlp/lstm/src/lstm.py index c3ca0bbf7c9..5ee90b8ad21 100644 --- a/model_zoo/official/nlp/lstm/src/lstm.py +++ 
b/model_zoo/official/nlp/lstm/src/lstm.py
@@ -13,40 +13,108 @@
 # limitations under the License.
 # ============================================================================
 """LSTM."""
+import math
 import numpy as np

-from mindspore import Tensor, nn, context
+from mindspore import Tensor, nn, context, Parameter, ParameterTuple
+from mindspore.common.initializer import initializer
 from mindspore.ops import operations as P

+STACK_LSTM_DEVICE = ["CPU"]
+
+
 # Initialize short-term memory (h) and long-term memory (c) to 0
 def lstm_default_state(batch_size, hidden_size, num_layers, bidirectional):
     """init default input."""
-    num_directions = 1
-    if bidirectional:
-        num_directions = 2
-
-    if context.get_context("device_target") == "CPU":
-        h_list = []
-        c_list = []
-        i = 0
-        while i < num_layers:
-            hi = Tensor(np.zeros((num_directions, batch_size, hidden_size)).astype(np.float32))
-            h_list.append(hi)
-            ci = Tensor(np.zeros((num_directions, batch_size, hidden_size)).astype(np.float32))
-            c_list.append(ci)
-            i = i + 1
-        h = tuple(h_list)
-        c = tuple(c_list)
-        return h, c
-
-    h = Tensor(
-        np.zeros((num_layers * num_directions, batch_size, hidden_size)).astype(np.float32))
-    c = Tensor(
-        np.zeros((num_layers * num_directions, batch_size, hidden_size)).astype(np.float32))
+    num_directions = 2 if bidirectional else 1
+    h = Tensor(np.zeros((num_layers * num_directions, batch_size, hidden_size)).astype(np.float32))
+    c = Tensor(np.zeros((num_layers * num_directions, batch_size, hidden_size)).astype(np.float32))
     return h, c


+def stack_lstm_default_state(batch_size, hidden_size, num_layers, bidirectional):
+    """init default state, one (h, c) pair per stacked layer."""
+    num_directions = 2 if bidirectional else 1
+
+    h_list, c_list = [], []
+    for _ in range(num_layers):
+        h_list.append(Tensor(np.zeros((num_directions, batch_size, hidden_size)).astype(np.float32)))
+        c_list.append(Tensor(np.zeros((num_directions, batch_size, hidden_size)).astype(np.float32)))
+    h, c = tuple(h_list), tuple(c_list)
+    return h, c
+
+
+class StackLSTM(nn.Cell):
+    """
+    Stack multiple LSTM layers together.
+    """
+
+    def __init__(self,
+                 input_size,
+                 hidden_size,
+                 num_layers=1,
+                 has_bias=True,
+                 batch_first=False,
+                 dropout=0.0,
+                 bidirectional=False):
+        super(StackLSTM, self).__init__()
+        self.num_layers = num_layers
+        self.batch_first = batch_first
+        self.transpose = P.Transpose()
+
+        # direction number
+        num_directions = 2 if bidirectional else 1
+
+        # input_size list
+        input_size_list = [input_size]
+        for _ in range(num_layers - 1):
+            input_size_list.append(hidden_size * num_directions)
+
+        # layers (cells run in (seq_len, batch, feature); batch_first is handled once in construct)
+        layers = []
+        for i in range(num_layers):
+            layers.append(nn.LSTMCell(input_size=input_size_list[i],
+                                      hidden_size=hidden_size,
+                                      has_bias=has_bias,
+                                      batch_first=False,
+                                      bidirectional=bidirectional,
+                                      dropout=dropout))
+
+        # weights
+        weights = []
+        for i in range(num_layers):
+            # weight size
+            weight_size = (input_size_list[i] + hidden_size) * num_directions * hidden_size * 4
+            if has_bias:
+                bias_size = num_directions * hidden_size * 4
+                weight_size = weight_size + bias_size
+
+            # numpy weight
+            stdv = 1 / math.sqrt(hidden_size)
+            w_np = np.random.uniform(-stdv, stdv, (weight_size, 1, 1)).astype(np.float32)
+
+            # lstm weight
+            weights.append(Parameter(initializer(Tensor(w_np), w_np.shape), name="weight" + str(i)))
+
+        # cells and their flattened weights
+        self.lstms = layers
+        self.weight = ParameterTuple(tuple(weights))
+
+    def construct(self, x, hx):
+        """construct"""
+        if self.batch_first:
+            x = self.transpose(x, (1, 0, 2))
+        # stack lstm
+        h, c = hx
+        hn = cn = None
+        for i in range(self.num_layers):
+            x, hn, cn, _, _ = self.lstms[i](x, h[i], c[i], self.weight[i])
+        if self.batch_first:
+            x = self.transpose(x, (1, 0, 2))
+        return x, (hn, cn)
+
+
 class SentimentNet(nn.Cell):
     """Sentiment network structure."""
@@ -67,14 +135,25 @@ class SentimentNet(nn.Cell):
         self.embedding.embedding_table.requires_grad = False
         self.trans = P.Transpose()
         self.perm = (1, 0, 2)
-        self.encoder = nn.LSTM(input_size=embed_size,
-                               hidden_size=num_hiddens,
-                               num_layers=num_layers,
-                               has_bias=True,
-                               bidirectional=bidirectional,
-                               dropout=0.0)
-        self.h, self.c = lstm_default_state(batch_size, num_hiddens, num_layers, bidirectional)
+        if context.get_context("device_target") in STACK_LSTM_DEVICE:
+            # stacked LSTM built from nn.LSTMCell for CPU targets
+            self.encoder = StackLSTM(input_size=embed_size,
+                                     hidden_size=num_hiddens,
+                                     num_layers=num_layers,
+                                     has_bias=True,
+                                     bidirectional=bidirectional,
+                                     dropout=0.0)
+            self.h, self.c = stack_lstm_default_state(batch_size, num_hiddens, num_layers, bidirectional)
+        else:
+            # standard lstm
+            self.encoder = nn.LSTM(input_size=embed_size,
+                                   hidden_size=num_hiddens,
+                                   num_layers=num_layers,
+                                   has_bias=True,
+                                   bidirectional=bidirectional,
+                                   dropout=0.0)
+            self.h, self.c = lstm_default_state(batch_size, num_hiddens, num_layers, bidirectional)

         self.concat = P.Concat(1)
         if bidirectional:
diff --git a/tests/st/ops/cpu/test_lstm_op.py b/tests/st/ops/cpu/test_lstm_op.py
index 3b159c83db9..c630dea1bd5 100644
--- a/tests/st/ops/cpu/test_lstm_op.py
+++ b/tests/st/ops/cpu/test_lstm_op.py
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ============================================================================
+import math
 import pytest
 import numpy as np

@@ -20,12 +21,83 @@
 import mindspore.context as context
 from mindspore.common.api import ms_function
 from mindspore.common.initializer import initializer
 from mindspore.ops import composite as C
+from mindspore.ops import operations as P
 from mindspore.common.tensor import Tensor
 from mindspore.common.parameter import ParameterTuple, Parameter

 context.set_context(mode=context.GRAPH_MODE, device_target='CPU')


+class StackLSTM(nn.Cell):
+    """
+    Stack multiple LSTM layers together.
+    """
+
+    def __init__(self,
+                 input_size,
+                 hidden_size,
+                 num_layers=1,
+                 has_bias=True,
+                 batch_first=False,
+                 dropout=0.0,
+                 bidirectional=False):
+        super(StackLSTM, self).__init__()
+        self.num_layers = num_layers
+        self.batch_first = batch_first
+        self.transpose = P.Transpose()
+
+        # direction number
+        num_directions = 2 if bidirectional else 1
+
+        # input_size list
+        input_size_list = [input_size]
+        for _ in range(num_layers - 1):
+            input_size_list.append(hidden_size * num_directions)
+
+        # layers (cells run in (seq_len, batch, feature); batch_first is handled once in construct)
+        layers = []
+        for i in range(num_layers):
+            layers.append(nn.LSTMCell(input_size=input_size_list[i],
+                                      hidden_size=hidden_size,
+                                      has_bias=has_bias,
+                                      batch_first=False,
+                                      bidirectional=bidirectional,
+                                      dropout=dropout))
+
+        # weights
+        weights = []
+        for i in range(num_layers):
+            # weight size
+            weight_size = (input_size_list[i] + hidden_size) * num_directions * hidden_size * 4
+            if has_bias:
+                bias_size = num_directions * hidden_size * 4
+                weight_size = weight_size + bias_size
+
+            # numpy weight
+            stdv = 1 / math.sqrt(hidden_size)
+            w_np = np.random.uniform(-stdv, stdv, (weight_size, 1, 1)).astype(np.float32)
+
+            # lstm weight
+            weights.append(Parameter(initializer(Tensor(w_np), w_np.shape), name="weight" + str(i)))
+
+        # cells and their flattened weights
+        self.lstms = layers
+        self.weight = ParameterTuple(tuple(weights))
+
+    def construct(self, x, hx):
+        """construct"""
+        if self.batch_first:
+            x = self.transpose(x, (1, 0, 2))
+        # stack lstm
+        h, c = hx
+        hn = cn = None
+        for i in range(self.num_layers):
+            x, hn, cn, _, _ = self.lstms[i](x, h[i], c[i], self.weight[i])
+        if self.batch_first:
+            x = self.transpose(x, (1, 0, 2))
+        return x, (hn, cn)
+
+
 class LstmNet(nn.Cell):
     def __init__(self, batch_size, input_size, hidden_size, num_layers, has_bias, bidirectional, dropout):
         super(LstmNet, self).__init__()
@@ -34,7 +106,7 @@
         if bidirectional:
             num_directions = 2

-        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, has_bias, bidirectional, dropout)
+        self.lstm = StackLSTM(input_size, hidden_size, num_layers, has_bias=has_bias, bidirectional=bidirectional, dropout=dropout)

         input_np = np.array([[[0.6755, -1.6607, 0.1367], [0.4276, -0.7850, -0.3758]],
                              [[-0.6424, -0.6095, 0.6639], [0.7918, 0.4147, -0.5089]],
                              [[-1.5612, 0.0120, -0.7289], [-0.6656, -0.6626, -0.5883]],
@@ -137,8 +209,8 @@ class MultiLayerBiLstmNet(nn.Cell):
         if bidirectional:
             num_directions = 2

-        self.lstm = nn.LSTM(input_size=input_size, hidden_size=hidden_size, num_layers=num_layers, has_bias=has_bias,
-                            bidirectional=bidirectional, dropout=dropout)
+        self.lstm = StackLSTM(input_size=input_size, hidden_size=hidden_size, num_layers=num_layers, has_bias=has_bias,
+                              bidirectional=bidirectional, dropout=dropout)

         input_np = np.array([[[-0.1887, -0.4144, -0.0235, 0.7489, 0.7522, 0.5969, 0.3342, 1.2198, 0.6786, -0.9404],
                               [-0.8643, -1.6835, -2.4965, 2.8093, 0.1741, 0.2707, 0.7387, -0.0939, -1.7990, 0.4765]],
@@ -264,8 +336,8 @@ class Net(nn.Cell):
         bih = np.zeros((1, 8)).astype(np.float32)
         w_np = np.concatenate((wih, whh, bih), axis=1).reshape([-1, 1, 1])
         self.w = Parameter(initializer(Tensor(w_np), w_np.shape), name='weight0')
-        self.lstm = nn.LSTM(input_size=input_size, hidden_size=hidden_size, num_layers=num_layers,
-                            has_bias=has_bias, bidirectional=bidirectional, dropout=dropout)
+        self.lstm = StackLSTM(input_size=input_size, hidden_size=hidden_size, num_layers=num_layers,
+                              has_bias=has_bias, bidirectional=bidirectional, dropout=dropout)
         self.lstm.weight = ParameterTuple(tuple([self.w]))

     @ms_function
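
For review: both nn.LSTM and StackLSTM keep all gate parameters in one flattened (weight_size, 1, 1) tensor. A standalone sketch of the size computation the patch uses; the concrete values (input_size=10, hidden_size=16, num_layers=2) are illustrative assumptions, not taken from the patch:

def lstm_flat_weight_size(input_size, hidden_size, num_layers, has_bias, bidirectional):
    # Mirrors the weight_size loop in nn.LSTM.__init__ above.
    num_directions = 2 if bidirectional else 1
    gate_size = 4 * hidden_size                          # i, f, g, o gates
    weight_size = 0
    for layer in range(num_layers):
        input_layer_size = input_size if layer == 0 else hidden_size * num_directions
        increment_size = gate_size * input_layer_size    # W_ih
        increment_size += gate_size * hidden_size        # W_hh
        if has_bias:
            increment_size += 2 * gate_size              # b_ih and b_hh
        weight_size += increment_size * num_directions
    return weight_size

# layer 0: 64*10 + 64*16 + 128 = 1792; layer 1: 64*16 + 64*16 + 128 = 2176
assert lstm_flat_weight_size(10, 16, 2, True, False) == 3968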
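A minimal usage sketch of the simplified nn.LSTM path after this change; the shapes, hyper-parameters, and GPU target are illustrative assumptions:

import numpy as np
import mindspore.nn as nn
import mindspore.context as context
from mindspore import Tensor

context.set_context(mode=context.GRAPH_MODE, device_target="GPU")

net = nn.LSTM(input_size=10, hidden_size=16, num_layers=2,
              has_bias=True, batch_first=True, bidirectional=False, dropout=0.0)
x = Tensor(np.ones((2, 5, 10)).astype(np.float32))    # (batch_size, seq_len, input_size)
h0 = Tensor(np.zeros((2, 2, 16)).astype(np.float32))  # (num_layers * num_directions, batch_size, hidden_size)
c0 = Tensor(np.zeros((2, 2, 16)).astype(np.float32))
output, (hn, cn) = net(x, (h0, c0))                   # output: (2, 5, 16)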
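And a sketch of the CPU path that SentimentNet now selects through STACK_LSTM_DEVICE; the import path src.lstm is hypothetical and depends on where model_zoo sits on PYTHONPATH:

import numpy as np
import mindspore.context as context
from mindspore import Tensor
from src.lstm import StackLSTM, stack_lstm_default_state  # hypothetical path

context.set_context(mode=context.GRAPH_MODE, device_target="CPU")

net = StackLSTM(input_size=10, hidden_size=16, num_layers=2,
                has_bias=True, batch_first=False, bidirectional=True, dropout=0.0)
h0, c0 = stack_lstm_default_state(batch_size=2, hidden_size=16,
                                  num_layers=2, bidirectional=True)
x = Tensor(np.ones((5, 2, 10)).astype(np.float32))  # (seq_len, batch_size, input_size)
output, (hn, cn) = net(x, (h0, c0))                 # output: (5, 2, 32); hn, cn come from the last layer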