forked from mindspore-Ecosystem/mindspore
add lstm test case
This commit is contained in:
parent 18d79d35b6
commit 3f633348e2
@@ -0,0 +1,179 @@
# Copyright 2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

import math
import pytest
import numpy as np
from mindspore import context
from mindspore import nn
from mindspore import Tensor
from mindspore.common.initializer import initializer
from mindspore.common.parameter import ParameterTuple
from mindspore.common.parameter import Parameter
from mindspore.ops import composite as c


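# GradOfAllInputsAndParams wraps a network with GradOperation(get_all=True, get_by_list=True),
# so a call returns two groups of gradients: one w.r.t. every network input and one w.r.t.
# every trainable parameter of the wrapped network.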
class GradOfAllInputsAndParams(nn.Cell):
    def __init__(self, network, sens_param):
        super().__init__()
        self.grad = c.GradOperation(get_all=True, get_by_list=True, sens_param=sens_param)
        self.network = network
        self.params = ParameterTuple(self.network.trainable_params())

    def construct(self, *inputs):
        gout = self.grad(self.network, self.params)(*inputs)
        return gout


class LSTM(nn.Cell):
    def __init__(self, input_s, hidden_s, num_layers, has_bias, batch_first, bidirectional, dropout):
        super().__init__()
        self.lstm = nn.LSTM(input_size=input_s, hidden_size=hidden_s, num_layers=num_layers, has_bias=has_bias,
                            batch_first=batch_first, bidirectional=bidirectional, dropout=dropout)

    def construct(self, inp, h0, c0):
        return self.lstm(inp, (h0, c0))


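# LSTMWeightBias builds the fp16 weight/bias Parameters that the tests below assign to
# nn.LSTM's w_list/b_list. Per layer, the weight stacks the input-to-hidden and
# hidden-to-hidden blocks, giving shape (w_shape + hidden_s, gate_size), where
# gate_size = 4 * hidden_s covers the input/forget/cell/output gates.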
class LSTMWeightBias():
    def __init__(self, num_layers, has_bias, input_s, num_directions, hidden_s, bidirectional):
        self.num_layers = num_layers
        self.has_bias = has_bias
        self.input_s = input_s
        self.num_directions = num_directions
        self.hidden_s = hidden_s
        self.bidirectional = bidirectional

    def get_weight_bias(self):
        stdv = 1 / math.sqrt(self.hidden_s)
        gate_size = 4 * self.hidden_s
        w_list_value = []
        b_list_value = []

        for i in range(self.num_layers):
            b0 = np.zeros(gate_size, dtype=np.float16)
            w_shape = self.input_s if i == 0 else (self.num_directions * self.hidden_s)
            w_np = np.random.uniform(-stdv, stdv, (w_shape + self.hidden_s, gate_size)).astype(np.float16)
            w_list_value.append(Parameter(initializer(Tensor(w_np), [w_shape + self.hidden_s, gate_size]),
                                          name="weight_fw" + str(i)))

            if self.has_bias:
                b_np = np.random.uniform(-stdv, stdv, gate_size).astype(np.float16)
                b_list_value.append(Parameter(initializer(Tensor(b_np), [gate_size]), name="bias_fw" + str(i)))
            else:
                b_list_value.append(Parameter(initializer(Tensor(b0), [gate_size]), name="bias_fw" + str(i)))

            if self.bidirectional:
                # The backward-direction weight goes into the weight list, not the bias list.
                w_bw_np = np.random.uniform(-stdv, stdv, (w_shape + self.hidden_s, gate_size)).astype(np.float16)
                w_list_value.append(Parameter(initializer(Tensor(w_bw_np), [w_shape + self.hidden_s, gate_size]),
                                              name="weight_bw" + str(i)))
                b_bw_np = np.random.uniform(-stdv, stdv, gate_size).astype(np.float16) if self.has_bias else b0
                b_list_value.append(Parameter(initializer(Tensor(b_bw_np), [gate_size]), name="bias_bw" + str(i)))

        w_list_value = ParameterTuple(w_list_value)
        b_list_value = ParameterTuple(b_list_value)
        return w_list_value, b_list_value


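# Each case below runs the same randomly initialized single-layer LSTM twice, once in
# GRAPH_MODE and once in PYNATIVE_MODE, with identical weights and inputs, and requires the
# two modes to agree on the forward outputs (or the input gradients) within rtol=atol=1e-4.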
@pytest.mark.level0
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
def test_sit_lstm_forward_input_3_32_32_is_32_hs_16():
    input_s = 32
    hidden_s = 16
    has_bias = True
    bidirectional = False
    num_layers = 1
    num_directions = 1

    fact = LSTMWeightBias(num_layers, has_bias, input_s, num_directions, hidden_s, bidirectional)
    w_list_value, b_list_value = fact.get_weight_bias()

    h0 = Tensor(np.random.randn(num_layers * 1, 32, 16).astype(np.float32))
    c0 = Tensor(np.random.randn(num_layers * 1, 32, 16).astype(np.float32))
    input_ms = Tensor(np.random.randn(3, 32, 32).astype(np.float32))

    # graph mode
    context.set_context(mode=context.GRAPH_MODE)
    net = LSTM(input_s=input_s, hidden_s=hidden_s, num_layers=num_layers, has_bias=has_bias, batch_first=False,
               bidirectional=bidirectional, dropout=0.0)
    net.lstm.w_list = w_list_value
    net.lstm.b_list = b_list_value
    out, (hy, cy) = net(input_ms, h0, c0)

    # pynative mode
    context.set_context(mode=context.PYNATIVE_MODE)
    net_pynative = LSTM(input_s=input_s, hidden_s=hidden_s, num_layers=num_layers, has_bias=has_bias, batch_first=False,
                        bidirectional=bidirectional, dropout=0.0)
    net_pynative.lstm.w_list = w_list_value
    net_pynative.lstm.b_list = b_list_value
    out_pynative, (hy_pynative, cy_pynative) = net_pynative(input_ms, h0, c0)

    assert np.allclose(out.asnumpy(), out_pynative.asnumpy(), 0.0001, 0.0001)
    assert np.allclose(hy.asnumpy(), hy_pynative.asnumpy(), 0.0001, 0.0001)
    assert np.allclose(cy.asnumpy(), cy_pynative.asnumpy(), 0.0001, 0.0001)


@pytest.mark.level0
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
def test_sit_lstm_grad_input_3_32_32_is_32_hs_16():
    input_s = 32
    hidden_s = 16
    has_bias = True
    bidirectional = False
    num_layers = 1
    num_directions = 1

    fact = LSTMWeightBias(num_layers, has_bias, input_s, num_directions, hidden_s, bidirectional)
    w_list_value, b_list_value = fact.get_weight_bias()

    h0 = Tensor(np.random.randn(num_layers * 1, 32, 16).astype(np.float32))
    c0 = Tensor(np.random.randn(num_layers * 1, 32, 16).astype(np.float32))
    input_ms = Tensor(np.random.randn(3, 32, 32).astype(np.float32))

    # graph mode
    context.set_context(mode=context.GRAPH_MODE)
    net = LSTM(input_s=input_s, hidden_s=hidden_s, num_layers=num_layers, has_bias=has_bias, batch_first=False,
               bidirectional=bidirectional, dropout=0.0)
    net.lstm.w_list = w_list_value
    net.lstm.b_list = b_list_value

    grad_net_inp = GradOfAllInputsAndParams(net, sens_param=False)
    grad_net_inp.set_train()
    out_grad, _ = grad_net_inp(input_ms, h0, c0)
    x_grad = out_grad[0].asnumpy()
    h_grad = out_grad[1].asnumpy()
    c_grad = out_grad[2].asnumpy()

    # pynative mode
    context.set_context(mode=context.PYNATIVE_MODE)
    net_pynative = LSTM(input_s=input_s, hidden_s=hidden_s, num_layers=num_layers, has_bias=has_bias, batch_first=False,
                        bidirectional=bidirectional, dropout=0.0)
    net_pynative.lstm.w_list = w_list_value
    net_pynative.lstm.b_list = b_list_value

    grad_net_inp_pynative = GradOfAllInputsAndParams(net_pynative, sens_param=False)
    grad_net_inp_pynative.set_train()
    out_grad_pynative, _ = grad_net_inp_pynative(input_ms, h0, c0)
    x_grad_pynative = out_grad_pynative[0].asnumpy()
    h_grad_pynative = out_grad_pynative[1].asnumpy()
    c_grad_pynative = out_grad_pynative[2].asnumpy()

    assert np.allclose(x_grad, x_grad_pynative, 0.0001, 0.0001)
    assert np.allclose(h_grad, h_grad_pynative, 0.0001, 0.0001)
    assert np.allclose(c_grad, c_grad_pynative, 0.0001, 0.0001)
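
# If running these cases by hand rather than through CI, plain `pytest -v <path-to-this-file>`
# should collect both tests; the level/platform markers above only take effect when the test
# runner filters on them (e.g. `pytest -m level0`).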