From 9bcdf4cbdc9a620a212b2ac649ecbc1e6b7ff3f6 Mon Sep 17 00:00:00 2001 From: baihuawei Date: Thu, 28 May 2020 09:09:56 +0800 Subject: [PATCH] add lstm --- .../kernel/cpu/mkldnn/lstm_cpu_kernel.cc | 120 +++++++ .../ccsrc/kernel/cpu/mkldnn/lstm_cpu_kernel.h | 59 +++ .../kernel/cpu/mkldnn/lstm_grad_cpu_kernel.cc | 169 +++++++++ .../kernel/cpu/mkldnn/lstm_grad_cpu_kernel.h | 67 ++++ .../ccsrc/kernel/cpu/mkldnn/mkl_cpu_kernel.cc | 8 +- .../ccsrc/kernel/cpu/mkldnn/mkl_cpu_kernel.h | 4 +- .../kernel/cpu/mkldnn/mkl_kernel_engine.cc | 2 +- .../kernel/cpu/mkldnn/mkl_kernel_engine.h | 31 +- mindspore/nn/layer/lstm.py | 215 +++++++++-- mindspore/ops/_grad/grad_array_ops.py | 30 +- mindspore/ops/_grad/grad_math_ops.py | 63 +++- mindspore/ops/_grad/grad_nn_ops.py | 19 + mindspore/ops/operations/_grad_ops.py | 58 +++ mindspore/ops/operations/nn_ops.py | 46 ++- tests/st/ops/cpu/test_lstm_op.py | 335 ++++++++++++++++++ 15 files changed, 1187 insertions(+), 39 deletions(-) create mode 100644 mindspore/ccsrc/kernel/cpu/mkldnn/lstm_cpu_kernel.cc create mode 100644 mindspore/ccsrc/kernel/cpu/mkldnn/lstm_cpu_kernel.h create mode 100644 mindspore/ccsrc/kernel/cpu/mkldnn/lstm_grad_cpu_kernel.cc create mode 100644 mindspore/ccsrc/kernel/cpu/mkldnn/lstm_grad_cpu_kernel.h create mode 100644 tests/st/ops/cpu/test_lstm_op.py diff --git a/mindspore/ccsrc/kernel/cpu/mkldnn/lstm_cpu_kernel.cc b/mindspore/ccsrc/kernel/cpu/mkldnn/lstm_cpu_kernel.cc new file mode 100644 index 00000000000..dab165e017e --- /dev/null +++ b/mindspore/ccsrc/kernel/cpu/mkldnn/lstm_cpu_kernel.cc @@ -0,0 +1,120 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "kernel/cpu/mkldnn/lstm_cpu_kernel.h" +#include +#include "common/utils.h" +#include "kernel/cpu/mkldnn/mkl_kernel_engine.h" +#include "device/cpu/cpu_device_address.h" + +namespace mindspore { +namespace kernel { +void LstmCPUKernel::InitKernel(const CNodePtr &kernel_node) { + MS_EXCEPTION_IF_NULL(kernel_node); + std::vector src_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0); + bidirectional_ = AnfAlgo::GetNodeAttr(kernel_node, "bidirectional"); + input_size_ = AnfAlgo::GetNodeAttr(kernel_node, "input_size"); + hidden_size_ = AnfAlgo::GetNodeAttr(kernel_node, "hidden_size"); + num_layers_ = AnfAlgo::GetNodeAttr(kernel_node, "num_layers"); + batch_size_ = SizeToInt(src_shape[1]); + seq_len_ = SizeToInt(src_shape[0]); + num_directions_ = 1; + if (bidirectional_) { + num_directions_ = 2; + } + int gate_size = 4 * hidden_size_; + for (int i = 0; i < num_layers_; ++i) { + weight_size_ += gate_size * (i == 0 ? 
input_size_ : hidden_size_ * num_directions_); + weight_h_size_ += gate_size * hidden_size_; + } + weight_size_ = weight_size_ * num_directions_; + weight_h_size_ = weight_h_size_ * num_directions_; +} + +bool LstmCPUKernel::Launch(const std::vector &inputs, + const std::vector & /*workspace*/, + const std::vector &outputs) { + using dt = dnnl::memory::data_type; + using tag = dnnl::memory::format_tag; + using dim = dnnl::memory::dims; + auto eng = MKLKernelEngine::Get().engine(); + dnnl::stream s(eng); + auto formatted_md = [](dim dimensions, tag layout) { return dnnl::memory::desc{{dimensions}, dt::f32, layout}; }; + dnnl::rnn_direction direction = dnnl::rnn_direction::unidirectional; + if (bidirectional_) { + direction = dnnl::rnn_direction::bidirectional_concat; + } + + dim src_dims = {seq_len_, batch_size_, input_size_}; + dim src_h_dims = {num_layers_, num_directions_, batch_size_, hidden_size_}; + dim src_c_dims = {num_layers_, num_directions_, batch_size_, hidden_size_}; + dim weights_dims = {num_layers_, num_directions_, input_size_, 4, hidden_size_}; + dim weights_h_dims = {num_layers_, num_directions_, hidden_size_, 4, hidden_size_}; + dim bias_dims = {num_layers_, num_directions_, 4, hidden_size_}; + dim dst_dims = {seq_len_, batch_size_, hidden_size_ * num_directions_}; + dim dst_h_dims = {num_layers_, num_directions_, batch_size_, hidden_size_}; + dim dst_c_dims = {num_layers_, num_directions_, batch_size_, hidden_size_}; + dnnl::memory::desc src_desc = formatted_md(src_dims, tag::tnc); + dnnl::memory::desc src_h_desc = formatted_md(src_h_dims, tag::ldnc); + dnnl::memory::desc src_c_desc = formatted_md(src_c_dims, tag::ldnc); + dnnl::memory::desc weights_desc = formatted_md(weights_dims, tag::ldigo); + dnnl::memory::desc weights_h_desc = formatted_md(weights_h_dims, tag::ldigo); + dnnl::memory::desc bias_desc = formatted_md(bias_dims, tag::ldgo); + dnnl::memory::desc dst_desc = formatted_md(dst_dims, tag::tnc); + dnnl::memory::desc dst_h_desc = 
formatted_md(dst_h_dims, tag::ldnc); + dnnl::memory::desc dst_c_desc = formatted_md(dst_c_dims, tag::ldnc); + dnnl::lstm_forward::desc desc = + dnnl::lstm_forward::desc(dnnl::prop_kind::forward_training, direction, src_desc, src_h_desc, src_c_desc, + weights_desc, weights_h_desc, bias_desc, dst_desc, dst_h_desc, dst_c_desc); + auto prim_desc = dnnl::lstm_forward::primitive_desc(desc, MKLKernelEngine::Get().engine()); + auto workspace_memory = dnnl::memory(prim_desc.workspace_desc(), eng); + auto src_memory = dnnl::memory(formatted_md(src_dims, tag::tnc), eng); + write_to_dnnl_memory(inputs[0]->addr, src_memory); + + auto src_h_memory = dnnl::memory(prim_desc.src_iter_desc(), eng); + auto src_c_memory = dnnl::memory(prim_desc.src_iter_c_desc(), eng); + write_to_dnnl_memory(inputs[1]->addr, src_h_memory); + write_to_dnnl_memory(inputs[2]->addr, src_c_memory); + + auto weights_memory = dnnl::memory(formatted_md(weights_dims, tag::ldigo), eng); + auto weights_h_memory = dnnl::memory(formatted_md(weights_h_dims, tag::ldigo), eng); + auto bias_memory = dnnl::memory(formatted_md(bias_dims, tag::ldgo), eng); + write_to_dnnl_memory(inputs[3]->addr, weights_memory); + write_to_dnnl_memory(reinterpret_cast(inputs[3]->addr) + weight_size_, weights_h_memory); + write_to_dnnl_memory(reinterpret_cast(inputs[3]->addr) + weight_size_ + weight_h_size_, bias_memory); + + auto dst_memory = dnnl::memory(formatted_md(dst_dims, tag::tnc), eng); + auto dst_h_memory = dnnl::memory(prim_desc.dst_iter_desc(), eng); + auto dst_c_memory = dnnl::memory(prim_desc.dst_iter_c_desc(), eng); + dnnl::lstm_forward fw_layer(prim_desc); + workspace_memory.set_data_handle(outputs[3]->addr); + dst_memory.set_data_handle(outputs[0]->addr); + dst_h_memory.set_data_handle(outputs[1]->addr); + dst_c_memory.set_data_handle(outputs[2]->addr); + fw_layer.execute(s, {{DNNL_ARG_SRC_LAYER, src_memory}, + {DNNL_ARG_SRC_ITER, src_h_memory}, + {DNNL_ARG_SRC_ITER_C, src_c_memory}, + {DNNL_ARG_WEIGHTS_LAYER, 
weights_memory}, + {DNNL_ARG_WEIGHTS_ITER, weights_h_memory}, + {DNNL_ARG_BIAS, bias_memory}, + {DNNL_ARG_DST_LAYER, dst_memory}, + {DNNL_ARG_DST_ITER, dst_h_memory}, + {DNNL_ARG_DST_ITER_C, dst_c_memory}, + {DNNL_ARG_WORKSPACE, workspace_memory}}); + return true; +} + +} // namespace kernel +} // namespace mindspore diff --git a/mindspore/ccsrc/kernel/cpu/mkldnn/lstm_cpu_kernel.h b/mindspore/ccsrc/kernel/cpu/mkldnn/lstm_cpu_kernel.h new file mode 100644 index 00000000000..6cb9a1ff74d --- /dev/null +++ b/mindspore/ccsrc/kernel/cpu/mkldnn/lstm_cpu_kernel.h @@ -0,0 +1,59 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef MINDSPORE_CCSRC_KERNEL_CPU_LSTM_CPU_KERNEL_H +#define MINDSPORE_CCSRC_KERNEL_CPU_LSTM_CPU_KERNEL_H +#include +#include +#include "kernel/cpu/mkldnn/mkl_cpu_kernel.h" +namespace mindspore { +namespace kernel { +class LstmCPUKernel : public MKLCPUKernel { + public: + LstmCPUKernel() = default; + ~LstmCPUKernel() override = default; + void InitKernel(const CNodePtr &kernel_node) override; + + bool Launch(const std::vector &inputs, const std::vector &workspace, + const std::vector &outputs) override; + + private: + int weight_size_ = 0; + int weight_h_size_ = 0; + int input_size_; + int hidden_size_; + int num_layers_; + int batch_size_; + int seq_len_; + int num_directions_; + bool bidirectional_; +}; + +MS_REG_CPU_KERNEL(LSTM, + KernelAttr() + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat32) + .AddOutputAttr(kNumberTypeFloat32) + .AddOutputAttr(kNumberTypeFloat32) + .AddOutputAttr(kNumberTypeFloat32) + .AddOutputAttr(kNumberTypeFloat32) + .AddOutputAttr(kNumberTypeFloat32), + LstmCPUKernel); +} // namespace kernel +} // namespace mindspore +#endif // MINDSPORE_CCSRC_KERNEL_CPU_LSTM_CPU_KERNEL_H diff --git a/mindspore/ccsrc/kernel/cpu/mkldnn/lstm_grad_cpu_kernel.cc b/mindspore/ccsrc/kernel/cpu/mkldnn/lstm_grad_cpu_kernel.cc new file mode 100644 index 00000000000..df4744db6ff --- /dev/null +++ b/mindspore/ccsrc/kernel/cpu/mkldnn/lstm_grad_cpu_kernel.cc @@ -0,0 +1,169 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "kernel/cpu/mkldnn/lstm_grad_cpu_kernel.h" +#include +#include +#include +#include +#include "common/utils.h" +#include "kernel/cpu/mkldnn/mkl_kernel_engine.h" +#include "device/cpu/cpu_device_address.h" + +namespace mindspore { +namespace kernel { + +void LSTMGradCPUKernel::InitKernel(const CNodePtr &kernel_node) { + MS_EXCEPTION_IF_NULL(kernel_node); + std::vector src_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0); + bidirectional_ = AnfAlgo::GetNodeAttr(kernel_node, "bidirectional"); + input_size_ = AnfAlgo::GetNodeAttr(kernel_node, "input_size"); + hidden_size_ = AnfAlgo::GetNodeAttr(kernel_node, "hidden_size"); + num_layers_ = AnfAlgo::GetNodeAttr(kernel_node, "num_layers"); + batch_size_ = SizeToInt(src_shape[1]); + seq_len_ = SizeToInt(src_shape[0]); + num_directions_ = 1; + if (bidirectional_) { + num_directions_ = 2; + } + int gate_size = 4 * hidden_size_; + for (int i = 0; i < num_layers_; ++i) { + weight_size_ += gate_size * (i == 0 ? 
input_size_ : hidden_size_ * num_directions_); + weight_h_size_ += gate_size * hidden_size_; + } + weight_size_ = weight_size_ * num_directions_; + weight_h_size_ = weight_h_size_ * num_directions_; +} + +bool LSTMGradCPUKernel::Launch(const std::vector &inputs, + const std::vector &workspace /*workspace*/, + const std::vector &outputs) { + using tag = dnnl::memory::format_tag; + using dt = dnnl::memory::data_type; + using dim = dnnl::memory::dims; + auto eng = MKLKernelEngine::Get().engine(); + dnnl::stream s(eng); + auto formatted_md = [](dim dimensions, tag layout) { return dnnl::memory::desc{{dimensions}, dt::f32, layout}; }; + auto generic_md = [](dim dimensions) { return dnnl::memory::desc{{dimensions}, dt::f32, tag::any}; }; + dnnl::rnn_direction direction = dnnl::rnn_direction::unidirectional; + if (bidirectional_) { + direction = dnnl::rnn_direction::bidirectional_concat; + } + dim src_dims = {seq_len_, batch_size_, input_size_}; + dim src_h_dims = {num_layers_, num_directions_, batch_size_, hidden_size_}; + dim src_c_dims = {num_layers_, num_directions_, batch_size_, hidden_size_}; + dim weights_dims = {num_layers_, num_directions_, input_size_, 4, hidden_size_}; + dim weights_h_dims = {num_layers_, num_directions_, hidden_size_, 4, hidden_size_}; + dim bias_dims = {num_layers_, num_directions_, 4, hidden_size_}; + dim dst_dims = {seq_len_, batch_size_, hidden_size_ * num_directions_}; + dim dst_h_dims = {num_layers_, num_directions_, batch_size_, hidden_size_}; + dim dst_c_dims = {num_layers_, num_directions_, batch_size_, hidden_size_}; + + dnnl::memory::desc src_desc = formatted_md(src_dims, tag::tnc); + dnnl::memory::desc src_h_desc = formatted_md(src_h_dims, tag::ldnc); + dnnl::memory::desc src_c_desc = formatted_md(src_c_dims, tag::ldnc); + dnnl::memory::desc weights_desc = formatted_md(weights_dims, tag::ldigo); + dnnl::memory::desc weights_h_desc = formatted_md(weights_h_dims, tag::ldigo); + dnnl::memory::desc bias_desc = formatted_md(bias_dims, 
tag::ldgo); + dnnl::memory::desc dst_desc = formatted_md(dst_dims, tag::tnc); + dnnl::memory::desc dst_h_desc = formatted_md(dst_h_dims, tag::ldnc); + dnnl::memory::desc dst_c_desc = formatted_md(dst_c_dims, tag::ldnc); + + dnnl::lstm_forward::desc forward_desc = + dnnl::lstm_forward::desc(dnnl::prop_kind::forward_training, direction, src_desc, src_h_desc, src_c_desc, + weights_desc, weights_h_desc, bias_desc, dst_desc, dst_h_desc, dst_c_desc); + auto prim_forward_desc = dnnl::lstm_forward::primitive_desc(forward_desc, eng); + + dnnl::lstm_backward::desc backward_desc = dnnl::lstm_backward::desc( + dnnl::prop_kind::backward, direction, src_desc, src_h_desc, src_c_desc, generic_md(weights_dims), + generic_md(weights_h_dims), generic_md(bias_dims), dst_desc, dst_h_desc, dst_c_desc, src_desc, src_h_desc, + src_c_desc, weights_desc, weights_h_desc, bias_desc, dst_desc, dst_h_desc, dst_c_desc); + auto prim_backward_desc = dnnl::lstm_backward::primitive_desc(backward_desc, eng, prim_forward_desc); + // construct fw memory + auto src_memory = dnnl::memory(formatted_md(src_dims, tag::tnc), eng); + write_to_dnnl_memory(inputs[0]->addr, src_memory); + + auto src_h_memory = dnnl::memory(prim_forward_desc.src_iter_desc(), eng); + auto src_c_memory = dnnl::memory(prim_forward_desc.src_iter_c_desc(), eng); + write_to_dnnl_memory(inputs[1]->addr, src_h_memory); + write_to_dnnl_memory(inputs[2]->addr, src_c_memory); + + auto user_weights_memory = dnnl::memory(formatted_md(weights_dims, tag::ldigo), eng); + auto user_weights_h_memory = dnnl::memory(formatted_md(weights_h_dims, tag::ldigo), eng); + auto user_bias_memory = dnnl::memory(formatted_md(bias_dims, tag::ldgo), eng); + write_to_dnnl_memory(inputs[3]->addr, user_weights_memory); + write_to_dnnl_memory(reinterpret_cast(inputs[3]->addr) + weight_size_, user_weights_h_memory); + write_to_dnnl_memory(reinterpret_cast(inputs[3]->addr) + weight_size_ + weight_h_size_, user_bias_memory); + auto weights_memory = 
dnnl::memory(prim_backward_desc.weights_layer_desc(), eng); + auto weights_h_memory = dnnl::memory(prim_backward_desc.weights_iter_desc(), eng); + auto bias_memory = dnnl::memory(prim_forward_desc.bias_desc(), eng); + dnnl::reorder(user_weights_memory, weights_memory).execute(s, user_weights_memory, weights_memory); + dnnl::reorder(user_weights_h_memory, weights_h_memory).execute(s, user_weights_h_memory, weights_h_memory); + dnnl::reorder(user_bias_memory, bias_memory).execute(s, user_bias_memory, bias_memory); + + auto dst_memory = dnnl::memory(formatted_md(dst_dims, tag::tnc), eng); + write_to_dnnl_memory(reinterpret_cast(inputs[4]->addr), dst_memory); + auto dst_h_memory = dnnl::memory(prim_backward_desc.dst_iter_desc(), eng); + write_to_dnnl_memory(reinterpret_cast(inputs[5]->addr), dst_h_memory); + auto dst_c_memory = dnnl::memory(prim_backward_desc.dst_iter_c_desc(), eng); + write_to_dnnl_memory(reinterpret_cast(inputs[6]->addr), dst_c_memory); + auto workspace_memory = dnnl::memory(prim_forward_desc.workspace_desc(), eng); + write_to_dnnl_memory(inputs[10]->addr, workspace_memory); + + // construct diff memory + auto diff_src_memory = dnnl::memory(formatted_md(src_dims, tag::tnc), eng); + auto diff_src_h_memory = dnnl::memory(prim_backward_desc.diff_src_iter_desc(), eng); + auto diff_src_c_memory = dnnl::memory(prim_backward_desc.diff_src_iter_c_desc(), eng); + + auto diff_weights_memory = dnnl::memory(prim_backward_desc.diff_weights_layer_desc(), eng); + auto diff_weights_h_memory = dnnl::memory(prim_backward_desc.diff_weights_iter_desc(), eng); + auto diff_bias_memory = dnnl::memory(prim_backward_desc.diff_bias_desc(), eng); + auto diff_dst_memory = dnnl::memory(formatted_md(dst_dims, tag::tnc), eng); + write_to_dnnl_memory(reinterpret_cast(inputs[7]->addr), diff_dst_memory); + auto diff_dst_h_memory = dnnl::memory(prim_backward_desc.diff_dst_iter_desc(), eng); + write_to_dnnl_memory(reinterpret_cast(inputs[8]->addr), diff_dst_h_memory); + auto 
diff_dst_c_memory = dnnl::memory(prim_backward_desc.diff_dst_iter_c_desc(), eng); + write_to_dnnl_memory(reinterpret_cast(inputs[9]->addr), diff_dst_c_memory); + + diff_src_memory.set_data_handle(outputs[0]->addr); + diff_src_h_memory.set_data_handle(outputs[1]->addr); + diff_src_c_memory.set_data_handle(outputs[2]->addr); + diff_weights_memory.set_data_handle(outputs[3]->addr); + diff_weights_h_memory.set_data_handle(reinterpret_cast(outputs[3]->addr) + weight_size_); + diff_bias_memory.set_data_handle(reinterpret_cast(outputs[3]->addr) + weight_size_ + weight_h_size_); + dnnl::lstm_backward bwd_layer(prim_backward_desc); + bwd_layer.execute(s, {{DNNL_ARG_SRC_LAYER, src_memory}, + {DNNL_ARG_SRC_ITER, src_h_memory}, + {DNNL_ARG_SRC_ITER_C, src_c_memory}, + {DNNL_ARG_WEIGHTS_LAYER, weights_memory}, + {DNNL_ARG_WEIGHTS_ITER, weights_h_memory}, + {DNNL_ARG_BIAS, bias_memory}, + {DNNL_ARG_DST_LAYER, dst_memory}, + {DNNL_ARG_DST_ITER, dst_h_memory}, + {DNNL_ARG_DST_ITER_C, dst_c_memory}, + {DNNL_ARG_DIFF_SRC_LAYER, diff_src_memory}, + {DNNL_ARG_DIFF_SRC_ITER, diff_src_h_memory}, + {DNNL_ARG_DIFF_SRC_ITER_C, diff_src_c_memory}, + {DNNL_ARG_DIFF_WEIGHTS_LAYER, diff_weights_memory}, + {DNNL_ARG_DIFF_WEIGHTS_ITER, diff_weights_h_memory}, + {DNNL_ARG_DIFF_BIAS, diff_bias_memory}, + {DNNL_ARG_DIFF_DST_LAYER, diff_dst_memory}, + {DNNL_ARG_DIFF_DST_ITER, diff_dst_h_memory}, + {DNNL_ARG_DIFF_DST_ITER_C, diff_dst_c_memory}, + {DNNL_ARG_WORKSPACE, workspace_memory}}); + return true; +} +} // namespace kernel +} // namespace mindspore diff --git a/mindspore/ccsrc/kernel/cpu/mkldnn/lstm_grad_cpu_kernel.h b/mindspore/ccsrc/kernel/cpu/mkldnn/lstm_grad_cpu_kernel.h new file mode 100644 index 00000000000..22ec1f62db7 --- /dev/null +++ b/mindspore/ccsrc/kernel/cpu/mkldnn/lstm_grad_cpu_kernel.h @@ -0,0 +1,67 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with 
the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef MINDSPORE_CCSRC_KERNEL_CPU_LSTM_GRAD_CPU_KERNEL_H_ +#define MINDSPORE_CCSRC_KERNEL_CPU_LSTM_GRAD_CPU_KERNEL_H_ + +#include +#include +#include "kernel/cpu/mkldnn/mkl_cpu_kernel.h" + +namespace mindspore { +namespace kernel { +class LSTMGradCPUKernel : public MKLCPUKernel { + public: + LSTMGradCPUKernel() = default; + ~LSTMGradCPUKernel() override = default; + + void InitKernel(const CNodePtr &kernel_node) override; + bool Launch(const std::vector &inputs, const std::vector &workspace, + const std::vector &outputs) override; + + private: + int weight_size_ = 0; + int weight_h_size_ = 0; + int input_size_; + int hidden_size_; + int num_layers_; + int batch_size_; + int seq_len_; + int num_directions_; + bool bidirectional_; +}; + +MS_REG_CPU_KERNEL(LSTMGrad, + KernelAttr() + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat32) + .AddOutputAttr(kNumberTypeFloat32) + .AddOutputAttr(kNumberTypeFloat32) + .AddOutputAttr(kNumberTypeFloat32) + .AddOutputAttr(kNumberTypeFloat32), + LSTMGradCPUKernel); +} // namespace kernel +} // namespace mindspore + +#endif // MINDSPORE_CCSRC_KERNEL_CPU_LSTM_GRAD_CPU_KERNEL_H_ diff --git a/mindspore/ccsrc/kernel/cpu/mkldnn/mkl_cpu_kernel.cc 
b/mindspore/ccsrc/kernel/cpu/mkldnn/mkl_cpu_kernel.cc index 1f7ccf9e416..17fca726984 100644 --- a/mindspore/ccsrc/kernel/cpu/mkldnn/mkl_cpu_kernel.cc +++ b/mindspore/ccsrc/kernel/cpu/mkldnn/mkl_cpu_kernel.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -98,5 +98,11 @@ void MKLCPUKernel::SetArgumentHandle(int arg_key, void *ptr) { } void MKLCPUKernel::ExecutePrimitive() { MKLKernelEngine::Get().Execute(primitive_, arguments_); } +void MKLCPUKernel::write_to_dnnl_memory(void *handle, const dnnl::memory &mem) { + MKLKernelEngine::Get().write_to_dnnl_memory(handle, mem); +} +void MKLCPUKernel::read_from_dnnl_memory(void *handle, const dnnl::memory &mem) { + MKLKernelEngine::Get().read_from_dnnl_memory(handle, mem); +} } // namespace kernel } // namespace mindspore diff --git a/mindspore/ccsrc/kernel/cpu/mkldnn/mkl_cpu_kernel.h b/mindspore/ccsrc/kernel/cpu/mkldnn/mkl_cpu_kernel.h index cd06032ff02..a6b8d686273 100644 --- a/mindspore/ccsrc/kernel/cpu/mkldnn/mkl_cpu_kernel.h +++ b/mindspore/ccsrc/kernel/cpu/mkldnn/mkl_cpu_kernel.h @@ -1,5 +1,5 @@ /** - * Copyright 2019 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -39,6 +39,8 @@ class MKLCPUKernel : public CPUKernel { dnnl::memory::format_tag GetDefaultFormatTag(const dnnl::memory::dims &dims) const; dnnl::memory::desc GetDefaultMemDesc(const std::vector &shape); void ExecutePrimitive(); + void write_to_dnnl_memory(void *handle, const dnnl::memory &mem); + void read_from_dnnl_memory(void *handle, const dnnl::memory &mem); std::unordered_map arguments_; std::shared_ptr primitive_{nullptr}; }; diff --git a/mindspore/ccsrc/kernel/cpu/mkldnn/mkl_kernel_engine.cc b/mindspore/ccsrc/kernel/cpu/mkldnn/mkl_kernel_engine.cc index ae4dbb26d84..f5270a4e9ad 100644 --- a/mindspore/ccsrc/kernel/cpu/mkldnn/mkl_kernel_engine.cc +++ b/mindspore/ccsrc/kernel/cpu/mkldnn/mkl_kernel_engine.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/mindspore/ccsrc/kernel/cpu/mkldnn/mkl_kernel_engine.h b/mindspore/ccsrc/kernel/cpu/mkldnn/mkl_kernel_engine.h index 36a3ceff6dc..b0eaaf405ff 100644 --- a/mindspore/ccsrc/kernel/cpu/mkldnn/mkl_kernel_engine.h +++ b/mindspore/ccsrc/kernel/cpu/mkldnn/mkl_kernel_engine.h @@ -1,5 +1,5 @@ /** - * Copyright 2019 Huawei Technologies Co., Ltd + * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -15,7 +15,10 @@ */ #ifndef MINDSPORE_MKL_KERNEL_ENGINE_H_ #define MINDSPORE_MKL_KERNEL_ENGINE_H_ - +#include +#include +#include +#include #include #include #include @@ -39,6 +42,30 @@ class MKLKernelEngine { void Execute(const std::shared_ptr &primitive, const std::unordered_map &arguments); + inline void read_from_dnnl_memory(void *handle, const dnnl::memory &mem) { + dnnl::engine eng = mem.get_engine(); + size_t bytes = mem.get_desc().get_size(); + if (eng.get_kind() == dnnl::engine::kind::cpu) { + auto dst = reinterpret_cast(handle); + uint8_t *src = reinterpret_cast(mem.get_data_handle()); + for (size_t i = 0; i < bytes; ++i) { + dst[i] = src[i]; + } + } + } + // Read from handle, write to memory + inline void write_to_dnnl_memory(void *handle, const dnnl::memory &mem) { + dnnl::engine eng = mem.get_engine(); + size_t bytes = mem.get_desc().get_size(); + if (eng.get_kind() == dnnl::engine::kind::cpu) { + auto src = reinterpret_cast(handle); + uint8_t *dst = reinterpret_cast(mem.get_data_handle()); + for (size_t i = 0; i < bytes; ++i) { + dst[i] = src[i]; + } + } + } + private: MKLKernelEngine() : engine_(dnnl::engine::kind::cpu, 0), stream_(engine_) {} ~MKLKernelEngine() = default; diff --git a/mindspore/nn/layer/lstm.py b/mindspore/nn/layer/lstm.py index 86d0e8e44ac..6122e82aaa0 100755 --- a/mindspore/nn/layer/lstm.py +++ b/mindspore/nn/layer/lstm.py @@ -18,8 +18,13 @@ from mindspore.nn.cell import Cell from mindspore.common.parameter import Parameter from mindspore.common.initializer import initializer from mindspore._checkparam import Validator as validator +from mindspore import context +import mindspore.nn as nn +from mindspore.common.tensor import Tensor +import numpy as np + +__all__ = ['LSTM', 'LSTMCell'] -__all__ = ['LSTM'] class LSTM(Cell): r""" @@ -102,6 +107,7 @@ class LSTM(Cell): >>> c0 = Tensor(np.ones([1 * 2, 3, 12]).astype(np.float32)) >>> output, (hn, cn) = net(input, h0, c0) """ + def __init__(self, input_size, hidden_size, @@ -118,39 
+124,198 @@ class LSTM(Cell): self.batch_first = validator.check_value_type("batch_first", batch_first, [bool], self.cls_name) self.dropout = float(dropout) self.bidirectional = bidirectional - if self.batch_first: self.transpose1 = P.Transpose() self.transpose2 = P.Transpose() - self.lstm = P.LSTM(input_size=self.input_size, - hidden_size=self.hidden_size, - num_layers=self.num_layers, - has_bias=self.has_bias, - bidirectional=self.bidirectional, - dropout=self.dropout) - num_directions = 2 if self.bidirectional else 1 - - weight_size = 0 - gate_size = 4 * self.hidden_size - for layer in range(self.num_layers): - input_layer_size = self.input_size if layer == 0 else self.hidden_size * num_directions - increment_size = gate_size * input_layer_size - increment_size += gate_size * self.hidden_size - if self.has_bias: - increment_size += 2 * gate_size - weight_size += increment_size * num_directions - - self.weight = Parameter(initializer(0.0, [weight_size, 1, 1]), name='weight') - + self.cpu_target = False + if context.get_context("device_target") == "CPU": + self.cpu_target = True + if not self.cpu_target: + self.lstm = P.LSTM(input_size=self.input_size, + hidden_size=self.hidden_size, + num_layers=self.num_layers, + has_bias=self.has_bias, + bidirectional=self.bidirectional, + dropout=self.dropout) + weight_size = 0 + gate_size = 4 * self.hidden_size + for layer in range(self.num_layers): + input_layer_size = self.input_size if layer == 0 else self.hidden_size * num_directions + increment_size = gate_size * input_layer_size + increment_size += gate_size * self.hidden_size + if self.has_bias: + increment_size += 2 * gate_size + weight_size += increment_size * num_directions + self.weight = Parameter(initializer(0.0, [weight_size, 1, 1]), name='weight') + else: + layer = [] + layer.append(nn.LSTMCell(input_size=self.input_size, + hidden_size=self.hidden_size, + layer_index=0, + has_bias=self.has_bias, + bidirectional=self.bidirectional, + dropout=self.dropout)) + for 
+ i in range(num_layers - 1): + layer.append(nn.LSTMCell(input_size=self.hidden_size * num_directions, + hidden_size=self.hidden_size, + layer_index=i + 1, + has_bias=self.has_bias, + bidirectional=self.bidirectional, + dropout=self.dropout)) + self.lstms = layer self.fill = P.Fill() self.shape = P.Shape() def construct(self, x, hx): if self.batch_first: x = self.transpose1(x, (1, 0, 2)) - h0, c0 = hx - output, hn, cn, _, _ = self.lstm(x, h0, c0, self.weight) + if not self.cpu_target: + h, c = hx + output, h, c, _, _ = self.lstm(x, h, c, self.weight) + if self.batch_first: + output = self.transpose2(output, (1, 0, 2)) + return (output, (h, c)) + h, c = hx + output, hn, cn, _, _ = self.lstms[0](x, h[0], c[0]) + for i in range(1, self.num_layers): + output, hn, cn, _, _ = self.lstms[i](output, h[i], c[i]) if self.batch_first: output = self.transpose2(output, (1, 0, 2)) - return (output, (hn, cn)) + return output, hn, cn, _, _ + + +class LSTMCell(Cell): + r""" + LSTM (Long Short-Term Memory) layer. + + Applies an LSTM layer to the input. + + There are two pipelines connecting two consecutive cells in an LSTM model; one is cell state pipeline + and another is hidden state pipeline. Denote two consecutive time nodes as :math:`t-1` and :math:`t`. + Given an input :math:`x_t` at time :math:`t`, a hidden state :math:`h_{t-1}` and a cell + state :math:`c_{t-1}` of the layer at time :math:`{t-1}`, the cell state and hidden state at + time :math:`t` are computed using a gating mechanism. Input gate :math:`i_t` is designed to protect the cell + from perturbation by irrelevant inputs. Forget gate :math:`f_t` affords protection of the cell by forgetting + some information in the past, which is stored in :math:`h_{t-1}`. Output gate :math:`o_t` protects other + units from perturbation by currently irrelevant memory contents. Candidate cell state :math:`\tilde{c}_t` is + calculated with the current input, on which the input gate will be applied.
+ Finally, current cell state + :math:`c_{t}` and hidden state :math:`h_{t}` are computed with the calculated gates and cell states. The complete + formulation is as follows. + + .. math:: + \begin{array}{ll} \\ + i_t = \sigma(W_{ix} x_t + b_{ix} + W_{ih} h_{(t-1)} + b_{ih}) \\ + f_t = \sigma(W_{fx} x_t + b_{fx} + W_{fh} h_{(t-1)} + b_{fh}) \\ + \tilde{c}_t = \tanh(W_{cx} x_t + b_{cx} + W_{ch} h_{(t-1)} + b_{ch}) \\ + o_t = \sigma(W_{ox} x_t + b_{ox} + W_{oh} h_{(t-1)} + b_{oh}) \\ + c_t = f_t * c_{(t-1)} + i_t * \tilde{c}_t \\ + h_t = o_t * \tanh(c_t) \\ + \end{array} + + Here :math:`\sigma` is the sigmoid function, and :math:`*` is the Hadamard product. :math:`W, b` + are learnable weights between the output and the input in the formula. For instance, + :math:`W_{ix}, b_{ix}` are the weight and bias used to transform from input :math:`x` to :math:`i`. + Details can be found in paper `LONG SHORT-TERM MEMORY + <https://www.bioinf.jku.at/publications/older/2604.pdf>`_ and + `Long Short-Term Memory Recurrent Neural Network Architectures for Large Scale Acoustic Modeling + <https://static.googleusercontent.com/media/research.google.com/zh-CN//pubs/archive/43905.pdf>`_. + + Args: + input_size (int): Number of features of input. + hidden_size (int): Number of features of hidden layer. + layer_index (int): Index of the current layer in the stacked LSTM. Default: 0. + has_bias (bool): Specifies whether the layer has bias `b_ih` and `b_hh`. Default: True. + batch_first (bool): Specifies whether the first dimension of input is batch_size. Default: False. + dropout (float, int): If not 0, append `Dropout` layer on the outputs of each + LSTM layer except the last layer. Default: 0. The range of dropout is [0.0, 1.0]. + bidirectional (bool): Specifies whether this is a bidirectional LSTM. If set True, + number of directions will be 2; otherwise number of directions is 1. Default: False. + + Inputs: + - **input** (Tensor) - Tensor of shape (seq_len, batch_size, `input_size`). + - **h** - data type mindspore.float32 or + mindspore.float16 and shape (num_directions * `num_layers`, batch_size, `hidden_size`).
+ - **c** - data type mindspore.float32 or + mindspore.float16 and shape (num_directions * `num_layers`, batch_size, `hidden_size`). + Data type of `h' and 'c' should be the same of `input`. + + Outputs: + `output`, `h_n`, `c_n`, 'reserve', 'state'. + + - **output** (Tensor) - Tensor of shape (seq_len, batch_size, num_directions * `hidden_size`). + - **h** - A Tensor with shape (num_directions * `num_layers`, batch_size, `hidden_size`). + - **c** - A Tensor with shape (num_directions * `num_layers`, batch_size, `hidden_size`). + - **reserve** - reserved + - **state** - reserved + + Examples: + >>> class LstmNet(nn.Cell): + >>> def __init__(self, input_size, hidden_size, layer_index, has_bias, batch_first, bidirectional): + >>> super(LstmNet, self).__init__() + >>> self.lstm = nn.LSTMCell(input_size=input_size, + >>> hidden_size=hidden_size, + >>> layer_index=layer_index, + >>> has_bias=has_bias, + >>> batch_first=batch_first, + >>> bidirectional=bidirectional, + >>> dropout=0.0) + >>> + >>> def construct(self, inp, h0, c0): + >>> return self.lstm(inp, (h0, c0)) + >>> + >>> net = LstmNet(10, 12, 2, has_bias=True, batch_first=True, bidirectional=False) + >>> input = Tensor(np.ones([3, 5, 10]).astype(np.float32)) + >>> h0 = Tensor(np.ones([1 * 2, 3, 12]).astype(np.float32)) + >>> c0 = Tensor(np.ones([1 * 2, 3, 12]).astype(np.float32)) + >>> output, hn, cn, _, _ = net(input, h0, c0) + """ + + + def __init__(self, + input_size, + hidden_size, + layer_index=0, + has_bias=True, + batch_first=False, + dropout=0, + bidirectional=False): + super(LSTMCell, self).__init__() + self.input_size = input_size + self.hidden_size = hidden_size + self.num_layers = 1 + self.layer_index = layer_index + self.has_bias = has_bias + self.batch_first = validator.check_value_type("batch_first", batch_first, [bool], self.cls_name) + self.dropout = float(dropout) + self.bidirectional = bidirectional + self.num_directions = 1 + if self.bidirectional: + self.num_directions = 2 + if 
self.batch_first: + self.transpose1 = P.Transpose() + self.transpose2 = P.Transpose() + w_np = np.ones([(self.input_size + self.hidden_size) * self.num_directions * self.hidden_size * 4, 1]).astype( + np.float32) * 0.01 + if has_bias: + b_np = np.ones([self.num_directions * self.hidden_size * 4, 1]).astype( + np.float32) * 0.01 + else: + b_np = np.zeros([self.num_directions * self.hidden_size * 4, 1]).astype( + np.float32) * 0.01 + wb_np = np.concatenate((w_np, b_np), axis=0).reshape([-1, 1, 1]) + self.w = Parameter(initializer(Tensor(wb_np), wb_np.shape), name='w' + str(self.layer_index)) + self.lstm = P.LSTM(input_size=self.input_size, + hidden_size=self.hidden_size, + num_layers=1, + has_bias=self.has_bias, + bidirectional=self.bidirectional, + dropout=self.dropout) + + def construct(self, x, h, c): + if self.batch_first: + x = self.transpose1(x, (1, 0, 2)) + output, hn, cn, _, _ = self.lstm(x, h, c, self.w) + if self.batch_first: + output = self.transpose2(output, (1, 0, 2)) + return output, hn, cn, _, _ diff --git a/mindspore/ops/_grad/grad_array_ops.py b/mindspore/ops/_grad/grad_array_ops.py index aacb94ac6ab..53fafbb6d5c 100644 --- a/mindspore/ops/_grad/grad_array_ops.py +++ b/mindspore/ops/_grad/grad_array_ops.py @@ -49,6 +49,7 @@ def get_bprop_dtype(self): def bprop(x, out, dout): return (zeros_like(x),) + return bprop @@ -61,6 +62,7 @@ def get_bprop_cast(self): def bprop(x, t, out, dout): dx = cast(dout, get_dtype(x)) return dx, zeros_like(t) + return bprop @@ -70,6 +72,7 @@ def get_bprop_shape(self): def bprop(x, out, dout): return (zeros_like(x),) + return bprop @@ -82,6 +85,7 @@ def get_bprop_split(self): concat_op = P.Concat(axis) dx = concat_op(dout) return (dx,) + return bprop @@ -91,6 +95,7 @@ def get_bprop_rank(self): def bprop(x, out, dout): return (zeros_like(x),) + return bprop @@ -101,6 +106,7 @@ def get_bprop_reshape(self): def bprop(x, shp, out, dout): shapex = shape_op(x) return reshape(dout, shapex), zeros_like(shp) + return bprop @@ 
-111,6 +117,7 @@ def get_bprop_expand_dims(self): def bprop(x, axis, out, dout): shapex = shape_op(x) return reshape(dout, shapex), zeros_like(axis) + return bprop @@ -121,6 +128,7 @@ def get_bprop_squeeze(self): def bprop(x, out, dout): shapex = shape_op(x) return (reshape(dout, shapex),) + return bprop @@ -132,6 +140,7 @@ def get_bprop_flatten(self): def bprop(x, out, dout): dx = flatten_grad(dout, shape_op(x)) return (dx,) + return bprop @@ -166,6 +175,7 @@ def _tile_shape(multiples, shapex): @bprop_getters.register(P.Tile) def get_bprop_tile(self): """Generate bprop for Tile""" + def bprop(x, multiples, out, dout): shapex = shape_op(x) r_shape = _tile_shape(multiples, shapex) @@ -174,6 +184,7 @@ def get_bprop_tile(self): dx = reduce_sum(reshape(dout, r_shape), axis) dx = reshape(dx, shapex) return dx, zeros_like(multiples) + return bprop @@ -183,6 +194,7 @@ def get_bprop_transpose(self): def bprop(x, perm, out, dout): return transpose(dout, invert_permutation(perm)), zeros_like(perm) + return bprop @@ -198,6 +210,7 @@ def get_bprop_concat(self): slice_out = P.Slice()(dout, out_offset[i], shape_op(x[i])) dx = dx + (slice_out,) return (dx,) + return bprop @@ -215,12 +228,12 @@ def get_bprop_slice(self): dx = P.Pad(_slice_grad_pad(begin, size, shape_op(x)))(dout) return (dx, zeros_like(begin), zeros_like(size)) - def bprop_gpu(x, begin, size, out, dout): + def bprop_grad(x, begin, size, out, dout): dx = dx = G.SliceGrad()(dout, x, begin, size) return (dx, zeros_like(begin), zeros_like(size)) - if context.get_context('device_target') == "GPU": - return bprop_gpu + if context.get_context('device_target') == "GPU" or context.get_context('device_target') == "CPU": + return bprop_grad return bprop @@ -249,6 +262,7 @@ def _generate_inverse_index(x_shape, axis): @bprop_getters.register(P.GatherV2) def get_bprop_gather_v2(self): """Generate bprop for GatherV2""" + def bprop(x, indices, axis, out, dout): if F.rank(dout) == 0: dout = P.ExpandDims()(dout, -1) @@ -265,6 
+279,7 @@ def get_bprop_gather_v2(self): perm_2 = _generate_inverse_index(x_shp, axis) params_grad = transpose(params_grad, perm_2) return params_grad, zeros_like(indices), zeros_like(axis) + return bprop @@ -286,6 +301,7 @@ def get_bprop_pack(self): pack_grad = P.Unpack(axis) out = pack_grad(dout) return (out,) + return bprop @@ -298,6 +314,7 @@ def get_bprop_unpack(self): unpack_grad = P.Pack(axis) out = unpack_grad(dout) return (out,) + return bprop @@ -313,6 +330,7 @@ def get_bprop_strided_slice(self): def bprop(x, begin, end, strides, out, dout): dx = input_grad(dout, shape_op(x), begin, end, strides) return dx, zeros_like(begin), zeros_like(end), zeros_like(strides) + return bprop @@ -322,6 +340,7 @@ def get_bprop_eye(self): def bprop(n, m, t, out, dout): return zeros_like(n), zeros_like(m), zeros_like(t) + return bprop @@ -332,6 +351,7 @@ def get_bprop_select(self): def bprop(cond, x, y, out, dout): return zeros_like(cond), select(cond, dout, zeros_like(x)), select(cond, zeros_like(y), dout) + return bprop @@ -522,9 +542,11 @@ def get_bprop_unsorted_segment_min(self): def get_bprop_space_to_batch(self): """Generate bprop for SpaceToBatch""" space_to_batch_grad = P.BatchToSpace(self.block_size, self.paddings) + def bprop(x, out, dout): dx = space_to_batch_grad(dout) return (dx,) + return bprop @@ -532,7 +554,9 @@ def get_bprop_space_to_batch(self): def get_bprop_batch_to_space(self): """Generate bprop for BatchToSpace""" batch_to_space_grad = P.SpaceToBatch(self.block_size, self.crops) + def bprop(x, out, dout): dx = batch_to_space_grad(dout) return (dx,) + return bprop diff --git a/mindspore/ops/_grad/grad_math_ops.py b/mindspore/ops/_grad/grad_math_ops.py index f457148d513..c83d13a56d0 100755 --- a/mindspore/ops/_grad/grad_math_ops.py +++ b/mindspore/ops/_grad/grad_math_ops.py @@ -15,7 +15,6 @@ """Define the grad rules of math related operations.""" - from functools import reduce import numpy as np from .. 
import functional as F @@ -26,7 +25,6 @@ from ..functional import broadcast_gradient_args, reduced_shape, tuple_div from .grad_base import bprop_getters from ..primitive import constexpr - shape_op = P.Shape() reduce_sum = P.ReduceSum() reshape = P.Reshape() @@ -129,6 +127,7 @@ def bprop_matmul(self): else: dw = mul2(x, dout) return dx, dw + return bprop @@ -152,6 +151,7 @@ def bprop_batchmatmul(self): else: dw = mul2(x, dout) return dx, dw + return bprop @@ -161,6 +161,7 @@ def get_bprop_tensor_add(self): def bprop(x, y, out, dout): return binop_grad_common(x, y, dout, dout) + return bprop @@ -172,6 +173,7 @@ def get_bprop_neg(self): def bprop(x, out, dout): dx = neg_grad(dout) return (dx,) + return bprop @@ -182,6 +184,7 @@ def get_bprop_sub(self): def bprop(x, y, out, dout): return binop_grad_common(x, y, dout, neg_func(dout)) + return bprop @@ -194,6 +197,7 @@ def get_bprop_mul(self): bc_dx = mul_func(dout, y) bc_dy = mul_func(dout, x) return binop_grad_common(x, y, bc_dx, bc_dy) + return bprop @@ -208,6 +212,7 @@ def get_bprop_real_div(self): bc_x = div_op(dout, y) bc_y = neg(mul_op(bc_x, out)) return binop_grad_common(x, y, bc_x, bc_y) + return bprop @@ -222,6 +227,7 @@ def get_bprop_div(self): bc_x = div_op(dout, y) bc_y = neg(mul_op(bc_x, out)) return binop_grad_common(x, y, bc_x, bc_y) + return bprop @@ -235,6 +241,7 @@ def get_bprop_floor(self): def bprop(x, out, dout): bc_x = fill_(dtype_(x), shape_(x), 0.) 
return (bc_x,) + return bprop @@ -249,6 +256,7 @@ def get_bprop_floordiv(self): bc_x = div_op(dout, y) bc_y = neg(mul_op(bc_x, out)) return binop_grad_common(x, y, bc_x, bc_y) + return bprop @@ -260,6 +268,7 @@ def get_bprop_floormod(self): bc_x = dout bc_y = -dout * (x // y) return binop_grad_common(x, y, bc_x, bc_y) + return bprop @@ -274,6 +283,7 @@ def get_bprop_square(self): temp = mul_func(dout, x) dx = mul_func(fill_func(dtype(temp), shape_op(x), 2.0), temp) return (dx,) + return bprop @@ -290,6 +300,7 @@ def get_bprop_sqrt(self): temp = div_op(fill_func(dtype(x), shape_op(x), 0.5), sqrt(x)) dx = mul_func(dout, temp) return (dx,) + return bprop @@ -298,9 +309,10 @@ def get_bprop_rsqrt(self): """Grad definition for `Rsqrt` operation.""" def bprop(x, out, dout): - grad = F.fill(F.dtype(x), F.shape(x), -0.5) / (F.sqrt(x)*x) + grad = F.fill(F.dtype(x), F.shape(x), -0.5) / (F.sqrt(x) * x) dx = dout * grad return (dx,) + return bprop @@ -316,6 +328,7 @@ def get_bprop_reciprocal(self): g = neg(reciprocal(square(x))) dx = mul(dout, g) return (dx,) + return bprop @@ -328,6 +341,7 @@ def get_bprop_log(self): g = reciprocal(x) dx = g * dout return dx, 0 + return bprop @@ -341,6 +355,7 @@ def get_bprop_log1p(self): g = reciprocal(x_1p) dx = g * dout return dx, 0 + return bprop @@ -358,6 +373,7 @@ def get_bprop_erf(self): x_square = square(x) dx = dout * half_root_pi * exp(-x_square) return (dx,) + return bprop @@ -388,6 +404,7 @@ def get_bprop_pow(self): bc_dx = power * pow_op(x, power - 1.0) * dout bc_dpower = out * ln(x) * dout return binop_grad_common(x, power, bc_dx, bc_dpower) + return bprop @@ -400,6 +417,7 @@ def get_bprop_exp(self): g = exp_(x) dx = g * dout return (dx,) + return bprop @@ -411,6 +429,7 @@ def get_bprop_minimum(self): def bprop(x, y, out, dout): dx, dy = input_grad(x, y, dout) return dx, dy + return bprop @@ -422,6 +441,7 @@ def get_bprop_maximum(self): def bprop(x, y, out, dout): dx, dy = input_grad(x, y, dout) return dx, dy + return bprop @@ 
-432,6 +452,7 @@ def get_bprop_reducesum(self): def bprop(x, axis, out, dout): dx = _sum_grad(x, axis, dout) return dx, zeros_like(axis) + return bprop @@ -442,6 +463,7 @@ def get_bprop_cumsum(self): def bprop(x, axis, out, dout): return cumsum(dout, axis), zeros_like(axis) + return bprop @@ -500,6 +522,7 @@ def get_bprop_reduceprod(self): out = transpose(y, _invert_permutation(perm)) * grad dx = reshape(out, input_shape) return dx, zeros_like(axis) + return bprop @@ -515,6 +538,7 @@ def get_bprop_cumprod(self): prod = cumprod(x, axis) out = cumsum(prod * dout, axis) return out / x, zeros_like(axis) + return bprop @@ -524,6 +548,7 @@ def get_bprop_reduceall(self): def bprop(x, axis, out, dout): return zeros_like(x), zeros_like(axis) + return bprop @@ -534,6 +559,7 @@ def get_bprop_reducemax(self): def bprop(x, axis, out, dout): dx = _min_or_max_grad(x, axis, out, dout) return (dx, zeros_like(axis)) + return bprop @@ -547,6 +573,7 @@ def get_bprop_argmaxwithvalue(self): def bprop(x, out, dout): dx = _argmin_or_argmax_grad(x, axis, keep_dims, op, out, dout) return (dx,) + return bprop @@ -557,6 +584,7 @@ def get_bprop_reducemin(self): def bprop(x, axis, out, dout): dx = _min_or_max_grad(x, axis, out, dout) return (dx, zeros_like(axis)) + return bprop @@ -570,6 +598,7 @@ def get_bprop_argminwithvalue(self): def bprop(x, out, dout): dx = _argmin_or_argmax_grad(x, axis, keep_dims, op, out, dout) return (dx,) + return bprop @@ -585,6 +614,7 @@ def get_bprop_reduce_mean(self): div_shape = F.shape_mul(shape_op(x)) / F.shape_mul(shape_op(out)) dx = div_op(grad, cast(F.scalar_to_array(div_shape), dtype(grad))) return dx, zeros_like(axis) + return bprop @@ -604,6 +634,7 @@ def get_bprop_not_equal(self): def bprop(x, y, out, dout): return zeros_like(x), zeros_like(y) + return bprop @@ -613,6 +644,7 @@ def get_bprop_greater(self): def bprop(x, y, out, dout): return zeros_like(x), zeros_like(y) + return bprop @@ -622,6 +654,7 @@ def get_bprop_greater_equal(self): def bprop(x, y, 
out, dout): return zeros_like(x), zeros_like(y) + return bprop @@ -631,6 +664,7 @@ def get_bprop_less(self): def bprop(x, y, out, dout): return zeros_like(x), zeros_like(y) + return bprop @@ -640,6 +674,7 @@ def get_bprop_less_equal(self): def bprop(x, y, out, dout): return zeros_like(x), zeros_like(y) + return bprop @@ -649,6 +684,7 @@ def get_bprop_logical_not(self): def bprop(x, out, dout): return (zeros_like(x),) + return bprop @@ -658,6 +694,7 @@ def get_bprop_logical_and(self): def bprop(x, y, out, dout): return zeros_like(x), zeros_like(y) + return bprop @@ -667,6 +704,7 @@ def get_bprop_logical_or(self): def bprop(x, y, out, dout): return zeros_like(x), zeros_like(y) + return bprop @@ -676,6 +714,7 @@ def get_bprop_npu_alloc_float_status(self): def bprop(out, dout): return () + return bprop @@ -685,6 +724,7 @@ def get_bprop_npu_get_float_status(self): def bprop(x, out, dout): return (zeros_like(x),) + return bprop @@ -694,6 +734,7 @@ def get_bprop_npu_clear_float_status(self): def bprop(x, out, dout): return (zeros_like(x),) + return bprop @@ -703,6 +744,7 @@ def get_bprop_assign_add(self): def bprop(x, y, out, dout): return zeros_like(x), zeros_like(y) + return bprop @@ -712,6 +754,7 @@ def get_bprop_assign_sub(self): def bprop(x, y, out, dout): return zeros_like(x), zeros_like(y) + return bprop @@ -721,8 +764,9 @@ def get_bprop_sin(self): cos = P.Cos() def bprop(x, out, dout): - dx = dout*cos(x) + dx = dout * cos(x) return (dx,) + return bprop @@ -733,8 +777,9 @@ def get_bprop_cos(self): neg = P.Neg() def bprop(x, out, dout): - dx = dout*neg(sin(x)) + dx = dout * neg(sin(x)) return (dx,) + return bprop @@ -746,6 +791,7 @@ def get_bprop_acos(self): def bprop(x, out, dout): dx = input_grad(x, dout) return (dx,) + return bprop @@ -757,6 +803,7 @@ def get_bprop_acosh(self): def bprop(x, out, dout): dx = input_grad(out, dout) return (dx,) + return bprop @@ -768,6 +815,7 @@ def get_bprop_abs(self): def bprop(x, out, dout): dx = abs_grad(x, dout) return (dx,) + 
return bprop @@ -777,6 +825,7 @@ def get_bprop_scalar_cast(self): def bprop(x, t, out, dout): return F.scalar_cast(dout, F.typeof(x)), zeros_like(t) + return bprop @@ -789,6 +838,7 @@ def get_bprop_scalar_addn(self): for _ in range(len(x)): dx = dx + (dout,) return dx + return bprop @@ -798,6 +848,7 @@ def get_bprop_sign(self): def bprop(x, out, dout): return (zeros_like(x),) + return bprop @@ -807,6 +858,7 @@ def get_bprop_round(self): def bprop(x, out, dout): return (zeros_like(x),) + return bprop @@ -821,4 +873,5 @@ def get_bprop_atan2(self): bc_dx = tmp * y bc_dy = tmp * (-x) return binop_grad_common(x, y, bc_dx, bc_dy) + return bprop diff --git a/mindspore/ops/_grad/grad_nn_ops.py b/mindspore/ops/_grad/grad_nn_ops.py index b5a0fb3bd8c..aa523067d90 100755 --- a/mindspore/ops/_grad/grad_nn_ops.py +++ b/mindspore/ops/_grad/grad_nn_ops.py @@ -21,6 +21,7 @@ from ..operations import _grad_ops as G from ..operations import _inner_ops as inner from ..composite.multitype_ops.zeros_like_impl import zeros_like from .grad_base import bprop_getters +from ... 
import context @bprop_getters.register(P.BiasAdd) @@ -551,6 +552,14 @@ def get_bprop_lstm(self): bidirectional=self.bidirectional, dropout=self.dropout ) + lstm_grad = G.LSTMGrad( + input_size=self.input_size, + hidden_size=self.hidden_size, + num_layers=self.num_layers, + has_bias=self.has_bias, + bidirectional=self.bidirectional, + dropout=self.dropout + ) def bprop(x, hx, cx, w, out, dout): y, _, _, reserve, state = out @@ -559,6 +568,16 @@ def get_bprop_lstm(self): dw = lstm_grad_weight(F.depend(x, dx), hx, y, reserve, state) return dx, dhx, dcx, dw + # + def bprop_cpu(x, hx, cx, w, out, dout): + y, hy, cy, reserve, _ = out + dy, dhy, dcy, _, _ = dout + dx, dhx, dcx, dw = lstm_grad(x, hx, cx, w, y, hy, cy, dy, dhy, dcy, reserve) + return dx, dhx, dcx, dw + + if context.get_context('device_target') == "CPU": + return bprop_cpu + return bprop diff --git a/mindspore/ops/operations/_grad_ops.py b/mindspore/ops/operations/_grad_ops.py index 51bbac63215..2ce50260ced 100644 --- a/mindspore/ops/operations/_grad_ops.py +++ b/mindspore/ops/operations/_grad_ops.py @@ -107,6 +107,7 @@ class BiasAddGrad(Primitive): class BinaryCrossEntropyGrad(PrimitiveWithInfer): """Computes gradients for `BinaryCrossEntropy` operation.""" + @prim_attr_register def __init__(self, reduction='mean'): self.reduction = validator.check_string('reduction', reduction, ['none', 'mean', 'sum'], self.name) @@ -665,6 +666,62 @@ class LSTMGradWeight(PrimitiveWithInfer): return hx_dtype +class LSTMGrad(PrimitiveWithInfer): + """Computes the data and weight gradients of LSTM.""" + + @prim_attr_register + def __init__(self, input_size, hidden_size, num_layers, has_bias, bidirectional, dropout): + self.input_size = validator.check_integer('input_size', input_size, 0, Rel.GT, self.name) + self.hidden_size = validator.check_integer('hidden_size', hidden_size, 0, Rel.GT, self.name) + self.num_layers = validator.check_integer('num_layers', num_layers, 0, Rel.GT, self.name) + self.has_bias = 
validator.check_value_type('has_bias', has_bias, (bool,), self.name) + self.bidirectional = validator.check_value_type('bidirectional', bidirectional, (bool,), self.name) + self.dropout = validator.check_value_type("dropout", dropout, [float], self.name) + self.dropout = validator.check_number_range('dropout', dropout, 0, 1, Rel.INC_BOTH, self.name) + + if bidirectional: + self.num_directions = 2 + else: + self.num_directions = 1 + + def infer_shape(self, x_shape, hx_shape, cx_shape, w_shape, y_shape, hy_shape, cy_shape, dy_shape, dhy_shape, + dcy_shape, reserve_shape): + # dhy and dcy should be same shape + validator.check_integer("h_shape", len(dhy_shape), 3, Rel.EQ, self.name) + validator.check_integer("h_shape", len(dhy_shape), len(dcy_shape), Rel.EQ, self.name) + validator.check_integer("h_shape[0]", dhy_shape[0], dcy_shape[0], Rel.EQ, self.name) + validator.check_integer("h_shape[1]", dhy_shape[1], dcy_shape[1], Rel.EQ, self.name) + validator.check_integer("h_shape[2]", dhy_shape[2], dcy_shape[2], Rel.EQ, self.name) + + validator.check_integer("h_shape[0]", dhy_shape[0], self.num_layers * self.num_directions, Rel.EQ, self.name) + validator.check_integer("h_shape[2]", dhy_shape[2], self.hidden_size, Rel.EQ, self.name) + + # dy: (seq_len, batch_size, hidden_size * num_directions) + validator.check_integer("dy_shape", len(dy_shape), 3, Rel.EQ, self.name) + validator.check_integer("dy[1]", dy_shape[1], dhy_shape[1], Rel.EQ, self.name) + validator.check_integer("dy[2]", dy_shape[2], self.hidden_size * self.num_directions, Rel.EQ, self.name) + + # (seq_len, batch_size, input_size) + dx_shape = (y_shape[0], y_shape[1], self.input_size) + dhx_shape = dhy_shape + dcx_shape = dcy_shape + weight_size = 0 + gate_size = 4 * self.hidden_size + for layer in range(self.num_layers): + for _ in range(self.num_directions): + input_layer_size = self.input_size if layer == 0 else self.hidden_size * self.num_directions + weight_size += gate_size * input_layer_size + weight_size += 
gate_size * self.hidden_size + if self.has_bias: + weight_size += gate_size + + return (dx_shape, dhx_shape, dcx_shape, (weight_size, 1, 1)) + + def infer_dtype(self, x_dtype, hx_dtype, cx_dtype, w_dtype, y_dtype, hy_dtype, cy_dtype, dy_dtype, dhy_dtype, + dcy_dtype, reserve_dtype): + return (dy_dtype, dy_dtype, dy_dtype, hx_dtype) + + class PReLUGrad(PrimitiveWithInfer): r""" Gradients of PReLU operation. @@ -1051,6 +1108,7 @@ class RefToEmbed(Primitive): __mindspore_signature__ = ( ('variable', sig_rw.RW_REF, sig_kind.KIND_POSITIONAL_KEYWORD), ) + @prim_attr_register def __init__(self): pass diff --git a/mindspore/ops/operations/nn_ops.py b/mindspore/ops/operations/nn_ops.py index a4e998589e1..b771599eda5 100644 --- a/mindspore/ops/operations/nn_ops.py +++ b/mindspore/ops/operations/nn_ops.py @@ -35,9 +35,11 @@ def _check_positive_int_or_tuple(arg_name, arg_value, prim_name, allow_four=Fals """ Checks whether an argument is a positive int or tuple with 2 or 4(when allow_four is True) positive int elements. """ + def _raise_message(): raise ValueError(f"For '{prim_name}' attr '{arg_name}' should be an positive int number or a tuple of two " f"{'or four ' if allow_four else ''}positive int numbers, but got {arg_value}") + def _get_return_value(): if isinstance(arg_value, int): ret = (1, 1, arg_value, arg_value) if ret_four else (arg_value, arg_value) @@ -50,6 +52,7 @@ def _check_positive_int_or_tuple(arg_name, arg_value, prim_name, allow_four=Fals else: _raise_message() return ret + validator.check_value_type(arg_name, arg_value, (int, tuple), prim_name) ret_value = _get_return_value() for item in ret_value: @@ -58,6 +61,7 @@ def _check_positive_int_or_tuple(arg_name, arg_value, prim_name, allow_four=Fals _raise_message() return ret_value + class Flatten(PrimitiveWithInfer): r""" Flattens a tensor without changing its batch size on the 0-th axis. 
@@ -205,6 +209,7 @@ class Softplus(PrimitiveWithInfer): >>> softplus(input_x) [1.3132615, 2.126928, 3.0485873, 4.01815, 5.0067153] """ + @prim_attr_register def __init__(self): """init Softplus""" @@ -301,6 +306,7 @@ class ReLUV2(PrimitiveWithInfer): ([[[[1., 0.], [0., 4.]], [[0., 6.], [7., 0.]]]], [[[[1, 0], [2, 0]], [[2, 0], [1, 0]]]]) """ + @prim_attr_register def __init__(self): """init ReLUV2""" @@ -398,6 +404,7 @@ class HSwish(PrimitiveWithInfer): >>> input_x = Tensor(np.array([-1, -2, 0, 2, 1]), mindspore.float16) >>> result = hswish(input_x) """ + @prim_attr_register def __init__(self): self.init_prim_io_names(inputs=['x'], outputs=['output']) @@ -1077,6 +1084,7 @@ class MaxPoolWithArgmax(_Pool): >>> maxpool_arg_op = P.MaxPoolWithArgmax(padding="VALID", ksize=2, strides=1) >>> output_tensor, argmax = maxpool_arg_op(input_tensor) """ + def __init__(self, ksize=1, strides=1, padding="valid"): super(MaxPoolWithArgmax, self).__init__(ksize, strides, padding) self.is_tbe = context.get_context("device_target") == "Ascend" @@ -1495,6 +1503,7 @@ class ApplyMomentum(PrimitiveWithInfer): ('gradient', sig_rw.RW_READ, sig_kind.KIND_POSITIONAL_KEYWORD), ('momentum', sig_rw.RW_READ, sig_kind.KIND_POSITIONAL_KEYWORD) ) + @prim_attr_register def __init__(self, use_nesterov=False, use_locking=False, gradient_scale=1.0): self.init_prim_io_names(inputs=['variable', 'accumulation', 'learning_rate', 'gradient', 'momentum'], @@ -1584,6 +1593,7 @@ class L2Loss(PrimitiveWithInfer): >>> l2_loss(input_x) 7.0 """ + @prim_attr_register def __init__(self): """init L2Loss""" @@ -2326,7 +2336,29 @@ class LSTM(PrimitiveWithInfer): y_shape = (x_shape[0], x_shape[1], self.hidden_size * self.num_directions) # set arbitrary shape for reserved space - reserved_shape = (1, 1) + type_size = 4 + gates_ws_ld = self.get_good_ld(self.hidden_size * 4, type_size) + states_ws_ld = self.get_good_ld(max(self.hidden_size, self.input_size), type_size) + self.ws_gates_size = self.num_layers * 
self.num_directions * x_shape[0] * x_shape[1] * gates_ws_ld * type_size + self.ws_states_size = (self.num_layers + 1) * self.num_directions * (x_shape[0] + 1) * x_shape[ + 1] * states_ws_ld * type_size + self.ws_c_states_size = (self.num_layers + 1) * self.num_directions * (x_shape[0] + 1) * x_shape[ + 1] * states_ws_ld * type_size + self.ws_diff_states_size = (self.num_layers + 1) * self.num_directions * (x_shape[0] + 1) * (2 + 1) * x_shape[ + 1] * states_ws_ld * type_size + self.ws_grid_comp_size = 0 + self.page_size = 4096 + current_offset = 0 + current_offset += self.ws_gates_size + current_offset = self.rnd_up(current_offset, self.page_size) + current_offset += self.ws_states_size + current_offset = self.rnd_up(current_offset, self.page_size) + current_offset += self.ws_c_states_size + current_offset = self.rnd_up(current_offset, self.page_size) + current_offset += self.ws_diff_states_size + current_offset = self.rnd_up(current_offset, self.page_size) + current_offset += self.ws_grid_comp_size + reserved_shape = (current_offset, 1) state_shape = (1, 1) return (y_shape, h_shape, c_shape, reserved_shape, state_shape) @@ -2335,6 +2367,15 @@ class LSTM(PrimitiveWithInfer): validator.check_tensor_type_same(args, (mstype.float32, mstype.float16), self.name) return (x_dtype, x_dtype, x_dtype, x_dtype, x_dtype) + def rnd_up(self, current_offset, page_size): + return ((current_offset + page_size - 1) // page_size) * page_size + + def get_good_ld(self, dim, type_size): + ld = self.rnd_up(dim, 64 // type_size) + if ld % 256 == 0: + return ld + 64 // type_size + return ld + class SigmoidCrossEntropyWithLogits(PrimitiveWithInfer): r""" @@ -3000,6 +3041,7 @@ class Dropout(PrimitiveWithInfer): >>> in = Tensor((20, 16, 50, 50)) >>> out = dropout(in) """ + @prim_attr_register def __init__(self, drop_prob=0): self.drop_prob = validator.check_number_range("drop_prob", drop_prob, 0, 1, Rel.INC_BOTH, self.name) @@ -3034,6 +3076,7 @@ class DropoutGrad(PrimitiveWithInfer): >>> in =
Tensor((20, 16, 50, 50)) >>> out = dropout_grad(in) """ + @prim_attr_register def __init__(self, drop_prob=0): self.drop_prob = validator.check_number_range("drop_prob", drop_prob, 0, 1, Rel.INC_BOTH, self.name) @@ -3084,6 +3127,7 @@ class CTCLoss(PrimitiveWithInfer): >>> ctc_loss = P.CTCloss() >>> output = ctc_loss(inputs, labels_indices, labels_values, sequence_length) """ + @prim_attr_register def __init__(self, preprocess_collapse_repeated=False, ctc_merge_repeated=False, ignore_longer_outputs_than_inputs=False): diff --git a/tests/st/ops/cpu/test_lstm_op.py b/tests/st/ops/cpu/test_lstm_op.py new file mode 100644 index 00000000000..2115e46a16d --- /dev/null +++ b/tests/st/ops/cpu/test_lstm_op.py @@ -0,0 +1,335 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +import pytest +import mindspore.nn as nn +from mindspore.common.api import ms_function +import numpy as np +import mindspore.context as context +from mindspore.common.initializer import initializer +from mindspore.ops import composite as C +from mindspore.ops import operations as P +from mindspore.common.tensor import Tensor +from mindspore.common.parameter import ParameterTuple, Parameter + +context.set_context(device_target='CPU') + + +class LstmNet(nn.Cell): + def __init__(self, seq_len, batch_size, input_size, hidden_size, num_layers, has_bias, bidirectional, dropout): + super(LstmNet, self).__init__() + + num_directions = 1 + if bidirectional: + num_directions = 2 + + self.lstm = P.LSTM(input_size, hidden_size, num_layers, has_bias, bidirectional, dropout) + input_np = np.array([[[0.6755, -1.6607, 0.1367], [0.4276, -0.7850, -0.3758]], + [[-0.6424, -0.6095, 0.6639], [0.7918, 0.4147, -0.5089]], + [[-1.5612, 0.0120, -0.7289], [-0.6656, -0.6626, -0.5883]], + [[-0.9667, -0.6296, -0.7310], [0.1026, -0.6821, -0.4387]], + [[-0.4710, 0.6558, -0.3144], [-0.8449, -0.2184, -0.1806]] + ]).astype(np.float32) + self.x = Parameter(initializer(Tensor(input_np), [seq_len, batch_size, input_size]), name='x') + + self.h = Parameter(initializer( + Tensor( + np.array([0.1, 0.1, 0.1, 0.1]).reshape((num_layers * num_directions, batch_size, hidden_size)).astype( + np.float32)), + [num_layers * num_directions, batch_size, hidden_size]), name='h') + + self.c = Parameter(initializer( + Tensor( + np.array([0.2, 0.2, 0.2, 0.2]).reshape((num_layers * num_directions, batch_size, hidden_size)).astype( + np.float32)), + [num_layers * num_directions, batch_size, hidden_size]), name='c') + + wih = np.array([[3.4021e-01, -4.6622e-01, 4.5117e-01], + [-6.4257e-02, -2.4807e-01, 1.3550e-02], # i + [-3.2140e-01, 5.5578e-01, 6.3589e-01], + [1.6547e-01, -7.9030e-02, -2.0045e-01], + [-6.9863e-01, 5.9773e-01, -3.9062e-01], 
+                        [-3.0253e-01, -1.9464e-01, 7.0591e-01],
+                        [-4.0835e-01, 3.6751e-01, 4.7989e-01],
+                        [-5.6894e-01, -5.0359e-01, 4.7491e-01]]).astype(np.float32)  # .reshape([1,-1])
+        whh = np.array([[-0.4820, -0.2350],
+                        [-0.1195, 0.0519],
+                        [0.2162, -0.1178],
+                        [0.6237, 0.0711],
+                        [0.4511, -0.3961],
+                        [-0.5962, 0.0906],
+                        [0.1867, -0.1225],
+                        [0.1831, 0.0850]]).astype(np.float32)  # .reshape([1,-1])
+        wih = wih.transpose((1, 0))
+        whh = whh.transpose((1, 0))
+        bih = np.zeros((1, 8)).astype(np.float32)
+        # Stack the transposed wih and whh with the all-zero bias row, then
+        # flatten the result into a single weight array of shape (-1, 1, 1).
+        w_np = np.concatenate((wih, whh, bih), axis=0).reshape([-1, 1, 1])
+        self.w = Parameter(initializer(Tensor(w_np), w_np.shape), name='w')
+
+    @ms_function
+    def construct(self):
+        """Call self.lstm on the stored x, h, c and packed weight w and return its result."""
+        return self.lstm(self.x, self.h, self.c, self.w)
+
+@pytest.mark.level0
+@pytest.mark.platform_x86_cpu
+@pytest.mark.env_onecard
+def test_lstm():
+    """Run LstmNet once and compare y, h and c with hard-coded reference values.
+
+    The configuration is a single layer, not bidirectional, with dropout 0.0.
+    Each output must differ from its reference by strictly less than 1.0e-4
+    in both directions (diff < error and -diff < error).
+    """
+    seq_len = 5
+    batch_size = 2
+    input_size = 3
+    hidden_size = 2
+    num_layers = 1
+    has_bias = True
+    bidirectional = False
+    dropout = 0.0
+    num_directions = 1
+    if bidirectional:
+        num_directions = 2
+    net = LstmNet(seq_len, batch_size, input_size, hidden_size, num_layers, has_bias, bidirectional, dropout)
+    # The net returns five values; only the first three are checked here.
+    y, h, c, _, _ = net()
+    print(y)
+    print(c)
+    print(h)
+    expect_y = np.array([[[-0.16709016, 0.13125697],
+                          [-0.08438572, -0.01969833]],
+                         [[-0.2746155, 0.32764038],
+                          [-0.06504016, -0.07770399]],
+                         [[-0.00140004, 0.17706314],
+                          [0.03244496, -0.10135599]],
+                         [[0.08328028, 0.06437367],
+                          [-0.04133911, -0.11072896]],
+                         [[0.19004421, -0.02852732],
+                          [0.09138509, -0.00344161]]]
+                        )
+    # NOTE(review): this tolerance array has leading dimension num_layers (1)
+    # while expect_y has 5 entries on its first axis, so the comparison below
+    # relies on NumPy broadcasting rather than on matching shapes.
+    error = np.ones([num_layers, batch_size, hidden_size]) * 1.0e-4
+    diff = y.asnumpy() - expect_y
+    assert np.all(diff < error)
+    assert np.all(-diff < error)
+    #
+    expect_h = np.array([[[0.19004421, -0.02852732],
+                          [0.09138509, -0.00344161]]])
+
+    error = np.ones((num_layers * num_directions, batch_size, hidden_size)) * 1.0e-4
+    diff = h.asnumpy() - expect_h
+    assert np.all(diff < error)
+    assert np.all(-diff < error)
+    #
+    expect_c = np.array([[[0.34533143, -0.06313794],
+                          [0.169008, -0.00555446]]])
+    error = np.ones((num_layers * num_directions, batch_size, hidden_size)) * 1.0e-4
+    diff = c.asnumpy() - expect_c
+    assert np.all(diff < error)
+    assert np.all(-diff < error)
+
+
+class MultiLayerBiLstmNet(nn.Cell):
+    """Cell wrapping nn.LSTM with a fixed input and all-ones initial states.
+
+    The input x and the states h0/c0/h1/c1 are stored as Parameters, so
+    construct() takes no arguments.
+    """
+
+    def __init__(self, seq_len, batch_size, input_size, hidden_size, num_layers, has_bias, bidirectional, dropout):
+        super(MultiLayerBiLstmNet, self).__init__()
+
+        num_directions = 1
+        if bidirectional:
+            num_directions = 2
+
+        self.lstm = nn.LSTM(input_size=input_size, hidden_size=hidden_size, num_layers=num_layers, has_bias=has_bias,
+                            bidirectional=bidirectional, dropout=dropout)
+
+        # Hard-coded input literal of shape (5, 2, 10); it is registered below
+        # with the shape [seq_len, batch_size, input_size].
+        input_np = np.array([[[-0.1887, -0.4144, -0.0235, 0.7489, 0.7522, 0.5969, 0.3342, 1.2198, 0.6786, -0.9404],
+                              [-0.8643, -1.6835, -2.4965, 2.8093, 0.1741, 0.2707, 0.7387, -0.0939, -1.7990, 0.4765]],
+
+                             [[-0.5963, -1.2598, -0.7226, 1.1365, -1.7320, -0.7302, 0.1221, -0.2111, -1.6173, -0.0706],
+                              [0.8964, 0.1737, -1.0077, -0.1389, 0.4889, 0.4391, 0.7911, 0.3614, -1.9533, -0.9936]],
+
+                             [[0.3260, -1.3312, 0.0601, 1.0726, -1.6010, -1.8733, -1.5775, 1.1579, -0.8801, -0.5742],
+                              [-2.2998, -0.6344, -0.5409, -0.9221, -0.6500, 0.1206, 1.5215, 0.7517, 1.3691, 2.0021]],
+
+                             [[-0.1245, -0.3690, 2.1193, 1.3852, -0.1841, -0.8899, -0.3646, -0.8575, -0.3131, 0.2026],
+                              [1.0218, -1.4331, 0.1744, 0.5442, -0.7808, 0.2527, 0.1566, 1.1484, -0.7766, -0.6747]],
+
+                             [[-0.6752, 0.9906, -0.4973, 0.3471, -0.1202, -0.4213, 2.0213, 0.0441, 0.9016, 1.0365],
+                              [1.2223, -1.3248, 0.1207, -0.8256, 0.1816, 0.7057, -0.3105, 0.5713, 0.2804,
+                               -1.0685]]]).astype(np.float32)
+
+        self.x = Parameter(initializer(Tensor(input_np), [seq_len, batch_size, input_size]), name='x')
+
+        # Two hidden/cell state pairs, each all ones with shape
+        # (num_directions, batch_size, hidden_size).
+        # NOTE(review): presumably one pair per layer (the test below uses
+        # num_layers=2) - confirm against nn.LSTM's expected state layout.
+        self.h0 = Parameter(initializer(
+            Tensor(np.ones((num_directions, batch_size, hidden_size)).astype(np.float32)),
+            [num_directions, batch_size, hidden_size]), name='h0')
+        self.c0 = Parameter(initializer(
+            Tensor(np.ones((num_directions, batch_size, hidden_size)).astype(np.float32)),
+            [num_directions, batch_size, hidden_size]), name='c0')
+        self.h1 = Parameter(initializer(
+            Tensor(np.ones((num_directions, batch_size, hidden_size)).astype(np.float32)),
+            [num_directions, batch_size, hidden_size]), name='h1')
+        self.c1 = Parameter(initializer(
+            Tensor(np.ones((num_directions, batch_size, hidden_size)).astype(np.float32)),
+            [num_directions, batch_size, hidden_size]), name='c1')
+        self.h = ParameterTuple((self.h0, self.h1))
+        self.c = ParameterTuple((self.c0, self.c1))
+
+    @ms_function
+    def construct(self):
+        """Call self.lstm with the stored input and the (h, c) state tuples."""
+        return self.lstm(self.x, (self.h, self.c))
+
+
+@pytest.mark.level0
+@pytest.mark.platform_x86_cpu
+@pytest.mark.env_onecard
+def test_multi_layer_bilstm():
+    """Build a 2-layer bidirectional MultiLayerBiLstmNet, run it and print y, h, c.
+
+    This test contains no assertions; it only fails if construction or
+    execution raises.
+    """
+    seq_len = 5
+    batch_size = 2
+    input_size = 10
+    hidden_size = 2
+    num_layers = 2
+    has_bias = True
+    bidirectional = True
+    dropout = 0.0
+
+    # NOTE(review): num_directions is computed here but not used in this test.
+    num_directions = 1
+    if bidirectional:
+        num_directions = 2
+
+    net = MultiLayerBiLstmNet(seq_len, batch_size, input_size, hidden_size, num_layers, has_bias, bidirectional,
+                              dropout)
+    y, h, c, _, _ = net()
+    print(y)
+    print(h)
+    print(c)
+
+
+class Grad(nn.Cell):
+    """Cell that evaluates gradients of `network` with respect to its trainable parameters.
+
+    The parameters are collected once from network.trainable_params() and the
+    gradient operation is created with get_by_list=True and sens_param=True.
+    """
+
+    def __init__(self, network):
+        super(Grad, self).__init__()
+        self.network = network
+        self.weights = ParameterTuple(network.trainable_params())
+        self.grad = C.GradOperation('grad',
+                                    get_by_list=True,
+                                    sens_param=True)
+
+    @ms_function
+    def construct(self, output_grad):
+        """Return the gradients for self.weights.
+
+        output_grad is passed as the only argument to the gradient function;
+        with sens_param=True it is presumably used as the output sensitivity
+        (confirm against the GradOperation documentation).
+        """
+        weights = self.weights
+        grads = self.grad(self.network, weights)(output_grad)
+        return grads
+
+
+class Net(nn.Cell):
+    """Cell wrapping the P.LSTM primitive with fixed input, states and weights.
+
+    The input x, the all-ones states h0/c0 and the packed weight w0 are stored
+    as Parameters; construct() returns element [0] of the primitive's outputs.
+    """
+
+    def __init__(self, seq_len, batch_size, input_size, hidden_size, num_layers, has_bias, bidirectional, dropout):
+        super(Net, self).__init__()
+
+        num_directions = 1
+        if bidirectional:
+            num_directions = 2
+        # Hard-coded input literal of shape (5, 2, 10); it is registered below
+        # with the shape [seq_len, batch_size, input_size].
+        input_np = np.array([[[-0.5907, 1.0557, 1.7283, 0.6706, -1.2550, -0.5298, -0.2290, -0.6735, 0.8555, 1.4836],
+                              [-1.7070, -0.5347, -0.9105, -0.2598, 0.0588, 1.5496, 1.0757, 0.3760, -1.2020, -0.2868]],
+
+                             [[0.0151, 0.2126, 0.8090, -0.5292, -2.5590, 0.4279, -0.3081, -1.4706, -0.0498, 1.2301],
+                              [0.4165, -0.5391, -0.0996, 0.1928, -0.4909, -0.1255, 0.4444, -1.3687, 1.3096, 0.6553]],
+
+                             [[-0.7802, -0.2083, -0.6388, 1.3757, 0.4293, 0.5363, 0.3202, -0.6687, -1.3864, -0.2953],
+                              [1.0799, -0.7204, 0.1130, -0.5857, -0.4855, -1.1068, 1.0126, 0.8716, 1.5460, -0.7392]],
+
+                             [[2.2645, -0.6586, -0.2227, 1.4290, -0.5006, -1.6576, -0.1793, 0.5319, 0.1360, 0.2707],
+                              [-0.4071, 0.1575, 1.4199, -0.9156, 0.1855, 0.4947, 1.0460, -0.6365, 0.1191, -0.6374]],
+
+                             [[0.2468, 1.0815, -0.4893, 0.0664, 0.6405, -2.2967, 0.7612, 0.8759, 0.5685, -1.0999],
+                              [-0.7272, -1.7750, -0.1164, -0.7159, 0.0061, -0.7839, -1.8329, 0.3434, -0.5634,
+                               0.5384]]]).astype(np.float32)
+
+        self.x = Parameter(initializer(Tensor(input_np), [seq_len, batch_size, input_size]), name='x')
+
+        self.h0 = Parameter(initializer(
+            Tensor(np.ones((num_directions, batch_size, hidden_size)).astype(np.float32)),
+            [num_directions, batch_size, hidden_size]), name='h0')
+
+        self.c0 = Parameter(initializer(
+            Tensor(np.ones((num_directions, batch_size, hidden_size)).astype(np.float32)),
+            [num_directions, batch_size, hidden_size]), name='c0')
+
+        # Each weight/bias literal below is flattened to a single row (1, -1)
+        # so the pieces can be concatenated along axis 1.
+        wih_l0 = np.array([[0.2300, 0.6668, 0.4703, 0.0425, 0.0464, 0.6825, 0.2249, -0.4315, -0.2449, 0.2964],
+                           [-0.2811, -0.3444, 0.2557, -0.5137, -0.5518, 0.1652, -0.6720, 0.1066, 0.3586, 0.6299],
+                           [0.5728, -0.1784, 0.5661, 0.4012, 0.3856, -0.1899, 0.3102, 0.3717, -0.5651, 0.1952],
+                           [0.1026, -0.0527, 0.1198, -0.3080, 0.2292, 0.5757, -0.3567, -0.2731, -0.0586, -0.2849],
+                           [0.2194, -0.1622, 0.3219, -0.3008, -0.3713, -0.3034, -0.2385, 0.0412, -0.5205, 0.0280],
+                           [-0.5499, -0.0733, -0.5236, -0.6753, -0.7045, -0.1839, -0.1037, -0.5026, -0.4055, -0.3416],
+                           [0.1573, -0.1301, -0.2882, -0.3464, 0.6643, 0.1980, -0.6804, 0.5359, 0.5996, 0.0124],
+                           [-0.6436, 0.0587, -0.6520, -0.0471, 0.1667, 0.6042, 0.5752, -0.6296, -0.2976,
+                            -0.3757]]).astype(np.float32).reshape([1, -1])
+
+        whh_l0 = np.array([[0.3358, 0.2790],
+                           [-0.5355, 0.0989],
+                           [-0.1402, 0.5120],
+                           [0.1335, 0.1653],
+                           [0.3533, -0.3531],
+                           [0.4166, -0.4420],
+                           [-0.5454, -0.1720],
+                           [0.0041, -0.0799]]).astype(np.float32).reshape([1, -1])
+
+        bih_l0 = np.array([0.5518, 0.1083, 0.4829, 0.0607, -0.1770, -0.6944, 0.3059, 0.5354]).astype(
+            np.float32).reshape([1, -1])
+        bhh_l0 = np.array([0.5025, -0.1261, -0.5405, 0.3220, -0.3441, 0.6488, -0.0284, -0.2334]).astype(
+            np.float32).reshape([1, -1])
+
+        # Pack wih_l0, whh_l0 and the element-wise sum of the two bias vectors
+        # into one flat weight array of shape (-1, 1, 1).
+        w0_np = np.concatenate(
+            (wih_l0, whh_l0, bih_l0 + bhh_l0),
+            axis=1).reshape([-1, 1, 1])
+        self.w0 = Parameter(initializer(Tensor(w0_np), w0_np.shape), name='w0')
+        self.lstm = P.LSTM(input_size=input_size, hidden_size=hidden_size, num_layers=num_layers,
+                           has_bias=has_bias, bidirectional=bidirectional, dropout=dropout)
+
+    @ms_function
+    def construct(self):
+        """Call self.lstm on the stored x, h0, c0 and w0 and return only output [0]."""
+        return self.lstm(self.x, self.h0, self.c0, self.w0)[0]
+
+
+@pytest.mark.level0
+@pytest.mark.platform_x86_cpu
+@pytest.mark.env_onecard
+def test_grad():
+    """Run Grad(Net(...)) with a hard-coded dy and print the four returned gradients.
+
+    This test contains no assertions; it only fails if construction or
+    execution raises.
+    """
+    seq_len = 5
+    batch_size = 2
+    input_size = 10
+    hidden_size = 2
+    num_layers = 1
+    has_bias = True
+    bidirectional = False
+    dropout = 0.0
+    # NOTE(review): num_directions is computed here but not used in this test.
+    num_directions = 1
+    if bidirectional:
+        num_directions = 2
+    net = Grad(Net(seq_len, batch_size, input_size, hidden_size, num_layers, has_bias, bidirectional, dropout))
+    # dy is a (5, 2, 2) literal passed to Grad.construct as output_grad.
+    dy = np.array([[[-3.5471e-01, 7.0540e-01],
+                    [2.7161e-01, 1.0865e+00]],
+
+                   [[-4.2431e-01, 1.4955e+00],
+                    [-4.0418e-01, -2.3282e-01]],
+
+                   [[-1.3654e+00, 1.9251e+00],
+                    [-4.6481e-01, 1.3138e+00]],
+
+                   [[1.2914e+00, -2.3753e-01],
+                    [5.3589e-01, -1.0981e-01]],
+
+                   [[-1.6032e+00, -1.8818e-01],
+                    [1.0065e-01, 9.2045e-01]]]).astype(np.float32)
+    # Four gradients are unpacked; presumably they correspond to Net's
+    # parameters x, h0, c0 and w0 in that order - TODO confirm.
+    dx, dhx, dcx, dw = net(Tensor(dy))
+    print(dx)
+    print(dhx)
+    print(dcx)
+    print(dw)
+
+# Commented-out manual invocations, kept for running the tests by hand.
+# NOTE(review): tf_lstm_test is not defined in the visible part of this file - verify or drop.
+# test_multi_layer_bilstm()
+# test_lstm()
+# tf_lstm_test()
+# test_grad()