forked from mindspore-Ecosystem/mindspore
commit
14f9a6e31c
|
@ -0,0 +1,120 @@
|
|||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#include "kernel/cpu/mkldnn/lstm_cpu_kernel.h"
|
||||
#include <string>
|
||||
#include "common/utils.h"
|
||||
#include "kernel/cpu/mkldnn/mkl_kernel_engine.h"
|
||||
#include "device/cpu/cpu_device_address.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace kernel {
|
||||
void LstmCPUKernel::InitKernel(const CNodePtr &kernel_node) {
|
||||
MS_EXCEPTION_IF_NULL(kernel_node);
|
||||
std::vector<size_t> src_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
|
||||
bidirectional_ = AnfAlgo::GetNodeAttr<bool>(kernel_node, "bidirectional");
|
||||
input_size_ = AnfAlgo::GetNodeAttr<int>(kernel_node, "input_size");
|
||||
hidden_size_ = AnfAlgo::GetNodeAttr<int>(kernel_node, "hidden_size");
|
||||
num_layers_ = AnfAlgo::GetNodeAttr<int>(kernel_node, "num_layers");
|
||||
batch_size_ = SizeToInt(src_shape[1]);
|
||||
seq_len_ = SizeToInt(src_shape[0]);
|
||||
num_directions_ = 1;
|
||||
if (bidirectional_) {
|
||||
num_directions_ = 2;
|
||||
}
|
||||
int gate_size = 4 * hidden_size_;
|
||||
for (int i = 0; i < num_layers_; ++i) {
|
||||
weight_size_ += gate_size * (i == 0 ? input_size_ : hidden_size_ * num_directions_);
|
||||
weight_h_size_ += gate_size * hidden_size_;
|
||||
}
|
||||
weight_size_ = weight_size_ * num_directions_;
|
||||
weight_h_size_ = weight_h_size_ * num_directions_;
|
||||
}
|
||||
|
||||
bool LstmCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs,
|
||||
const std::vector<kernel::AddressPtr> & /*workspace*/,
|
||||
const std::vector<kernel::AddressPtr> &outputs) {
|
||||
using dt = dnnl::memory::data_type;
|
||||
using tag = dnnl::memory::format_tag;
|
||||
using dim = dnnl::memory::dims;
|
||||
auto eng = MKLKernelEngine::Get().engine();
|
||||
dnnl::stream s(eng);
|
||||
auto formatted_md = [](dim dimensions, tag layout) { return dnnl::memory::desc{{dimensions}, dt::f32, layout}; };
|
||||
dnnl::rnn_direction direction = dnnl::rnn_direction::unidirectional;
|
||||
if (bidirectional_) {
|
||||
direction = dnnl::rnn_direction::bidirectional_concat;
|
||||
}
|
||||
|
||||
dim src_dims = {seq_len_, batch_size_, input_size_};
|
||||
dim src_h_dims = {num_layers_, num_directions_, batch_size_, hidden_size_};
|
||||
dim src_c_dims = {num_layers_, num_directions_, batch_size_, hidden_size_};
|
||||
dim weights_dims = {num_layers_, num_directions_, input_size_, 4, hidden_size_};
|
||||
dim weights_h_dims = {num_layers_, num_directions_, hidden_size_, 4, hidden_size_};
|
||||
dim bias_dims = {num_layers_, num_directions_, 4, hidden_size_};
|
||||
dim dst_dims = {seq_len_, batch_size_, hidden_size_ * num_directions_};
|
||||
dim dst_h_dims = {num_layers_, num_directions_, batch_size_, hidden_size_};
|
||||
dim dst_c_dims = {num_layers_, num_directions_, batch_size_, hidden_size_};
|
||||
dnnl::memory::desc src_desc = formatted_md(src_dims, tag::tnc);
|
||||
dnnl::memory::desc src_h_desc = formatted_md(src_h_dims, tag::ldnc);
|
||||
dnnl::memory::desc src_c_desc = formatted_md(src_c_dims, tag::ldnc);
|
||||
dnnl::memory::desc weights_desc = formatted_md(weights_dims, tag::ldigo);
|
||||
dnnl::memory::desc weights_h_desc = formatted_md(weights_h_dims, tag::ldigo);
|
||||
dnnl::memory::desc bias_desc = formatted_md(bias_dims, tag::ldgo);
|
||||
dnnl::memory::desc dst_desc = formatted_md(dst_dims, tag::tnc);
|
||||
dnnl::memory::desc dst_h_desc = formatted_md(dst_h_dims, tag::ldnc);
|
||||
dnnl::memory::desc dst_c_desc = formatted_md(dst_c_dims, tag::ldnc);
|
||||
dnnl::lstm_forward::desc desc =
|
||||
dnnl::lstm_forward::desc(dnnl::prop_kind::forward_training, direction, src_desc, src_h_desc, src_c_desc,
|
||||
weights_desc, weights_h_desc, bias_desc, dst_desc, dst_h_desc, dst_c_desc);
|
||||
auto prim_desc = dnnl::lstm_forward::primitive_desc(desc, MKLKernelEngine::Get().engine());
|
||||
auto workspace_memory = dnnl::memory(prim_desc.workspace_desc(), eng);
|
||||
auto src_memory = dnnl::memory(formatted_md(src_dims, tag::tnc), eng);
|
||||
write_to_dnnl_memory(inputs[0]->addr, src_memory);
|
||||
|
||||
auto src_h_memory = dnnl::memory(prim_desc.src_iter_desc(), eng);
|
||||
auto src_c_memory = dnnl::memory(prim_desc.src_iter_c_desc(), eng);
|
||||
write_to_dnnl_memory(inputs[1]->addr, src_h_memory);
|
||||
write_to_dnnl_memory(inputs[2]->addr, src_c_memory);
|
||||
|
||||
auto weights_memory = dnnl::memory(formatted_md(weights_dims, tag::ldigo), eng);
|
||||
auto weights_h_memory = dnnl::memory(formatted_md(weights_h_dims, tag::ldigo), eng);
|
||||
auto bias_memory = dnnl::memory(formatted_md(bias_dims, tag::ldgo), eng);
|
||||
write_to_dnnl_memory(inputs[3]->addr, weights_memory);
|
||||
write_to_dnnl_memory(reinterpret_cast<float *>(inputs[3]->addr) + weight_size_, weights_h_memory);
|
||||
write_to_dnnl_memory(reinterpret_cast<float *>(inputs[3]->addr) + weight_size_ + weight_h_size_, bias_memory);
|
||||
|
||||
auto dst_memory = dnnl::memory(formatted_md(dst_dims, tag::tnc), eng);
|
||||
auto dst_h_memory = dnnl::memory(prim_desc.dst_iter_desc(), eng);
|
||||
auto dst_c_memory = dnnl::memory(prim_desc.dst_iter_c_desc(), eng);
|
||||
dnnl::lstm_forward fw_layer(prim_desc);
|
||||
workspace_memory.set_data_handle(outputs[3]->addr);
|
||||
dst_memory.set_data_handle(outputs[0]->addr);
|
||||
dst_h_memory.set_data_handle(outputs[1]->addr);
|
||||
dst_c_memory.set_data_handle(outputs[2]->addr);
|
||||
fw_layer.execute(s, {{DNNL_ARG_SRC_LAYER, src_memory},
|
||||
{DNNL_ARG_SRC_ITER, src_h_memory},
|
||||
{DNNL_ARG_SRC_ITER_C, src_c_memory},
|
||||
{DNNL_ARG_WEIGHTS_LAYER, weights_memory},
|
||||
{DNNL_ARG_WEIGHTS_ITER, weights_h_memory},
|
||||
{DNNL_ARG_BIAS, bias_memory},
|
||||
{DNNL_ARG_DST_LAYER, dst_memory},
|
||||
{DNNL_ARG_DST_ITER, dst_h_memory},
|
||||
{DNNL_ARG_DST_ITER_C, dst_c_memory},
|
||||
{DNNL_ARG_WORKSPACE, workspace_memory}});
|
||||
return true;
|
||||
}
|
||||
|
||||
} // namespace kernel
|
||||
} // namespace mindspore
|
|
@ -0,0 +1,59 @@
|
|||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef MINDSPORE_CCSRC_KERNEL_CPU_LSTM_CPU_KERNEL_H
|
||||
#define MINDSPORE_CCSRC_KERNEL_CPU_LSTM_CPU_KERNEL_H
|
||||
#include <vector>
|
||||
#include <memory>
|
||||
#include "kernel/cpu/mkldnn/mkl_cpu_kernel.h"
|
||||
namespace mindspore {
|
||||
namespace kernel {
|
||||
class LstmCPUKernel : public MKLCPUKernel {
|
||||
public:
|
||||
LstmCPUKernel() = default;
|
||||
~LstmCPUKernel() override = default;
|
||||
void InitKernel(const CNodePtr &kernel_node) override;
|
||||
|
||||
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
|
||||
const std::vector<AddressPtr> &outputs) override;
|
||||
|
||||
private:
|
||||
int weight_size_ = 0;
|
||||
int weight_h_size_ = 0;
|
||||
int input_size_;
|
||||
int hidden_size_;
|
||||
int num_layers_;
|
||||
int batch_size_;
|
||||
int seq_len_;
|
||||
int num_directions_;
|
||||
bool bidirectional_;
|
||||
};
|
||||
|
||||
MS_REG_CPU_KERNEL(LSTM,
|
||||
KernelAttr()
|
||||
.AddInputAttr(kNumberTypeFloat32)
|
||||
.AddInputAttr(kNumberTypeFloat32)
|
||||
.AddInputAttr(kNumberTypeFloat32)
|
||||
.AddInputAttr(kNumberTypeFloat32)
|
||||
.AddOutputAttr(kNumberTypeFloat32)
|
||||
.AddOutputAttr(kNumberTypeFloat32)
|
||||
.AddOutputAttr(kNumberTypeFloat32)
|
||||
.AddOutputAttr(kNumberTypeFloat32)
|
||||
.AddOutputAttr(kNumberTypeFloat32),
|
||||
LstmCPUKernel);
|
||||
} // namespace kernel
|
||||
} // namespace mindspore
|
||||
#endif // MINDSPORE_CCSRC_KERNEL_CPU_LSTM_CPU_KERNEL_H
|
|
@ -0,0 +1,169 @@
|
|||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#include "kernel/cpu/mkldnn/lstm_grad_cpu_kernel.h"
|
||||
#include <cstring>
|
||||
#include <cmath>
|
||||
#include <numeric>
|
||||
#include <string>
|
||||
#include "common/utils.h"
|
||||
#include "kernel/cpu/mkldnn/mkl_kernel_engine.h"
|
||||
#include "device/cpu/cpu_device_address.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace kernel {
|
||||
|
||||
void LSTMGradCPUKernel::InitKernel(const CNodePtr &kernel_node) {
|
||||
MS_EXCEPTION_IF_NULL(kernel_node);
|
||||
std::vector<size_t> src_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
|
||||
bidirectional_ = AnfAlgo::GetNodeAttr<bool>(kernel_node, "bidirectional");
|
||||
input_size_ = AnfAlgo::GetNodeAttr<int>(kernel_node, "input_size");
|
||||
hidden_size_ = AnfAlgo::GetNodeAttr<int>(kernel_node, "hidden_size");
|
||||
num_layers_ = AnfAlgo::GetNodeAttr<int>(kernel_node, "num_layers");
|
||||
batch_size_ = SizeToInt(src_shape[1]);
|
||||
seq_len_ = SizeToInt(src_shape[0]);
|
||||
num_directions_ = 1;
|
||||
if (bidirectional_) {
|
||||
num_directions_ = 2;
|
||||
}
|
||||
int gate_size = 4 * hidden_size_;
|
||||
for (int i = 0; i < num_layers_; ++i) {
|
||||
weight_size_ += gate_size * (i == 0 ? input_size_ : hidden_size_ * num_directions_);
|
||||
weight_h_size_ += gate_size * hidden_size_;
|
||||
}
|
||||
weight_size_ = weight_size_ * num_directions_;
|
||||
weight_h_size_ = weight_h_size_ * num_directions_;
|
||||
}
|
||||
|
||||
bool LSTMGradCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs,
|
||||
const std::vector<kernel::AddressPtr> &workspace /*workspace*/,
|
||||
const std::vector<kernel::AddressPtr> &outputs) {
|
||||
using tag = dnnl::memory::format_tag;
|
||||
using dt = dnnl::memory::data_type;
|
||||
using dim = dnnl::memory::dims;
|
||||
auto eng = MKLKernelEngine::Get().engine();
|
||||
dnnl::stream s(eng);
|
||||
auto formatted_md = [](dim dimensions, tag layout) { return dnnl::memory::desc{{dimensions}, dt::f32, layout}; };
|
||||
auto generic_md = [](dim dimensions) { return dnnl::memory::desc{{dimensions}, dt::f32, tag::any}; };
|
||||
dnnl::rnn_direction direction = dnnl::rnn_direction::unidirectional;
|
||||
if (bidirectional_) {
|
||||
direction = dnnl::rnn_direction::bidirectional_concat;
|
||||
}
|
||||
dim src_dims = {seq_len_, batch_size_, input_size_};
|
||||
dim src_h_dims = {num_layers_, num_directions_, batch_size_, hidden_size_};
|
||||
dim src_c_dims = {num_layers_, num_directions_, batch_size_, hidden_size_};
|
||||
dim weights_dims = {num_layers_, num_directions_, input_size_, 4, hidden_size_};
|
||||
dim weights_h_dims = {num_layers_, num_directions_, hidden_size_, 4, hidden_size_};
|
||||
dim bias_dims = {num_layers_, num_directions_, 4, hidden_size_};
|
||||
dim dst_dims = {seq_len_, batch_size_, hidden_size_ * num_directions_};
|
||||
dim dst_h_dims = {num_layers_, num_directions_, batch_size_, hidden_size_};
|
||||
dim dst_c_dims = {num_layers_, num_directions_, batch_size_, hidden_size_};
|
||||
|
||||
dnnl::memory::desc src_desc = formatted_md(src_dims, tag::tnc);
|
||||
dnnl::memory::desc src_h_desc = formatted_md(src_h_dims, tag::ldnc);
|
||||
dnnl::memory::desc src_c_desc = formatted_md(src_c_dims, tag::ldnc);
|
||||
dnnl::memory::desc weights_desc = formatted_md(weights_dims, tag::ldigo);
|
||||
dnnl::memory::desc weights_h_desc = formatted_md(weights_h_dims, tag::ldigo);
|
||||
dnnl::memory::desc bias_desc = formatted_md(bias_dims, tag::ldgo);
|
||||
dnnl::memory::desc dst_desc = formatted_md(dst_dims, tag::tnc);
|
||||
dnnl::memory::desc dst_h_desc = formatted_md(dst_h_dims, tag::ldnc);
|
||||
dnnl::memory::desc dst_c_desc = formatted_md(dst_c_dims, tag::ldnc);
|
||||
|
||||
dnnl::lstm_forward::desc forward_desc =
|
||||
dnnl::lstm_forward::desc(dnnl::prop_kind::forward_training, direction, src_desc, src_h_desc, src_c_desc,
|
||||
weights_desc, weights_h_desc, bias_desc, dst_desc, dst_h_desc, dst_c_desc);
|
||||
auto prim_forward_desc = dnnl::lstm_forward::primitive_desc(forward_desc, eng);
|
||||
|
||||
dnnl::lstm_backward::desc backward_desc = dnnl::lstm_backward::desc(
|
||||
dnnl::prop_kind::backward, direction, src_desc, src_h_desc, src_c_desc, generic_md(weights_dims),
|
||||
generic_md(weights_h_dims), generic_md(bias_dims), dst_desc, dst_h_desc, dst_c_desc, src_desc, src_h_desc,
|
||||
src_c_desc, weights_desc, weights_h_desc, bias_desc, dst_desc, dst_h_desc, dst_c_desc);
|
||||
auto prim_backward_desc = dnnl::lstm_backward::primitive_desc(backward_desc, eng, prim_forward_desc);
|
||||
// construct fw memory
|
||||
auto src_memory = dnnl::memory(formatted_md(src_dims, tag::tnc), eng);
|
||||
write_to_dnnl_memory(inputs[0]->addr, src_memory);
|
||||
|
||||
auto src_h_memory = dnnl::memory(prim_forward_desc.src_iter_desc(), eng);
|
||||
auto src_c_memory = dnnl::memory(prim_forward_desc.src_iter_c_desc(), eng);
|
||||
write_to_dnnl_memory(inputs[1]->addr, src_h_memory);
|
||||
write_to_dnnl_memory(inputs[2]->addr, src_c_memory);
|
||||
|
||||
auto user_weights_memory = dnnl::memory(formatted_md(weights_dims, tag::ldigo), eng);
|
||||
auto user_weights_h_memory = dnnl::memory(formatted_md(weights_h_dims, tag::ldigo), eng);
|
||||
auto user_bias_memory = dnnl::memory(formatted_md(bias_dims, tag::ldgo), eng);
|
||||
write_to_dnnl_memory(inputs[3]->addr, user_weights_memory);
|
||||
write_to_dnnl_memory(reinterpret_cast<float *>(inputs[3]->addr) + weight_size_, user_weights_h_memory);
|
||||
write_to_dnnl_memory(reinterpret_cast<float *>(inputs[3]->addr) + weight_size_ + weight_h_size_, user_bias_memory);
|
||||
auto weights_memory = dnnl::memory(prim_backward_desc.weights_layer_desc(), eng);
|
||||
auto weights_h_memory = dnnl::memory(prim_backward_desc.weights_iter_desc(), eng);
|
||||
auto bias_memory = dnnl::memory(prim_forward_desc.bias_desc(), eng);
|
||||
dnnl::reorder(user_weights_memory, weights_memory).execute(s, user_weights_memory, weights_memory);
|
||||
dnnl::reorder(user_weights_h_memory, weights_h_memory).execute(s, user_weights_h_memory, weights_h_memory);
|
||||
dnnl::reorder(user_bias_memory, bias_memory).execute(s, user_bias_memory, bias_memory);
|
||||
|
||||
auto dst_memory = dnnl::memory(formatted_md(dst_dims, tag::tnc), eng);
|
||||
write_to_dnnl_memory(reinterpret_cast<float *>(inputs[4]->addr), dst_memory);
|
||||
auto dst_h_memory = dnnl::memory(prim_backward_desc.dst_iter_desc(), eng);
|
||||
write_to_dnnl_memory(reinterpret_cast<float *>(inputs[5]->addr), dst_h_memory);
|
||||
auto dst_c_memory = dnnl::memory(prim_backward_desc.dst_iter_c_desc(), eng);
|
||||
write_to_dnnl_memory(reinterpret_cast<float *>(inputs[6]->addr), dst_c_memory);
|
||||
auto workspace_memory = dnnl::memory(prim_forward_desc.workspace_desc(), eng);
|
||||
write_to_dnnl_memory(inputs[10]->addr, workspace_memory);
|
||||
|
||||
// construct diff memory
|
||||
auto diff_src_memory = dnnl::memory(formatted_md(src_dims, tag::tnc), eng);
|
||||
auto diff_src_h_memory = dnnl::memory(prim_backward_desc.diff_src_iter_desc(), eng);
|
||||
auto diff_src_c_memory = dnnl::memory(prim_backward_desc.diff_src_iter_c_desc(), eng);
|
||||
|
||||
auto diff_weights_memory = dnnl::memory(prim_backward_desc.diff_weights_layer_desc(), eng);
|
||||
auto diff_weights_h_memory = dnnl::memory(prim_backward_desc.diff_weights_iter_desc(), eng);
|
||||
auto diff_bias_memory = dnnl::memory(prim_backward_desc.diff_bias_desc(), eng);
|
||||
auto diff_dst_memory = dnnl::memory(formatted_md(dst_dims, tag::tnc), eng);
|
||||
write_to_dnnl_memory(reinterpret_cast<float *>(inputs[7]->addr), diff_dst_memory);
|
||||
auto diff_dst_h_memory = dnnl::memory(prim_backward_desc.diff_dst_iter_desc(), eng);
|
||||
write_to_dnnl_memory(reinterpret_cast<float *>(inputs[8]->addr), diff_dst_h_memory);
|
||||
auto diff_dst_c_memory = dnnl::memory(prim_backward_desc.diff_dst_iter_c_desc(), eng);
|
||||
write_to_dnnl_memory(reinterpret_cast<float *>(inputs[9]->addr), diff_dst_c_memory);
|
||||
|
||||
diff_src_memory.set_data_handle(outputs[0]->addr);
|
||||
diff_src_h_memory.set_data_handle(outputs[1]->addr);
|
||||
diff_src_c_memory.set_data_handle(outputs[2]->addr);
|
||||
diff_weights_memory.set_data_handle(outputs[3]->addr);
|
||||
diff_weights_h_memory.set_data_handle(reinterpret_cast<float *>(outputs[3]->addr) + weight_size_);
|
||||
diff_bias_memory.set_data_handle(reinterpret_cast<float *>(outputs[3]->addr) + weight_size_ + weight_h_size_);
|
||||
dnnl::lstm_backward bwd_layer(prim_backward_desc);
|
||||
bwd_layer.execute(s, {{DNNL_ARG_SRC_LAYER, src_memory},
|
||||
{DNNL_ARG_SRC_ITER, src_h_memory},
|
||||
{DNNL_ARG_SRC_ITER_C, src_c_memory},
|
||||
{DNNL_ARG_WEIGHTS_LAYER, weights_memory},
|
||||
{DNNL_ARG_WEIGHTS_ITER, weights_h_memory},
|
||||
{DNNL_ARG_BIAS, bias_memory},
|
||||
{DNNL_ARG_DST_LAYER, dst_memory},
|
||||
{DNNL_ARG_DST_ITER, dst_h_memory},
|
||||
{DNNL_ARG_DST_ITER_C, dst_c_memory},
|
||||
{DNNL_ARG_DIFF_SRC_LAYER, diff_src_memory},
|
||||
{DNNL_ARG_DIFF_SRC_ITER, diff_src_h_memory},
|
||||
{DNNL_ARG_DIFF_SRC_ITER_C, diff_src_c_memory},
|
||||
{DNNL_ARG_DIFF_WEIGHTS_LAYER, diff_weights_memory},
|
||||
{DNNL_ARG_DIFF_WEIGHTS_ITER, diff_weights_h_memory},
|
||||
{DNNL_ARG_DIFF_BIAS, diff_bias_memory},
|
||||
{DNNL_ARG_DIFF_DST_LAYER, diff_dst_memory},
|
||||
{DNNL_ARG_DIFF_DST_ITER, diff_dst_h_memory},
|
||||
{DNNL_ARG_DIFF_DST_ITER_C, diff_dst_c_memory},
|
||||
{DNNL_ARG_WORKSPACE, workspace_memory}});
|
||||
return true;
|
||||
}
|
||||
} // namespace kernel
|
||||
} // namespace mindspore
|
|
@ -0,0 +1,67 @@
|
|||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#ifndef MINDSPORE_CCSRC_KERNEL_CPU_LSTM_GRAD_CPU_KERNEL_H_
|
||||
#define MINDSPORE_CCSRC_KERNEL_CPU_LSTM_GRAD_CPU_KERNEL_H_
|
||||
|
||||
#include <vector>
|
||||
#include <memory>
|
||||
#include "kernel/cpu/mkldnn/mkl_cpu_kernel.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace kernel {
|
||||
class LSTMGradCPUKernel : public MKLCPUKernel {
|
||||
public:
|
||||
LSTMGradCPUKernel() = default;
|
||||
~LSTMGradCPUKernel() override = default;
|
||||
|
||||
void InitKernel(const CNodePtr &kernel_node) override;
|
||||
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
|
||||
const std::vector<AddressPtr> &outputs) override;
|
||||
|
||||
private:
|
||||
int weight_size_ = 0;
|
||||
int weight_h_size_ = 0;
|
||||
int input_size_;
|
||||
int hidden_size_;
|
||||
int num_layers_;
|
||||
int batch_size_;
|
||||
int seq_len_;
|
||||
int num_directions_;
|
||||
bool bidirectional_;
|
||||
};
|
||||
|
||||
MS_REG_CPU_KERNEL(LSTMGrad,
|
||||
KernelAttr()
|
||||
.AddInputAttr(kNumberTypeFloat32)
|
||||
.AddInputAttr(kNumberTypeFloat32)
|
||||
.AddInputAttr(kNumberTypeFloat32)
|
||||
.AddInputAttr(kNumberTypeFloat32)
|
||||
.AddInputAttr(kNumberTypeFloat32)
|
||||
.AddInputAttr(kNumberTypeFloat32)
|
||||
.AddInputAttr(kNumberTypeFloat32)
|
||||
.AddInputAttr(kNumberTypeFloat32)
|
||||
.AddInputAttr(kNumberTypeFloat32)
|
||||
.AddInputAttr(kNumberTypeFloat32)
|
||||
.AddInputAttr(kNumberTypeFloat32)
|
||||
.AddOutputAttr(kNumberTypeFloat32)
|
||||
.AddOutputAttr(kNumberTypeFloat32)
|
||||
.AddOutputAttr(kNumberTypeFloat32)
|
||||
.AddOutputAttr(kNumberTypeFloat32),
|
||||
LSTMGradCPUKernel);
|
||||
} // namespace kernel
|
||||
} // namespace mindspore
|
||||
|
||||
#endif // MINDSPORE_CCSRC_KERNEL_CPU_LSTM_GRAD_CPU_KERNEL_H_
|
|
@ -1,5 +1,5 @@
|
|||
/**
|
||||
* Copyright 2019 Huawei Technologies Co., Ltd
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
|
@ -98,5 +98,11 @@ void MKLCPUKernel::SetArgumentHandle(int arg_key, void *ptr) {
|
|||
}
|
||||
|
||||
void MKLCPUKernel::ExecutePrimitive() { MKLKernelEngine::Get().Execute(primitive_, arguments_); }
|
||||
void MKLCPUKernel::write_to_dnnl_memory(void *handle, const dnnl::memory &mem) {
|
||||
MKLKernelEngine::Get().write_to_dnnl_memory(handle, mem);
|
||||
}
|
||||
void MKLCPUKernel::read_from_dnnl_memory(void *handle, const dnnl::memory &mem) {
|
||||
MKLKernelEngine::Get().read_from_dnnl_memory(handle, mem);
|
||||
}
|
||||
} // namespace kernel
|
||||
} // namespace mindspore
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/**
|
||||
* Copyright 2019 Huawei Technologies Co., Ltd
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
|
@ -39,6 +39,8 @@ class MKLCPUKernel : public CPUKernel {
|
|||
dnnl::memory::format_tag GetDefaultFormatTag(const dnnl::memory::dims &dims) const;
|
||||
dnnl::memory::desc GetDefaultMemDesc(const std::vector<size_t> &shape);
|
||||
void ExecutePrimitive();
|
||||
void write_to_dnnl_memory(void *handle, const dnnl::memory &mem);
|
||||
void read_from_dnnl_memory(void *handle, const dnnl::memory &mem);
|
||||
std::unordered_map<int, dnnl::memory> arguments_;
|
||||
std::shared_ptr<dnnl::primitive> primitive_{nullptr};
|
||||
};
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/**
|
||||
* Copyright 2019 Huawei Technologies Co., Ltd
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/**
|
||||
* Copyright 2019 Huawei Technologies Co., Ltd
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
|
@ -15,7 +15,10 @@
|
|||
*/
|
||||
#ifndef MINDSPORE_MKL_KERNEL_ENGINE_H_
|
||||
#define MINDSPORE_MKL_KERNEL_ENGINE_H_
|
||||
|
||||
#include <cstdlib>
|
||||
#include <algorithm>
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
#include <memory>
|
||||
|
@ -39,6 +42,30 @@ class MKLKernelEngine {
|
|||
void Execute(const std::shared_ptr<dnnl::primitive> &primitive,
|
||||
const std::unordered_map<int, dnnl::memory> &arguments);
|
||||
|
||||
inline void read_from_dnnl_memory(void *handle, const dnnl::memory &mem) {
|
||||
dnnl::engine eng = mem.get_engine();
|
||||
size_t bytes = mem.get_desc().get_size();
|
||||
if (eng.get_kind() == dnnl::engine::kind::cpu) {
|
||||
auto dst = reinterpret_cast<uint8_t *>(handle);
|
||||
uint8_t *src = reinterpret_cast<uint8_t *>(mem.get_data_handle());
|
||||
for (size_t i = 0; i < bytes; ++i) {
|
||||
dst[i] = src[i];
|
||||
}
|
||||
}
|
||||
}
|
||||
// Read from handle, write to memory
|
||||
inline void write_to_dnnl_memory(void *handle, const dnnl::memory &mem) {
|
||||
dnnl::engine eng = mem.get_engine();
|
||||
size_t bytes = mem.get_desc().get_size();
|
||||
if (eng.get_kind() == dnnl::engine::kind::cpu) {
|
||||
auto src = reinterpret_cast<uint8_t *>(handle);
|
||||
uint8_t *dst = reinterpret_cast<uint8_t *>(mem.get_data_handle());
|
||||
for (size_t i = 0; i < bytes; ++i) {
|
||||
dst[i] = src[i];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
MKLKernelEngine() : engine_(dnnl::engine::kind::cpu, 0), stream_(engine_) {}
|
||||
~MKLKernelEngine() = default;
|
||||
|
|
|
@ -18,8 +18,13 @@ from mindspore.nn.cell import Cell
|
|||
from mindspore.common.parameter import Parameter
|
||||
from mindspore.common.initializer import initializer
|
||||
from mindspore._checkparam import Validator as validator
|
||||
from mindspore import context
|
||||
import mindspore.nn as nn
|
||||
from mindspore.common.tensor import Tensor
|
||||
import numpy as np
|
||||
|
||||
__all__ = ['LSTM', 'LSTMCell']
|
||||
|
||||
__all__ = ['LSTM']
|
||||
|
||||
class LSTM(Cell):
|
||||
r"""
|
||||
|
@ -102,6 +107,7 @@ class LSTM(Cell):
|
|||
>>> c0 = Tensor(np.ones([1 * 2, 3, 12]).astype(np.float32))
|
||||
>>> output, (hn, cn) = net(input, h0, c0)
|
||||
"""
|
||||
|
||||
def __init__(self,
|
||||
input_size,
|
||||
hidden_size,
|
||||
|
@ -118,19 +124,20 @@ class LSTM(Cell):
|
|||
self.batch_first = validator.check_value_type("batch_first", batch_first, [bool], self.cls_name)
|
||||
self.dropout = float(dropout)
|
||||
self.bidirectional = bidirectional
|
||||
|
||||
if self.batch_first:
|
||||
self.transpose1 = P.Transpose()
|
||||
self.transpose2 = P.Transpose()
|
||||
num_directions = 2 if self.bidirectional else 1
|
||||
self.cpu_target = False
|
||||
if context.get_context("device_target") == "CPU":
|
||||
self.cpu_target = True
|
||||
if not self.cpu_target:
|
||||
self.lstm = P.LSTM(input_size=self.input_size,
|
||||
hidden_size=self.hidden_size,
|
||||
num_layers=self.num_layers,
|
||||
has_bias=self.has_bias,
|
||||
bidirectional=self.bidirectional,
|
||||
dropout=self.dropout)
|
||||
|
||||
num_directions = 2 if self.bidirectional else 1
|
||||
|
||||
weight_size = 0
|
||||
gate_size = 4 * self.hidden_size
|
||||
for layer in range(self.num_layers):
|
||||
|
@ -140,17 +147,175 @@ class LSTM(Cell):
|
|||
if self.has_bias:
|
||||
increment_size += 2 * gate_size
|
||||
weight_size += increment_size * num_directions
|
||||
|
||||
self.weight = Parameter(initializer(0.0, [weight_size, 1, 1]), name='weight')
|
||||
|
||||
else:
|
||||
layer = []
|
||||
layer.append(nn.LSTMCell(input_size=self.input_size,
|
||||
hidden_size=self.hidden_size,
|
||||
layer_index=0,
|
||||
has_bias=self.has_bias,
|
||||
bidirectional=self.bidirectional,
|
||||
dropout=self.dropout))
|
||||
for i in range(num_layers - 1):
|
||||
layer.append(nn.LSTMCell(input_size=self.hidden_size * num_directions,
|
||||
hidden_size=self.hidden_size,
|
||||
layer_index=i + 1,
|
||||
has_bias=self.has_bias,
|
||||
bidirectional=self.bidirectional,
|
||||
dropout=self.dropout))
|
||||
self.lstms = layer
|
||||
self.fill = P.Fill()
|
||||
self.shape = P.Shape()
|
||||
|
||||
def construct(self, x, hx):
|
||||
if self.batch_first:
|
||||
x = self.transpose1(x, (1, 0, 2))
|
||||
h0, c0 = hx
|
||||
output, hn, cn, _, _ = self.lstm(x, h0, c0, self.weight)
|
||||
if not self.cpu_target:
|
||||
h, c = hx
|
||||
output, h, c, _, _ = self.lstm(x, h, c, self.weight)
|
||||
if self.batch_first:
|
||||
output = self.transpose2(output, (1, 0, 2))
|
||||
return (output, (hn, cn))
|
||||
return (output, (h, c))
|
||||
h, c = hx
|
||||
output, hn, cn, _, _ = self.lstms[0](x, h[0], c[0])
|
||||
for i in range(1, self.num_layers):
|
||||
output, hn, cn, _, _ = self.lstms[i](output, h[i], c[i])
|
||||
if self.batch_first:
|
||||
output = self.transpose2(output, (1, 0, 2))
|
||||
return output, hn, cn, _, _
|
||||
|
||||
|
||||
class LSTMCell(Cell):
|
||||
r"""
|
||||
LSTM (Long Short-Term Memory) layer.
|
||||
|
||||
Applies a LSTM layer to the input.
|
||||
|
||||
There are two pipelines connecting two consecutive cells in a LSTM model; one is cell state pipeline
|
||||
and another is hidden state pipeline. Denote two consecutive time nodes as :math:`t-1` and :math:`t`.
|
||||
Given an input :math:`x_t` at time :math:`t`, an hidden state :math:`h_{t-1}` and an cell
|
||||
state :math:`c_{t-1}` of the layer at time :math:`{t-1}`, the cell state and hidden state at
|
||||
time :math:`t` is computed using an gating mechanism. Input gate :math:`i_t` is designed to protect the cell
|
||||
from perturbation by irrelevant inputs. Forget gate :math:`f_t` affords protection of the cell by forgetting
|
||||
some information in the past, which is stored in :math:`h_{t-1}`. Output gate :math:`o_t` protects other
|
||||
units from perturbation by currently irrelevant memory contents. Candidate cell state :math:`\tilde{c}_t` is
|
||||
calculated with the current input, on which the input gate will be applied. Finally, current cell state
|
||||
:math:`c_{t}` and hidden state :math:`h_{t}` are computed with the calculated gates and cell states. The complete
|
||||
formulation is as follows.
|
||||
|
||||
.. math::
|
||||
\begin{array}{ll} \\
|
||||
i_t = \sigma(W_{ix} x_t + b_{ix} + W_{ih} h_{(t-1)} + b_{ih}) \\
|
||||
f_t = \sigma(W_{fx} x_t + b_{fx} + W_{fh} h_{(t-1)} + b_{fh}) \\
|
||||
\tilde{c}_t = \tanh(W_{cx} x_t + b_{cx} + W_{ch} h_{(t-1)} + b_{ch}) \\
|
||||
o_t = \sigma(W_{ox} x_t + b_{ox} + W_{oh} h_{(t-1)} + b_{oh}) \\
|
||||
c_t = f_t * c_{(t-1)} + i_t * \tilde{c}_t \\
|
||||
h_t = o_t * \tanh(c_t) \\
|
||||
\end{array}
|
||||
|
||||
Here :math:`\sigma` is the sigmoid function, and :math:`*` is the Hadamard product. :math:`W, b`
|
||||
are learnable weights between the output and the input in the formula. For instance,
|
||||
:math:`W_{ix}, b_{ix}` are the weight and bias used to transform from input :math:`x` to :math:`i`.
|
||||
Details can be found in paper `LONG SHORT-TERM MEMORY
|
||||
<https://www.bioinf.jku.at/publications/older/2604.pdf>`_ and
|
||||
`Long Short-Term Memory Recurrent Neural Network Architectures for Large Scale Acoustic Modeling
|
||||
<https://static.googleusercontent.com/media/research.google.com/zh-CN//pubs/archive/43905.pdf>`_.
|
||||
|
||||
Args:
|
||||
input_size (int): Number of features of input.
|
||||
hidden_size (int): Number of features of hidden layer.
|
||||
layer_index (int): index of current layer of stacked LSTM . Default: 0.
|
||||
has_bias (bool): Specifies whether has bias `b_ih` and `b_hh`. Default: True.
|
||||
batch_first (bool): Specifies whether the first dimension of input is batch_size. Default: False.
|
||||
dropout (float, int): If not 0, append `Dropout` layer on the outputs of each
|
||||
LSTM layer except the last layer. Default 0. The range of dropout is [0.0, 1.0].
|
||||
bidirectional (bool): Specifies whether this is a bidirectional LSTM. If set True,
|
||||
number of directions will be 2 otherwise number of directions is 1. Default: False.
|
||||
|
||||
Inputs:
|
||||
- **input** (Tensor) - Tensor of shape (seq_len, batch_size, `input_size`).
|
||||
- **h** - data type mindspore.float32 or
|
||||
mindspore.float16 and shape (num_directions * `num_layers`, batch_size, `hidden_size`).
|
||||
- **c** - data type mindspore.float32 or
|
||||
mindspore.float16 and shape (num_directions * `num_layers`, batch_size, `hidden_size`).
|
||||
Data type of `h' and 'c' should be the same of `input`.
|
||||
|
||||
Outputs:
|
||||
`output`, `h_n`, `c_n`, 'reserve', 'state'.
|
||||
|
||||
- **output** (Tensor) - Tensor of shape (seq_len, batch_size, num_directions * `hidden_size`).
|
||||
- **h** - A Tensor with shape (num_directions * `num_layers`, batch_size, `hidden_size`).
|
||||
- **c** - A Tensor with shape (num_directions * `num_layers`, batch_size, `hidden_size`).
|
||||
- **reserve** - reserved
|
||||
- **state** - reserved
|
||||
|
||||
Examples:
|
||||
>>> class LstmNet(nn.Cell):
|
||||
>>> def __init__(self, input_size, hidden_size, layer_index, has_bias, batch_first, bidirectional):
|
||||
>>> super(LstmNet, self).__init__()
|
||||
>>> self.lstm = nn.LSTMCell(input_size=input_size,
|
||||
>>> hidden_size=hidden_size,
|
||||
>>> layer_index=layer_index,
|
||||
>>> has_bias=has_bias,
|
||||
>>> batch_first=batch_first,
|
||||
>>> bidirectional=bidirectional,
|
||||
>>> dropout=0.0)
|
||||
>>>
|
||||
>>> def construct(self, inp, h0, c0):
|
||||
>>> return self.lstm(inp, (h0, c0))
|
||||
>>>
|
||||
>>> net = LstmNet(10, 12, 2, has_bias=True, batch_first=True, bidirectional=False)
|
||||
>>> input = Tensor(np.ones([3, 5, 10]).astype(np.float32))
|
||||
>>> h0 = Tensor(np.ones([1 * 2, 3, 12]).astype(np.float32))
|
||||
>>> c0 = Tensor(np.ones([1 * 2, 3, 12]).astype(np.float32))
|
||||
>>> output, hn, cn, _, _ = net(input, h0, c0)
|
||||
"""
|
||||
|
||||
|
||||
def __init__(self,
|
||||
input_size,
|
||||
hidden_size,
|
||||
layer_index=0,
|
||||
has_bias=True,
|
||||
batch_first=False,
|
||||
dropout=0,
|
||||
bidirectional=False):
|
||||
super(LSTMCell, self).__init__()
|
||||
self.input_size = input_size
|
||||
self.hidden_size = hidden_size
|
||||
self.num_layers = 1
|
||||
self.layer_index = layer_index
|
||||
self.has_bias = has_bias
|
||||
self.batch_first = validator.check_value_type("batch_first", batch_first, [bool], self.cls_name)
|
||||
self.dropout = float(dropout)
|
||||
self.bidirectional = bidirectional
|
||||
self.num_directions = 1
|
||||
if self.bidirectional:
|
||||
self.num_directions = 2
|
||||
if self.batch_first:
|
||||
self.transpose1 = P.Transpose()
|
||||
self.transpose2 = P.Transpose()
|
||||
w_np = np.ones([(self.input_size + self.hidden_size) * self.num_directions * self.hidden_size * 4, 1]).astype(
|
||||
np.float32) * 0.01
|
||||
if has_bias:
|
||||
b_np = np.ones([self.num_directions * self.hidden_size * 4, 1]).astype(
|
||||
np.float32) * 0.01
|
||||
else:
|
||||
b_np = np.zeros([self.num_directions * self.hidden_size * 4, 1]).astype(
|
||||
np.float32) * 0.01
|
||||
wb_np = np.concatenate((w_np, b_np), axis=0).reshape([-1, 1, 1])
|
||||
self.w = Parameter(initializer(Tensor(wb_np), wb_np.shape), name='w' + str(self.layer_index))
|
||||
self.lstm = P.LSTM(input_size=self.input_size,
|
||||
hidden_size=self.hidden_size,
|
||||
num_layers=1,
|
||||
has_bias=self.has_bias,
|
||||
bidirectional=self.bidirectional,
|
||||
dropout=self.dropout)
|
||||
|
||||
def construct(self, x, h, c):
|
||||
if self.batch_first:
|
||||
x = self.transpose1(x, (1, 0, 2))
|
||||
output, hn, cn, _, _ = self.lstm(x, h, c, self.w)
|
||||
if self.batch_first:
|
||||
output = self.transpose2(output, (1, 0, 2))
|
||||
return output, hn, cn, _, _
|
||||
|
|
|
@ -49,6 +49,7 @@ def get_bprop_dtype(self):
|
|||
|
||||
def bprop(x, out, dout):
|
||||
return (zeros_like(x),)
|
||||
|
||||
return bprop
|
||||
|
||||
|
||||
|
@ -61,6 +62,7 @@ def get_bprop_cast(self):
|
|||
def bprop(x, t, out, dout):
|
||||
dx = cast(dout, get_dtype(x))
|
||||
return dx, zeros_like(t)
|
||||
|
||||
return bprop
|
||||
|
||||
|
||||
|
@ -70,6 +72,7 @@ def get_bprop_shape(self):
|
|||
|
||||
def bprop(x, out, dout):
|
||||
return (zeros_like(x),)
|
||||
|
||||
return bprop
|
||||
|
||||
|
||||
|
@ -82,6 +85,7 @@ def get_bprop_split(self):
|
|||
concat_op = P.Concat(axis)
|
||||
dx = concat_op(dout)
|
||||
return (dx,)
|
||||
|
||||
return bprop
|
||||
|
||||
|
||||
|
@ -91,6 +95,7 @@ def get_bprop_rank(self):
|
|||
|
||||
def bprop(x, out, dout):
|
||||
return (zeros_like(x),)
|
||||
|
||||
return bprop
|
||||
|
||||
|
||||
|
@ -101,6 +106,7 @@ def get_bprop_reshape(self):
|
|||
def bprop(x, shp, out, dout):
|
||||
shapex = shape_op(x)
|
||||
return reshape(dout, shapex), zeros_like(shp)
|
||||
|
||||
return bprop
|
||||
|
||||
|
||||
|
@ -111,6 +117,7 @@ def get_bprop_expand_dims(self):
|
|||
def bprop(x, axis, out, dout):
|
||||
shapex = shape_op(x)
|
||||
return reshape(dout, shapex), zeros_like(axis)
|
||||
|
||||
return bprop
|
||||
|
||||
|
||||
|
@ -121,6 +128,7 @@ def get_bprop_squeeze(self):
|
|||
def bprop(x, out, dout):
|
||||
shapex = shape_op(x)
|
||||
return (reshape(dout, shapex),)
|
||||
|
||||
return bprop
|
||||
|
||||
|
||||
|
@ -132,6 +140,7 @@ def get_bprop_flatten(self):
|
|||
def bprop(x, out, dout):
|
||||
dx = flatten_grad(dout, shape_op(x))
|
||||
return (dx,)
|
||||
|
||||
return bprop
|
||||
|
||||
|
||||
|
@ -166,6 +175,7 @@ def _tile_shape(multiples, shapex):
|
|||
@bprop_getters.register(P.Tile)
|
||||
def get_bprop_tile(self):
|
||||
"""Generate bprop for Tile"""
|
||||
|
||||
def bprop(x, multiples, out, dout):
|
||||
shapex = shape_op(x)
|
||||
r_shape = _tile_shape(multiples, shapex)
|
||||
|
@ -174,6 +184,7 @@ def get_bprop_tile(self):
|
|||
dx = reduce_sum(reshape(dout, r_shape), axis)
|
||||
dx = reshape(dx, shapex)
|
||||
return dx, zeros_like(multiples)
|
||||
|
||||
return bprop
|
||||
|
||||
|
||||
|
@ -183,6 +194,7 @@ def get_bprop_transpose(self):
|
|||
|
||||
def bprop(x, perm, out, dout):
|
||||
return transpose(dout, invert_permutation(perm)), zeros_like(perm)
|
||||
|
||||
return bprop
|
||||
|
||||
|
||||
|
@ -198,6 +210,7 @@ def get_bprop_concat(self):
|
|||
slice_out = P.Slice()(dout, out_offset[i], shape_op(x[i]))
|
||||
dx = dx + (slice_out,)
|
||||
return (dx,)
|
||||
|
||||
return bprop
|
||||
|
||||
|
||||
|
@ -215,12 +228,12 @@ def get_bprop_slice(self):
|
|||
dx = P.Pad(_slice_grad_pad(begin, size, shape_op(x)))(dout)
|
||||
return (dx, zeros_like(begin), zeros_like(size))
|
||||
|
||||
def bprop_gpu(x, begin, size, out, dout):
|
||||
def bprop_grad(x, begin, size, out, dout):
|
||||
dx = dx = G.SliceGrad()(dout, x, begin, size)
|
||||
return (dx, zeros_like(begin), zeros_like(size))
|
||||
|
||||
if context.get_context('device_target') == "GPU":
|
||||
return bprop_gpu
|
||||
if context.get_context('device_target') == "GPU" or context.get_context('device_target') == "CPU":
|
||||
return bprop_grad
|
||||
return bprop
|
||||
|
||||
|
||||
|
@ -249,6 +262,7 @@ def _generate_inverse_index(x_shape, axis):
|
|||
@bprop_getters.register(P.GatherV2)
|
||||
def get_bprop_gather_v2(self):
|
||||
"""Generate bprop for GatherV2"""
|
||||
|
||||
def bprop(x, indices, axis, out, dout):
|
||||
if F.rank(dout) == 0:
|
||||
dout = P.ExpandDims()(dout, -1)
|
||||
|
@ -265,6 +279,7 @@ def get_bprop_gather_v2(self):
|
|||
perm_2 = _generate_inverse_index(x_shp, axis)
|
||||
params_grad = transpose(params_grad, perm_2)
|
||||
return params_grad, zeros_like(indices), zeros_like(axis)
|
||||
|
||||
return bprop
|
||||
|
||||
|
||||
|
@ -286,6 +301,7 @@ def get_bprop_pack(self):
|
|||
pack_grad = P.Unpack(axis)
|
||||
out = pack_grad(dout)
|
||||
return (out,)
|
||||
|
||||
return bprop
|
||||
|
||||
|
||||
|
@ -298,6 +314,7 @@ def get_bprop_unpack(self):
|
|||
unpack_grad = P.Pack(axis)
|
||||
out = unpack_grad(dout)
|
||||
return (out,)
|
||||
|
||||
return bprop
|
||||
|
||||
|
||||
|
@ -313,6 +330,7 @@ def get_bprop_strided_slice(self):
|
|||
def bprop(x, begin, end, strides, out, dout):
|
||||
dx = input_grad(dout, shape_op(x), begin, end, strides)
|
||||
return dx, zeros_like(begin), zeros_like(end), zeros_like(strides)
|
||||
|
||||
return bprop
|
||||
|
||||
|
||||
|
@ -322,6 +340,7 @@ def get_bprop_eye(self):
|
|||
|
||||
def bprop(n, m, t, out, dout):
|
||||
return zeros_like(n), zeros_like(m), zeros_like(t)
|
||||
|
||||
return bprop
|
||||
|
||||
|
||||
|
@ -332,6 +351,7 @@ def get_bprop_select(self):
|
|||
|
||||
def bprop(cond, x, y, out, dout):
|
||||
return zeros_like(cond), select(cond, dout, zeros_like(x)), select(cond, zeros_like(y), dout)
|
||||
|
||||
return bprop
|
||||
|
||||
|
||||
|
@ -522,9 +542,11 @@ def get_bprop_unsorted_segment_min(self):
|
|||
def get_bprop_space_to_batch(self):
|
||||
"""Generate bprop for SpaceToBatch"""
|
||||
space_to_batch_grad = P.BatchToSpace(self.block_size, self.paddings)
|
||||
|
||||
def bprop(x, out, dout):
|
||||
dx = space_to_batch_grad(dout)
|
||||
return (dx,)
|
||||
|
||||
return bprop
|
||||
|
||||
|
||||
|
@ -532,9 +554,11 @@ def get_bprop_space_to_batch(self):
|
|||
def get_bprop_batch_to_space(self):
|
||||
"""Generate bprop for BatchToSpace"""
|
||||
batch_to_space_grad = P.SpaceToBatch(self.block_size, self.crops)
|
||||
|
||||
def bprop(x, out, dout):
|
||||
dx = batch_to_space_grad(dout)
|
||||
return (dx,)
|
||||
|
||||
return bprop
|
||||
|
||||
|
||||
|
|
|
@ -15,7 +15,6 @@
|
|||
|
||||
"""Define the grad rules of math related operations."""
|
||||
|
||||
|
||||
from functools import reduce
|
||||
import numpy as np
|
||||
from .. import functional as F
|
||||
|
@ -26,7 +25,6 @@ from ..functional import broadcast_gradient_args, reduced_shape, tuple_div
|
|||
from .grad_base import bprop_getters
|
||||
from ..primitive import constexpr
|
||||
|
||||
|
||||
shape_op = P.Shape()
|
||||
reduce_sum = P.ReduceSum()
|
||||
reshape = P.Reshape()
|
||||
|
@ -129,6 +127,7 @@ def bprop_matmul(self):
|
|||
else:
|
||||
dw = mul2(x, dout)
|
||||
return dx, dw
|
||||
|
||||
return bprop
|
||||
|
||||
|
||||
|
@ -152,6 +151,7 @@ def bprop_batchmatmul(self):
|
|||
else:
|
||||
dw = mul2(x, dout)
|
||||
return dx, dw
|
||||
|
||||
return bprop
|
||||
|
||||
|
||||
|
@ -161,6 +161,7 @@ def get_bprop_tensor_add(self):
|
|||
|
||||
def bprop(x, y, out, dout):
|
||||
return binop_grad_common(x, y, dout, dout)
|
||||
|
||||
return bprop
|
||||
|
||||
|
||||
|
@ -172,6 +173,7 @@ def get_bprop_neg(self):
|
|||
def bprop(x, out, dout):
|
||||
dx = neg_grad(dout)
|
||||
return (dx,)
|
||||
|
||||
return bprop
|
||||
|
||||
|
||||
|
@ -182,6 +184,7 @@ def get_bprop_sub(self):
|
|||
|
||||
def bprop(x, y, out, dout):
|
||||
return binop_grad_common(x, y, dout, neg_func(dout))
|
||||
|
||||
return bprop
|
||||
|
||||
|
||||
|
@ -194,6 +197,7 @@ def get_bprop_mul(self):
|
|||
bc_dx = mul_func(dout, y)
|
||||
bc_dy = mul_func(dout, x)
|
||||
return binop_grad_common(x, y, bc_dx, bc_dy)
|
||||
|
||||
return bprop
|
||||
|
||||
|
||||
|
@ -208,6 +212,7 @@ def get_bprop_real_div(self):
|
|||
bc_x = div_op(dout, y)
|
||||
bc_y = neg(mul_op(bc_x, out))
|
||||
return binop_grad_common(x, y, bc_x, bc_y)
|
||||
|
||||
return bprop
|
||||
|
||||
|
||||
|
@ -222,6 +227,7 @@ def get_bprop_div(self):
|
|||
bc_x = div_op(dout, y)
|
||||
bc_y = neg(mul_op(bc_x, out))
|
||||
return binop_grad_common(x, y, bc_x, bc_y)
|
||||
|
||||
return bprop
|
||||
|
||||
|
||||
|
@ -235,6 +241,7 @@ def get_bprop_floor(self):
|
|||
def bprop(x, out, dout):
|
||||
bc_x = fill_(dtype_(x), shape_(x), 0.)
|
||||
return (bc_x,)
|
||||
|
||||
return bprop
|
||||
|
||||
|
||||
|
@ -249,6 +256,7 @@ def get_bprop_floordiv(self):
|
|||
bc_x = div_op(dout, y)
|
||||
bc_y = neg(mul_op(bc_x, out))
|
||||
return binop_grad_common(x, y, bc_x, bc_y)
|
||||
|
||||
return bprop
|
||||
|
||||
|
||||
|
@ -260,6 +268,7 @@ def get_bprop_floormod(self):
|
|||
bc_x = dout
|
||||
bc_y = -dout * (x // y)
|
||||
return binop_grad_common(x, y, bc_x, bc_y)
|
||||
|
||||
return bprop
|
||||
|
||||
|
||||
|
@ -274,6 +283,7 @@ def get_bprop_square(self):
|
|||
temp = mul_func(dout, x)
|
||||
dx = mul_func(fill_func(dtype(temp), shape_op(x), 2.0), temp)
|
||||
return (dx,)
|
||||
|
||||
return bprop
|
||||
|
||||
|
||||
|
@ -290,6 +300,7 @@ def get_bprop_sqrt(self):
|
|||
temp = div_op(fill_func(dtype(x), shape_op(x), 0.5), sqrt(x))
|
||||
dx = mul_func(dout, temp)
|
||||
return (dx,)
|
||||
|
||||
return bprop
|
||||
|
||||
|
||||
|
@ -298,9 +309,10 @@ def get_bprop_rsqrt(self):
|
|||
"""Grad definition for `Rsqrt` operation."""
|
||||
|
||||
def bprop(x, out, dout):
|
||||
grad = F.fill(F.dtype(x), F.shape(x), -0.5) / (F.sqrt(x)*x)
|
||||
grad = F.fill(F.dtype(x), F.shape(x), -0.5) / (F.sqrt(x) * x)
|
||||
dx = dout * grad
|
||||
return (dx,)
|
||||
|
||||
return bprop
|
||||
|
||||
|
||||
|
@ -316,6 +328,7 @@ def get_bprop_reciprocal(self):
|
|||
g = neg(reciprocal(square(x)))
|
||||
dx = mul(dout, g)
|
||||
return (dx,)
|
||||
|
||||
return bprop
|
||||
|
||||
|
||||
|
@ -328,6 +341,7 @@ def get_bprop_log(self):
|
|||
g = reciprocal(x)
|
||||
dx = g * dout
|
||||
return dx, 0
|
||||
|
||||
return bprop
|
||||
|
||||
|
||||
|
@ -341,6 +355,7 @@ def get_bprop_log1p(self):
|
|||
g = reciprocal(x_1p)
|
||||
dx = g * dout
|
||||
return dx, 0
|
||||
|
||||
return bprop
|
||||
|
||||
|
||||
|
@ -358,6 +373,7 @@ def get_bprop_erf(self):
|
|||
x_square = square(x)
|
||||
dx = dout * half_root_pi * exp(-x_square)
|
||||
return (dx,)
|
||||
|
||||
return bprop
|
||||
|
||||
|
||||
|
@ -388,6 +404,7 @@ def get_bprop_pow(self):
|
|||
bc_dx = power * pow_op(x, power - 1.0) * dout
|
||||
bc_dpower = out * ln(x) * dout
|
||||
return binop_grad_common(x, power, bc_dx, bc_dpower)
|
||||
|
||||
return bprop
|
||||
|
||||
|
||||
|
@ -400,6 +417,7 @@ def get_bprop_exp(self):
|
|||
g = exp_(x)
|
||||
dx = g * dout
|
||||
return (dx,)
|
||||
|
||||
return bprop
|
||||
|
||||
|
||||
|
@ -411,6 +429,7 @@ def get_bprop_minimum(self):
|
|||
def bprop(x, y, out, dout):
|
||||
dx, dy = input_grad(x, y, dout)
|
||||
return dx, dy
|
||||
|
||||
return bprop
|
||||
|
||||
|
||||
|
@ -422,6 +441,7 @@ def get_bprop_maximum(self):
|
|||
def bprop(x, y, out, dout):
|
||||
dx, dy = input_grad(x, y, dout)
|
||||
return dx, dy
|
||||
|
||||
return bprop
|
||||
|
||||
|
||||
|
@ -432,6 +452,7 @@ def get_bprop_reducesum(self):
|
|||
def bprop(x, axis, out, dout):
|
||||
dx = _sum_grad(x, axis, dout)
|
||||
return dx, zeros_like(axis)
|
||||
|
||||
return bprop
|
||||
|
||||
|
||||
|
@ -442,6 +463,7 @@ def get_bprop_cumsum(self):
|
|||
|
||||
def bprop(x, axis, out, dout):
|
||||
return cumsum(dout, axis), zeros_like(axis)
|
||||
|
||||
return bprop
|
||||
|
||||
|
||||
|
@ -500,6 +522,7 @@ def get_bprop_reduceprod(self):
|
|||
out = transpose(y, _invert_permutation(perm)) * grad
|
||||
dx = reshape(out, input_shape)
|
||||
return dx, zeros_like(axis)
|
||||
|
||||
return bprop
|
||||
|
||||
|
||||
|
@ -515,6 +538,7 @@ def get_bprop_cumprod(self):
|
|||
prod = cumprod(x, axis)
|
||||
out = cumsum(prod * dout, axis)
|
||||
return out / x, zeros_like(axis)
|
||||
|
||||
return bprop
|
||||
|
||||
|
||||
|
@ -524,6 +548,7 @@ def get_bprop_reduceall(self):
|
|||
|
||||
def bprop(x, axis, out, dout):
|
||||
return zeros_like(x), zeros_like(axis)
|
||||
|
||||
return bprop
|
||||
|
||||
|
||||
|
@ -534,6 +559,7 @@ def get_bprop_reducemax(self):
|
|||
def bprop(x, axis, out, dout):
|
||||
dx = _min_or_max_grad(x, axis, out, dout)
|
||||
return (dx, zeros_like(axis))
|
||||
|
||||
return bprop
|
||||
|
||||
|
||||
|
@ -547,6 +573,7 @@ def get_bprop_argmaxwithvalue(self):
|
|||
def bprop(x, out, dout):
|
||||
dx = _argmin_or_argmax_grad(x, axis, keep_dims, op, out, dout)
|
||||
return (dx,)
|
||||
|
||||
return bprop
|
||||
|
||||
|
||||
|
@ -557,6 +584,7 @@ def get_bprop_reducemin(self):
|
|||
def bprop(x, axis, out, dout):
|
||||
dx = _min_or_max_grad(x, axis, out, dout)
|
||||
return (dx, zeros_like(axis))
|
||||
|
||||
return bprop
|
||||
|
||||
|
||||
|
@ -570,6 +598,7 @@ def get_bprop_argminwithvalue(self):
|
|||
def bprop(x, out, dout):
|
||||
dx = _argmin_or_argmax_grad(x, axis, keep_dims, op, out, dout)
|
||||
return (dx,)
|
||||
|
||||
return bprop
|
||||
|
||||
|
||||
|
@ -585,6 +614,7 @@ def get_bprop_reduce_mean(self):
|
|||
div_shape = F.shape_mul(shape_op(x)) / F.shape_mul(shape_op(out))
|
||||
dx = div_op(grad, cast(F.scalar_to_array(div_shape), dtype(grad)))
|
||||
return dx, zeros_like(axis)
|
||||
|
||||
return bprop
|
||||
|
||||
|
||||
|
@ -604,6 +634,7 @@ def get_bprop_not_equal(self):
|
|||
|
||||
def bprop(x, y, out, dout):
|
||||
return zeros_like(x), zeros_like(y)
|
||||
|
||||
return bprop
|
||||
|
||||
|
||||
|
@ -613,6 +644,7 @@ def get_bprop_greater(self):
|
|||
|
||||
def bprop(x, y, out, dout):
|
||||
return zeros_like(x), zeros_like(y)
|
||||
|
||||
return bprop
|
||||
|
||||
|
||||
|
@ -622,6 +654,7 @@ def get_bprop_greater_equal(self):
|
|||
|
||||
def bprop(x, y, out, dout):
|
||||
return zeros_like(x), zeros_like(y)
|
||||
|
||||
return bprop
|
||||
|
||||
|
||||
|
@ -631,6 +664,7 @@ def get_bprop_less(self):
|
|||
|
||||
def bprop(x, y, out, dout):
|
||||
return zeros_like(x), zeros_like(y)
|
||||
|
||||
return bprop
|
||||
|
||||
|
||||
|
@ -640,6 +674,7 @@ def get_bprop_less_equal(self):
|
|||
|
||||
def bprop(x, y, out, dout):
|
||||
return zeros_like(x), zeros_like(y)
|
||||
|
||||
return bprop
|
||||
|
||||
|
||||
|
@ -649,6 +684,7 @@ def get_bprop_logical_not(self):
|
|||
|
||||
def bprop(x, out, dout):
|
||||
return (zeros_like(x),)
|
||||
|
||||
return bprop
|
||||
|
||||
|
||||
|
@ -658,6 +694,7 @@ def get_bprop_logical_and(self):
|
|||
|
||||
def bprop(x, y, out, dout):
|
||||
return zeros_like(x), zeros_like(y)
|
||||
|
||||
return bprop
|
||||
|
||||
|
||||
|
@ -667,6 +704,7 @@ def get_bprop_logical_or(self):
|
|||
|
||||
def bprop(x, y, out, dout):
|
||||
return zeros_like(x), zeros_like(y)
|
||||
|
||||
return bprop
|
||||
|
||||
|
||||
|
@ -676,6 +714,7 @@ def get_bprop_npu_alloc_float_status(self):
|
|||
|
||||
def bprop(out, dout):
|
||||
return ()
|
||||
|
||||
return bprop
|
||||
|
||||
|
||||
|
@ -685,6 +724,7 @@ def get_bprop_npu_get_float_status(self):
|
|||
|
||||
def bprop(x, out, dout):
|
||||
return (zeros_like(x),)
|
||||
|
||||
return bprop
|
||||
|
||||
|
||||
|
@ -694,6 +734,7 @@ def get_bprop_npu_clear_float_status(self):
|
|||
|
||||
def bprop(x, out, dout):
|
||||
return (zeros_like(x),)
|
||||
|
||||
return bprop
|
||||
|
||||
|
||||
|
@ -703,6 +744,7 @@ def get_bprop_assign_add(self):
|
|||
|
||||
def bprop(x, y, out, dout):
|
||||
return zeros_like(x), zeros_like(y)
|
||||
|
||||
return bprop
|
||||
|
||||
|
||||
|
@ -712,6 +754,7 @@ def get_bprop_assign_sub(self):
|
|||
|
||||
def bprop(x, y, out, dout):
|
||||
return zeros_like(x), zeros_like(y)
|
||||
|
||||
return bprop
|
||||
|
||||
|
||||
|
@ -721,8 +764,9 @@ def get_bprop_sin(self):
|
|||
cos = P.Cos()
|
||||
|
||||
def bprop(x, out, dout):
|
||||
dx = dout*cos(x)
|
||||
dx = dout * cos(x)
|
||||
return (dx,)
|
||||
|
||||
return bprop
|
||||
|
||||
|
||||
|
@ -733,8 +777,9 @@ def get_bprop_cos(self):
|
|||
neg = P.Neg()
|
||||
|
||||
def bprop(x, out, dout):
|
||||
dx = dout*neg(sin(x))
|
||||
dx = dout * neg(sin(x))
|
||||
return (dx,)
|
||||
|
||||
return bprop
|
||||
|
||||
|
||||
|
@ -746,6 +791,7 @@ def get_bprop_acos(self):
|
|||
def bprop(x, out, dout):
|
||||
dx = input_grad(x, dout)
|
||||
return (dx,)
|
||||
|
||||
return bprop
|
||||
|
||||
|
||||
|
@ -757,6 +803,7 @@ def get_bprop_acosh(self):
|
|||
def bprop(x, out, dout):
|
||||
dx = input_grad(out, dout)
|
||||
return (dx,)
|
||||
|
||||
return bprop
|
||||
|
||||
|
||||
|
@ -768,6 +815,7 @@ def get_bprop_abs(self):
|
|||
def bprop(x, out, dout):
|
||||
dx = abs_grad(x, dout)
|
||||
return (dx,)
|
||||
|
||||
return bprop
|
||||
|
||||
|
||||
|
@ -777,6 +825,7 @@ def get_bprop_scalar_cast(self):
|
|||
|
||||
def bprop(x, t, out, dout):
|
||||
return F.scalar_cast(dout, F.typeof(x)), zeros_like(t)
|
||||
|
||||
return bprop
|
||||
|
||||
|
||||
|
@ -789,6 +838,7 @@ def get_bprop_scalar_addn(self):
|
|||
for _ in range(len(x)):
|
||||
dx = dx + (dout,)
|
||||
return dx
|
||||
|
||||
return bprop
|
||||
|
||||
|
||||
|
@ -798,6 +848,7 @@ def get_bprop_sign(self):
|
|||
|
||||
def bprop(x, out, dout):
|
||||
return (zeros_like(x),)
|
||||
|
||||
return bprop
|
||||
|
||||
|
||||
|
@ -807,6 +858,7 @@ def get_bprop_round(self):
|
|||
|
||||
def bprop(x, out, dout):
|
||||
return (zeros_like(x),)
|
||||
|
||||
return bprop
|
||||
|
||||
|
||||
|
@ -821,4 +873,5 @@ def get_bprop_atan2(self):
|
|||
bc_dx = tmp * y
|
||||
bc_dy = tmp * (-x)
|
||||
return binop_grad_common(x, y, bc_dx, bc_dy)
|
||||
|
||||
return bprop
|
||||
|
|
|
@ -21,6 +21,7 @@ from ..operations import _grad_ops as G
|
|||
from ..operations import _inner_ops as inner
|
||||
from ..composite.multitype_ops.zeros_like_impl import zeros_like
|
||||
from .grad_base import bprop_getters
|
||||
from ... import context
|
||||
|
||||
|
||||
@bprop_getters.register(P.BiasAdd)
|
||||
|
@ -551,6 +552,14 @@ def get_bprop_lstm(self):
|
|||
bidirectional=self.bidirectional,
|
||||
dropout=self.dropout
|
||||
)
|
||||
lstm_grad = G.LSTMGrad(
|
||||
input_size=self.input_size,
|
||||
hidden_size=self.hidden_size,
|
||||
num_layers=self.num_layers,
|
||||
has_bias=self.has_bias,
|
||||
bidirectional=self.bidirectional,
|
||||
dropout=self.dropout
|
||||
)
|
||||
|
||||
def bprop(x, hx, cx, w, out, dout):
|
||||
y, _, _, reserve, state = out
|
||||
|
@ -559,6 +568,16 @@ def get_bprop_lstm(self):
|
|||
dw = lstm_grad_weight(F.depend(x, dx), hx, y, reserve, state)
|
||||
return dx, dhx, dcx, dw
|
||||
|
||||
#
|
||||
def bprop_cpu(x, hx, cx, w, out, dout):
|
||||
y, hy, cy, reserve, _ = out
|
||||
dy, dhy, dcy, _, _ = dout
|
||||
dx, dhx, dcx, dw = lstm_grad(x, hx, cx, w, y, hy, cy, dy, dhy, dcy, reserve)
|
||||
return dx, dhx, dcx, dw
|
||||
|
||||
if context.get_context('device_target') == "CPU":
|
||||
return bprop_cpu
|
||||
|
||||
return bprop
|
||||
|
||||
|
||||
|
|
|
@ -107,6 +107,7 @@ class BiasAddGrad(Primitive):
|
|||
|
||||
class BinaryCrossEntropyGrad(PrimitiveWithInfer):
|
||||
"""Computes gradients for `BinaryCrossEntropy` operation."""
|
||||
|
||||
@prim_attr_register
|
||||
def __init__(self, reduction='mean'):
|
||||
self.reduction = validator.check_string('reduction', reduction, ['none', 'mean', 'sum'], self.name)
|
||||
|
@ -665,6 +666,62 @@ class LSTMGradWeight(PrimitiveWithInfer):
|
|||
return hx_dtype
|
||||
|
||||
|
||||
class LSTMGrad(PrimitiveWithInfer):
    """Computes the data and weight gradients of LSTM."""

    @prim_attr_register
    def __init__(self, input_size, hidden_size, num_layers, has_bias, bidirectional, dropout):
        self.input_size = validator.check_integer('input_size', input_size, 0, Rel.GT, self.name)
        self.hidden_size = validator.check_integer('hidden_size', hidden_size, 0, Rel.GT, self.name)
        self.num_layers = validator.check_integer('num_layers', num_layers, 0, Rel.GT, self.name)
        self.has_bias = validator.check_value_type('has_bias', has_bias, (bool,), self.name)
        self.bidirectional = validator.check_value_type('bidirectional', bidirectional, (bool,), self.name)
        self.dropout = validator.check_value_type("dropout", dropout, [float], self.name)
        self.dropout = validator.check_number_range('dropout', dropout, 0, 1, Rel.INC_BOTH, self.name)

        if bidirectional:
            self.num_directions = 2
        else:
            self.num_directions = 1

    def infer_shape(self, x_shape, hx_shape, cx_shape, w_shape, y_shape, hy_shape, cy_shape, dy_shape, dhy_shape,
                    dcy_shape, reserve_shape):
        # dhy and dcy should be same shape
        validator.check_integer("h_shape", len(dhy_shape), 3, Rel.EQ, self.name)
        validator.check_integer("h_shape", len(dhy_shape), len(dcy_shape), Rel.EQ, self.name)
        validator.check_integer("h_shape[0]", dhy_shape[0], dcy_shape[0], Rel.EQ, self.name)
        validator.check_integer("h_shape[1]", dhy_shape[1], dcy_shape[1], Rel.EQ, self.name)
        validator.check_integer("h_shape[2]", dhy_shape[2], dcy_shape[2], Rel.EQ, self.name)

        validator.check_integer("h_shape[0]", dhy_shape[0], self.num_layers * self.num_directions, Rel.EQ, self.name)
        validator.check_integer("h_shape[2]", dhy_shape[2], self.hidden_size, Rel.EQ, self.name)

        # dy: (seq_len, batch_size, hidden_size * num_directions)
        validator.check_integer("dy_shape", len(dy_shape), 3, Rel.EQ, self.name)
        validator.check_integer("dy[1]", dy_shape[1], dhy_shape[1], Rel.EQ, self.name)
        validator.check_integer("dy[2]", dy_shape[2], self.hidden_size * self.num_directions, Rel.EQ, self.name)

        # dx: (seq_len, batch_size, input_size)
        dx_shape = (y_shape[0], y_shape[1], self.input_size)
        dhx_shape = dhy_shape
        dcx_shape = dcy_shape
        weight_size = 0
        gate_size = 4 * self.hidden_size
        for layer in range(self.num_layers):
            for _ in range(self.num_directions):
                input_layer_size = self.input_size if layer == 0 else self.hidden_size * self.num_directions
                weight_size += gate_size * input_layer_size
                weight_size += gate_size * self.hidden_size
                if self.has_bias:
                    weight_size += gate_size

        return (dx_shape, dhx_shape, dcx_shape, (weight_size, 1, 1))

    def infer_dtype(self, x_dtype, hx_dtype, cx_dtype, w_dtype, y_dtype, hy_dtype, cy_dtype, dy_dtype, dhy_dtype,
                    dcy_dtype, reserve_dtype):
        return (dy_dtype, dy_dtype, dy_dtype, hx_dtype)

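# A quick sanity sketch (not part of the patch): recompute the flattened weight size
# that LSTMGrad.infer_shape returns, for the single-layer configuration used by
# test_grad further below (input_size=10, hidden_size=2, num_layers=1, unidirectional,
# has_bias=True). 4*2*10 (w_ih) + 4*2*2 (w_hh) + 4*2 (fused bias) = 104, so dw comes
# back as a (104, 1, 1) tensor, matching the (8, 10) wih_l0, (8, 2) whh_l0 and
# 8-element bias the test concatenates.
input_size, hidden_size, num_layers, num_directions, has_bias = 10, 2, 1, 1, True
weight_size = 0
gate_size = 4 * hidden_size
for layer in range(num_layers):
    for _ in range(num_directions):
        input_layer_size = input_size if layer == 0 else hidden_size * num_directions
        weight_size += gate_size * (input_layer_size + hidden_size)
        if has_bias:
            weight_size += gate_size
assert weight_size == 104
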
class PReLUGrad(PrimitiveWithInfer):
    r"""
    Gradients of PReLU operation.

@@ -1051,6 +1108,7 @@ class RefToEmbed(Primitive):
    __mindspore_signature__ = (
        ('variable', sig_rw.RW_REF, sig_kind.KIND_POSITIONAL_KEYWORD),
    )

    @prim_attr_register
    def __init__(self):
        pass

@@ -35,9 +35,11 @@ def _check_positive_int_or_tuple(arg_name, arg_value, prim_name, allow_four=Fals
    """
    Checks whether an argument is a positive int or a tuple with 2 or 4 (when allow_four is True) positive int elements.
    """

    def _raise_message():
        raise ValueError(f"For '{prim_name}' attr '{arg_name}' should be a positive int number or a tuple of two "
                         f"{'or four ' if allow_four else ''}positive int numbers, but got {arg_value}")

    def _get_return_value():
        if isinstance(arg_value, int):
            ret = (1, 1, arg_value, arg_value) if ret_four else (arg_value, arg_value)

@@ -50,6 +52,7 @@ def _check_positive_int_or_tuple(arg_name, arg_value, prim_name, allow_four=Fals
        else:
            _raise_message()
        return ret

    validator.check_value_type(arg_name, arg_value, (int, tuple), prim_name)
    ret_value = _get_return_value()
    for item in ret_value:

@@ -58,6 +61,7 @@ def _check_positive_int_or_tuple(arg_name, arg_value, prim_name, allow_four=Fals
            _raise_message()
    return ret_value

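# Hedged usage sketch (not in the patch): the helper normalizes int/tuple attributes
# for conv/pool primitives. Assuming ret_four is exposed as a parameter (the full
# signature is truncated in the hunk header above), the behaviour implied by the
# visible code is roughly:
#   _check_positive_int_or_tuple('ksize', 3, 'MaxPool')                 -> (3, 3)
#   _check_positive_int_or_tuple('ksize', (2, 3), 'MaxPool')            -> (2, 3)
#   _check_positive_int_or_tuple('ksize', 3, 'MaxPool', ret_four=True)  -> (1, 1, 3, 3)
#   _check_positive_int_or_tuple('ksize', -1, 'MaxPool')                -> raises ValueError
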
class Flatten(PrimitiveWithInfer):
    r"""
    Flattens a tensor without changing its batch size on the 0-th axis.

@@ -205,6 +209,7 @@ class Softplus(PrimitiveWithInfer):
        >>> softplus(input_x)
        [1.3132615, 2.126928, 3.0485873, 4.01815, 5.0067153]
    """

    @prim_attr_register
    def __init__(self):
        """init Softplus"""

@@ -301,6 +306,7 @@ class ReLUV2(PrimitiveWithInfer):
        ([[[[1., 0.], [0., 4.]], [[0., 6.], [7., 0.]]]],
         [[[[1, 0], [2, 0]], [[2, 0], [1, 0]]]])
    """

    @prim_attr_register
    def __init__(self):
        """init ReLUV2"""

@@ -398,6 +404,7 @@ class HSwish(PrimitiveWithInfer):
        >>> input_x = Tensor(np.array([-1, -2, 0, 2, 1]), mindspore.float16)
        >>> result = hswish(input_x)
    """

    @prim_attr_register
    def __init__(self):
        self.init_prim_io_names(inputs=['x'], outputs=['output'])

@@ -1076,6 +1083,7 @@ class MaxPoolWithArgmax(_Pool):
        >>> maxpool_arg_op = P.MaxPoolWithArgmax(padding="VALID", ksize=2, strides=1)
        >>> output_tensor, argmax = maxpool_arg_op(input_tensor)
    """

    def __init__(self, ksize=1, strides=1, padding="valid"):
        super(MaxPoolWithArgmax, self).__init__(ksize, strides, padding)
        self.is_tbe = context.get_context("device_target") == "Ascend"

@@ -1494,6 +1502,7 @@ class ApplyMomentum(PrimitiveWithInfer):
        ('gradient', sig_rw.RW_READ, sig_kind.KIND_POSITIONAL_KEYWORD),
        ('momentum', sig_rw.RW_READ, sig_kind.KIND_POSITIONAL_KEYWORD)
    )

    @prim_attr_register
    def __init__(self, use_nesterov=False, use_locking=False, gradient_scale=1.0):
        self.init_prim_io_names(inputs=['variable', 'accumulation', 'learning_rate', 'gradient', 'momentum'],

@@ -1583,6 +1592,7 @@ class L2Loss(PrimitiveWithInfer):
        >>> l2_loss(input_x)
        7.0
    """

    @prim_attr_register
    def __init__(self):
        """init L2Loss"""

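# Quick check of the L2Loss doc example above (the example's input line is not shown
# in this hunk; assuming the usual input of [1., 2., 3.]): L2Loss computes
# sum(x ** 2) / 2, so (1 + 4 + 9) / 2 = 7.0.
import numpy as np
assert np.sum(np.array([1., 2., 3.]) ** 2) / 2 == 7.0
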
@@ -2325,7 +2335,29 @@ class LSTM(PrimitiveWithInfer):
        y_shape = (x_shape[0], x_shape[1], self.hidden_size * self.num_directions)

        # set arbitrary shape for reserved space
        reserved_shape = (1, 1)
        type_size = 4
        gates_ws_ld = self.get_good_ld(self.hidden_size * 4, type_size)
        states_ws_ld = self.get_good_ld(max(self.hidden_size, self.input_size), type_size)
        self.ws_gates_size = self.num_layers * self.num_directions * x_shape[0] * x_shape[1] * gates_ws_ld * type_size
        self.ws_states_size = (self.num_layers + 1) * self.num_directions * (x_shape[0] + 1) * x_shape[
            1] * states_ws_ld * type_size
        self.ws_c_states_size = (self.num_layers + 1) * self.num_directions * (x_shape[0] + 1) * x_shape[
            1] * states_ws_ld * type_size
        self.ws_diff_states_size = (self.num_layers + 1) * self.num_directions * (x_shape[0] + 1) * (2 + 1) * x_shape[
            1] * states_ws_ld * type_size
        self.ws_grid_comp_size = 0
        self.page_size = 4096
        current_offset = 0
        current_offset += self.ws_gates_size
        current_offset = self.rnd_up(current_offset, self.page_size)
        current_offset += self.ws_states_size
        current_offset = self.rnd_up(current_offset, self.page_size)
        current_offset += self.ws_c_states_size
        current_offset = self.rnd_up(current_offset, self.page_size)
        current_offset += self.ws_diff_states_size
        current_offset = self.rnd_up(current_offset, self.page_size)
        current_offset += self.ws_grid_comp_size
        reserved_shape = (current_offset, 1)
        state_shape = (1, 1)
        return (y_shape, h_shape, c_shape, reserved_shape, state_shape)

@@ -2334,6 +2366,15 @@ class LSTM(PrimitiveWithInfer):
        validator.check_tensor_type_same(args, (mstype.float32, mstype.float16), self.name)
        return (x_dtype, x_dtype, x_dtype, x_dtype, x_dtype)

    def rnd_up(self, current_offset, page_size):
        return ((current_offset + page_size - 1) // page_size) * page_size

    def get_good_ld(self, dim, type_size):
        ld = self.rnd_up(dim, 64 // type_size)
        if ld % 256 == 0:
            return ld + 64 // type_size
        return ld

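# Standalone sketch (not part of the patch) of the reserved-workspace arithmetic
# above, evaluated for the single-layer CPU configuration used by test_lstm below
# (seq_len=5, batch_size=2, input_size=3, hidden_size=2, num_layers=1,
# unidirectional, float32). The helper names are local to this sketch.
def _rnd_up(offset, page_size):
    # round offset up to the next multiple of page_size
    return ((offset + page_size - 1) // page_size) * page_size

def _good_ld(dim, type_size):
    # round up to a 64-byte multiple and avoid exact multiples of 256 elements
    ld = _rnd_up(dim, 64 // type_size)
    return ld + 64 // type_size if ld % 256 == 0 else ld

seq_len, batch, input_size, hidden, layers, dirs, type_size = 5, 2, 3, 2, 1, 1, 4
gates_ld = _good_ld(4 * hidden, type_size)                                          # 16
states_ld = _good_ld(max(hidden, input_size), type_size)                            # 16
ws_gates = layers * dirs * seq_len * batch * gates_ld * type_size                   # 640
ws_states = (layers + 1) * dirs * (seq_len + 1) * batch * states_ld * type_size     # 1536
ws_c_states = ws_states                                                              # 1536
ws_diff = (layers + 1) * dirs * (seq_len + 1) * 3 * batch * states_ld * type_size   # 4608
offset = 0
for size in (ws_gates, ws_states, ws_c_states, ws_diff):
    offset = _rnd_up(offset + size, 4096)
assert offset == 20480  # i.e. reserved_shape would be (20480, 1)
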
class SigmoidCrossEntropyWithLogits(PrimitiveWithInfer):
    r"""

@@ -2999,6 +3040,7 @@ class Dropout(PrimitiveWithInfer):
        >>> input_x = Tensor((20, 16, 50, 50))
        >>> out = dropout(input_x)
    """

    @prim_attr_register
    def __init__(self, drop_prob=0):
        self.drop_prob = validator.check_number_range("drop_prob", drop_prob, 0, 1, Rel.INC_BOTH, self.name)

@@ -3033,6 +3075,7 @@ class DropoutGrad(PrimitiveWithInfer):
        >>> input_x = Tensor((20, 16, 50, 50))
        >>> out = dropout_grad(input_x)
    """

    @prim_attr_register
    def __init__(self, drop_prob=0):
        self.drop_prob = validator.check_number_range("drop_prob", drop_prob, 0, 1, Rel.INC_BOTH, self.name)

@@ -3083,6 +3126,7 @@ class CTCLoss(PrimitiveWithInfer):
        >>> ctc_loss = P.CTCLoss()
        >>> output = ctc_loss(inputs, labels_indices, labels_values, sequence_length)
    """

    @prim_attr_register
    def __init__(self, preprocess_collapse_repeated=False, ctc_merge_repeated=False,
                 ignore_longer_outputs_than_inputs=False):

@@ -0,0 +1,335 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

import pytest
import mindspore.nn as nn
from mindspore.common.api import ms_function
import numpy as np
import mindspore.context as context
from mindspore.common.initializer import initializer
from mindspore.ops import composite as C
from mindspore.ops import operations as P
from mindspore.common.tensor import Tensor
from mindspore.common.parameter import ParameterTuple, Parameter

context.set_context(device_target='CPU')

class LstmNet(nn.Cell):
    def __init__(self, seq_len, batch_size, input_size, hidden_size, num_layers, has_bias, bidirectional, dropout):
        super(LstmNet, self).__init__()

        num_directions = 1
        if bidirectional:
            num_directions = 2

        self.lstm = P.LSTM(input_size, hidden_size, num_layers, has_bias, bidirectional, dropout)
        input_np = np.array([[[0.6755, -1.6607, 0.1367], [0.4276, -0.7850, -0.3758]],
                             [[-0.6424, -0.6095, 0.6639], [0.7918, 0.4147, -0.5089]],
                             [[-1.5612, 0.0120, -0.7289], [-0.6656, -0.6626, -0.5883]],
                             [[-0.9667, -0.6296, -0.7310], [0.1026, -0.6821, -0.4387]],
                             [[-0.4710, 0.6558, -0.3144], [-0.8449, -0.2184, -0.1806]]
                             ]).astype(np.float32)
        self.x = Parameter(initializer(Tensor(input_np), [seq_len, batch_size, input_size]), name='x')

        self.h = Parameter(initializer(
            Tensor(
                np.array([0.1, 0.1, 0.1, 0.1]).reshape((num_layers * num_directions, batch_size, hidden_size)).astype(
                    np.float32)),
            [num_layers * num_directions, batch_size, hidden_size]), name='h')

        self.c = Parameter(initializer(
            Tensor(
                np.array([0.2, 0.2, 0.2, 0.2]).reshape((num_layers * num_directions, batch_size, hidden_size)).astype(
                    np.float32)),
            [num_layers * num_directions, batch_size, hidden_size]), name='c')

        wih = np.array([[3.4021e-01, -4.6622e-01, 4.5117e-01],
                        [-6.4257e-02, -2.4807e-01, 1.3550e-02],  # i
                        [-3.2140e-01, 5.5578e-01, 6.3589e-01],
                        [1.6547e-01, -7.9030e-02, -2.0045e-01],
                        [-6.9863e-01, 5.9773e-01, -3.9062e-01],
                        [-3.0253e-01, -1.9464e-01, 7.0591e-01],
                        [-4.0835e-01, 3.6751e-01, 4.7989e-01],
                        [-5.6894e-01, -5.0359e-01, 4.7491e-01]]).astype(np.float32)  # .reshape([1, -1])
        whh = np.array([[-0.4820, -0.2350],
                        [-0.1195, 0.0519],
                        [0.2162, -0.1178],
                        [0.6237, 0.0711],
                        [0.4511, -0.3961],
                        [-0.5962, 0.0906],
                        [0.1867, -0.1225],
                        [0.1831, 0.0850]]).astype(np.float32)  # .reshape([1, -1])
        wih = wih.transpose((1, 0))
        whh = whh.transpose((1, 0))
        bih = np.zeros((1, 8)).astype(np.float32)
        w_np = np.concatenate((wih, whh, bih), axis=0).reshape([-1, 1, 1])
        self.w = Parameter(initializer(Tensor(w_np), w_np.shape), name='w')

    @ms_function
    def construct(self):
        return self.lstm(self.x, self.h, self.c, self.w)

@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_lstm():
    seq_len = 5
    batch_size = 2
    input_size = 3
    hidden_size = 2
    num_layers = 1
    has_bias = True
    bidirectional = False
    dropout = 0.0
    num_directions = 1
    if bidirectional:
        num_directions = 2
    net = LstmNet(seq_len, batch_size, input_size, hidden_size, num_layers, has_bias, bidirectional, dropout)
    y, h, c, _, _ = net()
    print(y)
    print(c)
    print(h)
    expect_y = np.array([[[-0.16709016, 0.13125697],
                          [-0.08438572, -0.01969833]],
                         [[-0.2746155, 0.32764038],
                          [-0.06504016, -0.07770399]],
                         [[-0.00140004, 0.17706314],
                          [0.03244496, -0.10135599]],
                         [[0.08328028, 0.06437367],
                          [-0.04133911, -0.11072896]],
                         [[0.19004421, -0.02852732],
                          [0.09138509, -0.00344161]]])
    error = np.ones([num_layers, batch_size, hidden_size]) * 1.0e-4
    diff = y.asnumpy() - expect_y
    assert np.all(diff < error)
    assert np.all(-diff < error)

    expect_h = np.array([[[0.19004421, -0.02852732],
                          [0.09138509, -0.00344161]]])
    error = np.ones((num_layers * num_directions, batch_size, hidden_size)) * 1.0e-4
    diff = h.asnumpy() - expect_h
    assert np.all(diff < error)
    assert np.all(-diff < error)

    expect_c = np.array([[[0.34533143, -0.06313794],
                          [0.169008, -0.00555446]]])
    error = np.ones((num_layers * num_directions, batch_size, hidden_size)) * 1.0e-4
    diff = c.asnumpy() - expect_c
    assert np.all(diff < error)
    assert np.all(-diff < error)

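# A minimal NumPy reference that could be used to cross-check expect_y/expect_h/expect_c
# above. It assumes the PyTorch-style (i, f, g, o) gate ordering from which the wih/whh
# values in LstmNet were taken; it is only a sketch for sanity checks, not the
# operator's actual oneDNN-backed implementation.
def np_lstm_reference(x, h0, c0, w_ih, w_hh, b):
    """x: (seq_len, batch, input); h0/c0: (batch, hidden); w_ih: (4*hidden, input);
    w_hh: (4*hidden, hidden); b: (4*hidden,). Returns (y, h, c)."""
    def sigmoid(z):
        return 1.0 / (1.0 + np.exp(-z))

    h, c, ys = h0.copy(), c0.copy(), []
    for t in range(x.shape[0]):
        gates = x[t] @ w_ih.T + h @ w_hh.T + b  # (batch, 4*hidden)
        i, f, g, o = np.split(gates, 4, axis=1)
        c = sigmoid(f) * c + sigmoid(i) * np.tanh(g)
        h = sigmoid(o) * np.tanh(c)
        ys.append(h)
    return np.stack(ys), h, c
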
class MultiLayerBiLstmNet(nn.Cell):
    def __init__(self, seq_len, batch_size, input_size, hidden_size, num_layers, has_bias, bidirectional, dropout):
        super(MultiLayerBiLstmNet, self).__init__()

        num_directions = 1
        if bidirectional:
            num_directions = 2

        self.lstm = nn.LSTM(input_size=input_size, hidden_size=hidden_size, num_layers=num_layers, has_bias=has_bias,
                            bidirectional=bidirectional, dropout=dropout)

        input_np = np.array([[[-0.1887, -0.4144, -0.0235, 0.7489, 0.7522, 0.5969, 0.3342, 1.2198, 0.6786, -0.9404],
                              [-0.8643, -1.6835, -2.4965, 2.8093, 0.1741, 0.2707, 0.7387, -0.0939, -1.7990, 0.4765]],

                             [[-0.5963, -1.2598, -0.7226, 1.1365, -1.7320, -0.7302, 0.1221, -0.2111, -1.6173, -0.0706],
                              [0.8964, 0.1737, -1.0077, -0.1389, 0.4889, 0.4391, 0.7911, 0.3614, -1.9533, -0.9936]],

                             [[0.3260, -1.3312, 0.0601, 1.0726, -1.6010, -1.8733, -1.5775, 1.1579, -0.8801, -0.5742],
                              [-2.2998, -0.6344, -0.5409, -0.9221, -0.6500, 0.1206, 1.5215, 0.7517, 1.3691, 2.0021]],

                             [[-0.1245, -0.3690, 2.1193, 1.3852, -0.1841, -0.8899, -0.3646, -0.8575, -0.3131, 0.2026],
                              [1.0218, -1.4331, 0.1744, 0.5442, -0.7808, 0.2527, 0.1566, 1.1484, -0.7766, -0.6747]],

                             [[-0.6752, 0.9906, -0.4973, 0.3471, -0.1202, -0.4213, 2.0213, 0.0441, 0.9016, 1.0365],
                              [1.2223, -1.3248, 0.1207, -0.8256, 0.1816, 0.7057, -0.3105, 0.5713, 0.2804,
                               -1.0685]]]).astype(np.float32)

        self.x = Parameter(initializer(Tensor(input_np), [seq_len, batch_size, input_size]), name='x')

        self.h0 = Parameter(initializer(
            Tensor(np.ones((num_directions, batch_size, hidden_size)).astype(np.float32)),
            [num_directions, batch_size, hidden_size]), name='h0')
        self.c0 = Parameter(initializer(
            Tensor(np.ones((num_directions, batch_size, hidden_size)).astype(np.float32)),
            [num_directions, batch_size, hidden_size]), name='c0')
        self.h1 = Parameter(initializer(
            Tensor(np.ones((num_directions, batch_size, hidden_size)).astype(np.float32)),
            [num_directions, batch_size, hidden_size]), name='h1')
        self.c1 = Parameter(initializer(
            Tensor(np.ones((num_directions, batch_size, hidden_size)).astype(np.float32)),
            [num_directions, batch_size, hidden_size]), name='c1')
        self.h = ParameterTuple((self.h0, self.h1))
        self.c = ParameterTuple((self.c0, self.c1))

    @ms_function
    def construct(self):
        return self.lstm(self.x, (self.h, self.c))

@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_multi_layer_bilstm():
    seq_len = 5
    batch_size = 2
    input_size = 10
    hidden_size = 2
    num_layers = 2
    has_bias = True
    bidirectional = True
    dropout = 0.0

    num_directions = 1
    if bidirectional:
        num_directions = 2

    net = MultiLayerBiLstmNet(seq_len, batch_size, input_size, hidden_size, num_layers, has_bias, bidirectional,
                              dropout)
    y, h, c, _, _ = net()
    print(y)
    print(h)
    print(c)

class Grad(nn.Cell):
    def __init__(self, network):
        super(Grad, self).__init__()
        self.network = network
        self.weights = ParameterTuple(network.trainable_params())
        self.grad = C.GradOperation('grad',
                                    get_by_list=True,
                                    sens_param=True)

    @ms_function
    def construct(self, output_grad):
        weights = self.weights
        grads = self.grad(self.network, weights)(output_grad)
        return grads

class Net(nn.Cell):
    def __init__(self, seq_len, batch_size, input_size, hidden_size, num_layers, has_bias, bidirectional, dropout):
        super(Net, self).__init__()

        num_directions = 1
        if bidirectional:
            num_directions = 2
        input_np = np.array([[[-0.5907, 1.0557, 1.7283, 0.6706, -1.2550, -0.5298, -0.2290, -0.6735, 0.8555, 1.4836],
                              [-1.7070, -0.5347, -0.9105, -0.2598, 0.0588, 1.5496, 1.0757, 0.3760, -1.2020, -0.2868]],

                             [[0.0151, 0.2126, 0.8090, -0.5292, -2.5590, 0.4279, -0.3081, -1.4706, -0.0498, 1.2301],
                              [0.4165, -0.5391, -0.0996, 0.1928, -0.4909, -0.1255, 0.4444, -1.3687, 1.3096, 0.6553]],

                             [[-0.7802, -0.2083, -0.6388, 1.3757, 0.4293, 0.5363, 0.3202, -0.6687, -1.3864, -0.2953],
                              [1.0799, -0.7204, 0.1130, -0.5857, -0.4855, -1.1068, 1.0126, 0.8716, 1.5460, -0.7392]],

                             [[2.2645, -0.6586, -0.2227, 1.4290, -0.5006, -1.6576, -0.1793, 0.5319, 0.1360, 0.2707],
                              [-0.4071, 0.1575, 1.4199, -0.9156, 0.1855, 0.4947, 1.0460, -0.6365, 0.1191, -0.6374]],

                             [[0.2468, 1.0815, -0.4893, 0.0664, 0.6405, -2.2967, 0.7612, 0.8759, 0.5685, -1.0999],
                              [-0.7272, -1.7750, -0.1164, -0.7159, 0.0061, -0.7839, -1.8329, 0.3434, -0.5634,
                               0.5384]]]).astype(np.float32)

        self.x = Parameter(initializer(Tensor(input_np), [seq_len, batch_size, input_size]), name='x')

        self.h0 = Parameter(initializer(
            Tensor(np.ones((num_directions, batch_size, hidden_size)).astype(np.float32)),
            [num_directions, batch_size, hidden_size]), name='h0')

        self.c0 = Parameter(initializer(
            Tensor(np.ones((num_directions, batch_size, hidden_size)).astype(np.float32)),
            [num_directions, batch_size, hidden_size]), name='c0')

        wih_l0 = np.array([[0.2300, 0.6668, 0.4703, 0.0425, 0.0464, 0.6825, 0.2249, -0.4315, -0.2449, 0.2964],
                           [-0.2811, -0.3444, 0.2557, -0.5137, -0.5518, 0.1652, -0.6720, 0.1066, 0.3586, 0.6299],
                           [0.5728, -0.1784, 0.5661, 0.4012, 0.3856, -0.1899, 0.3102, 0.3717, -0.5651, 0.1952],
                           [0.1026, -0.0527, 0.1198, -0.3080, 0.2292, 0.5757, -0.3567, -0.2731, -0.0586, -0.2849],
                           [0.2194, -0.1622, 0.3219, -0.3008, -0.3713, -0.3034, -0.2385, 0.0412, -0.5205, 0.0280],
                           [-0.5499, -0.0733, -0.5236, -0.6753, -0.7045, -0.1839, -0.1037, -0.5026, -0.4055, -0.3416],
                           [0.1573, -0.1301, -0.2882, -0.3464, 0.6643, 0.1980, -0.6804, 0.5359, 0.5996, 0.0124],
                           [-0.6436, 0.0587, -0.6520, -0.0471, 0.1667, 0.6042, 0.5752, -0.6296, -0.2976,
                            -0.3757]]).astype(np.float32).reshape([1, -1])

        whh_l0 = np.array([[0.3358, 0.2790],
                           [-0.5355, 0.0989],
                           [-0.1402, 0.5120],
                           [0.1335, 0.1653],
                           [0.3533, -0.3531],
                           [0.4166, -0.4420],
                           [-0.5454, -0.1720],
                           [0.0041, -0.0799]]).astype(np.float32).reshape([1, -1])

        bih_l0 = np.array([0.5518, 0.1083, 0.4829, 0.0607, -0.1770, -0.6944, 0.3059, 0.5354]).astype(
            np.float32).reshape([1, -1])
        bhh_l0 = np.array([0.5025, -0.1261, -0.5405, 0.3220, -0.3441, 0.6488, -0.0284, -0.2334]).astype(
            np.float32).reshape([1, -1])

        w0_np = np.concatenate(
            (wih_l0, whh_l0, bih_l0 + bhh_l0),
            axis=1).reshape([-1, 1, 1])
        self.w0 = Parameter(initializer(Tensor(w0_np), w0_np.shape), name='w0')
        self.lstm = P.LSTM(input_size=input_size, hidden_size=hidden_size, num_layers=num_layers,
                           has_bias=has_bias, bidirectional=bidirectional, dropout=dropout)

    @ms_function
    def construct(self):
        return self.lstm(self.x, self.h0, self.c0, self.w0)[0]

@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_grad():
    seq_len = 5
    batch_size = 2
    input_size = 10
    hidden_size = 2
    num_layers = 1
    has_bias = True
    bidirectional = False
    dropout = 0.0
    num_directions = 1
    if bidirectional:
        num_directions = 2
    net = Grad(Net(seq_len, batch_size, input_size, hidden_size, num_layers, has_bias, bidirectional, dropout))
    dy = np.array([[[-3.5471e-01, 7.0540e-01],
                    [2.7161e-01, 1.0865e+00]],

                   [[-4.2431e-01, 1.4955e+00],
                    [-4.0418e-01, -2.3282e-01]],

                   [[-1.3654e+00, 1.9251e+00],
                    [-4.6481e-01, 1.3138e+00]],

                   [[1.2914e+00, -2.3753e-01],
                    [5.3589e-01, -1.0981e-01]],

                   [[-1.6032e+00, -1.8818e-01],
                    [1.0065e-01, 9.2045e-01]]]).astype(np.float32)
    dx, dhx, dcx, dw = net(Tensor(dy))
    print(dx)
    print(dhx)
    print(dcx)
    print(dw)


# test_multi_layer_bilstm()
# test_lstm()
# tf_lstm_test()
# test_grad()