!1310 add cpu lstm

Merge pull request !1310 from baihuawei/cpu-lstm
mindspore-ci-bot 2020-05-28 10:35:09 +08:00 committed by Gitee
commit 14f9a6e31c
15 changed files with 1187 additions and 39 deletions

View File

@ -0,0 +1,120 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "kernel/cpu/mkldnn/lstm_cpu_kernel.h"
#include <string>
#include "common/utils.h"
#include "kernel/cpu/mkldnn/mkl_kernel_engine.h"
#include "device/cpu/cpu_device_address.h"
namespace mindspore {
namespace kernel {
void LstmCPUKernel::InitKernel(const CNodePtr &kernel_node) {
MS_EXCEPTION_IF_NULL(kernel_node);
std::vector<size_t> src_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
bidirectional_ = AnfAlgo::GetNodeAttr<bool>(kernel_node, "bidirectional");
input_size_ = AnfAlgo::GetNodeAttr<int>(kernel_node, "input_size");
hidden_size_ = AnfAlgo::GetNodeAttr<int>(kernel_node, "hidden_size");
num_layers_ = AnfAlgo::GetNodeAttr<int>(kernel_node, "num_layers");
batch_size_ = SizeToInt(src_shape[1]);
seq_len_ = SizeToInt(src_shape[0]);
num_directions_ = 1;
if (bidirectional_) {
num_directions_ = 2;
}
int gate_size = 4 * hidden_size_;
for (int i = 0; i < num_layers_; ++i) {
weight_size_ += gate_size * (i == 0 ? input_size_ : hidden_size_ * num_directions_);
weight_h_size_ += gate_size * hidden_size_;
}
weight_size_ = weight_size_ * num_directions_;
weight_h_size_ = weight_h_size_ * num_directions_;
}
bool LstmCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs,
const std::vector<kernel::AddressPtr> & /*workspace*/,
const std::vector<kernel::AddressPtr> &outputs) {
using dt = dnnl::memory::data_type;
using tag = dnnl::memory::format_tag;
using dim = dnnl::memory::dims;
auto eng = MKLKernelEngine::Get().engine();
dnnl::stream s(eng);
auto formatted_md = [](dim dimensions, tag layout) { return dnnl::memory::desc{{dimensions}, dt::f32, layout}; };
dnnl::rnn_direction direction = dnnl::rnn_direction::unidirectional;
if (bidirectional_) {
direction = dnnl::rnn_direction::bidirectional_concat;
}
dim src_dims = {seq_len_, batch_size_, input_size_};
dim src_h_dims = {num_layers_, num_directions_, batch_size_, hidden_size_};
dim src_c_dims = {num_layers_, num_directions_, batch_size_, hidden_size_};
dim weights_dims = {num_layers_, num_directions_, input_size_, 4, hidden_size_};
dim weights_h_dims = {num_layers_, num_directions_, hidden_size_, 4, hidden_size_};
dim bias_dims = {num_layers_, num_directions_, 4, hidden_size_};
dim dst_dims = {seq_len_, batch_size_, hidden_size_ * num_directions_};
dim dst_h_dims = {num_layers_, num_directions_, batch_size_, hidden_size_};
dim dst_c_dims = {num_layers_, num_directions_, batch_size_, hidden_size_};
dnnl::memory::desc src_desc = formatted_md(src_dims, tag::tnc);
dnnl::memory::desc src_h_desc = formatted_md(src_h_dims, tag::ldnc);
dnnl::memory::desc src_c_desc = formatted_md(src_c_dims, tag::ldnc);
dnnl::memory::desc weights_desc = formatted_md(weights_dims, tag::ldigo);
dnnl::memory::desc weights_h_desc = formatted_md(weights_h_dims, tag::ldigo);
dnnl::memory::desc bias_desc = formatted_md(bias_dims, tag::ldgo);
dnnl::memory::desc dst_desc = formatted_md(dst_dims, tag::tnc);
dnnl::memory::desc dst_h_desc = formatted_md(dst_h_dims, tag::ldnc);
dnnl::memory::desc dst_c_desc = formatted_md(dst_c_dims, tag::ldnc);
dnnl::lstm_forward::desc desc =
dnnl::lstm_forward::desc(dnnl::prop_kind::forward_training, direction, src_desc, src_h_desc, src_c_desc,
weights_desc, weights_h_desc, bias_desc, dst_desc, dst_h_desc, dst_c_desc);
auto prim_desc = dnnl::lstm_forward::primitive_desc(desc, MKLKernelEngine::Get().engine());
auto workspace_memory = dnnl::memory(prim_desc.workspace_desc(), eng);
auto src_memory = dnnl::memory(formatted_md(src_dims, tag::tnc), eng);
write_to_dnnl_memory(inputs[0]->addr, src_memory);
auto src_h_memory = dnnl::memory(prim_desc.src_iter_desc(), eng);
auto src_c_memory = dnnl::memory(prim_desc.src_iter_c_desc(), eng);
write_to_dnnl_memory(inputs[1]->addr, src_h_memory);
write_to_dnnl_memory(inputs[2]->addr, src_c_memory);
auto weights_memory = dnnl::memory(formatted_md(weights_dims, tag::ldigo), eng);
auto weights_h_memory = dnnl::memory(formatted_md(weights_h_dims, tag::ldigo), eng);
auto bias_memory = dnnl::memory(formatted_md(bias_dims, tag::ldgo), eng);
write_to_dnnl_memory(inputs[3]->addr, weights_memory);
write_to_dnnl_memory(reinterpret_cast<float *>(inputs[3]->addr) + weight_size_, weights_h_memory);
write_to_dnnl_memory(reinterpret_cast<float *>(inputs[3]->addr) + weight_size_ + weight_h_size_, bias_memory);
auto dst_memory = dnnl::memory(formatted_md(dst_dims, tag::tnc), eng);
auto dst_h_memory = dnnl::memory(prim_desc.dst_iter_desc(), eng);
auto dst_c_memory = dnnl::memory(prim_desc.dst_iter_c_desc(), eng);
dnnl::lstm_forward fw_layer(prim_desc);
workspace_memory.set_data_handle(outputs[3]->addr);
dst_memory.set_data_handle(outputs[0]->addr);
dst_h_memory.set_data_handle(outputs[1]->addr);
dst_c_memory.set_data_handle(outputs[2]->addr);
fw_layer.execute(s, {{DNNL_ARG_SRC_LAYER, src_memory},
{DNNL_ARG_SRC_ITER, src_h_memory},
{DNNL_ARG_SRC_ITER_C, src_c_memory},
{DNNL_ARG_WEIGHTS_LAYER, weights_memory},
{DNNL_ARG_WEIGHTS_ITER, weights_h_memory},
{DNNL_ARG_BIAS, bias_memory},
{DNNL_ARG_DST_LAYER, dst_memory},
{DNNL_ARG_DST_ITER, dst_h_memory},
{DNNL_ARG_DST_ITER_C, dst_c_memory},
{DNNL_ARG_WORKSPACE, workspace_memory}});
return true;
}
} // namespace kernel
} // namespace mindspore
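
For reference, the Launch body above reads all trainable parameters from a single flat float32 buffer (inputs[3]) in the order input-to-hidden weights, hidden-to-hidden weights, bias, using the weight_size_ and weight_h_size_ element counts computed in InitKernel. A minimal Python sketch of that bookkeeping (an illustrative helper, not part of the kernel):

def lstm_weight_offsets(input_size, hidden_size, num_layers, bidirectional):
    # Mirrors LstmCPUKernel::InitKernel; counts are in float elements, not bytes.
    num_directions = 2 if bidirectional else 1
    gate_size = 4 * hidden_size
    weight_size = 0    # input-to-hidden weights over all layers
    weight_h_size = 0  # hidden-to-hidden weights over all layers
    for layer in range(num_layers):
        in_size = input_size if layer == 0 else hidden_size * num_directions
        weight_size += gate_size * in_size
        weight_h_size += gate_size * hidden_size
    weight_size *= num_directions
    weight_h_size *= num_directions
    # Offsets into the flat buffer passed as inputs[3]:
    #   [0, weight_size)                           -> DNNL_ARG_WEIGHTS_LAYER
    #   [weight_size, weight_size + weight_h_size) -> DNNL_ARG_WEIGHTS_ITER
    #   [weight_size + weight_h_size, ...)         -> DNNL_ARG_BIAS
    return weight_size, weight_h_size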

View File

@ -0,0 +1,59 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_KERNEL_CPU_LSTM_CPU_KERNEL_H
#define MINDSPORE_CCSRC_KERNEL_CPU_LSTM_CPU_KERNEL_H
#include <vector>
#include <memory>
#include "kernel/cpu/mkldnn/mkl_cpu_kernel.h"
namespace mindspore {
namespace kernel {
class LstmCPUKernel : public MKLCPUKernel {
public:
LstmCPUKernel() = default;
~LstmCPUKernel() override = default;
void InitKernel(const CNodePtr &kernel_node) override;
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs) override;
private:
int weight_size_ = 0;
int weight_h_size_ = 0;
int input_size_;
int hidden_size_;
int num_layers_;
int batch_size_;
int seq_len_;
int num_directions_;
bool bidirectional_;
};
MS_REG_CPU_KERNEL(LSTM,
KernelAttr()
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeFloat32)
.AddOutputAttr(kNumberTypeFloat32)
.AddOutputAttr(kNumberTypeFloat32)
.AddOutputAttr(kNumberTypeFloat32)
.AddOutputAttr(kNumberTypeFloat32)
.AddOutputAttr(kNumberTypeFloat32),
LstmCPUKernel);
} // namespace kernel
} // namespace mindspore
#endif // MINDSPORE_CCSRC_KERNEL_CPU_LSTM_CPU_KERNEL_H

View File

@ -0,0 +1,169 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "kernel/cpu/mkldnn/lstm_grad_cpu_kernel.h"
#include <cstring>
#include <cmath>
#include <numeric>
#include <string>
#include "common/utils.h"
#include "kernel/cpu/mkldnn/mkl_kernel_engine.h"
#include "device/cpu/cpu_device_address.h"
namespace mindspore {
namespace kernel {
void LSTMGradCPUKernel::InitKernel(const CNodePtr &kernel_node) {
MS_EXCEPTION_IF_NULL(kernel_node);
std::vector<size_t> src_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
bidirectional_ = AnfAlgo::GetNodeAttr<bool>(kernel_node, "bidirectional");
input_size_ = AnfAlgo::GetNodeAttr<int>(kernel_node, "input_size");
hidden_size_ = AnfAlgo::GetNodeAttr<int>(kernel_node, "hidden_size");
num_layers_ = AnfAlgo::GetNodeAttr<int>(kernel_node, "num_layers");
batch_size_ = SizeToInt(src_shape[1]);
seq_len_ = SizeToInt(src_shape[0]);
num_directions_ = 1;
if (bidirectional_) {
num_directions_ = 2;
}
int gate_size = 4 * hidden_size_;
for (int i = 0; i < num_layers_; ++i) {
weight_size_ += gate_size * (i == 0 ? input_size_ : hidden_size_ * num_directions_);
weight_h_size_ += gate_size * hidden_size_;
}
weight_size_ = weight_size_ * num_directions_;
weight_h_size_ = weight_h_size_ * num_directions_;
}
bool LSTMGradCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs,
const std::vector<kernel::AddressPtr> & /*workspace*/,
const std::vector<kernel::AddressPtr> &outputs) {
using tag = dnnl::memory::format_tag;
using dt = dnnl::memory::data_type;
using dim = dnnl::memory::dims;
auto eng = MKLKernelEngine::Get().engine();
dnnl::stream s(eng);
auto formatted_md = [](dim dimensions, tag layout) { return dnnl::memory::desc{{dimensions}, dt::f32, layout}; };
auto generic_md = [](dim dimensions) { return dnnl::memory::desc{{dimensions}, dt::f32, tag::any}; };
dnnl::rnn_direction direction = dnnl::rnn_direction::unidirectional;
if (bidirectional_) {
direction = dnnl::rnn_direction::bidirectional_concat;
}
dim src_dims = {seq_len_, batch_size_, input_size_};
dim src_h_dims = {num_layers_, num_directions_, batch_size_, hidden_size_};
dim src_c_dims = {num_layers_, num_directions_, batch_size_, hidden_size_};
dim weights_dims = {num_layers_, num_directions_, input_size_, 4, hidden_size_};
dim weights_h_dims = {num_layers_, num_directions_, hidden_size_, 4, hidden_size_};
dim bias_dims = {num_layers_, num_directions_, 4, hidden_size_};
dim dst_dims = {seq_len_, batch_size_, hidden_size_ * num_directions_};
dim dst_h_dims = {num_layers_, num_directions_, batch_size_, hidden_size_};
dim dst_c_dims = {num_layers_, num_directions_, batch_size_, hidden_size_};
dnnl::memory::desc src_desc = formatted_md(src_dims, tag::tnc);
dnnl::memory::desc src_h_desc = formatted_md(src_h_dims, tag::ldnc);
dnnl::memory::desc src_c_desc = formatted_md(src_c_dims, tag::ldnc);
dnnl::memory::desc weights_desc = formatted_md(weights_dims, tag::ldigo);
dnnl::memory::desc weights_h_desc = formatted_md(weights_h_dims, tag::ldigo);
dnnl::memory::desc bias_desc = formatted_md(bias_dims, tag::ldgo);
dnnl::memory::desc dst_desc = formatted_md(dst_dims, tag::tnc);
dnnl::memory::desc dst_h_desc = formatted_md(dst_h_dims, tag::ldnc);
dnnl::memory::desc dst_c_desc = formatted_md(dst_c_dims, tag::ldnc);
dnnl::lstm_forward::desc forward_desc =
dnnl::lstm_forward::desc(dnnl::prop_kind::forward_training, direction, src_desc, src_h_desc, src_c_desc,
weights_desc, weights_h_desc, bias_desc, dst_desc, dst_h_desc, dst_c_desc);
auto prim_forward_desc = dnnl::lstm_forward::primitive_desc(forward_desc, eng);
dnnl::lstm_backward::desc backward_desc = dnnl::lstm_backward::desc(
dnnl::prop_kind::backward, direction, src_desc, src_h_desc, src_c_desc, generic_md(weights_dims),
generic_md(weights_h_dims), generic_md(bias_dims), dst_desc, dst_h_desc, dst_c_desc, src_desc, src_h_desc,
src_c_desc, weights_desc, weights_h_desc, bias_desc, dst_desc, dst_h_desc, dst_c_desc);
auto prim_backward_desc = dnnl::lstm_backward::primitive_desc(backward_desc, eng, prim_forward_desc);
// construct fw memory
auto src_memory = dnnl::memory(formatted_md(src_dims, tag::tnc), eng);
write_to_dnnl_memory(inputs[0]->addr, src_memory);
auto src_h_memory = dnnl::memory(prim_forward_desc.src_iter_desc(), eng);
auto src_c_memory = dnnl::memory(prim_forward_desc.src_iter_c_desc(), eng);
write_to_dnnl_memory(inputs[1]->addr, src_h_memory);
write_to_dnnl_memory(inputs[2]->addr, src_c_memory);
auto user_weights_memory = dnnl::memory(formatted_md(weights_dims, tag::ldigo), eng);
auto user_weights_h_memory = dnnl::memory(formatted_md(weights_h_dims, tag::ldigo), eng);
auto user_bias_memory = dnnl::memory(formatted_md(bias_dims, tag::ldgo), eng);
write_to_dnnl_memory(inputs[3]->addr, user_weights_memory);
write_to_dnnl_memory(reinterpret_cast<float *>(inputs[3]->addr) + weight_size_, user_weights_h_memory);
write_to_dnnl_memory(reinterpret_cast<float *>(inputs[3]->addr) + weight_size_ + weight_h_size_, user_bias_memory);
auto weights_memory = dnnl::memory(prim_backward_desc.weights_layer_desc(), eng);
auto weights_h_memory = dnnl::memory(prim_backward_desc.weights_iter_desc(), eng);
auto bias_memory = dnnl::memory(prim_forward_desc.bias_desc(), eng);
dnnl::reorder(user_weights_memory, weights_memory).execute(s, user_weights_memory, weights_memory);
dnnl::reorder(user_weights_h_memory, weights_h_memory).execute(s, user_weights_h_memory, weights_h_memory);
dnnl::reorder(user_bias_memory, bias_memory).execute(s, user_bias_memory, bias_memory);
auto dst_memory = dnnl::memory(formatted_md(dst_dims, tag::tnc), eng);
write_to_dnnl_memory(reinterpret_cast<float *>(inputs[4]->addr), dst_memory);
auto dst_h_memory = dnnl::memory(prim_backward_desc.dst_iter_desc(), eng);
write_to_dnnl_memory(reinterpret_cast<float *>(inputs[5]->addr), dst_h_memory);
auto dst_c_memory = dnnl::memory(prim_backward_desc.dst_iter_c_desc(), eng);
write_to_dnnl_memory(reinterpret_cast<float *>(inputs[6]->addr), dst_c_memory);
auto workspace_memory = dnnl::memory(prim_forward_desc.workspace_desc(), eng);
write_to_dnnl_memory(inputs[10]->addr, workspace_memory);
// construct diff memory
auto diff_src_memory = dnnl::memory(formatted_md(src_dims, tag::tnc), eng);
auto diff_src_h_memory = dnnl::memory(prim_backward_desc.diff_src_iter_desc(), eng);
auto diff_src_c_memory = dnnl::memory(prim_backward_desc.diff_src_iter_c_desc(), eng);
auto diff_weights_memory = dnnl::memory(prim_backward_desc.diff_weights_layer_desc(), eng);
auto diff_weights_h_memory = dnnl::memory(prim_backward_desc.diff_weights_iter_desc(), eng);
auto diff_bias_memory = dnnl::memory(prim_backward_desc.diff_bias_desc(), eng);
auto diff_dst_memory = dnnl::memory(formatted_md(dst_dims, tag::tnc), eng);
write_to_dnnl_memory(reinterpret_cast<float *>(inputs[7]->addr), diff_dst_memory);
auto diff_dst_h_memory = dnnl::memory(prim_backward_desc.diff_dst_iter_desc(), eng);
write_to_dnnl_memory(reinterpret_cast<float *>(inputs[8]->addr), diff_dst_h_memory);
auto diff_dst_c_memory = dnnl::memory(prim_backward_desc.diff_dst_iter_c_desc(), eng);
write_to_dnnl_memory(reinterpret_cast<float *>(inputs[9]->addr), diff_dst_c_memory);
diff_src_memory.set_data_handle(outputs[0]->addr);
diff_src_h_memory.set_data_handle(outputs[1]->addr);
diff_src_c_memory.set_data_handle(outputs[2]->addr);
diff_weights_memory.set_data_handle(outputs[3]->addr);
diff_weights_h_memory.set_data_handle(reinterpret_cast<float *>(outputs[3]->addr) + weight_size_);
diff_bias_memory.set_data_handle(reinterpret_cast<float *>(outputs[3]->addr) + weight_size_ + weight_h_size_);
dnnl::lstm_backward bwd_layer(prim_backward_desc);
bwd_layer.execute(s, {{DNNL_ARG_SRC_LAYER, src_memory},
{DNNL_ARG_SRC_ITER, src_h_memory},
{DNNL_ARG_SRC_ITER_C, src_c_memory},
{DNNL_ARG_WEIGHTS_LAYER, weights_memory},
{DNNL_ARG_WEIGHTS_ITER, weights_h_memory},
{DNNL_ARG_BIAS, bias_memory},
{DNNL_ARG_DST_LAYER, dst_memory},
{DNNL_ARG_DST_ITER, dst_h_memory},
{DNNL_ARG_DST_ITER_C, dst_c_memory},
{DNNL_ARG_DIFF_SRC_LAYER, diff_src_memory},
{DNNL_ARG_DIFF_SRC_ITER, diff_src_h_memory},
{DNNL_ARG_DIFF_SRC_ITER_C, diff_src_c_memory},
{DNNL_ARG_DIFF_WEIGHTS_LAYER, diff_weights_memory},
{DNNL_ARG_DIFF_WEIGHTS_ITER, diff_weights_h_memory},
{DNNL_ARG_DIFF_BIAS, diff_bias_memory},
{DNNL_ARG_DIFF_DST_LAYER, diff_dst_memory},
{DNNL_ARG_DIFF_DST_ITER, diff_dst_h_memory},
{DNNL_ARG_DIFF_DST_ITER_C, diff_dst_c_memory},
{DNNL_ARG_WORKSPACE, workspace_memory}});
return true;
}
} // namespace kernel
} // namespace mindspore
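
To make the index arithmetic above easier to follow: the grad kernel's argument order, as used by this Launch body and by the bprop_cpu hook added to the grad rules further down, can be summarized as below (a descriptive sketch; the names are labels only, not part of the code):

# Assumed CPU LSTMGrad argument order, read off the Launch body above.
LSTM_GRAD_INPUTS = [
    "x",        # inputs[0]  src_layer
    "hx",       # inputs[1]  src_iter
    "cx",       # inputs[2]  src_iter_c
    "w",        # inputs[3]  flat [w_ih | w_hh | bias] buffer
    "y",        # inputs[4]  dst_layer saved from the forward pass
    "hy",       # inputs[5]  dst_iter saved from the forward pass
    "cy",       # inputs[6]  dst_iter_c saved from the forward pass
    "dy",       # inputs[7]  diff_dst_layer
    "dhy",      # inputs[8]  diff_dst_iter
    "dcy",      # inputs[9]  diff_dst_iter_c
    "reserve",  # inputs[10] oneDNN workspace written by the forward kernel
]
LSTM_GRAD_OUTPUTS = [
    "dx",   # outputs[0]
    "dhx",  # outputs[1]
    "dcx",  # outputs[2]
    "dw",   # outputs[3]  flat [dw_ih | dw_hh | db], same offsets as "w"
]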

View File

@ -0,0 +1,67 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_KERNEL_CPU_LSTM_GRAD_CPU_KERNEL_H_
#define MINDSPORE_CCSRC_KERNEL_CPU_LSTM_GRAD_CPU_KERNEL_H_
#include <vector>
#include <memory>
#include "kernel/cpu/mkldnn/mkl_cpu_kernel.h"
namespace mindspore {
namespace kernel {
class LSTMGradCPUKernel : public MKLCPUKernel {
public:
LSTMGradCPUKernel() = default;
~LSTMGradCPUKernel() override = default;
void InitKernel(const CNodePtr &kernel_node) override;
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs) override;
private:
int weight_size_ = 0;
int weight_h_size_ = 0;
int input_size_;
int hidden_size_;
int num_layers_;
int batch_size_;
int seq_len_;
int num_directions_;
bool bidirectional_;
};
MS_REG_CPU_KERNEL(LSTMGrad,
KernelAttr()
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeFloat32)
.AddOutputAttr(kNumberTypeFloat32)
.AddOutputAttr(kNumberTypeFloat32)
.AddOutputAttr(kNumberTypeFloat32)
.AddOutputAttr(kNumberTypeFloat32),
LSTMGradCPUKernel);
} // namespace kernel
} // namespace mindspore
#endif // MINDSPORE_CCSRC_KERNEL_CPU_LSTM_GRAD_CPU_KERNEL_H_

View File

@ -1,5 +1,5 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@ -98,5 +98,11 @@ void MKLCPUKernel::SetArgumentHandle(int arg_key, void *ptr) {
}
void MKLCPUKernel::ExecutePrimitive() { MKLKernelEngine::Get().Execute(primitive_, arguments_); }
void MKLCPUKernel::write_to_dnnl_memory(void *handle, const dnnl::memory &mem) {
MKLKernelEngine::Get().write_to_dnnl_memory(handle, mem);
}
void MKLCPUKernel::read_from_dnnl_memory(void *handle, const dnnl::memory &mem) {
MKLKernelEngine::Get().read_from_dnnl_memory(handle, mem);
}
} // namespace kernel
} // namespace mindspore

View File

@ -1,5 +1,5 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@ -39,6 +39,8 @@ class MKLCPUKernel : public CPUKernel {
dnnl::memory::format_tag GetDefaultFormatTag(const dnnl::memory::dims &dims) const;
dnnl::memory::desc GetDefaultMemDesc(const std::vector<size_t> &shape);
void ExecutePrimitive();
void write_to_dnnl_memory(void *handle, const dnnl::memory &mem);
void read_from_dnnl_memory(void *handle, const dnnl::memory &mem);
std::unordered_map<int, dnnl::memory> arguments_;
std::shared_ptr<dnnl::primitive> primitive_{nullptr};
};

View File

@ -1,5 +1,5 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.

View File

@ -1,5 +1,5 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@ -15,7 +15,10 @@
*/
#ifndef MINDSPORE_MKL_KERNEL_ENGINE_H_
#define MINDSPORE_MKL_KERNEL_ENGINE_H_
#include <cstdlib>
#include <algorithm>
#include <iostream>
#include <string>
#include <unordered_map>
#include <vector>
#include <memory>
@ -39,6 +42,30 @@ class MKLKernelEngine {
void Execute(const std::shared_ptr<dnnl::primitive> &primitive,
const std::unordered_map<int, dnnl::memory> &arguments);
inline void read_from_dnnl_memory(void *handle, const dnnl::memory &mem) {
dnnl::engine eng = mem.get_engine();
size_t bytes = mem.get_desc().get_size();
if (eng.get_kind() == dnnl::engine::kind::cpu) {
auto dst = reinterpret_cast<uint8_t *>(handle);
uint8_t *src = reinterpret_cast<uint8_t *>(mem.get_data_handle());
for (size_t i = 0; i < bytes; ++i) {
dst[i] = src[i];
}
}
}
// Read from handle, write to memory
inline void write_to_dnnl_memory(void *handle, const dnnl::memory &mem) {
dnnl::engine eng = mem.get_engine();
size_t bytes = mem.get_desc().get_size();
if (eng.get_kind() == dnnl::engine::kind::cpu) {
auto src = reinterpret_cast<uint8_t *>(handle);
uint8_t *dst = reinterpret_cast<uint8_t *>(mem.get_data_handle());
for (size_t i = 0; i < bytes; ++i) {
dst[i] = src[i];
}
}
}
private:
MKLKernelEngine() : engine_(dnnl::engine::kind::cpu, 0), stream_(engine_) {}
~MKLKernelEngine() = default;

View File

@ -18,8 +18,13 @@ from mindspore.nn.cell import Cell
from mindspore.common.parameter import Parameter
from mindspore.common.initializer import initializer
from mindspore._checkparam import Validator as validator
from mindspore import context
import mindspore.nn as nn
from mindspore.common.tensor import Tensor
import numpy as np
__all__ = ['LSTM', 'LSTMCell']
__all__ = ['LSTM']
class LSTM(Cell):
r"""
@ -102,6 +107,7 @@ class LSTM(Cell):
>>> c0 = Tensor(np.ones([1 * 2, 3, 12]).astype(np.float32))
>>> output, (hn, cn) = net(input, h0, c0)
"""
def __init__(self,
input_size,
hidden_size,
@ -118,19 +124,20 @@ class LSTM(Cell):
self.batch_first = validator.check_value_type("batch_first", batch_first, [bool], self.cls_name)
self.dropout = float(dropout)
self.bidirectional = bidirectional
if self.batch_first:
self.transpose1 = P.Transpose()
self.transpose2 = P.Transpose()
num_directions = 2 if self.bidirectional else 1
self.cpu_target = False
if context.get_context("device_target") == "CPU":
self.cpu_target = True
if not self.cpu_target:
self.lstm = P.LSTM(input_size=self.input_size,
hidden_size=self.hidden_size,
num_layers=self.num_layers,
has_bias=self.has_bias,
bidirectional=self.bidirectional,
dropout=self.dropout)
num_directions = 2 if self.bidirectional else 1
weight_size = 0
gate_size = 4 * self.hidden_size
for layer in range(self.num_layers):
@ -140,17 +147,175 @@ class LSTM(Cell):
if self.has_bias:
increment_size += 2 * gate_size
weight_size += increment_size * num_directions
self.weight = Parameter(initializer(0.0, [weight_size, 1, 1]), name='weight')
else:
layer = []
layer.append(nn.LSTMCell(input_size=self.input_size,
hidden_size=self.hidden_size,
layer_index=0,
has_bias=self.has_bias,
bidirectional=self.bidirectional,
dropout=self.dropout))
for i in range(num_layers - 1):
layer.append(nn.LSTMCell(input_size=self.hidden_size * num_directions,
hidden_size=self.hidden_size,
layer_index=i + 1,
has_bias=self.has_bias,
bidirectional=self.bidirectional,
dropout=self.dropout))
self.lstms = layer
self.fill = P.Fill()
self.shape = P.Shape()
def construct(self, x, hx):
if self.batch_first:
x = self.transpose1(x, (1, 0, 2))
h0, c0 = hx
output, hn, cn, _, _ = self.lstm(x, h0, c0, self.weight)
if not self.cpu_target:
h, c = hx
output, h, c, _, _ = self.lstm(x, h, c, self.weight)
if self.batch_first:
output = self.transpose2(output, (1, 0, 2))
return (output, (hn, cn))
return (output, (h, c))
h, c = hx
output, hn, cn, _, _ = self.lstms[0](x, h[0], c[0])
for i in range(1, self.num_layers):
output, hn, cn, _, _ = self.lstms[i](output, h[i], c[i])
if self.batch_first:
output = self.transpose2(output, (1, 0, 2))
return output, hn, cn, _, _
class LSTMCell(Cell):
r"""
LSTM (Long Short-Term Memory) layer.
Applies an LSTM layer to the input.
There are two pipelines connecting two consecutive cells in an LSTM model; one is the cell state pipeline
and the other is the hidden state pipeline. Denote two consecutive time nodes as :math:`t-1` and :math:`t`.
Given an input :math:`x_t` at time :math:`t`, a hidden state :math:`h_{t-1}` and a cell
state :math:`c_{t-1}` of the layer at time :math:`{t-1}`, the cell state and hidden state at
time :math:`t` are computed using a gating mechanism. Input gate :math:`i_t` is designed to protect the cell
from perturbation by irrelevant inputs. Forget gate :math:`f_t` affords protection of the cell by forgetting
some information in the past, which is stored in :math:`h_{t-1}`. Output gate :math:`o_t` protects other
units from perturbation by currently irrelevant memory contents. The candidate cell state :math:`\tilde{c}_t` is
calculated from the current input, with the input gate applied to it. Finally, the current cell state
:math:`c_{t}` and hidden state :math:`h_{t}` are computed from the calculated gates and cell states. The complete
formulation is as follows.
.. math::
\begin{array}{ll} \\
i_t = \sigma(W_{ix} x_t + b_{ix} + W_{ih} h_{(t-1)} + b_{ih}) \\
f_t = \sigma(W_{fx} x_t + b_{fx} + W_{fh} h_{(t-1)} + b_{fh}) \\
\tilde{c}_t = \tanh(W_{cx} x_t + b_{cx} + W_{ch} h_{(t-1)} + b_{ch}) \\
o_t = \sigma(W_{ox} x_t + b_{ox} + W_{oh} h_{(t-1)} + b_{oh}) \\
c_t = f_t * c_{(t-1)} + i_t * \tilde{c}_t \\
h_t = o_t * \tanh(c_t) \\
\end{array}
Here :math:`\sigma` is the sigmoid function, and :math:`*` is the Hadamard product. :math:`W, b`
are learnable weights between the output and the input in the formula. For instance,
:math:`W_{ix}, b_{ix}` are the weight and bias used to transform from input :math:`x` to :math:`i`.
Details can be found in paper `LONG SHORT-TERM MEMORY
<https://www.bioinf.jku.at/publications/older/2604.pdf>`_ and
`Long Short-Term Memory Recurrent Neural Network Architectures for Large Scale Acoustic Modeling
<https://static.googleusercontent.com/media/research.google.com/zh-CN//pubs/archive/43905.pdf>`_.
Args:
input_size (int): Number of features of input.
hidden_size (int): Number of features of hidden layer.
layer_index (int): Index of the current layer in the stacked LSTM. Default: 0.
has_bias (bool): Specifies whether has bias `b_ih` and `b_hh`. Default: True.
batch_first (bool): Specifies whether the first dimension of input is batch_size. Default: False.
dropout (float, int): If not 0, appends a `Dropout` layer to the outputs of each
LSTM layer except the last layer. Default: 0. The range of dropout is [0.0, 1.0].
bidirectional (bool): Specifies whether this is a bidirectional LSTM. If set True,
number of directions will be 2 otherwise number of directions is 1. Default: False.
Inputs:
- **input** (Tensor) - Tensor of shape (seq_len, batch_size, `input_size`).
- **h** (Tensor) - Tensor of data type mindspore.float32 or
mindspore.float16 and shape (num_directions * `num_layers`, batch_size, `hidden_size`).
- **c** (Tensor) - Tensor of data type mindspore.float32 or
mindspore.float16 and shape (num_directions * `num_layers`, batch_size, `hidden_size`).
The data type of `h` and `c` should be the same as that of `input`.
Outputs:
`output`, `h_n`, `c_n`, `reserve`, `state`.
- **output** (Tensor) - Tensor of shape (seq_len, batch_size, num_directions * `hidden_size`).
- **h_n** (Tensor) - Tensor of shape (num_directions * `num_layers`, batch_size, `hidden_size`).
- **c_n** (Tensor) - Tensor of shape (num_directions * `num_layers`, batch_size, `hidden_size`).
- **reserve** (Tensor) - Reserved.
- **state** (Tensor) - Reserved.
Examples:
>>> class LstmNet(nn.Cell):
>>> def __init__(self, input_size, hidden_size, layer_index, has_bias, batch_first, bidirectional):
>>> super(LstmNet, self).__init__()
>>> self.lstm = nn.LSTMCell(input_size=input_size,
>>> hidden_size=hidden_size,
>>> layer_index=layer_index,
>>> has_bias=has_bias,
>>> batch_first=batch_first,
>>> bidirectional=bidirectional,
>>> dropout=0.0)
>>>
>>> def construct(self, inp, h0, c0):
>>> return self.lstm(inp, (h0, c0))
>>>
>>> net = LstmNet(10, 12, 2, has_bias=True, batch_first=True, bidirectional=False)
>>> input = Tensor(np.ones([3, 5, 10]).astype(np.float32))
>>> h0 = Tensor(np.ones([1, 3, 12]).astype(np.float32))
>>> c0 = Tensor(np.ones([1, 3, 12]).astype(np.float32))
>>> output, hn, cn, _, _ = net(input, h0, c0)
"""
def __init__(self,
input_size,
hidden_size,
layer_index=0,
has_bias=True,
batch_first=False,
dropout=0,
bidirectional=False):
super(LSTMCell, self).__init__()
self.input_size = input_size
self.hidden_size = hidden_size
self.num_layers = 1
self.layer_index = layer_index
self.has_bias = has_bias
self.batch_first = validator.check_value_type("batch_first", batch_first, [bool], self.cls_name)
self.dropout = float(dropout)
self.bidirectional = bidirectional
self.num_directions = 1
if self.bidirectional:
self.num_directions = 2
if self.batch_first:
self.transpose1 = P.Transpose()
self.transpose2 = P.Transpose()
w_np = np.ones([(self.input_size + self.hidden_size) * self.num_directions * self.hidden_size * 4, 1]).astype(
np.float32) * 0.01
if has_bias:
b_np = np.ones([self.num_directions * self.hidden_size * 4, 1]).astype(
np.float32) * 0.01
else:
b_np = np.zeros([self.num_directions * self.hidden_size * 4, 1]).astype(
np.float32) * 0.01
wb_np = np.concatenate((w_np, b_np), axis=0).reshape([-1, 1, 1])
self.w = Parameter(initializer(Tensor(wb_np), wb_np.shape), name='w' + str(self.layer_index))
self.lstm = P.LSTM(input_size=self.input_size,
hidden_size=self.hidden_size,
num_layers=1,
has_bias=self.has_bias,
bidirectional=self.bidirectional,
dropout=self.dropout)
def construct(self, x, h, c):
if self.batch_first:
x = self.transpose1(x, (1, 0, 2))
output, hn, cn, _, _ = self.lstm(x, h, c, self.w)
if self.batch_first:
output = self.transpose2(output, (1, 0, 2))
return output, hn, cn, _, _
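
As a concrete companion to the gate equations in the LSTMCell docstring above, one time step of a single unidirectional cell can be written out in NumPy. This is an illustrative sketch only: the weight and bias names, and the i, f, c~, o gate order, are assumptions, and stacking, directions and dropout are omitted.

import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def lstm_cell_step(x_t, h_prev, c_prev, W_ih, W_hh, b_ih, b_hh):
    # W_ih: (4*hidden, input_size), W_hh: (4*hidden, hidden); gates packed as i, f, c~, o.
    gates = W_ih @ x_t + b_ih + W_hh @ h_prev + b_hh
    i, f, g, o = np.split(gates, 4)
    i, f, o = sigmoid(i), sigmoid(f), sigmoid(o)
    g = np.tanh(g)                 # candidate cell state \tilde{c}_t
    c_t = f * c_prev + i * g       # c_t = f_t * c_{t-1} + i_t * \tilde{c}_t
    h_t = o * np.tanh(c_t)         # h_t = o_t * tanh(c_t)
    return h_t, c_t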

View File

@ -49,6 +49,7 @@ def get_bprop_dtype(self):
def bprop(x, out, dout):
return (zeros_like(x),)
return bprop
@ -61,6 +62,7 @@ def get_bprop_cast(self):
def bprop(x, t, out, dout):
dx = cast(dout, get_dtype(x))
return dx, zeros_like(t)
return bprop
@ -70,6 +72,7 @@ def get_bprop_shape(self):
def bprop(x, out, dout):
return (zeros_like(x),)
return bprop
@ -82,6 +85,7 @@ def get_bprop_split(self):
concat_op = P.Concat(axis)
dx = concat_op(dout)
return (dx,)
return bprop
@ -91,6 +95,7 @@ def get_bprop_rank(self):
def bprop(x, out, dout):
return (zeros_like(x),)
return bprop
@ -101,6 +106,7 @@ def get_bprop_reshape(self):
def bprop(x, shp, out, dout):
shapex = shape_op(x)
return reshape(dout, shapex), zeros_like(shp)
return bprop
@ -111,6 +117,7 @@ def get_bprop_expand_dims(self):
def bprop(x, axis, out, dout):
shapex = shape_op(x)
return reshape(dout, shapex), zeros_like(axis)
return bprop
@ -121,6 +128,7 @@ def get_bprop_squeeze(self):
def bprop(x, out, dout):
shapex = shape_op(x)
return (reshape(dout, shapex),)
return bprop
@ -132,6 +140,7 @@ def get_bprop_flatten(self):
def bprop(x, out, dout):
dx = flatten_grad(dout, shape_op(x))
return (dx,)
return bprop
@ -166,6 +175,7 @@ def _tile_shape(multiples, shapex):
@bprop_getters.register(P.Tile)
def get_bprop_tile(self):
"""Generate bprop for Tile"""
def bprop(x, multiples, out, dout):
shapex = shape_op(x)
r_shape = _tile_shape(multiples, shapex)
@ -174,6 +184,7 @@ def get_bprop_tile(self):
dx = reduce_sum(reshape(dout, r_shape), axis)
dx = reshape(dx, shapex)
return dx, zeros_like(multiples)
return bprop
@ -183,6 +194,7 @@ def get_bprop_transpose(self):
def bprop(x, perm, out, dout):
return transpose(dout, invert_permutation(perm)), zeros_like(perm)
return bprop
@ -198,6 +210,7 @@ def get_bprop_concat(self):
slice_out = P.Slice()(dout, out_offset[i], shape_op(x[i]))
dx = dx + (slice_out,)
return (dx,)
return bprop
@ -215,12 +228,12 @@ def get_bprop_slice(self):
dx = P.Pad(_slice_grad_pad(begin, size, shape_op(x)))(dout)
return (dx, zeros_like(begin), zeros_like(size))
def bprop_gpu(x, begin, size, out, dout):
def bprop_grad(x, begin, size, out, dout):
dx = G.SliceGrad()(dout, x, begin, size)
return (dx, zeros_like(begin), zeros_like(size))
if context.get_context('device_target') == "GPU":
return bprop_gpu
if context.get_context('device_target') == "GPU" or context.get_context('device_target') == "CPU":
return bprop_grad
return bprop
@ -249,6 +262,7 @@ def _generate_inverse_index(x_shape, axis):
@bprop_getters.register(P.GatherV2)
def get_bprop_gather_v2(self):
"""Generate bprop for GatherV2"""
def bprop(x, indices, axis, out, dout):
if F.rank(dout) == 0:
dout = P.ExpandDims()(dout, -1)
@ -265,6 +279,7 @@ def get_bprop_gather_v2(self):
perm_2 = _generate_inverse_index(x_shp, axis)
params_grad = transpose(params_grad, perm_2)
return params_grad, zeros_like(indices), zeros_like(axis)
return bprop
@ -286,6 +301,7 @@ def get_bprop_pack(self):
pack_grad = P.Unpack(axis)
out = pack_grad(dout)
return (out,)
return bprop
@ -298,6 +314,7 @@ def get_bprop_unpack(self):
unpack_grad = P.Pack(axis)
out = unpack_grad(dout)
return (out,)
return bprop
@ -313,6 +330,7 @@ def get_bprop_strided_slice(self):
def bprop(x, begin, end, strides, out, dout):
dx = input_grad(dout, shape_op(x), begin, end, strides)
return dx, zeros_like(begin), zeros_like(end), zeros_like(strides)
return bprop
@ -322,6 +340,7 @@ def get_bprop_eye(self):
def bprop(n, m, t, out, dout):
return zeros_like(n), zeros_like(m), zeros_like(t)
return bprop
@ -332,6 +351,7 @@ def get_bprop_select(self):
def bprop(cond, x, y, out, dout):
return zeros_like(cond), select(cond, dout, zeros_like(x)), select(cond, zeros_like(y), dout)
return bprop
@ -522,9 +542,11 @@ def get_bprop_unsorted_segment_min(self):
def get_bprop_space_to_batch(self):
"""Generate bprop for SpaceToBatch"""
space_to_batch_grad = P.BatchToSpace(self.block_size, self.paddings)
def bprop(x, out, dout):
dx = space_to_batch_grad(dout)
return (dx,)
return bprop
@ -532,9 +554,11 @@ def get_bprop_space_to_batch(self):
def get_bprop_batch_to_space(self):
"""Generate bprop for BatchToSpace"""
batch_to_space_grad = P.SpaceToBatch(self.block_size, self.crops)
def bprop(x, out, dout):
dx = batch_to_space_grad(dout)
return (dx,)
return bprop

View File

@ -15,7 +15,6 @@
"""Define the grad rules of math related operations."""
from functools import reduce
import numpy as np
from .. import functional as F
@ -26,7 +25,6 @@ from ..functional import broadcast_gradient_args, reduced_shape, tuple_div
from .grad_base import bprop_getters
from ..primitive import constexpr
shape_op = P.Shape()
reduce_sum = P.ReduceSum()
reshape = P.Reshape()
@ -129,6 +127,7 @@ def bprop_matmul(self):
else:
dw = mul2(x, dout)
return dx, dw
return bprop
@ -152,6 +151,7 @@ def bprop_batchmatmul(self):
else:
dw = mul2(x, dout)
return dx, dw
return bprop
@ -161,6 +161,7 @@ def get_bprop_tensor_add(self):
def bprop(x, y, out, dout):
return binop_grad_common(x, y, dout, dout)
return bprop
@ -172,6 +173,7 @@ def get_bprop_neg(self):
def bprop(x, out, dout):
dx = neg_grad(dout)
return (dx,)
return bprop
@ -182,6 +184,7 @@ def get_bprop_sub(self):
def bprop(x, y, out, dout):
return binop_grad_common(x, y, dout, neg_func(dout))
return bprop
@ -194,6 +197,7 @@ def get_bprop_mul(self):
bc_dx = mul_func(dout, y)
bc_dy = mul_func(dout, x)
return binop_grad_common(x, y, bc_dx, bc_dy)
return bprop
@ -208,6 +212,7 @@ def get_bprop_real_div(self):
bc_x = div_op(dout, y)
bc_y = neg(mul_op(bc_x, out))
return binop_grad_common(x, y, bc_x, bc_y)
return bprop
@ -222,6 +227,7 @@ def get_bprop_div(self):
bc_x = div_op(dout, y)
bc_y = neg(mul_op(bc_x, out))
return binop_grad_common(x, y, bc_x, bc_y)
return bprop
@ -235,6 +241,7 @@ def get_bprop_floor(self):
def bprop(x, out, dout):
bc_x = fill_(dtype_(x), shape_(x), 0.)
return (bc_x,)
return bprop
@ -249,6 +256,7 @@ def get_bprop_floordiv(self):
bc_x = div_op(dout, y)
bc_y = neg(mul_op(bc_x, out))
return binop_grad_common(x, y, bc_x, bc_y)
return bprop
@ -260,6 +268,7 @@ def get_bprop_floormod(self):
bc_x = dout
bc_y = -dout * (x // y)
return binop_grad_common(x, y, bc_x, bc_y)
return bprop
@ -274,6 +283,7 @@ def get_bprop_square(self):
temp = mul_func(dout, x)
dx = mul_func(fill_func(dtype(temp), shape_op(x), 2.0), temp)
return (dx,)
return bprop
@ -290,6 +300,7 @@ def get_bprop_sqrt(self):
temp = div_op(fill_func(dtype(x), shape_op(x), 0.5), sqrt(x))
dx = mul_func(dout, temp)
return (dx,)
return bprop
@ -298,9 +309,10 @@ def get_bprop_rsqrt(self):
"""Grad definition for `Rsqrt` operation."""
def bprop(x, out, dout):
grad = F.fill(F.dtype(x), F.shape(x), -0.5) / (F.sqrt(x)*x)
grad = F.fill(F.dtype(x), F.shape(x), -0.5) / (F.sqrt(x) * x)
dx = dout * grad
return (dx,)
return bprop
@ -316,6 +328,7 @@ def get_bprop_reciprocal(self):
g = neg(reciprocal(square(x)))
dx = mul(dout, g)
return (dx,)
return bprop
@ -328,6 +341,7 @@ def get_bprop_log(self):
g = reciprocal(x)
dx = g * dout
return dx, 0
return bprop
@ -341,6 +355,7 @@ def get_bprop_log1p(self):
g = reciprocal(x_1p)
dx = g * dout
return dx, 0
return bprop
@ -358,6 +373,7 @@ def get_bprop_erf(self):
x_square = square(x)
dx = dout * half_root_pi * exp(-x_square)
return (dx,)
return bprop
@ -388,6 +404,7 @@ def get_bprop_pow(self):
bc_dx = power * pow_op(x, power - 1.0) * dout
bc_dpower = out * ln(x) * dout
return binop_grad_common(x, power, bc_dx, bc_dpower)
return bprop
@ -400,6 +417,7 @@ def get_bprop_exp(self):
g = exp_(x)
dx = g * dout
return (dx,)
return bprop
@ -411,6 +429,7 @@ def get_bprop_minimum(self):
def bprop(x, y, out, dout):
dx, dy = input_grad(x, y, dout)
return dx, dy
return bprop
@ -422,6 +441,7 @@ def get_bprop_maximum(self):
def bprop(x, y, out, dout):
dx, dy = input_grad(x, y, dout)
return dx, dy
return bprop
@ -432,6 +452,7 @@ def get_bprop_reducesum(self):
def bprop(x, axis, out, dout):
dx = _sum_grad(x, axis, dout)
return dx, zeros_like(axis)
return bprop
@ -442,6 +463,7 @@ def get_bprop_cumsum(self):
def bprop(x, axis, out, dout):
return cumsum(dout, axis), zeros_like(axis)
return bprop
@ -500,6 +522,7 @@ def get_bprop_reduceprod(self):
out = transpose(y, _invert_permutation(perm)) * grad
dx = reshape(out, input_shape)
return dx, zeros_like(axis)
return bprop
@ -515,6 +538,7 @@ def get_bprop_cumprod(self):
prod = cumprod(x, axis)
out = cumsum(prod * dout, axis)
return out / x, zeros_like(axis)
return bprop
@ -524,6 +548,7 @@ def get_bprop_reduceall(self):
def bprop(x, axis, out, dout):
return zeros_like(x), zeros_like(axis)
return bprop
@ -534,6 +559,7 @@ def get_bprop_reducemax(self):
def bprop(x, axis, out, dout):
dx = _min_or_max_grad(x, axis, out, dout)
return (dx, zeros_like(axis))
return bprop
@ -547,6 +573,7 @@ def get_bprop_argmaxwithvalue(self):
def bprop(x, out, dout):
dx = _argmin_or_argmax_grad(x, axis, keep_dims, op, out, dout)
return (dx,)
return bprop
@ -557,6 +584,7 @@ def get_bprop_reducemin(self):
def bprop(x, axis, out, dout):
dx = _min_or_max_grad(x, axis, out, dout)
return (dx, zeros_like(axis))
return bprop
@ -570,6 +598,7 @@ def get_bprop_argminwithvalue(self):
def bprop(x, out, dout):
dx = _argmin_or_argmax_grad(x, axis, keep_dims, op, out, dout)
return (dx,)
return bprop
@ -585,6 +614,7 @@ def get_bprop_reduce_mean(self):
div_shape = F.shape_mul(shape_op(x)) / F.shape_mul(shape_op(out))
dx = div_op(grad, cast(F.scalar_to_array(div_shape), dtype(grad)))
return dx, zeros_like(axis)
return bprop
@ -604,6 +634,7 @@ def get_bprop_not_equal(self):
def bprop(x, y, out, dout):
return zeros_like(x), zeros_like(y)
return bprop
@ -613,6 +644,7 @@ def get_bprop_greater(self):
def bprop(x, y, out, dout):
return zeros_like(x), zeros_like(y)
return bprop
@ -622,6 +654,7 @@ def get_bprop_greater_equal(self):
def bprop(x, y, out, dout):
return zeros_like(x), zeros_like(y)
return bprop
@ -631,6 +664,7 @@ def get_bprop_less(self):
def bprop(x, y, out, dout):
return zeros_like(x), zeros_like(y)
return bprop
@ -640,6 +674,7 @@ def get_bprop_less_equal(self):
def bprop(x, y, out, dout):
return zeros_like(x), zeros_like(y)
return bprop
@ -649,6 +684,7 @@ def get_bprop_logical_not(self):
def bprop(x, out, dout):
return (zeros_like(x),)
return bprop
@ -658,6 +694,7 @@ def get_bprop_logical_and(self):
def bprop(x, y, out, dout):
return zeros_like(x), zeros_like(y)
return bprop
@ -667,6 +704,7 @@ def get_bprop_logical_or(self):
def bprop(x, y, out, dout):
return zeros_like(x), zeros_like(y)
return bprop
@ -676,6 +714,7 @@ def get_bprop_npu_alloc_float_status(self):
def bprop(out, dout):
return ()
return bprop
@ -685,6 +724,7 @@ def get_bprop_npu_get_float_status(self):
def bprop(x, out, dout):
return (zeros_like(x),)
return bprop
@ -694,6 +734,7 @@ def get_bprop_npu_clear_float_status(self):
def bprop(x, out, dout):
return (zeros_like(x),)
return bprop
@ -703,6 +744,7 @@ def get_bprop_assign_add(self):
def bprop(x, y, out, dout):
return zeros_like(x), zeros_like(y)
return bprop
@ -712,6 +754,7 @@ def get_bprop_assign_sub(self):
def bprop(x, y, out, dout):
return zeros_like(x), zeros_like(y)
return bprop
@ -721,8 +764,9 @@ def get_bprop_sin(self):
cos = P.Cos()
def bprop(x, out, dout):
dx = dout*cos(x)
dx = dout * cos(x)
return (dx,)
return bprop
@ -733,8 +777,9 @@ def get_bprop_cos(self):
neg = P.Neg()
def bprop(x, out, dout):
dx = dout*neg(sin(x))
dx = dout * neg(sin(x))
return (dx,)
return bprop
@ -746,6 +791,7 @@ def get_bprop_acos(self):
def bprop(x, out, dout):
dx = input_grad(x, dout)
return (dx,)
return bprop
@ -757,6 +803,7 @@ def get_bprop_acosh(self):
def bprop(x, out, dout):
dx = input_grad(out, dout)
return (dx,)
return bprop
@ -768,6 +815,7 @@ def get_bprop_abs(self):
def bprop(x, out, dout):
dx = abs_grad(x, dout)
return (dx,)
return bprop
@ -777,6 +825,7 @@ def get_bprop_scalar_cast(self):
def bprop(x, t, out, dout):
return F.scalar_cast(dout, F.typeof(x)), zeros_like(t)
return bprop
@ -789,6 +838,7 @@ def get_bprop_scalar_addn(self):
for _ in range(len(x)):
dx = dx + (dout,)
return dx
return bprop
@ -798,6 +848,7 @@ def get_bprop_sign(self):
def bprop(x, out, dout):
return (zeros_like(x),)
return bprop
@ -807,6 +858,7 @@ def get_bprop_round(self):
def bprop(x, out, dout):
return (zeros_like(x),)
return bprop
@ -821,4 +873,5 @@ def get_bprop_atan2(self):
bc_dx = tmp * y
bc_dy = tmp * (-x)
return binop_grad_common(x, y, bc_dx, bc_dy)
return bprop

View File

@ -21,6 +21,7 @@ from ..operations import _grad_ops as G
from ..operations import _inner_ops as inner
from ..composite.multitype_ops.zeros_like_impl import zeros_like
from .grad_base import bprop_getters
from ... import context
@bprop_getters.register(P.BiasAdd)
@ -551,6 +552,14 @@ def get_bprop_lstm(self):
bidirectional=self.bidirectional,
dropout=self.dropout
)
lstm_grad = G.LSTMGrad(
input_size=self.input_size,
hidden_size=self.hidden_size,
num_layers=self.num_layers,
has_bias=self.has_bias,
bidirectional=self.bidirectional,
dropout=self.dropout
)
def bprop(x, hx, cx, w, out, dout):
y, _, _, reserve, state = out
@ -559,6 +568,16 @@ def get_bprop_lstm(self):
dw = lstm_grad_weight(F.depend(x, dx), hx, y, reserve, state)
return dx, dhx, dcx, dw
#
def bprop_cpu(x, hx, cx, w, out, dout):
y, hy, cy, reserve, _ = out
dy, dhy, dcy, _, _ = dout
dx, dhx, dcx, dw = lstm_grad(x, hx, cx, w, y, hy, cy, dy, dhy, dcy, reserve)
return dx, dhx, dcx, dw
if context.get_context('device_target') == "CPU":
return bprop_cpu
return bprop

View File

@ -107,6 +107,7 @@ class BiasAddGrad(Primitive):
class BinaryCrossEntropyGrad(PrimitiveWithInfer):
"""Computes gradients for `BinaryCrossEntropy` operation."""
@prim_attr_register
def __init__(self, reduction='mean'):
self.reduction = validator.check_string('reduction', reduction, ['none', 'mean', 'sum'], self.name)
@ -665,6 +666,62 @@ class LSTMGradWeight(PrimitiveWithInfer):
return hx_dtype
class LSTMGrad(PrimitiveWithInfer):
"""Computes the data and weight gradients of LSTM."""
@prim_attr_register
def __init__(self, input_size, hidden_size, num_layers, has_bias, bidirectional, dropout):
self.input_size = validator.check_integer('input_size', input_size, 0, Rel.GT, self.name)
self.hidden_size = validator.check_integer('hidden_size', hidden_size, 0, Rel.GT, self.name)
self.num_layers = validator.check_integer('num_layers', num_layers, 0, Rel.GT, self.name)
self.has_bias = validator.check_value_type('has_bias', has_bias, (bool,), self.name)
self.bidirectional = validator.check_value_type('bidirectional', bidirectional, (bool,), self.name)
self.dropout = validator.check_value_type("dropout", dropout, [float], self.name)
self.dropout = validator.check_number_range('dropout', dropout, 0, 1, Rel.INC_BOTH, self.name)
if bidirectional:
self.num_directions = 2
else:
self.num_directions = 1
def infer_shape(self, x_shape, hx_shape, cx_shape, w_shape, y_shape, hy_shape, cy_shape, dy_shape, dhy_shape,
dcy_shape, reserve_shape):
# dhy and dcy should have the same shape
validator.check_integer("h_shape", len(dhy_shape), 3, Rel.EQ, self.name)
validator.check_integer("h_shape", len(dhy_shape), len(dcy_shape), Rel.EQ, self.name)
validator.check_integer("h_shape[0]", dhy_shape[0], dcy_shape[0], Rel.EQ, self.name)
validator.check_integer("h_shape[1]", dhy_shape[1], dcy_shape[1], Rel.EQ, self.name)
validator.check_integer("h_shape[2]", dhy_shape[2], dcy_shape[2], Rel.EQ, self.name)
validator.check_integer("h_shape[0]", dhy_shape[0], self.num_layers * self.num_directions, Rel.EQ, self.name)
validator.check_integer("h_shape[2]", dhy_shape[2], self.hidden_size, Rel.EQ, self.name)
# dy: (seq_len, batch_size, hidden_size * num_directions)
validator.check_integer("dy_shape", len(dy_shape), 3, Rel.EQ, self.name)
validator.check_integer("dy[1]", dy_shape[1], dhy_shape[1], Rel.EQ, self.name)
validator.check_integer("dy[2]", dy_shape[2], self.hidden_size * self.num_directions, Rel.EQ, self.name)
# (seq_len, batch_size, input_size)
dx_shape = (y_shape[0], y_shape[1], self.input_size)
dhx_shape = dhy_shape
dcx_shape = dcy_shape
weight_size = 0
gate_size = 4 * self.hidden_size
for layer in range(self.num_layers):
for _ in range(self.num_directions):
input_layer_size = self.input_size if layer == 0 else self.hidden_size * self.num_directions
weight_size += gate_size * input_layer_size
weight_size += gate_size * self.hidden_size
if self.has_bias:
weight_size += gate_size
return (dx_shape, dhx_shape, dcx_shape, (weight_size, 1, 1))
def infer_dtype(self, x_dtype, hx_dtype, cx_dtype, w_dtype, y_dtype, hy_dtype, cy_dtype, dy_dtype, dhy_dtype,
dcy_dtype, reserve_dtype):
return (dy_dtype, dy_dtype, dy_dtype, hx_dtype)
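# Worked example for the weight-size loop in infer_shape above (illustrative numbers only):
# with input_size=10, hidden_size=2, num_layers=2, bidirectional=True, has_bias=True,
# gate_size = 4 * 2 = 8, so
#   layer 0, per direction: 8*10 (w_ih) + 8*2 (w_hh) + 8 (bias) = 104 -> 208 over 2 directions
#   layer 1, per direction: 8*(2*2) (w_ih) + 8*2 (w_hh) + 8 (bias) = 56 -> 112 over 2 directions
# weight_size = 320, and the inferred dw shape is (320, 1, 1).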
class PReLUGrad(PrimitiveWithInfer):
r"""
Gradients of PReLU operation.
@ -1051,6 +1108,7 @@ class RefToEmbed(Primitive):
__mindspore_signature__ = (
('variable', sig_rw.RW_REF, sig_kind.KIND_POSITIONAL_KEYWORD),
)
@prim_attr_register
def __init__(self):
pass

View File

@ -35,9 +35,11 @@ def _check_positive_int_or_tuple(arg_name, arg_value, prim_name, allow_four=Fals
"""
Checks whether an argument is a positive int or a tuple with 2 or 4 (when allow_four is True) positive int elements.
"""
def _raise_message():
raise ValueError(f"For '{prim_name}' attr '{arg_name}' should be a positive int number or a tuple of two "
f"{'or four ' if allow_four else ''}positive int numbers, but got {arg_value}")
def _get_return_value():
if isinstance(arg_value, int):
ret = (1, 1, arg_value, arg_value) if ret_four else (arg_value, arg_value)
@ -50,6 +52,7 @@ def _check_positive_int_or_tuple(arg_name, arg_value, prim_name, allow_four=Fals
else:
_raise_message()
return ret
validator.check_value_type(arg_name, arg_value, (int, tuple), prim_name)
ret_value = _get_return_value()
for item in ret_value:
@ -58,6 +61,7 @@ def _check_positive_int_or_tuple(arg_name, arg_value, prim_name, allow_four=Fals
_raise_message()
return ret_value
class Flatten(PrimitiveWithInfer):
r"""
Flattens a tensor without changing its batch size on the 0-th axis.
@ -205,6 +209,7 @@ class Softplus(PrimitiveWithInfer):
>>> softplus(input_x)
[1.3132615, 2.126928, 3.0485873, 4.01815, 5.0067153]
"""
@prim_attr_register
def __init__(self):
"""init Softplus"""
@ -301,6 +306,7 @@ class ReLUV2(PrimitiveWithInfer):
([[[[1., 0.], [0., 4.]], [[0., 6.], [7., 0.]]]],
[[[[1, 0], [2, 0]], [[2, 0], [1, 0]]]])
"""
@prim_attr_register
def __init__(self):
"""init ReLUV2"""
@ -398,6 +404,7 @@ class HSwish(PrimitiveWithInfer):
>>> input_x = Tensor(np.array([-1, -2, 0, 2, 1]), mindspore.float16)
>>> result = hswish(input_x)
"""
@prim_attr_register
def __init__(self):
self.init_prim_io_names(inputs=['x'], outputs=['output'])
@ -1076,6 +1083,7 @@ class MaxPoolWithArgmax(_Pool):
>>> maxpool_arg_op = P.MaxPoolWithArgmax(padding="VALID", ksize=2, strides=1)
>>> output_tensor, argmax = maxpool_arg_op(input_tensor)
"""
def __init__(self, ksize=1, strides=1, padding="valid"):
super(MaxPoolWithArgmax, self).__init__(ksize, strides, padding)
self.is_tbe = context.get_context("device_target") == "Ascend"
@ -1494,6 +1502,7 @@ class ApplyMomentum(PrimitiveWithInfer):
('gradient', sig_rw.RW_READ, sig_kind.KIND_POSITIONAL_KEYWORD),
('momentum', sig_rw.RW_READ, sig_kind.KIND_POSITIONAL_KEYWORD)
)
@prim_attr_register
def __init__(self, use_nesterov=False, use_locking=False, gradient_scale=1.0):
self.init_prim_io_names(inputs=['variable', 'accumulation', 'learning_rate', 'gradient', 'momentum'],
@ -1583,6 +1592,7 @@ class L2Loss(PrimitiveWithInfer):
>>> l2_loss(input_x)
7.0
"""
@prim_attr_register
def __init__(self):
"""init L2Loss"""
@ -2325,7 +2335,29 @@ class LSTM(PrimitiveWithInfer):
y_shape = (x_shape[0], x_shape[1], self.hidden_size * self.num_directions)
# set arbitrary shape for reserved space
reserved_shape = (1, 1)
type_size = 4
gates_ws_ld = self.get_good_ld(self.hidden_size * 4, type_size)
states_ws_ld = self.get_good_ld(max(self.hidden_size, self.input_size), type_size)
self.ws_gates_size = self.num_layers * self.num_directions * x_shape[0] * x_shape[1] * gates_ws_ld * type_size
self.ws_states_size = (self.num_layers + 1) * self.num_directions * (x_shape[0] + 1) * x_shape[
1] * states_ws_ld * type_size
self.ws_c_states_size = (self.num_layers + 1) * self.num_directions * (x_shape[0] + 1) * x_shape[
1] * states_ws_ld * type_size
self.ws_diff_states_size = (self.num_layers + 1) * self.num_directions * (x_shape[0] + 1) * (2 + 1) * x_shape[
1] * states_ws_ld * type_size
self.ws_grid_comp_size = 0
self.page_size = 4096
current_offset = 0
current_offset += self.ws_gates_size
current_offset = self.rnd_up(current_offset, self.page_size)
current_offset += self.ws_states_size
current_offset = self.rnd_up(current_offset, self.page_size)
current_offset += self.ws_c_states_size
current_offset = self.rnd_up(current_offset, self.page_size)
current_offset += self.ws_diff_states_size
current_offset = self.rnd_up(current_offset, self.page_size)
current_offset += self.ws_grid_comp_size
reserved_shape = (current_offset, 1)
state_shape = (1, 1)
return (y_shape, h_shape, c_shape, reserved_shape, state_shape)
@ -2334,6 +2366,15 @@ class LSTM(PrimitiveWithInfer):
validator.check_tensor_type_same(args, (mstype.float32, mstype.float16), self.name)
return (x_dtype, x_dtype, x_dtype, x_dtype, x_dtype)
def rnd_up(self, current_offset, page_size):
return ((current_offset + page_size - 1) // page_size) * page_size
def get_good_ld(self, dim, type_size):
ld = self.rnd_up(dim, 64 // type_size)
if ld % 256 == 0:
return ld + 64 // type_size
return ld
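# A standalone sketch of the reserved-space arithmetic above. It mirrors oneDNN's
# workspace layout (gates, states, cell states and diff states, each block rounded
# up to a 4 KB page); the % 256 adjustment in get_good_ld is ignored, which does not
# change the small example at the end. Names here are illustrative only.
def _rnd_up_sketch(offset, page_size=4096):
    return ((offset + page_size - 1) // page_size) * page_size

def _reserved_elements_sketch(seq_len, batch, input_size, hidden_size, layers, dirs, type_size=4):
    block = 64 // type_size
    ld = lambda dim: ((dim + block - 1) // block) * block
    gates = layers * dirs * seq_len * batch * ld(4 * hidden_size) * type_size
    states = (layers + 1) * dirs * (seq_len + 1) * batch * ld(max(hidden_size, input_size)) * type_size
    diff_states = (layers + 1) * dirs * (seq_len + 1) * 3 * batch * ld(max(hidden_size, input_size)) * type_size
    offset = _rnd_up_sketch(gates)
    offset = _rnd_up_sketch(offset + states)       # ws_states
    offset = _rnd_up_sketch(offset + states)       # ws_c_states (same size)
    offset = _rnd_up_sketch(offset + diff_states)  # ws_diff_states; ws_grid_comp_size is 0
    return offset
# For the test_lstm configuration further down, _reserved_elements_sketch(5, 2, 3, 2, 1, 1)
# gives 20480, matching the first dimension of reserved_shape.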
class SigmoidCrossEntropyWithLogits(PrimitiveWithInfer):
r"""
@ -2999,6 +3040,7 @@ class Dropout(PrimitiveWithInfer):
>>> in = Tensor((20, 16, 50, 50))
>>> out = dropout(in)
"""
@prim_attr_register
def __init__(self, drop_prob=0):
self.drop_prob = validator.check_number_range("drop_prob", drop_prob, 0, 1, Rel.INC_BOTH, self.name)
@ -3033,6 +3075,7 @@ class DropoutGrad(PrimitiveWithInfer):
>>> in = Tensor((20, 16, 50, 50))
>>> out = dropout_grad(in)
"""
@prim_attr_register
def __init__(self, drop_prob=0):
self.drop_prob = validator.check_number_range("drop_prob", drop_prob, 0, 1, Rel.INC_BOTH, self.name)
@ -3083,6 +3126,7 @@ class CTCLoss(PrimitiveWithInfer):
>>> ctc_loss = P.CTCloss()
>>> output = ctc_loss(inputs, labels_indices, labels_values, sequence_length)
"""
@prim_attr_register
def __init__(self, preprocess_collapse_repeated=False, ctc_merge_repeated=False,
ignore_longer_outputs_than_inputs=False):

View File

@ -0,0 +1,335 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
import pytest
import mindspore.nn as nn
from mindspore.common.api import ms_function
import numpy as np
import mindspore.context as context
from mindspore.common.initializer import initializer
from mindspore.ops import composite as C
from mindspore.ops import operations as P
from mindspore.common.tensor import Tensor
from mindspore.common.parameter import ParameterTuple, Parameter
context.set_context(device_target='CPU')
class LstmNet(nn.Cell):
def __init__(self, seq_len, batch_size, input_size, hidden_size, num_layers, has_bias, bidirectional, dropout):
super(LstmNet, self).__init__()
num_directions = 1
if bidirectional:
num_directions = 2
self.lstm = P.LSTM(input_size, hidden_size, num_layers, has_bias, bidirectional, dropout)
input_np = np.array([[[0.6755, -1.6607, 0.1367], [0.4276, -0.7850, -0.3758]],
[[-0.6424, -0.6095, 0.6639], [0.7918, 0.4147, -0.5089]],
[[-1.5612, 0.0120, -0.7289], [-0.6656, -0.6626, -0.5883]],
[[-0.9667, -0.6296, -0.7310], [0.1026, -0.6821, -0.4387]],
[[-0.4710, 0.6558, -0.3144], [-0.8449, -0.2184, -0.1806]]
]).astype(np.float32)
self.x = Parameter(initializer(Tensor(input_np), [seq_len, batch_size, input_size]), name='x')
self.h = Parameter(initializer(
Tensor(
np.array([0.1, 0.1, 0.1, 0.1]).reshape((num_layers * num_directions, batch_size, hidden_size)).astype(
np.float32)),
[num_layers * num_directions, batch_size, hidden_size]), name='h')
self.c = Parameter(initializer(
Tensor(
np.array([0.2, 0.2, 0.2, 0.2]).reshape((num_layers * num_directions, batch_size, hidden_size)).astype(
np.float32)),
[num_layers * num_directions, batch_size, hidden_size]), name='c')
wih = np.array([[3.4021e-01, -4.6622e-01, 4.5117e-01],
[-6.4257e-02, -2.4807e-01, 1.3550e-02], # i
[-3.2140e-01, 5.5578e-01, 6.3589e-01],
[1.6547e-01, -7.9030e-02, -2.0045e-01],
[-6.9863e-01, 5.9773e-01, -3.9062e-01],
[-3.0253e-01, -1.9464e-01, 7.0591e-01],
[-4.0835e-01, 3.6751e-01, 4.7989e-01],
[-5.6894e-01, -5.0359e-01, 4.7491e-01]]).astype(np.float32) # .reshape([1,-1])
whh = np.array([[-0.4820, -0.2350],
[-0.1195, 0.0519],
[0.2162, -0.1178],
[0.6237, 0.0711],
[0.4511, -0.3961],
[-0.5962, 0.0906],
[0.1867, -0.1225],
[0.1831, 0.0850]]).astype(np.float32) # .reshape([1,-1])
wih = wih.transpose((1, 0))
whh = whh.transpose((1, 0))
bih = np.zeros((1, 8)).astype(np.float32)
w_np = np.concatenate((wih, whh, bih), axis=0).reshape([-1, 1, 1])
self.w = Parameter(initializer(Tensor(w_np), w_np.shape), name='w')
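# Weight layout note (an assumption based on the ldigo/ldgo memory descriptors in the
# CPU kernel above): wih is stored as (4*hidden_size, input_size) and whh as
# (4*hidden_size, hidden_size); transposing them gives row-major (input, 4*hidden) and
# (hidden, 4*hidden) blocks which, concatenated with the bias and flattened, line up
# with the [w_ih | w_hh | bias] offsets the kernel reads from its weight input.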
@ms_function
def construct(self):
return self.lstm(self.x, self.h, self.c, self.w)
@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_lstm():
seq_len = 5
batch_size = 2
input_size = 3
hidden_size = 2
num_layers = 1
has_bias = True
bidirectional = False
dropout = 0.0
num_directions = 1
if bidirectional:
num_directions = 2
net = LstmNet(seq_len, batch_size, input_size, hidden_size, num_layers, has_bias, bidirectional, dropout)
y, h, c, _, _ = net()
print(y)
print(c)
print(h)
expect_y = np.array([[[-0.16709016, 0.13125697],
[-0.08438572, -0.01969833]],
[[-0.2746155, 0.32764038],
[-0.06504016, -0.07770399]],
[[-0.00140004, 0.17706314],
[0.03244496, -0.10135599]],
[[0.08328028, 0.06437367],
[-0.04133911, -0.11072896]],
[[0.19004421, -0.02852732],
[0.09138509, -0.00344161]]]
)
    error = np.ones([seq_len, batch_size, hidden_size * num_directions]) * 1.0e-4  # tolerance shaped like y
diff = y.asnumpy() - expect_y
assert np.all(diff < error)
assert np.all(-diff < error)
#
expect_h = np.array([[[0.19004421, -0.02852732],
[0.09138509, -0.00344161]]])
error = np.ones((num_layers * num_directions, batch_size, hidden_size)) * 1.0e-4
diff = h.asnumpy() - expect_h
assert np.all(diff < error)
assert np.all(-diff < error)
#
expect_c = np.array([[[0.34533143, -0.06313794],
[0.169008, -0.00555446]]])
error = np.ones((num_layers * num_directions, batch_size, hidden_size)) * 1.0e-4
diff = c.asnumpy() - expect_c
assert np.all(diff < error)
assert np.all(-diff < error)
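

# Two-layer bidirectional LSTM built through the nn.LSTM cell, with all-ones
# initial states packed into ParameterTuples (one entry per layer).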
class MultiLayerBiLstmNet(nn.Cell):
def __init__(self, seq_len, batch_size, input_size, hidden_size, num_layers, has_bias, bidirectional, dropout):
super(MultiLayerBiLstmNet, self).__init__()
num_directions = 1
if bidirectional:
num_directions = 2
self.lstm = nn.LSTM(input_size=input_size, hidden_size=hidden_size, num_layers=num_layers, has_bias=has_bias,
bidirectional=bidirectional, dropout=dropout)
input_np = np.array([[[-0.1887, -0.4144, -0.0235, 0.7489, 0.7522, 0.5969, 0.3342, 1.2198, 0.6786, -0.9404],
[-0.8643, -1.6835, -2.4965, 2.8093, 0.1741, 0.2707, 0.7387, -0.0939, -1.7990, 0.4765]],
[[-0.5963, -1.2598, -0.7226, 1.1365, -1.7320, -0.7302, 0.1221, -0.2111, -1.6173, -0.0706],
[0.8964, 0.1737, -1.0077, -0.1389, 0.4889, 0.4391, 0.7911, 0.3614, -1.9533, -0.9936]],
[[0.3260, -1.3312, 0.0601, 1.0726, -1.6010, -1.8733, -1.5775, 1.1579, -0.8801, -0.5742],
[-2.2998, -0.6344, -0.5409, -0.9221, -0.6500, 0.1206, 1.5215, 0.7517, 1.3691, 2.0021]],
[[-0.1245, -0.3690, 2.1193, 1.3852, -0.1841, -0.8899, -0.3646, -0.8575, -0.3131, 0.2026],
[1.0218, -1.4331, 0.1744, 0.5442, -0.7808, 0.2527, 0.1566, 1.1484, -0.7766, -0.6747]],
[[-0.6752, 0.9906, -0.4973, 0.3471, -0.1202, -0.4213, 2.0213, 0.0441, 0.9016, 1.0365],
[1.2223, -1.3248, 0.1207, -0.8256, 0.1816, 0.7057, -0.3105, 0.5713, 0.2804,
-1.0685]]]).astype(np.float32)
self.x = Parameter(initializer(Tensor(input_np), [seq_len, batch_size, input_size]), name='x')
self.h0 = Parameter(initializer(
Tensor(np.ones((num_directions, batch_size, hidden_size)).astype(np.float32)),
[num_directions, batch_size, hidden_size]), name='h0')
self.c0 = Parameter(initializer(
Tensor(np.ones((num_directions, batch_size, hidden_size)).astype(np.float32)),
[num_directions, batch_size, hidden_size]), name='c0')
self.h1 = Parameter(initializer(
Tensor(np.ones((num_directions, batch_size, hidden_size)).astype(np.float32)),
[num_directions, batch_size, hidden_size]), name='h1')
self.c1 = Parameter(initializer(
Tensor(np.ones((num_directions, batch_size, hidden_size)).astype(np.float32)),
[num_directions, batch_size, hidden_size]), name='c1')
self.h = ParameterTuple((self.h0, self.h1))
self.c = ParameterTuple((self.c0, self.c1))
@ms_function
def construct(self):
return self.lstm(self.x, (self.h, self.c))
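

# Forward smoke test for the stacked bidirectional configuration: the outputs
# are printed rather than compared against reference values.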
@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_multi_layer_bilstm():
seq_len = 5
batch_size = 2
input_size = 10
hidden_size = 2
num_layers = 2
has_bias = True
bidirectional = True
dropout = 0.0
num_directions = 1
if bidirectional:
num_directions = 2
net = MultiLayerBiLstmNet(seq_len, batch_size, input_size, hidden_size, num_layers, has_bias, bidirectional,
dropout)
y, h, c, _, _ = net()
print(y)
print(h)
print(c)
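

# Differentiates the wrapped network with respect to its trainable parameters,
# taking the upstream gradient (sens) as its only input.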
class Grad(nn.Cell):
def __init__(self, network):
super(Grad, self).__init__()
self.network = network
self.weights = ParameterTuple(network.trainable_params())
self.grad = C.GradOperation('grad',
get_by_list=True,
sens_param=True)
@ms_function
def construct(self, output_grad):
weights = self.weights
grads = self.grad(self.network, weights)(output_grad)
return grads
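

# Single-layer, unidirectional LSTM with explicitly packed weights, used as the
# target network for the gradient smoke test below.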
class Net(nn.Cell):
def __init__(self, seq_len, batch_size, input_size, hidden_size, num_layers, has_bias, bidirectional, dropout):
super(Net, self).__init__()
num_directions = 1
if bidirectional:
num_directions = 2
input_np = np.array([[[-0.5907, 1.0557, 1.7283, 0.6706, -1.2550, -0.5298, -0.2290, -0.6735, 0.8555, 1.4836],
[-1.7070, -0.5347, -0.9105, -0.2598, 0.0588, 1.5496, 1.0757, 0.3760, -1.2020, -0.2868]],
[[0.0151, 0.2126, 0.8090, -0.5292, -2.5590, 0.4279, -0.3081, -1.4706, -0.0498, 1.2301],
[0.4165, -0.5391, -0.0996, 0.1928, -0.4909, -0.1255, 0.4444, -1.3687, 1.3096, 0.6553]],
[[-0.7802, -0.2083, -0.6388, 1.3757, 0.4293, 0.5363, 0.3202, -0.6687, -1.3864, -0.2953],
[1.0799, -0.7204, 0.1130, -0.5857, -0.4855, -1.1068, 1.0126, 0.8716, 1.5460, -0.7392]],
[[2.2645, -0.6586, -0.2227, 1.4290, -0.5006, -1.6576, -0.1793, 0.5319, 0.1360, 0.2707],
[-0.4071, 0.1575, 1.4199, -0.9156, 0.1855, 0.4947, 1.0460, -0.6365, 0.1191, -0.6374]],
[[0.2468, 1.0815, -0.4893, 0.0664, 0.6405, -2.2967, 0.7612, 0.8759, 0.5685, -1.0999],
[-0.7272, -1.7750, -0.1164, -0.7159, 0.0061, -0.7839, -1.8329, 0.3434, -0.5634,
0.5384]]]).astype(np.float32)
self.x = Parameter(initializer(Tensor(input_np), [seq_len, batch_size, input_size]), name='x')
self.h0 = Parameter(initializer(
Tensor(np.ones((num_directions, batch_size, hidden_size)).astype(np.float32)),
[num_directions, batch_size, hidden_size]), name='h0')
self.c0 = Parameter(initializer(
Tensor(np.ones((num_directions, batch_size, hidden_size)).astype(np.float32)),
[num_directions, batch_size, hidden_size]), name='c0')
wih_l0 = np.array([[0.2300, 0.6668, 0.4703, 0.0425, 0.0464, 0.6825, 0.2249, -0.4315, -0.2449, 0.2964],
[-0.2811, -0.3444, 0.2557, -0.5137, -0.5518, 0.1652, -0.6720, 0.1066, 0.3586, 0.6299],
[0.5728, -0.1784, 0.5661, 0.4012, 0.3856, -0.1899, 0.3102, 0.3717, -0.5651, 0.1952],
[0.1026, -0.0527, 0.1198, -0.3080, 0.2292, 0.5757, -0.3567, -0.2731, -0.0586, -0.2849],
[0.2194, -0.1622, 0.3219, -0.3008, -0.3713, -0.3034, -0.2385, 0.0412, -0.5205, 0.0280],
[-0.5499, -0.0733, -0.5236, -0.6753, -0.7045, -0.1839, -0.1037, -0.5026, -0.4055, -0.3416],
[0.1573, -0.1301, -0.2882, -0.3464, 0.6643, 0.1980, -0.6804, 0.5359, 0.5996, 0.0124],
[-0.6436, 0.0587, -0.6520, -0.0471, 0.1667, 0.6042, 0.5752, -0.6296, -0.2976,
-0.3757]]).astype(np.float32).reshape([1, -1])
whh_l0 = np.array([[0.3358, 0.2790],
[-0.5355, 0.0989],
[-0.1402, 0.5120],
[0.1335, 0.1653],
[0.3533, -0.3531],
[0.4166, -0.4420],
[-0.5454, -0.1720],
[0.0041, -0.0799]]).astype(np.float32).reshape([1, -1])
bih_l0 = np.array([0.5518, 0.1083, 0.4829, 0.0607, -0.1770, -0.6944, 0.3059, 0.5354]).astype(
np.float32).reshape([1, -1])
bhh_l0 = np.array([0.5025, -0.1261, -0.5405, 0.3220, -0.3441, 0.6488, -0.0284, -0.2334]).astype(
np.float32).reshape([1, -1])
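        # The input-hidden and hidden-hidden weights are flattened row-wise, the two
        # bias vectors are summed into a single bias term, and everything is packed
        # into the w0 buffer consumed by P.LSTM.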
w0_np = np.concatenate(
(wih_l0, whh_l0, bih_l0 + bhh_l0),
axis=1).reshape([-1, 1, 1])
self.w0 = Parameter(initializer(Tensor(w0_np), w0_np.shape), name='w0')
self.lstm = P.LSTM(input_size=input_size, hidden_size=hidden_size, num_layers=num_layers,
has_bias=has_bias, bidirectional=bidirectional, dropout=dropout)
@ms_function
def construct(self):
return self.lstm(self.x, self.h0, self.c0, self.w0)[0]
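

# Gradient smoke test: backpropagates a fixed upstream gradient through the
# LSTM and prints the resulting parameter gradients (dx, dhx, dcx, dw).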
@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_grad():
seq_len = 5
batch_size = 2
input_size = 10
hidden_size = 2
num_layers = 1
has_bias = True
bidirectional = False
dropout = 0.0
num_directions = 1
if bidirectional:
num_directions = 2
net = Grad(Net(seq_len, batch_size, input_size, hidden_size, num_layers, has_bias, bidirectional, dropout))
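    # Upstream gradient with the same shape as the LSTM output y:
    # (seq_len, batch_size, num_directions * hidden_size).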
dy = np.array([[[-3.5471e-01, 7.0540e-01],
[2.7161e-01, 1.0865e+00]],
[[-4.2431e-01, 1.4955e+00],
[-4.0418e-01, -2.3282e-01]],
[[-1.3654e+00, 1.9251e+00],
[-4.6481e-01, 1.3138e+00]],
[[1.2914e+00, -2.3753e-01],
[5.3589e-01, -1.0981e-01]],
[[-1.6032e+00, -1.8818e-01],
[1.0065e-01, 9.2045e-01]]]).astype(np.float32)
dx, dhx, dcx, dw = net(Tensor(dy))
print(dx)
print(dhx)
print(dcx)
print(dw)