baihuawei 2020-05-28 09:09:56 +08:00
parent 650a45b233
commit 9bcdf4cbdc
15 changed files with 1187 additions and 39 deletions

View File

@@ -0,0 +1,120 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "kernel/cpu/mkldnn/lstm_cpu_kernel.h"
#include <string>
#include "common/utils.h"
#include "kernel/cpu/mkldnn/mkl_kernel_engine.h"
#include "device/cpu/cpu_device_address.h"
namespace mindspore {
namespace kernel {
void LstmCPUKernel::InitKernel(const CNodePtr &kernel_node) {
MS_EXCEPTION_IF_NULL(kernel_node);
std::vector<size_t> src_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
bidirectional_ = AnfAlgo::GetNodeAttr<bool>(kernel_node, "bidirectional");
input_size_ = AnfAlgo::GetNodeAttr<int>(kernel_node, "input_size");
hidden_size_ = AnfAlgo::GetNodeAttr<int>(kernel_node, "hidden_size");
num_layers_ = AnfAlgo::GetNodeAttr<int>(kernel_node, "num_layers");
batch_size_ = SizeToInt(src_shape[1]);
seq_len_ = SizeToInt(src_shape[0]);
num_directions_ = 1;
if (bidirectional_) {
num_directions_ = 2;
}
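// An LSTM cell packs four gates (input, forget, candidate and output), so every
// per-direction weight and bias block spans a multiple of 4 * hidden_size.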
int gate_size = 4 * hidden_size_;
for (int i = 0; i < num_layers_; ++i) {
weight_size_ += gate_size * (i == 0 ? input_size_ : hidden_size_ * num_directions_);
weight_h_size_ += gate_size * hidden_size_;
}
weight_size_ = weight_size_ * num_directions_;
weight_h_size_ = weight_h_size_ * num_directions_;
}
bool LstmCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs,
const std::vector<kernel::AddressPtr> & /*workspace*/,
const std::vector<kernel::AddressPtr> &outputs) {
using dt = dnnl::memory::data_type;
using tag = dnnl::memory::format_tag;
using dim = dnnl::memory::dims;
auto eng = MKLKernelEngine::Get().engine();
dnnl::stream s(eng);
auto formatted_md = [](dim dimensions, tag layout) { return dnnl::memory::desc{{dimensions}, dt::f32, layout}; };
dnnl::rnn_direction direction = dnnl::rnn_direction::unidirectional;
if (bidirectional_) {
direction = dnnl::rnn_direction::bidirectional_concat;
}
dim src_dims = {seq_len_, batch_size_, input_size_};
dim src_h_dims = {num_layers_, num_directions_, batch_size_, hidden_size_};
dim src_c_dims = {num_layers_, num_directions_, batch_size_, hidden_size_};
dim weights_dims = {num_layers_, num_directions_, input_size_, 4, hidden_size_};
dim weights_h_dims = {num_layers_, num_directions_, hidden_size_, 4, hidden_size_};
dim bias_dims = {num_layers_, num_directions_, 4, hidden_size_};
dim dst_dims = {seq_len_, batch_size_, hidden_size_ * num_directions_};
dim dst_h_dims = {num_layers_, num_directions_, batch_size_, hidden_size_};
dim dst_c_dims = {num_layers_, num_directions_, batch_size_, hidden_size_};
dnnl::memory::desc src_desc = formatted_md(src_dims, tag::tnc);
dnnl::memory::desc src_h_desc = formatted_md(src_h_dims, tag::ldnc);
dnnl::memory::desc src_c_desc = formatted_md(src_c_dims, tag::ldnc);
dnnl::memory::desc weights_desc = formatted_md(weights_dims, tag::ldigo);
dnnl::memory::desc weights_h_desc = formatted_md(weights_h_dims, tag::ldigo);
dnnl::memory::desc bias_desc = formatted_md(bias_dims, tag::ldgo);
dnnl::memory::desc dst_desc = formatted_md(dst_dims, tag::tnc);
dnnl::memory::desc dst_h_desc = formatted_md(dst_h_dims, tag::ldnc);
dnnl::memory::desc dst_c_desc = formatted_md(dst_c_dims, tag::ldnc);
dnnl::lstm_forward::desc desc =
dnnl::lstm_forward::desc(dnnl::prop_kind::forward_training, direction, src_desc, src_h_desc, src_c_desc,
weights_desc, weights_h_desc, bias_desc, dst_desc, dst_h_desc, dst_c_desc);
auto prim_desc = dnnl::lstm_forward::primitive_desc(desc, MKLKernelEngine::Get().engine());
auto workspace_memory = dnnl::memory(prim_desc.workspace_desc(), eng);
auto src_memory = dnnl::memory(formatted_md(src_dims, tag::tnc), eng);
write_to_dnnl_memory(inputs[0]->addr, src_memory);
auto src_h_memory = dnnl::memory(prim_desc.src_iter_desc(), eng);
auto src_c_memory = dnnl::memory(prim_desc.src_iter_c_desc(), eng);
write_to_dnnl_memory(inputs[1]->addr, src_h_memory);
write_to_dnnl_memory(inputs[2]->addr, src_c_memory);
auto weights_memory = dnnl::memory(formatted_md(weights_dims, tag::ldigo), eng);
auto weights_h_memory = dnnl::memory(formatted_md(weights_h_dims, tag::ldigo), eng);
auto bias_memory = dnnl::memory(formatted_md(bias_dims, tag::ldgo), eng);
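// inputs[3] is the flattened weight tensor: layer weights first, then recurrent
// (iter) weights, then bias, hence the weight_size_ / weight_h_size_ offsets below.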
write_to_dnnl_memory(inputs[3]->addr, weights_memory);
write_to_dnnl_memory(reinterpret_cast<float *>(inputs[3]->addr) + weight_size_, weights_h_memory);
write_to_dnnl_memory(reinterpret_cast<float *>(inputs[3]->addr) + weight_size_ + weight_h_size_, bias_memory);
auto dst_memory = dnnl::memory(formatted_md(dst_dims, tag::tnc), eng);
auto dst_h_memory = dnnl::memory(prim_desc.dst_iter_desc(), eng);
auto dst_c_memory = dnnl::memory(prim_desc.dst_iter_c_desc(), eng);
dnnl::lstm_forward fw_layer(prim_desc);
workspace_memory.set_data_handle(outputs[3]->addr);
dst_memory.set_data_handle(outputs[0]->addr);
dst_h_memory.set_data_handle(outputs[1]->addr);
dst_c_memory.set_data_handle(outputs[2]->addr);
fw_layer.execute(s, {{DNNL_ARG_SRC_LAYER, src_memory},
{DNNL_ARG_SRC_ITER, src_h_memory},
{DNNL_ARG_SRC_ITER_C, src_c_memory},
{DNNL_ARG_WEIGHTS_LAYER, weights_memory},
{DNNL_ARG_WEIGHTS_ITER, weights_h_memory},
{DNNL_ARG_BIAS, bias_memory},
{DNNL_ARG_DST_LAYER, dst_memory},
{DNNL_ARG_DST_ITER, dst_h_memory},
{DNNL_ARG_DST_ITER_C, dst_c_memory},
{DNNL_ARG_WORKSPACE, workspace_memory}});
return true;
}
} // namespace kernel
} // namespace mindspore
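A quick way to sanity-check the offsets used in Launch() above is to recompute them outside the kernel. The sketch below (plain Python, illustrative names only) mirrors InitKernel's size computation and the assumed flat layout of inputs[3]: layer weights, then recurrent weights, then bias.

# Minimal sketch of the flat weight layout assumed by LstmCPUKernel::Launch:
#   [ weights_layer | weights_iter | bias ]
# with sizes computed exactly as in InitKernel above.
def lstm_flat_weight_sizes(input_size, hidden_size, num_layers, bidirectional):
    num_directions = 2 if bidirectional else 1
    gate_size = 4 * hidden_size
    weight_size = 0    # input-to-hidden (layer) weights
    weight_h_size = 0  # hidden-to-hidden (iter) weights
    for layer in range(num_layers):
        in_size = input_size if layer == 0 else hidden_size * num_directions
        weight_size += gate_size * in_size
        weight_h_size += gate_size * hidden_size
    return weight_size * num_directions, weight_h_size * num_directions

w, wh = lstm_flat_weight_sizes(input_size=10, hidden_size=12, num_layers=1, bidirectional=False)
# layer weights start at offset 0, iter weights at offset w, bias at offset w + wh,
# matching the three write_to_dnnl_memory calls on inputs[3]->addr.
print(w, wh)  # 480 576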

View File

@@ -0,0 +1,59 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_KERNEL_CPU_LSTM_CPU_KERNEL_H
#define MINDSPORE_CCSRC_KERNEL_CPU_LSTM_CPU_KERNEL_H
#include <vector>
#include <memory>
#include "kernel/cpu/mkldnn/mkl_cpu_kernel.h"
namespace mindspore {
namespace kernel {
class LstmCPUKernel : public MKLCPUKernel {
public:
LstmCPUKernel() = default;
~LstmCPUKernel() override = default;
void InitKernel(const CNodePtr &kernel_node) override;
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs) override;
private:
int weight_size_ = 0;
int weight_h_size_ = 0;
int input_size_;
int hidden_size_;
int num_layers_;
int batch_size_;
int seq_len_;
int num_directions_;
bool bidirectional_;
};
MS_REG_CPU_KERNEL(LSTM,
KernelAttr()
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeFloat32)
.AddOutputAttr(kNumberTypeFloat32)
.AddOutputAttr(kNumberTypeFloat32)
.AddOutputAttr(kNumberTypeFloat32)
.AddOutputAttr(kNumberTypeFloat32)
.AddOutputAttr(kNumberTypeFloat32),
LstmCPUKernel);
} // namespace kernel
} // namespace mindspore
#endif // MINDSPORE_CCSRC_KERNEL_CPU_LSTM_CPU_KERNEL_H
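For orientation, this is the input/output index map implied by the registration above and by how Launch() indexes its buffers; the names follow the nn.LSTM docstring added later in this commit and are descriptive only (the fifth output is registered but not written by Launch()).

# Hypothetical, documentation-only mapping; not a MindSpore API.
LSTM_CPU_INPUTS = {0: "x", 1: "h0", 2: "c0", 3: "flat weights (w | w_h | bias)"}
LSTM_CPU_OUTPUTS = {0: "output", 1: "h_n", 2: "c_n",
                    3: "reserve (oneDNN workspace)", 4: "state (reserved)"}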

View File

@@ -0,0 +1,169 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "kernel/cpu/mkldnn/lstm_grad_cpu_kernel.h"
#include <cstring>
#include <cmath>
#include <numeric>
#include <string>
#include "common/utils.h"
#include "kernel/cpu/mkldnn/mkl_kernel_engine.h"
#include "device/cpu/cpu_device_address.h"
namespace mindspore {
namespace kernel {
void LSTMGradCPUKernel::InitKernel(const CNodePtr &kernel_node) {
MS_EXCEPTION_IF_NULL(kernel_node);
std::vector<size_t> src_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
bidirectional_ = AnfAlgo::GetNodeAttr<bool>(kernel_node, "bidirectional");
input_size_ = AnfAlgo::GetNodeAttr<int>(kernel_node, "input_size");
hidden_size_ = AnfAlgo::GetNodeAttr<int>(kernel_node, "hidden_size");
num_layers_ = AnfAlgo::GetNodeAttr<int>(kernel_node, "num_layers");
batch_size_ = SizeToInt(src_shape[1]);
seq_len_ = SizeToInt(src_shape[0]);
num_directions_ = 1;
if (bidirectional_) {
num_directions_ = 2;
}
int gate_size = 4 * hidden_size_;
for (int i = 0; i < num_layers_; ++i) {
weight_size_ += gate_size * (i == 0 ? input_size_ : hidden_size_ * num_directions_);
weight_h_size_ += gate_size * hidden_size_;
}
weight_size_ = weight_size_ * num_directions_;
weight_h_size_ = weight_h_size_ * num_directions_;
}
bool LSTMGradCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs,
const std::vector<kernel::AddressPtr> &workspace /*workspace*/,
const std::vector<kernel::AddressPtr> &outputs) {
using tag = dnnl::memory::format_tag;
using dt = dnnl::memory::data_type;
using dim = dnnl::memory::dims;
auto eng = MKLKernelEngine::Get().engine();
dnnl::stream s(eng);
auto formatted_md = [](dim dimensions, tag layout) { return dnnl::memory::desc{{dimensions}, dt::f32, layout}; };
auto generic_md = [](dim dimensions) { return dnnl::memory::desc{{dimensions}, dt::f32, tag::any}; };
dnnl::rnn_direction direction = dnnl::rnn_direction::unidirectional;
if (bidirectional_) {
direction = dnnl::rnn_direction::bidirectional_concat;
}
dim src_dims = {seq_len_, batch_size_, input_size_};
dim src_h_dims = {num_layers_, num_directions_, batch_size_, hidden_size_};
dim src_c_dims = {num_layers_, num_directions_, batch_size_, hidden_size_};
dim weights_dims = {num_layers_, num_directions_, input_size_, 4, hidden_size_};
dim weights_h_dims = {num_layers_, num_directions_, hidden_size_, 4, hidden_size_};
dim bias_dims = {num_layers_, num_directions_, 4, hidden_size_};
dim dst_dims = {seq_len_, batch_size_, hidden_size_ * num_directions_};
dim dst_h_dims = {num_layers_, num_directions_, batch_size_, hidden_size_};
dim dst_c_dims = {num_layers_, num_directions_, batch_size_, hidden_size_};
dnnl::memory::desc src_desc = formatted_md(src_dims, tag::tnc);
dnnl::memory::desc src_h_desc = formatted_md(src_h_dims, tag::ldnc);
dnnl::memory::desc src_c_desc = formatted_md(src_c_dims, tag::ldnc);
dnnl::memory::desc weights_desc = formatted_md(weights_dims, tag::ldigo);
dnnl::memory::desc weights_h_desc = formatted_md(weights_h_dims, tag::ldigo);
dnnl::memory::desc bias_desc = formatted_md(bias_dims, tag::ldgo);
dnnl::memory::desc dst_desc = formatted_md(dst_dims, tag::tnc);
dnnl::memory::desc dst_h_desc = formatted_md(dst_h_dims, tag::ldnc);
dnnl::memory::desc dst_c_desc = formatted_md(dst_c_dims, tag::ldnc);
dnnl::lstm_forward::desc forward_desc =
dnnl::lstm_forward::desc(dnnl::prop_kind::forward_training, direction, src_desc, src_h_desc, src_c_desc,
weights_desc, weights_h_desc, bias_desc, dst_desc, dst_h_desc, dst_c_desc);
auto prim_forward_desc = dnnl::lstm_forward::primitive_desc(forward_desc, eng);
dnnl::lstm_backward::desc backward_desc = dnnl::lstm_backward::desc(
dnnl::prop_kind::backward, direction, src_desc, src_h_desc, src_c_desc, generic_md(weights_dims),
generic_md(weights_h_dims), generic_md(bias_dims), dst_desc, dst_h_desc, dst_c_desc, src_desc, src_h_desc,
src_c_desc, weights_desc, weights_h_desc, bias_desc, dst_desc, dst_h_desc, dst_c_desc);
auto prim_backward_desc = dnnl::lstm_backward::primitive_desc(backward_desc, eng, prim_forward_desc);
// construct fw memory
auto src_memory = dnnl::memory(formatted_md(src_dims, tag::tnc), eng);
write_to_dnnl_memory(inputs[0]->addr, src_memory);
auto src_h_memory = dnnl::memory(prim_forward_desc.src_iter_desc(), eng);
auto src_c_memory = dnnl::memory(prim_forward_desc.src_iter_c_desc(), eng);
write_to_dnnl_memory(inputs[1]->addr, src_h_memory);
write_to_dnnl_memory(inputs[2]->addr, src_c_memory);
auto user_weights_memory = dnnl::memory(formatted_md(weights_dims, tag::ldigo), eng);
auto user_weights_h_memory = dnnl::memory(formatted_md(weights_h_dims, tag::ldigo), eng);
auto user_bias_memory = dnnl::memory(formatted_md(bias_dims, tag::ldgo), eng);
write_to_dnnl_memory(inputs[3]->addr, user_weights_memory);
write_to_dnnl_memory(reinterpret_cast<float *>(inputs[3]->addr) + weight_size_, user_weights_h_memory);
write_to_dnnl_memory(reinterpret_cast<float *>(inputs[3]->addr) + weight_size_ + weight_h_size_, user_bias_memory);
auto weights_memory = dnnl::memory(prim_backward_desc.weights_layer_desc(), eng);
auto weights_h_memory = dnnl::memory(prim_backward_desc.weights_iter_desc(), eng);
auto bias_memory = dnnl::memory(prim_forward_desc.bias_desc(), eng);
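// Reorder the user-format (ldigo/ldgo) buffers into the memory formats chosen by
// the primitive descriptors (the backward desc was created with tag::any weights).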
dnnl::reorder(user_weights_memory, weights_memory).execute(s, user_weights_memory, weights_memory);
dnnl::reorder(user_weights_h_memory, weights_h_memory).execute(s, user_weights_h_memory, weights_h_memory);
dnnl::reorder(user_bias_memory, bias_memory).execute(s, user_bias_memory, bias_memory);
auto dst_memory = dnnl::memory(formatted_md(dst_dims, tag::tnc), eng);
write_to_dnnl_memory(reinterpret_cast<float *>(inputs[4]->addr), dst_memory);
auto dst_h_memory = dnnl::memory(prim_backward_desc.dst_iter_desc(), eng);
write_to_dnnl_memory(reinterpret_cast<float *>(inputs[5]->addr), dst_h_memory);
auto dst_c_memory = dnnl::memory(prim_backward_desc.dst_iter_c_desc(), eng);
write_to_dnnl_memory(reinterpret_cast<float *>(inputs[6]->addr), dst_c_memory);
auto workspace_memory = dnnl::memory(prim_forward_desc.workspace_desc(), eng);
write_to_dnnl_memory(inputs[10]->addr, workspace_memory);
// construct diff memory
auto diff_src_memory = dnnl::memory(formatted_md(src_dims, tag::tnc), eng);
auto diff_src_h_memory = dnnl::memory(prim_backward_desc.diff_src_iter_desc(), eng);
auto diff_src_c_memory = dnnl::memory(prim_backward_desc.diff_src_iter_c_desc(), eng);
auto diff_weights_memory = dnnl::memory(prim_backward_desc.diff_weights_layer_desc(), eng);
auto diff_weights_h_memory = dnnl::memory(prim_backward_desc.diff_weights_iter_desc(), eng);
auto diff_bias_memory = dnnl::memory(prim_backward_desc.diff_bias_desc(), eng);
auto diff_dst_memory = dnnl::memory(formatted_md(dst_dims, tag::tnc), eng);
write_to_dnnl_memory(reinterpret_cast<float *>(inputs[7]->addr), diff_dst_memory);
auto diff_dst_h_memory = dnnl::memory(prim_backward_desc.diff_dst_iter_desc(), eng);
write_to_dnnl_memory(reinterpret_cast<float *>(inputs[8]->addr), diff_dst_h_memory);
auto diff_dst_c_memory = dnnl::memory(prim_backward_desc.diff_dst_iter_c_desc(), eng);
write_to_dnnl_memory(reinterpret_cast<float *>(inputs[9]->addr), diff_dst_c_memory);
diff_src_memory.set_data_handle(outputs[0]->addr);
diff_src_h_memory.set_data_handle(outputs[1]->addr);
diff_src_c_memory.set_data_handle(outputs[2]->addr);
diff_weights_memory.set_data_handle(outputs[3]->addr);
diff_weights_h_memory.set_data_handle(reinterpret_cast<float *>(outputs[3]->addr) + weight_size_);
diff_bias_memory.set_data_handle(reinterpret_cast<float *>(outputs[3]->addr) + weight_size_ + weight_h_size_);
dnnl::lstm_backward bwd_layer(prim_backward_desc);
bwd_layer.execute(s, {{DNNL_ARG_SRC_LAYER, src_memory},
{DNNL_ARG_SRC_ITER, src_h_memory},
{DNNL_ARG_SRC_ITER_C, src_c_memory},
{DNNL_ARG_WEIGHTS_LAYER, weights_memory},
{DNNL_ARG_WEIGHTS_ITER, weights_h_memory},
{DNNL_ARG_BIAS, bias_memory},
{DNNL_ARG_DST_LAYER, dst_memory},
{DNNL_ARG_DST_ITER, dst_h_memory},
{DNNL_ARG_DST_ITER_C, dst_c_memory},
{DNNL_ARG_DIFF_SRC_LAYER, diff_src_memory},
{DNNL_ARG_DIFF_SRC_ITER, diff_src_h_memory},
{DNNL_ARG_DIFF_SRC_ITER_C, diff_src_c_memory},
{DNNL_ARG_DIFF_WEIGHTS_LAYER, diff_weights_memory},
{DNNL_ARG_DIFF_WEIGHTS_ITER, diff_weights_h_memory},
{DNNL_ARG_DIFF_BIAS, diff_bias_memory},
{DNNL_ARG_DIFF_DST_LAYER, diff_dst_memory},
{DNNL_ARG_DIFF_DST_ITER, diff_dst_h_memory},
{DNNL_ARG_DIFF_DST_ITER_C, diff_dst_c_memory},
{DNNL_ARG_WORKSPACE, workspace_memory}});
return true;
}
} // namespace kernel
} // namespace mindspore
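The grad kernel consumes its buffers by position, so it is worth spelling the order out once. The map below is inferred from the indices used in Launch() above and matches the call made by bprop_cpu in grad_nn_ops.py later in this commit; the names are descriptive only.

# Hypothetical, documentation-only mapping; not a MindSpore API.
LSTM_GRAD_CPU_INPUTS = ["x", "hx", "cx", "flat weights", "y", "hy", "cy",
                        "dy", "dhy", "dcy", "reserve (forward workspace)"]
LSTM_GRAD_CPU_OUTPUTS = ["dx", "dhx", "dcx", "flat dw (dw | dw_h | dbias)"]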

View File

@@ -0,0 +1,67 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_KERNEL_CPU_LSTM_GRAD_CPU_KERNEL_H_
#define MINDSPORE_CCSRC_KERNEL_CPU_LSTM_GRAD_CPU_KERNEL_H_
#include <vector>
#include <memory>
#include "kernel/cpu/mkldnn/mkl_cpu_kernel.h"
namespace mindspore {
namespace kernel {
class LSTMGradCPUKernel : public MKLCPUKernel {
public:
LSTMGradCPUKernel() = default;
~LSTMGradCPUKernel() override = default;
void InitKernel(const CNodePtr &kernel_node) override;
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs) override;
private:
int weight_size_ = 0;
int weight_h_size_ = 0;
int input_size_;
int hidden_size_;
int num_layers_;
int batch_size_;
int seq_len_;
int num_directions_;
bool bidirectional_;
};
MS_REG_CPU_KERNEL(LSTMGrad,
KernelAttr()
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeFloat32)
.AddOutputAttr(kNumberTypeFloat32)
.AddOutputAttr(kNumberTypeFloat32)
.AddOutputAttr(kNumberTypeFloat32)
.AddOutputAttr(kNumberTypeFloat32),
LSTMGradCPUKernel);
} // namespace kernel
} // namespace mindspore
#endif // MINDSPORE_CCSRC_KERNEL_CPU_LSTM_GRAD_CPU_KERNEL_H_

View File

@@ -1,5 +1,5 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -98,5 +98,11 @@ void MKLCPUKernel::SetArgumentHandle(int arg_key, void *ptr) {
}
void MKLCPUKernel::ExecutePrimitive() { MKLKernelEngine::Get().Execute(primitive_, arguments_); }
void MKLCPUKernel::write_to_dnnl_memory(void *handle, const dnnl::memory &mem) {
MKLKernelEngine::Get().write_to_dnnl_memory(handle, mem);
}
void MKLCPUKernel::read_from_dnnl_memory(void *handle, const dnnl::memory &mem) {
MKLKernelEngine::Get().read_from_dnnl_memory(handle, mem);
}
}  // namespace kernel
}  // namespace mindspore

View File

@@ -1,5 +1,5 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -39,6 +39,8 @@ class MKLCPUKernel : public CPUKernel {
dnnl::memory::format_tag GetDefaultFormatTag(const dnnl::memory::dims &dims) const;
dnnl::memory::desc GetDefaultMemDesc(const std::vector<size_t> &shape);
void ExecutePrimitive();
void write_to_dnnl_memory(void *handle, const dnnl::memory &mem);
void read_from_dnnl_memory(void *handle, const dnnl::memory &mem);
std::unordered_map<int, dnnl::memory> arguments_;
std::shared_ptr<dnnl::primitive> primitive_{nullptr};
};

View File

@@ -1,5 +1,5 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.

View File

@@ -1,5 +1,5 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -15,7 +15,10 @@
*/
#ifndef MINDSPORE_MKL_KERNEL_ENGINE_H_
#define MINDSPORE_MKL_KERNEL_ENGINE_H_
#include <cstdlib>
#include <algorithm>
#include <iostream>
#include <string>
#include <unordered_map>
#include <vector>
#include <memory>
@@ -39,6 +42,30 @@ class MKLKernelEngine {
void Execute(const std::shared_ptr<dnnl::primitive> &primitive,
             const std::unordered_map<int, dnnl::memory> &arguments);
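// Read from memory, write to handle (the inverse of write_to_dnnl_memory below)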
inline void read_from_dnnl_memory(void *handle, const dnnl::memory &mem) {
dnnl::engine eng = mem.get_engine();
size_t bytes = mem.get_desc().get_size();
if (eng.get_kind() == dnnl::engine::kind::cpu) {
auto dst = reinterpret_cast<uint8_t *>(handle);
uint8_t *src = reinterpret_cast<uint8_t *>(mem.get_data_handle());
for (size_t i = 0; i < bytes; ++i) {
dst[i] = src[i];
}
}
}
// Read from handle, write to memory
inline void write_to_dnnl_memory(void *handle, const dnnl::memory &mem) {
dnnl::engine eng = mem.get_engine();
size_t bytes = mem.get_desc().get_size();
if (eng.get_kind() == dnnl::engine::kind::cpu) {
auto src = reinterpret_cast<uint8_t *>(handle);
uint8_t *dst = reinterpret_cast<uint8_t *>(mem.get_data_handle());
for (size_t i = 0; i < bytes; ++i) {
dst[i] = src[i];
}
}
}
private:
MKLKernelEngine() : engine_(dnnl::engine::kind::cpu, 0), stream_(engine_) {}
~MKLKernelEngine() = default;

View File

@@ -18,8 +18,13 @@ from mindspore.nn.cell import Cell
from mindspore.common.parameter import Parameter
from mindspore.common.initializer import initializer
from mindspore._checkparam import Validator as validator
from mindspore import context
import mindspore.nn as nn
from mindspore.common.tensor import Tensor
import numpy as np

__all__ = ['LSTM', 'LSTMCell']


class LSTM(Cell):
    r"""
@@ -102,6 +107,7 @@ class LSTM(Cell):
        >>> c0 = Tensor(np.ones([1 * 2, 3, 12]).astype(np.float32))
        >>> output, (hn, cn) = net(input, h0, c0)
    """

    def __init__(self,
                 input_size,
                 hidden_size,
@@ -118,39 +124,198 @@
        self.batch_first = validator.check_value_type("batch_first", batch_first, [bool], self.cls_name)
        self.dropout = float(dropout)
        self.bidirectional = bidirectional
        if self.batch_first:
            self.transpose1 = P.Transpose()
            self.transpose2 = P.Transpose()
        num_directions = 2 if self.bidirectional else 1
        self.cpu_target = False
        if context.get_context("device_target") == "CPU":
            self.cpu_target = True
        if not self.cpu_target:
            self.lstm = P.LSTM(input_size=self.input_size,
                               hidden_size=self.hidden_size,
                               num_layers=self.num_layers,
                               has_bias=self.has_bias,
                               bidirectional=self.bidirectional,
                               dropout=self.dropout)
            weight_size = 0
            gate_size = 4 * self.hidden_size
            for layer in range(self.num_layers):
                input_layer_size = self.input_size if layer == 0 else self.hidden_size * num_directions
                increment_size = gate_size * input_layer_size
                increment_size += gate_size * self.hidden_size
                if self.has_bias:
                    increment_size += 2 * gate_size
                weight_size += increment_size * num_directions
            self.weight = Parameter(initializer(0.0, [weight_size, 1, 1]), name='weight')
        else:
            layer = []
            layer.append(nn.LSTMCell(input_size=self.input_size,
                                     hidden_size=self.hidden_size,
                                     layer_index=0,
                                     has_bias=self.has_bias,
                                     bidirectional=self.bidirectional,
                                     dropout=self.dropout))
            for i in range(num_layers - 1):
                layer.append(nn.LSTMCell(input_size=self.hidden_size * num_directions,
                                         hidden_size=self.hidden_size,
                                         layer_index=i + 1,
                                         has_bias=self.has_bias,
                                         bidirectional=self.bidirectional,
                                         dropout=self.dropout))
            self.lstms = layer
        self.fill = P.Fill()
        self.shape = P.Shape()

    def construct(self, x, hx):
        if self.batch_first:
            x = self.transpose1(x, (1, 0, 2))
        if not self.cpu_target:
            h, c = hx
            output, h, c, _, _ = self.lstm(x, h, c, self.weight)
            if self.batch_first:
                output = self.transpose2(output, (1, 0, 2))
            return (output, (h, c))
        h, c = hx
        output, hn, cn, _, _ = self.lstms[0](x, h[0], c[0])
        for i in range(1, self.num_layers):
            output, hn, cn, _, _ = self.lstms[i](output, h[i], c[i])
        if self.batch_first:
            output = self.transpose2(output, (1, 0, 2))
        return output, hn, cn, _, _
class LSTMCell(Cell):
r"""
LSTM (Long Short-Term Memory) layer.
Applies an LSTM layer to the input.
There are two pipelines connecting two consecutive cells in an LSTM model: one is the cell state pipeline
and the other is the hidden state pipeline. Denote two consecutive time nodes as :math:`t-1` and :math:`t`.
Given an input :math:`x_t` at time :math:`t`, a hidden state :math:`h_{t-1}` and a cell
state :math:`c_{t-1}` of the layer at time :math:`{t-1}`, the cell state and hidden state at
time :math:`t` are computed using a gating mechanism. Input gate :math:`i_t` is designed to protect the cell
from perturbation by irrelevant inputs. Forget gate :math:`f_t` affords protection of the cell by forgetting
some information in the past, which is stored in :math:`h_{t-1}`. Output gate :math:`o_t` protects other
units from perturbation by currently irrelevant memory contents. Candidate cell state :math:`\tilde{c}_t` is
calculated with the current input, on which the input gate will be applied. Finally, current cell state
:math:`c_{t}` and hidden state :math:`h_{t}` are computed with the calculated gates and cell states. The complete
formulation is as follows.
.. math::
\begin{array}{ll} \\
i_t = \sigma(W_{ix} x_t + b_{ix} + W_{ih} h_{(t-1)} + b_{ih}) \\
f_t = \sigma(W_{fx} x_t + b_{fx} + W_{fh} h_{(t-1)} + b_{fh}) \\
\tilde{c}_t = \tanh(W_{cx} x_t + b_{cx} + W_{ch} h_{(t-1)} + b_{ch}) \\
o_t = \sigma(W_{ox} x_t + b_{ox} + W_{oh} h_{(t-1)} + b_{oh}) \\
c_t = f_t * c_{(t-1)} + i_t * \tilde{c}_t \\
h_t = o_t * \tanh(c_t) \\
\end{array}
Here :math:`\sigma` is the sigmoid function, and :math:`*` is the Hadamard product. :math:`W, b`
are learnable weights between the output and the input in the formula. For instance,
:math:`W_{ix}, b_{ix}` are the weight and bias used to transform from input :math:`x` to :math:`i`.
Details can be found in paper `LONG SHORT-TERM MEMORY
<https://www.bioinf.jku.at/publications/older/2604.pdf>`_ and
`Long Short-Term Memory Recurrent Neural Network Architectures for Large Scale Acoustic Modeling
<https://static.googleusercontent.com/media/research.google.com/zh-CN//pubs/archive/43905.pdf>`_.
Args:
input_size (int): Number of features of input.
hidden_size (int): Number of features of hidden layer.
layer_index (int): Index of the current layer in the stacked LSTM. Default: 0.
has_bias (bool): Specifies whether the cell has biases `b_ih` and `b_hh`. Default: True.
batch_first (bool): Specifies whether the first dimension of input is batch_size. Default: False.
dropout (float, int): If not 0, append a `Dropout` layer on the outputs of each
LSTM layer except the last layer. Default: 0. The range of dropout is [0.0, 1.0].
bidirectional (bool): Specifies whether this is a bidirectional LSTM. If set to True,
the number of directions is 2; otherwise the number of directions is 1. Default: False.
Inputs:
- **input** (Tensor) - Tensor of shape (seq_len, batch_size, `input_size`).
- **h** (Tensor) - Data type mindspore.float32 or
mindspore.float16 and shape (num_directions * `num_layers`, batch_size, `hidden_size`).
- **c** (Tensor) - Data type mindspore.float32 or
mindspore.float16 and shape (num_directions * `num_layers`, batch_size, `hidden_size`).
The data type of `h` and `c` must be the same as that of `input`.
Outputs:
Tuple of five outputs: `output`, `h_n`, `c_n`, `reserve`, `state`.
- **output** (Tensor) - Tensor of shape (seq_len, batch_size, num_directions * `hidden_size`).
- **h** - A Tensor with shape (num_directions * `num_layers`, batch_size, `hidden_size`).
- **c** - A Tensor with shape (num_directions * `num_layers`, batch_size, `hidden_size`).
- **reserve** - reserved
- **state** - reserved
Examples:
>>> class LstmNet(nn.Cell):
>>> def __init__(self, input_size, hidden_size, layer_index, has_bias, batch_first, bidirectional):
>>> super(LstmNet, self).__init__()
>>> self.lstm = nn.LSTMCell(input_size=input_size,
>>> hidden_size=hidden_size,
>>> layer_index=layer_index,
>>> has_bias=has_bias,
>>> batch_first=batch_first,
>>> bidirectional=bidirectional,
>>> dropout=0.0)
>>>
>>> def construct(self, inp, h0, c0):
>>> return self.lstm(inp, h0, c0)
>>>
>>> net = LstmNet(10, 12, 2, has_bias=True, batch_first=True, bidirectional=False)
>>> input = Tensor(np.ones([3, 5, 10]).astype(np.float32))
>>> h0 = Tensor(np.ones([1 * 2, 3, 12]).astype(np.float32))
>>> c0 = Tensor(np.ones([1 * 2, 3, 12]).astype(np.float32))
>>> output, hn, cn, _, _ = net(input, h0, c0)
"""
def __init__(self,
input_size,
hidden_size,
layer_index=0,
has_bias=True,
batch_first=False,
dropout=0,
bidirectional=False):
super(LSTMCell, self).__init__()
self.input_size = input_size
self.hidden_size = hidden_size
self.num_layers = 1
self.layer_index = layer_index
self.has_bias = has_bias
self.batch_first = validator.check_value_type("batch_first", batch_first, [bool], self.cls_name)
self.dropout = float(dropout)
self.bidirectional = bidirectional
self.num_directions = 1
if self.bidirectional:
self.num_directions = 2
if self.batch_first:
self.transpose1 = P.Transpose()
self.transpose2 = P.Transpose()
w_np = np.ones([(self.input_size + self.hidden_size) * self.num_directions * self.hidden_size * 4, 1]).astype(
np.float32) * 0.01
if has_bias:
b_np = np.ones([self.num_directions * self.hidden_size * 4, 1]).astype(
np.float32) * 0.01
else:
b_np = np.zeros([self.num_directions * self.hidden_size * 4, 1]).astype(
np.float32) * 0.01
wb_np = np.concatenate((w_np, b_np), axis=0).reshape([-1, 1, 1])
self.w = Parameter(initializer(Tensor(wb_np), wb_np.shape), name='w' + str(self.layer_index))
self.lstm = P.LSTM(input_size=self.input_size,
hidden_size=self.hidden_size,
num_layers=1,
has_bias=self.has_bias,
bidirectional=self.bidirectional,
dropout=self.dropout)
def construct(self, x, h, c):
if self.batch_first:
x = self.transpose1(x, (1, 0, 2))
output, hn, cn, _, _ = self.lstm(x, h, c, self.w)
if self.batch_first:
output = self.transpose2(output, (1, 0, 2))
return output, hn, cn, _, _
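A rough usage sketch of the CPU path added above, assuming a MindSpore build with the CPU backend; shapes and the exact return signature may differ across versions, so treat this as an illustration of the dispatch only, not a reference example.

import numpy as np
from mindspore import Tensor, context, nn

context.set_context(mode=context.GRAPH_MODE, device_target="CPU")

seq_len, batch, input_size, hidden_size = 5, 3, 10, 12
net = nn.LSTM(input_size, hidden_size, num_layers=1, has_bias=True,
              batch_first=False, bidirectional=False)

x = Tensor(np.ones([seq_len, batch, input_size]).astype(np.float32))
h0 = Tensor(np.ones([1, batch, hidden_size]).astype(np.float32))
c0 = Tensor(np.ones([1, batch, hidden_size]).astype(np.float32))

# With device_target == "CPU", construct() above runs the stacked nn.LSTMCell path
# and returns (output, hn, cn, _, _) instead of the (output, (hn, cn)) tuple used
# on other targets.
output, hn, cn, _, _ = net(x, (h0, c0))
print(output.shape)  # (seq_len, batch, num_directions * hidden_size)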

View File

@@ -49,6 +49,7 @@ def get_bprop_dtype(self):
    def bprop(x, out, dout):
        return (zeros_like(x),)
    return bprop
@@ -61,6 +62,7 @@ def get_bprop_cast(self):
    def bprop(x, t, out, dout):
        dx = cast(dout, get_dtype(x))
        return dx, zeros_like(t)
    return bprop
@@ -70,6 +72,7 @@ def get_bprop_shape(self):
    def bprop(x, out, dout):
        return (zeros_like(x),)
    return bprop
@@ -82,6 +85,7 @@ def get_bprop_split(self):
        concat_op = P.Concat(axis)
        dx = concat_op(dout)
        return (dx,)
    return bprop
@@ -91,6 +95,7 @@ def get_bprop_rank(self):
    def bprop(x, out, dout):
        return (zeros_like(x),)
    return bprop
@@ -101,6 +106,7 @@ def get_bprop_reshape(self):
    def bprop(x, shp, out, dout):
        shapex = shape_op(x)
        return reshape(dout, shapex), zeros_like(shp)
    return bprop
@@ -111,6 +117,7 @@ def get_bprop_expand_dims(self):
    def bprop(x, axis, out, dout):
        shapex = shape_op(x)
        return reshape(dout, shapex), zeros_like(axis)
    return bprop
@@ -121,6 +128,7 @@ def get_bprop_squeeze(self):
    def bprop(x, out, dout):
        shapex = shape_op(x)
        return (reshape(dout, shapex),)
    return bprop
@@ -132,6 +140,7 @@ def get_bprop_flatten(self):
    def bprop(x, out, dout):
        dx = flatten_grad(dout, shape_op(x))
        return (dx,)
    return bprop
@@ -166,6 +175,7 @@ def _tile_shape(multiples, shapex):
@bprop_getters.register(P.Tile)
def get_bprop_tile(self):
    """Generate bprop for Tile"""
    def bprop(x, multiples, out, dout):
        shapex = shape_op(x)
        r_shape = _tile_shape(multiples, shapex)
@@ -174,6 +184,7 @@ def get_bprop_tile(self):
        dx = reduce_sum(reshape(dout, r_shape), axis)
        dx = reshape(dx, shapex)
        return dx, zeros_like(multiples)
    return bprop
@@ -183,6 +194,7 @@ def get_bprop_transpose(self):
    def bprop(x, perm, out, dout):
        return transpose(dout, invert_permutation(perm)), zeros_like(perm)
    return bprop
@@ -198,6 +210,7 @@ def get_bprop_concat(self):
            slice_out = P.Slice()(dout, out_offset[i], shape_op(x[i]))
            dx = dx + (slice_out,)
        return (dx,)
    return bprop
@@ -215,12 +228,12 @@ def get_bprop_slice(self):
        dx = P.Pad(_slice_grad_pad(begin, size, shape_op(x)))(dout)
        return (dx, zeros_like(begin), zeros_like(size))
    def bprop_grad(x, begin, size, out, dout):
        dx = G.SliceGrad()(dout, x, begin, size)
        return (dx, zeros_like(begin), zeros_like(size))
    if context.get_context('device_target') == "GPU" or context.get_context('device_target') == "CPU":
        return bprop_grad
    return bprop
@@ -249,6 +262,7 @@ def _generate_inverse_index(x_shape, axis):
@bprop_getters.register(P.GatherV2)
def get_bprop_gather_v2(self):
    """Generate bprop for GatherV2"""
    def bprop(x, indices, axis, out, dout):
        if F.rank(dout) == 0:
            dout = P.ExpandDims()(dout, -1)
@@ -265,6 +279,7 @@ def get_bprop_gather_v2(self):
        perm_2 = _generate_inverse_index(x_shp, axis)
        params_grad = transpose(params_grad, perm_2)
        return params_grad, zeros_like(indices), zeros_like(axis)
    return bprop
@@ -286,6 +301,7 @@ def get_bprop_pack(self):
        pack_grad = P.Unpack(axis)
        out = pack_grad(dout)
        return (out,)
    return bprop
@@ -298,6 +314,7 @@ def get_bprop_unpack(self):
        unpack_grad = P.Pack(axis)
        out = unpack_grad(dout)
        return (out,)
    return bprop
@@ -313,6 +330,7 @@ def get_bprop_strided_slice(self):
    def bprop(x, begin, end, strides, out, dout):
        dx = input_grad(dout, shape_op(x), begin, end, strides)
        return dx, zeros_like(begin), zeros_like(end), zeros_like(strides)
    return bprop
@@ -322,6 +340,7 @@ def get_bprop_eye(self):
    def bprop(n, m, t, out, dout):
        return zeros_like(n), zeros_like(m), zeros_like(t)
    return bprop
@@ -332,6 +351,7 @@ def get_bprop_select(self):
    def bprop(cond, x, y, out, dout):
        return zeros_like(cond), select(cond, dout, zeros_like(x)), select(cond, zeros_like(y), dout)
    return bprop
@@ -522,9 +542,11 @@ def get_bprop_unsorted_segment_min(self):
def get_bprop_space_to_batch(self):
    """Generate bprop for SpaceToBatch"""
    space_to_batch_grad = P.BatchToSpace(self.block_size, self.paddings)
    def bprop(x, out, dout):
        dx = space_to_batch_grad(dout)
        return (dx,)
    return bprop
@@ -532,7 +554,9 @@ def get_bprop_space_to_batch(self):
def get_bprop_batch_to_space(self):
    """Generate bprop for BatchToSpace"""
    batch_to_space_grad = P.SpaceToBatch(self.block_size, self.crops)
    def bprop(x, out, dout):
        dx = batch_to_space_grad(dout)
        return (dx,)
    return bprop
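The Slice hunk above only routes the CPU target onto the SliceGrad-based bprop that GPU already used; both variants compute the same thing. As a reminder of what that is, here is a NumPy-only sketch (not MindSpore code): the gradient of a slice is the upstream gradient scattered back into a zero tensor of the input's shape, which is exactly what the Pad-based default builds.

import numpy as np

def slice_bprop_numpy(x_shape, begin, size, dout):
    """Gradient of y = x[begin[i] : begin[i] + size[i], ...] with respect to x."""
    dx = np.zeros(x_shape, dtype=dout.dtype)
    dx[tuple(slice(b, b + s) for b, s in zip(begin, size))] = dout
    return dx

dout = np.ones((2, 3), dtype=np.float32)
print(slice_bprop_numpy((4, 5), begin=(1, 2), size=(2, 3), dout=dout))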

View File

@@ -15,7 +15,6 @@
"""Define the grad rules of math related operations."""
from functools import reduce
import numpy as np
from .. import functional as F
@@ -26,7 +25,6 @@ from ..functional import broadcast_gradient_args, reduced_shape, tuple_div
from .grad_base import bprop_getters
from ..primitive import constexpr
shape_op = P.Shape()
reduce_sum = P.ReduceSum()
reshape = P.Reshape()
@@ -129,6 +127,7 @@ def bprop_matmul(self):
        else:
            dw = mul2(x, dout)
        return dx, dw
    return bprop
@@ -152,6 +151,7 @@ def bprop_batchmatmul(self):
        else:
            dw = mul2(x, dout)
        return dx, dw
    return bprop
@@ -161,6 +161,7 @@ def get_bprop_tensor_add(self):
    def bprop(x, y, out, dout):
        return binop_grad_common(x, y, dout, dout)
    return bprop
@@ -172,6 +173,7 @@ def get_bprop_neg(self):
    def bprop(x, out, dout):
        dx = neg_grad(dout)
        return (dx,)
    return bprop
@@ -182,6 +184,7 @@ def get_bprop_sub(self):
    def bprop(x, y, out, dout):
        return binop_grad_common(x, y, dout, neg_func(dout))
    return bprop
@@ -194,6 +197,7 @@ def get_bprop_mul(self):
        bc_dx = mul_func(dout, y)
        bc_dy = mul_func(dout, x)
        return binop_grad_common(x, y, bc_dx, bc_dy)
    return bprop
@@ -208,6 +212,7 @@ def get_bprop_real_div(self):
        bc_x = div_op(dout, y)
        bc_y = neg(mul_op(bc_x, out))
        return binop_grad_common(x, y, bc_x, bc_y)
    return bprop
@@ -222,6 +227,7 @@ def get_bprop_div(self):
        bc_x = div_op(dout, y)
        bc_y = neg(mul_op(bc_x, out))
        return binop_grad_common(x, y, bc_x, bc_y)
    return bprop
@@ -235,6 +241,7 @@ def get_bprop_floor(self):
    def bprop(x, out, dout):
        bc_x = fill_(dtype_(x), shape_(x), 0.)
        return (bc_x,)
    return bprop
@@ -249,6 +256,7 @@ def get_bprop_floordiv(self):
        bc_x = div_op(dout, y)
        bc_y = neg(mul_op(bc_x, out))
        return binop_grad_common(x, y, bc_x, bc_y)
    return bprop
@@ -260,6 +268,7 @@ def get_bprop_floormod(self):
        bc_x = dout
        bc_y = -dout * (x // y)
        return binop_grad_common(x, y, bc_x, bc_y)
    return bprop
@@ -274,6 +283,7 @@ def get_bprop_square(self):
        temp = mul_func(dout, x)
        dx = mul_func(fill_func(dtype(temp), shape_op(x), 2.0), temp)
        return (dx,)
    return bprop
@@ -290,6 +300,7 @@ def get_bprop_sqrt(self):
        temp = div_op(fill_func(dtype(x), shape_op(x), 0.5), sqrt(x))
        dx = mul_func(dout, temp)
        return (dx,)
    return bprop
@@ -298,9 +309,10 @@ def get_bprop_rsqrt(self):
    """Grad definition for `Rsqrt` operation."""
    def bprop(x, out, dout):
        grad = F.fill(F.dtype(x), F.shape(x), -0.5) / (F.sqrt(x) * x)
        dx = dout * grad
        return (dx,)
    return bprop
@@ -316,6 +328,7 @@ def get_bprop_reciprocal(self):
        g = neg(reciprocal(square(x)))
        dx = mul(dout, g)
        return (dx,)
    return bprop
@@ -328,6 +341,7 @@ def get_bprop_log(self):
        g = reciprocal(x)
        dx = g * dout
        return dx, 0
    return bprop
@@ -341,6 +355,7 @@ def get_bprop_log1p(self):
        g = reciprocal(x_1p)
        dx = g * dout
        return dx, 0
    return bprop
@@ -358,6 +373,7 @@ def get_bprop_erf(self):
        x_square = square(x)
        dx = dout * half_root_pi * exp(-x_square)
        return (dx,)
    return bprop
@@ -388,6 +404,7 @@ def get_bprop_pow(self):
        bc_dx = power * pow_op(x, power - 1.0) * dout
        bc_dpower = out * ln(x) * dout
        return binop_grad_common(x, power, bc_dx, bc_dpower)
    return bprop
@@ -400,6 +417,7 @@ def get_bprop_exp(self):
        g = exp_(x)
        dx = g * dout
        return (dx,)
    return bprop
@@ -411,6 +429,7 @@ def get_bprop_minimum(self):
    def bprop(x, y, out, dout):
        dx, dy = input_grad(x, y, dout)
        return dx, dy
    return bprop
@@ -422,6 +441,7 @@ def get_bprop_maximum(self):
    def bprop(x, y, out, dout):
        dx, dy = input_grad(x, y, dout)
        return dx, dy
    return bprop
@@ -432,6 +452,7 @@ def get_bprop_reducesum(self):
    def bprop(x, axis, out, dout):
        dx = _sum_grad(x, axis, dout)
        return dx, zeros_like(axis)
    return bprop
@@ -442,6 +463,7 @@ def get_bprop_cumsum(self):
    def bprop(x, axis, out, dout):
        return cumsum(dout, axis), zeros_like(axis)
    return bprop
@@ -500,6 +522,7 @@ def get_bprop_reduceprod(self):
        out = transpose(y, _invert_permutation(perm)) * grad
        dx = reshape(out, input_shape)
        return dx, zeros_like(axis)
    return bprop
@@ -515,6 +538,7 @@ def get_bprop_cumprod(self):
        prod = cumprod(x, axis)
        out = cumsum(prod * dout, axis)
        return out / x, zeros_like(axis)
    return bprop
@@ -524,6 +548,7 @@ def get_bprop_reduceall(self):
    def bprop(x, axis, out, dout):
        return zeros_like(x), zeros_like(axis)
    return bprop
@@ -534,6 +559,7 @@ def get_bprop_reducemax(self):
    def bprop(x, axis, out, dout):
        dx = _min_or_max_grad(x, axis, out, dout)
        return (dx, zeros_like(axis))
    return bprop
@@ -547,6 +573,7 @@ def get_bprop_argmaxwithvalue(self):
    def bprop(x, out, dout):
        dx = _argmin_or_argmax_grad(x, axis, keep_dims, op, out, dout)
        return (dx,)
    return bprop
@@ -557,6 +584,7 @@ def get_bprop_reducemin(self):
    def bprop(x, axis, out, dout):
        dx = _min_or_max_grad(x, axis, out, dout)
        return (dx, zeros_like(axis))
    return bprop
@@ -570,6 +598,7 @@ def get_bprop_argminwithvalue(self):
    def bprop(x, out, dout):
        dx = _argmin_or_argmax_grad(x, axis, keep_dims, op, out, dout)
        return (dx,)
    return bprop
@@ -585,6 +614,7 @@ def get_bprop_reduce_mean(self):
        div_shape = F.shape_mul(shape_op(x)) / F.shape_mul(shape_op(out))
        dx = div_op(grad, cast(F.scalar_to_array(div_shape), dtype(grad)))
        return dx, zeros_like(axis)
    return bprop
@@ -604,6 +634,7 @@ def get_bprop_not_equal(self):
    def bprop(x, y, out, dout):
        return zeros_like(x), zeros_like(y)
    return bprop
@@ -613,6 +644,7 @@ def get_bprop_greater(self):
    def bprop(x, y, out, dout):
        return zeros_like(x), zeros_like(y)
    return bprop
@@ -622,6 +654,7 @@ def get_bprop_greater_equal(self):
    def bprop(x, y, out, dout):
        return zeros_like(x), zeros_like(y)
    return bprop
@@ -631,6 +664,7 @@ def get_bprop_less(self):
    def bprop(x, y, out, dout):
        return zeros_like(x), zeros_like(y)
    return bprop
@@ -640,6 +674,7 @@ def get_bprop_less_equal(self):
    def bprop(x, y, out, dout):
        return zeros_like(x), zeros_like(y)
    return bprop
@@ -649,6 +684,7 @@ def get_bprop_logical_not(self):
    def bprop(x, out, dout):
        return (zeros_like(x),)
    return bprop
@@ -658,6 +694,7 @@ def get_bprop_logical_and(self):
    def bprop(x, y, out, dout):
        return zeros_like(x), zeros_like(y)
    return bprop
@@ -667,6 +704,7 @@ def get_bprop_logical_or(self):
    def bprop(x, y, out, dout):
        return zeros_like(x), zeros_like(y)
    return bprop
@@ -676,6 +714,7 @@ def get_bprop_npu_alloc_float_status(self):
    def bprop(out, dout):
        return ()
    return bprop
@@ -685,6 +724,7 @@ def get_bprop_npu_get_float_status(self):
    def bprop(x, out, dout):
        return (zeros_like(x),)
    return bprop
@@ -694,6 +734,7 @@ def get_bprop_npu_clear_float_status(self):
    def bprop(x, out, dout):
        return (zeros_like(x),)
    return bprop
@@ -703,6 +744,7 @@ def get_bprop_assign_add(self):
    def bprop(x, y, out, dout):
        return zeros_like(x), zeros_like(y)
    return bprop
@@ -712,6 +754,7 @@ def get_bprop_assign_sub(self):
    def bprop(x, y, out, dout):
        return zeros_like(x), zeros_like(y)
    return bprop
@@ -721,8 +764,9 @@ def get_bprop_sin(self):
    cos = P.Cos()
    def bprop(x, out, dout):
        dx = dout * cos(x)
        return (dx,)
    return bprop
@@ -733,8 +777,9 @@ def get_bprop_cos(self):
    neg = P.Neg()
    def bprop(x, out, dout):
        dx = dout * neg(sin(x))
        return (dx,)
    return bprop
@@ -746,6 +791,7 @@ def get_bprop_acos(self):
    def bprop(x, out, dout):
        dx = input_grad(x, dout)
        return (dx,)
    return bprop
@@ -757,6 +803,7 @@ def get_bprop_acosh(self):
    def bprop(x, out, dout):
        dx = input_grad(out, dout)
        return (dx,)
    return bprop
@@ -768,6 +815,7 @@ def get_bprop_abs(self):
    def bprop(x, out, dout):
        dx = abs_grad(x, dout)
        return (dx,)
    return bprop
@@ -777,6 +825,7 @@ def get_bprop_scalar_cast(self):
    def bprop(x, t, out, dout):
        return F.scalar_cast(dout, F.typeof(x)), zeros_like(t)
    return bprop
@@ -789,6 +838,7 @@ def get_bprop_scalar_addn(self):
        for _ in range(len(x)):
            dx = dx + (dout,)
        return dx
    return bprop
@@ -798,6 +848,7 @@ def get_bprop_sign(self):
    def bprop(x, out, dout):
        return (zeros_like(x),)
    return bprop
@@ -807,6 +858,7 @@ def get_bprop_round(self):
    def bprop(x, out, dout):
        return (zeros_like(x),)
    return bprop
@@ -821,4 +873,5 @@ def get_bprop_atan2(self):
        bc_dx = tmp * y
        bc_dy = tmp * (-x)
        return binop_grad_common(x, y, bc_dx, bc_dy)
    return bprop
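The Sin and Cos hunks above only touch whitespace, but the rules themselves (dx = dout * cos(x) and dx = dout * -sin(x)) are easy to verify numerically. The snippet below is a plain NumPy finite-difference check, not MindSpore code.

import numpy as np

x = np.linspace(-2.0, 2.0, 7)
dout = np.ones_like(x)
eps = 1e-6

sin_fd = (np.sin(x + eps) - np.sin(x - eps)) / (2 * eps)
cos_fd = (np.cos(x + eps) - np.cos(x - eps)) / (2 * eps)
assert np.allclose(dout * np.cos(x), sin_fd, atol=1e-5)   # bprop of Sin
assert np.allclose(dout * -np.sin(x), cos_fd, atol=1e-5)  # bprop of Cos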

View File

@@ -21,6 +21,7 @@ from ..operations import _grad_ops as G
from ..operations import _inner_ops as inner
from ..composite.multitype_ops.zeros_like_impl import zeros_like
from .grad_base import bprop_getters
from ... import context
@bprop_getters.register(P.BiasAdd)
@@ -551,6 +552,14 @@ def get_bprop_lstm(self):
        bidirectional=self.bidirectional,
        dropout=self.dropout
    )
    lstm_grad = G.LSTMGrad(
        input_size=self.input_size,
        hidden_size=self.hidden_size,
        num_layers=self.num_layers,
        has_bias=self.has_bias,
        bidirectional=self.bidirectional,
        dropout=self.dropout
    )

    def bprop(x, hx, cx, w, out, dout):
        y, _, _, reserve, state = out
@@ -559,6 +568,16 @@ def get_bprop_lstm(self):
        dw = lstm_grad_weight(F.depend(x, dx), hx, y, reserve, state)
        return dx, dhx, dcx, dw

    #
    def bprop_cpu(x, hx, cx, w, out, dout):
        y, hy, cy, reserve, _ = out
        dy, dhy, dcy, _, _ = dout
        dx, dhx, dcx, dw = lstm_grad(x, hx, cx, w, y, hy, cy, dy, dhy, dcy, reserve)
        return dx, dhx, dcx, dw

    if context.get_context('device_target') == "CPU":
        return bprop_cpu
    return bprop

View File

@@ -107,6 +107,7 @@ class BiasAddGrad(Primitive):
class BinaryCrossEntropyGrad(PrimitiveWithInfer):
    """Computes gradients for `BinaryCrossEntropy` operation."""
    @prim_attr_register
    def __init__(self, reduction='mean'):
        self.reduction = validator.check_string('reduction', reduction, ['none', 'mean', 'sum'], self.name)
@@ -665,6 +666,62 @@ class LSTMGradWeight(PrimitiveWithInfer):
        return hx_dtype
class LSTMGrad(PrimitiveWithInfer):
"""Computes the data and weight gradients of LSTM."""
@prim_attr_register
def __init__(self, input_size, hidden_size, num_layers, has_bias, bidirectional, dropout):
self.input_size = validator.check_integer('input_size', input_size, 0, Rel.GT, self.name)
self.hidden_size = validator.check_integer('hidden_size', hidden_size, 0, Rel.GT, self.name)
self.num_layers = validator.check_integer('num_layers', num_layers, 0, Rel.GT, self.name)
self.has_bias = validator.check_value_type('has_bias', has_bias, (bool,), self.name)
self.bidirectional = validator.check_value_type('bidirectional', bidirectional, (bool,), self.name)
self.dropout = validator.check_value_type("dropout", dropout, [float], self.name)
self.dropout = validator.check_number_range('dropout', dropout, 0, 1, Rel.INC_BOTH, self.name)
if bidirectional:
self.num_directions = 2
else:
self.num_directions = 1
def infer_shape(self, x_shape, hx_shape, cx_shape, w_shape, y_shape, hy_shape, cy_shape, dy_shape, dhy_shape,
dcy_shape, reserve_shape):
# dhy and dcy should be same shape
validator.check_integer("h_shape", len(dhy_shape), 3, Rel.EQ, self.name)
validator.check_integer("h_shape", len(dhy_shape), len(dcy_shape), Rel.EQ, self.name)
validator.check_integer("h_shape[0]", dhy_shape[0], dcy_shape[0], Rel.EQ, self.name)
validator.check_integer("h_shape[1]", dhy_shape[1], dcy_shape[1], Rel.EQ, self.name)
validator.check_integer("h_shape[2]", dhy_shape[2], dcy_shape[2], Rel.EQ, self.name)
validator.check_integer("h_shape[0]", dhy_shape[0], self.num_layers * self.num_directions, Rel.EQ, self.name)
validator.check_integer("h_shape[2]", dhy_shape[2], self.hidden_size, Rel.EQ, self.name)
# dy: (seq_len, batch_size, hidden_size * num_directions)
validator.check_integer("dy_shape", len(dy_shape), 3, Rel.EQ, self.name)
validator.check_integer("dy[1]", dy_shape[1], dhy_shape[1], Rel.EQ, self.name)
validator.check_integer("dy[2]", dy_shape[2], self.hidden_size * self.num_directions, Rel.EQ, self.name)
# (seq_len, batch_size, input_size)
dx_shape = (y_shape[0], y_shape[1], self.input_size)
dhx_shape = dhy_shape
dcx_shape = dcy_shape
weight_size = 0
gate_size = 4 * self.hidden_size
for layer in range(self.num_layers):
for _ in range(self.num_directions):
input_layer_size = self.input_size if layer == 0 else self.hidden_size * self.num_directions
weight_size += gate_size * input_layer_size
weight_size += gate_size * self.hidden_size
if self.has_bias:
weight_size += gate_size
return (dx_shape, dhx_shape, dcx_shape, (weight_size, 1, 1))
def infer_dtype(self, x_dtype, hx_dtype, cx_dtype, w_dtype, y_dtype, hy_dtype, cy_dtype, dy_dtype, dhy_dtype,
dcy_dtype, reserve_dtype):
return (dy_dtype, dy_dtype, dy_dtype, hx_dtype)
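For a concrete read of infer_shape: with illustrative hyperparameters input_size=10, hidden_size=2, num_layers=1, unidirectional and has_bias=True (sample values, not taken from this diff), gate_size is 8 and the flattened weight gradient comes out as (8*10 + 8*2 + 8, 1, 1) = (104, 1, 1). A small self-contained check of that arithmetic:

def lstm_flat_weight_size(input_size, hidden_size, num_layers, bidirectional, has_bias):
    # Mirrors the weight_size loop in LSTMGrad.infer_shape above (illustrative copy).
    num_directions = 2 if bidirectional else 1
    gate_size = 4 * hidden_size
    size = 0
    for layer in range(num_layers):
        for _ in range(num_directions):
            input_layer_size = input_size if layer == 0 else hidden_size * num_directions
            size += gate_size * (input_layer_size + hidden_size)
            if has_bias:
                size += gate_size
    return size

assert lstm_flat_weight_size(10, 2, 1, False, True) == 104  # dw shape (104, 1, 1)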
class PReLUGrad(PrimitiveWithInfer):
r"""
Gradients of PReLU operation.
@@ -1051,6 +1108,7 @@ class RefToEmbed(Primitive):
__mindspore_signature__ = (
('variable', sig_rw.RW_REF, sig_kind.KIND_POSITIONAL_KEYWORD),
)
@prim_attr_register
def __init__(self):
pass

View File

@@ -35,9 +35,11 @@ def _check_positive_int_or_tuple(arg_name, arg_value, prim_name, allow_four=Fals
"""
Checks whether an argument is a positive int or tuple with 2 or 4(when allow_four is True) positive int elements.
"""
def _raise_message():
raise ValueError(f"For '{prim_name}' attr '{arg_name}' should be an positive int number or a tuple of two "
f"{'or four ' if allow_four else ''}positive int numbers, but got {arg_value}")
def _get_return_value():
if isinstance(arg_value, int):
ret = (1, 1, arg_value, arg_value) if ret_four else (arg_value, arg_value)
@@ -50,6 +52,7 @@ def _check_positive_int_or_tuple(arg_name, arg_value, prim_name, allow_four=Fals
else:
_raise_message()
return ret
validator.check_value_type(arg_name, arg_value, (int, tuple), prim_name)
ret_value = _get_return_value()
for item in ret_value:
@@ -58,6 +61,7 @@ def _check_positive_int_or_tuple(arg_name, arg_value, prim_name, allow_four=Fals
_raise_message()
return ret_value
class Flatten(PrimitiveWithInfer):
r"""
Flattens a tensor without changing its batch size on the 0-th axis.
@@ -205,6 +209,7 @@ class Softplus(PrimitiveWithInfer):
>>> softplus(input_x)
[1.3132615, 2.126928, 3.0485873, 4.01815, 5.0067153]
"""
@prim_attr_register
def __init__(self):
"""init Softplus"""
@@ -301,6 +306,7 @@ class ReLUV2(PrimitiveWithInfer):
([[[[1., 0.], [0., 4.]], [[0., 6.], [7., 0.]]]],
[[[[1, 0], [2, 0]], [[2, 0], [1, 0]]]])
"""
@prim_attr_register
def __init__(self):
"""init ReLUV2"""
@@ -398,6 +404,7 @@ class HSwish(PrimitiveWithInfer):
>>> input_x = Tensor(np.array([-1, -2, 0, 2, 1]), mindspore.float16)
>>> result = hswish(input_x)
"""
@prim_attr_register
def __init__(self):
self.init_prim_io_names(inputs=['x'], outputs=['output'])
@@ -1077,6 +1084,7 @@ class MaxPoolWithArgmax(_Pool):
>>> maxpool_arg_op = P.MaxPoolWithArgmax(padding="VALID", ksize=2, strides=1)
>>> output_tensor, argmax = maxpool_arg_op(input_tensor)
"""
def __init__(self, ksize=1, strides=1, padding="valid"):
super(MaxPoolWithArgmax, self).__init__(ksize, strides, padding)
self.is_tbe = context.get_context("device_target") == "Ascend"
@@ -1495,6 +1503,7 @@ class ApplyMomentum(PrimitiveWithInfer):
('gradient', sig_rw.RW_READ, sig_kind.KIND_POSITIONAL_KEYWORD),
('momentum', sig_rw.RW_READ, sig_kind.KIND_POSITIONAL_KEYWORD)
)
@prim_attr_register
def __init__(self, use_nesterov=False, use_locking=False, gradient_scale=1.0):
self.init_prim_io_names(inputs=['variable', 'accumulation', 'learning_rate', 'gradient', 'momentum'],
@@ -1584,6 +1593,7 @@ class L2Loss(PrimitiveWithInfer):
>>> l2_loss(input_x)
7.0
"""
@prim_attr_register
def __init__(self):
"""init L2Loss"""
@@ -2326,7 +2336,29 @@ class LSTM(PrimitiveWithInfer):
y_shape = (x_shape[0], x_shape[1], self.hidden_size * self.num_directions)
# set arbitrary shape for reserved space
reserved_shape = (1, 1) type_size = 4
gates_ws_ld = self.get_good_ld(self.hidden_size * 4, type_size)
states_ws_ld = self.get_good_ld(max(self.hidden_size, self.input_size), type_size)
self.ws_gates_size = self.num_layers * self.num_directions * x_shape[0] * x_shape[1] * gates_ws_ld * type_size
self.ws_states_size = (self.num_layers + 1) * self.num_directions * (x_shape[0] + 1) * x_shape[
1] * states_ws_ld * type_size
self.ws_c_states_size = (self.num_layers + 1) * self.num_directions * (x_shape[0] + 1) * x_shape[
1] * states_ws_ld * type_size
self.ws_diff_states_size = (self.num_layers + 1) * self.num_directions * (x_shape[0] + 1) * (2 + 1) * x_shape[
1] * states_ws_ld * type_size
self.ws_grid_comp_size = 0
self.page_size = 4096
current_offset = 0
current_offset += self.ws_gates_size
current_offset = self.rnd_up(current_offset, self.page_size)
current_offset += self.ws_states_size
current_offset = self.rnd_up(current_offset, self.page_size)
current_offset += self.ws_c_states_size
current_offset = self.rnd_up(current_offset, self.page_size)
current_offset += self.ws_diff_states_size
current_offset = self.rnd_up(current_offset, self.page_size)
current_offset += self.ws_grid_comp_size
reserved_shape = (current_offset, 1)
state_shape = (1, 1)
return (y_shape, h_shape, c_shape, reserved_shape, state_shape)
@@ -2335,6 +2367,15 @@ class LSTM(PrimitiveWithInfer):
validator.check_tensor_type_same(args, (mstype.float32, mstype.float16), self.name)
return (x_dtype, x_dtype, x_dtype, x_dtype, x_dtype)
def rnd_up(self, current_offset, page_size):
return ((current_offset + page_size - 1) // page_size) * page_size
def get_good_ld(self, dim, type_size):
ld = self.rnd_up(dim, 64 // type_size)
if ld % 256 == 0:
return ld + 64 // type_size
return ld
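The reserved-space shape mirrors a oneDNN-style workspace layout: four page-aligned chunks (gates, states, cell states, diff states) whose leading dimensions are rounded up to a 64-byte boundary, with the modulo-256 adjustment presumably there to avoid 4K aliasing, as in oneDNN's get_good_ld. A standalone sketch of the same arithmetic with illustrative sizes (not values from this diff):

PAGE_SIZE = 4096
TYPE_SIZE = 4  # float32

def rnd_up(offset, alignment):
    return ((offset + alignment - 1) // alignment) * alignment

def good_ld(dim, type_size=TYPE_SIZE):
    ld = rnd_up(dim, 64 // type_size)                    # 64-byte aligned leading dimension
    return ld + 64 // type_size if ld % 256 == 0 else ld

def reserved_rows(seq_len, batch, input_size, hidden, layers, directions):
    gates_ld = good_ld(4 * hidden)
    states_ld = good_ld(max(hidden, input_size))
    ws_gates = layers * directions * seq_len * batch * gates_ld * TYPE_SIZE
    ws_states = (layers + 1) * directions * (seq_len + 1) * batch * states_ld * TYPE_SIZE
    ws_c_states = ws_states
    ws_diff_states = (layers + 1) * directions * (seq_len + 1) * 3 * batch * states_ld * TYPE_SIZE
    offset = 0
    for chunk in (ws_gates, ws_states, ws_c_states, ws_diff_states):
        offset = rnd_up(offset + chunk, PAGE_SIZE)       # page-align after each chunk
    return offset  # first dimension of the reserved-space output

print(reserved_rows(seq_len=5, batch=2, input_size=3, hidden=2, layers=1, directions=1))  # 20480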
class SigmoidCrossEntropyWithLogits(PrimitiveWithInfer):
r"""
@@ -3000,6 +3041,7 @@ class Dropout(PrimitiveWithInfer):
>>> in = Tensor((20, 16, 50, 50))
>>> out = dropout(in)
"""
@prim_attr_register
def __init__(self, drop_prob=0):
self.drop_prob = validator.check_number_range("drop_prob", drop_prob, 0, 1, Rel.INC_BOTH, self.name)
@@ -3034,6 +3076,7 @@ class DropoutGrad(PrimitiveWithInfer):
>>> in = Tensor((20, 16, 50, 50))
>>> out = dropout_grad(in)
"""
@prim_attr_register
def __init__(self, drop_prob=0):
self.drop_prob = validator.check_number_range("drop_prob", drop_prob, 0, 1, Rel.INC_BOTH, self.name)
@@ -3084,6 +3127,7 @@ class CTCLoss(PrimitiveWithInfer):
>>> ctc_loss = P.CTCloss()
>>> output = ctc_loss(inputs, labels_indices, labels_values, sequence_length)
"""
@prim_attr_register
def __init__(self, preprocess_collapse_repeated=False, ctc_merge_repeated=False,
ignore_longer_outputs_than_inputs=False):

View File

@@ -0,0 +1,335 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
import pytest
import mindspore.nn as nn
from mindspore.common.api import ms_function
import numpy as np
import mindspore.context as context
from mindspore.common.initializer import initializer
from mindspore.ops import composite as C
from mindspore.ops import operations as P
from mindspore.common.tensor import Tensor
from mindspore.common.parameter import ParameterTuple, Parameter
context.set_context(device_target='CPU')
class LstmNet(nn.Cell):
def __init__(self, seq_len, batch_size, input_size, hidden_size, num_layers, has_bias, bidirectional, dropout):
super(LstmNet, self).__init__()
num_directions = 1
if bidirectional:
num_directions = 2
self.lstm = P.LSTM(input_size, hidden_size, num_layers, has_bias, bidirectional, dropout)
input_np = np.array([[[0.6755, -1.6607, 0.1367], [0.4276, -0.7850, -0.3758]],
[[-0.6424, -0.6095, 0.6639], [0.7918, 0.4147, -0.5089]],
[[-1.5612, 0.0120, -0.7289], [-0.6656, -0.6626, -0.5883]],
[[-0.9667, -0.6296, -0.7310], [0.1026, -0.6821, -0.4387]],
[[-0.4710, 0.6558, -0.3144], [-0.8449, -0.2184, -0.1806]]
]).astype(np.float32)
self.x = Parameter(initializer(Tensor(input_np), [seq_len, batch_size, input_size]), name='x')
self.h = Parameter(initializer(
Tensor(
np.array([0.1, 0.1, 0.1, 0.1]).reshape((num_layers * num_directions, batch_size, hidden_size)).astype(
np.float32)),
[num_layers * num_directions, batch_size, hidden_size]), name='h')
self.c = Parameter(initializer(
Tensor(
np.array([0.2, 0.2, 0.2, 0.2]).reshape((num_layers * num_directions, batch_size, hidden_size)).astype(
np.float32)),
[num_layers * num_directions, batch_size, hidden_size]), name='c')
wih = np.array([[3.4021e-01, -4.6622e-01, 4.5117e-01],
[-6.4257e-02, -2.4807e-01, 1.3550e-02], # i
[-3.2140e-01, 5.5578e-01, 6.3589e-01],
[1.6547e-01, -7.9030e-02, -2.0045e-01],
[-6.9863e-01, 5.9773e-01, -3.9062e-01],
[-3.0253e-01, -1.9464e-01, 7.0591e-01],
[-4.0835e-01, 3.6751e-01, 4.7989e-01],
[-5.6894e-01, -5.0359e-01, 4.7491e-01]]).astype(np.float32) # .reshape([1,-1])
whh = np.array([[-0.4820, -0.2350],
[-0.1195, 0.0519],
[0.2162, -0.1178],
[0.6237, 0.0711],
[0.4511, -0.3961],
[-0.5962, 0.0906],
[0.1867, -0.1225],
[0.1831, 0.0850]]).astype(np.float32) # .reshape([1,-1])
wih = wih.transpose((1, 0))
whh = whh.transpose((1, 0))
bih = np.zeros((1, 8)).astype(np.float32)
w_np = np.concatenate((wih, whh, bih), axis=0).reshape([-1, 1, 1])
self.w = Parameter(initializer(Tensor(w_np), w_np.shape), name='w')
@ms_function
def construct(self):
return self.lstm(self.x, self.h, self.c, self.w)
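LstmNet packs the recurrent weights into the single flat parameter the CPU LSTM kernel consumes: wih (4*hidden_size x input_size) and whh (4*hidden_size x hidden_size) are transposed, a combined bias row of width 4*hidden_size is appended, and the result is reshaped to (-1, 1, 1). A short shape check of that packing, using the same small sizes as this test (illustrative, not part of the diff):

import numpy as np

input_size, hidden_size = 3, 2
gate_size = 4 * hidden_size
wih = np.zeros((gate_size, input_size), np.float32).transpose((1, 0))   # (3, 8)
whh = np.zeros((gate_size, hidden_size), np.float32).transpose((1, 0))  # (2, 8)
bias = np.zeros((1, gate_size), np.float32)                             # (1, 8)
w_flat = np.concatenate((wih, whh, bias), axis=0).reshape([-1, 1, 1])
assert w_flat.shape == (gate_size * (input_size + hidden_size + 1), 1, 1)  # (48, 1, 1)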
@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_lstm():
seq_len = 5
batch_size = 2
input_size = 3
hidden_size = 2
num_layers = 1
has_bias = True
bidirectional = False
dropout = 0.0
num_directions = 1
if bidirectional:
num_directions = 2
net = LstmNet(seq_len, batch_size, input_size, hidden_size, num_layers, has_bias, bidirectional, dropout)
y, h, c, _, _ = net()
print(y)
print(c)
print(h)
expect_y = np.array([[[-0.16709016, 0.13125697],
[-0.08438572, -0.01969833]],
[[-0.2746155, 0.32764038],
[-0.06504016, -0.07770399]],
[[-0.00140004, 0.17706314],
[0.03244496, -0.10135599]],
[[0.08328028, 0.06437367],
[-0.04133911, -0.11072896]],
[[0.19004421, -0.02852732],
[0.09138509, -0.00344161]]]
)
error = np.ones([num_layers, batch_size, hidden_size]) * 1.0e-4
diff = y.asnumpy() - expect_y
assert np.all(diff < error)
assert np.all(-diff < error)
#
expect_h = np.array([[[0.19004421, -0.02852732],
[0.09138509, -0.00344161]]])
error = np.ones((num_layers * num_directions, batch_size, hidden_size)) * 1.0e-4
diff = h.asnumpy() - expect_h
assert np.all(diff < error)
assert np.all(-diff < error)
#
expect_c = np.array([[[0.34533143, -0.06313794],
[0.169008, -0.00555446]]])
error = np.ones((num_layers * num_directions, batch_size, hidden_size)) * 1.0e-4
diff = c.asnumpy() - expect_c
assert np.all(diff < error)
assert np.all(-diff < error)
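The paired diff < error and -diff < error assertions above amount to an elementwise absolute-tolerance check; a compact equivalent that could replace the repetition (illustrative helper, not part of the diff):

import numpy as np

def assert_close(actual, expected, atol=1.0e-4):
    # Same condition as the paired one-sided checks: |actual - expected| < atol everywhere.
    assert np.all(np.abs(actual - expected) < atol)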
class MultiLayerBiLstmNet(nn.Cell):
def __init__(self, seq_len, batch_size, input_size, hidden_size, num_layers, has_bias, bidirectional, dropout):
super(MultiLayerBiLstmNet, self).__init__()
num_directions = 1
if bidirectional:
num_directions = 2
self.lstm = nn.LSTM(input_size=input_size, hidden_size=hidden_size, num_layers=num_layers, has_bias=has_bias,
bidirectional=bidirectional, dropout=dropout)
input_np = np.array([[[-0.1887, -0.4144, -0.0235, 0.7489, 0.7522, 0.5969, 0.3342, 1.2198, 0.6786, -0.9404],
[-0.8643, -1.6835, -2.4965, 2.8093, 0.1741, 0.2707, 0.7387, -0.0939, -1.7990, 0.4765]],
[[-0.5963, -1.2598, -0.7226, 1.1365, -1.7320, -0.7302, 0.1221, -0.2111, -1.6173, -0.0706],
[0.8964, 0.1737, -1.0077, -0.1389, 0.4889, 0.4391, 0.7911, 0.3614, -1.9533, -0.9936]],
[[0.3260, -1.3312, 0.0601, 1.0726, -1.6010, -1.8733, -1.5775, 1.1579, -0.8801, -0.5742],
[-2.2998, -0.6344, -0.5409, -0.9221, -0.6500, 0.1206, 1.5215, 0.7517, 1.3691, 2.0021]],
[[-0.1245, -0.3690, 2.1193, 1.3852, -0.1841, -0.8899, -0.3646, -0.8575, -0.3131, 0.2026],
[1.0218, -1.4331, 0.1744, 0.5442, -0.7808, 0.2527, 0.1566, 1.1484, -0.7766, -0.6747]],
[[-0.6752, 0.9906, -0.4973, 0.3471, -0.1202, -0.4213, 2.0213, 0.0441, 0.9016, 1.0365],
[1.2223, -1.3248, 0.1207, -0.8256, 0.1816, 0.7057, -0.3105, 0.5713, 0.2804,
-1.0685]]]).astype(np.float32)
self.x = Parameter(initializer(Tensor(input_np), [seq_len, batch_size, input_size]), name='x')
self.h0 = Parameter(initializer(
Tensor(np.ones((num_directions, batch_size, hidden_size)).astype(np.float32)),
[num_directions, batch_size, hidden_size]), name='h0')
self.c0 = Parameter(initializer(
Tensor(np.ones((num_directions, batch_size, hidden_size)).astype(np.float32)),
[num_directions, batch_size, hidden_size]), name='c0')
self.h1 = Parameter(initializer(
Tensor(np.ones((num_directions, batch_size, hidden_size)).astype(np.float32)),
[num_directions, batch_size, hidden_size]), name='h1')
self.c1 = Parameter(initializer(
Tensor(np.ones((num_directions, batch_size, hidden_size)).astype(np.float32)),
[num_directions, batch_size, hidden_size]), name='c1')
self.h = ParameterTuple((self.h0, self.h1))
self.c = ParameterTuple((self.c0, self.c1))
@ms_function
def construct(self):
return self.lstm(self.x, (self.h, self.c))
@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_multi_layer_bilstm():
seq_len = 5
batch_size = 2
input_size = 10
hidden_size = 2
num_layers = 2
has_bias = True
bidirectional = True
dropout = 0.0
num_directions = 1
if bidirectional:
num_directions = 2
net = MultiLayerBiLstmNet(seq_len, batch_size, input_size, hidden_size, num_layers, has_bias, bidirectional,
dropout)
y, h, c, _, _ = net()
print(y)
print(h)
print(c)
class Grad(nn.Cell):
def __init__(self, network):
super(Grad, self).__init__()
self.network = network
self.weights = ParameterTuple(network.trainable_params())
self.grad = C.GradOperation('grad',
get_by_list=True,
sens_param=True)
@ms_function
def construct(self, output_grad):
weights = self.weights
grads = self.grad(self.network, weights)(output_grad)
return grads
class Net(nn.Cell):
def __init__(self, seq_len, batch_size, input_size, hidden_size, num_layers, has_bias, bidirectional, dropout):
super(Net, self).__init__()
num_directions = 1
if bidirectional:
num_directions = 2
input_np = np.array([[[-0.5907, 1.0557, 1.7283, 0.6706, -1.2550, -0.5298, -0.2290, -0.6735, 0.8555, 1.4836],
[-1.7070, -0.5347, -0.9105, -0.2598, 0.0588, 1.5496, 1.0757, 0.3760, -1.2020, -0.2868]],
[[0.0151, 0.2126, 0.8090, -0.5292, -2.5590, 0.4279, -0.3081, -1.4706, -0.0498, 1.2301],
[0.4165, -0.5391, -0.0996, 0.1928, -0.4909, -0.1255, 0.4444, -1.3687, 1.3096, 0.6553]],
[[-0.7802, -0.2083, -0.6388, 1.3757, 0.4293, 0.5363, 0.3202, -0.6687, -1.3864, -0.2953],
[1.0799, -0.7204, 0.1130, -0.5857, -0.4855, -1.1068, 1.0126, 0.8716, 1.5460, -0.7392]],
[[2.2645, -0.6586, -0.2227, 1.4290, -0.5006, -1.6576, -0.1793, 0.5319, 0.1360, 0.2707],
[-0.4071, 0.1575, 1.4199, -0.9156, 0.1855, 0.4947, 1.0460, -0.6365, 0.1191, -0.6374]],
[[0.2468, 1.0815, -0.4893, 0.0664, 0.6405, -2.2967, 0.7612, 0.8759, 0.5685, -1.0999],
[-0.7272, -1.7750, -0.1164, -0.7159, 0.0061, -0.7839, -1.8329, 0.3434, -0.5634,
0.5384]]]).astype(np.float32)
self.x = Parameter(initializer(Tensor(input_np), [seq_len, batch_size, input_size]), name='x')
self.h0 = Parameter(initializer(
Tensor(np.ones((num_directions, batch_size, hidden_size)).astype(np.float32)),
[num_directions, batch_size, hidden_size]), name='h0')
self.c0 = Parameter(initializer(
Tensor(np.ones((num_directions, batch_size, hidden_size)).astype(np.float32)),
[num_directions, batch_size, hidden_size]), name='c0')
wih_l0 = np.array([[0.2300, 0.6668, 0.4703, 0.0425, 0.0464, 0.6825, 0.2249, -0.4315, -0.2449, 0.2964],
[-0.2811, -0.3444, 0.2557, -0.5137, -0.5518, 0.1652, -0.6720, 0.1066, 0.3586, 0.6299],
[0.5728, -0.1784, 0.5661, 0.4012, 0.3856, -0.1899, 0.3102, 0.3717, -0.5651, 0.1952],
[0.1026, -0.0527, 0.1198, -0.3080, 0.2292, 0.5757, -0.3567, -0.2731, -0.0586, -0.2849],
[0.2194, -0.1622, 0.3219, -0.3008, -0.3713, -0.3034, -0.2385, 0.0412, -0.5205, 0.0280],
[-0.5499, -0.0733, -0.5236, -0.6753, -0.7045, -0.1839, -0.1037, -0.5026, -0.4055, -0.3416],
[0.1573, -0.1301, -0.2882, -0.3464, 0.6643, 0.1980, -0.6804, 0.5359, 0.5996, 0.0124],
[-0.6436, 0.0587, -0.6520, -0.0471, 0.1667, 0.6042, 0.5752, -0.6296, -0.2976,
-0.3757]]).astype(np.float32).reshape([1, -1])
whh_l0 = np.array([[0.3358, 0.2790],
[-0.5355, 0.0989],
[-0.1402, 0.5120],
[0.1335, 0.1653],
[0.3533, -0.3531],
[0.4166, -0.4420],
[-0.5454, -0.1720],
[0.0041, -0.0799]]).astype(np.float32).reshape([1, -1])
bih_l0 = np.array([0.5518, 0.1083, 0.4829, 0.0607, -0.1770, -0.6944, 0.3059, 0.5354]).astype(
np.float32).reshape([1, -1])
bhh_l0 = np.array([0.5025, -0.1261, -0.5405, 0.3220, -0.3441, 0.6488, -0.0284, -0.2334]).astype(
np.float32).reshape([1, -1])
w0_np = np.concatenate(
(wih_l0, whh_l0, bih_l0 + bhh_l0),
axis=1).reshape([-1, 1, 1])
self.w0 = Parameter(initializer(Tensor(w0_np), w0_np.shape), name='w0')
self.lstm = P.LSTM(input_size=input_size, hidden_size=hidden_size, num_layers=num_layers,
has_bias=has_bias, bidirectional=bidirectional, dropout=dropout)
@ms_function
def construct(self):
return self.lstm(self.x, self.h0, self.c0, self.w0)[0]
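Grad wraps Net with GradOperation(get_by_list=True, sens_param=True), so calling it with a sensitivity tensor shaped like the network output returns one gradient per trainable parameter, presumably in declaration order (x, h0, c0, w0), which is why test_grad below unpacks four values. A minimal usage sketch under those assumptions:

# Illustrative only; test_grad below does the same thing with a concrete dy.
net = Grad(Net(seq_len=5, batch_size=2, input_size=10, hidden_size=2,
               num_layers=1, has_bias=True, bidirectional=False, dropout=0.0))
sens = Tensor(np.zeros((5, 2, 2), np.float32))  # matches y: (seq_len, batch, hidden * directions)
dx, dhx, dcx, dw = net(sens)                    # gradients w.r.t. x, h0, c0, w0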
@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_grad():
seq_len = 5
batch_size = 2
input_size = 10
hidden_size = 2
num_layers = 1
has_bias = True
bidirectional = False
dropout = 0.0
num_directions = 1
if bidirectional:
num_directions = 2
net = Grad(Net(seq_len, batch_size, input_size, hidden_size, num_layers, has_bias, bidirectional, dropout))
dy = np.array([[[-3.5471e-01, 7.0540e-01],
[2.7161e-01, 1.0865e+00]],
[[-4.2431e-01, 1.4955e+00],
[-4.0418e-01, -2.3282e-01]],
[[-1.3654e+00, 1.9251e+00],
[-4.6481e-01, 1.3138e+00]],
[[1.2914e+00, -2.3753e-01],
[5.3589e-01, -1.0981e-01]],
[[-1.6032e+00, -1.8818e-01],
[1.0065e-01, 9.2045e-01]]]).astype(np.float32)
dx, dhx, dcx, dw = net(Tensor(dy))
print(dx)
print(dhx)
print(dcx)
print(dw)
# test_multi_layer_bilstm()
# test_lstm()
# tf_lstm_test()
# test_grad()