forked from mindspore-Ecosystem/mindspore
!27835 Add GRU GPU fused ops backward by using cuDNN API
Merge pull request !27835 from YijieChen/gru_ops_back
This commit is contained in: commit b6081c5fb9
@@ -0,0 +1,46 @@
/**
 * Copyright 2019 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "backend/kernel_compiler/gpu/rl/gru_grad_data_gpu_kernel.h"

namespace mindspore {
namespace kernel {
MS_REG_GPU_KERNEL_ONE(GruGradData,
                      KernelAttr()
                        .AddInputAttr(kNumberTypeFloat32)
                        .AddInputAttr(kNumberTypeFloat32)
                        .AddInputAttr(kNumberTypeFloat32)
                        .AddInputAttr(kNumberTypeFloat32)
                        .AddInputAttr(kNumberTypeFloat32)
                        .AddInputAttr(kNumberTypeFloat32)
                        .AddInputAttr(kNumberTypeFloat32)
                        .AddOutputAttr(kNumberTypeFloat32)
                        .AddOutputAttr(kNumberTypeFloat32),
                      GruGradDataGpuKernel, float)
MS_REG_GPU_KERNEL_ONE(GruGradData,
                      KernelAttr()
                        .AddInputAttr(kNumberTypeFloat16)
                        .AddInputAttr(kNumberTypeFloat16)
                        .AddInputAttr(kNumberTypeFloat16)
                        .AddInputAttr(kNumberTypeFloat16)
                        .AddInputAttr(kNumberTypeFloat16)
                        .AddInputAttr(kNumberTypeFloat16)
                        .AddInputAttr(kNumberTypeFloat16)
                        .AddOutputAttr(kNumberTypeFloat16)
                        .AddOutputAttr(kNumberTypeFloat16),
                      GruGradDataGpuKernel, half)
}  // namespace kernel
}  // namespace mindspore
@@ -0,0 +1,341 @@
/**
 * Copyright 2019-2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_NN_GRU_GRAD_DATA_GPU_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_NN_GRU_GRAD_DATA_GPU_KERNEL_H_

#include <cuda_runtime_api.h>
#include <vector>
#include <memory>
#include "backend/kernel_compiler/gpu/gpu_kernel.h"
#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h"
#include "backend/kernel_compiler/gpu/kernel_constants.h"

namespace mindspore {
namespace kernel {
constexpr size_t kIndexEight = 8;
constexpr size_t DimOfTensor = 3;
constexpr size_t LeastWeightShape = 3;
constexpr size_t LeastInputShapeSize = 2;
template <typename T>
class GruGradDataGpuKernel : public GpuKernel {
 public:
  GruGradDataGpuKernel()
      : batch_size_(0),
        seq_len_(0),
        input_size_(0),
        hidden_size_(0),
        num_layers_(0),
        has_bias_(false),
        bidirectional_(false),
        states_init_(false),
        is_null_input_(false),
        dropout_(0),
        weight_size_(0),
        reserved_size_(0),
        rnn_desc_(nullptr),
        y_desc_(nullptr),
        dy_desc_(nullptr),
        dhy_desc_(nullptr),
        dcy_desc_(nullptr),
        w_desc_(nullptr),
        hx_desc_(nullptr),
        cx_desc_(nullptr),
        dropout_desc_(nullptr),
        dx_desc_(nullptr),
        dhx_desc_(nullptr),
        dcx_desc_(nullptr),
        handle_(nullptr),
        cudnn_data_type_(CUDNN_DATA_FLOAT) {}
  ~GruGradDataGpuKernel() override { DestroyResource(); }

  const std::vector<size_t> &GetInputSizeList() const override { return input_size_list_; }
  const std::vector<size_t> &GetOutputSizeList() const override { return output_size_list_; }
  const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; }
  bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
              const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
    if (is_null_input_) {
      return true;
    }
    auto y_addr = GetDeviceAddress<T>(inputs, 0);
    auto dy_addr = GetDeviceAddress<T>(inputs, 1);
    auto dhy_addr = GetDeviceAddress<T>(inputs, 2);
    auto dcy_addr = nullptr;
    auto w_addr = GetDeviceAddress<T>(inputs, 3);
    auto hx_addr = GetDeviceAddress<T>(inputs, 4);
    auto cx_addr = nullptr;
    auto reserved_addr = GetDeviceAddress<T>(inputs, 5);
    auto states_addr = GetDeviceAddress<T>(inputs, 6);
    auto dx_addr = GetDeviceAddress<T>(outputs, 0);
    auto dhx_addr = GetDeviceAddress<T>(outputs, 1);
    auto dcx_addr = nullptr;
    void *workspace_addr = GetPossiblyNullDeviceAddress<T>(workspace, 0);
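    // On the first launch, rebuild the dropout descriptor from the RNG state
    // blob produced by the forward CudnnGRU op (passed in as the last input).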
    if (!states_init_) {
      CHECK_CUDNN_RET_WITH_EXCEPT(
        kernel_node_,
        cudnnRestoreDropoutDescriptor(dropout_desc_, handle_, dropout_, states_addr, input_size_list_[kIndexEight], 0),
        "restore dropout state failed");
      states_init_ = true;
    }

    CHECK_CUDNN_RET_WITH_EXCEPT(
      kernel_node_,
      cudnnRNNBackwardData(handle_, rnn_desc_, seq_len_, y_desc_.get(), y_addr, dy_desc_.get(), dy_addr, dhy_desc_,
                           dhy_addr, dcy_desc_, dcy_addr, w_desc_, w_addr, hx_desc_, hx_addr, cx_desc_, cx_addr,
                           dx_desc_.get(), dx_addr, dhx_desc_, dhx_addr, dcx_desc_, dcx_addr, workspace_addr,
                           workspace_size_list_[0], reserved_addr, reserved_size_),
      "launch gru back data kernel failed");

    CHECK_CUDA_RET_WITH_EXCEPT(kernel_node_, cudaStreamSynchronize(reinterpret_cast<cudaStream_t>(stream_ptr)),
                               "stream synchronize failed.");
    return true;
  }
  void GetAttrs(const CNodePtr &kernel_node) {
    input_size_ = static_cast<int>(GetAttr<int64_t>(kernel_node, "input_size"));
    hidden_size_ = static_cast<int>(GetAttr<int64_t>(kernel_node, "hidden_size"));
    num_layers_ = static_cast<int>(GetAttr<int64_t>(kernel_node, "num_layers"));
    has_bias_ = GetAttr<bool>(kernel_node, "has_bias");
    bidirectional_ = GetAttr<bool>(kernel_node, "bidirectional");
    dropout_ = GetAttr<float>(kernel_node, "dropout");
  }
  bool Init(const CNodePtr &kernel_node) override {
    auto kernel_name = AnfAlgo::GetCNodeName(kernel_node);
    kernel_node_ = kernel_node;
    InitResource();
    cudnn_data_type_ = GetCudnnDataType(TypeIdLabel(AnfAlgo::GetInputDeviceDataType(kernel_node, 0)));
    auto input_shape = AnfAlgo::GetOutputInferShape(kernel_node, 0);
    is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name, "input");
    if (is_null_input_) {
      InitSizeLists();
      return true;
    }
    if (input_shape.size() < LeastInputShapeSize) {
      MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the dimension of input cannot be less than 2, but got "
                        << input_shape.size();
    }
    seq_len_ = SizeToInt(input_shape[0]);
    batch_size_ = SizeToInt(input_shape[1]);
    GetAttrs(kernel_node);
    cudnnRNNInputMode_t input_mode = CUDNN_LINEAR_INPUT;
    cudnnDirectionMode_t direction = bidirectional_ ? CUDNN_BIDIRECTIONAL : CUDNN_UNIDIRECTIONAL;
    cudnnRNNMode_t rnn_mode = CUDNN_GRU;
    cudnnRNNAlgo_t algo = CUDNN_RNN_ALGO_STANDARD;
    CreateTensorDescGrp();
    int hx_dims[3]{num_layers_ * (bidirectional_ ? 2 : 1), batch_size_, hidden_size_};
    CHECK_CUDNN_RET_WITH_EXCEPT(
      kernel_node_, cudnnSetTensorNdDescriptorEx(dhy_desc_, CUDNN_TENSOR_NCHW, cudnn_data_type_, DimOfTensor, hx_dims),
      "set dhy_desc_ failed");
    CHECK_CUDNN_RET_WITH_EXCEPT(
      kernel_node_, cudnnSetTensorNdDescriptorEx(dcy_desc_, CUDNN_TENSOR_NCHW, cudnn_data_type_, DimOfTensor, hx_dims),
      "set dcy_desc_ failed");
    CHECK_CUDNN_RET_WITH_EXCEPT(
      kernel_node_, cudnnSetTensorNdDescriptorEx(hx_desc_, CUDNN_TENSOR_NCHW, cudnn_data_type_, DimOfTensor, hx_dims),
      "set hx_desc_ failed");
    CHECK_CUDNN_RET_WITH_EXCEPT(
      kernel_node_, cudnnSetTensorNdDescriptorEx(cx_desc_, CUDNN_TENSOR_NCHW, cudnn_data_type_, DimOfTensor, hx_dims),
      "set cx_desc_ failed");
    CHECK_CUDNN_RET_WITH_EXCEPT(
      kernel_node_, cudnnSetTensorNdDescriptorEx(dhx_desc_, CUDNN_TENSOR_NCHW, cudnn_data_type_, DimOfTensor, hx_dims),
      "set dhx_desc_ failed");
    CHECK_CUDNN_RET_WITH_EXCEPT(
      kernel_node_, cudnnSetTensorNdDescriptorEx(dcx_desc_, CUDNN_TENSOR_NCHW, cudnn_data_type_, DimOfTensor, hx_dims),
      "set dcx_desc_ failed");
    CHECK_CUDNN_RET_WITH_EXCEPT(kernel_node_,
                                cudnnSetDropoutDescriptor(dropout_desc_, handle_, dropout_, nullptr, 0, 0),
                                "set dropout_desc failed");
    cudnnRNNBiasMode_t bias_mode = has_bias_ ? CUDNN_RNN_DOUBLE_BIAS : CUDNN_RNN_NO_BIAS;
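    // cuDNN 8 folded cudnnSetRNNDescriptor_v6 + cudnnSetRNNBiasMode into the
    // single cudnnSetRNNDescriptor_v8 call, hence the version split below.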
#if CUDNN_VERSION < 8000
    CHECK_CUDNN_RET_WITH_EXCEPT(kernel_node_,
                                cudnnSetRNNDescriptor_v6(handle_, rnn_desc_, hidden_size_, num_layers_, dropout_desc_,
                                                         input_mode, direction, rnn_mode, algo, cudnn_data_type_),
                                "set rnn_desc failed");
    CHECK_CUDNN_RET_WITH_EXCEPT(kernel_node_, cudnnSetRNNBiasMode(rnn_desc_, bias_mode), "set bias_mode failed");
#else
    cudnnMathType_t math_type = (cudnn_data_type_ == CUDNN_DATA_HALF) ? CUDNN_TENSOR_OP_MATH : CUDNN_FMA_MATH;
    CHECK_CUDNN_RET_WITH_EXCEPT(kernel_node_,
                                cudnnSetRNNDescriptor_v8(rnn_desc_, algo, rnn_mode, bias_mode, direction, input_mode,
                                                         cudnn_data_type_, cudnn_data_type_, math_type, input_size_,
                                                         hidden_size_, hidden_size_, num_layers_, dropout_desc_, 0),
                                "set rnn_desc failed");
#endif
    auto weight_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 3);
    is_null_input_ = CHECK_SHAPE_NULL(weight_shape, kernel_name, "weight");
    if (is_null_input_) {
      InitSizeLists();
      return true;
    }
    if (weight_shape.size() < LeastWeightShape) {
      MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the dimension of weight cannot be less than 3, but got "
                        << weight_shape.size();
    }
    size_t weight_size = weight_shape[0] * weight_shape[1] * weight_shape[2] * sizeof(T);
    CHECK_CUDNN_RET_WITH_EXCEPT(kernel_node_,
                                cudnnGetRNNParamsSize(handle_, rnn_desc_, dx_desc_[0], &weight_size_, cudnn_data_type_),
                                "get weight_size_ failed");
    if (weight_size != weight_size_) {
      MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the size of weight should be equal to " << weight_size_
                        << " but got " << weight_size;
    }
    int w_dims[3] = {SizeToInt(weight_size_ / sizeof(T)), 1, 1};
    CHECK_CUDNN_RET_WITH_EXCEPT(
      kernel_node_, cudnnSetFilterNdDescriptor(w_desc_, cudnn_data_type_, CUDNN_TENSOR_NCHW, DimOfTensor, w_dims),
      "set w_desc failed");
    CHECK_CUDNN_RET_WITH_EXCEPT(
      kernel_node_, cudnnGetRNNTrainingReserveSize(handle_, rnn_desc_, seq_len_, dx_desc_.get(), &reserved_size_),
      "get size failed");
    InitSizeLists();
    return true;
  }
  void DestroyResource() noexcept override {
    CHECK_CUDNN_RET_WITH_ERROR(kernel_node_, cudnnDestroyRNNDescriptor(rnn_desc_), "destroy rnn_desc failed");
    CHECK_CUDNN_RET_WITH_ERROR(kernel_node_, cudnnDestroyDropoutDescriptor(dropout_desc_),
                               "destroy dropout_desc failed");
    CHECK_CUDNN_RET_WITH_ERROR(kernel_node_, cudnnDestroyTensorDescriptor(dcx_desc_), "destroy dcx_desc_ failed");
    CHECK_CUDNN_RET_WITH_ERROR(kernel_node_, cudnnDestroyTensorDescriptor(dhx_desc_), "destroy dhx_desc_ failed");
    CHECK_CUDNN_RET_WITH_ERROR(kernel_node_, cudnnDestroyFilterDescriptor(w_desc_), "destroy w_desc_ failed");
    CHECK_CUDNN_RET_WITH_ERROR(kernel_node_, cudnnDestroyTensorDescriptor(cx_desc_), "destroy cx_desc_ failed");
    CHECK_CUDNN_RET_WITH_ERROR(kernel_node_, cudnnDestroyTensorDescriptor(hx_desc_), "destroy hx_desc_ failed");
    CHECK_CUDNN_RET_WITH_ERROR(kernel_node_, cudnnDestroyTensorDescriptor(dcy_desc_), "destroy dcy_desc_ failed");
    CHECK_CUDNN_RET_WITH_ERROR(kernel_node_, cudnnDestroyTensorDescriptor(dhy_desc_), "destroy dhy_desc_ failed");
    DestroyTensorDescGrp();
  }

 protected:
  void InitResource() override {
    handle_ = device::gpu::GPUDeviceManager::GetInstance().GetCudnnHandle();
    CHECK_CUDNN_RET_WITH_EXCEPT(kernel_node_, cudnnCreateTensorDescriptor(&dhy_desc_), "create dhy_desc_ failed");
    CHECK_CUDNN_RET_WITH_EXCEPT(kernel_node_, cudnnCreateTensorDescriptor(&dcy_desc_), "create dcy_desc_ failed");
    CHECK_CUDNN_RET_WITH_EXCEPT(kernel_node_, cudnnCreateTensorDescriptor(&hx_desc_), "create hx_desc_ failed");
    CHECK_CUDNN_RET_WITH_EXCEPT(kernel_node_, cudnnCreateTensorDescriptor(&cx_desc_), "create cx_desc_ failed");
    CHECK_CUDNN_RET_WITH_EXCEPT(kernel_node_, cudnnCreateFilterDescriptor(&w_desc_), "create w_desc_ failed");
    CHECK_CUDNN_RET_WITH_EXCEPT(kernel_node_, cudnnCreateTensorDescriptor(&dhx_desc_), "create dhx_desc_ failed");
    CHECK_CUDNN_RET_WITH_EXCEPT(kernel_node_, cudnnCreateTensorDescriptor(&dcx_desc_), "create dcx_desc_ failed");
    CHECK_CUDNN_RET_WITH_EXCEPT(kernel_node_, cudnnCreateDropoutDescriptor(&dropout_desc_),
                                "create dropout_desc failed");
    CHECK_CUDNN_RET_WITH_EXCEPT(kernel_node_, cudnnCreateRNNDescriptor(&rnn_desc_), "create rnn_desc failed");
  }

  void InitSizeLists() override {
    size_t y_size = IntToSize(seq_len_ * batch_size_ * hidden_size_ * (bidirectional_ ? 2 : 1)) * sizeof(T);

    size_t h_size = 0;
    CHECK_CUDNN_RET_WITH_EXCEPT(kernel_node_, cudnnGetTensorSizeInBytes(hx_desc_, &h_size), "get h size failed");

    input_size_list_.push_back(y_size);
    input_size_list_.push_back(y_size);
    input_size_list_.push_back(h_size);
    input_size_list_.push_back(h_size);
    input_size_list_.push_back(weight_size_);
    input_size_list_.push_back(h_size);
    input_size_list_.push_back(h_size);
    input_size_list_.push_back(reserved_size_);
    size_t state_size = 0;
    CHECK_CUDNN_RET_WITH_EXCEPT(kernel_node_, cudnnDropoutGetStatesSize(handle_, &state_size),
                                "get dropout states size failed");
    input_size_list_.push_back(state_size);

    size_t x_size = IntToSize(seq_len_ * batch_size_ * input_size_) * sizeof(T);
    output_size_list_.push_back(x_size);
    output_size_list_.push_back(h_size);

    size_t workspace_size = 0;
    CHECK_CUDNN_RET_WITH_EXCEPT(kernel_node_,
                                cudnnGetRNNWorkspaceSize(handle_, rnn_desc_, seq_len_, dx_desc_.get(), &workspace_size),
                                "get workspace size failed");
    workspace_size_list_.push_back(workspace_size);
  }

 private:
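  // cudnnRNNBackwardData consumes one 3-D descriptor per time step, so the
  // x/y/dy descriptors are allocated as arrays of length seq_len_.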
  void CreateTensorDescGrp() {
    int x_dims[3]{batch_size_, input_size_, 1};
    int y_dims[3]{batch_size_, hidden_size_ * (bidirectional_ ? 2 : 1), 1};

    dx_desc_ = std::make_unique<cudnnTensorDescriptor_t[]>(seq_len_);
    y_desc_ = std::make_unique<cudnnTensorDescriptor_t[]>(seq_len_);
    dy_desc_ = std::make_unique<cudnnTensorDescriptor_t[]>(seq_len_);
    for (size_t i = 0; i < IntToSize(seq_len_); ++i) {
      CHECK_CUDNN_RET_WITH_EXCEPT(kernel_node_, cudnnCreateTensorDescriptor(&dx_desc_[i]), "create x_desc failed");
      CHECK_CUDNN_RET_WITH_EXCEPT(
        kernel_node_,
        cudnnSetTensorNdDescriptorEx(dx_desc_[i], CUDNN_TENSOR_NCHW, cudnn_data_type_, DimOfTensor, x_dims),
        "set dx_desc failed");

      CHECK_CUDNN_RET_WITH_EXCEPT(kernel_node_, cudnnCreateTensorDescriptor(&y_desc_[i]), "create y_desc failed");
      CHECK_CUDNN_RET_WITH_EXCEPT(
        kernel_node_,
        cudnnSetTensorNdDescriptorEx(y_desc_[i], CUDNN_TENSOR_NCHW, cudnn_data_type_, DimOfTensor, y_dims),
        "set y_desc failed");

      CHECK_CUDNN_RET_WITH_EXCEPT(kernel_node_, cudnnCreateTensorDescriptor(&dy_desc_[i]), "create dy_desc_ failed");
      CHECK_CUDNN_RET_WITH_EXCEPT(
        kernel_node_,
        cudnnSetTensorNdDescriptorEx(dy_desc_[i], CUDNN_TENSOR_NCHW, cudnn_data_type_, DimOfTensor, y_dims),
        "set dy_desc_ failed");
    }
  }

  void DestroyTensorDescGrp() {
    for (size_t i = 0; i < IntToSize(seq_len_); ++i) {
      CHECK_CUDNN_RET_WITH_ERROR(kernel_node_, cudnnDestroyTensorDescriptor(dy_desc_[i]), "destroy dy_desc failed");
      CHECK_CUDNN_RET_WITH_ERROR(kernel_node_, cudnnDestroyTensorDescriptor(y_desc_[i]), "destroy y_desc failed");
      CHECK_CUDNN_RET_WITH_ERROR(kernel_node_, cudnnDestroyTensorDescriptor(dx_desc_[i]), "destroy x_desc failed");
    }
  }

  int batch_size_;
  int seq_len_;
  int input_size_;
  int hidden_size_;
  int num_layers_;

  bool has_bias_;
  bool bidirectional_;
  bool states_init_;
  bool is_null_input_;
  float dropout_;

  size_t weight_size_;
  size_t reserved_size_;

  cudnnRNNDescriptor_t rnn_desc_;

  // input desc
  std::unique_ptr<cudnnTensorDescriptor_t[]> y_desc_;
  std::unique_ptr<cudnnTensorDescriptor_t[]> dy_desc_;
  cudnnTensorDescriptor_t dhy_desc_;
  cudnnTensorDescriptor_t dcy_desc_;
  cudnnFilterDescriptor_t w_desc_;
  cudnnTensorDescriptor_t hx_desc_;
  cudnnTensorDescriptor_t cx_desc_;

  cudnnDropoutDescriptor_t dropout_desc_;

  // output desc
  std::unique_ptr<cudnnTensorDescriptor_t[]> dx_desc_;
  cudnnTensorDescriptor_t dhx_desc_;
  cudnnTensorDescriptor_t dcx_desc_;

  cudnnHandle_t handle_;
  cudnnDataType_t cudnn_data_type_;
  std::vector<size_t> input_size_list_;
  std::vector<size_t> output_size_list_;
  std::vector<size_t> workspace_size_list_;
};
}  // namespace kernel
}  // namespace mindspore

#endif  // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_NN_GRU_GRAD_DATA_GPU_KERNEL_H_
@@ -0,0 +1,40 @@
/**
 * Copyright 2019 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "backend/kernel_compiler/gpu/rl/gru_grad_weight_gpu_kernel.h"

namespace mindspore {
namespace kernel {
MS_REG_GPU_KERNEL_ONE(GruGradWeight,
                      KernelAttr()
                        .AddInputAttr(kNumberTypeFloat32)
                        .AddInputAttr(kNumberTypeFloat32)
                        .AddInputAttr(kNumberTypeFloat32)
                        .AddInputAttr(kNumberTypeFloat32)
                        .AddInputAttr(kNumberTypeFloat32)
                        .AddOutputAttr(kNumberTypeFloat32),
                      GruGradWeightGpuKernel, float)
MS_REG_GPU_KERNEL_ONE(GruGradWeight,
                      KernelAttr()
                        .AddInputAttr(kNumberTypeFloat16)
                        .AddInputAttr(kNumberTypeFloat16)
                        .AddInputAttr(kNumberTypeFloat16)
                        .AddInputAttr(kNumberTypeFloat16)
                        .AddInputAttr(kNumberTypeFloat16)
                        .AddOutputAttr(kNumberTypeFloat16),
                      GruGradWeightGpuKernel, half)
}  // namespace kernel
}  // namespace mindspore
@@ -0,0 +1,283 @@
/**
 * Copyright 2019-2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_NN_GRU_GRAD_WEIGHT_GPU_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_NN_GRU_GRAD_WEIGHT_GPU_KERNEL_H_

#include <cuda_runtime_api.h>
#include <vector>
#include <memory>
#include "backend/kernel_compiler/gpu/gpu_kernel.h"
#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h"
#include "backend/kernel_compiler/gpu/kernel_constants.h"
namespace mindspore {
namespace kernel {
constexpr size_t kIndexFour = 4;
constexpr size_t DimOfTensor = 3;
constexpr size_t LeastWeightShape = 3;
constexpr size_t LeastInputShapeSize = 2;
template <typename T>
class GruGradWeightGpuKernel : public GpuKernel {
 public:
  GruGradWeightGpuKernel()
      : batch_size_(0),
        seq_len_(0),
        input_size_(0),
        hidden_size_(0),
        num_layers_(0),
        has_bias_(false),
        bidirectional_(false),
        states_init_(false),
        is_null_input_(false),
        dropout_(0),
        weight_size_(0),
        reserved_size_(0),
        rnn_desc_(nullptr),
        dropout_desc_(nullptr),
        x_desc_(nullptr),
        hx_desc_(nullptr),
        y_desc_(nullptr),
        dw_desc_(nullptr),
        handle_(nullptr),
        cudnn_data_type_(CUDNN_DATA_FLOAT) {}
  ~GruGradWeightGpuKernel() override { DestroyResource(); }

  const std::vector<size_t> &GetInputSizeList() const override { return input_size_list_; }
  const std::vector<size_t> &GetOutputSizeList() const override { return output_size_list_; }
  const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; }
  bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
              const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
    if (is_null_input_) {
      return true;
    }
    auto x_addr = GetDeviceAddress<T>(inputs, 0);
    auto hx_addr = GetDeviceAddress<T>(inputs, 1);
    auto y_addr = GetDeviceAddress<T>(inputs, 2);
    auto reserved_addr = GetDeviceAddress<T>(inputs, 3);
    auto states_addr = GetDeviceAddress<T>(inputs, 4);
    auto dw_addr = GetDeviceAddress<T>(outputs, 0);
    void *workspace_addr = GetDeviceAddress<T>(workspace, 0);

    if (!states_init_) {
      CHECK_CUDNN_RET_WITH_EXCEPT(
        kernel_node_,
        cudnnRestoreDropoutDescriptor(dropout_desc_, handle_, dropout_, states_addr, input_size_list_[kIndexFour], 0),
        "restore dropout state failed");
      states_init_ = true;
    }
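    // cudnnRNNBackwardWeights accumulates into dw rather than overwriting it,
    // so zero the output buffer first.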
    CHECK_CUDA_RET_WITH_EXCEPT(
      kernel_node_, cudaMemsetAsync(dw_addr, 0, outputs[0]->size, reinterpret_cast<cudaStream_t>(stream_ptr)),
      "cudaMemSet Failed");

    CHECK_CUDNN_RET_WITH_EXCEPT(
      kernel_node_,
      cudnnRNNBackwardWeights(handle_, rnn_desc_, seq_len_, x_desc_.get(), x_addr, hx_desc_, hx_addr, y_desc_.get(),
                              y_addr, workspace_addr, workspace_size_list_[0], dw_desc_, dw_addr, reserved_addr,
                              reserved_size_),
      "launch gru back weight kernel failed");

    return true;
  }
  bool Init(const CNodePtr &kernel_node) override {
    auto kernel_name = AnfAlgo::GetCNodeName(kernel_node);
    kernel_node_ = kernel_node;
    InitResource();
    cudnn_data_type_ = GetCudnnDataType(TypeIdLabel(AnfAlgo::GetInputDeviceDataType(kernel_node, 0)));
    auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
    is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name, "input");
    if (is_null_input_) {
      InitSizeLists();
      return true;
    }
    if (input_shape.size() < LeastInputShapeSize) {
      MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the dimension of input cannot be less than 2, but got "
                        << input_shape.size();
    }
    seq_len_ = SizeToInt(input_shape[0]);
    batch_size_ = SizeToInt(input_shape[1]);

    input_size_ = static_cast<int>(GetAttr<int64_t>(kernel_node, "input_size"));
    hidden_size_ = static_cast<int>(GetAttr<int64_t>(kernel_node, "hidden_size"));
    num_layers_ = static_cast<int>(GetAttr<int64_t>(kernel_node, "num_layers"));
    has_bias_ = GetAttr<bool>(kernel_node, "has_bias");
    bidirectional_ = GetAttr<bool>(kernel_node, "bidirectional");
    dropout_ = GetAttr<float>(kernel_node, "dropout");

    cudnnRNNInputMode_t input_mode = CUDNN_LINEAR_INPUT;
    cudnnDirectionMode_t direction = bidirectional_ ? CUDNN_BIDIRECTIONAL : CUDNN_UNIDIRECTIONAL;
    cudnnRNNMode_t rnn_mode = CUDNN_GRU;
    cudnnRNNAlgo_t algo = CUDNN_RNN_ALGO_STANDARD;

    CreateTensorDescGrp();
    int hx_dims[3]{num_layers_ * (bidirectional_ ? 2 : 1), batch_size_, hidden_size_};
    CHECK_CUDNN_RET_WITH_EXCEPT(
      kernel_node_, cudnnSetTensorNdDescriptorEx(hx_desc_, CUDNN_TENSOR_NCHW, cudnn_data_type_, DimOfTensor, hx_dims),
      "set hx_desc_ failed");
    CHECK_CUDNN_RET_WITH_EXCEPT(kernel_node_,
                                cudnnSetDropoutDescriptor(dropout_desc_, handle_, dropout_, nullptr, 0, 0),
                                "set dropout_desc failed");
    cudnnRNNBiasMode_t bias_mode = has_bias_ ? CUDNN_RNN_DOUBLE_BIAS : CUDNN_RNN_NO_BIAS;
#if CUDNN_VERSION < 8000
    CHECK_CUDNN_RET_WITH_EXCEPT(kernel_node_,
                                cudnnSetRNNDescriptor_v6(handle_, rnn_desc_, hidden_size_, num_layers_, dropout_desc_,
                                                         input_mode, direction, rnn_mode, algo, cudnn_data_type_),
                                "set rnn_desc failed");
    CHECK_CUDNN_RET_WITH_EXCEPT(kernel_node_, cudnnSetRNNBiasMode(rnn_desc_, bias_mode), "set bias_mode failed");
#else
    cudnnMathType_t math_type = (cudnn_data_type_ == CUDNN_DATA_HALF) ? CUDNN_TENSOR_OP_MATH : CUDNN_FMA_MATH;
    CHECK_CUDNN_RET_WITH_EXCEPT(kernel_node_,
                                cudnnSetRNNDescriptor_v8(rnn_desc_, algo, rnn_mode, bias_mode, direction, input_mode,
                                                         cudnn_data_type_, cudnn_data_type_, math_type, input_size_,
                                                         hidden_size_, hidden_size_, num_layers_, dropout_desc_, 0),
                                "set rnn_desc failed");
#endif
    auto weight_shape = AnfAlgo::GetOutputInferShape(kernel_node, 0);
    is_null_input_ = CHECK_SHAPE_NULL(weight_shape, kernel_name, "weight");
    if (is_null_input_) {
      InitSizeLists();
      return true;
    }
    if (weight_shape.size() < LeastWeightShape) {
      MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the dimension of weight cannot be less than 3, but got "
                        << weight_shape.size();
    }
    size_t weight_size = weight_shape[0] * weight_shape[1] * weight_shape[2] * sizeof(T);

    CHECK_CUDNN_RET_WITH_EXCEPT(kernel_node_,
                                cudnnGetRNNParamsSize(handle_, rnn_desc_, x_desc_[0], &weight_size_, cudnn_data_type_),
                                "get weight_size_ failed");
    if (weight_size != weight_size_) {
      MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the size of weight should be equal to " << weight_size_
                        << " but got " << weight_size;
    }
    int w_dims[3] = {SizeToInt(weight_size_ / sizeof(T)), 1, 1};
    CHECK_CUDNN_RET_WITH_EXCEPT(
      kernel_node_, cudnnSetFilterNdDescriptor(dw_desc_, cudnn_data_type_, CUDNN_TENSOR_NCHW, DimOfTensor, w_dims),
      "set dw_desc failed");
    CHECK_CUDNN_RET_WITH_EXCEPT(
      kernel_node_, cudnnGetRNNTrainingReserveSize(handle_, rnn_desc_, seq_len_, x_desc_.get(), &reserved_size_),
      "get reserve size failed");
    InitSizeLists();
    return true;
  }

 protected:
  void InitResource() override {
    handle_ = device::gpu::GPUDeviceManager::GetInstance().GetCudnnHandle();
    CHECK_CUDNN_RET_WITH_EXCEPT(kernel_node_, cudnnCreateTensorDescriptor(&hx_desc_), "create hx_desc_ failed");
    CHECK_CUDNN_RET_WITH_EXCEPT(kernel_node_, cudnnCreateFilterDescriptor(&dw_desc_), "create dw_desc_ failed");
    CHECK_CUDNN_RET_WITH_EXCEPT(kernel_node_, cudnnCreateDropoutDescriptor(&dropout_desc_),
                                "create dropout_desc failed");
    CHECK_CUDNN_RET_WITH_EXCEPT(kernel_node_, cudnnCreateRNNDescriptor(&rnn_desc_), "create rnn_desc failed");
  }
  void InitSizeLists() override {
    size_t x_size = IntToSize(seq_len_ * batch_size_ * input_size_) * sizeof(T);

    size_t h_size = 0;
    CHECK_CUDNN_RET_WITH_EXCEPT(kernel_node_, cudnnGetTensorSizeInBytes(hx_desc_, &h_size), "get h size failed");

    size_t y_size = IntToSize(seq_len_ * batch_size_ * hidden_size_ * (bidirectional_ ? 2 : 1)) * sizeof(T);
    input_size_list_.push_back(x_size);
    input_size_list_.push_back(h_size);
    input_size_list_.push_back(y_size);
    input_size_list_.push_back(reserved_size_);
    size_t state_size = 0;
    CHECK_CUDNN_RET_WITH_EXCEPT(kernel_node_, cudnnDropoutGetStatesSize(handle_, &state_size),
                                "get dropout states size failed");
    input_size_list_.push_back(state_size);

    output_size_list_.push_back(weight_size_);

    size_t workspace_size = 0;
    CHECK_CUDNN_RET_WITH_EXCEPT(kernel_node_,
                                cudnnGetRNNWorkspaceSize(handle_, rnn_desc_, seq_len_, x_desc_.get(), &workspace_size),
                                "get workspace size failed");
    workspace_size_list_.push_back(workspace_size);
  }
  void DestroyResource() noexcept override {
    CHECK_CUDNN_RET_WITH_ERROR(kernel_node_, cudnnDestroyRNNDescriptor(rnn_desc_), "destroy rnn_desc failed");
    CHECK_CUDNN_RET_WITH_ERROR(kernel_node_, cudnnDestroyDropoutDescriptor(dropout_desc_),
                               "destroy dropout_desc failed");
    CHECK_CUDNN_RET_WITH_ERROR(kernel_node_, cudnnDestroyFilterDescriptor(dw_desc_), "destroy dw_desc_ failed");
    CHECK_CUDNN_RET_WITH_ERROR(kernel_node_, cudnnDestroyTensorDescriptor(hx_desc_), "destroy hx_desc_ failed");
    DestroyTensorDescGrp();
  }

 private:
  void CreateTensorDescGrp() {
    int x_dims[3]{batch_size_, input_size_, 1};
    int y_dims[3]{batch_size_, hidden_size_ * (bidirectional_ ? 2 : 1), 1};

    x_desc_ = std::make_unique<cudnnTensorDescriptor_t[]>(seq_len_);
    y_desc_ = std::make_unique<cudnnTensorDescriptor_t[]>(seq_len_);

    for (size_t i = 0; i < IntToSize(seq_len_); ++i) {
      CHECK_CUDNN_RET_WITH_EXCEPT(kernel_node_, cudnnCreateTensorDescriptor(&x_desc_[i]), "create x_desc failed");
      CHECK_CUDNN_RET_WITH_EXCEPT(
        kernel_node_,
        cudnnSetTensorNdDescriptorEx(x_desc_[i], CUDNN_TENSOR_NCHW, cudnn_data_type_, DimOfTensor, x_dims),
        "set x_desc failed");

      CHECK_CUDNN_RET_WITH_EXCEPT(kernel_node_, cudnnCreateTensorDescriptor(&y_desc_[i]), "create y_desc failed");
      CHECK_CUDNN_RET_WITH_EXCEPT(
        kernel_node_,
        cudnnSetTensorNdDescriptorEx(y_desc_[i], CUDNN_TENSOR_NCHW, cudnn_data_type_, DimOfTensor, y_dims),
        "set y_desc failed");
    }
  }
  void DestroyTensorDescGrp() {
    for (size_t i = 0; i < IntToSize(seq_len_); ++i) {
      CHECK_CUDNN_RET_WITH_ERROR(kernel_node_, cudnnDestroyTensorDescriptor(y_desc_[i]), "destroy y_desc failed");
      CHECK_CUDNN_RET_WITH_ERROR(kernel_node_, cudnnDestroyTensorDescriptor(x_desc_[i]), "destroy x_desc failed");
    }
  }

  int batch_size_;
  int seq_len_;
  int input_size_;
  int hidden_size_;
  int num_layers_;

  bool has_bias_;
  bool bidirectional_;
  bool states_init_;
  bool is_null_input_;
  float dropout_;

  size_t weight_size_;
  size_t reserved_size_;

  cudnnRNNDescriptor_t rnn_desc_;
  cudnnDropoutDescriptor_t dropout_desc_;

  // input desc
  std::unique_ptr<cudnnTensorDescriptor_t[]> x_desc_;
  cudnnTensorDescriptor_t hx_desc_;
  std::unique_ptr<cudnnTensorDescriptor_t[]> y_desc_;

  // output desc
  cudnnFilterDescriptor_t dw_desc_;

  cudnnHandle_t handle_;
  cudnnDataType_t cudnn_data_type_;
  std::vector<size_t> input_size_list_;
  std::vector<size_t> output_size_list_;
  std::vector<size_t> workspace_size_list_;
};
}  // namespace kernel
}  // namespace mindspore
#endif  // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_NN_GRU_GRAD_WEIGHT_GPU_KERNEL_H_
@@ -23,6 +23,7 @@ from ...common import dtype as mstype
from ..composite.multitype_ops.zeros_like_impl import zeros_like
from ..operations import _grad_ops as G
from ..operations import _inner_ops as inner
from ..operations import _rl_inner_ops as rl_ops
from ... import context
from .._utils.utils import range_op, get_1d_shape


@@ -955,6 +956,37 @@ def get_bprop_lstm(self):
    return bprop


@bprop_getters.register(rl_ops.CudnnGRU)
def get_bprop_gru(self):
    """Grad definition for `GRU` operation."""
    gru_grad_data = G.GruGradData(
        input_size=self.input_size,
        hidden_size=self.hidden_size,
        num_layers=self.num_layers,
        has_bias=self.has_bias,
        bidirectional=self.bidirectional,
        dropout=self.dropout
    )

    gru_grad_weight = G.GruGradWeight(
        input_size=self.input_size,
        hidden_size=self.hidden_size,
        num_layers=self.num_layers,
        has_bias=self.has_bias,
        bidirectional=self.bidirectional,
        dropout=self.dropout
    )

    def bprop(x, hx, w, out, dout):
        y, _, reserve, state = out
        dy, dhy, _, _ = dout
        dx, dhx = gru_grad_data(y, dy, dhy, w, hx, reserve, state)
        dw = gru_grad_weight(F.depend(x, dx), hx, y, reserve, state)
        return dx, dhx, dw

    return bprop
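A minimal end-to-end sketch of the new backward path (illustrative, not part of this PR; the shapes and the GPU-context setup below are assumptions, and the flat weight length 84 follows from the GruGradWeight.infer_shape arithmetic later in this diff):

import numpy as np
from mindspore import Tensor, context
from mindspore.ops import GradOperation
from mindspore.ops.operations import _rl_inner_ops as rl_ops

context.set_context(device_target="GPU")
gru = rl_ops.CudnnGRU(input_size=10, hidden_size=2, num_layers=1,
                      has_bias=True, bidirectional=False, dropout=0.0)

def net(x, hx, w):
    y, hy, reserve, state = gru(x, hx, w)
    return y

x = Tensor(np.random.rand(5, 3, 10).astype(np.float32))  # (seq_len, batch, input_size)
hx = Tensor(np.zeros((1, 3, 2), np.float32))              # (num_layers * num_directions, batch, hidden_size)
w = Tensor(np.random.rand(84, 1, 1).astype(np.float32))   # flat cuDNN weight
# Differentiating net triggers the bprop above: GruGradData produces dx/dhx,
# then GruGradWeight produces dw.
dx, dhx, dw = GradOperation(get_all=True)(net)(x, hx, w)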


@bprop_getters.register(P.DynamicRNN)
def get_bprop_dynamic_rnn(self):
    """Grad definition for `DynamicRNN` operation."""
@@ -1,20 +1,24 @@
(binary diff of a serialized bprop MindIR, regenerated for MindSpore 1.6.0; readable fragments reference S-Prim-DropoutDoMask. Contents not human-readable.)
@@ -1,14 +1,14 @@
(binary diff of a serialized bprop MindIR with inputs `shape` and `keep_prob`; contents not human-readable.)
@@ -1,22 +1,22 @@
(binary diff of a serialized bprop MindIR with inputs `indices`, `depth`, `on_value`, `off_value`; contents not human-readable.)
Binary file not shown.
@@ -11,4 +11,4 @@
(only the trailing graph hash of this serialized bprop MindIR changed.)
@@ -1,18 +1,21 @@
(binary diff of a serialized bprop MindIR; readable fragments reference S-Prim-ReluGrad. Contents not human-readable.)
@@ -1381,6 +1381,83 @@ class DynamicRNNGrad(PrimitiveWithInfer):
        return x_dtype, x_dtype, x_dtype, x_dtype, x_dtype


class GruGradData(PrimitiveWithInfer):
    """Computes the data gradients of GRU."""

    @prim_attr_register
    def __init__(self, input_size, hidden_size, num_layers, has_bias, bidirectional, dropout):
        self.input_size = validator.check_positive_int(input_size, 'input_size', self.name)
        self.hidden_size = validator.check_positive_int(hidden_size, 'hidden_size', self.name)
        self.num_layers = validator.check_positive_int(num_layers, 'num_layers', self.name)
        self.has_bias = validator.check_value_type('has_bias', has_bias, (bool,), self.name)
        self.bidirectional = validator.check_value_type('bidirectional', bidirectional, (bool,), self.name)
        self.dropout = validator.check_value_type("dropout", dropout, [float], self.name)
        self.dropout = validator.check_float_range(dropout, 0, 1, Rel.INC_BOTH, 'dropout', self.name)

        if bidirectional:
            self.num_directions = 2
        else:
            self.num_directions = 1

    def infer_shape(self, y_shape, dy_shape, dhy_shape, w_shape,
                    hx_shape, reserve_shape, state_shape):
        # dhy has shape (num_layers * num_directions, batch_size, hidden_size)
        validator.check_equal_int(len(dhy_shape), 3, "h_shape", self.name)

        validator.check_int(dhy_shape[0], self.num_layers * self.num_directions, Rel.EQ, "h_shape[0]", self.name)
        validator.check_equal_int(dhy_shape[2], self.hidden_size, "h_shape[2]", self.name)

        validator.check_equal_int(len(dy_shape), 3, "dy_shape", self.name)
        validator.check_equal_int(dy_shape[1], dhy_shape[1], "dy[1]", self.name)
        validator.check_int(dy_shape[2], self.hidden_size * self.num_directions, Rel.EQ, "dy[2]", self.name)

        dx_shape = (y_shape[0], y_shape[1], self.input_size)
        dhx_shape = dhy_shape

        return (dx_shape, dhx_shape)

    def infer_dtype(self, y_dtype, dy_dtype, dhy_dtype, w_dtype,
                    hx_dtype, reserve_dtype, state_dtype):
        args = {"dy": dy_dtype, "dhy": dhy_dtype}
        validator.check_tensors_dtypes_same_and_valid(args, (mstype.float32, mstype.float16), self.name)
        return (dy_dtype, dy_dtype)
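# Worked example (illustrative numbers, not from the PR): with input_size=10,
# hidden_size=2, num_layers=1 and bidirectional=False, y_shape (5, 3, 2) and
# dhy_shape (1, 3, 2) pass the checks above and yield
# dx_shape = (5, 3, 10) and dhx_shape = (1, 3, 2).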


class GruGradWeight(PrimitiveWithInfer):
    """Computes the weight gradients of GRU."""

    @prim_attr_register
    def __init__(self, input_size, hidden_size, num_layers, has_bias, bidirectional, dropout):
        self.input_size = validator.check_positive_int(input_size, 'input_size', self.name)
        self.hidden_size = validator.check_positive_int(hidden_size, 'hidden_size', self.name)
        self.num_layers = validator.check_positive_int(num_layers, 'num_layers', self.name)
        self.has_bias = validator.check_value_type('has_bias', has_bias, (bool,), self.name)
        self.bidirectional = validator.check_value_type('bidirectional', bidirectional, (bool,), self.name)
        self.dropout = validator.check_value_type("dropout", dropout, [float], self.name)
        self.dropout = validator.check_float_range(dropout, 0, 1, Rel.INC_BOTH, 'dropout', self.name)

        if bidirectional:
            self.num_directions = 2
        else:
            self.num_directions = 1

    def infer_shape(self, x_shape, hx_shape, y_shape, reserve_shape, state_shape):
        weight_size = 0
        gate_size = 3 * self.hidden_size
        for layer in range(self.num_layers):
            for _ in range(self.num_directions):
                input_layer_size = self.input_size if layer == 0 else self.hidden_size * self.num_directions
                weight_size += gate_size * input_layer_size
                weight_size += gate_size * self.hidden_size
                if self.has_bias:
                    weight_size += 2 * gate_size

        return (weight_size, 1, 1)

    def infer_dtype(self, x_dtype, hx_dtype, y_dtype, reserve_dtype, state_dtype):
        return hx_dtype
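The loop in GruGradWeight.infer_shape reproduces cuDNN's flat GRU parameter count. A worked instance of the same arithmetic (illustrative numbers, not from the PR):

# gate_size = 3 * hidden_size               hidden_size=2 -> 6
# layer 0, one direction (input_size=10, has_bias=True):
#   input weights:     gate_size * input_size  = 6 * 10 = 60
#   recurrent weights: gate_size * hidden_size = 6 * 2  = 12
#   biases:            2 * gate_size           =          12
# total: 84 -> inferred output shape (84, 1, 1)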


class DynamicGRUV2Grad(PrimitiveWithInfer):
    r"""
    Computes the input gradients of DynamicGRUV2.