!27835 Add GRU GPU fused backward ops using the cuDNN API

Merge pull request !27835 from YijieChen/gru_ops_back
This commit is contained in:
i-robot 2021-12-23 06:28:07 +00:00 committed by Gitee
commit b6081c5fb9
12 changed files with 893 additions and 67 deletions

View File

@ -0,0 +1,46 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "backend/kernel_compiler/gpu/rl/gru_grad_data_gpu_kernel.h"
namespace mindspore {
namespace kernel {
// Register the GruGradData GPU kernel for float32 tensors.
// Inputs: y, dy, dhy, w, hx, reserve, states; outputs: dx, dhx.
MS_REG_GPU_KERNEL_ONE(GruGradData,
                      KernelAttr()
                        .AddInputAttr(kNumberTypeFloat32)
                        .AddInputAttr(kNumberTypeFloat32)
                        .AddInputAttr(kNumberTypeFloat32)
                        .AddInputAttr(kNumberTypeFloat32)
                        .AddInputAttr(kNumberTypeFloat32)
                        .AddInputAttr(kNumberTypeFloat32)
                        .AddInputAttr(kNumberTypeFloat32)
                        .AddOutputAttr(kNumberTypeFloat32)
                        .AddOutputAttr(kNumberTypeFloat32),
                      GruGradDataGpuKernel, float)
// Same registration for float16 (half) tensors.
MS_REG_GPU_KERNEL_ONE(GruGradData,
                      KernelAttr()
                        .AddInputAttr(kNumberTypeFloat16)
                        .AddInputAttr(kNumberTypeFloat16)
                        .AddInputAttr(kNumberTypeFloat16)
                        .AddInputAttr(kNumberTypeFloat16)
                        .AddInputAttr(kNumberTypeFloat16)
                        .AddInputAttr(kNumberTypeFloat16)
                        .AddInputAttr(kNumberTypeFloat16)
                        .AddOutputAttr(kNumberTypeFloat16)
                        .AddOutputAttr(kNumberTypeFloat16),
                      GruGradDataGpuKernel, half)
}  // namespace kernel
}  // namespace mindspore

View File

@ -0,0 +1,341 @@
/**
* Copyright 2019-2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_NN_GRU_GRAD_DATA_GPU_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_NN_GRU_GRAD_DATA_GPU_KERNEL_H_
#include <cuda_runtime_api.h>
#include <vector>
#include <memory>
#include "backend/kernel_compiler/gpu/gpu_kernel.h"
#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h"
#include "backend/kernel_compiler/gpu/kernel_constants.h"
namespace mindspore {
namespace kernel {
constexpr size_t kIndexEight = 8;
constexpr size_t DimOfTensor = 3;
constexpr size_t LeastWeightShape = 3;
constexpr size_t LeastInputShapeSize = 2;

// Computes the data gradients of a cuDNN-backed GRU: given y, dy, dhy, the
// packed weights and the initial hidden state, it produces dx and dhx via
// cudnnRNNBackwardData.  A GRU has no cell state, so the cx/dcy/dcx
// descriptors are created only to satisfy the cuDNN API; their data pointers
// stay null at launch time.
template <typename T>
class GruGradDataGpuKernel : public GpuKernel {
 public:
  GruGradDataGpuKernel()
      : batch_size_(0),
        seq_len_(0),
        input_size_(0),
        hidden_size_(0),
        num_layers_(0),
        has_bias_(false),
        bidirectional_(false),
        states_init_(false),
        is_null_input_(false),
        dropout_(0),
        weight_size_(0),
        reserved_size_(0),
        rnn_desc_(nullptr),
        y_desc_(nullptr),
        dy_desc_(nullptr),
        dhy_desc_(nullptr),
        dcy_desc_(nullptr),
        w_desc_(nullptr),
        hx_desc_(nullptr),
        cx_desc_(nullptr),
        dropout_desc_(nullptr),
        dx_desc_(nullptr),
        dhx_desc_(nullptr),
        dcx_desc_(nullptr),
        handle_(nullptr),
        cudnn_data_type_(CUDNN_DATA_FLOAT) {}
  ~GruGradDataGpuKernel() override { DestroyResource(); }
  const std::vector<size_t> &GetInputSizeList() const override { return input_size_list_; }
  const std::vector<size_t> &GetOutputSizeList() const override { return output_size_list_; }
  const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; }
  // Inputs: [y, dy, dhy, w, hx, reserve, states]; outputs: [dx, dhx].
  bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
              const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
    if (is_null_input_) {
      return true;
    }
    auto y_addr = GetDeviceAddress<T>(inputs, 0);
    auto dy_addr = GetDeviceAddress<T>(inputs, 1);
    auto dhy_addr = GetDeviceAddress<T>(inputs, 2);
    auto dcy_addr = nullptr;  // GRU has no cell state gradient.
    auto w_addr = GetDeviceAddress<T>(inputs, 3);
    auto hx_addr = GetDeviceAddress<T>(inputs, 4);
    auto cx_addr = nullptr;  // GRU has no cell state.
    auto reserved_addr = GetDeviceAddress<T>(inputs, 5);
    auto states_addr = GetDeviceAddress<T>(inputs, 6);
    auto dx_addr = GetDeviceAddress<T>(outputs, 0);
    auto dhx_addr = GetDeviceAddress<T>(outputs, 1);
    auto dcx_addr = nullptr;  // GRU has no cell state gradient.
    // The RNN workspace may be zero-sized, so tolerate a null address here.
    void *workspace_addr = GetPossiblyNullDeviceAddress<T>(workspace, 0);
    if (!states_init_) {
      // Re-attach the dropout RNG state saved by the forward pass so the
      // backward pass sees the same dropout mask.  kIndexEight is the
      // position of the states buffer in input_size_list_ (see InitSizeLists).
      CHECK_CUDNN_RET_WITH_EXCEPT(
        kernel_node_,
        cudnnRestoreDropoutDescriptor(dropout_desc_, handle_, dropout_, states_addr, input_size_list_[kIndexEight], 0),
        "restore dropout state failed");
      states_init_ = true;
    }
    CHECK_CUDNN_RET_WITH_EXCEPT(
      kernel_node_,
      cudnnRNNBackwardData(handle_, rnn_desc_, seq_len_, y_desc_.get(), y_addr, dy_desc_.get(), dy_addr, dhy_desc_,
                           dhy_addr, dcy_desc_, dcy_addr, w_desc_, w_addr, hx_desc_, hx_addr, cx_desc_, cx_addr,
                           dx_desc_.get(), dx_addr, dhx_desc_, dhx_addr, dcx_desc_, dcx_addr, workspace_addr,
                           workspace_size_list_[0], reserved_addr, reserved_size_),
      "launch gru back data kernel failed");
    CHECK_CUDA_RET_WITH_EXCEPT(kernel_node_, cudaStreamSynchronize(reinterpret_cast<cudaStream_t>(stream_ptr)),
                               "stream synchronize failed.");
    return true;
  }
  // Reads the GRU hyper-parameters attached to the primitive node.
  void GetAttrs(const CNodePtr &kernel_node) {
    input_size_ = static_cast<int>(GetAttr<int64_t>(kernel_node, "input_size"));
    hidden_size_ = static_cast<int>(GetAttr<int64_t>(kernel_node, "hidden_size"));
    num_layers_ = static_cast<int>(GetAttr<int64_t>(kernel_node, "num_layers"));
    has_bias_ = GetAttr<bool>(kernel_node, "has_bias");
    bidirectional_ = GetAttr<bool>(kernel_node, "bidirectional");
    dropout_ = GetAttr<float>(kernel_node, "dropout");
  }
  // Builds every cuDNN descriptor from the node's shapes/attrs and fills the
  // size lists.  Returns true even for null-shaped inputs (kernel becomes a
  // no-op); throws on invalid ranks or a weight-size mismatch.
  bool Init(const CNodePtr &kernel_node) override {
    auto kernel_name = AnfAlgo::GetCNodeName(kernel_node);
    kernel_node_ = kernel_node;
    InitResource();
    cudnn_data_type_ = GetCudnnDataType(TypeIdLabel(AnfAlgo::GetInputDeviceDataType(kernel_node, 0)));
    auto input_shape = AnfAlgo::GetOutputInferShape(kernel_node, 0);
    is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name, "input");
    if (is_null_input_) {
      InitSizeLists();
      return true;
    }
    if (input_shape.size() < LeastInputShapeSize) {
      MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the dimension of input cannot be less than 2, but got "
                        << input_shape.size();
    }
    seq_len_ = SizeToInt(input_shape[0]);
    batch_size_ = SizeToInt(input_shape[1]);
    GetAttrs(kernel_node);
    cudnnRNNInputMode_t input_mode = CUDNN_LINEAR_INPUT;
    cudnnDirectionMode_t direction = bidirectional_ ? CUDNN_BIDIRECTIONAL : CUDNN_UNIDIRECTIONAL;
    cudnnRNNMode_t rnn_mode = CUDNN_GRU;
    cudnnRNNAlgo_t algo = CUDNN_RNN_ALGO_STANDARD;
    CreateTensorDescGrp();
    int hx_dims[3]{num_layers_ * (bidirectional_ ? 2 : 1), batch_size_, hidden_size_};
    CHECK_CUDNN_RET_WITH_EXCEPT(
      kernel_node_, cudnnSetTensorNdDescriptorEx(dhy_desc_, CUDNN_TENSOR_NCHW, cudnn_data_type_, DimOfTensor, hx_dims),
      "set dhy_desc_ failed");
    CHECK_CUDNN_RET_WITH_EXCEPT(
      kernel_node_, cudnnSetTensorNdDescriptorEx(dcy_desc_, CUDNN_TENSOR_NCHW, cudnn_data_type_, DimOfTensor, hx_dims),
      "set dcy_desc_ failed");
    CHECK_CUDNN_RET_WITH_EXCEPT(
      kernel_node_, cudnnSetTensorNdDescriptorEx(hx_desc_, CUDNN_TENSOR_NCHW, cudnn_data_type_, DimOfTensor, hx_dims),
      "set hx_desc_ failed");
    CHECK_CUDNN_RET_WITH_EXCEPT(
      kernel_node_, cudnnSetTensorNdDescriptorEx(cx_desc_, CUDNN_TENSOR_NCHW, cudnn_data_type_, DimOfTensor, hx_dims),
      "set cx_desc_ failed");
    CHECK_CUDNN_RET_WITH_EXCEPT(
      kernel_node_, cudnnSetTensorNdDescriptorEx(dhx_desc_, CUDNN_TENSOR_NCHW, cudnn_data_type_, DimOfTensor, hx_dims),
      "set dhx_desc_ failed");
    CHECK_CUDNN_RET_WITH_EXCEPT(
      kernel_node_, cudnnSetTensorNdDescriptorEx(dcx_desc_, CUDNN_TENSOR_NCHW, cudnn_data_type_, DimOfTensor, hx_dims),
      "set dcx_desc_ failed");
    // States are attached later in Launch via cudnnRestoreDropoutDescriptor.
    CHECK_CUDNN_RET_WITH_EXCEPT(kernel_node_,
                                cudnnSetDropoutDescriptor(dropout_desc_, handle_, dropout_, nullptr, 0, 0),
                                "set dropout_desc failed");
    cudnnRNNBiasMode_t bias_mode = has_bias_ ? CUDNN_RNN_DOUBLE_BIAS : CUDNN_RNN_NO_BIAS;
#if CUDNN_VERSION < 8000
    CHECK_CUDNN_RET_WITH_EXCEPT(kernel_node_,
                                cudnnSetRNNDescriptor_v6(handle_, rnn_desc_, hidden_size_, num_layers_, dropout_desc_,
                                                         input_mode, direction, rnn_mode, algo, cudnn_data_type_),
                                "set rnn_desc failed");
    CHECK_CUDNN_RET_WITH_EXCEPT(kernel_node_, cudnnSetRNNBiasMode(rnn_desc_, bias_mode), "set bias_mode failed");
#else
    cudnnMathType_t math_type = (cudnn_data_type_ == CUDNN_DATA_HALF) ? CUDNN_TENSOR_OP_MATH : CUDNN_FMA_MATH;
    CHECK_CUDNN_RET_WITH_EXCEPT(kernel_node_,
                                cudnnSetRNNDescriptor_v8(rnn_desc_, algo, rnn_mode, bias_mode, direction, input_mode,
                                                         cudnn_data_type_, cudnn_data_type_, math_type, input_size_,
                                                         hidden_size_, hidden_size_, num_layers_, dropout_desc_, 0),
                                "set rnn_desc failed");
#endif
    auto weight_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 3);
    // Bug fix: validate weight_shape here (the original re-checked input_shape,
    // so a null weight shape was never detected).
    is_null_input_ = CHECK_SHAPE_NULL(weight_shape, kernel_name, "weight");
    if (is_null_input_) {
      InitSizeLists();
      return true;
    }
    if (weight_shape.size() < LeastWeightShape) {
      MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the dimension of weight cannot be less than 3, but got "
                        << weight_shape.size();
    }
    size_t weight_size = weight_shape[0] * weight_shape[1] * weight_shape[2] * sizeof(T);
    CHECK_CUDNN_RET_WITH_EXCEPT(kernel_node_,
                                cudnnGetRNNParamsSize(handle_, rnn_desc_, dx_desc_[0], &weight_size_, cudnn_data_type_),
                                "get weight_size_ failed");
    if (weight_size != weight_size_) {
      MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the size of weight should be equal to " << weight_size_
                        << " but got " << weight_size;
    }
    int w_dims[3] = {SizeToInt(weight_size_ / sizeof(T)), 1, 1};
    CHECK_CUDNN_RET_WITH_EXCEPT(
      kernel_node_, cudnnSetFilterNdDescriptor(w_desc_, cudnn_data_type_, CUDNN_TENSOR_NCHW, DimOfTensor, w_dims),
      "set w_desc failed");
    CHECK_CUDNN_RET_WITH_EXCEPT(
      kernel_node_, cudnnGetRNNTrainingReserveSize(handle_, rnn_desc_, seq_len_, dx_desc_.get(), &reserved_size_),
      "get size failed");
    InitSizeLists();
    return true;
  }
  // Destroys every cuDNN object created in InitResource/CreateTensorDescGrp.
  void DestroyResource() noexcept override {
    CHECK_CUDNN_RET_WITH_ERROR(kernel_node_, cudnnDestroyRNNDescriptor(rnn_desc_), "destroy rnn_desc failed");
    CHECK_CUDNN_RET_WITH_ERROR(kernel_node_, cudnnDestroyDropoutDescriptor(dropout_desc_),
                               "destroy dropout_desc failed");
    CHECK_CUDNN_RET_WITH_ERROR(kernel_node_, cudnnDestroyTensorDescriptor(dcx_desc_), "destroy dcx_desc_ failed");
    CHECK_CUDNN_RET_WITH_ERROR(kernel_node_, cudnnDestroyTensorDescriptor(dhx_desc_), "destroy dhx_desc_ failed");
    CHECK_CUDNN_RET_WITH_ERROR(kernel_node_, cudnnDestroyFilterDescriptor(w_desc_), "destroy w_desc_ failed");
    CHECK_CUDNN_RET_WITH_ERROR(kernel_node_, cudnnDestroyTensorDescriptor(cx_desc_), "destroy cx_desc_ failed");
    CHECK_CUDNN_RET_WITH_ERROR(kernel_node_, cudnnDestroyTensorDescriptor(hx_desc_), "destroy hx_desc_ failed");
    CHECK_CUDNN_RET_WITH_ERROR(kernel_node_, cudnnDestroyTensorDescriptor(dcy_desc_), "destroy dcy_desc_ failed");
    CHECK_CUDNN_RET_WITH_ERROR(kernel_node_, cudnnDestroyTensorDescriptor(dhy_desc_), "destroy dhy_desc_ failed");
    DestroyTensorDescGrp();
  }

 protected:
  void InitResource() override {
    handle_ = device::gpu::GPUDeviceManager::GetInstance().GetCudnnHandle();
    CHECK_CUDNN_RET_WITH_EXCEPT(kernel_node_, cudnnCreateTensorDescriptor(&dhy_desc_), "create dhy_desc_ failed");
    CHECK_CUDNN_RET_WITH_EXCEPT(kernel_node_, cudnnCreateTensorDescriptor(&dcy_desc_), "create dcy_desc_ failed");
    CHECK_CUDNN_RET_WITH_EXCEPT(kernel_node_, cudnnCreateTensorDescriptor(&hx_desc_), "create hx_desc_ failed");
    CHECK_CUDNN_RET_WITH_EXCEPT(kernel_node_, cudnnCreateTensorDescriptor(&cx_desc_), "create cx_desc_ failed");
    CHECK_CUDNN_RET_WITH_EXCEPT(kernel_node_, cudnnCreateFilterDescriptor(&w_desc_), "create w_desc_ failed");
    CHECK_CUDNN_RET_WITH_EXCEPT(kernel_node_, cudnnCreateTensorDescriptor(&dhx_desc_), "create dhx_desc_ failed");
    CHECK_CUDNN_RET_WITH_EXCEPT(kernel_node_, cudnnCreateTensorDescriptor(&dcx_desc_), "create dcx_desc_ failed");
    CHECK_CUDNN_RET_WITH_EXCEPT(kernel_node_, cudnnCreateDropoutDescriptor(&dropout_desc_),
                                "create dropout_desc failed");
    CHECK_CUDNN_RET_WITH_EXCEPT(kernel_node_, cudnnCreateRNNDescriptor(&rnn_desc_), "create rnn_desc failed");
  }
  // NOTE(review): nine entries are pushed here although the op registers only
  // seven inputs (extra h-sized slots mirror the LSTM-grad layout for the
  // unused dcy/cx); kIndexEight relies on this layout — confirm against the
  // framework's input allocation before changing it.
  void InitSizeLists() override {
    size_t y_size = IntToSize(seq_len_ * batch_size_ * hidden_size_ * (bidirectional_ ? 2 : 1)) * sizeof(T);
    size_t h_size = 0;
    CHECK_CUDNN_RET_WITH_EXCEPT(kernel_node_, cudnnGetTensorSizeInBytes(hx_desc_, &h_size), "get h size failed");
    input_size_list_.push_back(y_size);
    input_size_list_.push_back(y_size);
    input_size_list_.push_back(h_size);
    input_size_list_.push_back(h_size);
    input_size_list_.push_back(weight_size_);
    input_size_list_.push_back(h_size);
    input_size_list_.push_back(h_size);
    input_size_list_.push_back(reserved_size_);
    size_t state_size = 0;
    CHECK_CUDNN_RET_WITH_EXCEPT(kernel_node_, cudnnDropoutGetStatesSize(handle_, &state_size),
                                "get dropout states size failed");
    input_size_list_.push_back(state_size);
    size_t x_size = IntToSize(seq_len_ * batch_size_ * input_size_) * sizeof(T);
    output_size_list_.push_back(x_size);
    output_size_list_.push_back(h_size);
    size_t workspace_size = 0;
    CHECK_CUDNN_RET_WITH_EXCEPT(kernel_node_,
                                cudnnGetRNNWorkspaceSize(handle_, rnn_desc_, seq_len_, dx_desc_.get(), &workspace_size),
                                "get workspace size failed");
    workspace_size_list_.push_back(workspace_size);
  }

 private:
  // Creates one per-time-step descriptor for dx, y and dy (cuDNN v6-style
  // array-of-descriptors API).
  void CreateTensorDescGrp() {
    int x_dims[3]{batch_size_, input_size_, 1};
    int y_dims[3]{batch_size_, hidden_size_ * (bidirectional_ ? 2 : 1), 1};
    dx_desc_ = std::make_unique<cudnnTensorDescriptor_t[]>(seq_len_);
    y_desc_ = std::make_unique<cudnnTensorDescriptor_t[]>(seq_len_);
    dy_desc_ = std::make_unique<cudnnTensorDescriptor_t[]>(seq_len_);
    for (size_t i = 0; i < IntToSize(seq_len_); ++i) {
      CHECK_CUDNN_RET_WITH_EXCEPT(kernel_node_, cudnnCreateTensorDescriptor(&dx_desc_[i]), "create x_desc failed");
      CHECK_CUDNN_RET_WITH_EXCEPT(
        kernel_node_,
        cudnnSetTensorNdDescriptorEx(dx_desc_[i], CUDNN_TENSOR_NCHW, cudnn_data_type_, DimOfTensor, x_dims),
        "set dx_desc failed");
      CHECK_CUDNN_RET_WITH_EXCEPT(kernel_node_, cudnnCreateTensorDescriptor(&y_desc_[i]), "create y_desc failed");
      CHECK_CUDNN_RET_WITH_EXCEPT(
        kernel_node_,
        cudnnSetTensorNdDescriptorEx(y_desc_[i], CUDNN_TENSOR_NCHW, cudnn_data_type_, DimOfTensor, y_dims),
        "set y_desc failed");
      CHECK_CUDNN_RET_WITH_EXCEPT(kernel_node_, cudnnCreateTensorDescriptor(&dy_desc_[i]), "create dy_desc_ failed");
      CHECK_CUDNN_RET_WITH_EXCEPT(
        kernel_node_,
        cudnnSetTensorNdDescriptorEx(dy_desc_[i], CUDNN_TENSOR_NCHW, cudnn_data_type_, DimOfTensor, y_dims),
        "set dy_desc_ failed");
    }
  }
  void DestroyTensorDescGrp() {
    for (size_t i = 0; i < IntToSize(seq_len_); ++i) {
      CHECK_CUDNN_RET_WITH_ERROR(kernel_node_, cudnnDestroyTensorDescriptor(dy_desc_[i]), "destroy dy_desc failed");
      CHECK_CUDNN_RET_WITH_ERROR(kernel_node_, cudnnDestroyTensorDescriptor(y_desc_[i]), "destroy y_desc failed");
      CHECK_CUDNN_RET_WITH_ERROR(kernel_node_, cudnnDestroyTensorDescriptor(dx_desc_[i]), "destroy x_desc failed");
    }
  }

  int batch_size_;
  int seq_len_;
  int input_size_;
  int hidden_size_;
  int num_layers_;
  bool has_bias_;
  bool bidirectional_;
  bool states_init_;    // true once the dropout RNG state has been restored
  bool is_null_input_;  // true when any input shape is null -> no-op kernel
  float dropout_;
  size_t weight_size_;    // packed weight size in bytes (from cuDNN)
  size_t reserved_size_;  // training reserve-space size in bytes
  cudnnRNNDescriptor_t rnn_desc_;
  // input desc
  std::unique_ptr<cudnnTensorDescriptor_t[]> y_desc_;
  std::unique_ptr<cudnnTensorDescriptor_t[]> dy_desc_;
  cudnnTensorDescriptor_t dhy_desc_;
  cudnnTensorDescriptor_t dcy_desc_;
  cudnnFilterDescriptor_t w_desc_;
  cudnnTensorDescriptor_t hx_desc_;
  cudnnTensorDescriptor_t cx_desc_;
  cudnnDropoutDescriptor_t dropout_desc_;
  // output desc
  std::unique_ptr<cudnnTensorDescriptor_t[]> dx_desc_;
  cudnnTensorDescriptor_t dhx_desc_;
  cudnnTensorDescriptor_t dcx_desc_;
  cudnnHandle_t handle_;
  cudnnDataType_t cudnn_data_type_;
  std::vector<size_t> input_size_list_;
  std::vector<size_t> output_size_list_;
  std::vector<size_t> workspace_size_list_;
};
} // namespace kernel
} // namespace mindspore
#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_NN_GRU_GRAD_DATA_GPU_KERNEL_H_

View File

@ -0,0 +1,40 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "backend/kernel_compiler/gpu/rl/gru_grad_weight_gpu_kernel.h"
namespace mindspore {
namespace kernel {
// Register the GruGradWeight GPU kernel for float32 tensors.
// Inputs: x, hx, y, reserve, states; output: dw.
MS_REG_GPU_KERNEL_ONE(GruGradWeight,
                      KernelAttr()
                        .AddInputAttr(kNumberTypeFloat32)
                        .AddInputAttr(kNumberTypeFloat32)
                        .AddInputAttr(kNumberTypeFloat32)
                        .AddInputAttr(kNumberTypeFloat32)
                        .AddInputAttr(kNumberTypeFloat32)
                        .AddOutputAttr(kNumberTypeFloat32),
                      GruGradWeightGpuKernel, float)
// Same registration for float16 (half) tensors.
MS_REG_GPU_KERNEL_ONE(GruGradWeight,
                      KernelAttr()
                        .AddInputAttr(kNumberTypeFloat16)
                        .AddInputAttr(kNumberTypeFloat16)
                        .AddInputAttr(kNumberTypeFloat16)
                        .AddInputAttr(kNumberTypeFloat16)
                        .AddInputAttr(kNumberTypeFloat16)
                        .AddOutputAttr(kNumberTypeFloat16),
                      GruGradWeightGpuKernel, half)
}  // namespace kernel
}  // namespace mindspore

View File

@ -0,0 +1,283 @@
/**
* Copyright 2019-2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_NN_GRU_GRAD_WEIGHT_GPU_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_NN_GRU_GRAD_WEIGHT_GPU_KERNEL_H_
#include <cuda_runtime_api.h>
#include <vector>
#include <memory>
#include "backend/kernel_compiler/gpu/gpu_kernel.h"
#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h"
#include "backend/kernel_compiler/gpu/kernel_constants.h"
namespace mindspore {
namespace kernel {
constexpr size_t kIndexFour = 4;
constexpr size_t DimOfTensor = 3;
constexpr size_t LeastWeightShape = 3;
constexpr size_t LeastInputShapeSize = 2;
template <typename T>
class GruGradWeightGpuKernel : public GpuKernel {
public:
GruGradWeightGpuKernel()
: batch_size_(0),
seq_len_(0),
input_size_(0),
hidden_size_(0),
num_layers_(0),
has_bias_(false),
bidirectional_(false),
states_init_(false),
is_null_input_(false),
dropout_(0),
weight_size_(0),
reserved_size_(0),
rnn_desc_(nullptr),
dropout_desc_(nullptr),
x_desc_(nullptr),
hx_desc_(nullptr),
y_desc_(nullptr),
dw_desc_(nullptr),
handle_(nullptr),
cudnn_data_type_(CUDNN_DATA_FLOAT) {}
~GruGradWeightGpuKernel() override { DestroyResource(); }
const std::vector<size_t> &GetInputSizeList() const override { return input_size_list_; }
const std::vector<size_t> &GetOutputSizeList() const override { return output_size_list_; }
const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; }
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
if (is_null_input_) {
return true;
}
auto x_addr = GetDeviceAddress<T>(inputs, 0);
auto hx_addr = GetDeviceAddress<T>(inputs, 1);
auto y_addr = GetDeviceAddress<T>(inputs, 2);
auto reserved_addr = GetDeviceAddress<T>(inputs, 3);
auto states_addr = GetDeviceAddress<T>(inputs, 4);
auto dw_addr = GetDeviceAddress<T>(outputs, 0);
void *workspace_addr = GetDeviceAddress<T>(workspace, 0);
if (!states_init_) {
CHECK_CUDNN_RET_WITH_EXCEPT(
kernel_node_,
cudnnRestoreDropoutDescriptor(dropout_desc_, handle_, dropout_, states_addr, input_size_list_[kIndexFour], 0),
"restore dropout state failed");
states_init_ = true;
}
CHECK_CUDA_RET_WITH_EXCEPT(
kernel_node_, cudaMemsetAsync(dw_addr, 0, outputs[0]->size, reinterpret_cast<cudaStream_t>(stream_ptr)),
"cudaMemSet Failed");
CHECK_CUDNN_RET_WITH_EXCEPT(
kernel_node_,
cudnnRNNBackwardWeights(handle_, rnn_desc_, seq_len_, x_desc_.get(), x_addr, hx_desc_, hx_addr, y_desc_.get(),
y_addr, workspace_addr, workspace_size_list_[0], dw_desc_, dw_addr, reserved_addr,
reserved_size_),
"launch gru back weight kernel failed");
return true;
}
bool Init(const CNodePtr &kernel_node) override {
auto kernel_name = AnfAlgo::GetCNodeName(kernel_node);
kernel_node_ = kernel_node;
InitResource();
cudnn_data_type_ = GetCudnnDataType(TypeIdLabel(AnfAlgo::GetInputDeviceDataType(kernel_node, 0)));
auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name, "input");
if (is_null_input_) {
InitSizeLists();
return true;
}
if (input_shape.size() < LeastInputShapeSize) {
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the dimension of input cannot be less than 2, but got "
<< input_shape.size();
}
seq_len_ = SizeToInt(input_shape[0]);
batch_size_ = SizeToInt(input_shape[1]);
input_size_ = static_cast<int>(GetAttr<int64_t>(kernel_node, "input_size"));
hidden_size_ = static_cast<int>(GetAttr<int64_t>(kernel_node, "hidden_size"));
num_layers_ = static_cast<int>(GetAttr<int64_t>(kernel_node, "num_layers"));
has_bias_ = GetAttr<bool>(kernel_node, "has_bias");
bidirectional_ = GetAttr<bool>(kernel_node, "bidirectional");
dropout_ = GetAttr<float>(kernel_node, "dropout");
cudnnRNNInputMode_t input_mode = CUDNN_LINEAR_INPUT;
cudnnDirectionMode_t direction = bidirectional_ ? CUDNN_BIDIRECTIONAL : CUDNN_UNIDIRECTIONAL;
cudnnRNNMode_t rnn_mode = CUDNN_GRU;
cudnnRNNAlgo_t algo = CUDNN_RNN_ALGO_STANDARD;
CreateTensorDescGrp();
int hx_dims[3]{num_layers_ * (bidirectional_ ? 2 : 1), batch_size_, hidden_size_};
CHECK_CUDNN_RET_WITH_EXCEPT(
kernel_node_, cudnnSetTensorNdDescriptorEx(hx_desc_, CUDNN_TENSOR_NCHW, cudnn_data_type_, DimOfTensor, hx_dims),
"set hx_desc_ failed");
CHECK_CUDNN_RET_WITH_EXCEPT(kernel_node_,
cudnnSetDropoutDescriptor(dropout_desc_, handle_, dropout_, nullptr, 0, 0),
"set dropout_desc failed");
cudnnRNNBiasMode_t bias_mode = has_bias_ ? CUDNN_RNN_DOUBLE_BIAS : CUDNN_RNN_NO_BIAS;
#if CUDNN_VERSION < 8000
CHECK_CUDNN_RET_WITH_EXCEPT(kernel_node_,
cudnnSetRNNDescriptor_v6(handle_, rnn_desc_, hidden_size_, num_layers_, dropout_desc_,
input_mode, direction, rnn_mode, algo, cudnn_data_type_),
"set rnn_desc failed");
CHECK_CUDNN_RET_WITH_EXCEPT(kernel_node_, cudnnSetRNNBiasMode(rnn_desc_, bias_mode), "set bias_mode failed");
#else
cudnnMathType_t math_type = (cudnn_data_type_ == CUDNN_DATA_HALF) ? CUDNN_TENSOR_OP_MATH : CUDNN_FMA_MATH;
CHECK_CUDNN_RET_WITH_EXCEPT(kernel_node_,
cudnnSetRNNDescriptor_v8(rnn_desc_, algo, rnn_mode, bias_mode, direction, input_mode,
cudnn_data_type_, cudnn_data_type_, math_type, input_size_,
hidden_size_, hidden_size_, num_layers_, dropout_desc_, 0),
"set rnn_desc failed");
#endif
auto weight_shape = AnfAlgo::GetOutputInferShape(kernel_node, 0);
is_null_input_ = CHECK_SHAPE_NULL(weight_shape, kernel_name, "weight");
if (is_null_input_) {
InitSizeLists();
return true;
}
if (weight_shape.size() < LeastWeightShape) {
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the dimension of weight cannot be less than 3, but got "
<< weight_shape.size();
}
size_t weight_size = weight_shape[0] * weight_shape[1] * weight_shape[2] * sizeof(T);
CHECK_CUDNN_RET_WITH_EXCEPT(kernel_node_,
cudnnGetRNNParamsSize(handle_, rnn_desc_, x_desc_[0], &weight_size_, cudnn_data_type_),
"get weight_size_ failed");
if (weight_size != weight_size_) {
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the size of weight should be equal to " << weight_size_
<< " but got " << weight_size;
}
int w_dims[3] = {SizeToInt(weight_size_ / sizeof(T)), 1, 1};
CHECK_CUDNN_RET_WITH_EXCEPT(
kernel_node_, cudnnSetFilterNdDescriptor(dw_desc_, cudnn_data_type_, CUDNN_TENSOR_NCHW, DimOfTensor, w_dims),
"set dw_desc failed");
CHECK_CUDNN_RET_WITH_EXCEPT(
kernel_node_, cudnnGetRNNTrainingReserveSize(handle_, rnn_desc_, seq_len_, x_desc_.get(), &reserved_size_),
"get reserve size failed");
InitSizeLists();
return true;
}
protected:
void InitResource() override {
handle_ = device::gpu::GPUDeviceManager::GetInstance().GetCudnnHandle();
CHECK_CUDNN_RET_WITH_EXCEPT(kernel_node_, cudnnCreateTensorDescriptor(&hx_desc_), "create hx_desc_ failed");
CHECK_CUDNN_RET_WITH_EXCEPT(kernel_node_, cudnnCreateFilterDescriptor(&dw_desc_), "create dw_desc_ failed");
CHECK_CUDNN_RET_WITH_EXCEPT(kernel_node_, cudnnCreateDropoutDescriptor(&dropout_desc_),
"create dropout_desc failed");
CHECK_CUDNN_RET_WITH_EXCEPT(kernel_node_, cudnnCreateRNNDescriptor(&rnn_desc_), "create rnn_desc failed");
}
void InitSizeLists() override {
size_t x_size = IntToSize(seq_len_ * batch_size_ * input_size_) * sizeof(T);
size_t h_size = 0;
CHECK_CUDNN_RET_WITH_EXCEPT(kernel_node_, cudnnGetTensorSizeInBytes(hx_desc_, &h_size), "get h size failed");
size_t y_size = IntToSize(seq_len_ * batch_size_ * hidden_size_ * (bidirectional_ ? 2 : 1)) * sizeof(T);
input_size_list_.push_back(x_size);
input_size_list_.push_back(h_size);
input_size_list_.push_back(y_size);
input_size_list_.push_back(reserved_size_);
size_t state_size = 0;
CHECK_CUDNN_RET_WITH_EXCEPT(kernel_node_, cudnnDropoutGetStatesSize(handle_, &state_size),
"get dropout states size failed");
input_size_list_.push_back(state_size);
output_size_list_.push_back(weight_size_);
size_t workspace_size = 0;
CHECK_CUDNN_RET_WITH_EXCEPT(kernel_node_,
cudnnGetRNNWorkspaceSize(handle_, rnn_desc_, seq_len_, x_desc_.get(), &workspace_size),
"get workspace size failed");
workspace_size_list_.push_back(workspace_size);
}
void DestroyResource() noexcept override {
CHECK_CUDNN_RET_WITH_ERROR(kernel_node_, cudnnDestroyRNNDescriptor(rnn_desc_), "destroy rnn_desc failed");
CHECK_CUDNN_RET_WITH_ERROR(kernel_node_, cudnnDestroyDropoutDescriptor(dropout_desc_),
"destroy dropout_desc failed");
CHECK_CUDNN_RET_WITH_ERROR(kernel_node_, cudnnDestroyFilterDescriptor(dw_desc_), "destroy dw_desc_ failed");
CHECK_CUDNN_RET_WITH_ERROR(kernel_node_, cudnnDestroyTensorDescriptor(hx_desc_), "destroy hx_desc_ failed");
DestroyTensorDescGrp();
}
private:
void CreateTensorDescGrp() {
int x_dims[3]{batch_size_, input_size_, 1};
int y_dims[3]{batch_size_, hidden_size_ * (bidirectional_ ? 2 : 1), 1};
x_desc_ = std::make_unique<cudnnTensorDescriptor_t[]>(seq_len_);
y_desc_ = std::make_unique<cudnnTensorDescriptor_t[]>(seq_len_);
for (size_t i = 0; i < IntToSize(seq_len_); ++i) {
CHECK_CUDNN_RET_WITH_EXCEPT(kernel_node_, cudnnCreateTensorDescriptor(&x_desc_[i]), "create x_desc failed");
CHECK_CUDNN_RET_WITH_EXCEPT(
kernel_node_,
cudnnSetTensorNdDescriptorEx(x_desc_[i], CUDNN_TENSOR_NCHW, cudnn_data_type_, DimOfTensor, x_dims),
"set x_desc failed");
CHECK_CUDNN_RET_WITH_EXCEPT(kernel_node_, cudnnCreateTensorDescriptor(&y_desc_[i]), "create y_desc failed");
CHECK_CUDNN_RET_WITH_EXCEPT(
kernel_node_,
cudnnSetTensorNdDescriptorEx(y_desc_[i], CUDNN_TENSOR_NCHW, cudnn_data_type_, DimOfTensor, y_dims),
"set y_desc failed");
}
}
void DestroyTensorDescGrp() {
for (size_t i = 0; i < IntToSize(seq_len_); ++i) {
CHECK_CUDNN_RET_WITH_ERROR(kernel_node_, cudnnDestroyTensorDescriptor(y_desc_[i]), "destroy y_desc failed");
CHECK_CUDNN_RET_WITH_ERROR(kernel_node_, cudnnDestroyTensorDescriptor(x_desc_[i]), "destroy x_desc failed");
}
}
int batch_size_;
int seq_len_;
int input_size_;
int hidden_size_;
int num_layers_;
bool has_bias_;
bool bidirectional_;
bool states_init_;
bool is_null_input_;
float dropout_;
size_t weight_size_;
size_t reserved_size_;
cudnnRNNDescriptor_t rnn_desc_;
cudnnDropoutDescriptor_t dropout_desc_;
// input desc
std::unique_ptr<cudnnTensorDescriptor_t[]> x_desc_;
cudnnTensorDescriptor_t hx_desc_;
std::unique_ptr<cudnnTensorDescriptor_t[]> y_desc_;
// output desc
cudnnFilterDescriptor_t dw_desc_;
cudnnHandle_t handle_;
cudnnDataType_t cudnn_data_type_;
std::vector<size_t> input_size_list_;
std::vector<size_t> output_size_list_;
std::vector<size_t> workspace_size_list_;
};
} // namespace kernel
} // namespace mindspore
#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_NN_GRU_GRAD_WEIGHT_GPU_KERNEL_H_

View File

@ -23,6 +23,7 @@ from ...common import dtype as mstype
from ..composite.multitype_ops.zeros_like_impl import zeros_like
from ..operations import _grad_ops as G
from ..operations import _inner_ops as inner
from ..operations import _rl_inner_ops as rl_ops
from ... import context
from .._utils.utils import range_op, get_1d_shape
@ -955,6 +956,37 @@ def get_bprop_lstm(self):
return bprop
@bprop_getters.register(rl_ops.CudnnGRU)
def get_bprop_gru(self):
    """Grad definition for `GRU` operation.

    Builds the backward graph for `rl_ops.CudnnGRU` out of two grad kernels:
    `GruGradData` (dx, dhx via cuDNN's RNN backward-data pass) and
    `GruGradWeight` (dw via the backward-weights pass).  The GRU
    hyper-parameters are copied from the forward primitive so the backward
    kernels configure an identical cuDNN RNN descriptor.
    """
    # Data-gradient op: consumes y, dy, dhy, w, hx plus the forward pass's
    # reserve/state buffers and returns (dx, dhx).
    gru_grad_data = G.GruGradData(
        input_size=self.input_size,
        hidden_size=self.hidden_size,
        num_layers=self.num_layers,
        has_bias=self.has_bias,
        bidirectional=self.bidirectional,
        dropout=self.dropout
    )
    # Weight-gradient op: consumes x, hx, y plus reserve/state and returns dw.
    gru_grad_weight = G.GruGradWeight(
        input_size=self.input_size,
        hidden_size=self.hidden_size,
        num_layers=self.num_layers,
        has_bias=self.has_bias,
        bidirectional=self.bidirectional,
        dropout=self.dropout
    )

    def bprop(x, hx, w, out, dout):
        # Forward outputs: (y, hy, reserve, state); dout has the same layout.
        # Gradients w.r.t. reserve/state are unused.
        y, _, reserve, state = out
        dy, dhy, _, _ = dout
        dx, dhx = gru_grad_data(y, dy, dhy, w, hx, reserve, state)
        # F.depend(x, dx) orders the weight-grad kernel after the data-grad
        # kernel (both reuse the same reserve space) without changing x.
        dw = gru_grad_weight(F.depend(x, dx), hx, y, reserve, state)
        return dx, dhx, dw

    return bprop
@bprop_getters.register(P.DynamicRNN)
def get_bprop_dynamic_rnn(self):
"""Grad definition for `DynamicRNN` operation."""

View File

@ -1,20 +1,24 @@
0.1.0 MindSpore*1.6.0:·

bprop.111:dout
bprop.111:y
bprop.111:keep_probbprop.111:[CNode]112:1bprop.111:[CNode]112:1"S-Prim-DropoutDoMask:!Default/S-Prim-DropoutDoMask-op80
<EFBFBD>
bprop.111:ybprop.111:[CNode]113:2bprop.111:[CNode]113:2"!S-Prim-hyper_map[zeros_like_leaf]:.Default/S-Prim-hyper_map[zeros_like_leaf]-op81
˜
bprop.111:keep_probbprop.111:[CNode]114:3bprop.111:[CNode]114:3"!S-Prim-hyper_map[zeros_like_leaf]:.Default/S-Prim-hyper_map[zeros_like_leaf]-op82
©
bprop.111:[CNode]112:1
bprop.111:[CNode]113:2
bprop.111:[CNode]114:3bprop.111:[CNode]115:4bprop.111:[CNode]115:4"S-Prim-MakeTuple:Default/S-Prim-MakeTuple-op83 bprop.111*
bprop.111:x*
bprop.111:y*
bprop.111:keep_prob*
bprop.111:out*
bprop.111:dout2
bprop.111:[CNode]115:4:@ff3ca86129a77e54496a41a353e9f6355edfba79f3c040cf990522596d638b7fP
0.1.0 MindSpore*1.6.0:”

bprop.13:dout
bprop.13:y
bprop.13:keep_probbprop.13:[CNode]14:1bprop.13:[CNode]14:1"S-Prim-DropoutDoMask:!Default/S-Prim-DropoutDoMask-op10

bprop.13:ybprop.13:[CNode]15:2bprop.13:[CNode]15:2"!S-Prim-hyper_map[zeros_like_leaf]:.Default/S-Prim-hyper_map[zeros_like_leaf]-op11

bprop.13:keep_probbprop.13:[CNode]16:3bprop.13:[CNode]16:3"!S-Prim-hyper_map[zeros_like_leaf]:.Default/S-Prim-hyper_map[zeros_like_leaf]-op12
Ÿ
bprop.13:[CNode]14:1
bprop.13:[CNode]15:2
bprop.13:[CNode]16:3bprop.13:[CNode]17:4bprop.13:[CNode]17:4"S-Prim-MakeTuple:Default/S-Prim-MakeTuple-op13bprop.13*
bprop.13:x*
bprop.13:y*
bprop.13:keep_prob*
bprop.13:out*
bprop.13:dout2
bprop.13:[CNode]17:4:@eb722f0a05359151f2578b4ad7b0d067988045071352718c64ca6cf29bc5008aP

View File

@ -1,14 +1,14 @@
0.1.0 MindSpore*1.6.0:á
<EFBFBD>
bprop.51:shapebprop.51:[CNode]52:1bprop.51:[CNode]52:1"!S-Prim-hyper_map[zeros_like_leaf]:.Default/S-Prim-hyper_map[zeros_like_leaf]-op35

bprop.51:keep_probbprop.51:[CNode]53:2bprop.51:[CNode]53:2"!S-Prim-hyper_map[zeros_like_leaf]:.Default/S-Prim-hyper_map[zeros_like_leaf]-op36
0.1.0 MindSpore*1.6.0:Å
<EFBFBD>
bprop.51:[CNode]52:1
bprop.51:[CNode]53:2bprop.51:[CNode]54:3bprop.51:[CNode]54:3"S-Prim-MakeTuple:Default/S-Prim-MakeTuple-op37bprop.51*
bprop.51:shape*
bprop.51:keep_prob*
bprop.51:out*
bprop.51:dout2
bprop.51:[CNode]54:3:@ff3ca86129a77e54496a41a353e9f6355edfba79f3c040cf990522596d638b7fP
bprop.3:shapebprop.3:[CNode]4:1bprop.3:[CNode]4:1"!S-Prim-hyper_map[zeros_like_leaf]:-Default/S-Prim-hyper_map[zeros_like_leaf]-op2
<EFBFBD>
bprop.3:keep_probbprop.3:[CNode]5:2bprop.3:[CNode]5:2"!S-Prim-hyper_map[zeros_like_leaf]:-Default/S-Prim-hyper_map[zeros_like_leaf]-op3

bprop.3:[CNode]4:1
bprop.3:[CNode]5:2bprop.3:[CNode]6:3bprop.3:[CNode]6:3"S-Prim-MakeTuple:Default/S-Prim-MakeTuple-op4bprop.3*
bprop.3:shape*
bprop.3:keep_prob*
bprop.3:out*
bprop.3:dout2
bprop.3:[CNode]6:3:@eb722f0a05359151f2578b4ad7b0d067988045071352718c64ca6cf29bc5008aP

View File

@ -1,22 +1,22 @@
0.1.0 MindSpore*1.6.0:ß

bprop.55:indicesbprop.55:[CNode]56:1bprop.55:[CNode]56:1"!S-Prim-hyper_map[zeros_like_leaf]:.Default/S-Prim-hyper_map[zeros_like_leaf]-op38
0.1.0 MindSpore*1.6.0:º

bprop.7:indicesbprop.7:[CNode]8:1bprop.7:[CNode]8:1"!S-Prim-hyper_map[zeros_like_leaf]:-Default/S-Prim-hyper_map[zeros_like_leaf]-op5

bprop.7:depthbprop.7:[CNode]9:2bprop.7:[CNode]9:2"!S-Prim-hyper_map[zeros_like_leaf]:-Default/S-Prim-hyper_map[zeros_like_leaf]-op6
Ž
bprop.7:on_valuebprop.7:[CNode]10:3bprop.7:[CNode]10:3"!S-Prim-hyper_map[zeros_like_leaf]:-Default/S-Prim-hyper_map[zeros_like_leaf]-op7
<EFBFBD>
bprop.55:depthbprop.55:[CNode]57:2bprop.55:[CNode]57:2"!S-Prim-hyper_map[zeros_like_leaf]:.Default/S-Prim-hyper_map[zeros_like_leaf]-op39

bprop.55:on_valuebprop.55:[CNode]58:3bprop.55:[CNode]58:3"!S-Prim-hyper_map[zeros_like_leaf]:.Default/S-Prim-hyper_map[zeros_like_leaf]-op40

bprop.55:off_valuebprop.55:[CNode]59:4bprop.55:[CNode]59:4"!S-Prim-hyper_map[zeros_like_leaf]:.Default/S-Prim-hyper_map[zeros_like_leaf]-op41
µ
bprop.55:[CNode]56:1
bprop.55:[CNode]57:2
bprop.55:[CNode]58:3
bprop.55:[CNode]59:4bprop.55:[CNode]60:5bprop.55:[CNode]60:5"S-Prim-MakeTuple:Default/S-Prim-MakeTuple-op42bprop.55*
bprop.55:indices*
bprop.55:depth*
bprop.55:on_value*
bprop.55:off_value*
bprop.55:out*
bprop.55:dout2
bprop.55:[CNode]60:5:@ff3ca86129a77e54496a41a353e9f6355edfba79f3c040cf990522596d638b7fP
bprop.7:off_valuebprop.7:[CNode]11:4bprop.7:[CNode]11:4"!S-Prim-hyper_map[zeros_like_leaf]:-Default/S-Prim-hyper_map[zeros_like_leaf]-op8
¬
bprop.7:[CNode]8:1
bprop.7:[CNode]9:2
bprop.7:[CNode]10:3
bprop.7:[CNode]11:4bprop.7:[CNode]12:5bprop.7:[CNode]12:5"S-Prim-MakeTuple:Default/S-Prim-MakeTuple-op9bprop.7*
bprop.7:indices*
bprop.7:depth*
bprop.7:on_value*
bprop.7:off_value*
bprop.7:out*
bprop.7:dout2
bprop.7:[CNode]12:5:@eb722f0a05359151f2578b4ad7b0d067988045071352718c64ca6cf29bc5008aP

View File

@ -11,4 +11,4 @@ f
bprop.1:x*
bprop.1:out*
bprop.1:dout2
bprop.1:[CNode]2:2:@ff3ca86129a77e54496a41a353e9f6355edfba79f3c040cf990522596d638b7fP
bprop.1:[CNode]2:2:@eb722f0a05359151f2578b4ad7b0d067988045071352718c64ca6cf29bc5008aP

View File

@ -1,18 +1,21 @@
0.1.0 MindSpore*1.6.0:¶
ä
bprop.140:dout
bprop.140:ybprop.140:dgrad:1bprop.140:dgrad:1"S-Prim-ReluGrad*0
0.1.0 MindSpore*1.6.0:œ
ß
bprop.18:dout
bprop.18:ybprop.18:dgrad:1bprop.18:dgrad:1"S-Prim-ReluGrad*0
output_namesZoutputzscalar:List[value1,],€*=
input_namesZ
y_backpropZxzscalar:List[value1,value2,],€:Default/S-Prim-ReluGrad-op101

bprop.140:ybprop.140:[CNode]141:2bprop.140:[CNode]141:2"!S-Prim-hyper_map[zeros_like_leaf]:/Default/S-Prim-hyper_map[zeros_like_leaf]-op102
<EFBFBD>
bprop.140:dgrad:1
bprop.140:[CNode]141:2bprop.140:[CNode]142:3bprop.140:[CNode]142:3"S-Prim-MakeTuple:Default/S-Prim-MakeTuple-op103 bprop.140*
bprop.140:grad*
bprop.140:y*
bprop.140:out*
bprop.140:dout2
bprop.140:[CNode]142:3:@ff3ca86129a77e54496a41a353e9f6355edfba79f3c040cf990522596d638b7fP
y_backpropZxzscalar:List[value1,value2,],€:Default/S-Prim-ReluGrad-op14

bprop.18:ybprop.18:[CNode]19:2bprop.18:[CNode]19:2"!S-Prim-hyper_map[zeros_like_leaf]:.Default/S-Prim-hyper_map[zeros_like_leaf]-op15

bprop.18:dgrad:1
bprop.18:[CNode]19:2bprop.18:[CNode]20:3bprop.18:[CNode]20:3"S-Prim-MakeTuple:Default/S-Prim-MakeTuple-op16bprop.18*
bprop.18:grad*
bprop.18:y*
bprop.18:out*
bprop.18:dout2
bprop.18:[CNode]20:3:@eb722f0a05359151f2578b4ad7b0d067988045071352718c64ca6cf29bc5008aP

View File

@ -1381,6 +1381,83 @@ class DynamicRNNGrad(PrimitiveWithInfer):
return x_dtype, x_dtype, x_dtype, x_dtype, x_dtype
class GruGradData(PrimitiveWithInfer):
    """Computes the data gradients of GRU.

    Fused backward-data primitive backing the GPU (cuDNN) GRU kernel.

    Inputs (in order): y, dy, dhy, w, hx, reserve, state.
    Outputs: (dx, dhx) — gradients w.r.t. the input sequence and the initial
    hidden state, both with the same dtype as dy.
    """

    @prim_attr_register
    def __init__(self, input_size, hidden_size, num_layers, has_bias, bidirectional, dropout):
        """Validate and register the GRU hyper-parameters as primitive attributes."""
        self.input_size = validator.check_positive_int(input_size, 'input_size', self.name)
        self.hidden_size = validator.check_positive_int(hidden_size, 'hidden_size', self.name)
        self.num_layers = validator.check_positive_int(num_layers, 'num_layers', self.name)
        self.has_bias = validator.check_value_type('has_bias', has_bias, (bool,), self.name)
        self.bidirectional = validator.check_value_type('bidirectional', bidirectional, (bool,), self.name)
        # Tuple type spec for consistency with the other check_value_type calls.
        self.dropout = validator.check_value_type("dropout", dropout, (float,), self.name)
        self.dropout = validator.check_float_range(dropout, 0, 1, Rel.INC_BOTH, 'dropout', self.name)
        # A bidirectional GRU runs one pass per direction.
        self.num_directions = 2 if bidirectional else 1

    def infer_shape(self, y_shape, dy_shape, dhy_shape, w_shape,
                    hx_shape, reserve_shape, state_shape):
        """Check grad shapes against the configured GRU and derive (dx, dhx) shapes."""
        # dhy must be (num_layers * num_directions, batch, hidden_size).
        validator.check_equal_int(len(dhy_shape), 3, "h_shape", self.name)
        validator.check_int(dhy_shape[0], self.num_layers * self.num_directions, Rel.EQ, "h_shape[0]", self.name)
        validator.check_equal_int(dhy_shape[2], self.hidden_size, "h_shape[2]", self.name)

        # dy must be rank 3 with matching batch and a direction-scaled feature dim.
        validator.check_equal_int(len(dy_shape), 3, "dy_shape", self.name)
        validator.check_equal_int(dy_shape[1], dhy_shape[1], "dy[1]", self.name)
        validator.check_int(dy_shape[2], self.hidden_size * self.num_directions, Rel.EQ, "dy[2]", self.name)

        # dx keeps y's (seq_len, batch) leading dims with the input feature size;
        # dhx matches dhy exactly.
        dx_shape = (y_shape[0], y_shape[1], self.input_size)
        dhx_shape = dhy_shape

        return (dx_shape, dhx_shape)

    def infer_dtype(self, y_dtype, dy_dtype, dhy_dtype, w_dtype,
                    hx_dtype, reserve_dtype, state_dtype):
        """Require dy/dhy to share a valid float dtype; outputs inherit dy's dtype."""
        args = {"dy": dy_dtype, "dhy": dhy_dtype}
        validator.check_tensors_dtypes_same_and_valid(args, (mstype.float32, mstype.float16), self.name)
        return (dy_dtype, dy_dtype)
class GruGradWeight(PrimitiveWithInfer):
    """Computes the weight gradients of GRU.

    Fused backward-weights primitive backing the GPU (cuDNN) GRU kernel.

    Inputs (in order): x, hx, y, reserve, state.
    Output: dw — the packed weight gradient with shape (weight_size, 1, 1),
    with the same dtype as hx.
    """

    @prim_attr_register
    def __init__(self, input_size, hidden_size, num_layers, has_bias, bidirectional, dropout):
        """Validate and register the GRU hyper-parameters as primitive attributes."""
        self.input_size = validator.check_positive_int(input_size, 'input_size', self.name)
        self.hidden_size = validator.check_positive_int(hidden_size, 'hidden_size', self.name)
        self.num_layers = validator.check_positive_int(num_layers, 'num_layers', self.name)
        self.has_bias = validator.check_value_type('has_bias', has_bias, (bool,), self.name)
        self.bidirectional = validator.check_value_type('bidirectional', bidirectional, (bool,), self.name)
        # Tuple type spec for consistency with the other check_value_type calls.
        self.dropout = validator.check_value_type("dropout", dropout, (float,), self.name)
        self.dropout = validator.check_float_range(dropout, 0, 1, Rel.INC_BOTH, 'dropout', self.name)
        # A bidirectional GRU runs one pass per direction.
        self.num_directions = 2 if bidirectional else 1

    def infer_shape(self, x_shape, hx_shape, y_shape, reserve_shape, state_shape):
        """Return the flat packed-weight gradient shape (weight_size, 1, 1)."""
        weight_size = 0
        # GRU has 3 gates (reset, update, new), so each weight matrix has
        # 3 * hidden_size rows.
        gate_size = 3 * self.hidden_size
        for layer in range(self.num_layers):
            for _ in range(self.num_directions):
                # Layer 0 consumes the raw input; deeper layers consume the
                # (direction-concatenated) output of the previous layer.
                input_layer_size = self.input_size if layer == 0 else self.hidden_size * self.num_directions
                # Input-to-hidden plus hidden-to-hidden weights.
                weight_size += gate_size * input_layer_size
                weight_size += gate_size * self.hidden_size
                if self.has_bias:
                    # Two bias vectors per direction-layer (input and hidden
                    # biases), matching the cuDNN packed layout.
                    weight_size += 2 * gate_size
        return (weight_size, 1, 1)

    def infer_dtype(self, x_dtype, hx_dtype, y_dtype, reserve_dtype, state_dtype):
        """The weight gradient inherits hx's dtype."""
        return hx_dtype
class DynamicGRUV2Grad(PrimitiveWithInfer):
r"""
Computes the input gradients of DynamicGRUV2.