forked from mindspore-Ecosystem/mindspore
!962 GPU conv2dBprop update size init
Merge pull request !962 from VectorSL/conv-init-update
This commit is contained in:
commit
d004ef2234
|
@ -49,6 +49,7 @@ class ConvGradFilterGpuBkwKernel : public GpuKernel {
|
||||||
group_(1),
|
group_(1),
|
||||||
is_null_input_(false),
|
is_null_input_(false),
|
||||||
input_size_(0),
|
input_size_(0),
|
||||||
|
dy_size_(0),
|
||||||
output_size_(0),
|
output_size_(0),
|
||||||
padded_size_(0),
|
padded_size_(0),
|
||||||
workspace_size_(0),
|
workspace_size_(0),
|
||||||
|
@ -152,22 +153,16 @@ class ConvGradFilterGpuBkwKernel : public GpuKernel {
|
||||||
}
|
}
|
||||||
void InitSizeLists() override {
|
void InitSizeLists() override {
|
||||||
if (!is_null_input_) {
|
if (!is_null_input_) {
|
||||||
CHECK_CUDNN_RET_WITH_EXCEPT(cudnnGetTensorSizeInBytes(dy_desc_, reinterpret_cast<size_t *>(&input_size_)),
|
CHECK_CUDNN_RET_WITH_EXCEPT(cudnnGetTensorSizeInBytes(dy_desc_, reinterpret_cast<size_t *>(&dy_size_)),
|
||||||
"cudnnGetTensorSizeInBytes failed");
|
"cudnnGetTensorSizeInBytes failed");
|
||||||
CHECK_CUDNN_RET_WITH_EXCEPT(cudnnGetFilterSizeInBytes(dw_desc_, reinterpret_cast<size_t *>(&output_size_)),
|
|
||||||
"cudnnGetFilterSizeInBytes failed");
|
|
||||||
}
|
|
||||||
input_size_list_.push_back(input_size_);
|
|
||||||
output_size_list_.push_back(output_size_);
|
|
||||||
|
|
||||||
if (!is_null_input_) {
|
|
||||||
CHECK_CUDNN_RET_WITH_EXCEPT(cudnnGetTensorSizeInBytes(x_desc_, reinterpret_cast<size_t *>(&input_size_)),
|
CHECK_CUDNN_RET_WITH_EXCEPT(cudnnGetTensorSizeInBytes(x_desc_, reinterpret_cast<size_t *>(&input_size_)),
|
||||||
"cudnnGetTensorSizeInBytes failed");
|
"cudnnGetTensorSizeInBytes failed");
|
||||||
CHECK_CUDNN_RET_WITH_EXCEPT(cudnnGetFilterSizeInBytes(dw_desc_, reinterpret_cast<size_t *>(&output_size_)),
|
CHECK_CUDNN_RET_WITH_EXCEPT(cudnnGetFilterSizeInBytes(dw_desc_, reinterpret_cast<size_t *>(&output_size_)),
|
||||||
"cudnnGetFilterSizeInBytes failed");
|
"cudnnGetFilterSizeInBytes failed");
|
||||||
}
|
}
|
||||||
|
input_size_list_.push_back(dy_size_);
|
||||||
input_size_list_.push_back(input_size_);
|
input_size_list_.push_back(input_size_);
|
||||||
input_size_list_.push_back(output_size_);
|
output_size_list_.push_back(output_size_);
|
||||||
|
|
||||||
if ((pad_mode_ == kSamePadModeUpperCase || pad_mode_ == kSamePadModeLowerCase) && use_pad_ && !is_null_input_) {
|
if ((pad_mode_ == kSamePadModeUpperCase || pad_mode_ == kSamePadModeLowerCase) && use_pad_ && !is_null_input_) {
|
||||||
CHECK_CUDNN_RET_WITH_EXCEPT(
|
CHECK_CUDNN_RET_WITH_EXCEPT(
|
||||||
|
@ -313,6 +308,7 @@ class ConvGradFilterGpuBkwKernel : public GpuKernel {
|
||||||
int group_;
|
int group_;
|
||||||
bool is_null_input_;
|
bool is_null_input_;
|
||||||
size_t input_size_;
|
size_t input_size_;
|
||||||
|
size_t dy_size_;
|
||||||
size_t output_size_;
|
size_t output_size_;
|
||||||
size_t padded_size_;
|
size_t padded_size_;
|
||||||
size_t workspace_size_;
|
size_t workspace_size_;
|
||||||
|
|
|
@ -48,7 +48,8 @@ class ConvGradInputGpuBkwKernel : public GpuKernel {
|
||||||
c_(0),
|
c_(0),
|
||||||
group_(1),
|
group_(1),
|
||||||
is_null_input_(false),
|
is_null_input_(false),
|
||||||
input_size_(0),
|
dy_size_(0),
|
||||||
|
w_size_(0),
|
||||||
output_size_(0),
|
output_size_(0),
|
||||||
padded_size_(0),
|
padded_size_(0),
|
||||||
workspace_size_(0),
|
workspace_size_(0),
|
||||||
|
@ -82,7 +83,7 @@ class ConvGradInputGpuBkwKernel : public GpuKernel {
|
||||||
cudnnConvolutionBackwardData(cudnn_handle_, &alpha, w_desc_, w, dy_desc_, dy, conv_desc_, algo_, work_space,
|
cudnnConvolutionBackwardData(cudnn_handle_, &alpha, w_desc_, w, dy_desc_, dy, conv_desc_, algo_, work_space,
|
||||||
workspace_size_, &beta, padded_descriptor_, padded),
|
workspace_size_, &beta, padded_descriptor_, padded),
|
||||||
"ConvolutionBackwardData failed");
|
"ConvolutionBackwardData failed");
|
||||||
CalPadGrad(input_size_ / sizeof(T), padded, n_, c_, old_height_, old_width_, old_height_ + pad_height_,
|
CalPadGrad(output_size_ / sizeof(T), padded, n_, c_, old_height_, old_width_, old_height_ + pad_height_,
|
||||||
old_width_ + pad_width_, pad_top_, pad_left_, dx, reinterpret_cast<cudaStream_t>(stream_ptr));
|
old_width_ + pad_width_, pad_top_, pad_left_, dx, reinterpret_cast<cudaStream_t>(stream_ptr));
|
||||||
} else {
|
} else {
|
||||||
CHECK_CUDNN_RET_WITH_EXCEPT(
|
CHECK_CUDNN_RET_WITH_EXCEPT(
|
||||||
|
@ -153,23 +154,15 @@ class ConvGradInputGpuBkwKernel : public GpuKernel {
|
||||||
}
|
}
|
||||||
void InitSizeLists() override {
|
void InitSizeLists() override {
|
||||||
if (!is_null_input_) {
|
if (!is_null_input_) {
|
||||||
CHECK_CUDNN_RET_WITH_EXCEPT(cudnnGetTensorSizeInBytes(dy_desc_, &input_size_),
|
CHECK_CUDNN_RET_WITH_EXCEPT(cudnnGetTensorSizeInBytes(dy_desc_, &dy_size_), "cudnnGetTensorSizeInBytes failed");
|
||||||
"cudnnGetTensorSizeInBytes failed");
|
CHECK_CUDNN_RET_WITH_EXCEPT(cudnnGetFilterSizeInBytes(w_desc_, &w_size_), "cudnnGetTensorSizeInBytes failed");
|
||||||
CHECK_CUDNN_RET_WITH_EXCEPT(cudnnGetTensorSizeInBytes(dx_desc_, &output_size_),
|
CHECK_CUDNN_RET_WITH_EXCEPT(cudnnGetTensorSizeInBytes(dx_desc_, &output_size_),
|
||||||
"cudnnGetTensorSizeInBytes failed");
|
"cudnnGetTensorSizeInBytes failed");
|
||||||
}
|
}
|
||||||
input_size_list_.push_back(input_size_);
|
input_size_list_.push_back(dy_size_);
|
||||||
|
input_size_list_.push_back(w_size_);
|
||||||
output_size_list_.push_back(output_size_);
|
output_size_list_.push_back(output_size_);
|
||||||
|
|
||||||
if (!is_null_input_) {
|
|
||||||
CHECK_CUDNN_RET_WITH_EXCEPT(cudnnGetTensorSizeInBytes(dx_desc_, &input_size_),
|
|
||||||
"cudnnGetTensorSizeInBytes failed");
|
|
||||||
CHECK_CUDNN_RET_WITH_EXCEPT(cudnnGetFilterSizeInBytes(w_desc_, &output_size_),
|
|
||||||
"cudnnGetTensorSizeInBytes failed");
|
|
||||||
}
|
|
||||||
input_size_list_.push_back(input_size_);
|
|
||||||
input_size_list_.push_back(output_size_);
|
|
||||||
|
|
||||||
if ((pad_mode_ == kSamePadModeUpperCase || pad_mode_ == kSamePadModeLowerCase) && use_pad_ && !is_null_input_) {
|
if ((pad_mode_ == kSamePadModeUpperCase || pad_mode_ == kSamePadModeLowerCase) && use_pad_ && !is_null_input_) {
|
||||||
CHECK_CUDNN_RET_WITH_EXCEPT(cudnnGetTensorSizeInBytes(padded_descriptor_, &padded_size_),
|
CHECK_CUDNN_RET_WITH_EXCEPT(cudnnGetTensorSizeInBytes(padded_descriptor_, &padded_size_),
|
||||||
"cudnnGetTensorSizeInBytes failed");
|
"cudnnGetTensorSizeInBytes failed");
|
||||||
|
@ -309,7 +302,8 @@ class ConvGradInputGpuBkwKernel : public GpuKernel {
|
||||||
std::vector<int> dilation_;
|
std::vector<int> dilation_;
|
||||||
int group_;
|
int group_;
|
||||||
bool is_null_input_;
|
bool is_null_input_;
|
||||||
size_t input_size_;
|
size_t dy_size_;
|
||||||
|
size_t w_size_;
|
||||||
size_t output_size_;
|
size_t output_size_;
|
||||||
size_t padded_size_;
|
size_t padded_size_;
|
||||||
size_t workspace_size_;
|
size_t workspace_size_;
|
||||||
|
|
Loading…
Reference in New Issue