diff --git a/docs/api/api_python/nn/mindspore.nn.Conv2dTranspose.rst b/docs/api/api_python/nn/mindspore.nn.Conv2dTranspose.rst index 1636518bbe0..0f9d539130f 100644 --- a/docs/api/api_python/nn/mindspore.nn.Conv2dTranspose.rst +++ b/docs/api/api_python/nn/mindspore.nn.Conv2dTranspose.rst @@ -1,7 +1,7 @@ mindspore.nn.Conv2dTranspose ============================ -.. py:class:: mindspore.nn.Conv2dTranspose(in_channels, out_channels, kernel_size, stride=1, pad_mode="same", padding=0, dilation=1, group=1, has_bias=False, weight_init="normal", bias_init="zeros") +.. py:class:: mindspore.nn.Conv2dTranspose(in_channels, out_channels, kernel_size, stride=1, pad_mode="same", padding=0, output_padding=0, dilation=1, group=1, has_bias=False, weight_init="normal", bias_init="zeros") 计算二维转置卷积,可以视为Conv2d对输入求梯度,也称为反卷积(实际不是真正的反卷积)。 @@ -21,6 +21,7 @@ mindspore.nn.Conv2dTranspose - **pad**:对输入进行填充。在输入的高度和宽度方向上填充 `padding` 大小的0。如果设置此模式, `padding` 必须大于或等于0。 - **padding** (Union[int, tuple[int]]) - 输入的高度和宽度方向上填充的数量。数据类型为整型或包含四个整数的tuple。如果 `padding` 是一个整数,那么上、下、左、右的填充都等于 `padding` 。如果 `padding` 是一个有四个整数的tuple,那么上、下、左、右的填充分别等于 `padding[0]` 、 `padding[1]` 、 `padding[2]` 和 `padding[3]` 。值应该要大于等于0,默认值:0。 + - **output_padding** (Union[int, tuple[int]]) - 输出的高度和宽度方向上填充的数量。数据类型为整型或包含两个整数的tuple。如果 `output_padding` 是一个整数,那么下、右的填充都等于 `output_padding` 。如果 `output_padding` 是一个有两个整数的tuple,那么下、右的填充分别等于 `output_padding[0]` 、 `output_padding[1]` 。值应该要大于等于0,默认值:0。 - **dilation** (Union[int, tuple[int]]) - 二维卷积核膨胀尺寸。数据类型为整型或具有两个整型的tuple。若 :math:`k > 1` ,则kernel间隔 `k` 个元素进行采样。高度和宽度方向上的 `k` ,其取值范围分别为[1, H]和[1, W]。默认值:1。 - **group** (int) - 将过滤器拆分为组, `in_channels` 和 `out_channels` 必须可被 `group` 整除。如果组数等于 `in_channels` 和 `out_channels` ,这个二维卷积层也被称为二维深度卷积层。默认值:1. - **has_bias** (bool) - Conv2dTranspose层是否添加偏置参数。默认值:False。 @@ -56,9 +57,9 @@ mindspore.nn.Conv2dTranspose .. 
math:: \begin{array}{ll} \\ H_{out} = \text H_{in}\times \text {stride[0]} - (padding[0] + padding[1]) + \text{kernel_size[0]} + (\text{dilation[0]} - 1) \times - (\text{kernel_size[0]} - 1) - \text {stride[0]} \\ + (\text{kernel_size[0]} - 1) - \text {stride[0]} + \text {output_padding[0]} \\ W_{out} = \text W_{in}\times \text {stride[1]} - (padding[2] + padding[3]) + \text{kernel_size[1]} + (\text{dilation[1]} - 1) \times - (\text{kernel_size[1]} - 1) - \text {stride[1]} \\ + (\text{kernel_size[1]} - 1) - \text {stride[1]} + \text {output_padding[1]} \\ \end{array} 异常: diff --git a/mindspore/python/mindspore/nn/layer/conv.py b/mindspore/python/mindspore/nn/layer/conv.py index aecc647e719..3dffc1d7bf4 100644 --- a/mindspore/python/mindspore/nn/layer/conv.py +++ b/mindspore/python/mindspore/nn/layer/conv.py @@ -964,6 +964,11 @@ class Conv2dTranspose(_Conv): If `padding` is a tuple of 4 integers, then the top, bottom, left, and right padding is equal to `padding[0]`, `padding[1]`, `padding[2]`, and `padding[3]` respectively. The value should be greater than or equal to 0. Default: 0. + output_padding (Union[int, tuple[int]]): The number of padding on the height and width directions of the output. + The data type is an integer or a tuple of two integers. If `output_padding` is an integer, + then the bottom and right padding are all equal to `output_padding`. If `output_padding` is a tuple of + 2 integers, then the bottom and right padding is equal to `output_padding[0]`, `output_padding[1]` + respectively. The value should be greater than or equal to 0. Default: 0. dilation (Union[int, tuple[int]]): Dilation size of 2D convolution kernel. The data type is an integer or a tuple of two integers. If :math:`k > 1`, the kernel is sampled every `k` elements. 
The value of `k` on the height and width directions is in range of [1, H] @@ -1011,10 +1016,10 @@ class Conv2dTranspose(_Conv): \begin{array}{ll} \\ H_{out} = \text H_{in}\times \text {stride[0]} - (padding[0] + padding[1]) + \text{kernel_size[0]} + (\text{dilation[0]} - 1) \times - (\text{kernel_size[0]} - 1) - \text {stride[0]} \\ + (\text{kernel_size[0]} - 1) - \text {stride[0]} + \text {output_padding[0]} \\ W_{out} = \text W_{in}\times \text {stride[1]} - (padding[2] + padding[3]) + \text{kernel_size[1]} + (\text{dilation[1]} - 1) \times - (\text{kernel_size[1]} - 1) - \text {stride[1]} \\ + (\text{kernel_size[1]} - 1) - \text {stride[1]} + \text {output_padding[1]} \\ \end{array} Raises: @@ -1044,6 +1049,7 @@ class Conv2dTranspose(_Conv): stride=1, pad_mode='same', padding=0, + output_padding=0, dilation=1, group=1, has_bias=False, @@ -1056,6 +1062,9 @@ class Conv2dTranspose(_Conv): Validator.check_value_type('padding', padding, (int, tuple), self.cls_name) if isinstance(padding, tuple): Validator.check_equal_int(len(padding), 4, 'padding size', self.cls_name) + Validator.check_value_type('output_padding', output_padding, (int, tuple), self.cls_name) + if isinstance(output_padding, tuple): + Validator.check_equal_int(len(output_padding), 2, 'output_padding size', self.cls_name) # out_channels and in_channels swap. # cause Conv2DBackpropInput's out_channel refers to Conv2D's out_channel, # then Conv2dTranspose's out_channel refers to Conv2DBackpropInput's in_channel. 
@@ -1080,6 +1089,7 @@ class Conv2dTranspose(_Conv): self.is_valid = self.pad_mode == 'valid' self.is_same = self.pad_mode == 'same' self.is_pad = self.pad_mode == 'pad' + self.output_padding = output_padding if Validator.check_bool(has_bias, "has_bias", self.cls_name): self.bias = Parameter(initializer(bias_init, [out_channels]), name='bias') @@ -1111,7 +1121,29 @@ class Conv2dTranspose(_Conv): if self.has_bias: return self.bias_add(self.conv2d_transpose(x, self.weight, (n, self.out_channels, h_out, w_out)), self.bias) - return self.conv2d_transpose(x, self.weight, (n, self.out_channels, h_out, w_out)) + conv2d_trans_ret = self.conv2d_transpose(x, self.weight, (n, self.out_channels, h_out, w_out)) + if isinstance(self.output_padding, tuple): + if self.output_padding[0] < 0 or self.output_padding[0] >= max(self.dilation[0], self.stride[0]): + raise ValueError("output_padding[0] must be in range of [0, max(stride_d, dilation_d)).") + if self.output_padding[1] < 0 or self.output_padding[1] >= max(self.dilation[1], self.stride[1]): + raise ValueError("output_padding[1] must be in range of [0, max(stride_d, dilation_d)).") + if not self.is_pad and (self.output_padding[0] > 0 or self.output_padding[1] > 0): + raise ValueError("when output_padding is not zero, pad_mode must be 'pad'") + + pad = P.Pad(paddings=((0, 0), (0, 0), (0, self.output_padding[0]), (0, self.output_padding[1]))) + return pad(conv2d_trans_ret) + + if self.output_padding == 0: + return conv2d_trans_ret + + if self.output_padding < 0 or self.output_padding >= max(self.dilation[0], self.stride[0]): + raise ValueError("output_padding must be in range of [0, max(stride_d, dilation_d)).") + if self.output_padding < 0 or self.output_padding >= max(self.dilation[1], self.stride[1]): + raise ValueError("output_padding must be in range of [0, max(stride_d, dilation_d)).") + if not self.is_pad and self.output_padding > 0: + raise ValueError("when output_padding is not zero, pad_mode must be 'pad'") + pad = 
P.Pad(paddings=((0, 0), (0, 0), (0, self.output_padding), (0, self.output_padding))) + return pad(conv2d_trans_ret) class Conv1dTranspose(_Conv): diff --git a/tests/ut/python/nn/test_conv.py b/tests/ut/python/nn/test_conv.py index 360aab6733c..6b168b2fdd0 100644 --- a/tests/ut/python/nn/test_conv.py +++ b/tests/ut/python/nn/test_conv.py @@ -20,10 +20,6 @@ import mindspore.nn as nn from mindspore import Tensor from ..ut_filter import non_graph_engine -weight = Tensor(np.ones([2, 2])) -in_channels = 3 -out_channels = 64 - class Net(nn.Cell): """ Net definition """ @@ -133,6 +129,7 @@ class NetConv2dTranspose(nn.Cell): stride=1, pad_mode="same", padding=0, + output_padding=0, dilation=1, group=1, has_bias=False, @@ -145,6 +142,7 @@ class NetConv2dTranspose(nn.Cell): stride, pad_mode, padding, + output_padding, dilation, group, has_bias, @@ -202,3 +200,14 @@ def test_compile_transpose_dilation_2_pad_mode_pad(): net = NetConv2dTranspose(3, 64, 4, stride=2, dilation=2, pad_mode='pad', weight_init='normal') input_data = Tensor(np.ones([1, 3, 16, 50], dtype=np.float32)) net(input_data) + + +def test_compile_outputpadding(): + """ + Feature: output_padding + Description: compile with attributer output_padding + Expectation: no error + """ + net = NetConv2dTranspose(1, 1, 3, stride=2, pad_mode='pad', output_padding=1) + input_data = Tensor(np.ones([1, 1, 3, 3], dtype=np.float32)) + net(input_data)