From d91cf86d2d3c4e6a8cd9eb3e94c6284b8c0b4e93 Mon Sep 17 00:00:00 2001
From: liuxiao93
Date: Wed, 24 Mar 2021 15:09:32 +0800
Subject: [PATCH] Add nn.Conv3d and nn.Conv3dTranspose.

---
 mindspore/nn/layer/conv.py           | 396 +++++++++++++++++++++++++--
 mindspore/ops/_grad/grad_nn_ops.py   |   6 +-
 mindspore/ops/operations/__init__.py |   4 +-
 mindspore/ops/operations/nn_ops.py   | 115 ++++++--
 4 files changed, 470 insertions(+), 51 deletions(-)

diff --git a/mindspore/nn/layer/conv.py b/mindspore/nn/layer/conv.py
index 69edd06132c..9563bae1f1d 100644
--- a/mindspore/nn/layer/conv.py
+++ b/mindspore/nn/layer/conv.py
@@ -21,11 +21,11 @@ from mindspore.ops.primitive import constexpr
 from mindspore.common.parameter import Parameter
 from mindspore.common.initializer import initializer
 from mindspore.common.tensor import Tensor
-from mindspore._checkparam import Validator, Rel, twice
+from mindspore._checkparam import Validator, Rel, twice, triple
 from mindspore._extends import cell_attr_register
 from ..cell import Cell

-__all__ = ['Conv2d', 'Conv2dTranspose', 'Conv1d', 'Conv1dTranspose']
+__all__ = ['Conv2d', 'Conv2dTranspose', 'Conv1d', 'Conv1dTranspose', 'Conv3d', 'Conv3dTranspose']


 class _Conv(Cell):
@@ -55,9 +55,11 @@ class _Conv(Cell):
         self.pad_mode = pad_mode
         self.weight_init = weight_init
         self.bias_init = bias_init
-        self.format = Validator.check_string(data_format, ['NCHW', 'NHWC'], 'format', self.cls_name)
+        self.format = Validator.check_string(data_format, ['NCHW', 'NHWC', 'NCDHW'], 'format', self.cls_name)
         if context.get_context("device_target") != "GPU" and self.format == "NHWC":
             raise ValueError("NHWC format only support in GPU target.")
+        if context.get_context("device_target") != "Ascend" and self.format == "NCDHW":
+            raise ValueError("NCDHW format is only supported on the Ascend target.")
         if isinstance(padding, int):
             Validator.check_non_negative_int(padding, 'padding', self.cls_name)
             self.padding = padding
@@ -71,30 +73,23 @@ class _Conv(Cell):
         self.dilation = dilation
         self.group = Validator.check_positive_int(group)
         self.has_bias = has_bias
-        if (not isinstance(kernel_size[0], int)) or (not isinstance(kernel_size[1], int)) or \
-                isinstance(kernel_size[0], bool) or isinstance(kernel_size[1], bool) or \
-                kernel_size[0] < 1 or kernel_size[1] < 1:
-            raise ValueError("Attr 'kernel_size' of 'Conv2D' Op passed "
-                             + str(self.kernel_size) + ", should be a int or tuple and equal to or greater than 1.")
-        if (not isinstance(stride[0], int)) or (not isinstance(stride[1], int)) or \
-                isinstance(stride[0], bool) or isinstance(stride[1], bool) or stride[0] < 1 or stride[1] < 1:
-            raise ValueError("Attr 'stride' of 'Conv2D' Op passed "
-                             + str(self.stride) + ", should be a int or tuple and equal to or greater than 1.")
-        if (not isinstance(dilation[0], int)) or (not isinstance(dilation[1], int)) or \
-                isinstance(dilation[0], bool) or isinstance(dilation[1], bool) or dilation[0] < 1 or dilation[1] < 1:
-            raise ValueError("Attr 'dilation' of 'Conv2D' Op passed "
-                             + str(self.dilation) + ", should be a int or tuple and equal to or greater than 1.")
+        for kernel_size_elem in kernel_size:
+            Validator.check_positive_int(kernel_size_elem, 'kernel_size item', self.cls_name)
+        for stride_elem in stride:
+            Validator.check_positive_int(stride_elem, 'stride item', self.cls_name)
+        for dilation_elem in dilation:
+            Validator.check_positive_int(dilation_elem, 'dilation item', self.cls_name)
         if in_channels % group != 0:
-            raise ValueError("Attr 'in_channels' of 'Conv2D' Op must be divisible by "
-                             "attr 'group' of 'Conv2D' Op.")
+ raise ValueError(f"Attr 'in_channels' of {self.cls_name} Op must be divisible by " + f"attr 'group' of {self.cls_name} Op.") if out_channels % group != 0: - raise ValueError("Attr 'out_channels' of 'Conv2D' Op must be divisible by " - "attr 'group' of 'Conv2D' Op.") + raise ValueError(f"Attr 'out_channels' {self.cls_name} Op must be divisible by " + f"attr 'group' of {self.cls_name} Op.") if transposed: shape = [in_channels, out_channels // group, *kernel_size] else: - shape = [out_channels, in_channels // group, *kernel_size] if self.format == "NCHW" else \ - [out_channels, *kernel_size, in_channels // group] + shape = [out_channels, *kernel_size, in_channels // group] if self.format == "NHWC" else \ + [out_channels, in_channels // group, *kernel_size] self.weight = Parameter(initializer(self.weight_init, shape), name='weight') if Validator.check_bool(has_bias): @@ -476,6 +471,361 @@ class Conv1d(_Conv): return s +@constexpr +def _check_input_5dims(input_shape, op_name): + if len(input_shape) != 5: + raise ValueError(f"For {op_name}, input should be 5 dims, but got shape {input_shape}.") + + +class Conv3d(_Conv): + r""" + 3D convolution layer. + + Applies a 3D convolution over an input tensor which is typically of shape + For input shape :math:`(N, C_{in}, D_{in}, H_{in}, W_{in})` and output shape + :math:`(N, C_{out}, D_{out}, H_{out}, W_{out})`. where :math:`N` is batch size. :math:`C` is channel number. + the formula is defined as: + + .. math:: + + \operatorname{out}\left(N_{i}, C_{\text {out}_j}\right)=\operatorname{bias}\left(C_{\text {out}_j}\right)+ + \sum_{k=0}^{C_{in}-1} ccor(\text {weight}\left(C_{\text {out}_j}, k\right), + \operatorname{input}\left(N_{i}, k\right)) + + where :math:`ccor` is the cross-correlation operator. + + If the 'pad_mode' is set to be "valid", the output height and width will be + :math:`\left \lfloor{1 + \frac{D_{in} + 2 \times \text{padding} - \text{ks_d} - + (\text{ks_d} - 1) \times (\text{dilation} - 1) }{\text{stride}}} \right \rfloor` and + :math:`\left \lfloor{1 + \frac{H_{in} + 2 \times \text{padding} - \text{ks_h} - + (\text{ks_h} - 1) \times (\text{dilation} - 1) }{\text{stride}}} \right \rfloor` and + :math:`\left \lfloor{1 + \frac{W_{in} + 2 \times \text{padding} - \text{ks_w} - + (\text{ks_w} - 1) \times (\text{dilation} - 1) }{\text{stride}}} \right \rfloor` respectively. + + Args: + in_channels (int): The number of input channel :math:`C_{in}`. + out_channels (int): The number of output channel :math:`C_{out}`. + kernel_size (Union[int, tuple[int]]): The data type is int or a tuple of 3 integers. Specifies the height + and width of the 3D convolution window. Single int means the value is for the depth, height and the width + of the kernel. A tuple of 3 ints means the first value is for the depth, second value is for height + and the other is for the width of the kernel. + stride (Union[int, tuple[int]]): The distance of kernel moving, an int number that represents + the depth, height and width of movement are both strides, or a tuple of three int numbers that + represent depth, height and width of movement respectively. Default: 1. + pad_mode (str): Specifies padding mode. The optional values are + "same", "valid", "pad". Default: "same". + + - same: Adopts the way of completion. The depth, height and width of the output will be the same as + the input. The total number of padding will be calculated in depth, horizontal and vertical + directions and evenly distributed to head and tail, top and bottom, left and right if possible. 
+              Otherwise, the last extra padding will be done from the tail, bottom and the right side.
+              If this mode is set, `padding` must be 0.
+
+            - valid: Adopts the way of discarding. The possible largest depth, height and width of output
+              will be returned without padding. Extra pixels will be discarded. If this mode is set, `padding`
+              must be 0.
+
+            - pad: Implicit paddings on both sides of the input in depth, height, width. The number of `padding` will
+              be padded to the input Tensor borders. `padding` must be greater than or equal to 0.
+
+        padding (Union(int, tuple[int])): Implicit paddings on both sides of the input.
+            The data type is int or a tuple of 6 integers. Default: 0. If `padding` is an integer,
+            the paddings of head, tail, top, bottom, left and right are the same, equal to padding.
+            If `padding` is a tuple of six integers, the padding of head, tail, top, bottom, left and right equal to
+            padding[0], padding[1], padding[2], padding[3], padding[4] and padding[5] correspondingly.
+        dilation (Union[int, tuple[int]]): The data type is int or a tuple of 3 integers
+            :math:`(dilation_d, dilation_h, dilation_w)`. Currently, dilation on depth only supports the case of 1.
+            Specifies the dilation rate to use for dilated convolution. If set to be :math:`k > 1`,
+            there will be :math:`k - 1` pixels skipped for each sampling location.
+            Its value must be greater than or equal to 1 and bounded by the height and width of the input. Default: 1.
+        group (int): Splits filter into groups, `in_channels` and `out_channels` must be
+            divisible by the number of groups. Default: 1. Only 1 is currently supported.
+        has_bias (bool): Specifies whether the layer uses a bias vector. Default: False.
+        weight_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the convolution kernel.
+            It can be a Tensor, a string, an Initializer or a number. When a string is specified,
+            values from 'TruncatedNormal', 'Normal', 'Uniform', 'HeUniform' and 'XavierUniform' distributions as well
+            as constant 'One' and 'Zero' distributions are possible. Alias 'xavier_uniform', 'he_uniform', 'ones'
+            and 'zeros' are acceptable. Uppercase and lowercase are both acceptable. Refer to the values of
+            Initializer for more details. Default: 'normal'.
+        bias_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the bias vector. Possible
+            Initializer and string are the same as 'weight_init'. Refer to the values of
+            Initializer for more details. Default: 'zeros'.
+        data_format (str): The optional value for data format. Currently only support "NCDHW".
+
+    Inputs:
+        - **input** (Tensor) - Tensor of shape :math:`(N, C_{in}, D_{in}, H_{in}, W_{in})`.
+          Currently input data type only support float16 and float32.
+
+    Outputs:
+        Tensor, the value that applied 3D convolution. The shape is :math:`(N, C_{out}, D_{out}, H_{out}, W_{out})`.
+
+    Raises:
+        TypeError: If `in_channels`, `out_channels` or `group` is not an int.
+        TypeError: If `kernel_size`, `stride`, `padding` or `dilation` is neither an int nor a tuple of three.
+        ValueError: If `out_channels`, `kernel_size`, `stride` or `dilation` is less than 1.
+        ValueError: If `padding` is less than 0.
+        ValueError: If `pad_mode` is not one of 'same', 'valid', 'pad'.
+        ValueError: If `padding` is a tuple whose length is not equal to 6.
+        ValueError: If `pad_mode` is not equal to 'pad' and `padding` is not equal to (0, 0, 0, 0, 0, 0).
+        ValueError: If `data_format` is not 'NCDHW'.
+
+    Supported Platforms:
+        ``Ascend``
+
+    Examples:
+        >>> input = Tensor(np.ones([16, 3, 10, 32, 32]), mindspore.float32)
+        >>> conv3d = nn.Conv3d(in_channels=3, out_channels=32, kernel_size=(4, 3, 3))
+        >>> output = conv3d(input)
+        >>> print(output.shape)
+        (16, 32, 10, 32, 32)
+    """
+
+    @cell_attr_register
+    def __init__(self,
+                 in_channels,
+                 out_channels,
+                 kernel_size,
+                 stride=1,
+                 pad_mode='same',
+                 padding=0,
+                 dilation=1,
+                 group=1,
+                 has_bias=False,
+                 weight_init='normal',
+                 bias_init='zeros',
+                 data_format='NCDHW'):
+        kernel_size = triple(kernel_size)
+        stride = triple(stride)
+        dilation = triple(dilation)
+        Validator.check_value_type('padding', padding, (int, tuple), self.cls_name)
+        if isinstance(padding, tuple):
+            Validator.check_equal_int(len(padding), 6, 'padding size', self.cls_name)
+        super(Conv3d, self).__init__(
+            in_channels,
+            out_channels,
+            kernel_size,
+            stride,
+            pad_mode,
+            padding,
+            dilation,
+            group,
+            has_bias,
+            weight_init,
+            bias_init,
+            data_format)
+        self.conv3d = P.Conv3D(out_channel=self.out_channels,
+                               kernel_size=self.kernel_size,
+                               mode=1,
+                               pad_mode=self.pad_mode,
+                               pad=self.padding,
+                               stride=self.stride,
+                               dilation=self.dilation,
+                               group=self.group,
+                               data_format=self.format)
+        self.bias_add = P.BiasAdd(data_format=self.format)
+        self.shape = P.Shape()
+
+    def construct(self, x):
+        x_shape = self.shape(x)
+        _check_input_5dims(x_shape, self.cls_name)
+        output = self.conv3d(x, self.weight)
+        if self.has_bias:
+            output = self.bias_add(output, self.bias)
+        return output
+
+    def extend_repr(self):
+        s = 'input_channels={}, output_channels={}, kernel_size={}, ' \
+            'stride={}, pad_mode={}, padding={}, dilation={}, ' \
+            'group={}, has_bias={}, ' \
+            'weight_init={}, bias_init={}, format={}'.format(
+                self.in_channels,
+                self.out_channels,
+                self.kernel_size,
+                self.stride,
+                self.pad_mode,
+                self.padding,
+                self.dilation,
+                self.group,
+                self.has_bias,
+                self.weight_init,
+                self.bias_init,
+                self.format)
+        return s
+
+
+class Conv3dTranspose(_Conv):
+    r"""
+    Computes a 3D transposed convolution, which is also known as a deconvolution
+    (although it is not an actual deconvolution).
+
+    Input is typically of shape :math:`(N, C, D, H, W)`, where :math:`N` is batch size and :math:`C` is channel
+    number.
+
+    If the 'pad_mode' is set to be "pad", the depth, height and width of output are defined as:
+
+    .. math::
+        D_{out} = (D_{in} - 1) \times \text{stride_d} - 2 \times \text{padding_d} + \text{dilation_d} \times
+        (\text{kernel_size_d} - 1) + \text{output_padding_d} + 1
+
+        H_{out} = (H_{in} - 1) \times \text{stride_h} - 2 \times \text{padding_h} + \text{dilation_h} \times
+        (\text{kernel_size_h} - 1) + \text{output_padding_h} + 1
+
+        W_{out} = (W_{in} - 1) \times \text{stride_w} - 2 \times \text{padding_w} + \text{dilation_w} \times
+        (\text{kernel_size_w} - 1) + \text{output_padding_w} + 1
+
+    Args:
+        in_channels (int): The number of input channel :math:`C_{in}`.
+        out_channels (int): The number of output channel :math:`C_{out}`.
+        kernel_size (Union[int, tuple[int]]): The kernel size of the 3D convolution.
+        stride (Union[int, tuple[int]]): The distance of kernel moving, an int number that represents
+            the depth, height and width of movement, or a tuple of three int numbers that
+            represent depth, height and width of movement respectively. Its value must be equal to or greater than 1.
+            Default: 1.
+        pad_mode (str): Select the mode of the pad. The optional values are
+            "pad", "same", "valid". Default: "same".
+
+            - same: Adopts the way of completion. The depth, height and width of the output will be the same as
+              the input. The total number of padding will be calculated in depth, horizontal and vertical
+              directions and evenly distributed to head and tail, top and bottom, left and right if possible.
+              Otherwise, the last extra padding will be done from the tail, bottom and the right side.
+              If this mode is set, `padding` and `output_padding` must be 0.
+
+            - valid: Adopts the way of discarding. The possible largest depth, height and width of output
+              will be returned without padding. Extra pixels will be discarded. If this mode is set, `padding`
+              and `output_padding` must be 0.
+
+            - pad: Implicit paddings on both sides of the input in depth, height, width. The number of `padding` will
+              be padded to the input Tensor borders. `padding` must be greater than or equal to 0.
+
+        padding (Union(int, tuple[int])): The pad value to be filled. Default: 0. If `padding` is an integer,
+            the paddings of head, tail, top, bottom, left and right are the same, equal to padding.
+            If `padding` is a tuple of six integers, the padding of head, tail, top, bottom, left and right equal to
+            padding[0], padding[1], padding[2], padding[3], padding[4] and padding[5] correspondingly.
+        dilation (Union(int, tuple[int])): The data type is int or a tuple of 3 integers
+            :math:`(dilation_d, dilation_h, dilation_w)`. Currently, dilation on depth only supports the case of 1.
+            Specifies the dilation rate to use for dilated convolution. If set to be :math:`k > 1`,
+            there will be :math:`k - 1` pixels skipped for each sampling location.
+            Its value must be greater than or equal to 1 and bounded by the height and width of the input. Default: 1.
+        group (int): Splits filter into groups, `in_channels` and `out_channels` must be
+            divisible by the number of groups. Default: 1. Only 1 is currently supported.
+        output_padding (Union(int, tuple[int])): Add extra size to each dimension of the output. Default: 0.
+            Must be greater than or equal to 0.
+        has_bias (bool): Specifies whether the layer uses a bias vector. Default: False.
+        weight_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the convolution kernel.
+            It can be a Tensor, a string, an Initializer or a number. When a string is specified,
+            values from 'TruncatedNormal', 'Normal', 'Uniform', 'HeUniform' and 'XavierUniform' distributions as well
+            as constant 'One' and 'Zero' distributions are possible. Alias 'xavier_uniform', 'he_uniform', 'ones'
+            and 'zeros' are acceptable. Uppercase and lowercase are both acceptable. Refer to the values of
+            Initializer for more details. Default: 'normal'.
+        bias_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the bias vector. Possible
+            Initializer and string are the same as 'weight_init'. Refer to the values of
+            Initializer for more details. Default: 'zeros'.
+        data_format (str): The optional value for data format. Currently only support 'NCDHW'.
+
+    Inputs:
+        - **input** (Tensor) - Tensor of shape :math:`(N, C_{in}, D_{in}, H_{in}, W_{in})`.
+          Currently input data type only support float16 and float32.
+
+    Outputs:
+        Tensor, the shape is :math:`(N, C_{out}, D_{out}, H_{out}, W_{out})`.
+
+    Supported Platforms:
+        ``Ascend``
+
+    Raises:
+        TypeError: If `in_channels`, `out_channels` or `group` is not an int.
+        TypeError: If `kernel_size`, `stride`, `padding`, `dilation` or `output_padding`
+            is neither an int nor a tuple of three.
+        TypeError: If input data type is not float16 or float32.
+        ValueError: If `in_channels`, `out_channels`, `kernel_size`, `stride` or `dilation` is less than 1.
+        ValueError: If `padding` is less than 0.
+        ValueError: If `pad_mode` is not one of 'same', 'valid', 'pad'.
+        ValueError: If `padding` is a tuple whose length is not equal to 6.
+        ValueError: If `pad_mode` is not equal to 'pad' and `padding` is not equal to (0, 0, 0, 0, 0, 0).
+        ValueError: If `data_format` is not 'NCDHW'.
+
+    Examples:
+        >>> input = Tensor(np.ones([32, 16, 10, 32, 32]), mindspore.float32)
+        >>> conv3d_transpose = nn.Conv3dTranspose(in_channels=16, out_channels=3, kernel_size=(4, 6, 2), pad_mode='pad')
+        >>> output = conv3d_transpose(input)
+        >>> print(output.shape)
+        (32, 3, 13, 37, 33)
+    """

+    def __init__(self,
+                 in_channels,
+                 out_channels,
+                 kernel_size,
+                 stride=1,
+                 pad_mode='same',
+                 padding=0,
+                 dilation=1,
+                 group=1,
+                 output_padding=0,
+                 has_bias=False,
+                 weight_init='normal',
+                 bias_init='zeros',
+                 data_format='NCDHW'):
+        kernel_size = triple(kernel_size)
+        stride = triple(stride)
+        dilation = triple(dilation)
+        Validator.check_value_type('padding', padding, (int, tuple), self.cls_name)
+        if isinstance(padding, tuple):
+            Validator.check_equal_int(len(padding), 6, 'padding size', self.cls_name)
+        output_padding = triple(output_padding)
+        super(Conv3dTranspose, self).__init__(
+            in_channels,
+            out_channels,
+            kernel_size,
+            stride,
+            pad_mode,
+            padding,
+            dilation,
+            group,
+            has_bias,
+            weight_init,
+            bias_init,
+            data_format,
+            transposed=True)
+        self.conv3d_transpose = P.Conv3DTranspose(in_channel=self.in_channels,
+                                                  out_channel=self.out_channels,
+                                                  kernel_size=self.kernel_size,
+                                                  mode=1,
+                                                  pad_mode=self.pad_mode,
+                                                  pad=self.padding,
+                                                  stride=self.stride,
+                                                  dilation=self.dilation,
+                                                  group=self.group,
+                                                  output_padding=output_padding,
+                                                  data_format=self.format)
+        self.bias_add = P.BiasAdd(data_format=self.format)
+        self.shape = P.Shape()
+
+    def construct(self, x):
+        x_shape = self.shape(x)
+        _check_input_5dims(x_shape, self.cls_name)
+        output = self.conv3d_transpose(x, self.weight)
+        if self.has_bias:
+            output = self.bias_add(output, self.bias)
+        return output
+
+    def extend_repr(self):
+        s = 'input_channels={}, output_channels={}, kernel_size={}, ' \
+            'stride={}, pad_mode={}, padding={}, dilation={}, ' \
+            'group={}, has_bias={}, ' \
+            'weight_init={}, bias_init={}'.format(self.in_channels,
+                                                  self.out_channels,
+                                                  self.kernel_size,
+                                                  self.stride,
+                                                  self.pad_mode,
+                                                  self.padding,
+                                                  self.dilation,
+                                                  self.group,
+                                                  self.has_bias,
+                                                  self.weight_init,
+                                                  self.bias_init)
+        return s
+
+
 class Conv2dTranspose(_Conv):
     r"""
     2D transposed convolution layer.
@@ -501,7 +851,7 @@ class Conv2dTranspose(_Conv):
     Args:
         in_channels (int): The number of channels in the input space.
         out_channels (int): The number of channels in the output space.
-        kernel_size (Union[int, tuple]): int or a tuple of 2 integers, which specifies the height
+        kernel_size (Union[int, tuple]): int or a tuple of 2 integers, which specifies the height
             and width of the 2D convolution window. Single int means the value is for both the height and the width of
             the kernel. A tuple of 2 ints means the first value is for the height and the other is for the width of
             the kernel.
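[Reviewer note, not part of the patch] A minimal smoke test for the two new layers, assuming an Ascend device is available (the _Conv check above restricts NCDHW to Ascend); it only re-runs the shape arithmetic from the two docstring examples:

    import numpy as np
    import mindspore
    import mindspore.nn as nn
    from mindspore import Tensor, context

    # NCDHW/Conv3D currently require the Ascend backend (see the _Conv check above).
    context.set_context(device_target="Ascend")

    # Conv3d with the default pad_mode='same' and stride=1 preserves D/H/W.
    x = Tensor(np.ones([16, 3, 10, 32, 32]), mindspore.float32)
    conv3d = nn.Conv3d(in_channels=3, out_channels=32, kernel_size=(4, 3, 3))
    print(conv3d(x).shape)  # (16, 32, 10, 32, 32)

    # Conv3dTranspose with pad_mode='pad', stride=1, padding=0 grows each spatial dim
    # by kernel - 1, e.g. D_out = (10 - 1)*1 - 0 + 1*(4 - 1) + 0 + 1 = 13.
    y = Tensor(np.ones([32, 16, 10, 32, 32]), mindspore.float32)
    deconv = nn.Conv3dTranspose(in_channels=16, out_channels=3, kernel_size=(4, 6, 2), pad_mode='pad')
    print(deconv(y).shape)  # (32, 3, 13, 37, 33)
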
diff --git a/mindspore/ops/_grad/grad_nn_ops.py b/mindspore/ops/_grad/grad_nn_ops.py index d3f4a063ee3..a63ba8730b2 100755 --- a/mindspore/ops/_grad/grad_nn_ops.py +++ b/mindspore/ops/_grad/grad_nn_ops.py @@ -89,13 +89,15 @@ def get_bprop_conv3d(self): @bprop_getters.register(nps.Conv3DTranspose) def get_bprop_conv3d_transpose(self): """Grad definition for `Conv3DTranspose` operation.""" + stride = (self.stride[2], self.stride[3], self.stride[4]) + dilation = (self.dilation[2], self.dilation[3], self.dilation[4]) input_grad = nps.Conv3D( out_channel=self.in_channel, kernel_size=self.kernel_size, mode=self.mode, pad_mode="pad", - pad=self.pad, stride=self.stride, dilation=self.dilation, group=self.group, data_format=self.data_format + pad=self.pad_list, stride=stride, dilation=dilation, group=self.group, data_format=self.data_format ) filter_grad = G.Conv3DBackpropFilter( out_channel=self.in_channel, kernel_size=self.kernel_size, mode=self.mode, pad_mode="pad", - pad=self.pad, stride=self.stride, dilation=self.dilation, group=self.group, data_format=self.data_format + pad=self.pad_list, stride=self.stride, dilation=self.dilation, group=self.group, data_format=self.data_format ) def bprop(x, w, out, dout): diff --git a/mindspore/ops/operations/__init__.py b/mindspore/ops/operations/__init__.py index 5b2013208d8..0685324e0ff 100644 --- a/mindspore/ops/operations/__init__.py +++ b/mindspore/ops/operations/__init__.py @@ -63,7 +63,7 @@ from .random_ops import (RandomChoiceWithMask, StandardNormal, Gamma, Poisson, U RandomCategorical, StandardLaplace, Multinomial, UniformCandidateSampler, LogUniformCandidateSampler) from .nn_ops import (LSTM, SGD, Adam, FusedSparseAdam, FusedSparseLazyAdam, AdamNoUpdateParam, ApplyMomentum, BatchNorm, - BiasAdd, Conv2D, + BiasAdd, Conv2D, Conv3D, Conv3DTranspose, DepthwiseConv2dNative, DropoutDoMask, Dropout, Dropout2D, Dropout3D, DropoutGenMask, Flatten, InstanceNorm, BNTrainingReduce, BNTrainingUpdate, @@ -140,6 +140,8 @@ __all__ = [ 'Xdivy', 'Xlogy', 'Conv2D', + 'Conv3D', + 'Conv3DTranspose', 'Flatten', 'MaxPoolWithArgmax', 'BNTrainingReduce', diff --git a/mindspore/ops/operations/nn_ops.py b/mindspore/ops/operations/nn_ops.py index cd3ed5eb557..031c3527d32 100644 --- a/mindspore/ops/operations/nn_ops.py +++ b/mindspore/ops/operations/nn_ops.py @@ -7765,7 +7765,7 @@ class Conv3D(PrimitiveWithInfer): for each sampling location. Its value must be greater or equal to 1 and bounded by the height and width of the input. Default: 1. group (int): Splits filter into groups, `in_ channels` and `out_channels` must be - divisible by the number of groups. Default: 1. + divisible by the number of groups. Default: 1. Only 1 is currently supported. data_format (str): The optional value for data format. Currently only support "NCDHW". 
Inputs: @@ -7814,10 +7814,9 @@ class Conv3D(PrimitiveWithInfer): """Initialize Conv3D""" self.init_prim_io_names(inputs=['x', 'w'], outputs=['output']) self.kernel_size = _check_3d_int_or_tuple('kernel_size', kernel_size, self.name) - self.stride = _check_3d_int_or_tuple('stride', stride, self.name, allow_five=True, - ret_five=True) + self.stride = _check_3d_int_or_tuple('stride', stride, self.name, allow_five=False, ret_five=True) self.add_prim_attr('strides', self.stride) - self.dilation = _check_3d_int_or_tuple('dilation', dilation, self.name, allow_five=True, + self.dilation = _check_3d_int_or_tuple('dilation', dilation, self.name, allow_five=False, ret_five=True, third_one=True) self.add_prim_attr('dilations', self.dilation) validator.check_value_type('pad', pad, (int, tuple), self.name) @@ -7854,7 +7853,7 @@ class Conv3D(PrimitiveWithInfer): self.format = validator.check_string(data_format, ['NCDHW'], 'format', self.name) self.add_prim_attr('data_format', self.format) self.out_channel = validator.check_positive_int(out_channel, 'out_channel', self.name) - self.group = validator.check_positive_int(group, 'group', self.name) + self.group = validator.check_equal_int(group, 1, 'group', self.name) self.add_prim_attr('groups', self.group) self.add_prim_attr('offset_x', 0) @@ -8074,8 +8073,17 @@ class Conv3DBackpropInput(PrimitiveWithInfer): return out +def _deconv_output_length(input_length, kernel_size, stride_size, dilation_size): + filter_size = kernel_size + (kernel_size - 1) * (dilation_size - 1) + if filter_size - stride_size > 0: + length = input_length * stride_size + filter_size - stride_size + else: + length = input_length * stride_size + return length + + class Conv3DTranspose(PrimitiveWithInfer): - """ + r""" Compute a 3D transposed convolution, which is also known as a deconvolution (although it is not an actual deconvolution). @@ -8091,24 +8099,38 @@ class Conv3DTranspose(PrimitiveWithInfer): (\text{kernel_size_h} - 1) + \text{output_padding_h} + 1 W_{out} = (W_{in} - 1) \times \text{stride_w} - 2 \times \text{padding_w} + \text{dilation_w} \times - (\text{kernel_size_w} - 1) + 1 + (\text{kernel_size_w} - 1) + \text{output_padding_w} + 1 Args: in_channel (int): The channel of the input x. out_channel (int): The channel of the weight x. kernel_size (Union[int, tuple[int]]): The kernel size of the 3D convolution. mode (int): Modes for different convolutions. Default is 1. Not currently used. + pad_mode (str): Specifies padding mode. The optional values are + "same", "valid", "pad". Default: "valid". + + - same: Adopts the way of completion. The depth, height and width of the output will be the same as + the input. The total number of padding will be calculated in depth, horizontal and vertical + directions and evenly distributed to head and tail, top and bottom, left and right if possible. + Otherwise, the last extra padding will be done from the tail, bottom and the right side. + If this mode is set, `pad` and `output_padding` must be 0. + + - valid: Adopts the way of discarding. The possible largest depth, height and width of output + will be returned without padding. Extra pixels will be discarded. If this mode is set, `pad` + and `output_padding` must be 0. + + - pad: Implicit paddings on both sides of the input in depth, height, width. The number of `pad` will + be padded to the input Tensor borders. `pad` must be greater than or equal to 0. + pad (Union(int, tuple[int])): The pad value to be filled. Default: 0. 
            If `pad` is an integer, the paddings of head, tail, top, bottom, left and right are the same, equal to pad.
            If `pad` is a tuple of six integers, the padding of head, tail, top, bottom, left and right equal to
            pad[0], pad[1], pad[2], pad[3], pad[4] and pad[5] correspondingly.
        stride (Union(int, tuple[int])): The stride to be applied to the convolution filter. Default: 1.
        dilation (Union(int, tuple[int])): Specifies the space to use between kernel elements. Default: 1.
-        group (int): Splits input into groups. Default: 1.
+        group (int): Splits input into groups. Default: 1. Only 1 is currently supported.
         output_padding (Union(int, tuple[int])): Add extra size to each dimension of the output. Default: 0.
         data_format (str): The optional value for data format. Currently only support 'NCDHW'.
-        input_size (tuple[int]): A tuple describes the shape of the input which conforms to the format
-            :math:`(N, C_{out}, D_{out}, H_{out}, W_{out})`. Not currently used.

     Inputs:
         - **dout** (Tensor) - the gradients w.r.t the output of the convolution. The shape conforms to the default
@@ -8127,7 +8149,7 @@
     Raise:
         TypeError: If `in_channel`, `out_channel` or `group` is not an int.
-        TypeError: If `kernel_size`, `stride`, `pad` or `dilation` is neither an int not a tuple.
+        TypeError: If `kernel_size`, `stride`, `pad`, `dilation` or `output_padding` is neither an int nor a tuple.
         ValueError: If `in_channel`, `out_channel`, `kernel_size`, `stride` or `dilation` is less than 1.
         ValueError: If `pad` is less than 0.
         ValueError: If `pad_mode` is not one of 'same', 'valid', 'pad'.
@@ -8152,6 +8174,7 @@
                  out_channel,
                  kernel_size,
                  mode=1,
+                 pad_mode='valid',
                  pad=0,
                  stride=1,
                  dilation=1,
@@ -8165,10 +8188,10 @@
         self.out_channel = validator.check_positive_int(out_channel, 'out_channel', self.name)
         self.add_prim_attr('out_channel', self.out_channel)
         self.kernel_size = _check_3d_int_or_tuple('kernel_size', kernel_size, self.name)
-        self.stride = _check_3d_int_or_tuple('stride', stride, self.name, allow_five=True,
+        self.stride = _check_3d_int_or_tuple('stride', stride, self.name, allow_five=False,
                                              ret_five=True)
         self.add_prim_attr('strides', self.stride)
-        self.dilation = _check_3d_int_or_tuple('dilation', dilation, self.name, allow_five=True,
+        self.dilation = _check_3d_int_or_tuple('dilation', dilation, self.name, allow_five=False,
                                                ret_five=True, third_one=True)
         self.add_prim_attr('dilations', self.dilation)
         validator.check_value_type('pad', pad, (int, tuple), self.name)
@@ -8178,8 +8201,15 @@
             raise ValueError(f"For `conv3d` attr 'pad' should be an positive int number or a tuple of "
                              f"six positive int numbers, but got `{len(pad)}`.")
         self.pad_list = pad
-        for item in self.pad_list:
-            validator.check_non_negative_int(item, 'pad item', self.name)
+        self.pad_mode = validator.check_string(pad_mode.lower(), ['valid', 'same', 'pad'], 'pad_mode', self.name)
+        self.add_prim_attr('pad_mode', self.pad_mode)
+
+        if self.pad_mode != 'pad' and pad != (0, 0, 0, 0, 0, 0):
+            raise ValueError(f"For '{self.name}', when pad is not 0, pad_mode should be set as 'pad'.")
+
+        if self.pad_mode == 'pad':
+            for item in self.pad_list:
+                validator.check_non_negative_int(item, 'pad item', self.name)
         validator.check_int_range(self.pad_list[0], 0, self.kernel_size[0], Rel.INC_LEFT,
                                   'pad_d belonging [0, kernel_size_d)', self.name)
         validator.check_int_range(self.pad_list[1], 0,
                                  self.kernel_size[0], Rel.INC_LEFT,
@@ -8194,13 +8224,16 @@
                                   'pad_w belonging [0, kernel_size_w)', self.name)
         self.mode = validator.check_equal_int(mode, 1, 'mode', self.name)
         self.add_prim_attr('mode', self.mode)
-        self.group = validator.check_positive_int(group, 'group', self.name)
+        self.group = validator.check_equal_int(group, 1, 'group', self.name)
         self.add_prim_attr('groups', self.group)
         self.format = validator.check_string(data_format, ['NCDHW'], 'format', self.name)
         self.add_prim_attr('data_format', self.format)
         self.output_padding = _check_3d_int_or_tuple('output_padding', output_padding, self.name,
-                                                     allow_five=True, ret_five=True, greater_zero=False)
+                                                     allow_five=False, ret_five=True, greater_zero=False)
+        output_padding = (self.output_padding[2], self.output_padding[3], self.output_padding[4])
+        if self.pad_mode != 'pad' and output_padding != (0, 0, 0):
+            raise ValueError(f"For '{self.name}', when output_padding is not 0, pad_mode should be set as 'pad'.")
         validator.check_int_range(self.kernel_size[0]*self.kernel_size[1]*self.kernel_size[2], 1, 343, Rel.INC_BOTH,
                                   'The product of height, width and depth of kernel_size belonging [1, 343]', self.name)
         validator.check_int_range(self.stride[0]*self.stride[1]*self.stride[2], 1, 343, Rel.INC_BOTH,
@@ -8213,7 +8246,6 @@
                                   'output_padding_h belonging [0, max(stride_h,dilation_h))', self.name)
         validator.check_int_range(self.output_padding[4], 0, max(self.dilation[4], self.stride[4]), Rel.INC_LEFT,
                                   'output_padding_w belonging [0, max(stride_w,dilation_w))', self.name)
-        self.add_prim_attr('output_padding', self.output_padding)

     def __infer__(self, x, w, b=None):
         args = {'x': x['dtype'], 'w': w['dtype']}
@@ -8230,14 +8262,47 @@
         validator.check("filter's batch", w_shape[0], "input x's channel", x_shape[1], Rel.EQ, self.name)

+        kernel_d, kernel_h, kernel_w = self.kernel_size
+        _, _, stride_d, stride_h, stride_w = self.stride
+        _, _, dilation_d, dilation_h, dilation_w = self.dilation
+
+        if self.pad_mode == "valid":
+            d_out = _deconv_output_length(x_shape[2], kernel_d, stride_d, dilation_d)
+            h_out = _deconv_output_length(x_shape[3], kernel_h, stride_h, dilation_h)
+            w_out = _deconv_output_length(x_shape[4], kernel_w, stride_w, dilation_w)
+            self.pad_list = (0, 0, 0, 0, 0, 0)
+            self.output_padding = (0, 0, 0, 0, 0)
+
+        elif self.pad_mode == "same":
+            d_out = x_shape[2] * stride_d
+            h_out = x_shape[3] * stride_h
+            w_out = x_shape[4] * stride_w
+
+            pad_needed_d = max(0, (x_shape[2] - 1) * stride_d + dilation_d * (kernel_d - 1) + 1 - d_out)
+            pad_head = math.floor(pad_needed_d / 2)
+            pad_tail = pad_needed_d - pad_head
+
+            pad_needed_h = max(0, (x_shape[3] - 1) * stride_h + dilation_h * (kernel_h - 1) + 1 - h_out)
+            pad_top = math.floor(pad_needed_h / 2)
+            pad_bottom = pad_needed_h - pad_top
+
+            pad_needed_w = max(0, (x_shape[4] - 1) * stride_w + dilation_w * (kernel_w - 1) + 1 - w_out)
+            pad_left = math.floor(pad_needed_w / 2)
+            pad_right = pad_needed_w - pad_left
+            self.pad_list = (pad_head, pad_tail, pad_top, pad_bottom, pad_left, pad_right)
+            self.output_padding = (0, 0, 0, 0, 0)
+
+        elif self.pad_mode == 'pad':
+            pad_head, pad_tail, pad_top, pad_bottom, pad_left, pad_right = self.pad_list
+            d_out = (x_shape[2] - 1) * self.stride[2] - (pad_head + pad_tail) + self.dilation[2] * \
+                (self.kernel_size[0] - 1) + self.output_padding[2] + 1
+            h_out = (x_shape[3] - 1) * self.stride[3] - (pad_top + pad_bottom) + self.dilation[3] * \
+                (self.kernel_size[1] - 1) + self.output_padding[3] + 1
+            w_out = (x_shape[4] - 1) * self.stride[4] - (pad_left + pad_right) + self.dilation[4] * \
+                (self.kernel_size[2] - 1) + self.output_padding[4] + 1
+
         self.add_prim_attr('pad_list', self.pad_list)
-        pad_head, pad_tail, pad_top, pad_bottom, pad_left, pad_right = self.pad_list
-        d_out = (x_shape[2] - 1) * self.stride[2] - (pad_head + pad_tail) + self.dilation[2] * \
-            (self.kernel_size[0] - 1) + self.output_padding[2] + 1
-        h_out = (x_shape[3] - 1) * self.stride[3] - (pad_top + pad_bottom) + self.dilation[3] * \
-            (self.kernel_size[1] - 1) + self.output_padding[3] + 1
-        w_out = (x_shape[4] - 1) * self.stride[4] - (pad_left + pad_right) + self.dilation[4] * \
-            (self.kernel_size[2] - 1) + self.output_padding[4] + 1
+        self.add_prim_attr('output_padding', self.output_padding)
         output_shape = (x_shape[0], w_shape[1]*self.group, d_out, h_out, w_out)
         self.add_prim_attr('input_size', output_shape)
         out = {
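[Reviewer note, not part of the patch] The three pad_mode branches in __infer__ reduce to one per-dimension output-length rule. A standalone sketch of that arithmetic (the helper name conv3d_transpose_out_len is ours), mirroring _deconv_output_length for the 'valid' branch and the docstring formula for 'pad':

    def conv3d_transpose_out_len(in_len, kernel, stride, dilation,
                                 pad_mode, pad_pair=(0, 0), output_padding=0):
        # Effective kernel extent once dilation is applied.
        filter_size = kernel + (kernel - 1) * (dilation - 1)
        if pad_mode == 'valid':
            # Same as _deconv_output_length above: stretch by stride, add filter overhang.
            return in_len * stride + max(filter_size - stride, 0)
        if pad_mode == 'same':
            return in_len * stride
        # pad_mode == 'pad': the closed-form transposed-convolution formula from the docstring.
        return (in_len - 1) * stride - sum(pad_pair) + dilation * (kernel - 1) + output_padding + 1

    # Depth and height of the Conv3dTranspose docstring example (stride 1, dilation 1, no padding):
    assert conv3d_transpose_out_len(10, 4, 1, 1, 'pad') == 13
    assert conv3d_transpose_out_len(32, 6, 1, 1, 'pad') == 37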